1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
/* Per-target state (cost tables etc.) used by the routines in this file.
   DEFAULT_TARGET_EXPMED is the single instance; when the compiler is
   built with switchable-target support, THIS_TARGET_EXPMED points at
   whichever instance is currently active.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
47
/* Forward declarations of the file-local helpers defined below.  */
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)

/* Targets that do not define SLOW_UNALIGNED_ACCESS get a conservative
   default: unaligned accesses are slow exactly when the target requires
   strict alignment.  */
#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif


/* Reduce conditional compilation elsewhere: provide dummy definitions
   of the bit-field insert/extract patterns when the target lacks them,
   so the uses below need no #ifdef.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif
95
/* Initialize the per-target cost tables used by the expansion routines
   in this file (add_cost, mul_cost, shift_cost, sdiv_pow2_cheap, ...).
   The costs are probed by building template RTL expressions on the
   stack and asking the target for their set_src_cost, once for the
   size-optimized case and once for the speed-optimized case.  */
void
init_expmed (void)
{
  /* Scratch RTL expressions, built by hand so that no garbage-collected
     memory is allocated.  Each rtx_def is followed by the rtunion slots
     its operands need.  */
  struct
  {
    struct rtx_def reg; rtunion reg_fld[2];
    struct rtx_def plus; rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult; rtunion mult_fld1;
    struct rtx_def sdiv; rtunion sdiv_fld1;
    struct rtx_def udiv; rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
    struct rtx_def smod_32; rtunion smod_32_fld1;
    struct rtx_def wide_mult; rtunion wide_mult_fld1;
    struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift; rtunion shift_fld1;
    struct rtx_def shift_mult; rtunion shift_mult_fld1;
    struct rtx_def shift_add; rtunion shift_add_fld1;
    struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];	/* pow2[m] is the constant 1 << m.  */
  rtx cint[MAX_BITS_PER_WORD];	/* cint[m] is the constant m.  */
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;


  /* Pre-build the small integer constants used as shift counts and
     multipliers below.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  /* Division by the constant 32: used to decide whether a signed
     division/modulus by a power of 2 is cheap on this target.  */
  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  /* (truncate (lshiftrt (mult (zext x) (zext y)) N)) -- the widening
     multiply / high-part multiply template.  */
  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  /* shift+add and shift+sub are probed via their multiply form,
     (plus (mult x 2^m) y) etc.  */
  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  /* speed == 0 probes size costs, speed == 1 probes speed costs.  */
  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
	   mode != VOIDmode;
	   mode = GET_MODE_WIDER_MODE (mode))
	{
	  /* Retarget every template to the current mode.  */
	  PUT_MODE (&all.reg, mode);
	  PUT_MODE (&all.plus, mode);
	  PUT_MODE (&all.neg, mode);
	  PUT_MODE (&all.mult, mode);
	  PUT_MODE (&all.sdiv, mode);
	  PUT_MODE (&all.udiv, mode);
	  PUT_MODE (&all.sdiv_32, mode);
	  PUT_MODE (&all.smod_32, mode);
	  PUT_MODE (&all.wide_trunc, mode);
	  PUT_MODE (&all.shift, mode);
	  PUT_MODE (&all.shift_mult, mode);
	  PUT_MODE (&all.shift_add, mode);
	  PUT_MODE (&all.shift_sub0, mode);
	  PUT_MODE (&all.shift_sub1, mode);

	  add_cost[speed][mode] = set_src_cost (&all.plus, speed);
	  neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
	  mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
	  sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
	  udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

	  sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
					  <= 2 * add_cost[speed][mode]);
	  smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
					  <= 4 * add_cost[speed][mode]);

	  /* Widening multiply and multiply-high-part need the next
	     wider integer mode to exist.  */
	  wider_mode = GET_MODE_WIDER_MODE (mode);
	  if (wider_mode != VOIDmode)
	    {
	      PUT_MODE (&all.zext, wider_mode);
	      PUT_MODE (&all.wide_mult, wider_mode);
	      PUT_MODE (&all.wide_lshr, wider_mode);
	      XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	      mul_widen_cost[speed][wider_mode]
		= set_src_cost (&all.wide_mult, speed);
	      mul_highpart_cost[speed][mode]
		= set_src_cost (&all.wide_trunc, speed);
	    }

	  /* A shift by zero is free; shift-add/sub by zero degenerate
	     to a plain add.  */
	  shift_cost[speed][mode][0] = 0;
	  shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
	    = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

	  n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
	  for (m = 1; m < n; m++)
	    {
	      XEXP (&all.shift, 1) = cint[m];
	      XEXP (&all.shift_mult, 1) = pow2[m];

	      shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
	      shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
							    speed);
	      shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
							     speed);
	      shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
							     speed);
	    }
	}
    }
  /* Invalidate any cached multiplication algorithms from a previous
     target, or mark the hash as in use the first time through.  */
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}
271
272 /* Return an rtx representing minus the value of X.
273 MODE is the intended mode of the result,
274 useful if X is a CONST_INT. */
275
276 rtx
negate_rtx(enum machine_mode mode,rtx x)277 negate_rtx (enum machine_mode mode, rtx x)
278 {
279 rtx result = simplify_unary_operation (NEG, mode, x, mode);
280
281 if (result == 0)
282 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
283
284 return result;
285 }
286
287 /* Report on the availability of insv/extv/extzv and the desired mode
288 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
289 is false; else the mode of the specified operand. If OPNO is -1,
290 all the caller cares about is whether the insn is available. */
291 enum machine_mode
mode_for_extraction(enum extraction_pattern pattern,int opno)292 mode_for_extraction (enum extraction_pattern pattern, int opno)
293 {
294 const struct insn_data_d *data;
295
296 switch (pattern)
297 {
298 case EP_insv:
299 if (HAVE_insv)
300 {
301 data = &insn_data[CODE_FOR_insv];
302 break;
303 }
304 return MAX_MACHINE_MODE;
305
306 case EP_extv:
307 if (HAVE_extv)
308 {
309 data = &insn_data[CODE_FOR_extv];
310 break;
311 }
312 return MAX_MACHINE_MODE;
313
314 case EP_extzv:
315 if (HAVE_extzv)
316 {
317 data = &insn_data[CODE_FOR_extzv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 default:
323 gcc_unreachable ();
324 }
325
326 if (opno == -1)
327 return VOIDmode;
328
329 /* Everyone who uses this function used to follow it with
330 if (result == VOIDmode) result = word_mode; */
331 if (data->operand[opno].mode == VOIDmode)
332 return word_mode;
333 return data->operand[opno].mode;
334 }
335
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   STR_RTX is the structure (REG, SUBREG or MEM) containing the field;
   the field is BITSIZE bits starting at bit BITNUM.  BITREGION_START /
   BITREGION_END delimit the bit region the store may touch (0 when no
   region restriction applies).  FIELDMODE is the mode of the field and
   VALUE is the value to store.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   enum machine_mode fieldmode,
		   rtx value, bool fallback_p)
{
  /* For MEMs we count bit positions in bytes; for registers, in words.  */
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  /* Strip SUBREGs, folding each one's byte offset into BITNUM so that
     the rest of the function can work on the underlying object.  */
  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
		 || validate_subreg (fieldmode, GET_MODE (op0), op0,
				     byte_offset)))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      /* Copy the MEM so we can clear its alias info without touching
	 the original rtx shared elsewhere.  */
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  /* Recurse on the word-sized piece; undo everything on failure.  */
	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy
		 it to a pseudo.  We can remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may
		 have been added above.  */
	      gcc_assert (GET_CODE (op0) == SUBREG
			  && (GET_MODE_SIZE (GET_MODE (op0))
			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	      op0 = SUBREG_REG (op0);
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  /* Try the target's insv pattern, when available and applicable.  */
  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
	   && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      /* Do not use insv if the bit region is restricted and
	 op_mode integer at offset doesn't fit into the
	 restricted region.  */
      && !(MEM_P (op0) && bitregion_end
	   && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
	      > bitregion_end + 1))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
	 truncate to the inner mode, perform the insertion on a temporary and
	 truncate the result to the original destination.  Note that we can't
	 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
	 X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
	  && REG_P (SUBREG_REG (xop0))
	  && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					      op_mode)))
	{
	  rtx tem = gen_reg_rtx (op_mode);
	  emit_move_insn (tem, xop0);
	  xop0 = tem;
	  copy_back = true;
	}

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
	 Otherwise, we count bits from the most significant on a
	 BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization:  Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (op_mode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (op_mode, value1);
	    }
	  else if (CONST_INT_P (value))
	    value1 = gen_int_mode (INTVAL (value), op_mode);
	  else
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
	}

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
	{
	  if (copy_back)
	    convert_move (op0, xop0, true);
	  return true;
	}
      /* insv failed; discard anything it emitted.  */
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
	 BLKmode, get the smallest mode consistent with the alignment. If
	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
	 mode. Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
	  || (op_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
	bestmode = get_best_mode (bitsize, bitnum,
				  bitregion_start, bitregion_end,
				  MEM_ALIGN (op0),
				  (op_mode == MAX_MACHINE_MODE
				   ? VOIDmode : op_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  rtx last, tempreg, xop0;
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  last = get_last_insn ();

	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;
	  xop0 = adjust_address (op0, bestmode, xoffset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  /* All the cheaper strategies failed.  */
  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
			 bitregion_start, bitregion_end, value);
  return true;
}
817
818 /* Generate code to store value from rtx VALUE
819 into a bit-field within structure STR_RTX
820 containing BITSIZE bits starting at bit BITNUM.
821
822 BITREGION_START is bitpos of the first bitfield in this region.
823 BITREGION_END is the bitpos of the ending bitfield in this region.
824 These two fields are 0, if the C++ memory model does not apply,
825 or we are not interested in keeping track of bitfield regions.
826
827 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
828
829 void
store_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT bitregion_start,unsigned HOST_WIDE_INT bitregion_end,enum machine_mode fieldmode,rtx value)830 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
831 unsigned HOST_WIDE_INT bitnum,
832 unsigned HOST_WIDE_INT bitregion_start,
833 unsigned HOST_WIDE_INT bitregion_end,
834 enum machine_mode fieldmode,
835 rtx value)
836 {
837 /* Under the C++0x memory model, we must not touch bits outside the
838 bit region. Adjust the address to start at the beginning of the
839 bit region. */
840 if (MEM_P (str_rtx) && bitregion_start > 0)
841 {
842 enum machine_mode bestmode;
843 enum machine_mode op_mode;
844 unsigned HOST_WIDE_INT offset;
845
846 op_mode = mode_for_extraction (EP_insv, 3);
847 if (op_mode == MAX_MACHINE_MODE)
848 op_mode = VOIDmode;
849
850 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
851
852 offset = bitregion_start / BITS_PER_UNIT;
853 bitnum -= bitregion_start;
854 bitregion_end -= bitregion_start;
855 bitregion_start = 0;
856 bestmode = get_best_mode (bitsize, bitnum,
857 bitregion_start, bitregion_end,
858 MEM_ALIGN (str_rtx),
859 op_mode,
860 MEM_VOLATILE_P (str_rtx));
861 str_rtx = adjust_address (str_rtx, bestmode, offset);
862 }
863
864 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
865 bitregion_start, bitregion_end,
866 fieldmode, value, true))
867 gcc_unreachable ();
868 }
869
870 /* Use shifts and boolean operations to store VALUE
871 into a bit field of width BITSIZE
872 in a memory location specified by OP0 except offset by OFFSET bytes.
873 (OFFSET must be 0 if OP0 is a register.)
874 The field starts at position BITPOS within the byte.
875 (If OP0 is a register, it may be a full word or a narrower mode,
876 but BITPOS still counts within a full word,
877 which is significant on bigendian machines.) */
878
879 static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      /* Register destination: any byte offset must already have been
	 folded into BITPOS by the caller.  */
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 value);
	  return;
	}
    }
  else
    {
      /* MAXBITS caps the access width: never wider than the largest
	 fixed-size integer mode, nor wider than the byte region we are
	 allowed to touch (when BITREGION_END is nonzero).  */
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;

      /* With -fstrict-volatile-bitfields, keep the access in the declared
	 mode of the MEM when it fits the permitted region; otherwise let
	 get_best_mode pick a suitable integer mode.  */
      if (MEM_VOLATILE_P (op0)
	  && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
	  && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
	  && flag_strict_volatile_bitfields > 0)
	mode = GET_MODE (op0);
      else
	mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			      bitregion_start, bitregion_end,
			      MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 bitregion_start, bitregion_end, value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      /* Recognize stores of all-zeros and all-ones so the AND (clear)
	 or the IOR (set) step below can be skipped entirely.  */
      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      /* Masking is unnecessary when VALUE exactly fills the field or
	 when the field reaches the top of MODE (the shift below then
	 discards the excess high bits anyway).  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Write the combined word back only if something changed it.  */
  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1052
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   BITREGION_START and BITREGION_END delimit, in bits, the region of
   memory that may legitimately be touched while storing the field;
   a BITREGION_END of zero means there is no such restriction.
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */
1061
static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  /* Number of bits of the field already stored.  */
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	/* NOTE(review): gen_lowpart_common can return NULL_RTX;
	   presumably the modes reaching here guarantee success — confirm.  */
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  /* Store the field one piece at a time, each piece fitting within one
     UNIT and never crossing a word boundary.  */
  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      /* Bit position of this piece within its unit.  */
      unsigned HOST_WIDE_INT thispos;
      /* Which unit (counted in UNITs from OP0) this piece lands in.  */
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    /* A nonzero offset into a sub-word register is out of
	       bounds; const0_rtx marks the piece to be dropped below.  */
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  enum machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			       thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
1196
1197 /* A subroutine of extract_bit_field_1 that converts return value X
1198 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1199 to extract_bit_field. */
1200
1201 static rtx
convert_extracted_bit_field(rtx x,enum machine_mode mode,enum machine_mode tmode,bool unsignedp)1202 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1203 enum machine_mode tmode, bool unsignedp)
1204 {
1205 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1206 return x;
1207
1208 /* If the x mode is not a scalar integral, first convert to the
1209 integer mode of that size and then access it as a floating-point
1210 value via a SUBREG. */
1211 if (!SCALAR_INT_MODE_P (tmode))
1212 {
1213 enum machine_mode smode;
1214
1215 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1216 x = convert_to_mode (smode, x, unsignedp);
1217 x = force_reg (smode, x);
1218 return gen_lowpart (tmode, x);
1219 }
1220
1221 return convert_to_mode (tmode, x, unsignedp);
1222 }
1223
/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1228
static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum,
		     int unsignedp, bool packedp, rtx target,
		     enum machine_mode mode, enum machine_mode tmode,
		     bool fallback_p)
{
  /* UNIT is the granularity BITNUM is measured in: bytes for MEMs,
     words for registers.  */
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  /* Peel off SUBREG wrappers, folding their byte offsets into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      /* Look for a same-sized vector mode of the right element class
	 that the target supports.  */
      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  The field must lie within a single vector element.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	{
	  target = ops[0].value;
	  if (GET_MODE (target) != mode)
	    return gen_lowpart (tmode, target);
	  return target;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else if (REG_P (op0))
	  {
	    /* No same-sized integer mode exists; copy OP0 into a wider
	       integer register via a paradoxical SUBREG.  */
	    rtx reg, subreg;
	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
					    MODE_INT);
	    reg = gen_reg_rtx (imode);
	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
	    emit_move_insn (subreg, op0);
	    op0 = reg;
	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
	  }
	else
	  {
	    /* Last resort: spill the value to a stack slot and access
	       it as memory.  */
	    rtx mem = assign_stack_temp (GET_MODE (op0),
					 GET_MODE_SIZE (GET_MODE (op0)), 0);
	    emit_move_insn (mem, op0);
	    op0 = adjust_address (mem, BLKmode, 0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1  = (SCALAR_INT_MODE_P (tmode)
	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	    : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (GET_CODE (op0) == MEM
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;

  /* First arm: the field is word-aligned and mode-sized, or starts at
     the (endianness-adjusted) low end of a suitable mode.  Second arm:
     the containing object is accessible at that mode/alignment.  */
  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
	{
	  rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
					 byte_offset);
	  if (sub == NULL)
	    goto no_subreg_mode_swap;
	  op0 = sub;
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, false, target_part, mode,
				 word_mode);

	  gcc_assert (target_part);

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  /* Should probably push op0 out to memory and then do a load.  */
  gcc_assert (int_mode != BLKmode);

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* Now OFFSET is nonzero only for memory operands.  */
  ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
  if (ext_mode != MAX_MACHINE_MODE
      && bitsize > 0
      && GET_MODE_BITSIZE (ext_mode) >= bitsize
      /* Do not use extv/extzv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
	   && flag_strict_volatile_bitfields > 0)
      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
      && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
	   && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
    {
      struct expand_operand ops[4];
      unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
      rtx xop0 = op0;
      rtx xtarget = target;
      rtx xspec_target = target;
      rtx xspec_target_subreg = 0;

      /* If op0 is a register, we need it in EXT_MODE to make it
	 acceptable to the format of ext(z)v.  */
      if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
	xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
      if (MEM_P (xop0))
	/* Get ref to first byte containing part of the field.  */
	xop0 = adjust_address (xop0, byte_mode, xoffset);

      /* Now convert from counting within UNIT to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;

      unit = GET_MODE_BITSIZE (ext_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are extracting from.
	 Otherwise, we count bits from the most significant on a
	 BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      if (xtarget == 0)
	xtarget = xspec_target = gen_reg_rtx (tmode);

      if (GET_MODE (xtarget) != ext_mode)
	{
	  /* Don't use LHS paradoxical subreg if explicit truncation is needed
	     between the mode of the extraction (word_mode) and the target
	     mode.  Instead, create a temporary and use convert_move to set
	     the target.  */
	  if (REG_P (xtarget)
	      && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
	    {
	      xtarget = gen_lowpart (ext_mode, xtarget);
	      if (GET_MODE_PRECISION (ext_mode)
		  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
		xspec_target_subreg = xtarget;
	    }
	  else
	    xtarget = gen_reg_rtx (ext_mode);
	}

      create_output_operand (&ops[0], xtarget, ext_mode);
      create_fixed_operand (&ops[1], xop0);
      create_integer_operand (&ops[2], bitsize);
      create_integer_operand (&ops[3], xbitpos);
      if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
			     4, ops))
	{
	  xtarget = ops[0].value;
	  if (xtarget == xspec_target)
	    return xtarget;
	  if (xtarget == xspec_target_subreg)
	    return xspec_target;
	  return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
	}
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
    {
      enum machine_mode bestmode;

      /* Get the mode to use for inserting into this field.  If
	 OP0 is BLKmode, get the smallest mode consistent with the
	 alignment.  If OP0 is a non-BLKmode object that is no
	 wider than EXT_MODE, use its mode.  Otherwise, use the
	 smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || (ext_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
	bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
				  (ext_mode == MAX_MACHINE_MODE
				   ? VOIDmode : ext_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  /* Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;

	  /* Make sure the register is big enough for the whole field.  */
	  if (xoffset * BITS_PER_UNIT + unit
	      >= offset * BITS_PER_UNIT + bitsize)
	    {
	      rtx last, result, xop0;

	      last = get_last_insn ();

	      /* Fetch it to a register in that size.  */
	      xop0 = adjust_address (op0, bestmode, xoffset);
	      xop0 = force_reg (bestmode, xop0);
	      result = extract_bit_field_1 (xop0, bitsize, xbitpos,
					    unsignedp, packedp, target,
					    mode, tmode, false);
	      if (result)
		return result;

	      /* The register path failed; roll back any insns it
		 emitted before falling through.  */
	      delete_insns_since (last);
	    }
	}
    }

  if (!fallback_p)
    return NULL;

  /* Fall back to shifts and boolean operations.  */
  target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
				    bitpos, target, unsignedp, packedp);
  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
}
1687
1688 /* Generate code to extract a byte-field from STR_RTX
1689 containing BITSIZE bits, starting at BITNUM,
1690 and put it in TARGET if possible (if TARGET is nonzero).
1691 Regardless of TARGET, we return the rtx for where the value is placed.
1692
1693 STR_RTX is the structure containing the byte (a REG or MEM).
1694 UNSIGNEDP is nonzero if this is an unsigned bit field.
1695 PACKEDP is nonzero if the field has the packed attribute.
1696 MODE is the natural mode of the field value once extracted.
1697 TMODE is the mode the caller would like the value to have;
1698 but the value may be returned with type MODE instead.
1699
1700 If a TARGET is specified and we can store in it at no extra cost,
1701 we do so, and return TARGET.
1702 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703 if they are equally easy. */
1704
1705 rtx
extract_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,bool packedp,rtx target,enum machine_mode mode,enum machine_mode tmode)1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 target, mode, tmode, true);
1712 }
1713
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
1716 OP0 addresses a register (word) or memory (byte).
1717 BITPOS says which bit within the word or byte the bit field starts in.
1718 OFFSET says how many bytes farther the bit field starts;
1719 it is 0 if OP0 is a register.
1720 BITSIZE says how many bits long the bit field is.
1721 (If OP0 is a register, it may be narrower than a full word,
1722 but BITPOS still counts within a full word,
1723 which is significant on bigendian machines.)
1724
1725 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1726 PACKEDP is true if the field has the packed attribute.
1727
1728 If TARGET is nonzero, attempts to store the value there
1729 and return TARGET, but this is not guaranteed.
1730 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1731
1732 static rtx
extract_fixed_bit_field(enum machine_mode tmode,rtx op0,unsigned HOST_WIDE_INT offset,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,rtx target,int unsignedp,bool packedp)1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1734 unsigned HOST_WIDE_INT offset,
1735 unsigned HOST_WIDE_INT bitsize,
1736 unsigned HOST_WIDE_INT bitpos, rtx target,
1737 int unsignedp, bool packedp)
1738 {
1739 unsigned int total_bits = BITS_PER_WORD;
1740 enum machine_mode mode;
1741
1742 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1743 {
1744 /* Special treatment for a bit field split across two registers. */
1745 if (bitsize + bitpos > BITS_PER_WORD)
1746 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1747 }
1748 else
1749 {
1750 /* Get the proper mode to use for this field. We want a mode that
1751 includes the entire field. If such a mode would be larger than
1752 a word, we won't be doing the extraction the normal way. */
1753
1754 if (MEM_VOLATILE_P (op0)
1755 && flag_strict_volatile_bitfields > 0)
1756 {
1757 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1758 mode = GET_MODE (op0);
1759 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1760 mode = GET_MODE (target);
1761 else
1762 mode = tmode;
1763 }
1764 else
1765 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1766 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1767
1768 if (mode == VOIDmode)
1769 /* The only way this should occur is if the field spans word
1770 boundaries. */
1771 return extract_split_bit_field (op0, bitsize,
1772 bitpos + offset * BITS_PER_UNIT,
1773 unsignedp);
1774
1775 total_bits = GET_MODE_BITSIZE (mode);
1776
1777 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1778 be in the range 0 to total_bits-1, and put any excess bytes in
1779 OFFSET. */
1780 if (bitpos >= total_bits)
1781 {
1782 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1783 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1784 * BITS_PER_UNIT);
1785 }
1786
1787 /* If we're accessing a volatile MEM, we can't do the next
1788 alignment step if it results in a multi-word access where we
1789 otherwise wouldn't have one. So, check for that case
1790 here. */
1791 if (MEM_P (op0)
1792 && MEM_VOLATILE_P (op0)
1793 && flag_strict_volatile_bitfields > 0
1794 && bitpos + bitsize <= total_bits
1795 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1796 {
1797 if (STRICT_ALIGNMENT)
1798 {
1799 static bool informed_about_misalignment = false;
1800 bool warned;
1801
1802 if (packedp)
1803 {
1804 if (bitsize == total_bits)
1805 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1806 "multiple accesses to volatile structure member"
1807 " because of packed attribute");
1808 else
1809 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810 "multiple accesses to volatile structure bitfield"
1811 " because of packed attribute");
1812
1813 return extract_split_bit_field (op0, bitsize,
1814 bitpos + offset * BITS_PER_UNIT,
1815 unsignedp);
1816 }
1817
1818 if (bitsize == total_bits)
1819 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1820 "mis-aligned access used for structure member");
1821 else
1822 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1823 "mis-aligned access used for structure bitfield");
1824
1825 if (! informed_about_misalignment && warned)
1826 {
1827 informed_about_misalignment = true;
1828 inform (input_location,
1829 "when a volatile object spans multiple type-sized locations,"
1830 " the compiler must choose between using a single mis-aligned access to"
1831 " preserve the volatility, or using multiple aligned accesses to avoid"
1832 " runtime faults; this code may fail at runtime if the hardware does"
1833 " not allow this access");
1834 }
1835 }
1836 }
1837 else
1838 {
1839
1840 /* Get ref to an aligned byte, halfword, or word containing the field.
1841 Adjust BITPOS to be position within a word,
1842 and OFFSET to be the offset of that word.
1843 Then alter OP0 to refer to that word. */
1844 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1845 offset -= (offset % (total_bits / BITS_PER_UNIT));
1846 }
1847
1848 op0 = adjust_address (op0, mode, offset);
1849 }
1850
1851 mode = GET_MODE (op0);
1852
1853 if (BYTES_BIG_ENDIAN)
1854 /* BITPOS is the distance between our msb and that of OP0.
1855 Convert it to the distance from the lsb. */
1856 bitpos = total_bits - bitsize - bitpos;
1857
1858 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1859 We have reduced the big-endian case to the little-endian case. */
1860
1861 if (unsignedp)
1862 {
1863 if (bitpos)
1864 {
1865 /* If the field does not already start at the lsb,
1866 shift it so it does. */
1867 /* Maybe propagate the target for the shift. */
1868 /* But not if we will return it--could confuse integrate.c. */
1869 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1870 if (tmode != mode) subtarget = 0;
1871 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1872 }
1873 /* Convert the value to the desired mode. */
1874 if (mode != tmode)
1875 op0 = convert_to_mode (tmode, op0, 1);
1876
1877 /* Unless the msb of the field used to be the msb when we shifted,
1878 mask out the upper bits. */
1879
1880 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1881 return expand_binop (GET_MODE (op0), and_optab, op0,
1882 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1883 target, 1, OPTAB_LIB_WIDEN);
1884 return op0;
1885 }
1886
1887 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888 then arithmetic-shift its lsb to the lsb of the word. */
1889 op0 = force_reg (mode, op0);
1890
1891 /* Find the narrowest integer mode that contains the field. */
1892
1893 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894 mode = GET_MODE_WIDER_MODE (mode))
1895 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1896 {
1897 op0 = convert_to_mode (mode, op0, 0);
1898 break;
1899 }
1900
1901 if (mode != tmode)
1902 target = 0;
1903
1904 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1905 {
1906 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907 /* Maybe propagate the target for the shift. */
1908 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1910 }
1911
1912 return expand_shift (RSHIFT_EXPR, mode, op0,
1913 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1914 }
1915
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918 complement of that if COMPLEMENT. The mask is truncated if
1919 necessary to the width of mode MODE. The mask is zero-extended if
1920 BITSIZE+BITPOS is too small for MODE. */
1921
1922 static rtx
mask_rtx(enum machine_mode mode,int bitpos,int bitsize,int complement)1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1924 {
1925 double_int mask;
1926
1927 mask = double_int_mask (bitsize);
1928 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1929
1930 if (complement)
1931 mask = double_int_not (mask);
1932
1933 return immed_double_int_const (mask, mode);
1934 }
1935
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1938
1939 static rtx
lshift_value(enum machine_mode mode,rtx value,int bitpos,int bitsize)1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1941 {
1942 double_int val;
1943
1944 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946
1947 return immed_double_int_const (val, mode);
1948 }
1949
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

   The field is assembled piecewise: each loop iteration below extracts
   one chunk that does not cross a UNIT boundary and ORs it into place
   in a word_mode accumulator.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
{
  unsigned int unit;		/* Access granularity in bits.  */
  unsigned int bitsdone = 0;	/* Bits of the field extracted so far.  */
  rtx result = NULL_RTX;
  int first = 1;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      /* Locate the next chunk: OFFSET is in UNITs from the start of OP0,
	 THISPOS is the bit position within that unit.  */
      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.
	 extract_fixed_bit_field wants offset in bytes.  */
      part = extract_fixed_bit_field (word_mode, word,
				      offset * unit / BITS_PER_UNIT,
				      thissize, thispos, 0, 1, false);
      bitsdone += thissize;

      /* Shift this part into place for the result.  */
      if (BYTES_BIG_ENDIAN)
	{
	  /* On big-endian targets the first chunk holds the most
	     significant bits, so shift it up by what remains.  */
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  /* Little-endian: shift this chunk up past the bits already
	     accumulated below it.  */
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
2053
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2056 MODE, fill the upper bits with zeros. Fail if the layout of either
2057 mode is unknown (as for CC modes) or if the extraction would involve
2058 unprofitable mode punning. Return the value on success, otherwise
2059 return null.
2060
2061 This is different from gen_lowpart* in these respects:
2062
2063 - the returned value must always be considered an rvalue
2064
2065 - when MODE is wider than SRC_MODE, the extraction involves
2066 a zero extension
2067
2068 - when MODE is smaller than SRC_MODE, the extraction involves
2069 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2070
2071 In other words, this routine performs a computation, whereas the
2072 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2073 operations. */
2074
2075 rtx
extract_low_bits(enum machine_mode mode,enum machine_mode src_mode,rtx src)2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2077 {
2078 enum machine_mode int_mode, src_int_mode;
2079
2080 if (mode == src_mode)
2081 return src;
2082
2083 if (CONSTANT_P (src))
2084 {
2085 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086 fails, it will happily create (subreg (symbol_ref)) or similar
2087 invalid SUBREGs. */
2088 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2090 if (ret)
2091 return ret;
2092
2093 if (GET_MODE (src) == VOIDmode
2094 || !validate_subreg (mode, src_mode, src, byte))
2095 return NULL_RTX;
2096
2097 src = force_reg (GET_MODE (src), src);
2098 return gen_rtx_SUBREG (mode, src, byte);
2099 }
2100
2101 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2102 return NULL_RTX;
2103
2104 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105 && MODES_TIEABLE_P (mode, src_mode))
2106 {
2107 rtx x = gen_lowpart_common (mode, src);
2108 if (x)
2109 return x;
2110 }
2111
2112 src_int_mode = int_mode_for_mode (src_mode);
2113 int_mode = int_mode_for_mode (mode);
2114 if (src_int_mode == BLKmode || int_mode == BLKmode)
2115 return NULL_RTX;
2116
2117 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2118 return NULL_RTX;
2119 if (!MODES_TIEABLE_P (int_mode, mode))
2120 return NULL_RTX;
2121
2122 src = gen_lowpart (src_int_mode, src);
2123 src = convert_modes (int_mode, src_int_mode, src, true);
2124 src = gen_lowpart (mode, src);
2125 return src;
2126 }
2127
2128 /* Add INC into TARGET. */
2129
2130 void
expand_inc(rtx target,rtx inc)2131 expand_inc (rtx target, rtx inc)
2132 {
2133 rtx value = expand_binop (GET_MODE (target), add_optab,
2134 target, inc,
2135 target, 0, OPTAB_LIB_WIDEN);
2136 if (value != target)
2137 emit_move_insn (target, value);
2138 }
2139
2140 /* Subtract DEC from TARGET. */
2141
2142 void
expand_dec(rtx target,rtx dec)2143 expand_dec (rtx target, rtx dec)
2144 {
2145 rtx value = expand_binop (GET_MODE (target), sub_optab,
2146 target, dec,
2147 target, 0, OPTAB_LIB_WIDEN);
2148 if (value != target)
2149 emit_move_insn (target, value);
2150 }
2151
2152 /* Output a shift instruction for expression code CODE,
2153 with SHIFTED being the rtx for the value to shift,
2154 and AMOUNT the rtx for the amount to shift by.
2155 Store the result in the rtx TARGET, if that is convenient.
2156 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157 Return the rtx for where the value is. */
2158
2159 static rtx
expand_shift_1(enum tree_code code,enum machine_mode mode,rtx shifted,rtx amount,rtx target,int unsignedp)2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161 rtx amount, rtx target, int unsignedp)
2162 {
2163 rtx op1, temp = 0;
2164 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166 optab lshift_optab = ashl_optab;
2167 optab rshift_arith_optab = ashr_optab;
2168 optab rshift_uns_optab = lshr_optab;
2169 optab lrotate_optab = rotl_optab;
2170 optab rrotate_optab = rotr_optab;
2171 enum machine_mode op1_mode;
2172 int attempt;
2173 bool speed = optimize_insn_for_speed_p ();
2174
2175 op1 = amount;
2176 op1_mode = GET_MODE (op1);
2177
2178 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2179 shift amount is a vector, use the vector/vector shift patterns. */
2180 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2181 {
2182 lshift_optab = vashl_optab;
2183 rshift_arith_optab = vashr_optab;
2184 rshift_uns_optab = vlshr_optab;
2185 lrotate_optab = vrotl_optab;
2186 rrotate_optab = vrotr_optab;
2187 }
2188
2189 /* Previously detected shift-counts computed by NEGATE_EXPR
2190 and shifted in the other direction; but that does not work
2191 on all machines. */
2192
2193 if (SHIFT_COUNT_TRUNCATED)
2194 {
2195 if (CONST_INT_P (op1)
2196 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199 % GET_MODE_BITSIZE (mode));
2200 else if (GET_CODE (op1) == SUBREG
2201 && subreg_lowpart_p (op1)
2202 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2203 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2204 op1 = SUBREG_REG (op1);
2205 }
2206
2207 if (op1 == const0_rtx)
2208 return shifted;
2209
2210 /* Check whether its cheaper to implement a left shift by a constant
2211 bit count by a sequence of additions. */
2212 if (code == LSHIFT_EXPR
2213 && CONST_INT_P (op1)
2214 && INTVAL (op1) > 0
2215 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2216 && INTVAL (op1) < MAX_BITS_PER_WORD
2217 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2218 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2219 {
2220 int i;
2221 for (i = 0; i < INTVAL (op1); i++)
2222 {
2223 temp = force_reg (mode, shifted);
2224 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2225 unsignedp, OPTAB_LIB_WIDEN);
2226 }
2227 return shifted;
2228 }
2229
2230 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2231 {
2232 enum optab_methods methods;
2233
2234 if (attempt == 0)
2235 methods = OPTAB_DIRECT;
2236 else if (attempt == 1)
2237 methods = OPTAB_WIDEN;
2238 else
2239 methods = OPTAB_LIB_WIDEN;
2240
2241 if (rotate)
2242 {
2243 /* Widening does not work for rotation. */
2244 if (methods == OPTAB_WIDEN)
2245 continue;
2246 else if (methods == OPTAB_LIB_WIDEN)
2247 {
2248 /* If we have been unable to open-code this by a rotation,
2249 do it as the IOR of two shifts. I.e., to rotate A
2250 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2251 where C is the bitsize of A.
2252
2253 It is theoretically possible that the target machine might
2254 not be able to perform either shift and hence we would
2255 be making two libcalls rather than just the one for the
2256 shift (similarly if IOR could not be done). We will allow
2257 this extremely unlikely lossage to avoid complicating the
2258 code below. */
2259
2260 rtx subtarget = target == shifted ? 0 : target;
2261 rtx new_amount, other_amount;
2262 rtx temp1;
2263
2264 new_amount = op1;
2265 if (CONST_INT_P (op1))
2266 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2267 - INTVAL (op1));
2268 else
2269 other_amount
2270 = simplify_gen_binary (MINUS, GET_MODE (op1),
2271 GEN_INT (GET_MODE_PRECISION (mode)),
2272 op1);
2273
2274 shifted = force_reg (mode, shifted);
2275
2276 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2277 mode, shifted, new_amount, 0, 1);
2278 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2279 mode, shifted, other_amount,
2280 subtarget, 1);
2281 return expand_binop (mode, ior_optab, temp, temp1, target,
2282 unsignedp, methods);
2283 }
2284
2285 temp = expand_binop (mode,
2286 left ? lrotate_optab : rrotate_optab,
2287 shifted, op1, target, unsignedp, methods);
2288 }
2289 else if (unsignedp)
2290 temp = expand_binop (mode,
2291 left ? lshift_optab : rshift_uns_optab,
2292 shifted, op1, target, unsignedp, methods);
2293
2294 /* Do arithmetic shifts.
2295 Also, if we are going to widen the operand, we can just as well
2296 use an arithmetic right-shift instead of a logical one. */
2297 if (temp == 0 && ! rotate
2298 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2299 {
2300 enum optab_methods methods1 = methods;
2301
2302 /* If trying to widen a log shift to an arithmetic shift,
2303 don't accept an arithmetic shift of the same size. */
2304 if (unsignedp)
2305 methods1 = OPTAB_MUST_WIDEN;
2306
2307 /* Arithmetic shift */
2308
2309 temp = expand_binop (mode,
2310 left ? lshift_optab : rshift_arith_optab,
2311 shifted, op1, target, unsignedp, methods1);
2312 }
2313
2314 /* We used to try extzv here for logical right shifts, but that was
2315 only useful for one machine, the VAX, and caused poor code
2316 generation there for lshrdi3, so the code was deleted and a
2317 define_expand for lshrsi3 was added to vax.md. */
2318 }
2319
2320 gcc_assert (temp);
2321 return temp;
2322 }
2323
2324 /* Output a shift instruction for expression code CODE,
2325 with SHIFTED being the rtx for the value to shift,
2326 and AMOUNT the amount to shift by.
2327 Store the result in the rtx TARGET, if that is convenient.
2328 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2329 Return the rtx for where the value is. */
2330
2331 rtx
expand_shift(enum tree_code code,enum machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2332 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2333 int amount, rtx target, int unsignedp)
2334 {
2335 return expand_shift_1 (code, mode,
2336 shifted, GEN_INT (amount), target, unsignedp);
2337 }
2338
2339 /* Output a shift instruction for expression code CODE,
2340 with SHIFTED being the rtx for the value to shift,
2341 and AMOUNT the tree for the amount to shift by.
2342 Store the result in the rtx TARGET, if that is convenient.
2343 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344 Return the rtx for where the value is. */
2345
2346 rtx
expand_variable_shift(enum tree_code code,enum machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2347 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348 tree amount, rtx target, int unsignedp)
2349 {
2350 return expand_shift_1 (code, mode,
2351 shifted, expand_normal (amount), target, unsignedp);
2352 }
2353
2354
/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};

/* Forward declarations for the synthetic-multiplication and
   division-by-constant helpers defined later in this file.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, enum machine_mode mode);
static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
				 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
						 int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
				       int, int);
2374 /* Compute and return the best algorithm for multiplying by T.
2375 The algorithm must cost less than cost_limit
2376 If retval.cost >= COST_LIMIT, no algorithm was found and all
2377 other field of the returned struct are undefined.
2378 MODE is the machine mode of the multiplication. */
2379
2380 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,enum machine_mode mode)2381 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2382 const struct mult_cost *cost_limit, enum machine_mode mode)
2383 {
2384 int m;
2385 struct algorithm *alg_in, *best_alg;
2386 struct mult_cost best_cost;
2387 struct mult_cost new_limit;
2388 int op_cost, op_latency;
2389 unsigned HOST_WIDE_INT orig_t = t;
2390 unsigned HOST_WIDE_INT q;
2391 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2392 int hash_index;
2393 bool cache_hit = false;
2394 enum alg_code cache_alg = alg_zero;
2395 bool speed = optimize_insn_for_speed_p ();
2396
2397 /* Indicate that no algorithm is yet found. If no algorithm
2398 is found, this value will be returned and indicate failure. */
2399 alg_out->cost.cost = cost_limit->cost + 1;
2400 alg_out->cost.latency = cost_limit->latency + 1;
2401
2402 if (cost_limit->cost < 0
2403 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2404 return;
2405
2406 /* Restrict the bits of "t" to the multiplication's mode. */
2407 t &= GET_MODE_MASK (mode);
2408
2409 /* t == 1 can be done in zero cost. */
2410 if (t == 1)
2411 {
2412 alg_out->ops = 1;
2413 alg_out->cost.cost = 0;
2414 alg_out->cost.latency = 0;
2415 alg_out->op[0] = alg_m;
2416 return;
2417 }
2418
2419 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2420 fail now. */
2421 if (t == 0)
2422 {
2423 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2424 return;
2425 else
2426 {
2427 alg_out->ops = 1;
2428 alg_out->cost.cost = zero_cost[speed];
2429 alg_out->cost.latency = zero_cost[speed];
2430 alg_out->op[0] = alg_zero;
2431 return;
2432 }
2433 }
2434
2435 /* We'll be needing a couple extra algorithm structures now. */
2436
2437 alg_in = XALLOCA (struct algorithm);
2438 best_alg = XALLOCA (struct algorithm);
2439 best_cost = *cost_limit;
2440
2441 /* Compute the hash index. */
2442 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2443
2444 /* See if we already know what to do for T. */
2445 if (alg_hash[hash_index].t == t
2446 && alg_hash[hash_index].mode == mode
2447 && alg_hash[hash_index].mode == mode
2448 && alg_hash[hash_index].speed == speed
2449 && alg_hash[hash_index].alg != alg_unknown)
2450 {
2451 cache_alg = alg_hash[hash_index].alg;
2452
2453 if (cache_alg == alg_impossible)
2454 {
2455 /* The cache tells us that it's impossible to synthesize
2456 multiplication by T within alg_hash[hash_index].cost. */
2457 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2458 /* COST_LIMIT is at least as restrictive as the one
2459 recorded in the hash table, in which case we have no
2460 hope of synthesizing a multiplication. Just
2461 return. */
2462 return;
2463
2464 /* If we get here, COST_LIMIT is less restrictive than the
2465 one recorded in the hash table, so we may be able to
2466 synthesize a multiplication. Proceed as if we didn't
2467 have the cache entry. */
2468 }
2469 else
2470 {
2471 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2472 /* The cached algorithm shows that this multiplication
2473 requires more cost than COST_LIMIT. Just return. This
2474 way, we don't clobber this cache entry with
2475 alg_impossible but retain useful information. */
2476 return;
2477
2478 cache_hit = true;
2479
2480 switch (cache_alg)
2481 {
2482 case alg_shift:
2483 goto do_alg_shift;
2484
2485 case alg_add_t_m2:
2486 case alg_sub_t_m2:
2487 goto do_alg_addsub_t_m2;
2488
2489 case alg_add_factor:
2490 case alg_sub_factor:
2491 goto do_alg_addsub_factor;
2492
2493 case alg_add_t2_m:
2494 goto do_alg_add_t2_m;
2495
2496 case alg_sub_t2_m:
2497 goto do_alg_sub_t2_m;
2498
2499 default:
2500 gcc_unreachable ();
2501 }
2502 }
2503 }
2504
2505 /* If we have a group of zero bits at the low-order part of T, try
2506 multiplying by the remaining bits and then doing a shift. */
2507
2508 if ((t & 1) == 0)
2509 {
2510 do_alg_shift:
2511 m = floor_log2 (t & -t); /* m = number of low zero bits */
2512 if (m < maxm)
2513 {
2514 q = t >> m;
2515 /* The function expand_shift will choose between a shift and
2516 a sequence of additions, so the observed cost is given as
2517 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2518 op_cost = m * add_cost[speed][mode];
2519 if (shift_cost[speed][mode][m] < op_cost)
2520 op_cost = shift_cost[speed][mode][m];
2521 new_limit.cost = best_cost.cost - op_cost;
2522 new_limit.latency = best_cost.latency - op_cost;
2523 synth_mult (alg_in, q, &new_limit, mode);
2524
2525 alg_in->cost.cost += op_cost;
2526 alg_in->cost.latency += op_cost;
2527 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528 {
2529 struct algorithm *x;
2530 best_cost = alg_in->cost;
2531 x = alg_in, alg_in = best_alg, best_alg = x;
2532 best_alg->log[best_alg->ops] = m;
2533 best_alg->op[best_alg->ops] = alg_shift;
2534 }
2535
2536 /* See if treating ORIG_T as a signed number yields a better
2537 sequence. Try this sequence only for a negative ORIG_T
2538 as it would be useless for a non-negative ORIG_T. */
2539 if ((HOST_WIDE_INT) orig_t < 0)
2540 {
2541 /* Shift ORIG_T as follows because a right shift of a
2542 negative-valued signed type is implementation
2543 defined. */
2544 q = ~(~orig_t >> m);
2545 /* The function expand_shift will choose between a shift
2546 and a sequence of additions, so the observed cost is
2547 given as MIN (m * add_cost[speed][mode],
2548 shift_cost[speed][mode][m]). */
2549 op_cost = m * add_cost[speed][mode];
2550 if (shift_cost[speed][mode][m] < op_cost)
2551 op_cost = shift_cost[speed][mode][m];
2552 new_limit.cost = best_cost.cost - op_cost;
2553 new_limit.latency = best_cost.latency - op_cost;
2554 synth_mult (alg_in, q, &new_limit, mode);
2555
2556 alg_in->cost.cost += op_cost;
2557 alg_in->cost.latency += op_cost;
2558 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2559 {
2560 struct algorithm *x;
2561 best_cost = alg_in->cost;
2562 x = alg_in, alg_in = best_alg, best_alg = x;
2563 best_alg->log[best_alg->ops] = m;
2564 best_alg->op[best_alg->ops] = alg_shift;
2565 }
2566 }
2567 }
2568 if (cache_hit)
2569 goto done;
2570 }
2571
2572 /* If we have an odd number, add or subtract one. */
2573 if ((t & 1) != 0)
2574 {
2575 unsigned HOST_WIDE_INT w;
2576
2577 do_alg_addsub_t_m2:
2578 for (w = 1; (w & t) != 0; w <<= 1)
2579 ;
2580 /* If T was -1, then W will be zero after the loop. This is another
2581 case where T ends with ...111. Handling this with (T + 1) and
2582 subtract 1 produces slightly better code and results in algorithm
2583 selection much faster than treating it like the ...0111 case
2584 below. */
2585 if (w == 0
2586 || (w > 2
2587 /* Reject the case where t is 3.
2588 Thus we prefer addition in that case. */
2589 && t != 3))
2590 {
2591 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2592
2593 op_cost = add_cost[speed][mode];
2594 new_limit.cost = best_cost.cost - op_cost;
2595 new_limit.latency = best_cost.latency - op_cost;
2596 synth_mult (alg_in, t + 1, &new_limit, mode);
2597
2598 alg_in->cost.cost += op_cost;
2599 alg_in->cost.latency += op_cost;
2600 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601 {
2602 struct algorithm *x;
2603 best_cost = alg_in->cost;
2604 x = alg_in, alg_in = best_alg, best_alg = x;
2605 best_alg->log[best_alg->ops] = 0;
2606 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2607 }
2608 }
2609 else
2610 {
2611 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2612
2613 op_cost = add_cost[speed][mode];
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t - 1, &new_limit, mode);
2617
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621 {
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_add_t_m2;
2627 }
2628 }
2629
2630 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2631 quickly with a - a * n for some appropriate constant n. */
2632 m = exact_log2 (-orig_t + 1);
2633 if (m >= 0 && m < maxm)
2634 {
2635 op_cost = shiftsub1_cost[speed][mode][m];
2636 new_limit.cost = best_cost.cost - op_cost;
2637 new_limit.latency = best_cost.latency - op_cost;
2638 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2639
2640 alg_in->cost.cost += op_cost;
2641 alg_in->cost.latency += op_cost;
2642 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2643 {
2644 struct algorithm *x;
2645 best_cost = alg_in->cost;
2646 x = alg_in, alg_in = best_alg, best_alg = x;
2647 best_alg->log[best_alg->ops] = m;
2648 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2649 }
2650 }
2651
2652 if (cache_hit)
2653 goto done;
2654 }
2655
2656 /* Look for factors of t of the form
2657 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2658 If we find such a factor, we can multiply by t using an algorithm that
2659 multiplies by q, shift the result by m and add/subtract it to itself.
2660
2661 We search for large factors first and loop down, even if large factors
2662 are less probable than small; if we find a large factor we will find a
2663 good sequence quickly, and therefore be able to prune (by decreasing
2664 COST_LIMIT) the search. */
2665
2666 do_alg_addsub_factor:
2667 for (m = floor_log2 (t - 1); m >= 2; m--)
2668 {
2669 unsigned HOST_WIDE_INT d;
2670
2671 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2672 if (t % d == 0 && t > d && m < maxm
2673 && (!cache_hit || cache_alg == alg_add_factor))
2674 {
2675 /* If the target has a cheap shift-and-add instruction use
2676 that in preference to a shift insn followed by an add insn.
2677 Assume that the shift-and-add is "atomic" with a latency
2678 equal to its cost, otherwise assume that on superscalar
2679 hardware the shift may be executed concurrently with the
2680 earlier steps in the algorithm. */
2681 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2682 if (shiftadd_cost[speed][mode][m] < op_cost)
2683 {
2684 op_cost = shiftadd_cost[speed][mode][m];
2685 op_latency = op_cost;
2686 }
2687 else
2688 op_latency = add_cost[speed][mode];
2689
2690 new_limit.cost = best_cost.cost - op_cost;
2691 new_limit.latency = best_cost.latency - op_latency;
2692 synth_mult (alg_in, t / d, &new_limit, mode);
2693
2694 alg_in->cost.cost += op_cost;
2695 alg_in->cost.latency += op_latency;
2696 if (alg_in->cost.latency < op_cost)
2697 alg_in->cost.latency = op_cost;
2698 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 {
2700 struct algorithm *x;
2701 best_cost = alg_in->cost;
2702 x = alg_in, alg_in = best_alg, best_alg = x;
2703 best_alg->log[best_alg->ops] = m;
2704 best_alg->op[best_alg->ops] = alg_add_factor;
2705 }
2706 /* Other factors will have been taken care of in the recursion. */
2707 break;
2708 }
2709
2710 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2711 if (t % d == 0 && t > d && m < maxm
2712 && (!cache_hit || cache_alg == alg_sub_factor))
2713 {
2714 /* If the target has a cheap shift-and-subtract insn use
2715 that in preference to a shift insn followed by a sub insn.
2716 Assume that the shift-and-sub is "atomic" with a latency
2717 equal to it's cost, otherwise assume that on superscalar
2718 hardware the shift may be executed concurrently with the
2719 earlier steps in the algorithm. */
2720 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2721 if (shiftsub0_cost[speed][mode][m] < op_cost)
2722 {
2723 op_cost = shiftsub0_cost[speed][mode][m];
2724 op_latency = op_cost;
2725 }
2726 else
2727 op_latency = add_cost[speed][mode];
2728
2729 new_limit.cost = best_cost.cost - op_cost;
2730 new_limit.latency = best_cost.latency - op_latency;
2731 synth_mult (alg_in, t / d, &new_limit, mode);
2732
2733 alg_in->cost.cost += op_cost;
2734 alg_in->cost.latency += op_latency;
2735 if (alg_in->cost.latency < op_cost)
2736 alg_in->cost.latency = op_cost;
2737 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2738 {
2739 struct algorithm *x;
2740 best_cost = alg_in->cost;
2741 x = alg_in, alg_in = best_alg, best_alg = x;
2742 best_alg->log[best_alg->ops] = m;
2743 best_alg->op[best_alg->ops] = alg_sub_factor;
2744 }
2745 break;
2746 }
2747 }
2748 if (cache_hit)
2749 goto done;
2750
2751 /* Try shift-and-add (load effective address) instructions,
2752 i.e. do a*3, a*5, a*9. */
2753 if ((t & 1) != 0)
2754 {
2755 do_alg_add_t2_m:
2756 q = t - 1;
2757 q = q & -q;
2758 m = exact_log2 (q);
2759 if (m >= 0 && m < maxm)
2760 {
2761 op_cost = shiftadd_cost[speed][mode][m];
2762 new_limit.cost = best_cost.cost - op_cost;
2763 new_limit.latency = best_cost.latency - op_cost;
2764 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2765
2766 alg_in->cost.cost += op_cost;
2767 alg_in->cost.latency += op_cost;
2768 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 {
2770 struct algorithm *x;
2771 best_cost = alg_in->cost;
2772 x = alg_in, alg_in = best_alg, best_alg = x;
2773 best_alg->log[best_alg->ops] = m;
2774 best_alg->op[best_alg->ops] = alg_add_t2_m;
2775 }
2776 }
2777 if (cache_hit)
2778 goto done;
2779
2780 do_alg_sub_t2_m:
2781 q = t + 1;
2782 q = q & -q;
2783 m = exact_log2 (q);
2784 if (m >= 0 && m < maxm)
2785 {
2786 op_cost = shiftsub0_cost[speed][mode][m];
2787 new_limit.cost = best_cost.cost - op_cost;
2788 new_limit.latency = best_cost.latency - op_cost;
2789 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2790
2791 alg_in->cost.cost += op_cost;
2792 alg_in->cost.latency += op_cost;
2793 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794 {
2795 struct algorithm *x;
2796 best_cost = alg_in->cost;
2797 x = alg_in, alg_in = best_alg, best_alg = x;
2798 best_alg->log[best_alg->ops] = m;
2799 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2800 }
2801 }
2802 if (cache_hit)
2803 goto done;
2804 }
2805
2806 done:
2807 /* If best_cost has not decreased, we have not found any algorithm. */
2808 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2809 {
2810 /* We failed to find an algorithm. Record alg_impossible for
2811 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2812 we are asked to find an algorithm for T within the same or
2813 lower COST_LIMIT, we can immediately return to the
2814 caller. */
2815 alg_hash[hash_index].t = t;
2816 alg_hash[hash_index].mode = mode;
2817 alg_hash[hash_index].speed = speed;
2818 alg_hash[hash_index].alg = alg_impossible;
2819 alg_hash[hash_index].cost = *cost_limit;
2820 return;
2821 }
2822
2823 /* Cache the result. */
2824 if (!cache_hit)
2825 {
2826 alg_hash[hash_index].t = t;
2827 alg_hash[hash_index].mode = mode;
2828 alg_hash[hash_index].speed = speed;
2829 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2830 alg_hash[hash_index].cost.cost = best_cost.cost;
2831 alg_hash[hash_index].cost.latency = best_cost.latency;
2832 }
2833
2834 /* If we are getting a too long sequence for `struct algorithm'
2835 to record, make this search fail. */
2836 if (best_alg->ops == MAX_BITS_PER_WORD)
2837 return;
2838
2839 /* Copy the algorithm from temporary space to the space at alg_out.
2840 We avoid using structure assignment because the majority of
2841 best_alg is normally undefined, and this is a critical function. */
2842 alg_out->ops = best_alg->ops + 1;
2843 alg_out->cost = best_cost;
2844 memcpy (alg_out->op, best_alg->op,
2845 alg_out->ops * sizeof *alg_out->op);
2846 memcpy (alg_out->log, best_alg->log,
2847 alg_out->ops * sizeof *alg_out->log);
2848 }
2849
2850 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2851 Try three variations:
2852
2853 - a shift/add sequence based on VAL itself
2854 - a shift/add sequence based on -VAL, followed by a negation
2855 - a shift/add sequence based on VAL - 1, followed by an addition.
2856
2857 Return true if the cheapest of these cost less than MULT_COST,
2858 describing the algorithm in *ALG and final fixup in *VARIANT. */
2859
2860 static bool
choose_mult_variant(enum machine_mode mode,HOST_WIDE_INT val,struct algorithm * alg,enum mult_variant * variant,int mult_cost)2861 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2862 struct algorithm *alg, enum mult_variant *variant,
2863 int mult_cost)
2864 {
2865 struct algorithm alg2;
2866 struct mult_cost limit;
2867 int op_cost;
2868 bool speed = optimize_insn_for_speed_p ();
2869
2870 /* Fail quickly for impossible bounds. */
2871 if (mult_cost < 0)
2872 return false;
2873
2874 /* Ensure that mult_cost provides a reasonable upper bound.
2875 Any constant multiplication can be performed with less
2876 than 2 * bits additions. */
2877 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2878 if (mult_cost > op_cost)
2879 mult_cost = op_cost;
2880
2881 *variant = basic_variant;
2882 limit.cost = mult_cost;
2883 limit.latency = mult_cost;
2884 synth_mult (alg, val, &limit, mode);
2885
2886 /* This works only if the inverted value actually fits in an
2887 `unsigned int' */
2888 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2889 {
2890 op_cost = neg_cost[speed][mode];
2891 if (MULT_COST_LESS (&alg->cost, mult_cost))
2892 {
2893 limit.cost = alg->cost.cost - op_cost;
2894 limit.latency = alg->cost.latency - op_cost;
2895 }
2896 else
2897 {
2898 limit.cost = mult_cost - op_cost;
2899 limit.latency = mult_cost - op_cost;
2900 }
2901
2902 synth_mult (&alg2, -val, &limit, mode);
2903 alg2.cost.cost += op_cost;
2904 alg2.cost.latency += op_cost;
2905 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2906 *alg = alg2, *variant = negate_variant;
2907 }
2908
2909 /* This proves very useful for division-by-constant. */
2910 op_cost = add_cost[speed][mode];
2911 if (MULT_COST_LESS (&alg->cost, mult_cost))
2912 {
2913 limit.cost = alg->cost.cost - op_cost;
2914 limit.latency = alg->cost.latency - op_cost;
2915 }
2916 else
2917 {
2918 limit.cost = mult_cost - op_cost;
2919 limit.latency = mult_cost - op_cost;
2920 }
2921
2922 synth_mult (&alg2, val - 1, &limit, mode);
2923 alg2.cost.cost += op_cost;
2924 alg2.cost.latency += op_cost;
2925 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2926 *alg = alg2, *variant = add_variant;
2927
2928 return MULT_COST_LESS (&alg->cost, mult_cost);
2929 }
2930
2931 /* A subroutine of expand_mult, used for constant multiplications.
2932 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2933 convenient. Use the shift/add sequence described by ALG and apply
2934 the final fixup specified by VARIANT. */
2935
static rtx
expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  /* VAL_SO_FAR tracks the multiplier synthesized so far; it is updated
     in lock-step with the emitted insns, used for the REG_EQUAL notes,
     and checked against VAL at the end.  */
  HOST_WIDE_INT val_so_far;
  rtx insn, accum, tem;
  int opno;
  enum machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, const0_rtx);
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Emit one insn (or shift+add/sub pair) per remaining step of the
     algorithm.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      /* When not optimizing, reuse ACCUM/TARGET as explicit subtargets
	 to keep the register count down.  */
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= (1 + 2^log).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= (2^log - 1).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Write a REG_EQUAL note on the last insn so that we can cse
	 multiplication sequences.  Note that if ACCUM is a SUBREG,
	 we've set the inner register and must properly indicate
	 that.  */

      tem = op0, nmode = mode;
      accum_inner = accum;
      if (GET_CODE (accum) == SUBREG)
	{
	  accum_inner = SUBREG_REG (accum);
	  nmode = GET_MODE (accum_inner);
	  tem = gen_lowpart (nmode, op0);
	}

      insn = get_last_insn ();
      set_dst_reg_note (insn, REG_EQUAL,
			gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
			accum_inner);
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  val &= GET_MODE_MASK (mode);
  val_so_far &= GET_MODE_MASK (mode);
  gcc_assert (val == val_so_far);

  return accum;
}
3074
3075 /* Perform a multiplication and return an rtx for the result.
3076 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3077 TARGET is a suggestion for where to store the result (an rtx).
3078
3079 We check specially for a constant integer as OP1.
3080 If you want this check for OP0 as well, then before calling
3081 you should swap the two operands if OP0 would be constant. */
3082
rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Trivial constants: 0, 1 and -1 need no multiply at all.
     Handling const0_rtx here allows us to use zero as a rogue value for
     coeff below.  */
  if (op1 == const0_rtx)
    return const0_rtx;
  if (op1 == const1_rtx)
    return op0;
  if (op1 == constm1_rtx)
    return expand_unop (mode,
			GET_MODE_CLASS (mode) == MODE_INT
			&& !unsignedp && flag_trapv
			? negv_optab : neg_optab,
			op0, target, 0);

  /* These are the operations that are potentially turned into a sequence
     of shifts and additions.  Trapping signed arithmetic is excluded,
     since the shift/add sequence would not trap on overflow.  */
  if (SCALAR_INT_MODE_P (mode)
      && (unsignedp || !flag_trapv))
    {
      /* COEFF stays 0 (the rogue value) unless a usable constant
	 multiplier is found below.  */
      HOST_WIDE_INT coeff = 0;
      /* A scratch pseudo used only for rtx cost estimation.  */
      rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */

      if (CONST_INT_P (op1))
	{
	  /* Attempt to handle multiplication of DImode values by negative
	     coefficients, by performing the multiplication by a positive
	     multiplier and then inverting the result.  */
	  if (INTVAL (op1) < 0
	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
	    {
	      /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
		 result is interpreted as an unsigned coefficient.
		 Exclude cost of op0 from max_cost to match the cost
		 calculation of the synth_mult.  */
	      max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
					speed)
			  - neg_cost[speed][mode]);
	      if (max_cost > 0
		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
					  &variant, max_cost))
		{
		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
						NULL_RTX, &algorithm,
						variant);
		  return expand_unop (mode, neg_optab, temp, target, 0);
		}
	    }
	  else coeff = INTVAL (op1);
	}
      else if (GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are multiplying in DImode, it may still be a win
	     to try to work with shifts and adds.  */
	  if (CONST_DOUBLE_HIGH (op1) == 0
	      && CONST_DOUBLE_LOW (op1) > 0)
	    coeff = CONST_DOUBLE_LOW (op1);
	  else if (CONST_DOUBLE_LOW (op1) == 0
		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
	    {
	      /* The constant is a power of two with the set bit in the
		 high word; a single long shift suffices.  */
	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
			  + HOST_BITS_PER_WIDE_INT;
	      return expand_shift (LSHIFT_EXPR, mode, op0,
				   shift, target, unsignedp);
	    }
	}

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */
      if (coeff != 0)
	{
	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 floor_log2 (coeff), target, unsignedp);

	  /* Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    return expand_mult_const (mode, op0, coeff, target,
				      &algorithm, variant);
	}
    }

  /* Canonicalize: if OP0 is the CONST_DOUBLE, move it to OP1 so the
     x*2.0 check below sees it.  */
  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx temp = op0;
      op0 = op1;
      op1 = temp;
    }

  /* Expand x*2.0 as x+x.  */
  if (GET_CODE (op1) == CONST_DOUBLE
      && SCALAR_FLOAT_MODE_P (mode))
    {
      REAL_VALUE_TYPE d;
      REAL_VALUE_FROM_CONST_DOUBLE (d, op1);

      if (REAL_VALUES_EQUAL (d, dconst2))
	{
	  op0 = force_reg (GET_MODE (op0), op0);
	  return expand_binop (mode, add_optab, op0, op0,
			       target, unsignedp, OPTAB_LIB_WIDEN);
	}
    }

  /* Fall back to a real multiply instruction or libcall.
     This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode,
		      ! unsignedp
		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
		      ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3217
3218 /* Perform a widening multiplication and return an rtx for the result.
3219 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3220 TARGET is a suggestion for where to store the result (an rtx).
3221 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3222 or smul_widen_optab.
3223
3224 We check specially for a constant integer as OP1, comparing the
3225 cost of a widening multiply against the cost of a sequence of shifts
3226 and adds. */
3227
3228 rtx
expand_widening_mult(enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,optab this_optab)3229 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3230 int unsignedp, optab this_optab)
3231 {
3232 bool speed = optimize_insn_for_speed_p ();
3233 rtx cop1;
3234
3235 if (CONST_INT_P (op1)
3236 && GET_MODE (op0) != VOIDmode
3237 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3238 this_optab == umul_widen_optab))
3239 && CONST_INT_P (cop1)
3240 && (INTVAL (cop1) >= 0
3241 || HWI_COMPUTABLE_MODE_P (mode)))
3242 {
3243 HOST_WIDE_INT coeff = INTVAL (cop1);
3244 int max_cost;
3245 enum mult_variant variant;
3246 struct algorithm algorithm;
3247
3248 /* Special case powers of two. */
3249 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3250 {
3251 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3252 return expand_shift (LSHIFT_EXPR, mode, op0,
3253 floor_log2 (coeff), target, unsignedp);
3254 }
3255
3256 /* Exclude cost of op0 from max_cost to match the cost
3257 calculation of the synth_mult. */
3258 max_cost = mul_widen_cost[speed][mode];
3259 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3260 max_cost))
3261 {
3262 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3263 return expand_mult_const (mode, op0, coeff, target,
3264 &algorithm, variant);
3265 }
3266 }
3267 return expand_binop (mode, this_optab, op0, op1, target,
3268 unsignedp, OPTAB_LIB_WIDEN);
3269 }
3270
3271 /* Return the smallest n such that 2**n >= X. */
3272
3273 int
ceil_log2(unsigned HOST_WIDE_INT x)3274 ceil_log2 (unsigned HOST_WIDE_INT x)
3275 {
3276 return floor_log2 (x - 1) + 1;
3277 }
3278
3279 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3280 replace division by D, and put the least significant N bits of the result
3281 in *MULTIPLIER_PTR and return the most significant bit.
3282
3283 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3284 needed precision is in PRECISION (should be <= N).
3285
3286 PRECISION should be as small as possible so this function can choose
3287 multiplier more freely.
3288
3289 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3290 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3291
3292 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3293 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3294
static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
{
  /* mlow and mhigh are each held as a double-word value in a
     (high, low) pair of HOST_WIDE_INTs.  */
  HOST_WIDE_INT mhigh_hi, mlow_hi;
  unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
  int lgup, post_shift;
  int pow, pow2;
  /* (nh, nl) is the double-word numerator; dummy1/dummy2 receive the
     unused remainder from div_and_round_double.  */
  unsigned HOST_WIDE_INT nl, dummy1;
  HOST_WIDE_INT nh, dummy2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* We could handle this with some effort, but this case is much
     better handled directly with a scc insn, so rely on caller using
     that.  */
  gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);

  /* mlow = 2^(N + lgup)/d */
  /* Build the numerator 2^pow as a double-word value first.  */
  if (pow >= HOST_BITS_PER_WIDE_INT)
    {
      nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
      nl = 0;
    }
  else
    {
      nh = 0;
      nl = (unsigned HOST_WIDE_INT) 1 << pow;
    }
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mlow_lo, &mlow_hi, &dummy1, &dummy2);

  /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
  /* OR the extra 2^pow2 bit into the numerator already in (nh, nl).  */
  if (pow2 >= HOST_BITS_PER_WIDE_INT)
    nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
  else
    nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
  div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);

  gcc_assert (!mhigh_hi || nh - d < d);
  gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
  /* Assert that mlow < mhigh.  */
  gcc_assert (mlow_hi < mhigh_hi
	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  Halve both bounds (shifting the high
     word's bit into the low word) and decrease post_shift as long as
     the halved bounds remain distinct.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
      unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
      if (ml_lo >= mh_lo)
	break;

      mlow_hi = 0;
      mlow_lo = ml_lo;
      mhigh_hi = 0;
      mhigh_lo = mh_lo;
    }

  /* Return the low N bits of mhigh in *MULTIPLIER_PTR and its top
     (N+1st) bit as the function value.  */
  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
      return mhigh_lo >= mask;
    }
  else
    {
      *multiplier_ptr = GEN_INT (mhigh_lo);
      return mhigh_hi;
    }
}
3379
3380 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3381 congruent to 1 (mod 2**N). */
3382
3383 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3384 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3385 {
3386 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3387
3388 /* The algorithm notes that the choice y = x satisfies
3389 x*y == 1 mod 2^3, since x is assumed odd.
3390 Each iteration doubles the number of bits of significance in y. */
3391
3392 unsigned HOST_WIDE_INT mask;
3393 unsigned HOST_WIDE_INT y = x;
3394 int nbit = 3;
3395
3396 mask = (n == HOST_BITS_PER_WIDE_INT
3397 ? ~(unsigned HOST_WIDE_INT) 0
3398 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3399
3400 while (nbit < n)
3401 {
3402 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3403 nbit *= 2;
3404 }
3405 return y;
3406 }
3407
3408 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3409 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3410 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3411 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3412 become signed.
3413
3414 The result is put in TARGET if that is convenient.
3415
3416 MODE is the mode of operation. */
3417
3418 rtx
expand_mult_highpart_adjust(enum machine_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3419 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3420 rtx op1, rtx target, int unsignedp)
3421 {
3422 rtx tem;
3423 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3424
3425 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3426 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3427 tem = expand_and (mode, tem, op1, NULL_RTX);
3428 adj_operand
3429 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3430 adj_operand);
3431
3432 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3433 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3434 tem = expand_and (mode, tem, op0, NULL_RTX);
3435 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3436 target);
3437
3438 return target;
3439 }
3440
3441 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3442
3443 static rtx
extract_high_half(enum machine_mode mode,rtx op)3444 extract_high_half (enum machine_mode mode, rtx op)
3445 {
3446 enum machine_mode wider_mode;
3447
3448 if (mode == word_mode)
3449 return gen_highpart (mode, op);
3450
3451 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3452
3453 wider_mode = GET_MODE_WIDER_MODE (mode);
3454 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3455 GET_MODE_BITSIZE (mode), 0, 1);
3456 return convert_modes (mode, wider_mode, op, 0);
3457 }
3458
3459 /* Like expand_mult_highpart, but only consider using a multiplication
3460 optab. OP1 is an rtx for the constant operand. */
3461
static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  enum machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Each strategy below is tried in order, gated on its estimated
     cost staying under MAX_COST.  */

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost[speed][mode] < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The cost
     estimate includes two shifts and four adds for the adjustment.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
	  + 4 * add_cost[speed][mode] < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost[speed][wider_mode] < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
    {
      rtx insns, wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
	  + 4 * add_cost[speed][mode] < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* All strategies were too expensive or unsupported.  */
  return 0;
}
3561
3562 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3563 putting the high half of the result in TARGET if that is convenient,
3564 and return where the result is. If the operation can not be performed,
3565 0 is returned.
3566
3567 MODE is the mode of operation and result.
3568
3569 UNSIGNEDP nonzero means unsigned multiply.
3570
3571 MAX_COST is the total allowed cost for the expanded RTL. */
3572
static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  /* The constant multiplier, truncated to MODE's width.  */
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expand_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Extra cost of the shift/add route: the final high-half extraction
     shift, plus a sign-correcting subtract when needed.  */
  extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];

  /* Check whether we try to multiply by a negative constant.  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost[speed][mode];
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Perform the full-width multiply in WIDER_MODE and keep the
	 high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  /* Shift/add was too expensive; let the optab-based expansion use the
     whole budget.  */
  return expand_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
3633
3634
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   The caller guarantees D is a power of two; the result follows C
   truncated-division semantics (same sign as OP0).  */

static rtx
expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  unsigned HOST_WIDE_INT masklow, maskhigh;
  rtx result, temp, shift, label;
  int logd;

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* signmask is all ones if OP0 is negative, zero otherwise.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  signmask = force_reg (mode, signmask);
	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* Conditionally negate OP0 with XOR/SUB against the sign
		 mask, mask off the low bits, then conditionally negate
		 back: ((((OP0 ^ s) - s) & mask) ^ s) - s, s = signmask.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shift the sign mask down to the bias (2**logd - 1 for
		 negative OP0, 0 otherwise), add it, mask, subtract it:
		 ((OP0 + bias) & mask) - bias.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */

  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
  if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
      maskhigh = -1;
    }
  else
    maskhigh = (HOST_WIDE_INT) -1
		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value (sign bit included) is >= 0, OP0 was
     non-negative and RESULT already holds the remainder.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* OP0 was negative with a nonzero low part: convert the unsigned
     remainder to the signed one via ((r - 1) | -2**logd) + 1.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  masklow = (HOST_WIDE_INT) -1 << logd;
  maskhigh = -1;
  temp = expand_binop (mode, ior_optab, temp,
		       immed_double_const (masklow, maskhigh, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
3735
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.  The result
   rounds toward zero (C truncated division).  */

static rtx
expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp, label;
  int logd;

  logd = floor_log2 (d);

  /* Special-case D == 2: (OP0 + (OP0 < 0)) >> 1 rounds toward zero
     with a single store-flag, add and shift.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

#ifdef HAVE_conditional_move
  if (BRANCH_COST (optimize_insn_for_speed_p (), false)
      >= 2)
    {
      rtx temp2;

      /* ??? emit_conditional_move forces a stack adjustment via
	 compare_from_rtx so, if the sequence is discarded, it will
	 be lost.  Do it now instead.  */
      do_pending_stack_adjust ();

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      /* TEMP = OP0 + (D - 1): the biased dividend used when OP0 < 0.  */
      temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      /* Conditional move was not available; discard the sequence and
	 fall through to the branch-free store-flag variant below.  */
      end_sequence ();
    }
#endif

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      /* TEMP = -1 if OP0 < 0, else 0.  Turn that into the bias D - 1
	 either by masking or by an unsigned right shift, whichever is
	 cheaper on this target.  */
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Branching fallback: add D - 1 to negative dividends only, then
     arithmetic-shift right by LOGD.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, GEN_INT (d - 1));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
3814
3815 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3816 if that is convenient, and returning where the result is.
3817 You may request either the quotient or the remainder as the result;
3818 specify REM_FLAG nonzero to get the remainder.
3819
3820 CODE is the expression code for which kind of division this is;
3821 it controls how rounding is done. MODE is the machine mode to use.
3822 UNSIGNEDP nonzero means do unsigned division. */
3823
3824 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3825 and then correct it by or'ing in missing high bits
3826 if result of ANDI is nonzero.
3827 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3828 This could optimize to a bfexts instruction.
3829 But C doesn't use these operations, so their optimizations are
3830 left for later. */
3831 /* ??? For modulo, we don't actually need the highpart of the first product,
3832 the low part will do nicely. And for small divisors, the second multiply
3833 can also be a low-part only multiply or even be completely left out.
3834 E.g. to calculate the remainder of a division by 3 with a 32 bit
3835 multiply, multiply with 0x55555556 and extract the upper two bits;
3836 the result is exact for inputs up to 0x1fffffff.
3837 The input range can be reduced by using cross-sum rules.
3838 For odd divisors >= 3, the following table gives right shift counts
3839 so that if a number is shifted by an integer multiple of the given
3840 amount, the remainder stays the same:
3841 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3842 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3843 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3844 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3845 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3846
3847 Cross-sum rules for even numbers can be derived by leaving as many bits
3848 to the right alone as the divisor has zeros to the right.
3849 E.g. if x is an unsigned 32 bit number:
3850 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3851 */
3852
3853 rtx
expand_divmod(int rem_flag,enum tree_code code,enum machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)3854 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3855 rtx op0, rtx op1, rtx target, int unsignedp)
3856 {
3857 enum machine_mode compute_mode;
3858 rtx tquotient;
3859 rtx quotient = 0, remainder = 0;
3860 rtx last;
3861 int size;
3862 rtx insn;
3863 optab optab1, optab2;
3864 int op1_is_constant, op1_is_pow2 = 0;
3865 int max_cost, extra_cost;
3866 static HOST_WIDE_INT last_div_const = 0;
3867 static HOST_WIDE_INT ext_op1;
3868 bool speed = optimize_insn_for_speed_p ();
3869
3870 op1_is_constant = CONST_INT_P (op1);
3871 if (op1_is_constant)
3872 {
3873 ext_op1 = INTVAL (op1);
3874 if (unsignedp)
3875 ext_op1 &= GET_MODE_MASK (mode);
3876 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3877 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3878 }
3879
3880 /*
3881 This is the structure of expand_divmod:
3882
3883 First comes code to fix up the operands so we can perform the operations
3884 correctly and efficiently.
3885
3886 Second comes a switch statement with code specific for each rounding mode.
3887 For some special operands this code emits all RTL for the desired
3888 operation, for other cases, it generates only a quotient and stores it in
3889 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3890 to indicate that it has not done anything.
3891
3892 Last comes code that finishes the operation. If QUOTIENT is set and
3893 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3894 QUOTIENT is not set, it is computed using trunc rounding.
3895
3896 We try to generate special code for division and remainder when OP1 is a
3897 constant. If |OP1| = 2**n we can use shifts and some other fast
3898 operations. For other values of OP1, we compute a carefully selected
3899 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3900 by m.
3901
3902 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3903 half of the product. Different strategies for generating the product are
3904 implemented in expand_mult_highpart.
3905
3906 If what we actually want is the remainder, we generate that by another
3907 by-constant multiplication and a subtraction. */
3908
3909 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3910 code below will malfunction if we are, so check here and handle
3911 the special case if so. */
3912 if (op1 == const1_rtx)
3913 return rem_flag ? const0_rtx : op0;
3914
3915 /* When dividing by -1, we could get an overflow.
3916 negv_optab can handle overflows. */
3917 if (! unsignedp && op1 == constm1_rtx)
3918 {
3919 if (rem_flag)
3920 return const0_rtx;
3921 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3922 ? negv_optab : neg_optab, op0, target, 0);
3923 }
3924
3925 if (target
3926 /* Don't use the function value register as a target
3927 since we have to read it as well as write it,
3928 and function-inlining gets confused by this. */
3929 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3930 /* Don't clobber an operand while doing a multi-step calculation. */
3931 || ((rem_flag || op1_is_constant)
3932 && (reg_mentioned_p (target, op0)
3933 || (MEM_P (op0) && MEM_P (target))))
3934 || reg_mentioned_p (target, op1)
3935 || (MEM_P (op1) && MEM_P (target))))
3936 target = 0;
3937
3938 /* Get the mode in which to perform this computation. Normally it will
3939 be MODE, but sometimes we can't do the desired operation in MODE.
3940 If so, pick a wider mode in which we can do the operation. Convert
3941 to that mode at the start to avoid repeated conversions.
3942
3943 First see what operations we need. These depend on the expression
3944 we are evaluating. (We assume that divxx3 insns exist under the
3945 same conditions that modxx3 insns and that these insns don't normally
3946 fail. If these assumptions are not correct, we may generate less
3947 efficient code in some cases.)
3948
3949 Then see if we find a mode in which we can open-code that operation
3950 (either a division, modulus, or shift). Finally, check for the smallest
3951 mode for which we can do the operation with a library call. */
3952
3953 /* We might want to refine this now that we have division-by-constant
3954 optimization. Since expand_mult_highpart tries so many variants, it is
3955 not straightforward to generalize this. Maybe we should make an array
3956 of possible modes in init_expmed? Save this for GCC 2.7. */
3957
3958 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3959 ? (unsignedp ? lshr_optab : ashr_optab)
3960 : (unsignedp ? udiv_optab : sdiv_optab));
3961 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3962 ? optab1
3963 : (unsignedp ? udivmod_optab : sdivmod_optab));
3964
3965 for (compute_mode = mode; compute_mode != VOIDmode;
3966 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3967 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3968 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3969 break;
3970
3971 if (compute_mode == VOIDmode)
3972 for (compute_mode = mode; compute_mode != VOIDmode;
3973 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3974 if (optab_libfunc (optab1, compute_mode)
3975 || optab_libfunc (optab2, compute_mode))
3976 break;
3977
3978 /* If we still couldn't find a mode, use MODE, but expand_binop will
3979 probably die. */
3980 if (compute_mode == VOIDmode)
3981 compute_mode = mode;
3982
3983 if (target && GET_MODE (target) == compute_mode)
3984 tquotient = target;
3985 else
3986 tquotient = gen_reg_rtx (compute_mode);
3987
3988 size = GET_MODE_BITSIZE (compute_mode);
3989 #if 0
3990 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3991 (mode), and thereby get better code when OP1 is a constant. Do that
3992 later. It will require going over all usages of SIZE below. */
3993 size = GET_MODE_BITSIZE (mode);
3994 #endif
3995
3996 /* Only deduct something for a REM if the last divide done was
3997 for a different constant. Then set the constant of the last
3998 divide. */
3999 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4000 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4001 && INTVAL (op1) == last_div_const))
4002 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4003
4004 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4005
4006 /* Now convert to the best mode to use. */
4007 if (compute_mode != mode)
4008 {
4009 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4010 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4011
4012 /* convert_modes may have placed op1 into a register, so we
4013 must recompute the following. */
4014 op1_is_constant = CONST_INT_P (op1);
4015 op1_is_pow2 = (op1_is_constant
4016 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4017 || (! unsignedp
4018 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4019 }
4020
4021 /* If one of the operands is a volatile MEM, copy it into a register. */
4022
4023 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4024 op0 = force_reg (compute_mode, op0);
4025 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4026 op1 = force_reg (compute_mode, op1);
4027
4028 /* If we need the remainder or if OP1 is constant, we need to
4029 put OP0 in a register in case it has any queued subexpressions. */
4030 if (rem_flag || op1_is_constant)
4031 op0 = force_reg (compute_mode, op0);
4032
4033 last = get_last_insn ();
4034
4035 /* Promote floor rounding to trunc rounding for unsigned operations. */
4036 if (unsignedp)
4037 {
4038 if (code == FLOOR_DIV_EXPR)
4039 code = TRUNC_DIV_EXPR;
4040 if (code == FLOOR_MOD_EXPR)
4041 code = TRUNC_MOD_EXPR;
4042 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4043 code = TRUNC_DIV_EXPR;
4044 }
4045
4046 if (op1 != const0_rtx)
4047 switch (code)
4048 {
4049 case TRUNC_MOD_EXPR:
4050 case TRUNC_DIV_EXPR:
4051 if (op1_is_constant)
4052 {
4053 if (unsignedp)
4054 {
4055 unsigned HOST_WIDE_INT mh;
4056 int pre_shift, post_shift;
4057 int dummy;
4058 rtx ml;
4059 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4060 & GET_MODE_MASK (compute_mode));
4061
4062 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063 {
4064 pre_shift = floor_log2 (d);
4065 if (rem_flag)
4066 {
4067 remainder
4068 = expand_binop (compute_mode, and_optab, op0,
4069 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4070 remainder, 1,
4071 OPTAB_LIB_WIDEN);
4072 if (remainder)
4073 return gen_lowpart (mode, remainder);
4074 }
4075 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4076 pre_shift, tquotient, 1);
4077 }
4078 else if (size <= HOST_BITS_PER_WIDE_INT)
4079 {
4080 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4081 {
4082 /* Most significant bit of divisor is set; emit an scc
4083 insn. */
4084 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4085 compute_mode, 1, 1);
4086 }
4087 else
4088 {
4089 /* Find a suitable multiplier and right shift count
4090 instead of multiplying with D. */
4091
4092 mh = choose_multiplier (d, size, size,
4093 &ml, &post_shift, &dummy);
4094
4095 /* If the suggested multiplier is more than SIZE bits,
4096 we can do better for even divisors, using an
4097 initial right shift. */
4098 if (mh != 0 && (d & 1) == 0)
4099 {
4100 pre_shift = floor_log2 (d & -d);
4101 mh = choose_multiplier (d >> pre_shift, size,
4102 size - pre_shift,
4103 &ml, &post_shift, &dummy);
4104 gcc_assert (!mh);
4105 }
4106 else
4107 pre_shift = 0;
4108
4109 if (mh != 0)
4110 {
4111 rtx t1, t2, t3, t4;
4112
4113 if (post_shift - 1 >= BITS_PER_WORD)
4114 goto fail1;
4115
4116 extra_cost
4117 = (shift_cost[speed][compute_mode][post_shift - 1]
4118 + shift_cost[speed][compute_mode][1]
4119 + 2 * add_cost[speed][compute_mode]);
4120 t1 = expand_mult_highpart (compute_mode, op0, ml,
4121 NULL_RTX, 1,
4122 max_cost - extra_cost);
4123 if (t1 == 0)
4124 goto fail1;
4125 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4126 op0, t1),
4127 NULL_RTX);
4128 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4129 t2, 1, NULL_RTX, 1);
4130 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4131 t1, t3),
4132 NULL_RTX);
4133 quotient = expand_shift
4134 (RSHIFT_EXPR, compute_mode, t4,
4135 post_shift - 1, tquotient, 1);
4136 }
4137 else
4138 {
4139 rtx t1, t2;
4140
4141 if (pre_shift >= BITS_PER_WORD
4142 || post_shift >= BITS_PER_WORD)
4143 goto fail1;
4144
4145 t1 = expand_shift
4146 (RSHIFT_EXPR, compute_mode, op0,
4147 pre_shift, NULL_RTX, 1);
4148 extra_cost
4149 = (shift_cost[speed][compute_mode][pre_shift]
4150 + shift_cost[speed][compute_mode][post_shift]);
4151 t2 = expand_mult_highpart (compute_mode, t1, ml,
4152 NULL_RTX, 1,
4153 max_cost - extra_cost);
4154 if (t2 == 0)
4155 goto fail1;
4156 quotient = expand_shift
4157 (RSHIFT_EXPR, compute_mode, t2,
4158 post_shift, tquotient, 1);
4159 }
4160 }
4161 }
4162 else /* Too wide mode to use tricky code */
4163 break;
4164
4165 insn = get_last_insn ();
4166 if (insn != last)
4167 set_dst_reg_note (insn, REG_EQUAL,
4168 gen_rtx_UDIV (compute_mode, op0, op1),
4169 quotient);
4170 }
4171 else /* TRUNC_DIV, signed */
4172 {
4173 unsigned HOST_WIDE_INT ml;
4174 int lgup, post_shift;
4175 rtx mlr;
4176 HOST_WIDE_INT d = INTVAL (op1);
4177 unsigned HOST_WIDE_INT abs_d;
4178
4179 /* Since d might be INT_MIN, we have to cast to
4180 unsigned HOST_WIDE_INT before negating to avoid
4181 undefined signed overflow. */
4182 abs_d = (d >= 0
4183 ? (unsigned HOST_WIDE_INT) d
4184 : - (unsigned HOST_WIDE_INT) d);
4185
4186 /* n rem d = n rem -d */
4187 if (rem_flag && d < 0)
4188 {
4189 d = abs_d;
4190 op1 = gen_int_mode (abs_d, compute_mode);
4191 }
4192
4193 if (d == 1)
4194 quotient = op0;
4195 else if (d == -1)
4196 quotient = expand_unop (compute_mode, neg_optab, op0,
4197 tquotient, 0);
4198 else if (HOST_BITS_PER_WIDE_INT >= size
4199 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200 {
4201 /* This case is not handled correctly below. */
4202 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4203 compute_mode, 1, 1);
4204 if (quotient == 0)
4205 goto fail1;
4206 }
4207 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4208 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4209 : sdiv_pow2_cheap[speed][compute_mode])
4210 /* We assume that cheap metric is true if the
4211 optab has an expander for this mode. */
4212 && ((optab_handler ((rem_flag ? smod_optab
4213 : sdiv_optab),
4214 compute_mode)
4215 != CODE_FOR_nothing)
4216 || (optab_handler (sdivmod_optab,
4217 compute_mode)
4218 != CODE_FOR_nothing)))
4219 ;
4220 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4221 {
4222 if (rem_flag)
4223 {
4224 remainder = expand_smod_pow2 (compute_mode, op0, d);
4225 if (remainder)
4226 return gen_lowpart (mode, remainder);
4227 }
4228
4229 if (sdiv_pow2_cheap[speed][compute_mode]
4230 && ((optab_handler (sdiv_optab, compute_mode)
4231 != CODE_FOR_nothing)
4232 || (optab_handler (sdivmod_optab, compute_mode)
4233 != CODE_FOR_nothing)))
4234 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4235 compute_mode, op0,
4236 gen_int_mode (abs_d,
4237 compute_mode),
4238 NULL_RTX, 0);
4239 else
4240 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4241
4242 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4243 negate the quotient. */
4244 if (d < 0)
4245 {
4246 insn = get_last_insn ();
4247 if (insn != last
4248 && abs_d < ((unsigned HOST_WIDE_INT) 1
4249 << (HOST_BITS_PER_WIDE_INT - 1)))
4250 set_dst_reg_note (insn, REG_EQUAL,
4251 gen_rtx_DIV (compute_mode, op0,
4252 gen_int_mode
4253 (abs_d,
4254 compute_mode)),
4255 quotient);
4256
4257 quotient = expand_unop (compute_mode, neg_optab,
4258 quotient, quotient, 0);
4259 }
4260 }
4261 else if (size <= HOST_BITS_PER_WIDE_INT)
4262 {
4263 choose_multiplier (abs_d, size, size - 1,
4264 &mlr, &post_shift, &lgup);
4265 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4266 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4267 {
4268 rtx t1, t2, t3;
4269
4270 if (post_shift >= BITS_PER_WORD
4271 || size - 1 >= BITS_PER_WORD)
4272 goto fail1;
4273
4274 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4275 + shift_cost[speed][compute_mode][size - 1]
4276 + add_cost[speed][compute_mode]);
4277 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4278 NULL_RTX, 0,
4279 max_cost - extra_cost);
4280 if (t1 == 0)
4281 goto fail1;
4282 t2 = expand_shift
4283 (RSHIFT_EXPR, compute_mode, t1,
4284 post_shift, NULL_RTX, 0);
4285 t3 = expand_shift
4286 (RSHIFT_EXPR, compute_mode, op0,
4287 size - 1, NULL_RTX, 0);
4288 if (d < 0)
4289 quotient
4290 = force_operand (gen_rtx_MINUS (compute_mode,
4291 t3, t2),
4292 tquotient);
4293 else
4294 quotient
4295 = force_operand (gen_rtx_MINUS (compute_mode,
4296 t2, t3),
4297 tquotient);
4298 }
4299 else
4300 {
4301 rtx t1, t2, t3, t4;
4302
4303 if (post_shift >= BITS_PER_WORD
4304 || size - 1 >= BITS_PER_WORD)
4305 goto fail1;
4306
4307 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4308 mlr = gen_int_mode (ml, compute_mode);
4309 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4310 + shift_cost[speed][compute_mode][size - 1]
4311 + 2 * add_cost[speed][compute_mode]);
4312 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4313 NULL_RTX, 0,
4314 max_cost - extra_cost);
4315 if (t1 == 0)
4316 goto fail1;
4317 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4318 t1, op0),
4319 NULL_RTX);
4320 t3 = expand_shift
4321 (RSHIFT_EXPR, compute_mode, t2,
4322 post_shift, NULL_RTX, 0);
4323 t4 = expand_shift
4324 (RSHIFT_EXPR, compute_mode, op0,
4325 size - 1, NULL_RTX, 0);
4326 if (d < 0)
4327 quotient
4328 = force_operand (gen_rtx_MINUS (compute_mode,
4329 t4, t3),
4330 tquotient);
4331 else
4332 quotient
4333 = force_operand (gen_rtx_MINUS (compute_mode,
4334 t3, t4),
4335 tquotient);
4336 }
4337 }
4338 else /* Too wide mode to use tricky code */
4339 break;
4340
4341 insn = get_last_insn ();
4342 if (insn != last)
4343 set_dst_reg_note (insn, REG_EQUAL,
4344 gen_rtx_DIV (compute_mode, op0, op1),
4345 quotient);
4346 }
4347 break;
4348 }
4349 fail1:
4350 delete_insns_since (last);
4351 break;
4352
4353 case FLOOR_DIV_EXPR:
4354 case FLOOR_MOD_EXPR:
4355 /* We will come here only for signed operations. */
4356 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4357 {
4358 unsigned HOST_WIDE_INT mh;
4359 int pre_shift, lgup, post_shift;
4360 HOST_WIDE_INT d = INTVAL (op1);
4361 rtx ml;
4362
4363 if (d > 0)
4364 {
4365 /* We could just as easily deal with negative constants here,
4366 but it does not seem worth the trouble for GCC 2.6. */
4367 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4368 {
4369 pre_shift = floor_log2 (d);
4370 if (rem_flag)
4371 {
4372 remainder = expand_binop (compute_mode, and_optab, op0,
4373 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4374 remainder, 0, OPTAB_LIB_WIDEN);
4375 if (remainder)
4376 return gen_lowpart (mode, remainder);
4377 }
4378 quotient = expand_shift
4379 (RSHIFT_EXPR, compute_mode, op0,
4380 pre_shift, tquotient, 0);
4381 }
4382 else
4383 {
4384 rtx t1, t2, t3, t4;
4385
4386 mh = choose_multiplier (d, size, size - 1,
4387 &ml, &post_shift, &lgup);
4388 gcc_assert (!mh);
4389
4390 if (post_shift < BITS_PER_WORD
4391 && size - 1 < BITS_PER_WORD)
4392 {
4393 t1 = expand_shift
4394 (RSHIFT_EXPR, compute_mode, op0,
4395 size - 1, NULL_RTX, 0);
4396 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4397 NULL_RTX, 0, OPTAB_WIDEN);
4398 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4399 + shift_cost[speed][compute_mode][size - 1]
4400 + 2 * add_cost[speed][compute_mode]);
4401 t3 = expand_mult_highpart (compute_mode, t2, ml,
4402 NULL_RTX, 1,
4403 max_cost - extra_cost);
4404 if (t3 != 0)
4405 {
4406 t4 = expand_shift
4407 (RSHIFT_EXPR, compute_mode, t3,
4408 post_shift, NULL_RTX, 1);
4409 quotient = expand_binop (compute_mode, xor_optab,
4410 t4, t1, tquotient, 0,
4411 OPTAB_WIDEN);
4412 }
4413 }
4414 }
4415 }
4416 else
4417 {
4418 rtx nsign, t1, t2, t3, t4;
4419 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4420 op0, constm1_rtx), NULL_RTX);
4421 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422 0, OPTAB_WIDEN);
4423 nsign = expand_shift
4424 (RSHIFT_EXPR, compute_mode, t2,
4425 size - 1, NULL_RTX, 0);
4426 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427 NULL_RTX);
4428 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4429 NULL_RTX, 0);
4430 if (t4)
4431 {
4432 rtx t5;
4433 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434 NULL_RTX, 0);
4435 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4436 t4, t5),
4437 tquotient);
4438 }
4439 }
4440 }
4441
4442 if (quotient != 0)
4443 break;
4444 delete_insns_since (last);
4445
4446 /* Try using an instruction that produces both the quotient and
4447 remainder, using truncation. We can easily compensate the quotient
4448 or remainder to get floor rounding, once we have the remainder.
4449 Notice that we compute also the final remainder value here,
4450 and return the result right away. */
4451 if (target == 0 || GET_MODE (target) != compute_mode)
4452 target = gen_reg_rtx (compute_mode);
4453
4454 if (rem_flag)
4455 {
4456 remainder
4457 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4458 quotient = gen_reg_rtx (compute_mode);
4459 }
4460 else
4461 {
4462 quotient
4463 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4464 remainder = gen_reg_rtx (compute_mode);
4465 }
4466
4467 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4468 quotient, remainder, 0))
4469 {
4470 /* This could be computed with a branch-less sequence.
4471 Save that for later. */
4472 rtx tem;
4473 rtx label = gen_label_rtx ();
4474 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4475 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4476 NULL_RTX, 0, OPTAB_WIDEN);
4477 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4478 expand_dec (quotient, const1_rtx);
4479 expand_inc (remainder, op1);
4480 emit_label (label);
4481 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4482 }
4483
4484 /* No luck with division elimination or divmod. Have to do it
4485 by conditionally adjusting op0 *and* the result. */
4486 {
4487 rtx label1, label2, label3, label4, label5;
4488 rtx adjusted_op0;
4489 rtx tem;
4490
4491 quotient = gen_reg_rtx (compute_mode);
4492 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4493 label1 = gen_label_rtx ();
4494 label2 = gen_label_rtx ();
4495 label3 = gen_label_rtx ();
4496 label4 = gen_label_rtx ();
4497 label5 = gen_label_rtx ();
4498 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4499 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4500 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501 quotient, 0, OPTAB_LIB_WIDEN);
4502 if (tem != quotient)
4503 emit_move_insn (quotient, tem);
4504 emit_jump_insn (gen_jump (label5));
4505 emit_barrier ();
4506 emit_label (label1);
4507 expand_inc (adjusted_op0, const1_rtx);
4508 emit_jump_insn (gen_jump (label4));
4509 emit_barrier ();
4510 emit_label (label2);
4511 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4512 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513 quotient, 0, OPTAB_LIB_WIDEN);
4514 if (tem != quotient)
4515 emit_move_insn (quotient, tem);
4516 emit_jump_insn (gen_jump (label5));
4517 emit_barrier ();
4518 emit_label (label3);
4519 expand_dec (adjusted_op0, const1_rtx);
4520 emit_label (label4);
4521 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4522 quotient, 0, OPTAB_LIB_WIDEN);
4523 if (tem != quotient)
4524 emit_move_insn (quotient, tem);
4525 expand_dec (quotient, const1_rtx);
4526 emit_label (label5);
4527 }
4528 break;
4529
4530 case CEIL_DIV_EXPR:
4531 case CEIL_MOD_EXPR:
4532 if (unsignedp)
4533 {
4534 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4535 {
4536 rtx t1, t2, t3;
4537 unsigned HOST_WIDE_INT d = INTVAL (op1);
4538 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4539 floor_log2 (d), tquotient, 1);
4540 t2 = expand_binop (compute_mode, and_optab, op0,
4541 GEN_INT (d - 1),
4542 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543 t3 = gen_reg_rtx (compute_mode);
4544 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545 compute_mode, 1, 1);
4546 if (t3 == 0)
4547 {
4548 rtx lab;
4549 lab = gen_label_rtx ();
4550 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551 expand_inc (t1, const1_rtx);
4552 emit_label (lab);
4553 quotient = t1;
4554 }
4555 else
4556 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557 t1, t3),
4558 tquotient);
4559 break;
4560 }
4561
4562 /* Try using an instruction that produces both the quotient and
4563 remainder, using truncation. We can easily compensate the
4564 quotient or remainder to get ceiling rounding, once we have the
4565 remainder. Notice that we compute also the final remainder
4566 value here, and return the result right away. */
4567 if (target == 0 || GET_MODE (target) != compute_mode)
4568 target = gen_reg_rtx (compute_mode);
4569
4570 if (rem_flag)
4571 {
4572 remainder = (REG_P (target)
4573 ? target : gen_reg_rtx (compute_mode));
4574 quotient = gen_reg_rtx (compute_mode);
4575 }
4576 else
4577 {
4578 quotient = (REG_P (target)
4579 ? target : gen_reg_rtx (compute_mode));
4580 remainder = gen_reg_rtx (compute_mode);
4581 }
4582
4583 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584 remainder, 1))
4585 {
4586 /* This could be computed with a branch-less sequence.
4587 Save that for later. */
4588 rtx label = gen_label_rtx ();
4589 do_cmp_and_jump (remainder, const0_rtx, EQ,
4590 compute_mode, label);
4591 expand_inc (quotient, const1_rtx);
4592 expand_dec (remainder, op1);
4593 emit_label (label);
4594 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4595 }
4596
4597 /* No luck with division elimination or divmod. Have to do it
4598 by conditionally adjusting op0 *and* the result. */
4599 {
4600 rtx label1, label2;
4601 rtx adjusted_op0, tem;
4602
4603 quotient = gen_reg_rtx (compute_mode);
4604 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605 label1 = gen_label_rtx ();
4606 label2 = gen_label_rtx ();
4607 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608 compute_mode, label1);
4609 emit_move_insn (quotient, const0_rtx);
4610 emit_jump_insn (gen_jump (label2));
4611 emit_barrier ();
4612 emit_label (label1);
4613 expand_dec (adjusted_op0, const1_rtx);
4614 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615 quotient, 1, OPTAB_LIB_WIDEN);
4616 if (tem != quotient)
4617 emit_move_insn (quotient, tem);
4618 expand_inc (quotient, const1_rtx);
4619 emit_label (label2);
4620 }
4621 }
4622 else /* signed */
4623 {
4624 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625 && INTVAL (op1) >= 0)
4626 {
4627 /* This is extremely similar to the code for the unsigned case
4628 above. For 2.7 we should merge these variants, but for
4629 2.6.1 I don't want to touch the code for unsigned since that
4630 get used in C. The signed case will only be used by other
4631 languages (Ada). */
4632
4633 rtx t1, t2, t3;
4634 unsigned HOST_WIDE_INT d = INTVAL (op1);
4635 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636 floor_log2 (d), tquotient, 0);
4637 t2 = expand_binop (compute_mode, and_optab, op0,
4638 GEN_INT (d - 1),
4639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4640 t3 = gen_reg_rtx (compute_mode);
4641 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4642 compute_mode, 1, 1);
4643 if (t3 == 0)
4644 {
4645 rtx lab;
4646 lab = gen_label_rtx ();
4647 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4648 expand_inc (t1, const1_rtx);
4649 emit_label (lab);
4650 quotient = t1;
4651 }
4652 else
4653 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4654 t1, t3),
4655 tquotient);
4656 break;
4657 }
4658
4659 /* Try using an instruction that produces both the quotient and
4660 remainder, using truncation. We can easily compensate the
4661 quotient or remainder to get ceiling rounding, once we have the
4662 remainder. Notice that we compute also the final remainder
4663 value here, and return the result right away. */
4664 if (target == 0 || GET_MODE (target) != compute_mode)
4665 target = gen_reg_rtx (compute_mode);
4666 if (rem_flag)
4667 {
4668 remainder= (REG_P (target)
4669 ? target : gen_reg_rtx (compute_mode));
4670 quotient = gen_reg_rtx (compute_mode);
4671 }
4672 else
4673 {
4674 quotient = (REG_P (target)
4675 ? target : gen_reg_rtx (compute_mode));
4676 remainder = gen_reg_rtx (compute_mode);
4677 }
4678
4679 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4680 remainder, 0))
4681 {
4682 /* This could be computed with a branch-less sequence.
4683 Save that for later. */
4684 rtx tem;
4685 rtx label = gen_label_rtx ();
4686 do_cmp_and_jump (remainder, const0_rtx, EQ,
4687 compute_mode, label);
4688 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 NULL_RTX, 0, OPTAB_WIDEN);
4690 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4691 expand_inc (quotient, const1_rtx);
4692 expand_dec (remainder, op1);
4693 emit_label (label);
4694 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4695 }
4696
4697 /* No luck with division elimination or divmod. Have to do it
4698 by conditionally adjusting op0 *and* the result. */
4699 {
4700 rtx label1, label2, label3, label4, label5;
4701 rtx adjusted_op0;
4702 rtx tem;
4703
4704 quotient = gen_reg_rtx (compute_mode);
4705 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4706 label1 = gen_label_rtx ();
4707 label2 = gen_label_rtx ();
4708 label3 = gen_label_rtx ();
4709 label4 = gen_label_rtx ();
4710 label5 = gen_label_rtx ();
4711 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4712 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4713 compute_mode, label1);
4714 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715 quotient, 0, OPTAB_LIB_WIDEN);
4716 if (tem != quotient)
4717 emit_move_insn (quotient, tem);
4718 emit_jump_insn (gen_jump (label5));
4719 emit_barrier ();
4720 emit_label (label1);
4721 expand_dec (adjusted_op0, const1_rtx);
4722 emit_jump_insn (gen_jump (label4));
4723 emit_barrier ();
4724 emit_label (label2);
4725 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4726 compute_mode, label3);
4727 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4728 quotient, 0, OPTAB_LIB_WIDEN);
4729 if (tem != quotient)
4730 emit_move_insn (quotient, tem);
4731 emit_jump_insn (gen_jump (label5));
4732 emit_barrier ();
4733 emit_label (label3);
4734 expand_inc (adjusted_op0, const1_rtx);
4735 emit_label (label4);
4736 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4737 quotient, 0, OPTAB_LIB_WIDEN);
4738 if (tem != quotient)
4739 emit_move_insn (quotient, tem);
4740 expand_inc (quotient, const1_rtx);
4741 emit_label (label5);
4742 }
4743 }
4744 break;
4745
4746 case EXACT_DIV_EXPR:
4747 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4748 {
4749 HOST_WIDE_INT d = INTVAL (op1);
4750 unsigned HOST_WIDE_INT ml;
4751 int pre_shift;
4752 rtx t1;
4753
4754 pre_shift = floor_log2 (d & -d);
4755 ml = invert_mod2n (d >> pre_shift, size);
4756 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757 pre_shift, NULL_RTX, unsignedp);
4758 quotient = expand_mult (compute_mode, t1,
4759 gen_int_mode (ml, compute_mode),
4760 NULL_RTX, 1);
4761
4762 insn = get_last_insn ();
4763 set_dst_reg_note (insn, REG_EQUAL,
4764 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765 compute_mode, op0, op1),
4766 quotient);
4767 }
4768 break;
4769
4770 case ROUND_DIV_EXPR:
4771 case ROUND_MOD_EXPR:
4772 if (unsignedp)
4773 {
4774 rtx tem;
4775 rtx label;
4776 label = gen_label_rtx ();
4777 quotient = gen_reg_rtx (compute_mode);
4778 remainder = gen_reg_rtx (compute_mode);
4779 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4780 {
4781 rtx tem;
4782 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783 quotient, 1, OPTAB_LIB_WIDEN);
4784 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786 remainder, 1, OPTAB_LIB_WIDEN);
4787 }
4788 tem = plus_constant (op1, -1);
4789 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4790 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4791 expand_inc (quotient, const1_rtx);
4792 expand_dec (remainder, op1);
4793 emit_label (label);
4794 }
4795 else
4796 {
4797 rtx abs_rem, abs_op1, tem, mask;
4798 rtx label;
4799 label = gen_label_rtx ();
4800 quotient = gen_reg_rtx (compute_mode);
4801 remainder = gen_reg_rtx (compute_mode);
4802 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4803 {
4804 rtx tem;
4805 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4806 quotient, 0, OPTAB_LIB_WIDEN);
4807 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4808 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4809 remainder, 0, OPTAB_LIB_WIDEN);
4810 }
4811 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4812 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4813 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814 1, NULL_RTX, 1);
4815 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4816 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4817 NULL_RTX, 0, OPTAB_WIDEN);
4818 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819 size - 1, NULL_RTX, 0);
4820 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4821 NULL_RTX, 0, OPTAB_WIDEN);
4822 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4823 NULL_RTX, 0, OPTAB_WIDEN);
4824 expand_inc (quotient, tem);
4825 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4826 NULL_RTX, 0, OPTAB_WIDEN);
4827 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828 NULL_RTX, 0, OPTAB_WIDEN);
4829 expand_dec (remainder, tem);
4830 emit_label (label);
4831 }
4832 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4833
4834 default:
4835 gcc_unreachable ();
4836 }
4837
4838 if (quotient == 0)
4839 {
4840 if (target && GET_MODE (target) != compute_mode)
4841 target = 0;
4842
4843 if (rem_flag)
4844 {
4845 /* Try to produce the remainder without producing the quotient.
4846 If we seem to have a divmod pattern that does not require widening,
4847 don't try widening here. We should really have a WIDEN argument
4848 to expand_twoval_binop, since what we'd really like to do here is
4849 1) try a mod insn in compute_mode
4850 2) try a divmod insn in compute_mode
4851 3) try a div insn in compute_mode and multiply-subtract to get
4852 remainder
4853 4) try the same things with widening allowed. */
4854 remainder
4855 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4856 op0, op1, target,
4857 unsignedp,
4858 ((optab_handler (optab2, compute_mode)
4859 != CODE_FOR_nothing)
4860 ? OPTAB_DIRECT : OPTAB_WIDEN));
4861 if (remainder == 0)
4862 {
4863 /* No luck there. Can we do remainder and divide at once
4864 without a library call? */
4865 remainder = gen_reg_rtx (compute_mode);
4866 if (! expand_twoval_binop ((unsignedp
4867 ? udivmod_optab
4868 : sdivmod_optab),
4869 op0, op1,
4870 NULL_RTX, remainder, unsignedp))
4871 remainder = 0;
4872 }
4873
4874 if (remainder)
4875 return gen_lowpart (mode, remainder);
4876 }
4877
4878 /* Produce the quotient. Try a quotient insn, but not a library call.
4879 If we have a divmod in this mode, use it in preference to widening
4880 the div (for this test we assume it will not fail). Note that optab2
4881 is set to the one of the two optabs that the call below will use. */
4882 quotient
4883 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4884 op0, op1, rem_flag ? NULL_RTX : target,
4885 unsignedp,
4886 ((optab_handler (optab2, compute_mode)
4887 != CODE_FOR_nothing)
4888 ? OPTAB_DIRECT : OPTAB_WIDEN));
4889
4890 if (quotient == 0)
4891 {
4892 /* No luck there. Try a quotient-and-remainder insn,
4893 keeping the quotient alone. */
4894 quotient = gen_reg_rtx (compute_mode);
4895 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896 op0, op1,
4897 quotient, NULL_RTX, unsignedp))
4898 {
4899 quotient = 0;
4900 if (! rem_flag)
4901 /* Still no luck. If we are not computing the remainder,
4902 use a library call for the quotient. */
4903 quotient = sign_expand_binop (compute_mode,
4904 udiv_optab, sdiv_optab,
4905 op0, op1, target,
4906 unsignedp, OPTAB_LIB_WIDEN);
4907 }
4908 }
4909 }
4910
4911 if (rem_flag)
4912 {
4913 if (target && GET_MODE (target) != compute_mode)
4914 target = 0;
4915
4916 if (quotient == 0)
4917 {
4918 /* No divide instruction either. Use library for remainder. */
4919 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920 op0, op1, target,
4921 unsignedp, OPTAB_LIB_WIDEN);
4922 /* No remainder function. Try a quotient-and-remainder
4923 function, keeping the remainder. */
4924 if (!remainder)
4925 {
4926 remainder = gen_reg_rtx (compute_mode);
4927 if (!expand_twoval_binop_libfunc
4928 (unsignedp ? udivmod_optab : sdivmod_optab,
4929 op0, op1,
4930 NULL_RTX, remainder,
4931 unsignedp ? UMOD : MOD))
4932 remainder = NULL_RTX;
4933 }
4934 }
4935 else
4936 {
4937 /* We divided. Now finish doing X - Y * (X / Y). */
4938 remainder = expand_mult (compute_mode, quotient, op1,
4939 NULL_RTX, unsignedp);
4940 remainder = expand_binop (compute_mode, sub_optab, op0,
4941 remainder, target, unsignedp,
4942 OPTAB_LIB_WIDEN);
4943 }
4944 }
4945
4946 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4947 }
4948
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      {
	HOST_WIDE_INT hi = 0;

	/* Sign-extend the constant into the high half unless TYPE is an
	   unsigned type narrower than a host wide int, in which case the
	   upper bits must remain zero.  */
	if (INTVAL (x) < 0
	    && !(TYPE_UNSIGNED (type)
		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
		     < HOST_BITS_PER_WIDE_INT)))
	  hi = -1;

	t = build_int_cst_wide (type, INTVAL (x), hi);

	return t;
      }

    case CONST_DOUBLE:
      /* A VOIDmode CONST_DOUBLE holds a double-word integer; any other
	 mode holds a floating-point value.  */
      if (GET_MODE (x) == VOIDmode)
	t = build_int_cst_wide (type,
				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
      else
	{
	  REAL_VALUE_TYPE d;

	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
	  t = build_real (type, d);
	}

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree t = NULL_TREE;
	int i;


	/* Build a tree with vector elements.  Iterate from the last
	   element down so consing produces them in ascending order.  */
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
	  }

	return build_vector (type, t);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* A logical right shift must be performed in an unsigned type.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* An arithmetic right shift must be performed in a signed type.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      /* A signed RTL division maps to TRUNC_DIV_EXPR on a signed type;
	 real types divide in the type itself.  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Rebuild the operand in a type matching its own mode, with
	 signedness chosen by the extension kind, then convert.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* else fall through.  */

    default:
      /* No direct tree equivalent: wrap X in a temporary VAR_DECL whose
	 rtl is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	  (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
5085
5086 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5087 and returning TARGET.
5088
5089 If TARGET is 0, a pseudo-register or constant is returned. */
5090
5091 rtx
expand_and(enum machine_mode mode,rtx op0,rtx op1,rtx target)5092 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5093 {
5094 rtx tem = 0;
5095
5096 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5097 tem = simplify_binary_operation (AND, mode, op0, op1);
5098 if (tem == 0)
5099 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5100
5101 if (target == 0)
5102 target = tem;
5103 else if (tem != target)
5104 emit_move_insn (target, tem);
5105 return target;
5106 }
5107
/* Helper function for emit_store_flag: try expanding the cstore pattern
   ICODE for comparison CODE of X and Y (operands of mode MODE, compared
   in COMPARE_MODE), then normalize the flag value to NORMALIZEP in
   TARGET_MODE and store it in TARGET.  UNSIGNEDP selects zero-extension
   when operands must be widened.  Returns the result rtx, or NULL_RTX
   (after deleting any insns emitted) if the pattern cannot handle the
   operands.  */
static rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     enum machine_mode mode, enum machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     enum machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, last, comparison, subtarget;
  enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;

  /* Remember the insn stream position so we can roll back on failure.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  /* If either operand could not be legitimized for the pattern, undo
     whatever prepare_operand emitted and give up.  */
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    /* The flag value lives in the sign bit: shift it down, arithmetically
       for a -1/0 result and logically for a 0/1 result.  */
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      gcc_assert (STORE_FLAG_VALUE & 1);

      /* Isolate the low bit, then negate it if a -1/0 result is wanted.  */
      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5204
5205
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.

   Compute comparison CODE of OP0 and OP1 (operands of mode MODE, with
   UNSIGNEDP selecting zero-extension for widening), normalize the
   result to NORMALIZEP, and store it in TARGET of mode TARGET_MODE.
   Returns the result rtx, or 0 if no strategy applied.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   enum machine_mode mode, int unsignedp, int normalizep,
		   enum machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  enum machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;
  rtx tem;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with operands swapped, used for the
     floating-point retry below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      tem = op0;
      op0 = op1;
      op1 = tem;
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      /* x < 1  is  x <= 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      /* x <= -1  is  x < 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      /* x >= 1  is  x > 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      /* x > -1  is  x >= 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      /* unsigned x >= 1  is  x != 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      /* unsigned x < 1  is  x == 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  /* The single-word result may still need to be converted into
	     TARGET_MODE.  */
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Finally, look for a cstore pattern, widening the comparison mode
     until one is found or no wider mode exists.  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  tem = emit_cstore (target, icode, code, mode, compare_mode,
			     unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating point, also try the equivalent comparison with
	     the condition and the operands both swapped.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5397
5398 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5399 and storing in TARGET. Normally return TARGET.
5400 Return 0 if that cannot be done.
5401
5402 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5403 it is VOIDmode, they cannot both be CONST_INT.
5404
5405 UNSIGNEDP is for the case where we have to widen the operands
5406 to perform the operation. It says to use zero-extension.
5407
5408 NORMALIZEP is 1 if we should convert the result to be either zero
5409    or one.  NORMALIZEP is -1 if we should convert the result to be
5410 either zero or -1. If NORMALIZEP is zero, the result will be left
5411 "raw" out of the scc insn. */
5412
5413 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5414 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5415 enum machine_mode mode, int unsignedp, int normalizep)
5416 {
5417 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5418 enum rtx_code rcode;
5419 rtx subtarget;
5420 rtx tem, last, trueval;
5421
5422 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423 target_mode);
5424 if (tem)
5425 return tem;
5426
5427 /* If we reached here, we can't do this with a scc insn, however there
5428 are some comparisons that can be done in other ways. Don't do any
5429 of these cases if branches are very cheap. */
5430 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431 return 0;
5432
5433 /* See what we need to return. We can only return a 1, -1, or the
5434 sign bit. */
5435
5436 if (normalizep == 0)
5437 {
5438 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439 normalizep = STORE_FLAG_VALUE;
5440
5441 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5442 ;
5443 else
5444 return 0;
5445 }
5446
5447 last = get_last_insn ();
5448
5449 /* If optimizing, use different pseudo registers for each insn, instead
5450 of reusing the same pseudo. This leads to better CSE, but slows
5451 down the compiler, since there are more pseudos */
5452 subtarget = (!optimize
5453 && (target_mode == mode)) ? target : NULL_RTX;
5454 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455
5456 /* For floating-point comparisons, try the reverse comparison or try
5457 changing the "orderedness" of the comparison. */
5458 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459 {
5460 enum rtx_code first_code;
5461 bool and_them;
5462
5463 rcode = reverse_condition_maybe_unordered (code);
5464 if (can_compare_p (rcode, mode, ccp_store_flag)
5465 && (code == ORDERED || code == UNORDERED
5466 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468 {
5469 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471
5472 /* For the reverse comparison, use either an addition or a XOR. */
5473 if (want_add
5474 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475 optimize_insn_for_speed_p ()) == 0)
5476 {
5477 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478 STORE_FLAG_VALUE, target_mode);
5479 if (tem)
5480 return expand_binop (target_mode, add_optab, tem,
5481 GEN_INT (normalizep),
5482 target, 0, OPTAB_WIDEN);
5483 }
5484 else if (!want_add
5485 && rtx_cost (trueval, XOR, 1,
5486 optimize_insn_for_speed_p ()) == 0)
5487 {
5488 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489 normalizep, target_mode);
5490 if (tem)
5491 return expand_binop (target_mode, xor_optab, tem, trueval,
5492 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5493 }
5494 }
5495
5496 delete_insns_since (last);
5497
5498 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5499 if (code == ORDERED || code == UNORDERED)
5500 return 0;
5501
5502 and_them = split_comparison (code, mode, &first_code, &code);
5503
5504 /* If there are no NaNs, the first comparison should always fall through.
5505 Effectively change the comparison to the other one. */
5506 if (!HONOR_NANS (mode))
5507 {
5508 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510 target_mode);
5511 }
5512
5513 #ifdef HAVE_conditional_move
5514 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515 conditional move. */
5516 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517 normalizep, target_mode);
5518 if (tem == 0)
5519 return 0;
5520
5521 if (and_them)
5522 tem = emit_conditional_move (target, code, op0, op1, mode,
5523 tem, const0_rtx, GET_MODE (tem), 0);
5524 else
5525 tem = emit_conditional_move (target, code, op0, op1, mode,
5526 trueval, tem, GET_MODE (tem), 0);
5527
5528 if (tem == 0)
5529 delete_insns_since (last);
5530 return tem;
5531 #else
5532 return 0;
5533 #endif
5534 }
5535
5536 /* The remaining tricks only apply to integer comparisons. */
5537
5538 if (GET_MODE_CLASS (mode) != MODE_INT)
5539 return 0;
5540
5541 /* If this is an equality comparison of integers, we can try to exclusive-or
5542 (or subtract) the two operands and use a recursive call to try the
5543 comparison with zero. Don't do any of these cases if branches are
5544 very cheap. */
5545
5546 if ((code == EQ || code == NE) && op1 != const0_rtx)
5547 {
5548 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549 OPTAB_WIDEN);
5550
5551 if (tem == 0)
5552 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553 OPTAB_WIDEN);
5554 if (tem != 0)
5555 tem = emit_store_flag (target, code, tem, const0_rtx,
5556 mode, unsignedp, normalizep);
5557 if (tem != 0)
5558 return tem;
5559
5560 delete_insns_since (last);
5561 }
5562
5563 /* For integer comparisons, try the reverse comparison. However, for
5564 small X and if we'd have anyway to extend, implementing "X != 0"
5565 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5566 rcode = reverse_condition (code);
5567 if (can_compare_p (rcode, mode, ccp_store_flag)
5568 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569 && code == NE
5570 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571 && op1 == const0_rtx))
5572 {
5573 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575
5576 /* Again, for the reverse comparison, use either an addition or a XOR. */
5577 if (want_add
5578 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579 optimize_insn_for_speed_p ()) == 0)
5580 {
5581 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 STORE_FLAG_VALUE, target_mode);
5583 if (tem != 0)
5584 tem = expand_binop (target_mode, add_optab, tem,
5585 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5586 }
5587 else if (!want_add
5588 && rtx_cost (trueval, XOR, 1,
5589 optimize_insn_for_speed_p ()) == 0)
5590 {
5591 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 normalizep, target_mode);
5593 if (tem != 0)
5594 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596 }
5597
5598 if (tem != 0)
5599 return tem;
5600 delete_insns_since (last);
5601 }
5602
5603 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604 the constant zero. Reject all other comparisons at this point. Only
5605 do LE and GT if branches are expensive since they are expensive on
5606 2-operand machines. */
5607
5608 if (op1 != const0_rtx
5609 || (code != EQ && code != NE
5610 && (BRANCH_COST (optimize_insn_for_speed_p (),
5611 false) <= 1 || (code != LE && code != GT))))
5612 return 0;
5613
5614 /* Try to put the result of the comparison in the sign bit. Assume we can't
5615 do the necessary operation below. */
5616
5617 tem = 0;
5618
5619 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5620 the sign bit set. */
5621
5622 if (code == LE)
5623 {
5624 /* This is destructive, so SUBTARGET can't be OP0. */
5625 if (rtx_equal_p (subtarget, op0))
5626 subtarget = 0;
5627
5628 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629 OPTAB_WIDEN);
5630 if (tem)
5631 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632 OPTAB_WIDEN);
5633 }
5634
5635 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5636 number of bits in the mode of OP0, minus one. */
5637
5638 if (code == GT)
5639 {
5640 if (rtx_equal_p (subtarget, op0))
5641 subtarget = 0;
5642
5643 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644 GET_MODE_BITSIZE (mode) - 1,
5645 subtarget, 0);
5646 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647 OPTAB_WIDEN);
5648 }
5649
5650 if (code == EQ || code == NE)
5651 {
5652 /* For EQ or NE, one way to do the comparison is to apply an operation
5653 that converts the operand into a positive number if it is nonzero
5654 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5655 for NE we negate. This puts the result in the sign bit. Then we
5656 normalize with a shift, if needed.
5657
5658 Two operations that can do the above actions are ABS and FFS, so try
5659 them. If that doesn't work, and MODE is smaller than a full word,
5660 we can use zero-extension to the wider mode (an unsigned conversion)
5661 as the operation. */
5662
5663 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664 that is compensated by the subsequent overflow when subtracting
5665 one / negating. */
5666
5667 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672 {
5673 tem = convert_modes (word_mode, mode, op0, 1);
5674 mode = word_mode;
5675 }
5676
5677 if (tem != 0)
5678 {
5679 if (code == EQ)
5680 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681 0, OPTAB_WIDEN);
5682 else
5683 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684 }
5685
5686 /* If we couldn't do it that way, for NE we can "or" the two's complement
5687 of the value with itself. For EQ, we take the one's complement of
5688 that "or", which is an extra insn, so we only handle EQ if branches
5689 are expensive. */
5690
5691 if (tem == 0
5692 && (code == NE
5693 || BRANCH_COST (optimize_insn_for_speed_p (),
5694 false) > 1))
5695 {
5696 if (rtx_equal_p (subtarget, op0))
5697 subtarget = 0;
5698
5699 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701 OPTAB_WIDEN);
5702
5703 if (tem && code == EQ)
5704 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705 }
5706 }
5707
5708 if (tem && normalizep)
5709 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710 GET_MODE_BITSIZE (mode) - 1,
5711 subtarget, normalizep == 1);
5712
5713 if (tem)
5714 {
5715 if (!target)
5716 ;
5717 else if (GET_MODE (tem) != target_mode)
5718 {
5719 convert_move (target, tem, 0);
5720 tem = target;
5721 }
5722 else if (!subtarget)
5723 {
5724 emit_move_insn (target, tem);
5725 tem = target;
5726 }
5727 }
5728 else
5729 delete_insns_since (last);
5730
5731 return tem;
5732 }
5733
5734 /* Like emit_store_flag, but always succeeds. */
5735
5736 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,enum machine_mode mode,int unsignedp,int normalizep)5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738 enum machine_mode mode, int unsignedp, int normalizep)
5739 {
5740 rtx tem, label;
5741 rtx trueval, falseval;
5742
5743 /* First see if emit_store_flag can do the job. */
5744 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5745 if (tem != 0)
5746 return tem;
5747
5748 if (!target)
5749 target = gen_reg_rtx (word_mode);
5750
5751 /* If this failed, we have to do this with set/compare/jump/set code.
5752 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5753 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5754 if (code == NE
5755 && GET_MODE_CLASS (mode) == MODE_INT
5756 && REG_P (target)
5757 && op0 == target
5758 && op1 == const0_rtx)
5759 {
5760 label = gen_label_rtx ();
5761 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5762 mode, NULL_RTX, NULL_RTX, label, -1);
5763 emit_move_insn (target, trueval);
5764 emit_label (label);
5765 return target;
5766 }
5767
5768 if (!REG_P (target)
5769 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5770 target = gen_reg_rtx (GET_MODE (target));
5771
5772 /* Jump in the right direction if the target cannot implement CODE
5773 but can jump on its reverse condition. */
5774 falseval = const0_rtx;
5775 if (! can_compare_p (code, mode, ccp_jump)
5776 && (! FLOAT_MODE_P (mode)
5777 || code == ORDERED || code == UNORDERED
5778 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5779 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5780 {
5781 enum rtx_code rcode;
5782 if (FLOAT_MODE_P (mode))
5783 rcode = reverse_condition_maybe_unordered (code);
5784 else
5785 rcode = reverse_condition (code);
5786
5787 /* Canonicalize to UNORDERED for the libcall. */
5788 if (can_compare_p (rcode, mode, ccp_jump)
5789 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5790 {
5791 falseval = trueval;
5792 trueval = const0_rtx;
5793 code = rcode;
5794 }
5795 }
5796
5797 emit_move_insn (target, trueval);
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5800 NULL_RTX, label, -1);
5801
5802 emit_move_insn (target, falseval);
5803 emit_label (label);
5804
5805 return target;
5806 }
5807
5808 /* Perform possibly multi-word comparison and conditional jump to LABEL
5809 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5810 now a thin wrapper around do_compare_rtx_and_jump. */
5811
5812 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,enum machine_mode mode,rtx label)5813 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5814 rtx label)
5815 {
5816 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5817 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5818 NULL_RTX, NULL_RTX, label, -1);
5819 }
5820