1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "tm_p.h"
31 #include "expmed.h"
32 #include "optabs.h"
33 #include "emit-rtl.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "dojump.h"
38 #include "explow.h"
39 #include "expr.h"
40 #include "langhooks.h"
41
/* Per-target state for this file (cost tables etc., see expmed.h).
   With SWITCHABLE_TARGET, this_target_expmed may be repointed at a
   target-specific copy; otherwise the single default is used.  */
struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif
46
47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 rtx, bool);
52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
54 rtx, bool);
55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
59 rtx, bool);
60 static rtx extract_fixed_bit_field (machine_mode, rtx,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT, rtx, int, bool);
66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int, bool);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
72
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
77
78 static inline rtx
mask_rtx(machine_mode mode,int bitpos,int bitsize,bool complement)79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
80 {
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos, bitsize, complement,
83 GET_MODE_PRECISION (mode)), mode);
84 }
85
/* Test whether a value is zero or a power of two.  */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
89
/* Scratch RTL templates used by init_expmed to query the cost of each
   kind of operation.  All templates share the single pseudo REG;
   init_expmed_one_mode retargets them with PUT_MODE before each cost
   query.  */
struct init_expmed_rtl
{
  rtx reg;		/* The shared pseudo-register operand.  */
  rtx plus;		/* (plus reg reg) */
  rtx neg;		/* (neg reg) */
  rtx mult;		/* (mult reg reg) */
  rtx sdiv;		/* (div reg reg) */
  rtx udiv;		/* (udiv reg reg) */
  rtx sdiv_32;		/* (div reg 32) */
  rtx smod_32;		/* (mod reg 32) */
  rtx wide_mult;	/* (mult (zero_extend reg) (zero_extend reg)) */
  rtx wide_lshr;	/* (lshiftrt wide_mult <bitsize>) */
  rtx wide_trunc;	/* (truncate wide_lshr) */
  rtx shift;		/* (ashift reg reg); operand 1 replaced per query.  */
  rtx shift_mult;	/* (mult reg reg); operand 1 replaced with pow2[m].  */
  rtx shift_add;	/* (plus shift_mult reg) */
  rtx shift_sub0;	/* (minus shift_mult reg) */
  rtx shift_sub1;	/* (minus reg shift_mult) */
  rtx zext;		/* (zero_extend reg) */
  rtx trunc;		/* (truncate reg) */

  rtx pow2[MAX_BITS_PER_WORD];	/* pow2[m] is the constant 1 << m.  */
  rtx cint[MAX_BITS_PER_WORD];	/* cint[m] is the constant m.  */
};
114
/* Record the cost of converting FROM_MODE to TO_MODE for the SPEED
   metric, approximated by the cost of a TRUNCATE (narrowing) or a
   ZERO_EXTEND (widening) of ALL->reg.  */

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
		      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than the "full"
     integer it requires for storage.  In case one doesn't, for
     comparison purposes here, reduce the bit size by one in that
     case.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && exact_log2 (to_size) != -1)
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && exact_log2 (from_size) != -1)
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  /* Both templates use ALL->reg as their operand; give the shared
     register the source mode so the cost query sees it correctly.  */
  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
}
143
/* Fill in the cost tables for MODE under the SPEED metric by
   retargeting the shared templates in *ALL to MODE and querying their
   RTX costs.  */

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  /* Retarget every shared template to MODE.  */
  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  /* Record whether a signed division/modulus by a power of two (32 is
     used as the representative) costs no more than a couple of
     additions.  */
  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));

  /* A shift by zero is free; a shift-add/sub by zero degenerates to a
     plain addition/subtraction.  */
  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  /* Cost of shifting by each amount M, and of the corresponding
     shift-and-add / shift-and-subtract combinations.  The templates'
     second operands are patched in place per iteration.  */
  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  /* Conversion costs from every integer mode into MODE.  */
  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  /* Widening-multiply and multiply-highpart costs, using the next
     wider integer mode when one exists.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, mode, speed));
	}
    }
}
227
/* Compute the cost tables for all integer, partial-integer and vector
   integer modes, for both cost metrics (speed == 0 for size, 1 for
   speed), and reset the multiply-algorithm hash table.  */

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  /* Constants used as shift counts (cint) and as power-of-two
     multipliers/divisors (pow2) by the cost templates.  */
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      /* Toggle the profile state so the cost queries use the size
	 metric (speed == 0) or the speed metric (speed == 1);
	 default_rtl_profile () below restores the normal state.  */
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  /* Clear the cached multiply-algorithm entries, since the target
     (and hence the recorded costs) may have changed.  */
  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  /* Return the probe rtxes to the GC allocator.  */
  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}
310
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
314
315 rtx
negate_rtx(machine_mode mode,rtx x)316 negate_rtx (machine_mode mode, rtx x)
317 {
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
319
320 if (result == 0)
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
322
323 return result;
324 }
325
/* Whether reverse storage order is supported on the target:
   -1 not yet determined, 0 unsupported, 1 supported.  */
static int reverse_storage_order_supported = -1;

/* Determine whether reverse storage order is supported on the target,
   caching the result and issuing a "sorry" diagnostic if it is not.  */

static void
check_reverse_storage_order_support (void)
{
  /* Byte-swapping only reverses the full storage order when byte and
     word endianness agree.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}
342
/* Whether reverse FP storage order is supported on the target:
   -1 not yet determined, 0 unsupported, 1 supported.  */
static int reverse_float_storage_order_supported = -1;

/* Determine whether reverse FP storage order is supported on the
   target, caching the result and issuing a "sorry" diagnostic if it
   is not.  */

static void
check_reverse_float_storage_order_support (void)
{
  /* Byte-swapping only reverses FP storage order when FP word
     endianness matches integer word endianness.  */
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}
359
/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (enum machine_mode mode, rtx x)
{
  enum machine_mode int_mode;
  rtx result;

  /* A single byte has no storage order to flip.  */
  if (mode == QImode)
    return x;

  /* For complex values, flip the real and imaginary parts separately
     in their inner mode.  */
  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  /* The swap is performed as a BSWAP in an integer mode; non-integer
     values are first punned to an integer mode of equal precision.  */
  if (SCALAR_INT_MODE_P (mode))
    int_mode = mode;
  else
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
      if (int_mode == BLKmode)
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  /* Fold the byte swap at compile time if possible, otherwise emit a
     bswap operation.  */
  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  /* Convert back to the original (non-integer) mode if we punned.  */
  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
413
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
418
419 static rtx
narrow_bit_field_mem(rtx mem,machine_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT * new_bitnum)420 narrow_bit_field_mem (rtx mem, machine_mode mode,
421 unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum,
423 unsigned HOST_WIDE_INT *new_bitnum)
424 {
425 if (mode == BLKmode)
426 {
427 *new_bitnum = bitnum % BITS_PER_UNIT;
428 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
429 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
430 / BITS_PER_UNIT);
431 return adjust_bitfield_address_size (mem, mode, offset, size);
432 }
433 else
434 {
435 unsigned int unit = GET_MODE_BITSIZE (mode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, mode, offset);
439 }
440 }
441
/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  /* The iterator yields, in increasing size order, the integer modes
     that can legitimately access the field.  */
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  machine_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  machine_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}
492
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
496
497 static bool
lowpart_bit_field_p(unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT bitsize,machine_mode struct_mode)498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
499 unsigned HOST_WIDE_INT bitsize,
500 machine_mode struct_mode)
501 {
502 if (BYTES_BIG_ENDIAN)
503 return (bitnum % BITS_PER_UNIT == 0
504 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
505 || (bitnum + bitsize) % BITS_PER_WORD == 0));
506 else
507 return bitnum % BITS_PER_WORD == 0;
508 }
509
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
514 model. */
515
516 static bool
strict_volatile_bitfield_p(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,machine_mode fieldmode,unsigned HOST_WIDE_INT bitregion_start,unsigned HOST_WIDE_INT bitregion_end)517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
518 unsigned HOST_WIDE_INT bitnum,
519 machine_mode fieldmode,
520 unsigned HOST_WIDE_INT bitregion_start,
521 unsigned HOST_WIDE_INT bitregion_end)
522 {
523 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
524
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
526 volatile MEM. */
527 if (!MEM_P (op0)
528 || !MEM_VOLATILE_P (op0)
529 || flag_strict_volatile_bitfields <= 0)
530 return false;
531
532 /* Non-integral modes likely only happen with packed structures.
533 Punt. */
534 if (!SCALAR_INT_MODE_P (fieldmode))
535 return false;
536
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize > modesize || modesize > BITS_PER_WORD)
540 return false;
541
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum % modesize + bitsize > modesize)
544 return false;
545
546 /* The memory must be sufficiently aligned for a MODESIZE access.
547 This condition guarantees, that the memory access will not
548 touch anything after the end of the structure. */
549 if (MEM_ALIGN (op0) < modesize)
550 return false;
551
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end != 0
554 && (bitnum - bitnum % modesize < bitregion_start
555 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
556 return false;
557
558 return true;
559 }
560
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
563
564 static bool
simple_mem_bitfield_p(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,machine_mode mode)565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
566 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
567 {
568 return (MEM_P (op0)
569 && bitnum % BITS_PER_UNIT == 0
570 && bitsize == GET_MODE_BITSIZE (mode)
571 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
572 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
573 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
574 }
575
/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  On success the
   insns are emitted and true is returned; on failure every insn
   emitted since entry is deleted and false is returned.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  Here the field may lie partially in the
     register, so adjust BITSIZE for the partial overlap.  Without this
     fix, pr48335-2.c would break on big-endian targets that have a bit
     insert instruction, such as arm and aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
						       value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      /* The insertion was done on a temporary; copy the result back
	 into the (paradoxical subreg) destination.  */
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  /* The insv pattern did not match; roll back everything emitted.  */
  delete_insns_since (last);
  return false;
}
707
708 /* A subroutine of store_bit_field, with the same arguments. Return true
709 if the operation could be implemented.
710
711 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
712 no other way of implementing the operation. If FALLBACK_P is false,
713 return false instead. */
714
715 static bool
store_bit_field_1(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT bitregion_start,unsigned HOST_WIDE_INT bitregion_end,machine_mode fieldmode,rtx value,bool reverse,bool fallback_p)716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
717 unsigned HOST_WIDE_INT bitnum,
718 unsigned HOST_WIDE_INT bitregion_start,
719 unsigned HOST_WIDE_INT bitregion_end,
720 machine_mode fieldmode,
721 rtx value, bool reverse, bool fallback_p)
722 {
723 rtx op0 = str_rtx;
724 rtx orig_value;
725
726 while (GET_CODE (op0) == SUBREG)
727 {
728 /* The following line once was done only if WORDS_BIG_ENDIAN,
729 but I think that is a mistake. WORDS_BIG_ENDIAN is
730 meaningful at a much higher level; when structures are copied
731 between memory and regs, the higher-numbered regs
732 always get higher addresses. */
733 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
734 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
735 int byte_offset = 0;
736
737 /* Paradoxical subregs need special handling on big-endian machines. */
738 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
739 {
740 int difference = inner_mode_size - outer_mode_size;
741
742 if (WORDS_BIG_ENDIAN)
743 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
744 if (BYTES_BIG_ENDIAN)
745 byte_offset += difference % UNITS_PER_WORD;
746 }
747 else
748 byte_offset = SUBREG_BYTE (op0);
749
750 bitnum += byte_offset * BITS_PER_UNIT;
751 op0 = SUBREG_REG (op0);
752 }
753
754 /* No action is needed if the target is a register and if the field
755 lies completely outside that register. This can occur if the source
756 code contains an out-of-bounds access to a small array. */
757 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
758 return true;
759
760 /* Use vec_set patterns for inserting parts of vectors whenever
761 available. */
762 if (VECTOR_MODE_P (GET_MODE (op0))
763 && !MEM_P (op0)
764 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
765 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
766 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
767 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
768 {
769 struct expand_operand ops[3];
770 machine_mode outermode = GET_MODE (op0);
771 machine_mode innermode = GET_MODE_INNER (outermode);
772 enum insn_code icode = optab_handler (vec_set_optab, outermode);
773 int pos = bitnum / GET_MODE_BITSIZE (innermode);
774
775 create_fixed_operand (&ops[0], op0);
776 create_input_operand (&ops[1], value, innermode);
777 create_integer_operand (&ops[2], pos);
778 if (maybe_expand_insn (icode, 3, ops))
779 return true;
780 }
781
782 /* If the target is a register, overwriting the entire object, or storing
783 a full-word or multi-word field can be done with just a SUBREG. */
784 if (!MEM_P (op0)
785 && bitsize == GET_MODE_BITSIZE (fieldmode)
786 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
787 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
788 {
789 /* Use the subreg machinery either to narrow OP0 to the required
790 words or to cope with mode punning between equal-sized modes.
791 In the latter case, use subreg on the rhs side, not lhs. */
792 rtx sub;
793
794 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
795 {
796 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
797 if (sub)
798 {
799 if (reverse)
800 sub = flip_storage_order (GET_MODE (op0), sub);
801 emit_move_insn (op0, sub);
802 return true;
803 }
804 }
805 else
806 {
807 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
808 bitnum / BITS_PER_UNIT);
809 if (sub)
810 {
811 if (reverse)
812 value = flip_storage_order (fieldmode, value);
813 emit_move_insn (sub, value);
814 return true;
815 }
816 }
817 }
818
819 /* If the target is memory, storing any naturally aligned field can be
820 done with a simple store. For targets that support fast unaligned
821 memory, any naturally sized, unit aligned field can be done directly. */
822 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
823 {
824 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
825 if (reverse)
826 value = flip_storage_order (fieldmode, value);
827 emit_move_insn (op0, value);
828 return true;
829 }
830
831 /* Make sure we are playing with integral modes. Pun with subregs
832 if we aren't. This must come after the entire register case above,
833 since that case is valid for any mode. The following cases are only
834 valid for integral modes. */
835 {
836 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
837 if (imode != GET_MODE (op0))
838 {
839 if (MEM_P (op0))
840 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
841 else
842 {
843 gcc_assert (imode != BLKmode);
844 op0 = gen_lowpart (imode, op0);
845 }
846 }
847 }
848
849 /* Storing an lsb-aligned field in a register
850 can be done with a movstrict instruction. */
851
852 if (!MEM_P (op0)
853 && !reverse
854 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
855 && bitsize == GET_MODE_BITSIZE (fieldmode)
856 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
857 {
858 struct expand_operand ops[2];
859 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
860 rtx arg0 = op0;
861 unsigned HOST_WIDE_INT subreg_off;
862
863 if (GET_CODE (arg0) == SUBREG)
864 {
865 /* Else we've got some float mode source being extracted into
866 a different float mode destination -- this combination of
867 subregs results in Severe Tire Damage. */
868 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
869 || GET_MODE_CLASS (fieldmode) == MODE_INT
870 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
871 arg0 = SUBREG_REG (arg0);
872 }
873
874 subreg_off = bitnum / BITS_PER_UNIT;
875 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
876 {
877 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
878
879 create_fixed_operand (&ops[0], arg0);
880 /* Shrink the source operand to FIELDMODE. */
881 create_convert_operand_to (&ops[1], value, fieldmode, false);
882 if (maybe_expand_insn (icode, 2, ops))
883 return true;
884 }
885 }
886
887 /* Handle fields bigger than a word. */
888
889 if (bitsize > BITS_PER_WORD)
890 {
891 /* Here we transfer the words of the field
892 in the order least significant first.
893 This is because the most significant word is the one which may
894 be less than full.
895 However, only do that if the value is not BLKmode. */
896
897 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
898 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
899 unsigned int i;
900 rtx_insn *last;
901
902 /* This is the mode we must force value to, so that there will be enough
903 subwords to extract. Note that fieldmode will often (always?) be
904 VOIDmode, because that is what store_field uses to indicate that this
905 is a bit field, but passing VOIDmode to operand_subword_force
906 is not allowed. */
907 fieldmode = GET_MODE (value);
908 if (fieldmode == VOIDmode)
909 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
910
911 last = get_last_insn ();
912 for (i = 0; i < nwords; i++)
913 {
914 /* If I is 0, use the low-order word in both field and target;
915 if I is 1, use the next to lowest word; and so on. */
916 unsigned int wordnum = (backwards
917 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
918 - i - 1
919 : i);
920 unsigned int bit_offset = (backwards ^ reverse
921 ? MAX ((int) bitsize - ((int) i + 1)
922 * BITS_PER_WORD,
923 0)
924 : (int) i * BITS_PER_WORD);
925 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
926 unsigned HOST_WIDE_INT new_bitsize =
927 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
928
929 /* If the remaining chunk doesn't have full wordsize we have
930 to make sure that for big-endian machines the higher order
931 bits are used. */
932 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
933 value_word = simplify_expand_binop (word_mode, lshr_optab,
934 value_word,
935 GEN_INT (BITS_PER_WORD
936 - new_bitsize),
937 NULL_RTX, true,
938 OPTAB_LIB_WIDEN);
939
940 if (!store_bit_field_1 (op0, new_bitsize,
941 bitnum + bit_offset,
942 bitregion_start, bitregion_end,
943 word_mode,
944 value_word, reverse, fallback_p))
945 {
946 delete_insns_since (last);
947 return false;
948 }
949 }
950 return true;
951 }
952
953 /* If VALUE has a floating-point or complex mode, access it as an
954 integer of the corresponding size. This can occur on a machine
955 with 64 bit registers that uses SFmode for float. It can also
956 occur for unaligned float or complex fields. */
957 orig_value = value;
958 if (GET_MODE (value) != VOIDmode
959 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
961 {
962 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
963 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
964 }
965
966 /* If OP0 is a multi-word register, narrow it to the affected word.
967 If the region spans two words, defer to store_split_bit_field. */
968 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
969 {
970 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
972 gcc_assert (op0);
973 bitnum %= BITS_PER_WORD;
974 if (bitnum + bitsize > BITS_PER_WORD)
975 {
976 if (!fallback_p)
977 return false;
978
979 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
980 bitregion_end, value, reverse);
981 return true;
982 }
983 }
984
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
987 in a word. */
988
989 extraction_insn insv;
990 if (!MEM_P (op0)
991 && !reverse
992 && get_best_reg_extraction_insn (&insv, EP_insv,
993 GET_MODE_BITSIZE (GET_MODE (op0)),
994 fieldmode)
995 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
996 return true;
997
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0) && !reverse)
1001 {
1002 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003 fieldmode)
1004 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005 return true;
1006
1007 rtx_insn *last = get_last_insn ();
1008
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos;
1012 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013 bitregion_start, bitregion_end,
1014 fieldmode, &bitpos);
1015 if (xop0)
1016 {
1017 rtx tempreg = copy_to_reg (xop0);
1018 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019 bitregion_start, bitregion_end,
1020 fieldmode, orig_value, reverse, false))
1021 {
1022 emit_move_insn (xop0, tempreg);
1023 return true;
1024 }
1025 delete_insns_since (last);
1026 }
1027 }
1028
1029 if (!fallback_p)
1030 return false;
1031
1032 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033 bitregion_end, value, reverse);
1034 return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1040
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1045
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048 If REVERSE is true, the store is to be done in reverse order. */
1049
void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
                 unsigned HOST_WIDE_INT bitnum,
                 unsigned HOST_WIDE_INT bitregion_start,
                 unsigned HOST_WIDE_INT bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
                                  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
                                             bitnum / BITS_PER_UNIT);
          if (reverse)
            value = flip_storage_order (fieldmode, value);
          /* strict_volatile_bitfield_p has established that the access
             is byte-aligned, so the division above was exact.  */
          gcc_assert (bitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          /* Read-modify-write: narrow to the FIELDMODE word containing
             the field, insert the bits into a temporary register, then
             store the whole word back with a single access.  */
          str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
                                          &bitnum);
          gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
                                  fieldmode, value, reverse, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      /* Rebase BITNUM and the region bounds so the region begins at
         bit 0 of the adjusted address; SIZE covers just the bytes the
         store may touch.  */
      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
                                bitregion_start, bitregion_end,
                                MEM_ALIGN (str_rtx), VOIDmode,
                                MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
    gcc_unreachable ();
}
1120
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1123
1124 If REVERSE is true, the store is to be done in reverse order. */
1125
1126 static void
store_fixed_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT bitregion_start,unsigned HOST_WIDE_INT bitregion_end,rtx value,bool reverse)1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128 unsigned HOST_WIDE_INT bitnum,
1129 unsigned HOST_WIDE_INT bitregion_start,
1130 unsigned HOST_WIDE_INT bitregion_end,
1131 rtx value, bool reverse)
1132 {
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1139
1140 if (MEM_P (op0))
1141 {
1142 machine_mode mode = GET_MODE (op0);
1143 if (GET_MODE_BITSIZE (mode) == 0
1144 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145 mode = word_mode;
1146 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149 if (mode == VOIDmode)
1150 {
1151 /* The only way this should occur is if the field spans word
1152 boundaries. */
1153 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154 bitregion_end, value, reverse);
1155 return;
1156 }
1157
1158 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159 }
1160
1161 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1166
static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;  /* Nonzero if VALUE is a known all-zeros constant.  */
  int all_one = 0;   /* Nonzero if VALUE is a known all-ones constant.  */

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      /* Truncate V to the field width; the shift below would be
         undefined for bitsize == HOST_BITS_PER_WIDE_INT.  */
      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      /* Recognize the degenerate constants so we can skip the AND
         (all-ones) or the IOR (all-zeros) below.  */
      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == (unsigned HOST_WIDE_INT) -1))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      /* Masking is needed only when VALUE has excess high bits that
         would land inside OP0 after the shift.  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      /* Complemented mask: zeros over the field, ones elsewhere.  */
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
1264
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1269 (within the word).
1270 VALUE is the value to store.
1271
1272 If REVERSE is true, the store is to be done in reverse order.
1273
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1275
1276 static void
store_split_bit_field(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,unsigned HOST_WIDE_INT bitregion_start,unsigned HOST_WIDE_INT bitregion_end,rtx value,bool reverse)1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278 unsigned HOST_WIDE_INT bitpos,
1279 unsigned HOST_WIDE_INT bitregion_start,
1280 unsigned HOST_WIDE_INT bitregion_end,
1281 rtx value, bool reverse)
1282 {
1283 unsigned int unit, total_bits, bitsdone = 0;
1284
1285 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286 much at a time. */
1287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288 unit = BITS_PER_WORD;
1289 else
1290 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value) && !CONST_INT_P (value))
1302 {
1303 rtx word = gen_lowpart_common (word_mode, value);
1304
1305 if (word && (value != word))
1306 value = word;
1307 else
1308 value = gen_lowpart_common (word_mode,
1309 force_reg (GET_MODE (value) != VOIDmode
1310 ? GET_MODE (value)
1311 : word_mode, value));
1312 }
1313
1314 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316 while (bitsdone < bitsize)
1317 {
1318 unsigned HOST_WIDE_INT thissize;
1319 unsigned HOST_WIDE_INT thispos;
1320 unsigned HOST_WIDE_INT offset;
1321 rtx part, word;
1322
1323 offset = (bitpos + bitsdone) / unit;
1324 thispos = (bitpos + bitsdone) % unit;
1325
1326 /* When region of bytes we can touch is restricted, decrease
1327 UNIT close to the end of the region as needed. If op0 is a REG
1328 or SUBREG of REG, don't do this, as there can't be data races
1329 on a register and we can expand shorter code in some cases. */
1330 if (bitregion_end
1331 && unit > BITS_PER_UNIT
1332 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333 && !REG_P (op0)
1334 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335 {
1336 unit = unit / 2;
1337 continue;
1338 }
1339
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1342 recurse forever. */
1343 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344 thissize = MIN (thissize, unit - thispos);
1345
1346 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347 {
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value))
1350 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351 >> (bitsize - bitsdone - thissize))
1352 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353 /* Likewise, but the source is little-endian. */
1354 else if (reverse)
1355 part = extract_fixed_bit_field (word_mode, value, thissize,
1356 bitsize - bitsdone - thissize,
1357 NULL_RTX, 1, false);
1358 else
1359 {
1360 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part = extract_fixed_bit_field (word_mode, value, thissize,
1365 total_bits - bitsize + bitsdone,
1366 NULL_RTX, 1, false);
1367 }
1368 }
1369 else
1370 {
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value))
1373 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374 >> bitsdone)
1375 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376 /* Likewise, but the source is big-endian. */
1377 else if (reverse)
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 total_bits - bitsdone - thissize,
1380 NULL_RTX, 1, false);
1381 else
1382 part = extract_fixed_bit_field (word_mode, value, thissize,
1383 bitsdone, NULL_RTX, 1, false);
1384 }
1385
1386 /* If OP0 is a register, then handle OFFSET here.
1387
1388 When handling multiword bitfields, extract_bit_field may pass
1389 down a word_mode SUBREG of a larger REG for a bitfield that actually
1390 crosses a word boundary. Thus, for a SUBREG, we must find
1391 the current word starting from the base register. */
1392 if (GET_CODE (op0) == SUBREG)
1393 {
1394 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395 + (offset * unit / BITS_PER_WORD);
1396 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1397 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398 word = word_offset ? const0_rtx : op0;
1399 else
1400 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401 GET_MODE (SUBREG_REG (op0)));
1402 offset &= BITS_PER_WORD / unit - 1;
1403 }
1404 else if (REG_P (op0))
1405 {
1406 machine_mode op0_mode = GET_MODE (op0);
1407 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408 word = offset ? const0_rtx : op0;
1409 else
1410 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1411 GET_MODE (op0));
1412 offset &= BITS_PER_WORD / unit - 1;
1413 }
1414 else
1415 word = op0;
1416
1417 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1418 it is just an out-of-bounds access. Ignore it. */
1419 if (word != const0_rtx)
1420 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421 bitregion_start, bitregion_end, part,
1422 reverse);
1423 bitsdone += thissize;
1424 }
1425 }
1426
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1429 to extract_bit_field. */
1430
1431 static rtx
convert_extracted_bit_field(rtx x,machine_mode mode,machine_mode tmode,bool unsignedp)1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433 machine_mode tmode, bool unsignedp)
1434 {
1435 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1436 return x;
1437
1438 /* If the x mode is not a scalar integral, first convert to the
1439 integer mode of that size and then access it as a floating-point
1440 value via a SUBREG. */
1441 if (!SCALAR_INT_MODE_P (tmode))
1442 {
1443 machine_mode smode;
1444
1445 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1446 x = convert_to_mode (smode, x, unsignedp);
1447 x = force_reg (smode, x);
1448 return gen_lowpart (tmode, x);
1449 }
1450
1451 return convert_to_mode (tmode, x, unsignedp);
1452 }
1453
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455 Return the extracted value on success, otherwise return null.
1456 EXT_MODE is the mode of the extraction and the other arguments
1457 are as for extract_bit_field. */
1458
static rtx
extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
                              unsigned HOST_WIDE_INT bitsize,
                              unsigned HOST_WIDE_INT bitnum,
                              int unsignedp, rtx target,
                              machine_mode mode, machine_mode tmode)
{
  struct expand_operand ops[4];
  rtx spec_target = target;          /* Caller's TARGET, if usable as-is.  */
  rtx spec_target_subreg = 0;        /* Lowpart of TARGET in EXT_MODE, if made.  */
  machine_mode ext_mode = extv->field_mode;
  unsigned unit = GET_MODE_BITSIZE (ext_mode);

  /* The pattern cannot extract zero bits or more bits than EXT_MODE holds.  */
  if (bitsize == 0 || unit < bitsize)
    return NULL_RTX;

  if (MEM_P (op0))
    /* Get a reference to the first byte of the field.  */
    op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
                                &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in EXT_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If op0 is a register, we need it in EXT_MODE to make it
         acceptable to the format of ext(z)v.  */
      if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
        return NULL_RTX;
      if (REG_P (op0) && GET_MODE (op0) != ext_mode)
        op0 = gen_lowpart_SUBREG (ext_mode, op0);
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are extracting from.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;

  if (target == 0)
    target = spec_target = gen_reg_rtx (tmode);

  if (GET_MODE (target) != ext_mode)
    {
      /* Don't use LHS paradoxical subreg if explicit truncation is needed
         between the mode of the extraction (word_mode) and the target
         mode.  Instead, create a temporary and use convert_move to set
         the target.  */
      if (REG_P (target)
          && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
        {
          target = gen_lowpart (ext_mode, target);
          /* Remember the widened subreg so we can return SPEC_TARGET
             directly when the pattern writes through it.  */
          if (GET_MODE_PRECISION (ext_mode)
              > GET_MODE_PRECISION (GET_MODE (spec_target)))
            spec_target_subreg = target;
        }
      else
        target = gen_reg_rtx (ext_mode);
    }

  create_output_operand (&ops[0], target, ext_mode);
  create_fixed_operand (&ops[1], op0);
  create_integer_operand (&ops[2], bitsize);
  create_integer_operand (&ops[3], bitnum);
  if (maybe_expand_insn (extv->icode, 4, ops))
    {
      target = ops[0].value;
      if (target == spec_target)
        return target;
      if (target == spec_target_subreg)
        return spec_target;
      /* Pattern produced a value in EXT_MODE; convert it to the mode
         the caller asked for.  */
      return convert_extracted_bit_field (target, mode, tmode, unsignedp);
    }
  return NULL_RTX;
}
1537
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540 if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */
1542
1543 static rtx
extract_bit_field_1(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p)1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546 machine_mode mode, machine_mode tmode,
1547 bool reverse, bool fallback_p)
1548 {
1549 rtx op0 = str_rtx;
1550 machine_mode int_mode;
1551 machine_mode mode1;
1552
1553 if (tmode == VOIDmode)
1554 tmode = mode;
1555
1556 while (GET_CODE (op0) == SUBREG)
1557 {
1558 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559 op0 = SUBREG_REG (op0);
1560 }
1561
1562 /* If we have an out-of-bounds access to a register, just return an
1563 uninitialized register of the required mode. This can occur if the
1564 source code contains an out-of-bounds access to a small array. */
1565 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566 return gen_reg_rtx (tmode);
1567
1568 if (REG_P (op0)
1569 && mode == GET_MODE (op0)
1570 && bitnum == 0
1571 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1572 {
1573 if (reverse)
1574 op0 = flip_storage_order (mode, op0);
1575 /* We're trying to extract a full register from itself. */
1576 return op0;
1577 }
1578
1579 /* See if we can get a better vector mode before extracting. */
1580 if (VECTOR_MODE_P (GET_MODE (op0))
1581 && !MEM_P (op0)
1582 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1583 {
1584 machine_mode new_mode;
1585
1586 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587 new_mode = MIN_MODE_VECTOR_FLOAT;
1588 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589 new_mode = MIN_MODE_VECTOR_FRACT;
1590 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591 new_mode = MIN_MODE_VECTOR_UFRACT;
1592 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593 new_mode = MIN_MODE_VECTOR_ACCUM;
1594 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595 new_mode = MIN_MODE_VECTOR_UACCUM;
1596 else
1597 new_mode = MIN_MODE_VECTOR_INT;
1598
1599 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601 && targetm.vector_mode_supported_p (new_mode))
1602 break;
1603 if (new_mode != VOIDmode)
1604 op0 = gen_lowpart (new_mode, op0);
1605 }
1606
1607 /* Use vec_extract patterns for extracting parts of vectors whenever
1608 available. */
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1610 && !MEM_P (op0)
1611 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1614 {
1615 struct expand_operand ops[3];
1616 machine_mode outermode = GET_MODE (op0);
1617 machine_mode innermode = GET_MODE_INNER (outermode);
1618 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1620
1621 create_output_operand (&ops[0], target, innermode);
1622 create_input_operand (&ops[1], op0, outermode);
1623 create_integer_operand (&ops[2], pos);
1624 if (maybe_expand_insn (icode, 3, ops))
1625 {
1626 target = ops[0].value;
1627 if (GET_MODE (target) != mode)
1628 return gen_lowpart (tmode, target);
1629 return target;
1630 }
1631 }
1632
1633 /* Make sure we are playing with integral modes. Pun with subregs
1634 if we aren't. */
1635 {
1636 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637 if (imode != GET_MODE (op0))
1638 {
1639 if (MEM_P (op0))
1640 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641 else if (imode != BLKmode)
1642 {
1643 op0 = gen_lowpart (imode, op0);
1644
1645 /* If we got a SUBREG, force it into a register since we
1646 aren't going to be able to do another SUBREG on it. */
1647 if (GET_CODE (op0) == SUBREG)
1648 op0 = force_reg (imode, op0);
1649 }
1650 else if (REG_P (op0))
1651 {
1652 rtx reg, subreg;
1653 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1654 MODE_INT);
1655 reg = gen_reg_rtx (imode);
1656 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657 emit_move_insn (subreg, op0);
1658 op0 = reg;
1659 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1660 }
1661 else
1662 {
1663 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665 emit_move_insn (mem, op0);
1666 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1667 }
1668 }
1669 }
1670
1671 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672 If that's wrong, the solution is to test for it and set TARGET to 0
1673 if needed. */
1674
1675 /* Get the mode of the field to use for atomic access or subreg
1676 conversion. */
1677 mode1 = mode;
1678 if (SCALAR_INT_MODE_P (tmode))
1679 {
1680 machine_mode try_mode = mode_for_size (bitsize,
1681 GET_MODE_CLASS (tmode), 0);
1682 if (try_mode != BLKmode)
1683 mode1 = try_mode;
1684 }
1685 gcc_assert (mode1 != BLKmode);
1686
1687 /* Extraction of a full MODE1 value can be done with a subreg as long
1688 as the least significant bit of the value is the least significant
1689 bit of either OP0 or a word of OP0. */
1690 if (!MEM_P (op0)
1691 && !reverse
1692 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693 && bitsize == GET_MODE_BITSIZE (mode1)
1694 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1695 {
1696 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697 bitnum / BITS_PER_UNIT);
1698 if (sub)
1699 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1700 }
1701
1702 /* Extraction of a full MODE1 value can be done with a load as long as
1703 the field is on a byte boundary and is sufficiently aligned. */
1704 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1705 {
1706 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1707 if (reverse)
1708 op0 = flip_storage_order (mode1, op0);
1709 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1710 }
1711
1712 /* Handle fields bigger than a word. */
1713
1714 if (bitsize > BITS_PER_WORD)
1715 {
1716 /* Here we transfer the words of the field
1717 in the order least significant first.
1718 This is because the most significant word is the one which may
1719 be less than full. */
1720
1721 const bool backwards = WORDS_BIG_ENDIAN;
1722 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1723 unsigned int i;
1724 rtx_insn *last;
1725
1726 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727 target = gen_reg_rtx (mode);
1728
1729 /* In case we're about to clobber a base register or something
1730 (see gcc.c-torture/execute/20040625-1.c). */
1731 if (reg_mentioned_p (target, str_rtx))
1732 target = gen_reg_rtx (mode);
1733
1734 /* Indicate for flow that the entire target reg is being set. */
1735 emit_clobber (target);
1736
1737 last = get_last_insn ();
1738 for (i = 0; i < nwords; i++)
1739 {
1740 /* If I is 0, use the low-order word in both field and target;
1741 if I is 1, use the next to lowest word; and so on. */
1742 /* Word number in TARGET to use. */
1743 unsigned int wordnum
1744 = (backwards
1745 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1746 : i);
1747 /* Offset from start of field in OP0. */
1748 unsigned int bit_offset = (backwards ^ reverse
1749 ? MAX ((int) bitsize - ((int) i + 1)
1750 * BITS_PER_WORD,
1751 0)
1752 : (int) i * BITS_PER_WORD);
1753 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1754 rtx result_part
1755 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756 bitsize - i * BITS_PER_WORD),
1757 bitnum + bit_offset, 1, target_part,
1758 mode, word_mode, reverse, fallback_p);
1759
1760 gcc_assert (target_part);
1761 if (!result_part)
1762 {
1763 delete_insns_since (last);
1764 return NULL;
1765 }
1766
1767 if (result_part != target_part)
1768 emit_move_insn (target_part, result_part);
1769 }
1770
1771 if (unsignedp)
1772 {
1773 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774 need to be zero'd out. */
1775 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1776 {
1777 unsigned int i, total_words;
1778
1779 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780 for (i = nwords; i < total_words; i++)
1781 emit_move_insn
1782 (operand_subword (target,
1783 backwards ? total_words - i - 1 : i,
1784 1, VOIDmode),
1785 const0_rtx);
1786 }
1787 return target;
1788 }
1789
1790 /* Signed bit field: sign-extend with two arithmetic shifts. */
1791 target = expand_shift (LSHIFT_EXPR, mode, target,
1792 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793 return expand_shift (RSHIFT_EXPR, mode, target,
1794 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1795 }
1796
1797 /* If OP0 is a multi-word register, narrow it to the affected word.
1798 If the region spans two words, defer to extract_split_bit_field. */
1799 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1800 {
1801 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803 bitnum %= BITS_PER_WORD;
1804 if (bitnum + bitsize > BITS_PER_WORD)
1805 {
1806 if (!fallback_p)
1807 return NULL_RTX;
1808 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1809 reverse);
1810 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1811 }
1812 }
1813
1814 /* From here on we know the desired field is smaller than a word.
1815 If OP0 is a register, it too fits within a word. */
1816 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817 extraction_insn extv;
1818 if (!MEM_P (op0)
1819 && !reverse
1820 /* ??? We could limit the structure size to the part of OP0 that
1821 contains the field, with appropriate checks for endianness
1822 and TRULY_NOOP_TRUNCATION. */
1823 && get_best_reg_extraction_insn (&extv, pattern,
1824 GET_MODE_BITSIZE (GET_MODE (op0)),
1825 tmode))
1826 {
1827 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828 unsignedp, target, mode,
1829 tmode);
1830 if (result)
1831 return result;
1832 }
1833
1834 /* If OP0 is a memory, try copying it to a register and seeing if a
1835 cheap register alternative is available. */
1836 if (MEM_P (op0) & !reverse)
1837 {
1838 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1839 tmode))
1840 {
1841 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1842 bitnum, unsignedp,
1843 target, mode,
1844 tmode);
1845 if (result)
1846 return result;
1847 }
1848
1849 rtx_insn *last = get_last_insn ();
1850
1851 /* Try loading part of OP0 into a register and extracting the
1852 bitfield from that. */
1853 unsigned HOST_WIDE_INT bitpos;
1854 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855 0, 0, tmode, &bitpos);
1856 if (xop0)
1857 {
1858 xop0 = copy_to_reg (xop0);
1859 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1860 unsignedp, target,
1861 mode, tmode, reverse, false);
1862 if (result)
1863 return result;
1864 delete_insns_since (last);
1865 }
1866 }
1867
1868 if (!fallback_p)
1869 return NULL;
1870
1871 /* Find a correspondingly-sized integer field, so we can apply
1872 shifts and masks to it. */
1873 int_mode = int_mode_for_mode (tmode);
1874 if (int_mode == BLKmode)
1875 int_mode = int_mode_for_mode (mode);
1876 /* Should probably push op0 out to memory and then do a load. */
1877 gcc_assert (int_mode != BLKmode);
1878
1879 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880 unsignedp, reverse);
1881
1882 /* Complex values must be reversed piecewise, so we need to undo the global
1883 reversal, convert to the complex mode and reverse again. */
1884 if (reverse && COMPLEX_MODE_P (tmode))
1885 {
1886 target = flip_storage_order (int_mode, target);
1887 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888 target = flip_storage_order (tmode, target);
1889 }
1890 else
1891 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1892
1893 return target;
1894 }
1895
1896 /* Generate code to extract a byte-field from STR_RTX
1897 containing BITSIZE bits, starting at BITNUM,
1898 and put it in TARGET if possible (if TARGET is nonzero).
1899 Regardless of TARGET, we return the rtx for where the value is placed.
1900
1901 STR_RTX is the structure containing the byte (a REG or MEM).
1902 UNSIGNEDP is nonzero if this is an unsigned bit field.
1903 MODE is the natural mode of the field value once extracted.
1904 TMODE is the mode the caller would like the value to have;
1905 but the value may be returned with type MODE instead.
1906
1907 If REVERSE is true, the extraction is to be done in reverse order.
1908
1909 If a TARGET is specified and we can store in it at no extra cost,
1910 we do so, and return TARGET.
1911 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912 if they are equally easy. */
1913
1914 rtx
extract_bit_field(rtx str_rtx,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse)1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917 machine_mode mode, machine_mode tmode, bool reverse)
1918 {
1919 machine_mode mode1;
1920
1921 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1922 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923 mode1 = GET_MODE (str_rtx);
1924 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925 mode1 = GET_MODE (target);
1926 else
1927 mode1 = tmode;
1928
1929 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1930 {
1931 /* Extraction of a full MODE1 value can be done with a simple load.
1932 We know here that the field can be accessed with one single
1933 instruction. For targets that support unaligned memory,
1934 an unaligned access may be necessary. */
1935 if (bitsize == GET_MODE_BITSIZE (mode1))
1936 {
1937 rtx result = adjust_bitfield_address (str_rtx, mode1,
1938 bitnum / BITS_PER_UNIT);
1939 if (reverse)
1940 result = flip_storage_order (mode1, result);
1941 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1943 }
1944
1945 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1946 &bitnum);
1947 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948 str_rtx = copy_to_reg (str_rtx);
1949 }
1950
1951 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952 target, mode, tmode, reverse, true);
1953 }
1954
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956 from bit BITNUM of OP0.
1957
1958 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959 If REVERSE is true, the extraction is to be done in reverse order.
1960
1961 If TARGET is nonzero, attempts to store the value there
1962 and return TARGET, but this is not guaranteed.
1963 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1964
1965 static rtx
extract_fixed_bit_field(machine_mode tmode,rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967 unsigned HOST_WIDE_INT bitsize,
1968 unsigned HOST_WIDE_INT bitnum, rtx target,
1969 int unsignedp, bool reverse)
1970 {
1971 if (MEM_P (op0))
1972 {
1973 machine_mode mode
1974 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975 MEM_VOLATILE_P (op0));
1976
1977 if (mode == VOIDmode)
1978 /* The only way this should occur is if the field spans word
1979 boundaries. */
1980 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1981 reverse);
1982
1983 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1984 }
1985
1986 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987 target, unsignedp, reverse);
1988 }
1989
/* Helper function for extract_fixed_bit_field, extracts
   the bit field always using the MODE of OP0.

   TMODE is the mode the caller wants for the result; OP0 must already
   be a scalar integer (registers/values only — the MEM case is peeled
   off by extract_fixed_bit_field).  BITSIZE/BITNUM give the field,
   TARGET optionally receives the result, UNSIGNEDP selects zero- vs
   sign-extension, and REVERSE requests reverse storage order.  */

static rtx
extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
			   unsigned HOST_WIDE_INT bitsize,
			   unsigned HOST_WIDE_INT bitnum, rtx target,
			   int unsignedp, bool reverse)
{
  machine_mode mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as extract equivalent of f5 from
     gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb and that of OP0.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */
  if (reverse)
    op0 = flip_storage_order (mode, op0);

  if (unsignedp)
    {
      if (bitnum)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  /* Maybe propagate the target for the shift.  */
	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
	  /* TARGET is only usable as a subtarget if its mode matches the
	     mode we are shifting in.  */
	  if (tmode != mode)
	    subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
	}
      /* Convert the value to the desired mode.  TMODE must also be a
	 scalar integer for this conversion to make sense, since OP0's
	 mode is a scalar integer.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  /* If the narrowest mode is not the requested one, TARGET has the
     wrong mode for the shifts below, so do not use it.  */
  if (mode != tmode)
    target = 0;

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
    {
      int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
      /* Maybe propagate the target for the shift.  */
      rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  /* The field's msb is now the msb of the word; the arithmetic right
     shift brings its lsb to the lsb of the word while sign-extending.  */
  return expand_shift (RSHIFT_EXPR, mode, op0,
		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
}
2070
/* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
   VALUE << BITPOS.

   The shift is carried out in wide-int arithmetic and the result is
   wrapped as a constant of MODE by immed_wide_int_const.  */

static rtx
lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
	      int bitpos)
{
  return immed_wide_int_const (wi::lshift (value, bitpos), mode);
}
2080
/* Extract a bit field that is split across two words
   and return an RTX for the result.

   OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
   BITSIZE is the field width; BITPOS, position of its first bit, in the word.
   UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

   If REVERSE is true, the extraction is to be done in reverse order.

   The pieces are extracted one UNIT at a time (at most a word), each
   as an unsigned field, then shifted into position and OR'd together
   in word_mode.  */

static rtx
extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
			 bool reverse)
{
  unsigned int unit;
  unsigned int bitsdone = 0;	/* Number of bits of the field extracted so far.  */
  rtx result = NULL_RTX;	/* Accumulated word_mode result.  */
  int first = 1;		/* True on the first loop iteration.  */

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  For a MEM, the alignment further limits how much we
     may safely access at once.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;	/* Bits handled this iteration.  */
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;	/* Bit position within the unit.  */
      unsigned HOST_WIDE_INT offset;	/* Which unit the bits live in.  */

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 extract_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* Extract the parts in bit-counting order,
	 whose meaning is determined by BYTES_PER_UNIT.
	 OFFSET is in UNITs, and UNIT is in bits.  Each part is taken
	 unsigned (UNSIGNEDP argument 1) so the OR below is correct.  */
      part = extract_fixed_bit_field (word_mode, word, thissize,
				      offset * unit + thispos, 0, 1, reverse);
      bitsdone += thissize;

      /* Shift this part into place for the result.  The shift amount
	 depends on whether this part carries the high-order or the
	 low-order bits of the field.  */
      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  if (bitsize != bitsdone)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsize - bitsdone, 0, 1);
	}
      else
	{
	  if (bitsdone != thissize)
	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
				 bitsdone - thissize, 0, 1);
	}

      if (first)
	result = part;
      else
	/* Combine the parts with bitwise or.  This works
	   because we extracted each part as an unsigned bit field.  */
	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
			       OPTAB_LIB_WIDEN);

      first = 0;
    }

  /* Unsigned bit field: we are done.  */
  if (unsignedp)
    return result;
  /* Signed bit field: sign-extend with two arithmetic shifts.  */
  result = expand_shift (LSHIFT_EXPR, word_mode, result,
			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
  return expand_shift (RSHIFT_EXPR, word_mode, result,
		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
}
2185
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2188 MODE, fill the upper bits with zeros. Fail if the layout of either
2189 mode is unknown (as for CC modes) or if the extraction would involve
2190 unprofitable mode punning. Return the value on success, otherwise
2191 return null.
2192
2193 This is different from gen_lowpart* in these respects:
2194
2195 - the returned value must always be considered an rvalue
2196
2197 - when MODE is wider than SRC_MODE, the extraction involves
2198 a zero extension
2199
2200 - when MODE is smaller than SRC_MODE, the extraction involves
2201 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2202
2203 In other words, this routine performs a computation, whereas the
2204 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2205 operations. */
2206
2207 rtx
extract_low_bits(machine_mode mode,machine_mode src_mode,rtx src)2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2209 {
2210 machine_mode int_mode, src_int_mode;
2211
2212 if (mode == src_mode)
2213 return src;
2214
2215 if (CONSTANT_P (src))
2216 {
2217 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218 fails, it will happily create (subreg (symbol_ref)) or similar
2219 invalid SUBREGs. */
2220 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2222 if (ret)
2223 return ret;
2224
2225 if (GET_MODE (src) == VOIDmode
2226 || !validate_subreg (mode, src_mode, src, byte))
2227 return NULL_RTX;
2228
2229 src = force_reg (GET_MODE (src), src);
2230 return gen_rtx_SUBREG (mode, src, byte);
2231 }
2232
2233 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2234 return NULL_RTX;
2235
2236 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237 && MODES_TIEABLE_P (mode, src_mode))
2238 {
2239 rtx x = gen_lowpart_common (mode, src);
2240 if (x)
2241 return x;
2242 }
2243
2244 src_int_mode = int_mode_for_mode (src_mode);
2245 int_mode = int_mode_for_mode (mode);
2246 if (src_int_mode == BLKmode || int_mode == BLKmode)
2247 return NULL_RTX;
2248
2249 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2250 return NULL_RTX;
2251 if (!MODES_TIEABLE_P (int_mode, mode))
2252 return NULL_RTX;
2253
2254 src = gen_lowpart (src_int_mode, src);
2255 src = convert_modes (int_mode, src_int_mode, src, true);
2256 src = gen_lowpart (mode, src);
2257 return src;
2258 }
2259
2260 /* Add INC into TARGET. */
2261
2262 void
expand_inc(rtx target,rtx inc)2263 expand_inc (rtx target, rtx inc)
2264 {
2265 rtx value = expand_binop (GET_MODE (target), add_optab,
2266 target, inc,
2267 target, 0, OPTAB_LIB_WIDEN);
2268 if (value != target)
2269 emit_move_insn (target, value);
2270 }
2271
2272 /* Subtract DEC from TARGET. */
2273
2274 void
expand_dec(rtx target,rtx dec)2275 expand_dec (rtx target, rtx dec)
2276 {
2277 rtx value = expand_binop (GET_MODE (target), sub_optab,
2278 target, dec,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2282 }
2283
/* Output a shift instruction for expression code CODE,
   with SHIFTED being the rtx for the value to shift,
   and AMOUNT the rtx for the amount to shift by.
   Store the result in the rtx TARGET, if that is convenient.
   If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
   Return the rtx for where the value is.
   If that cannot be done, abort the compilation unless MAY_FAIL is true,
   in which case 0 is returned.  */

static rtx
expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
		rtx amount, rtx target, int unsignedp, bool may_fail = false)
{
  rtx op1, temp = 0;
  int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
  int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
  optab lshift_optab = ashl_optab;
  optab rshift_arith_optab = ashr_optab;
  optab rshift_uns_optab = lshr_optab;
  optab lrotate_optab = rotl_optab;
  optab rrotate_optab = rotr_optab;
  machine_mode op1_mode;
  machine_mode scalar_mode = mode;
  int attempt;
  bool speed = optimize_insn_for_speed_p ();

  /* For vector shifts, bit counts below refer to the element mode.  */
  if (VECTOR_MODE_P (mode))
    scalar_mode = GET_MODE_INNER (mode);
  op1 = amount;
  op1_mode = GET_MODE (op1);

  /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
     shift amount is a vector, use the vector/vector shift patterns.  */
  if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
    {
      lshift_optab = vashl_optab;
      rshift_arith_optab = vashr_optab;
      rshift_uns_optab = vlshr_optab;
      lrotate_optab = vrotl_optab;
      rrotate_optab = vrotr_optab;
    }

  /* Previously detected shift-counts computed by NEGATE_EXPR
     and shifted in the other direction; but that does not work
     on all machines.  */

  if (SHIFT_COUNT_TRUNCATED)
    {
      /* Reduce an out-of-range constant count modulo the bit width,
	 and strip a lowpart SUBREG from a variable count, since the
	 target truncates shift counts anyway.  */
      if (CONST_INT_P (op1)
	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
		       % GET_MODE_BITSIZE (scalar_mode));
      else if (GET_CODE (op1) == SUBREG
	       && subreg_lowpart_p (op1)
	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
	op1 = SUBREG_REG (op1);
    }

  /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
     prefer left rotation, if op1 is from bitsize / 2 + 1 to
     bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
     amount instead.  */
  if (rotate
      && CONST_INT_P (op1)
      && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
		   GET_MODE_BITSIZE (scalar_mode) - 1))
    {
      op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
      left = !left;
      code = left ? LROTATE_EXPR : RROTATE_EXPR;
    }

  /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
     Note that this is not the case for bigger values.  For instance a rotation
     of 0x01020304 by 16 bits gives 0x03040102 which is different from
     0x04030201 (bswapsi).  */
  if (rotate
      && CONST_INT_P (op1)
      && INTVAL (op1) == BITS_PER_UNIT
      && GET_MODE_SIZE (scalar_mode) == 2
      && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
    return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
			unsignedp);

  /* Shifting by zero is a no-op.  */
  if (op1 == const0_rtx)
    return shifted;

  /* Check whether its cheaper to implement a left shift by a constant
     bit count by a sequence of additions.  */
  if (code == LSHIFT_EXPR
      && CONST_INT_P (op1)
      && INTVAL (op1) > 0
      && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
      && INTVAL (op1) < MAX_BITS_PER_WORD
      && (shift_cost (speed, mode, INTVAL (op1))
	  > INTVAL (op1) * add_cost (speed, mode))
      && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
    {
      int i;
      /* Each x + x doubles the value, i.e. shifts left by one.  */
      for (i = 0; i < INTVAL (op1); i++)
	{
	  temp = force_reg (mode, shifted);
	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
				  unsignedp, OPTAB_LIB_WIDEN);
	}
      return shifted;
    }

  /* Try progressively more permissive expansion strategies until one
     produces an insn sequence.  */
  for (attempt = 0; temp == 0 && attempt < 3; attempt++)
    {
      enum optab_methods methods;

      if (attempt == 0)
	methods = OPTAB_DIRECT;
      else if (attempt == 1)
	methods = OPTAB_WIDEN;
      else
	methods = OPTAB_LIB_WIDEN;

      if (rotate)
	{
	  /* Widening does not work for rotation.  */
	  if (methods == OPTAB_WIDEN)
	    continue;
	  else if (methods == OPTAB_LIB_WIDEN)
	    {
	      /* If we have been unable to open-code this by a rotation,
		 do it as the IOR of two shifts.  I.e., to rotate A
		 by N bits, compute
		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
		 where C is the bitsize of A.

		 It is theoretically possible that the target machine might
		 not be able to perform either shift and hence we would
		 be making two libcalls rather than just the one for the
		 shift (similarly if IOR could not be done).  We will allow
		 this extremely unlikely lossage to avoid complicating the
		 code below.  */

	      rtx subtarget = target == shifted ? 0 : target;
	      rtx new_amount, other_amount;
	      rtx temp1;

	      new_amount = op1;
	      if (op1 == const0_rtx)
		return shifted;
	      else if (CONST_INT_P (op1))
		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
					- INTVAL (op1));
	      else
		{
		  /* For a variable amount, compute (-N) & (C - 1)
		     symbolically in OP1's mode.  */
		  other_amount
		    = simplify_gen_unary (NEG, GET_MODE (op1),
					  op1, GET_MODE (op1));
		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
		  other_amount
		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
					   gen_int_mode (mask, GET_MODE (op1)));
		}

	      shifted = force_reg (mode, shifted);

	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
				     mode, shifted, new_amount, 0, 1);
	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
				      mode, shifted, other_amount,
				      subtarget, 1);
	      return expand_binop (mode, ior_optab, temp, temp1, target,
				   unsignedp, methods);
	    }

	  temp = expand_binop (mode,
			       left ? lrotate_optab : rrotate_optab,
			       shifted, op1, target, unsignedp, methods);
	}
      else if (unsignedp)
	temp = expand_binop (mode,
			     left ? lshift_optab : rshift_uns_optab,
			     shifted, op1, target, unsignedp, methods);

      /* Do arithmetic shifts.
	 Also, if we are going to widen the operand, we can just as well
	 use an arithmetic right-shift instead of a logical one.  */
      if (temp == 0 && ! rotate
	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
	{
	  enum optab_methods methods1 = methods;

	  /* If trying to widen a log shift to an arithmetic shift,
	     don't accept an arithmetic shift of the same size.  */
	  if (unsignedp)
	    methods1 = OPTAB_MUST_WIDEN;

	  /* Arithmetic shift */

	  temp = expand_binop (mode,
			       left ? lshift_optab : rshift_arith_optab,
			       shifted, op1, target, unsignedp, methods1);
	}

      /* We used to try extzv here for logical right shifts, but that was
	 only useful for one machine, the VAX, and caused poor code
	 generation there for lshrdi3, so the code was deleted and a
	 define_expand for lshrsi3 was added to vax.md.  */
    }

  gcc_assert (temp != NULL_RTX || may_fail);
  return temp;
}
2495
2496 /* Output a shift instruction for expression code CODE,
2497 with SHIFTED being the rtx for the value to shift,
2498 and AMOUNT the amount to shift by.
2499 Store the result in the rtx TARGET, if that is convenient.
2500 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2501 Return the rtx for where the value is. */
2502
2503 rtx
expand_shift(enum tree_code code,machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2504 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2505 int amount, rtx target, int unsignedp)
2506 {
2507 return expand_shift_1 (code, mode,
2508 shifted, GEN_INT (amount), target, unsignedp);
2509 }
2510
2511 /* Likewise, but return 0 if that cannot be done. */
2512
2513 static rtx
maybe_expand_shift(enum tree_code code,machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2514 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2515 int amount, rtx target, int unsignedp)
2516 {
2517 return expand_shift_1 (code, mode,
2518 shifted, GEN_INT (amount), target, unsignedp, true);
2519 }
2520
2521 /* Output a shift instruction for expression code CODE,
2522 with SHIFTED being the rtx for the value to shift,
2523 and AMOUNT the tree for the amount to shift by.
2524 Store the result in the rtx TARGET, if that is convenient.
2525 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2526 Return the rtx for where the value is. */
2527
2528 rtx
expand_variable_shift(enum tree_code code,machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2529 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2530 tree amount, rtx target, int unsignedp)
2531 {
2532 return expand_shift_1 (code, mode,
2533 shifted, expand_normal (amount), target, unsignedp);
2534 }
2535
2536
/* Indicates the type of fixup needed after a constant multiplication.
   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
   the result should be negated, and ADD_VARIANT means that the
   multiplicand should be added to the result.  */
enum mult_variant {basic_variant, negate_variant, add_variant};

/* Forward declarations for the synthetic-multiplication machinery
   defined below.  */
static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
			const struct mult_cost *, machine_mode mode);
static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
				 struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
			      const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (machine_mode, rtx);
static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
				       int, int);
2554 /* Compute and return the best algorithm for multiplying by T.
2555 The algorithm must cost less than cost_limit
2556 If retval.cost >= COST_LIMIT, no algorithm was found and all
2557 other field of the returned struct are undefined.
2558 MODE is the machine mode of the multiplication. */
2559
2560 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,machine_mode mode)2561 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2562 const struct mult_cost *cost_limit, machine_mode mode)
2563 {
2564 int m;
2565 struct algorithm *alg_in, *best_alg;
2566 struct mult_cost best_cost;
2567 struct mult_cost new_limit;
2568 int op_cost, op_latency;
2569 unsigned HOST_WIDE_INT orig_t = t;
2570 unsigned HOST_WIDE_INT q;
2571 int maxm, hash_index;
2572 bool cache_hit = false;
2573 enum alg_code cache_alg = alg_zero;
2574 bool speed = optimize_insn_for_speed_p ();
2575 machine_mode imode;
2576 struct alg_hash_entry *entry_ptr;
2577
2578 /* Indicate that no algorithm is yet found. If no algorithm
2579 is found, this value will be returned and indicate failure. */
2580 alg_out->cost.cost = cost_limit->cost + 1;
2581 alg_out->cost.latency = cost_limit->latency + 1;
2582
2583 if (cost_limit->cost < 0
2584 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2585 return;
2586
2587 /* Be prepared for vector modes. */
2588 imode = GET_MODE_INNER (mode);
2589
2590 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2591
2592 /* Restrict the bits of "t" to the multiplication's mode. */
2593 t &= GET_MODE_MASK (imode);
2594
2595 /* t == 1 can be done in zero cost. */
2596 if (t == 1)
2597 {
2598 alg_out->ops = 1;
2599 alg_out->cost.cost = 0;
2600 alg_out->cost.latency = 0;
2601 alg_out->op[0] = alg_m;
2602 return;
2603 }
2604
2605 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2606 fail now. */
2607 if (t == 0)
2608 {
2609 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2610 return;
2611 else
2612 {
2613 alg_out->ops = 1;
2614 alg_out->cost.cost = zero_cost (speed);
2615 alg_out->cost.latency = zero_cost (speed);
2616 alg_out->op[0] = alg_zero;
2617 return;
2618 }
2619 }
2620
2621 /* We'll be needing a couple extra algorithm structures now. */
2622
2623 alg_in = XALLOCA (struct algorithm);
2624 best_alg = XALLOCA (struct algorithm);
2625 best_cost = *cost_limit;
2626
2627 /* Compute the hash index. */
2628 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2629
2630 /* See if we already know what to do for T. */
2631 entry_ptr = alg_hash_entry_ptr (hash_index);
2632 if (entry_ptr->t == t
2633 && entry_ptr->mode == mode
2634 && entry_ptr->mode == mode
2635 && entry_ptr->speed == speed
2636 && entry_ptr->alg != alg_unknown)
2637 {
2638 cache_alg = entry_ptr->alg;
2639
2640 if (cache_alg == alg_impossible)
2641 {
2642 /* The cache tells us that it's impossible to synthesize
2643 multiplication by T within entry_ptr->cost. */
2644 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2645 /* COST_LIMIT is at least as restrictive as the one
2646 recorded in the hash table, in which case we have no
2647 hope of synthesizing a multiplication. Just
2648 return. */
2649 return;
2650
2651 /* If we get here, COST_LIMIT is less restrictive than the
2652 one recorded in the hash table, so we may be able to
2653 synthesize a multiplication. Proceed as if we didn't
2654 have the cache entry. */
2655 }
2656 else
2657 {
2658 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2659 /* The cached algorithm shows that this multiplication
2660 requires more cost than COST_LIMIT. Just return. This
2661 way, we don't clobber this cache entry with
2662 alg_impossible but retain useful information. */
2663 return;
2664
2665 cache_hit = true;
2666
2667 switch (cache_alg)
2668 {
2669 case alg_shift:
2670 goto do_alg_shift;
2671
2672 case alg_add_t_m2:
2673 case alg_sub_t_m2:
2674 goto do_alg_addsub_t_m2;
2675
2676 case alg_add_factor:
2677 case alg_sub_factor:
2678 goto do_alg_addsub_factor;
2679
2680 case alg_add_t2_m:
2681 goto do_alg_add_t2_m;
2682
2683 case alg_sub_t2_m:
2684 goto do_alg_sub_t2_m;
2685
2686 default:
2687 gcc_unreachable ();
2688 }
2689 }
2690 }
2691
2692 /* If we have a group of zero bits at the low-order part of T, try
2693 multiplying by the remaining bits and then doing a shift. */
2694
2695 if ((t & 1) == 0)
2696 {
2697 do_alg_shift:
2698 m = floor_log2 (t & -t); /* m = number of low zero bits */
2699 if (m < maxm)
2700 {
2701 q = t >> m;
2702 /* The function expand_shift will choose between a shift and
2703 a sequence of additions, so the observed cost is given as
2704 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2705 op_cost = m * add_cost (speed, mode);
2706 if (shift_cost (speed, mode, m) < op_cost)
2707 op_cost = shift_cost (speed, mode, m);
2708 new_limit.cost = best_cost.cost - op_cost;
2709 new_limit.latency = best_cost.latency - op_cost;
2710 synth_mult (alg_in, q, &new_limit, mode);
2711
2712 alg_in->cost.cost += op_cost;
2713 alg_in->cost.latency += op_cost;
2714 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715 {
2716 best_cost = alg_in->cost;
2717 std::swap (alg_in, best_alg);
2718 best_alg->log[best_alg->ops] = m;
2719 best_alg->op[best_alg->ops] = alg_shift;
2720 }
2721
2722 /* See if treating ORIG_T as a signed number yields a better
2723 sequence. Try this sequence only for a negative ORIG_T
2724 as it would be useless for a non-negative ORIG_T. */
2725 if ((HOST_WIDE_INT) orig_t < 0)
2726 {
2727 /* Shift ORIG_T as follows because a right shift of a
2728 negative-valued signed type is implementation
2729 defined. */
2730 q = ~(~orig_t >> m);
2731 /* The function expand_shift will choose between a shift
2732 and a sequence of additions, so the observed cost is
2733 given as MIN (m * add_cost(speed, mode),
2734 shift_cost(speed, mode, m)). */
2735 op_cost = m * add_cost (speed, mode);
2736 if (shift_cost (speed, mode, m) < op_cost)
2737 op_cost = shift_cost (speed, mode, m);
2738 new_limit.cost = best_cost.cost - op_cost;
2739 new_limit.latency = best_cost.latency - op_cost;
2740 synth_mult (alg_in, q, &new_limit, mode);
2741
2742 alg_in->cost.cost += op_cost;
2743 alg_in->cost.latency += op_cost;
2744 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745 {
2746 best_cost = alg_in->cost;
2747 std::swap (alg_in, best_alg);
2748 best_alg->log[best_alg->ops] = m;
2749 best_alg->op[best_alg->ops] = alg_shift;
2750 }
2751 }
2752 }
2753 if (cache_hit)
2754 goto done;
2755 }
2756
2757 /* If we have an odd number, add or subtract one. */
2758 if ((t & 1) != 0)
2759 {
2760 unsigned HOST_WIDE_INT w;
2761
2762 do_alg_addsub_t_m2:
2763 for (w = 1; (w & t) != 0; w <<= 1)
2764 ;
2765 /* If T was -1, then W will be zero after the loop. This is another
2766 case where T ends with ...111. Handling this with (T + 1) and
2767 subtract 1 produces slightly better code and results in algorithm
2768 selection much faster than treating it like the ...0111 case
2769 below. */
2770 if (w == 0
2771 || (w > 2
2772 /* Reject the case where t is 3.
2773 Thus we prefer addition in that case. */
2774 && t != 3))
2775 {
2776 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2777
2778 op_cost = add_cost (speed, mode);
2779 new_limit.cost = best_cost.cost - op_cost;
2780 new_limit.latency = best_cost.latency - op_cost;
2781 synth_mult (alg_in, t + 1, &new_limit, mode);
2782
2783 alg_in->cost.cost += op_cost;
2784 alg_in->cost.latency += op_cost;
2785 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2786 {
2787 best_cost = alg_in->cost;
2788 std::swap (alg_in, best_alg);
2789 best_alg->log[best_alg->ops] = 0;
2790 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2791 }
2792 }
2793 else
2794 {
2795 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2796
2797 op_cost = add_cost (speed, mode);
2798 new_limit.cost = best_cost.cost - op_cost;
2799 new_limit.latency = best_cost.latency - op_cost;
2800 synth_mult (alg_in, t - 1, &new_limit, mode);
2801
2802 alg_in->cost.cost += op_cost;
2803 alg_in->cost.latency += op_cost;
2804 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805 {
2806 best_cost = alg_in->cost;
2807 std::swap (alg_in, best_alg);
2808 best_alg->log[best_alg->ops] = 0;
2809 best_alg->op[best_alg->ops] = alg_add_t_m2;
2810 }
2811 }
2812
2813 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2814 quickly with a - a * n for some appropriate constant n. */
2815 m = exact_log2 (-orig_t + 1);
2816 if (m >= 0 && m < maxm)
2817 {
2818 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2819 /* If the target has a cheap shift-and-subtract insn use
2820 that in preference to a shift insn followed by a sub insn.
2821 Assume that the shift-and-sub is "atomic" with a latency
2822 equal to it's cost, otherwise assume that on superscalar
2823 hardware the shift may be executed concurrently with the
2824 earlier steps in the algorithm. */
2825 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2826 {
2827 op_cost = shiftsub1_cost (speed, mode, m);
2828 op_latency = op_cost;
2829 }
2830 else
2831 op_latency = add_cost (speed, mode);
2832
2833 new_limit.cost = best_cost.cost - op_cost;
2834 new_limit.latency = best_cost.latency - op_latency;
2835 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2836 &new_limit, mode);
2837
2838 alg_in->cost.cost += op_cost;
2839 alg_in->cost.latency += op_latency;
2840 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2841 {
2842 best_cost = alg_in->cost;
2843 std::swap (alg_in, best_alg);
2844 best_alg->log[best_alg->ops] = m;
2845 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2846 }
2847 }
2848
2849 if (cache_hit)
2850 goto done;
2851 }
2852
2853 /* Look for factors of t of the form
2854 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2855 If we find such a factor, we can multiply by t using an algorithm that
2856 multiplies by q, shift the result by m and add/subtract it to itself.
2857
2858 We search for large factors first and loop down, even if large factors
2859 are less probable than small; if we find a large factor we will find a
2860 good sequence quickly, and therefore be able to prune (by decreasing
2861 COST_LIMIT) the search. */
2862
2863 do_alg_addsub_factor:
2864 for (m = floor_log2 (t - 1); m >= 2; m--)
2865 {
2866 unsigned HOST_WIDE_INT d;
2867
2868 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2869 if (t % d == 0 && t > d && m < maxm
2870 && (!cache_hit || cache_alg == alg_add_factor))
2871 {
2872 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2873 if (shiftadd_cost (speed, mode, m) <= op_cost)
2874 op_cost = shiftadd_cost (speed, mode, m);
2875
2876 op_latency = op_cost;
2877
2878
2879 new_limit.cost = best_cost.cost - op_cost;
2880 new_limit.latency = best_cost.latency - op_latency;
2881 synth_mult (alg_in, t / d, &new_limit, mode);
2882
2883 alg_in->cost.cost += op_cost;
2884 alg_in->cost.latency += op_latency;
2885 if (alg_in->cost.latency < op_cost)
2886 alg_in->cost.latency = op_cost;
2887 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2888 {
2889 best_cost = alg_in->cost;
2890 std::swap (alg_in, best_alg);
2891 best_alg->log[best_alg->ops] = m;
2892 best_alg->op[best_alg->ops] = alg_add_factor;
2893 }
2894 /* Other factors will have been taken care of in the recursion. */
2895 break;
2896 }
2897
2898 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2899 if (t % d == 0 && t > d && m < maxm
2900 && (!cache_hit || cache_alg == alg_sub_factor))
2901 {
2902 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2903 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2904 op_cost = shiftsub0_cost (speed, mode, m);
2905
2906 op_latency = op_cost;
2907
2908 new_limit.cost = best_cost.cost - op_cost;
2909 new_limit.latency = best_cost.latency - op_latency;
2910 synth_mult (alg_in, t / d, &new_limit, mode);
2911
2912 alg_in->cost.cost += op_cost;
2913 alg_in->cost.latency += op_latency;
2914 if (alg_in->cost.latency < op_cost)
2915 alg_in->cost.latency = op_cost;
2916 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2917 {
2918 best_cost = alg_in->cost;
2919 std::swap (alg_in, best_alg);
2920 best_alg->log[best_alg->ops] = m;
2921 best_alg->op[best_alg->ops] = alg_sub_factor;
2922 }
2923 break;
2924 }
2925 }
2926 if (cache_hit)
2927 goto done;
2928
2929 /* Try shift-and-add (load effective address) instructions,
2930 i.e. do a*3, a*5, a*9. */
2931 if ((t & 1) != 0)
2932 {
2933 do_alg_add_t2_m:
2934 q = t - 1;
2935 q = q & -q;
2936 m = exact_log2 (q);
2937 if (m >= 0 && m < maxm)
2938 {
2939 op_cost = shiftadd_cost (speed, mode, m);
2940 new_limit.cost = best_cost.cost - op_cost;
2941 new_limit.latency = best_cost.latency - op_cost;
2942 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2943
2944 alg_in->cost.cost += op_cost;
2945 alg_in->cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2947 {
2948 best_cost = alg_in->cost;
2949 std::swap (alg_in, best_alg);
2950 best_alg->log[best_alg->ops] = m;
2951 best_alg->op[best_alg->ops] = alg_add_t2_m;
2952 }
2953 }
2954 if (cache_hit)
2955 goto done;
2956
2957 do_alg_sub_t2_m:
2958 q = t + 1;
2959 q = q & -q;
2960 m = exact_log2 (q);
2961 if (m >= 0 && m < maxm)
2962 {
2963 op_cost = shiftsub0_cost (speed, mode, m);
2964 new_limit.cost = best_cost.cost - op_cost;
2965 new_limit.latency = best_cost.latency - op_cost;
2966 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2967
2968 alg_in->cost.cost += op_cost;
2969 alg_in->cost.latency += op_cost;
2970 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2971 {
2972 best_cost = alg_in->cost;
2973 std::swap (alg_in, best_alg);
2974 best_alg->log[best_alg->ops] = m;
2975 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2976 }
2977 }
2978 if (cache_hit)
2979 goto done;
2980 }
2981
2982 done:
2983 /* If best_cost has not decreased, we have not found any algorithm. */
2984 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2985 {
2986 /* We failed to find an algorithm. Record alg_impossible for
2987 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2988 we are asked to find an algorithm for T within the same or
2989 lower COST_LIMIT, we can immediately return to the
2990 caller. */
2991 entry_ptr->t = t;
2992 entry_ptr->mode = mode;
2993 entry_ptr->speed = speed;
2994 entry_ptr->alg = alg_impossible;
2995 entry_ptr->cost = *cost_limit;
2996 return;
2997 }
2998
2999 /* Cache the result. */
3000 if (!cache_hit)
3001 {
3002 entry_ptr->t = t;
3003 entry_ptr->mode = mode;
3004 entry_ptr->speed = speed;
3005 entry_ptr->alg = best_alg->op[best_alg->ops];
3006 entry_ptr->cost.cost = best_cost.cost;
3007 entry_ptr->cost.latency = best_cost.latency;
3008 }
3009
3010 /* If we are getting a too long sequence for `struct algorithm'
3011 to record, make this search fail. */
3012 if (best_alg->ops == MAX_BITS_PER_WORD)
3013 return;
3014
3015 /* Copy the algorithm from temporary space to the space at alg_out.
3016 We avoid using structure assignment because the majority of
3017 best_alg is normally undefined, and this is a critical function. */
3018 alg_out->ops = best_alg->ops + 1;
3019 alg_out->cost = best_cost;
3020 memcpy (alg_out->op, best_alg->op,
3021 alg_out->ops * sizeof *alg_out->op);
3022 memcpy (alg_out->log, best_alg->log,
3023 alg_out->ops * sizeof *alg_out->log);
3024 }
3025
3026 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3027 Try three variations:
3028
3029 - a shift/add sequence based on VAL itself
3030 - a shift/add sequence based on -VAL, followed by a negation
3031 - a shift/add sequence based on VAL - 1, followed by an addition.
3032
   Return true if the cheapest of these costs less than MULT_COST,
3034 describing the algorithm in *ALG and final fixup in *VARIANT. */
3035
static bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds.  */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions.  */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  /* First try a plain shift/add synthesis of VAL itself.  synth_mult
     fills *ALG; whether it succeeded is judged below via
     MULT_COST_LESS against the original budget.  */
  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* Second variant: synthesize -VAL and negate the result.
     This works only if the inverted value actually fits in an
     `unsigned int'.  */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      /* The budget for the recursive synthesis is the best result so
	 far (or the overall bound, if the first try failed) minus the
	 cost of the final negation.  */
      op_cost = neg_cost (speed, mode);
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* Third variant: synthesize VAL - 1 and add the operand once more.
     This proves very useful for division-by-constant.  */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  /* Success iff the cheapest variant beat the caller's bound.  */
  return MULT_COST_LESS (&alg->cost, mult_cost);
}
3106
3107 /* A subroutine of expand_mult, used for constant multiplications.
3108 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3109 convenient. Use the shift/add sequence described by ALG and apply
3110 the final fixup specified by VARIANT. */
3111
static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs.  */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation.  VAL_SO_FAR tracks, as a host integer, the
     multiplier value that ACCUM currently represents; it is checked
     against VAL at the end as a consistency assertion.  */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  /* Replay the recorded shift/add steps, one insn group per step.  */
  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      /* When not optimizing, steer the final operation directly into
	 TARGET (unless a fixup addition still follows).  */
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  /* accum <<= log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn.  */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  /* accum += op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_sub_t_m2:
	  /* accum -= op0 << log.  */
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= (HOST_WIDE_INT) 1 << log;
	  break;

	case alg_add_t2_m:
	  /* accum = (accum << log) + op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  /* accum = (accum << log) - op0.  */
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  /* accum += accum << log, i.e. accum *= (1 + 2^log).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  /* accum = (accum << log) - accum, i.e. accum *= (2^log - 1).  */
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that.  */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  insn = get_last_insn ();
	  set_dst_reg_note (insn, REG_EQUAL,
			    gen_rtx_MULT (nmode, tem,
					  gen_int_mode (val_so_far, nmode)),
			    accum_inner);
	}
    }

  /* Apply the final fixup chosen by choose_mult_variant.  */
  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion.  */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == val_so_far);

  return accum;
}
3254
3255 /* Perform a multiplication and return an rtx for the result.
3256 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3257 TARGET is a suggestion for where to store the result (an rtx).
3258
3259 We check specially for a constant integer as OP1.
3260 If you want this check for OP0 as well, then before calling
3261 you should swap the two operands if OP0 would be constant. */
3262
rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  /* Canonicalize so that any constant ends up in OP1.  */
  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical.  */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      /* Trivial constants: x*0, x*1, x*-1.  */
      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      /* A synthesized shift/add sequence would bypass the trapping
	 multiply's overflow check, so don't use it with -ftrapv.  */
      if (do_trapv)
	goto skip_synth;

      /* If mode is integer vector mode, check if the backend supports
	 vector lshift (by scalar or vector) at all.  If not, we can't use
	 synthesized multiply.  */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions.  */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32 bit machine.  */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above).  */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult.  */

      /* Special case powers of two.  */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      /* A scratch register, used only to price the multiply insn below.  */
      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result.  */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult.  */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two.  */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult.  */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x.  */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp, OPTAB_LIB_WIDEN);
    }

  /* Fall back to a real multiply insn or libcall.
     This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned.  */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
  gcc_assert (op0);
  return op0;
}
3411
3412 /* Return a cost estimate for multiplying a register by the given
3413 COEFFicient in the given MODE and SPEED. */
3414
3415 int
mult_by_coeff_cost(HOST_WIDE_INT coeff,machine_mode mode,bool speed)3416 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3417 {
3418 int max_cost;
3419 struct algorithm algorithm;
3420 enum mult_variant variant;
3421
3422 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3423 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3424 mode, speed);
3425 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3426 return algorithm.cost.cost;
3427 else
3428 return max_cost;
3429 }
3430
3431 /* Perform a widening multiplication and return an rtx for the result.
3432 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3433 TARGET is a suggestion for where to store the result (an rtx).
3434 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3435 or smul_widen_optab.
3436
3437 We check specially for a constant integer as OP1, comparing the
3438 cost of a widening multiply against the cost of a sequence of shifts
3439 and adds. */
3440
3441 rtx
expand_widening_mult(machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,optab this_optab)3442 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3443 int unsignedp, optab this_optab)
3444 {
3445 bool speed = optimize_insn_for_speed_p ();
3446 rtx cop1;
3447
3448 if (CONST_INT_P (op1)
3449 && GET_MODE (op0) != VOIDmode
3450 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3451 this_optab == umul_widen_optab))
3452 && CONST_INT_P (cop1)
3453 && (INTVAL (cop1) >= 0
3454 || HWI_COMPUTABLE_MODE_P (mode)))
3455 {
3456 HOST_WIDE_INT coeff = INTVAL (cop1);
3457 int max_cost;
3458 enum mult_variant variant;
3459 struct algorithm algorithm;
3460
3461 if (coeff == 0)
3462 return CONST0_RTX (mode);
3463
3464 /* Special case powers of two. */
3465 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3466 {
3467 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3468 return expand_shift (LSHIFT_EXPR, mode, op0,
3469 floor_log2 (coeff), target, unsignedp);
3470 }
3471
3472 /* Exclude cost of op0 from max_cost to match the cost
3473 calculation of the synth_mult. */
3474 max_cost = mul_widen_cost (speed, mode);
3475 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3476 max_cost))
3477 {
3478 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3479 return expand_mult_const (mode, op0, coeff, target,
3480 &algorithm, variant);
3481 }
3482 }
3483 return expand_binop (mode, this_optab, op0, op1, target,
3484 unsignedp, OPTAB_LIB_WIDEN);
3485 }
3486
3487 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3488 replace division by D, and put the least significant N bits of the result
3489 in *MULTIPLIER_PTR and return the most significant bit.
3490
3491 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3492 needed precision is in PRECISION (should be <= N).
3493
3494 PRECISION should be as small as possible so this function can choose
3495 multiplier more freely.
3496
3497 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3498 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3499
3500 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3501 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3502
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)).  */

  /* Reduce to lowest terms.  As long as mlow and mhigh still agree
     after dropping their lowest bit, that bit is not needed in the
     multiplier, and the post shift can be reduced by one.  */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      /* Return the low N bits of mhigh; the returned value is the
	 (N+1)-st bit, i.e. whether mhigh overflows N bits.  */
      unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}
3558
3559 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3560 congruent to 1 (mod 2**N). */
3561
3562 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3563 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3564 {
3565 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3566
3567 /* The algorithm notes that the choice y = x satisfies
3568 x*y == 1 mod 2^3, since x is assumed odd.
3569 Each iteration doubles the number of bits of significance in y. */
3570
3571 unsigned HOST_WIDE_INT mask;
3572 unsigned HOST_WIDE_INT y = x;
3573 int nbit = 3;
3574
3575 mask = (n == HOST_BITS_PER_WIDE_INT
3576 ? ~(unsigned HOST_WIDE_INT) 0
3577 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3578
3579 while (nbit < n)
3580 {
3581 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3582 nbit *= 2;
3583 }
3584 return y;
3585 }
3586
3587 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3588 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3589 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3590 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3591 become signed.
3592
3593 The result is put in TARGET if that is convenient.
3594
3595 MODE is the mode of operation. */
3596
3597 rtx
expand_mult_highpart_adjust(machine_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3598 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3599 rtx op1, rtx target, int unsignedp)
3600 {
3601 rtx tem;
3602 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3603
3604 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3605 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3606 tem = expand_and (mode, tem, op1, NULL_RTX);
3607 adj_operand
3608 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3609 adj_operand);
3610
3611 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3612 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3613 tem = expand_and (mode, tem, op0, NULL_RTX);
3614 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3615 target);
3616
3617 return target;
3618 }
3619
3620 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3621
3622 static rtx
extract_high_half(machine_mode mode,rtx op)3623 extract_high_half (machine_mode mode, rtx op)
3624 {
3625 machine_mode wider_mode;
3626
3627 if (mode == word_mode)
3628 return gen_highpart (mode, op);
3629
3630 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3631
3632 wider_mode = GET_MODE_WIDER_MODE (mode);
3633 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3634 GET_MODE_BITSIZE (mode), 0, 1);
3635 return convert_modes (mode, wider_mode, op, 0);
3636 }
3637
/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand.

   Several strategies are tried in order of decreasing directness:
   a highpart multiply of the right signedness, one of the wrong
   signedness plus an adjustment, a widening multiply, a non-widening
   multiply in the wider mode, and a widening multiply of the wrong
   signedness plus an adjustment.  Each is gated on its estimated cost
   staying below MAX_COST.  Return 0 if nothing fits.  */

static rtx
expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  /* Re-canonicalize the constant for MODE; OP1 may have been created
     for a different mode.  */
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  machine_mode wider_mode;
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));

  wider_mode = GET_MODE_WIDER_MODE (mode);
  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, and in the sign flavor of unsignedp.  */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, same as above, but use sign flavor opposite of unsignedp.
     Need to adjust the result after the multiplication.  The adjustment
     costs two shifts and up to four add/sub operations, hence the extra
     terms in the cost test.  */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result.  */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication.  */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and perform a non-widening multiplication.  */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean-up any instructions emitted by
	 the conversions if things don't work out.  */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust.  */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result.  */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  /* No strategy fit within MAX_COST.  */
  return 0;
}
3744
/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation can not be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  /* The constant is used zero-extended to MODE's width below.  */
  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations.  */
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  /* Extracting the high half costs one final shift.  */
  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant.  Since the
     synthesized multiply uses CNST1 zero-extended, the signed high part
     then differs by OP0, which costs an extra subtraction (done below).  */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough.  */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version.  */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      /* Perform the full multiplication by CNST1 in WIDER_MODE using
	 the chosen shift/add algorithm, then keep the high half.  */
      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness.  */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}
3816
3817
/* Expand signed modulus of OP0 by a power of two D in mode MODE.
   Emits either a branch-free sequence built from a store-flag sign
   mask, or (when branches are cheap) a masked AND followed by a
   conditional fixup for negative operands.  */

static rtx
expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      /* SIGNMASK is all-ones if OP0 < 0, zero otherwise.  */
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  /* The significant bits of the modulus: 2**logd - 1.  */
	  HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      /* (OP0 ^ SIGNMASK) - SIGNMASK conditionally negates OP0
		 when it is negative; mask the low bits, then undo the
		 conditional negation the same way.  */
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      /* Shift SIGNMASK down to 2**logd - 1 for negative OP0
		 (0 otherwise), then bias, mask and un-bias.  */
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero.  */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  /* If the masked value is non-negative it already is the remainder;
     skip the fixup.  */
  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  /* Negative case: compute ((result - 1) | ~(2**logd - 1)) + 1, which
     turns the masked value into the (nonpositive) remainder.  */
  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}
3913
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D.

   All strategies below implement truncating division as an arithmetic
   right shift of OP0 + (OP0 < 0 ? D - 1 : 0); they differ only in how
   the conditional addend is produced.  */

static rtx
expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  /* Division by 2: (OP0 + (OP0 < 0)) >> 1 using a store-flag insn.  */
  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Use a conditional move to pick OP0 + D - 1 for negative OP0,
     avoiding a branch.  */
  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      /* emit_conditional_move failed; drop the partial sequence and
	 fall through to the next strategy.  */
      end_sequence ();
    }

  /* Branch-free fallback: build D - 1 from a -1/0 store-flag mask.  */
  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      /* TEMP is all-ones if OP0 < 0, zero otherwise.  */
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      /* Reduce the mask to D - 1 either by AND or by a logical shift,
	 whichever is cheaper on this target.  */
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  /* Last resort: compare-and-branch around the D - 1 increment.  */
  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}
3988
3989 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3990 if that is convenient, and returning where the result is.
3991 You may request either the quotient or the remainder as the result;
3992 specify REM_FLAG nonzero to get the remainder.
3993
3994 CODE is the expression code for which kind of division this is;
3995 it controls how rounding is done. MODE is the machine mode to use.
3996 UNSIGNEDP nonzero means do unsigned division. */
3997
3998 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3999 and then correct it by or'ing in missing high bits
4000 if result of ANDI is nonzero.
4001 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4002 This could optimize to a bfexts instruction.
4003 But C doesn't use these operations, so their optimizations are
4004 left for later. */
4005 /* ??? For modulo, we don't actually need the highpart of the first product,
4006 the low part will do nicely. And for small divisors, the second multiply
4007 can also be a low-part only multiply or even be completely left out.
4008 E.g. to calculate the remainder of a division by 3 with a 32 bit
4009 multiply, multiply with 0x55555556 and extract the upper two bits;
4010 the result is exact for inputs up to 0x1fffffff.
4011 The input range can be reduced by using cross-sum rules.
4012 For odd divisors >= 3, the following table gives right shift counts
4013 so that if a number is shifted by an integer multiple of the given
4014 amount, the remainder stays the same:
4015 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4016 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4017 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4018 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4019 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4020
4021 Cross-sum rules for even numbers can be derived by leaving as many bits
4022 to the right alone as the divisor has zeros to the right.
4023 E.g. if x is an unsigned 32 bit number:
4024 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4025 */
4026
4027 rtx
expand_divmod(int rem_flag,enum tree_code code,machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)4028 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4029 rtx op0, rtx op1, rtx target, int unsignedp)
4030 {
4031 machine_mode compute_mode;
4032 rtx tquotient;
4033 rtx quotient = 0, remainder = 0;
4034 rtx_insn *last;
4035 int size;
4036 rtx_insn *insn;
4037 optab optab1, optab2;
4038 int op1_is_constant, op1_is_pow2 = 0;
4039 int max_cost, extra_cost;
4040 static HOST_WIDE_INT last_div_const = 0;
4041 bool speed = optimize_insn_for_speed_p ();
4042
4043 op1_is_constant = CONST_INT_P (op1);
4044 if (op1_is_constant)
4045 {
4046 wide_int ext_op1 = rtx_mode_t (op1, mode);
4047 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4048 || (! unsignedp
4049 && wi::popcount (wi::neg (ext_op1)) == 1));
4050 }
4051
4052 /*
4053 This is the structure of expand_divmod:
4054
4055 First comes code to fix up the operands so we can perform the operations
4056 correctly and efficiently.
4057
4058 Second comes a switch statement with code specific for each rounding mode.
4059 For some special operands this code emits all RTL for the desired
4060 operation, for other cases, it generates only a quotient and stores it in
4061 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4062 to indicate that it has not done anything.
4063
4064 Last comes code that finishes the operation. If QUOTIENT is set and
4065 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4066 QUOTIENT is not set, it is computed using trunc rounding.
4067
4068 We try to generate special code for division and remainder when OP1 is a
4069 constant. If |OP1| = 2**n we can use shifts and some other fast
4070 operations. For other values of OP1, we compute a carefully selected
4071 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4072 by m.
4073
4074 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4075 half of the product. Different strategies for generating the product are
4076 implemented in expmed_mult_highpart.
4077
4078 If what we actually want is the remainder, we generate that by another
4079 by-constant multiplication and a subtraction. */
4080
4081 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4082 code below will malfunction if we are, so check here and handle
4083 the special case if so. */
4084 if (op1 == const1_rtx)
4085 return rem_flag ? const0_rtx : op0;
4086
4087 /* When dividing by -1, we could get an overflow.
4088 negv_optab can handle overflows. */
4089 if (! unsignedp && op1 == constm1_rtx)
4090 {
4091 if (rem_flag)
4092 return const0_rtx;
4093 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4094 ? negv_optab : neg_optab, op0, target, 0);
4095 }
4096
4097 if (target
4098 /* Don't use the function value register as a target
4099 since we have to read it as well as write it,
4100 and function-inlining gets confused by this. */
4101 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4102 /* Don't clobber an operand while doing a multi-step calculation. */
4103 || ((rem_flag || op1_is_constant)
4104 && (reg_mentioned_p (target, op0)
4105 || (MEM_P (op0) && MEM_P (target))))
4106 || reg_mentioned_p (target, op1)
4107 || (MEM_P (op1) && MEM_P (target))))
4108 target = 0;
4109
4110 /* Get the mode in which to perform this computation. Normally it will
4111 be MODE, but sometimes we can't do the desired operation in MODE.
4112 If so, pick a wider mode in which we can do the operation. Convert
4113 to that mode at the start to avoid repeated conversions.
4114
4115 First see what operations we need. These depend on the expression
4116 we are evaluating. (We assume that divxx3 insns exist under the
4117 same conditions that modxx3 insns and that these insns don't normally
4118 fail. If these assumptions are not correct, we may generate less
4119 efficient code in some cases.)
4120
4121 Then see if we find a mode in which we can open-code that operation
4122 (either a division, modulus, or shift). Finally, check for the smallest
4123 mode for which we can do the operation with a library call. */
4124
4125 /* We might want to refine this now that we have division-by-constant
4126 optimization. Since expmed_mult_highpart tries so many variants, it is
4127 not straightforward to generalize this. Maybe we should make an array
4128 of possible modes in init_expmed? Save this for GCC 2.7. */
4129
4130 optab1 = (op1_is_pow2
4131 ? (unsignedp ? lshr_optab : ashr_optab)
4132 : (unsignedp ? udiv_optab : sdiv_optab));
4133 optab2 = (op1_is_pow2 ? optab1
4134 : (unsignedp ? udivmod_optab : sdivmod_optab));
4135
4136 for (compute_mode = mode; compute_mode != VOIDmode;
4137 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4138 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4139 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4140 break;
4141
4142 if (compute_mode == VOIDmode)
4143 for (compute_mode = mode; compute_mode != VOIDmode;
4144 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4145 if (optab_libfunc (optab1, compute_mode)
4146 || optab_libfunc (optab2, compute_mode))
4147 break;
4148
4149 /* If we still couldn't find a mode, use MODE, but expand_binop will
4150 probably die. */
4151 if (compute_mode == VOIDmode)
4152 compute_mode = mode;
4153
4154 if (target && GET_MODE (target) == compute_mode)
4155 tquotient = target;
4156 else
4157 tquotient = gen_reg_rtx (compute_mode);
4158
4159 size = GET_MODE_BITSIZE (compute_mode);
4160 #if 0
4161 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4162 (mode), and thereby get better code when OP1 is a constant. Do that
4163 later. It will require going over all usages of SIZE below. */
4164 size = GET_MODE_BITSIZE (mode);
4165 #endif
4166
4167 /* Only deduct something for a REM if the last divide done was
4168 for a different constant. Then set the constant of the last
4169 divide. */
4170 max_cost = (unsignedp
4171 ? udiv_cost (speed, compute_mode)
4172 : sdiv_cost (speed, compute_mode));
4173 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4174 && INTVAL (op1) == last_div_const))
4175 max_cost -= (mul_cost (speed, compute_mode)
4176 + add_cost (speed, compute_mode));
4177
4178 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4179
4180 /* Now convert to the best mode to use. */
4181 if (compute_mode != mode)
4182 {
4183 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4184 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4185
4186 /* convert_modes may have placed op1 into a register, so we
4187 must recompute the following. */
4188 op1_is_constant = CONST_INT_P (op1);
4189 if (op1_is_constant)
4190 {
4191 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4192 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4193 || (! unsignedp
4194 && wi::popcount (wi::neg (ext_op1)) == 1));
4195 }
4196 else
4197 op1_is_pow2 = 0;
4198 }
4199
4200 /* If one of the operands is a volatile MEM, copy it into a register. */
4201
4202 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4203 op0 = force_reg (compute_mode, op0);
4204 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4205 op1 = force_reg (compute_mode, op1);
4206
4207 /* If we need the remainder or if OP1 is constant, we need to
4208 put OP0 in a register in case it has any queued subexpressions. */
4209 if (rem_flag || op1_is_constant)
4210 op0 = force_reg (compute_mode, op0);
4211
4212 last = get_last_insn ();
4213
4214 /* Promote floor rounding to trunc rounding for unsigned operations. */
4215 if (unsignedp)
4216 {
4217 if (code == FLOOR_DIV_EXPR)
4218 code = TRUNC_DIV_EXPR;
4219 if (code == FLOOR_MOD_EXPR)
4220 code = TRUNC_MOD_EXPR;
4221 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4222 code = TRUNC_DIV_EXPR;
4223 }
4224
4225 if (op1 != const0_rtx)
4226 switch (code)
4227 {
4228 case TRUNC_MOD_EXPR:
4229 case TRUNC_DIV_EXPR:
4230 if (op1_is_constant)
4231 {
4232 if (unsignedp)
4233 {
4234 unsigned HOST_WIDE_INT mh, ml;
4235 int pre_shift, post_shift;
4236 int dummy;
4237 wide_int wd = rtx_mode_t (op1, compute_mode);
4238 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4239
4240 if (wi::popcount (wd) == 1)
4241 {
4242 pre_shift = floor_log2 (d);
4243 if (rem_flag)
4244 {
4245 unsigned HOST_WIDE_INT mask
4246 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4247 remainder
4248 = expand_binop (compute_mode, and_optab, op0,
4249 gen_int_mode (mask, compute_mode),
4250 remainder, 1,
4251 OPTAB_LIB_WIDEN);
4252 if (remainder)
4253 return gen_lowpart (mode, remainder);
4254 }
4255 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4256 pre_shift, tquotient, 1);
4257 }
4258 else if (size <= HOST_BITS_PER_WIDE_INT)
4259 {
4260 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4261 {
4262 /* Most significant bit of divisor is set; emit an scc
4263 insn. */
4264 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4265 compute_mode, 1, 1);
4266 }
4267 else
4268 {
4269 /* Find a suitable multiplier and right shift count
4270 instead of multiplying with D. */
4271
4272 mh = choose_multiplier (d, size, size,
4273 &ml, &post_shift, &dummy);
4274
4275 /* If the suggested multiplier is more than SIZE bits,
4276 we can do better for even divisors, using an
4277 initial right shift. */
4278 if (mh != 0 && (d & 1) == 0)
4279 {
4280 pre_shift = floor_log2 (d & -d);
4281 mh = choose_multiplier (d >> pre_shift, size,
4282 size - pre_shift,
4283 &ml, &post_shift, &dummy);
4284 gcc_assert (!mh);
4285 }
4286 else
4287 pre_shift = 0;
4288
4289 if (mh != 0)
4290 {
4291 rtx t1, t2, t3, t4;
4292
4293 if (post_shift - 1 >= BITS_PER_WORD)
4294 goto fail1;
4295
4296 extra_cost
4297 = (shift_cost (speed, compute_mode, post_shift - 1)
4298 + shift_cost (speed, compute_mode, 1)
4299 + 2 * add_cost (speed, compute_mode));
4300 t1 = expmed_mult_highpart
4301 (compute_mode, op0,
4302 gen_int_mode (ml, compute_mode),
4303 NULL_RTX, 1, max_cost - extra_cost);
4304 if (t1 == 0)
4305 goto fail1;
4306 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4307 op0, t1),
4308 NULL_RTX);
4309 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4310 t2, 1, NULL_RTX, 1);
4311 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4312 t1, t3),
4313 NULL_RTX);
4314 quotient = expand_shift
4315 (RSHIFT_EXPR, compute_mode, t4,
4316 post_shift - 1, tquotient, 1);
4317 }
4318 else
4319 {
4320 rtx t1, t2;
4321
4322 if (pre_shift >= BITS_PER_WORD
4323 || post_shift >= BITS_PER_WORD)
4324 goto fail1;
4325
4326 t1 = expand_shift
4327 (RSHIFT_EXPR, compute_mode, op0,
4328 pre_shift, NULL_RTX, 1);
4329 extra_cost
4330 = (shift_cost (speed, compute_mode, pre_shift)
4331 + shift_cost (speed, compute_mode, post_shift));
4332 t2 = expmed_mult_highpart
4333 (compute_mode, t1,
4334 gen_int_mode (ml, compute_mode),
4335 NULL_RTX, 1, max_cost - extra_cost);
4336 if (t2 == 0)
4337 goto fail1;
4338 quotient = expand_shift
4339 (RSHIFT_EXPR, compute_mode, t2,
4340 post_shift, tquotient, 1);
4341 }
4342 }
4343 }
4344 else /* Too wide mode to use tricky code */
4345 break;
4346
4347 insn = get_last_insn ();
4348 if (insn != last)
4349 set_dst_reg_note (insn, REG_EQUAL,
4350 gen_rtx_UDIV (compute_mode, op0, op1),
4351 quotient);
4352 }
4353 else /* TRUNC_DIV, signed */
4354 {
4355 unsigned HOST_WIDE_INT ml;
4356 int lgup, post_shift;
4357 rtx mlr;
4358 HOST_WIDE_INT d = INTVAL (op1);
4359 unsigned HOST_WIDE_INT abs_d;
4360
4361 /* Since d might be INT_MIN, we have to cast to
4362 unsigned HOST_WIDE_INT before negating to avoid
4363 undefined signed overflow. */
4364 abs_d = (d >= 0
4365 ? (unsigned HOST_WIDE_INT) d
4366 : - (unsigned HOST_WIDE_INT) d);
4367
4368 /* n rem d = n rem -d */
4369 if (rem_flag && d < 0)
4370 {
4371 d = abs_d;
4372 op1 = gen_int_mode (abs_d, compute_mode);
4373 }
4374
4375 if (d == 1)
4376 quotient = op0;
4377 else if (d == -1)
4378 quotient = expand_unop (compute_mode, neg_optab, op0,
4379 tquotient, 0);
4380 else if (size <= HOST_BITS_PER_WIDE_INT
4381 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4382 {
4383 /* This case is not handled correctly below. */
4384 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4385 compute_mode, 1, 1);
4386 if (quotient == 0)
4387 goto fail1;
4388 }
4389 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4390 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4391 && (rem_flag
4392 ? smod_pow2_cheap (speed, compute_mode)
4393 : sdiv_pow2_cheap (speed, compute_mode))
4394 /* We assume that cheap metric is true if the
4395 optab has an expander for this mode. */
4396 && ((optab_handler ((rem_flag ? smod_optab
4397 : sdiv_optab),
4398 compute_mode)
4399 != CODE_FOR_nothing)
4400 || (optab_handler (sdivmod_optab,
4401 compute_mode)
4402 != CODE_FOR_nothing)))
4403 ;
4404 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4405 && (size <= HOST_BITS_PER_WIDE_INT
4406 || abs_d != (unsigned HOST_WIDE_INT) d))
4407 {
4408 if (rem_flag)
4409 {
4410 remainder = expand_smod_pow2 (compute_mode, op0, d);
4411 if (remainder)
4412 return gen_lowpart (mode, remainder);
4413 }
4414
4415 if (sdiv_pow2_cheap (speed, compute_mode)
4416 && ((optab_handler (sdiv_optab, compute_mode)
4417 != CODE_FOR_nothing)
4418 || (optab_handler (sdivmod_optab, compute_mode)
4419 != CODE_FOR_nothing)))
4420 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4421 compute_mode, op0,
4422 gen_int_mode (abs_d,
4423 compute_mode),
4424 NULL_RTX, 0);
4425 else
4426 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4427
4428 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4429 negate the quotient. */
4430 if (d < 0)
4431 {
4432 insn = get_last_insn ();
4433 if (insn != last
4434 && abs_d < ((unsigned HOST_WIDE_INT) 1
4435 << (HOST_BITS_PER_WIDE_INT - 1)))
4436 set_dst_reg_note (insn, REG_EQUAL,
4437 gen_rtx_DIV (compute_mode, op0,
4438 gen_int_mode
4439 (abs_d,
4440 compute_mode)),
4441 quotient);
4442
4443 quotient = expand_unop (compute_mode, neg_optab,
4444 quotient, quotient, 0);
4445 }
4446 }
4447 else if (size <= HOST_BITS_PER_WIDE_INT)
4448 {
4449 choose_multiplier (abs_d, size, size - 1,
4450 &ml, &post_shift, &lgup);
4451 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4452 {
4453 rtx t1, t2, t3;
4454
4455 if (post_shift >= BITS_PER_WORD
4456 || size - 1 >= BITS_PER_WORD)
4457 goto fail1;
4458
4459 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4460 + shift_cost (speed, compute_mode, size - 1)
4461 + add_cost (speed, compute_mode));
4462 t1 = expmed_mult_highpart
4463 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4464 NULL_RTX, 0, max_cost - extra_cost);
4465 if (t1 == 0)
4466 goto fail1;
4467 t2 = expand_shift
4468 (RSHIFT_EXPR, compute_mode, t1,
4469 post_shift, NULL_RTX, 0);
4470 t3 = expand_shift
4471 (RSHIFT_EXPR, compute_mode, op0,
4472 size - 1, NULL_RTX, 0);
4473 if (d < 0)
4474 quotient
4475 = force_operand (gen_rtx_MINUS (compute_mode,
4476 t3, t2),
4477 tquotient);
4478 else
4479 quotient
4480 = force_operand (gen_rtx_MINUS (compute_mode,
4481 t2, t3),
4482 tquotient);
4483 }
4484 else
4485 {
4486 rtx t1, t2, t3, t4;
4487
4488 if (post_shift >= BITS_PER_WORD
4489 || size - 1 >= BITS_PER_WORD)
4490 goto fail1;
4491
4492 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4493 mlr = gen_int_mode (ml, compute_mode);
4494 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4495 + shift_cost (speed, compute_mode, size - 1)
4496 + 2 * add_cost (speed, compute_mode));
4497 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4498 NULL_RTX, 0,
4499 max_cost - extra_cost);
4500 if (t1 == 0)
4501 goto fail1;
4502 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4503 t1, op0),
4504 NULL_RTX);
4505 t3 = expand_shift
4506 (RSHIFT_EXPR, compute_mode, t2,
4507 post_shift, NULL_RTX, 0);
4508 t4 = expand_shift
4509 (RSHIFT_EXPR, compute_mode, op0,
4510 size - 1, NULL_RTX, 0);
4511 if (d < 0)
4512 quotient
4513 = force_operand (gen_rtx_MINUS (compute_mode,
4514 t4, t3),
4515 tquotient);
4516 else
4517 quotient
4518 = force_operand (gen_rtx_MINUS (compute_mode,
4519 t3, t4),
4520 tquotient);
4521 }
4522 }
4523 else /* Too wide mode to use tricky code */
4524 break;
4525
4526 insn = get_last_insn ();
4527 if (insn != last)
4528 set_dst_reg_note (insn, REG_EQUAL,
4529 gen_rtx_DIV (compute_mode, op0, op1),
4530 quotient);
4531 }
4532 break;
4533 }
4534 fail1:
4535 delete_insns_since (last);
4536 break;
4537
4538 case FLOOR_DIV_EXPR:
4539 case FLOOR_MOD_EXPR:
4540 /* We will come here only for signed operations. */
4541 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4542 {
4543 unsigned HOST_WIDE_INT mh, ml;
4544 int pre_shift, lgup, post_shift;
4545 HOST_WIDE_INT d = INTVAL (op1);
4546
4547 if (d > 0)
4548 {
4549 /* We could just as easily deal with negative constants here,
4550 but it does not seem worth the trouble for GCC 2.6. */
4551 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4552 {
4553 pre_shift = floor_log2 (d);
4554 if (rem_flag)
4555 {
4556 unsigned HOST_WIDE_INT mask
4557 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4558 remainder = expand_binop
4559 (compute_mode, and_optab, op0,
4560 gen_int_mode (mask, compute_mode),
4561 remainder, 0, OPTAB_LIB_WIDEN);
4562 if (remainder)
4563 return gen_lowpart (mode, remainder);
4564 }
4565 quotient = expand_shift
4566 (RSHIFT_EXPR, compute_mode, op0,
4567 pre_shift, tquotient, 0);
4568 }
4569 else
4570 {
4571 rtx t1, t2, t3, t4;
4572
4573 mh = choose_multiplier (d, size, size - 1,
4574 &ml, &post_shift, &lgup);
4575 gcc_assert (!mh);
4576
4577 if (post_shift < BITS_PER_WORD
4578 && size - 1 < BITS_PER_WORD)
4579 {
4580 t1 = expand_shift
4581 (RSHIFT_EXPR, compute_mode, op0,
4582 size - 1, NULL_RTX, 0);
4583 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4584 NULL_RTX, 0, OPTAB_WIDEN);
4585 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4586 + shift_cost (speed, compute_mode, size - 1)
4587 + 2 * add_cost (speed, compute_mode));
4588 t3 = expmed_mult_highpart
4589 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4590 NULL_RTX, 1, max_cost - extra_cost);
4591 if (t3 != 0)
4592 {
4593 t4 = expand_shift
4594 (RSHIFT_EXPR, compute_mode, t3,
4595 post_shift, NULL_RTX, 1);
4596 quotient = expand_binop (compute_mode, xor_optab,
4597 t4, t1, tquotient, 0,
4598 OPTAB_WIDEN);
4599 }
4600 }
4601 }
4602 }
4603 else
4604 {
4605 rtx nsign, t1, t2, t3, t4;
4606 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4607 op0, constm1_rtx), NULL_RTX);
4608 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4609 0, OPTAB_WIDEN);
4610 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4611 size - 1, NULL_RTX, 0);
4612 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4613 NULL_RTX);
4614 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4615 NULL_RTX, 0);
4616 if (t4)
4617 {
4618 rtx t5;
4619 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4620 NULL_RTX, 0);
4621 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4622 t4, t5),
4623 tquotient);
4624 }
4625 }
4626 }
4627
4628 if (quotient != 0)
4629 break;
4630 delete_insns_since (last);
4631
4632 /* Try using an instruction that produces both the quotient and
4633 remainder, using truncation. We can easily compensate the quotient
4634 or remainder to get floor rounding, once we have the remainder.
4635 Notice that we compute also the final remainder value here,
4636 and return the result right away. */
4637 if (target == 0 || GET_MODE (target) != compute_mode)
4638 target = gen_reg_rtx (compute_mode);
4639
4640 if (rem_flag)
4641 {
4642 remainder
4643 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4644 quotient = gen_reg_rtx (compute_mode);
4645 }
4646 else
4647 {
4648 quotient
4649 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4650 remainder = gen_reg_rtx (compute_mode);
4651 }
4652
4653 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4654 quotient, remainder, 0))
4655 {
4656 /* This could be computed with a branch-less sequence.
4657 Save that for later. */
4658 rtx tem;
4659 rtx_code_label *label = gen_label_rtx ();
4660 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4661 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4662 NULL_RTX, 0, OPTAB_WIDEN);
4663 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4664 expand_dec (quotient, const1_rtx);
4665 expand_inc (remainder, op1);
4666 emit_label (label);
4667 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4668 }
4669
4670 /* No luck with division elimination or divmod. Have to do it
4671 by conditionally adjusting op0 *and* the result. */
4672 {
4673 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4674 rtx adjusted_op0;
4675 rtx tem;
4676
4677 quotient = gen_reg_rtx (compute_mode);
4678 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4679 label1 = gen_label_rtx ();
4680 label2 = gen_label_rtx ();
4681 label3 = gen_label_rtx ();
4682 label4 = gen_label_rtx ();
4683 label5 = gen_label_rtx ();
4684 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4685 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4686 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 quotient, 0, OPTAB_LIB_WIDEN);
4688 if (tem != quotient)
4689 emit_move_insn (quotient, tem);
4690 emit_jump_insn (targetm.gen_jump (label5));
4691 emit_barrier ();
4692 emit_label (label1);
4693 expand_inc (adjusted_op0, const1_rtx);
4694 emit_jump_insn (targetm.gen_jump (label4));
4695 emit_barrier ();
4696 emit_label (label2);
4697 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4698 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 quotient, 0, OPTAB_LIB_WIDEN);
4700 if (tem != quotient)
4701 emit_move_insn (quotient, tem);
4702 emit_jump_insn (targetm.gen_jump (label5));
4703 emit_barrier ();
4704 emit_label (label3);
4705 expand_dec (adjusted_op0, const1_rtx);
4706 emit_label (label4);
4707 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 quotient, 0, OPTAB_LIB_WIDEN);
4709 if (tem != quotient)
4710 emit_move_insn (quotient, tem);
4711 expand_dec (quotient, const1_rtx);
4712 emit_label (label5);
4713 }
4714 break;
4715
4716 case CEIL_DIV_EXPR:
4717 case CEIL_MOD_EXPR:
4718 if (unsignedp)
4719 {
4720 if (op1_is_constant
4721 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4722 && (size <= HOST_BITS_PER_WIDE_INT
4723 || INTVAL (op1) >= 0))
4724 {
4725 rtx t1, t2, t3;
4726 unsigned HOST_WIDE_INT d = INTVAL (op1);
4727 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4728 floor_log2 (d), tquotient, 1);
4729 t2 = expand_binop (compute_mode, and_optab, op0,
4730 gen_int_mode (d - 1, compute_mode),
4731 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4732 t3 = gen_reg_rtx (compute_mode);
4733 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4734 compute_mode, 1, 1);
4735 if (t3 == 0)
4736 {
4737 rtx_code_label *lab;
4738 lab = gen_label_rtx ();
4739 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4740 expand_inc (t1, const1_rtx);
4741 emit_label (lab);
4742 quotient = t1;
4743 }
4744 else
4745 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4746 t1, t3),
4747 tquotient);
4748 break;
4749 }
4750
4751 /* Try using an instruction that produces both the quotient and
4752 remainder, using truncation. We can easily compensate the
4753 quotient or remainder to get ceiling rounding, once we have the
4754 remainder. Notice that we compute also the final remainder
4755 value here, and return the result right away. */
4756 if (target == 0 || GET_MODE (target) != compute_mode)
4757 target = gen_reg_rtx (compute_mode);
4758
4759 if (rem_flag)
4760 {
4761 remainder = (REG_P (target)
4762 ? target : gen_reg_rtx (compute_mode));
4763 quotient = gen_reg_rtx (compute_mode);
4764 }
4765 else
4766 {
4767 quotient = (REG_P (target)
4768 ? target : gen_reg_rtx (compute_mode));
4769 remainder = gen_reg_rtx (compute_mode);
4770 }
4771
4772 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4773 remainder, 1))
4774 {
4775 /* This could be computed with a branch-less sequence.
4776 Save that for later. */
4777 rtx_code_label *label = gen_label_rtx ();
4778 do_cmp_and_jump (remainder, const0_rtx, EQ,
4779 compute_mode, label);
4780 expand_inc (quotient, const1_rtx);
4781 expand_dec (remainder, op1);
4782 emit_label (label);
4783 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4784 }
4785
4786 /* No luck with division elimination or divmod. Have to do it
4787 by conditionally adjusting op0 *and* the result. */
4788 {
4789 rtx_code_label *label1, *label2;
4790 rtx adjusted_op0, tem;
4791
4792 quotient = gen_reg_rtx (compute_mode);
4793 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4794 label1 = gen_label_rtx ();
4795 label2 = gen_label_rtx ();
4796 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4797 compute_mode, label1);
4798 emit_move_insn (quotient, const0_rtx);
4799 emit_jump_insn (targetm.gen_jump (label2));
4800 emit_barrier ();
4801 emit_label (label1);
4802 expand_dec (adjusted_op0, const1_rtx);
4803 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4804 quotient, 1, OPTAB_LIB_WIDEN);
4805 if (tem != quotient)
4806 emit_move_insn (quotient, tem);
4807 expand_inc (quotient, const1_rtx);
4808 emit_label (label2);
4809 }
4810 }
4811 else /* signed */
4812 {
4813 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4814 && INTVAL (op1) >= 0)
4815 {
4816 /* This is extremely similar to the code for the unsigned case
4817 above. For 2.7 we should merge these variants, but for
4818 2.6.1 I don't want to touch the code for unsigned since that
4819 get used in C. The signed case will only be used by other
4820 languages (Ada). */
4821
4822 rtx t1, t2, t3;
4823 unsigned HOST_WIDE_INT d = INTVAL (op1);
4824 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4825 floor_log2 (d), tquotient, 0);
4826 t2 = expand_binop (compute_mode, and_optab, op0,
4827 gen_int_mode (d - 1, compute_mode),
4828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4829 t3 = gen_reg_rtx (compute_mode);
4830 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4831 compute_mode, 1, 1);
4832 if (t3 == 0)
4833 {
4834 rtx_code_label *lab;
4835 lab = gen_label_rtx ();
4836 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4837 expand_inc (t1, const1_rtx);
4838 emit_label (lab);
4839 quotient = t1;
4840 }
4841 else
4842 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4843 t1, t3),
4844 tquotient);
4845 break;
4846 }
4847
4848 /* Try using an instruction that produces both the quotient and
4849 remainder, using truncation. We can easily compensate the
4850 quotient or remainder to get ceiling rounding, once we have the
4851 remainder. Notice that we compute also the final remainder
4852 value here, and return the result right away. */
4853 if (target == 0 || GET_MODE (target) != compute_mode)
4854 target = gen_reg_rtx (compute_mode);
4855 if (rem_flag)
4856 {
4857 remainder= (REG_P (target)
4858 ? target : gen_reg_rtx (compute_mode));
4859 quotient = gen_reg_rtx (compute_mode);
4860 }
4861 else
4862 {
4863 quotient = (REG_P (target)
4864 ? target : gen_reg_rtx (compute_mode));
4865 remainder = gen_reg_rtx (compute_mode);
4866 }
4867
4868 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4869 remainder, 0))
4870 {
4871 /* This could be computed with a branch-less sequence.
4872 Save that for later. */
4873 rtx tem;
4874 rtx_code_label *label = gen_label_rtx ();
4875 do_cmp_and_jump (remainder, const0_rtx, EQ,
4876 compute_mode, label);
4877 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4878 NULL_RTX, 0, OPTAB_WIDEN);
4879 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4880 expand_inc (quotient, const1_rtx);
4881 expand_dec (remainder, op1);
4882 emit_label (label);
4883 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4884 }
4885
4886 /* No luck with division elimination or divmod. Have to do it
4887 by conditionally adjusting op0 *and* the result. */
4888 {
4889 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4890 rtx adjusted_op0;
4891 rtx tem;
4892
4893 quotient = gen_reg_rtx (compute_mode);
4894 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4895 label1 = gen_label_rtx ();
4896 label2 = gen_label_rtx ();
4897 label3 = gen_label_rtx ();
4898 label4 = gen_label_rtx ();
4899 label5 = gen_label_rtx ();
4900 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4901 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4902 compute_mode, label1);
4903 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4904 quotient, 0, OPTAB_LIB_WIDEN);
4905 if (tem != quotient)
4906 emit_move_insn (quotient, tem);
4907 emit_jump_insn (targetm.gen_jump (label5));
4908 emit_barrier ();
4909 emit_label (label1);
4910 expand_dec (adjusted_op0, const1_rtx);
4911 emit_jump_insn (targetm.gen_jump (label4));
4912 emit_barrier ();
4913 emit_label (label2);
4914 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4915 compute_mode, label3);
4916 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4917 quotient, 0, OPTAB_LIB_WIDEN);
4918 if (tem != quotient)
4919 emit_move_insn (quotient, tem);
4920 emit_jump_insn (targetm.gen_jump (label5));
4921 emit_barrier ();
4922 emit_label (label3);
4923 expand_inc (adjusted_op0, const1_rtx);
4924 emit_label (label4);
4925 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4926 quotient, 0, OPTAB_LIB_WIDEN);
4927 if (tem != quotient)
4928 emit_move_insn (quotient, tem);
4929 expand_inc (quotient, const1_rtx);
4930 emit_label (label5);
4931 }
4932 }
4933 break;
4934
4935 case EXACT_DIV_EXPR:
4936 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4937 {
4938 HOST_WIDE_INT d = INTVAL (op1);
4939 unsigned HOST_WIDE_INT ml;
4940 int pre_shift;
4941 rtx t1;
4942
4943 pre_shift = floor_log2 (d & -d);
4944 ml = invert_mod2n (d >> pre_shift, size);
4945 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4946 pre_shift, NULL_RTX, unsignedp);
4947 quotient = expand_mult (compute_mode, t1,
4948 gen_int_mode (ml, compute_mode),
4949 NULL_RTX, 1);
4950
4951 insn = get_last_insn ();
4952 set_dst_reg_note (insn, REG_EQUAL,
4953 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4954 compute_mode, op0, op1),
4955 quotient);
4956 }
4957 break;
4958
4959 case ROUND_DIV_EXPR:
4960 case ROUND_MOD_EXPR:
4961 if (unsignedp)
4962 {
4963 rtx tem;
4964 rtx_code_label *label;
4965 label = gen_label_rtx ();
4966 quotient = gen_reg_rtx (compute_mode);
4967 remainder = gen_reg_rtx (compute_mode);
4968 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4969 {
4970 rtx tem;
4971 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4972 quotient, 1, OPTAB_LIB_WIDEN);
4973 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4974 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4975 remainder, 1, OPTAB_LIB_WIDEN);
4976 }
4977 tem = plus_constant (compute_mode, op1, -1);
4978 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4979 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4980 expand_inc (quotient, const1_rtx);
4981 expand_dec (remainder, op1);
4982 emit_label (label);
4983 }
4984 else
4985 {
4986 rtx abs_rem, abs_op1, tem, mask;
4987 rtx_code_label *label;
4988 label = gen_label_rtx ();
4989 quotient = gen_reg_rtx (compute_mode);
4990 remainder = gen_reg_rtx (compute_mode);
4991 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4992 {
4993 rtx tem;
4994 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4995 quotient, 0, OPTAB_LIB_WIDEN);
4996 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4997 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4998 remainder, 0, OPTAB_LIB_WIDEN);
4999 }
5000 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5001 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5002 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5003 1, NULL_RTX, 1);
5004 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5005 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5006 NULL_RTX, 0, OPTAB_WIDEN);
5007 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5008 size - 1, NULL_RTX, 0);
5009 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5010 NULL_RTX, 0, OPTAB_WIDEN);
5011 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5012 NULL_RTX, 0, OPTAB_WIDEN);
5013 expand_inc (quotient, tem);
5014 tem = expand_binop (compute_mode, xor_optab, mask, op1,
5015 NULL_RTX, 0, OPTAB_WIDEN);
5016 tem = expand_binop (compute_mode, sub_optab, tem, mask,
5017 NULL_RTX, 0, OPTAB_WIDEN);
5018 expand_dec (remainder, tem);
5019 emit_label (label);
5020 }
5021 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022
5023 default:
5024 gcc_unreachable ();
5025 }
5026
5027 if (quotient == 0)
5028 {
5029 if (target && GET_MODE (target) != compute_mode)
5030 target = 0;
5031
5032 if (rem_flag)
5033 {
5034 /* Try to produce the remainder without producing the quotient.
5035 If we seem to have a divmod pattern that does not require widening,
5036 don't try widening here. We should really have a WIDEN argument
5037 to expand_twoval_binop, since what we'd really like to do here is
5038 1) try a mod insn in compute_mode
5039 2) try a divmod insn in compute_mode
5040 3) try a div insn in compute_mode and multiply-subtract to get
5041 remainder
5042 4) try the same things with widening allowed. */
5043 remainder
5044 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5045 op0, op1, target,
5046 unsignedp,
5047 ((optab_handler (optab2, compute_mode)
5048 != CODE_FOR_nothing)
5049 ? OPTAB_DIRECT : OPTAB_WIDEN));
5050 if (remainder == 0)
5051 {
5052 /* No luck there. Can we do remainder and divide at once
5053 without a library call? */
5054 remainder = gen_reg_rtx (compute_mode);
5055 if (! expand_twoval_binop ((unsignedp
5056 ? udivmod_optab
5057 : sdivmod_optab),
5058 op0, op1,
5059 NULL_RTX, remainder, unsignedp))
5060 remainder = 0;
5061 }
5062
5063 if (remainder)
5064 return gen_lowpart (mode, remainder);
5065 }
5066
5067 /* Produce the quotient. Try a quotient insn, but not a library call.
5068 If we have a divmod in this mode, use it in preference to widening
5069 the div (for this test we assume it will not fail). Note that optab2
5070 is set to the one of the two optabs that the call below will use. */
5071 quotient
5072 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5073 op0, op1, rem_flag ? NULL_RTX : target,
5074 unsignedp,
5075 ((optab_handler (optab2, compute_mode)
5076 != CODE_FOR_nothing)
5077 ? OPTAB_DIRECT : OPTAB_WIDEN));
5078
5079 if (quotient == 0)
5080 {
5081 /* No luck there. Try a quotient-and-remainder insn,
5082 keeping the quotient alone. */
5083 quotient = gen_reg_rtx (compute_mode);
5084 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5085 op0, op1,
5086 quotient, NULL_RTX, unsignedp))
5087 {
5088 quotient = 0;
5089 if (! rem_flag)
5090 /* Still no luck. If we are not computing the remainder,
5091 use a library call for the quotient. */
5092 quotient = sign_expand_binop (compute_mode,
5093 udiv_optab, sdiv_optab,
5094 op0, op1, target,
5095 unsignedp, OPTAB_LIB_WIDEN);
5096 }
5097 }
5098 }
5099
5100 if (rem_flag)
5101 {
5102 if (target && GET_MODE (target) != compute_mode)
5103 target = 0;
5104
5105 if (quotient == 0)
5106 {
5107 /* No divide instruction either. Use library for remainder. */
5108 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5109 op0, op1, target,
5110 unsignedp, OPTAB_LIB_WIDEN);
5111 /* No remainder function. Try a quotient-and-remainder
5112 function, keeping the remainder. */
5113 if (!remainder)
5114 {
5115 remainder = gen_reg_rtx (compute_mode);
5116 if (!expand_twoval_binop_libfunc
5117 (unsignedp ? udivmod_optab : sdivmod_optab,
5118 op0, op1,
5119 NULL_RTX, remainder,
5120 unsignedp ? UMOD : MOD))
5121 remainder = NULL_RTX;
5122 }
5123 }
5124 else
5125 {
5126 /* We divided. Now finish doing X - Y * (X / Y). */
5127 remainder = expand_mult (compute_mode, quotient, op1,
5128 NULL_RTX, unsignedp);
5129 remainder = expand_binop (compute_mode, sub_optab, op0,
5130 remainder, target, unsignedp,
5131 OPTAB_LIB_WIDEN);
5132 }
5133 }
5134
5135 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5136 }
5137
/* Return a tree node with data type TYPE, describing the value of X.
   Usually this is a VAR_DECL, if there is no obvious better choice.
   X may be an expression, however we only support those expressions
   generated by loop.c.  */

tree
make_tree (tree type, rtx x)
{
  tree t;

  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_WIDE_INT:
      /* Integer constants become an INTEGER_CST of TYPE, interpreted
	 in TYPE's machine mode.  */
      t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
      return t;

    case CONST_DOUBLE:
      STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
      /* On targets without CONST_WIDE_INT support, a VOIDmode
	 CONST_DOUBLE holds a two-word integer; otherwise it is a
	 floating-point constant.  */
      if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
	t = wide_int_to_tree (type,
			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
						    HOST_BITS_PER_WIDE_INT * 2));
      else
	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));

      return t;

    case CONST_VECTOR:
      {
	int units = CONST_VECTOR_NUNITS (x);
	tree itype = TREE_TYPE (type);
	tree *elts;
	int i;

	/* Build a tree with vector elements, converting each rtx
	   element recursively with the vector's element type.  */
	elts = XALLOCAVEC (tree, units);
	for (i = units - 1; i >= 0; --i)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    elts[i] = make_tree (itype, elt);
	  }

	return build_vector (type, elts);
      }

    case PLUS:
      return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case MINUS:
      return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case NEG:
      return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));

    case MULT:
      return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case ASHIFT:
      return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
			  make_tree (type, XEXP (x, 1)));

    case LSHIFTRT:
      /* Logical right shift: perform the RSHIFT_EXPR in the unsigned
	 variant of TYPE so it does not sign-extend, then convert back.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case ASHIFTRT:
      /* Arithmetic right shift: force a signed type so RSHIFT_EXPR
	 sign-extends.  */
      t = signed_type_for (type);
      return fold_convert (type, build2 (RSHIFT_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (type, XEXP (x, 1))));

    case DIV:
      /* Signed rtl division corresponds to TRUNC_DIV_EXPR on a signed
	 type; real types divide in TYPE itself.  */
      if (TREE_CODE (type) != REAL_TYPE)
	t = signed_type_for (type);
      else
	t = type;

      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));
    case UDIV:
      /* Unsigned division is truncating division on the unsigned type.  */
      t = unsigned_type_for (type);
      return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
					 make_tree (t, XEXP (x, 0)),
					 make_tree (t, XEXP (x, 1))));

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Model the extension as a conversion from the inner operand's
	 mode, with signedness chosen by the rtx code.  */
      t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
					  GET_CODE (x) == ZERO_EXTEND);
      return fold_convert (type, make_tree (t, XEXP (x, 0)));

    case CONST:
      /* Strip the CONST wrapper and convert its operand.  */
      return make_tree (type, XEXP (x, 0));

    case SYMBOL_REF:
      /* If the symbol has an associated decl, describe X as that
	 decl's address.  */
      t = SYMBOL_REF_DECL (x);
      if (t)
	return fold_convert (type, build_fold_addr_expr (t));
      /* else fall through.  */

    default:
      /* No structural translation available: wrap X in an anonymous
	 VAR_DECL whose rtl is X itself.  */
      t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);

      /* If TYPE is a POINTER_TYPE, we might need to convert X from
	 address mode to pointer mode.  */
      if (POINTER_TYPE_P (type))
	x = convert_memory_address_addr_space
	  (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));

      /* Note that we do *not* use SET_DECL_RTL here, because we do not
	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
      t->decl_with_rtl.rtl = x;

      return t;
    }
}
5261
5262 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5263 and returning TARGET.
5264
5265 If TARGET is 0, a pseudo-register or constant is returned. */
5266
5267 rtx
expand_and(machine_mode mode,rtx op0,rtx op1,rtx target)5268 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5269 {
5270 rtx tem = 0;
5271
5272 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5273 tem = simplify_binary_operation (AND, mode, op0, op1);
5274 if (tem == 0)
5275 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5276
5277 if (target == 0)
5278 target = tem;
5279 else if (tem != target)
5280 emit_move_insn (target, tem);
5281 return target;
5282 }
5283
/* Helper function for emit_store_flag: try a single cstore pattern.

   ICODE is a cstore_optab insn; use it to compute comparison CODE of
   X and Y (operands in MODE, compared in COMPARE_MODE, zero-extending
   when UNSIGNEDP) and place the flag value in TARGET (of TARGET_MODE,
   or the pattern's natural result mode if TARGET_MODE is VOIDmode).
   NORMALIZEP is as for emit_store_flag: 1 for a 0/1 result, -1 for a
   0/-1 result, 0 to leave the pattern's raw STORE_FLAG_VALUE.
   Return the result rtx, or NULL_RTX after deleting any partially
   emitted insns if the operands cannot be prepared or the pattern
   fails to expand.  */
rtx
emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
	     machine_mode mode, machine_mode compare_mode,
	     int unsignedp, rtx x, rtx y, int normalizep,
	     machine_mode target_mode)
{
  struct expand_operand ops[4];
  rtx op0, comparison, subtarget;
  rtx_insn *last;
  /* The mode the pattern naturally produces its flag value in.  */
  machine_mode result_mode = targetm.cstore_mode (icode);

  /* Remember where we are so any insns emitted while preparing the
     operands can be rolled back on failure.  */
  last = get_last_insn ();
  x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
  y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
  if (!x || !y)
    {
      delete_insns_since (last);
      return NULL_RTX;
    }

  if (target_mode == VOIDmode)
    target_mode = result_mode;
  if (!target)
    target = gen_reg_rtx (target_mode);

  comparison = gen_rtx_fmt_ee (code, result_mode, x, y);

  /* Expand the pattern: operand 0 is the output, 1 the comparison
     rtx, 2 and 3 the prepared compare operands.  */
  create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
  create_fixed_operand (&ops[1], comparison);
  create_fixed_operand (&ops[2], x);
  create_fixed_operand (&ops[3], y);
  if (!maybe_expand_insn (icode, 4, ops))
    {
      delete_insns_since (last);
      return NULL_RTX;
    }
  subtarget = ops[0].value;

  /* If we are converting to a wider mode, first convert to
     TARGET_MODE, then normalize.  This produces better combining
     opportunities on machines that have a SIGN_EXTRACT when we are
     testing a single bit.  This mostly benefits the 68k.

     If STORE_FLAG_VALUE does not have the sign bit set when
     interpreted in MODE, we can do this conversion as unsigned, which
     is usually more efficient.  */
  if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
    {
      convert_move (target, subtarget,
		    val_signbit_known_clear_p (result_mode,
					       STORE_FLAG_VALUE));
      op0 = target;
      result_mode = target_mode;
    }
  else
    op0 = subtarget;

  /* If we want to keep subexpressions around, don't reuse our last
     target.  */
  if (optimize)
    subtarget = 0;

  /* Now normalize to the proper value in MODE.  Sometimes we don't
     have to do anything.  */
  if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
    ;
  /* STORE_FLAG_VALUE might be the most negative number, so write
     the comparison this way to avoid a compiler-time warning.  */
  else if (- normalizep == STORE_FLAG_VALUE)
    op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);

  /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
     it hard to use a value of just the sign bit due to ANSI integer
     constant typing rules.  */
  else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
    op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
			normalizep == 1);
  else
    {
      /* The remaining normalizations rely on the low bit of the flag
	 value being meaningful.  */
      gcc_assert (STORE_FLAG_VALUE & 1);

      op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
      if (normalizep == -1)
	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
    }

  /* If we were converting to a smaller mode, do the conversion now.  */
  if (target_mode != result_mode)
    {
      convert_move (target, op0, 0);
      return target;
    }
  else
    return op0;
}
5381
5382
/* A subroutine of emit_store_flag only including "tricks" that do not
   need a recursive call.  These are kept separate to avoid infinite
   loops.

   Compute comparison CODE of OP0 and OP1 (operands in MODE, widened
   with zero-extension when UNSIGNEDP) as a store-flag value in TARGET
   (of TARGET_MODE), normalized according to NORMALIZEP as for
   emit_store_flag.  Return the result rtx, or 0 if neither a trick
   nor a cstore pattern applies.  */

static rtx
emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
		   machine_mode mode, int unsignedp, int normalizep,
		   machine_mode target_mode)
{
  rtx subtarget;
  enum insn_code icode;
  machine_mode compare_mode;
  enum mode_class mclass;
  enum rtx_code scode;

  if (unsignedp)
    code = unsigned_condition (code);
  /* SCODE is the comparison with operands swapped, used to retry
     floating-point cstore patterns below.  */
  scode = swap_condition (code);

  /* If one operand is constant, make it the second one.  Only do this
     if the other operand is not constant as well.  */

  if (swap_commutative_operands_p (op0, op1))
    {
      std::swap (op0, op1);
      code = swap_condition (code);
    }

  if (mode == VOIDmode)
    mode = GET_MODE (op0);

  /* For some comparisons with 1 and -1, we can convert this to
     comparisons with zero.  This will often produce more opportunities for
     store-flag insns.  */

  switch (code)
    {
    case LT:
      /* x < 1 is x <= 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = LE;
      break;
    case LE:
      /* x <= -1 is x < 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = LT;
      break;
    case GE:
      /* x >= 1 is x > 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = GT;
      break;
    case GT:
      /* x > -1 is x >= 0.  */
      if (op1 == constm1_rtx)
	op1 = const0_rtx, code = GE;
      break;
    case GEU:
      /* x >=u 1 is x != 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = NE;
      break;
    case LTU:
      /* x <u 1 is x == 0.  */
      if (op1 == const1_rtx)
	op1 = const0_rtx, code = EQ;
      break;
    default:
      break;
    }

  /* If we are comparing a double-word integer with zero or -1, we can
     convert the comparison into one involving a single word.  */
  if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
      && GET_MODE_CLASS (mode) == MODE_INT
      && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
    {
      rtx tem;
      if ((code == EQ || code == NE)
	  && (op1 == const0_rtx || op1 == constm1_rtx))
	{
	  rtx op00, op01;

	  /* Do a logical OR or AND of the two words and compare the
	     result.  */
	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
	  tem = expand_binop (word_mode,
			      op1 == const0_rtx ? ior_optab : and_optab,
			      op00, op01, NULL_RTX, unsignedp,
			      OPTAB_DIRECT);

	  if (tem != 0)
	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
				   unsignedp, normalizep);
	}
      else if ((code == LT || code == GE) && op1 == const0_rtx)
	{
	  rtx op0h;

	  /* If testing the sign bit, can just test on high word.  */
	  op0h = simplify_gen_subreg (word_mode, op0, mode,
				      subreg_highpart_offset (word_mode,
							      mode));
	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
				 unsignedp, normalizep);
	}
      else
	tem = NULL_RTX;

      if (tem)
	{
	  /* Widen the word-sized flag value to TARGET_MODE if needed,
	     sign-extending when the flag value's sign bit may be set.  */
	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
	    return tem;
	  if (!target)
	    target = gen_reg_rtx (target_mode);

	  convert_move (target, tem,
			!val_signbit_known_set_p (word_mode,
						  (normalizep ? normalizep
						   : STORE_FLAG_VALUE)));
	  return target;
	}
    }

  /* If this is A < 0 or A >= 0, we can do this by taking the ones
     complement of A (for GE) and shifting the sign bit to the low bit.  */
  if (op1 == const0_rtx && (code == LT || code == GE)
      && GET_MODE_CLASS (mode) == MODE_INT
      && (normalizep || STORE_FLAG_VALUE == 1
	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
    {
      subtarget = target;

      if (!target)
	target_mode = mode;

      /* If the result is to be wider than OP0, it is best to convert it
	 first.  If it is to be narrower, it is *incorrect* to convert it
	 first.  */
      else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
	{
	  op0 = convert_modes (target_mode, mode, op0, 0);
	  mode = target_mode;
	}

      if (target_mode != mode)
	subtarget = 0;

      if (code == GE)
	op0 = expand_unop (mode, one_cmpl_optab, op0,
			   ((STORE_FLAG_VALUE == 1 || normalizep)
			    ? 0 : subtarget), 0);

      if (STORE_FLAG_VALUE == 1 || normalizep)
	/* If we are supposed to produce a 0/1 value, we want to do
	   a logical shift from the sign bit to the low-order bit; for
	   a -1/0 value, we do an arithmetic shift.  */
	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
			    GET_MODE_BITSIZE (mode) - 1,
			    subtarget, normalizep != -1);

      if (mode != target_mode)
	op0 = convert_modes (target_mode, mode, op0, 0);

      return op0;
    }

  /* Finally, search MODE and each wider mode for a cstore pattern
     that can perform the comparison directly.  */
  mclass = GET_MODE_CLASS (mode);
  for (compare_mode = mode; compare_mode != VOIDmode;
       compare_mode = GET_MODE_WIDER_MODE (compare_mode))
    {
      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
      icode = optab_handler (cstore_optab, optab_mode);
      if (icode != CODE_FOR_nothing)
	{
	  do_pending_stack_adjust ();
	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
				 unsignedp, op0, op1, normalizep, target_mode);
	  if (tem)
	    return tem;

	  /* For floating-point comparisons, also try with the operands
	     (and the condition) swapped.  */
	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	    {
	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
				 unsignedp, op1, op0, normalizep, target_mode);
	      if (tem)
		return tem;
	    }
	  break;
	}
    }

  return 0;
}
5572
5573 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5574 and storing in TARGET. Normally return TARGET.
5575 Return 0 if that cannot be done.
5576
5577 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5578 it is VOIDmode, they cannot both be CONST_INT.
5579
5580 UNSIGNEDP is for the case where we have to widen the operands
5581 to perform the operation. It says to use zero-extension.
5582
5583 NORMALIZEP is 1 if we should convert the result to be either zero
5584 or one.  NORMALIZEP is -1 if we should convert the result to be
5585 either zero or -1. If NORMALIZEP is zero, the result will be left
5586 "raw" out of the scc insn. */
5587
5588 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)5589 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5590 machine_mode mode, int unsignedp, int normalizep)
5591 {
5592 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5593 enum rtx_code rcode;
5594 rtx subtarget;
5595 rtx tem, trueval;
5596 rtx_insn *last;
5597 
5598 /* If we compare constants, we shouldn't use a store-flag operation,
5599 but a constant load.  We can get there via the vanilla route that
5600 usually generates a compare-branch sequence, but will in this case
5601 fold the comparison to a constant, and thus elide the branch.  */
5602 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5603 return NULL_RTX;
5604 
     /* First try the straightforward scc expansion.  */
5605 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5606 target_mode);
5607 if (tem)
5608 return tem;
5609 
5610 /* If we reached here, we can't do this with a scc insn, however there
5611 are some comparisons that can be done in other ways.  Don't do any
5612 of these cases if branches are very cheap.  */
5613 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5614 return 0;
5615 
5616 /* See what we need to return.  We can only return a 1, -1, or the
5617 sign bit.  */
5618 
5619 if (normalizep == 0)
5620 {
5621 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5622 normalizep = STORE_FLAG_VALUE;
5623 
5624 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5625 ;
5626 else
5627 return 0;
5628 }
5629 
     /* Remember the insn-stream position so that any partially emitted
        attempt below can be discarded with delete_insns_since.  */
5630 last = get_last_insn ();
5631 
5632 /* If optimizing, use different pseudo registers for each insn, instead
5633 of reusing the same pseudo.  This leads to better CSE, but slows
5634 down the compiler, since there are more pseudos */
5635 subtarget = (!optimize
5636 && (target_mode == mode)) ? target : NULL_RTX;
     /* TRUEVAL is the value the caller wants stored for a true comparison.  */
5637 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5638 
5639 /* For floating-point comparisons, try the reverse comparison or try
5640 changing the "orderedness" of the comparison.  */
5641 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5642 {
5643 enum rtx_code first_code;
5644 bool and_them;
5645 
5646 rcode = reverse_condition_maybe_unordered (code);
5647 if (can_compare_p (rcode, mode, ccp_store_flag)
5648 && (code == ORDERED || code == UNORDERED
5649 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5650 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5651 {
          /* The reverse comparison's result can be mapped to the requested
             one either by adding NORMALIZEP (when STORE_FLAG_VALUE and
             NORMALIZEP are 1 and -1 in some order) or by XORing with
             TRUEVAL; pick whichever the target makes free.  */
5652 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5653 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5654 
5655 /* For the reverse comparison, use either an addition or a XOR.  */
5656 if (want_add
5657 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5658 optimize_insn_for_speed_p ()) == 0)
5659 {
5660 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5661 STORE_FLAG_VALUE, target_mode);
5662 if (tem)
5663 return expand_binop (target_mode, add_optab, tem,
5664 gen_int_mode (normalizep, target_mode),
5665 target, 0, OPTAB_WIDEN);
5666 }
5667 else if (!want_add
5668 && rtx_cost (trueval, mode, XOR, 1,
5669 optimize_insn_for_speed_p ()) == 0)
5670 {
5671 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5672 normalizep, target_mode);
5673 if (tem)
5674 return expand_binop (target_mode, xor_optab, tem, trueval,
5675 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5676 }
5677 }
5678 
5679 delete_insns_since (last);
5680 
5681 /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5682 if (code == ORDERED || code == UNORDERED)
5683 return 0;
5684 
       /* Split e.g. UNLT into UNORDERED || LT (or ORDERED && LT).  */
5685 and_them = split_comparison (code, mode, &first_code, &code);
5686 
5687 /* If there are no NaNs, the first comparison should always fall through.
5688 Effectively change the comparison to the other one.  */
5689 if (!HONOR_NANS (mode))
5690 {
5691 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5692 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5693 target_mode);
5694 }
5695 
5696 if (!HAVE_conditional_move)
5697 return 0;
5698 
5699 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5700 conditional move.  */
5701 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5702 normalizep, target_mode);
5703 if (tem == 0)
5704 return 0;
5705 
5706 if (and_them)
5707 tem = emit_conditional_move (target, code, op0, op1, mode,
5708 tem, const0_rtx, GET_MODE (tem), 0);
5709 else
5710 tem = emit_conditional_move (target, code, op0, op1, mode,
5711 trueval, tem, GET_MODE (tem), 0);
5712 
5713 if (tem == 0)
5714 delete_insns_since (last);
5715 return tem;
5716 }
5717 
5718 /* The remaining tricks only apply to integer comparisons.  */
5719 
5720 if (GET_MODE_CLASS (mode) != MODE_INT)
5721 return 0;
5722 
5723 /* If this is an equality comparison of integers, we can try to exclusive-or
5724 (or subtract) the two operands and use a recursive call to try the
5725 comparison with zero.  Don't do any of these cases if branches are
5726 very cheap.  */
5727 
5728 if ((code == EQ || code == NE) && op1 != const0_rtx)
5729 {
5730 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5731 OPTAB_WIDEN);
5732 
5733 if (tem == 0)
5734 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5735 OPTAB_WIDEN);
5736 if (tem != 0)
5737 tem = emit_store_flag (target, code, tem, const0_rtx,
5738 mode, unsignedp, normalizep)
5739 if (tem != 0)
5740 return tem;
5741 
5742 delete_insns_since (last);
5743 }
5744 
5745 /* For integer comparisons, try the reverse comparison.  However, for
5746 small X and if we'd have anyway to extend, implementing "X != 0"
5747 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5748 rcode = reverse_condition (code);
5749 if (can_compare_p (rcode, mode, ccp_store_flag)
5750 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5751 && code == NE
5752 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5753 && op1 == const0_rtx))
5754 {
       /* Same PLUS-vs-XOR fixup choice as in the float case above.  */
5755 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5756 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5757 
5758 /* Again, for the reverse comparison, use either an addition or a XOR.  */
5759 if (want_add
5760 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5761 optimize_insn_for_speed_p ()) == 0)
5762 {
5763 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5764 STORE_FLAG_VALUE, target_mode);
5765 if (tem != 0)
5766 tem = expand_binop (target_mode, add_optab, tem,
5767 gen_int_mode (normalizep, target_mode),
5768 target, 0, OPTAB_WIDEN);
5769 }
5770 else if (!want_add
5771 && rtx_cost (trueval, mode, XOR, 1,
5772 optimize_insn_for_speed_p ()) == 0)
5773 {
5774 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5775 normalizep, target_mode);
5776 if (tem != 0)
5777 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5778 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5779 }
5780 
5781 if (tem != 0)
5782 return tem;
5783 delete_insns_since (last);
5784 }
5785 
5786 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5787 the constant zero.  Reject all other comparisons at this point.  Only
5788 do LE and GT if branches are expensive since they are expensive on
5789 2-operand machines.  */
5790 
5791 if (op1 != const0_rtx
5792 || (code != EQ && code != NE
5793 && (BRANCH_COST (optimize_insn_for_speed_p (),
5794 false) <= 1 || (code != LE && code != GT))))
5795 return 0;
5796 
5797 /* Try to put the result of the comparison in the sign bit.  Assume we can't
5798 do the necessary operation below.  */
5798 
5800 tem = 0;
5801 
5802 /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5803 the sign bit set.  */
5804 
5805 if (code == LE)
5806 {
5807 /* This is destructive, so SUBTARGET can't be OP0.  */
5808 if (rtx_equal_p (subtarget, op0))
5809 subtarget = 0;
5810 
5811 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5812 OPTAB_WIDEN);
5813 if (tem)
5814 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5815 OPTAB_WIDEN);
5816 }
5817 
5818 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5819 number of bits in the mode of OP0, minus one.  */
5820 
5821 if (code == GT)
5822 {
5823 if (rtx_equal_p (subtarget, op0))
5824 subtarget = 0;
5825 
5826 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5827 GET_MODE_BITSIZE (mode) - 1,
5828 subtarget, 0);
5829 if (tem)
5830 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5831 OPTAB_WIDEN);
5832 }
5833 
5834 if (code == EQ || code == NE)
5835 {
5836 /* For EQ or NE, one way to do the comparison is to apply an operation
5837 that converts the operand into a positive number if it is nonzero
5838 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5839 for NE we negate.  This puts the result in the sign bit.  Then we
5840 normalize with a shift, if needed.
5841 
5842 Two operations that can do the above actions are ABS and FFS, so try
5843 them.  If that doesn't work, and MODE is smaller than a full word,
5844 we can use zero-extension to the wider mode (an unsigned conversion)
5845 as the operation.  */
5846 
5847 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5848 that is compensated by the subsequent overflow when subtracting
5849 one / negating.  */
5850 
5851 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5852 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5853 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5854 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5855 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5856 {
5857 tem = convert_modes (word_mode, mode, op0, 1);
5858 mode = word_mode;
5859 }
5860 
5861 if (tem != 0)
5862 {
5863 if (code == EQ)
5864 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5865 0, OPTAB_WIDEN);
5866 else
5867 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5868 }
5869 
5870 /* If we couldn't do it that way, for NE we can "or" the two's complement
5871 of the value with itself.  For EQ, we take the one's complement of
5872 that "or", which is an extra insn, so we only handle EQ if branches
5873 are expensive.  */
5874 
5875 if (tem == 0
5876 && (code == NE
5877 || BRANCH_COST (optimize_insn_for_speed_p (),
5878 false) > 1))
5879 {
5880 if (rtx_equal_p (subtarget, op0))
5881 subtarget = 0;
5882 
5883 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5884 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5885 OPTAB_WIDEN);
5886 
5887 if (tem && code == EQ)
5888 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5889 }
5890 }
5891 
     /* The tricks above left the answer in the sign bit; shift it down to
        bit 0 — logically for a 0/1 result, arithmetically for 0/-1.  */
5892 if (tem && normalizep)
5893 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5894 GET_MODE_BITSIZE (mode) - 1,
5895 subtarget, normalizep == 1);
5896 
     /* Copy or convert the result into TARGET if we have one; otherwise
        undo everything emitted since LAST and report failure.  */
5897 if (tem)
5898 {
5899 if (!target)
5900 ;
5901 else if (GET_MODE (tem) != target_mode)
5902 {
5903 convert_move (target, tem, 0);
5904 tem = target;
5905 }
5906 else if (!subtarget)
5907 {
5908 emit_move_insn (target, tem);
5909 tem = target;
5910 }
5911 }
5912 else
5913 delete_insns_since (last);
5914 
5915 return tem;
5916 }
5917
5918 /* Like emit_store_flag, but always succeeds. */
5919
5920 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)5921 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5922 machine_mode mode, int unsignedp, int normalizep)
5923 {
5924 rtx tem;
5925 rtx_code_label *label;
5926 rtx trueval, falseval;
5927 
5928 /* First see if emit_store_flag can do the job.  */
5929 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5930 if (tem != 0)
5931 return tem;
5932 
5933 /* If one operand is constant, make it the second one.  Only do this
5934 if the other operand is not constant as well.  */
5935 
5936 if (swap_commutative_operands_p (op0, op1))
5937 {
5938 std::swap (op0, op1);
5939 code = swap_condition (code);
5940 }
5941 
     /* If no mode was supplied, take it from OP0.  */
5942 if (mode == VOIDmode)
5943 mode = GET_MODE (op0);
5944 
     /* With no target, materialize the result in a fresh word_mode reg.  */
5945 if (!target)
5946 target = gen_reg_rtx (word_mode);
5947 
5948 /* If this failed, we have to do this with set/compare/jump/set code.
5949 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5950 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5951 if (code == NE
5952 && GET_MODE_CLASS (mode) == MODE_INT
5953 && REG_P (target)
5954 && op0 == target
5955 && op1 == const0_rtx)
5956 {
5957 label = gen_label_rtx ();
5958 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5959 NULL_RTX, NULL, label, -1);
5960 emit_move_insn (target, trueval);
5961 emit_label (label);
5962 return target;
5963 }
5964 
     /* TARGET is written before the comparison below, so it must be a
        register that does not appear in either operand.  */
5965 if (!REG_P (target)
5966 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5967 target = gen_reg_rtx (GET_MODE (target));
5968 
5969 /* Jump in the right direction if the target cannot implement CODE
5970 but can jump on its reverse condition.  */
5971 falseval = const0_rtx;
5972 if (! can_compare_p (code, mode, ccp_jump)
5973 && (! FLOAT_MODE_P (mode)
5974 || code == ORDERED || code == UNORDERED
5975 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5976 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5977 {
5978 enum rtx_code rcode;
5979 if (FLOAT_MODE_P (mode))
5980 rcode = reverse_condition_maybe_unordered (code);
5981 else
5982 rcode = reverse_condition (code);
5983 
5984 /* Canonicalize to UNORDERED for the libcall.  */
5985 if (can_compare_p (rcode, mode, ccp_jump)
5986 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5987 {
          /* Use the reverse condition and swap the two stored values.  */
5988 falseval = trueval;
5989 trueval = const0_rtx;
5990 code = rcode;
5991 }
5992 }
5993 
     /* Emit: TARGET = TRUEVAL; if (! (OP0 CODE OP1)) TARGET = FALSEVAL.  */
5994 emit_move_insn (target, trueval);
5995 label = gen_label_rtx ();
5996 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5997 label, -1);
5998 
5999 emit_move_insn (target, falseval);
6000 emit_label (label);
6001 
6002 return target;
6003 }
6004
6005 /* Perform possibly multi-word comparison and conditional jump to LABEL
6006 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6007 now a thin wrapper around do_compare_rtx_and_jump. */
6008
6009 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,machine_mode mode,rtx_code_label * label)6010 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6011 rtx_code_label *label)
6012 {
6013 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6014 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6015 NULL, label, -1);
6016 }
6017