/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "tree-vector-builder.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT,
                                      poly_uint64, poly_uint64,
                                      machine_mode, rtx, bool, bool);
static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
                                     unsigned HOST_WIDE_INT,
                                     unsigned HOST_WIDE_INT,
                                     rtx, scalar_int_mode, bool);
static void store_split_bit_field (rtx, opt_scalar_int_mode,
                                   unsigned HOST_WIDE_INT,
                                   unsigned HOST_WIDE_INT,
                                   poly_uint64, poly_uint64,
                                   rtx, scalar_int_mode, bool);
static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
                                       unsigned HOST_WIDE_INT,
                                       unsigned HOST_WIDE_INT, int, rtx,
                                       machine_mode, machine_mode, bool, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
                                      unsigned HOST_WIDE_INT,
                                      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
                                    unsigned HOST_WIDE_INT,
                                    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode,
                             rtx_code_label *);
static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
                       GET_MODE_PRECISION (mode)), mode);
}
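
/* Illustrative examples of the above (assuming a 32-bit SImode):
   mask_rtx (SImode, 4, 8, false) yields (const_int 0xff0), i.e. eight
   ones shifted left by four bits, and mask_rtx (SImode, 4, 8, true)
   yields the complement, (const_int 0xfffff00f).  */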

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
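
/* The AND test relies on X & (X - 1) clearing the lowest set bit, so
   the result is zero exactly when at most one bit is set: for example
   8 & 7 == 0 and 0 & -1 == 0, but 6 & 5 == 4.  */

/* Scratch RTL expressions built once by init_expmed and then reused,
   via PUT_MODE, to query the cost of each operation in each mode of
   interest.  The pow2 and cint arrays hold the constants 1 << m and m,
   used as multiply and shift operands respectively.  */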
struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
                      scalar_int_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than that of the
     "full" integer mode required for their storage.  If one does not,
     reduce its bit size by one here for comparison purposes.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (to_size))
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && pow2p_hwi (from_size))
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
                    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
                      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
                                     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
                                     <= 4 * add_cost (speed, mode)));

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
                                                       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
                                                        speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
                                                        speed));
    }

  scalar_int_mode int_mode_to;
  if (is_a <scalar_int_mode> (mode, &int_mode_to))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
           mode_from = (machine_mode)(mode_from + 1))
        init_expmed_one_conv (all, int_mode_to,
                              as_a <scalar_int_mode> (mode_from), speed);

      scalar_int_mode wider_mode;
      if (GET_MODE_CLASS (int_mode_to) == MODE_INT
          && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
        {
          PUT_MODE (all->zext, wider_mode);
          PUT_MODE (all->wide_mult, wider_mode);
          PUT_MODE (all->wide_lshr, wider_mode);
          XEXP (all->wide_lshr, 1)
            = gen_int_shift_amount (wider_mode, mode_bitsize);

          set_mul_widen_cost (speed, wider_mode,
                              set_src_cost (all->wide_mult, wider_mode, speed));
          set_mul_highpart_cost (speed, int_mode_to,
                                 set_src_cost (all->wide_trunc,
                                               int_mode_to, speed));
        }
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
           mode = (machine_mode)(mode + 1))
        init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
        for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
        for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
             mode = (machine_mode)(mode + 1))
          init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (machine_mode mode, rtx x)
{
  scalar_int_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (!is_a <scalar_int_mode> (mode, &int_mode))
    {
      if (FLOAT_MODE_P (mode)
          && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
        check_reverse_float_storage_order_support ();

      if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
        {
          sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
          return x;
        }
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
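
/* For example (illustrative): flipping the storage order of the SImode
   constant 0x11223344 is a four-byte swap yielding 0x44332211, while a
   QImode value is returned unchanged since a single byte has no
   storage order.  */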

/* If MODE is set, adjust bitfield memory MEM so that it points to the
   first unit of mode MODE that contains a bitfield of size BITSIZE at
   bit position BITNUM.  If MODE is not set, return a BLKmode reference
   to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
   of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
                      unsigned HOST_WIDE_INT bitsize,
                      unsigned HOST_WIDE_INT bitnum,
                      unsigned HOST_WIDE_INT *new_bitnum)
{
  scalar_int_mode imode;
  if (mode.exists (&imode))
    {
      unsigned int unit = GET_MODE_BITSIZE (imode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, imode, offset);
    }
  else
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
                            / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, BLKmode, offset, size);
    }
}
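
/* Worked example (illustrative): for a field at BITNUM 37 with MODE set
   to HImode, the unit is 16 bits, so *NEW_BITNUM becomes 37 % 16 == 5
   and MEM is rebased at byte offset (37 - 5) / 8 == 4.  */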

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
                              rtx op0, HOST_WIDE_INT bitsize,
                              HOST_WIDE_INT bitnum,
                              poly_uint64 bitregion_start,
                              poly_uint64 bitregion_end,
                              machine_mode fieldmode,
                              unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
                                bitregion_end, MEM_ALIGN (op0),
                                MEM_VOLATILE_P (op0));
  scalar_int_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
         any wider modes.  All other things being equal, we prefer to
         use the widest mode possible because it tends to expose more
         CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
        {
          /* Limit the search to the mode required by the corresponding
             register insertion or extraction instruction, if any.  */
          scalar_int_mode limit_mode = word_mode;
          extraction_insn insn;
          if (get_best_reg_extraction_insn (&insn, pattern,
                                            GET_MODE_BITSIZE (best_mode),
                                            fieldmode))
            limit_mode = insn.field_mode;

          scalar_int_mode wider_mode;
          while (iter.next_mode (&wider_mode)
                 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
            best_mode = wider_mode;
        }
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
                                   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
                     machine_mode struct_mode)
{
  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
  if (BYTES_BIG_ENDIAN)
    return (multiple_p (bitnum, BITS_PER_UNIT)
            && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
                || multiple_p (bitnum + bitsize,
                               regsize * BITS_PER_UNIT)));
  else
    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
}
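
/* For example (illustrative): with 4-byte registers on a little-endian
   target, a field is a lowpart only if BITNUM is a multiple of 32; on a
   big-endian target it must instead start on a byte boundary and end on
   a 32-bit boundary or at the end of the structure.  */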

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            scalar_int_mode fieldmode,
                            poly_uint64 bitregion_start,
                            poly_uint64 bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (maybe_ne (bitregion_end, 0U)
      && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
          || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
                       bitregion_end)))
    return false;

  return true;
}
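
/* For instance (illustrative), with -fstrict-volatile-bitfields in
   effect, a volatile 'unsigned int f : 8' at bit 8 of an aligned
   32-bit word qualifies: a single aligned SImode access covers it.
   A field straddling two such words does not, since
   bitnum % modesize + bitsize would exceed modesize.  */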

/* Return true if OP is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.
   Store the byte offset in *BYTENUM if so.  */

static bool
simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
                       machine_mode mode, poly_uint64 *bytenum)
{
  return (MEM_P (op0)
          && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
          && known_eq (bitsize, GET_MODE_BITSIZE (mode))
          && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
              || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
                  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}

/* Try to use instruction INSV to store VALUE into a field of OP0.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
   BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
   are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
                            opt_scalar_int_mode op0_mode,
                            unsigned HOST_WIDE_INT bitsize,
                            unsigned HOST_WIDE_INT bitnum,
                            rtx value, scalar_int_mode value_mode)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  scalar_int_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
                                 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
        bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());

      /* If xop0 is a register, we need it in OP_MODE
         to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
        {
          /* If such a SUBREG can't be created, give up.  */
          if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
                                SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
            return false;
          /* We can't just change the mode, because this might clobber op0,
             and we will need the original value of op0 if insv fails.  */
          xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
                                 SUBREG_BYTE (xop0));
        }
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
        xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
                                         op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but that only handles the case where the field lies completely
     outside the register.  The field can also lie partially in the
     register, in which case we need to adjust BITSIZE for the partial
     overflow.  Without this adjustment, pr48335-2.c breaks on big-endian
     targets that have bit-insert instructions, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
               "destination object, data truncated into %wu-bit",
               bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;
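
  /* For example (illustrative): when inserting an 8-bit field at bit 4
     of a 32-bit unit on such a machine, the insv pattern is handed bit
     position 32 - 8 - 4 == 20 instead of 4.  */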

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (value_mode != op_mode)
    {
      if (GET_MODE_BITSIZE (value_mode) >= bitsize)
        {
          rtx tmp;
          /* Optimization: Don't bother really extending VALUE
             if it has all the bits we will actually use.  However,
             if we must narrow it, be sure we do it correctly.  */

          if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
            {
              tmp = simplify_subreg (op_mode, value1, value_mode, 0);
              if (! tmp)
                tmp = simplify_gen_subreg (op_mode,
                                           force_reg (value_mode, value1),
                                           value_mode, 0);
            }
          else
            {
              tmp = gen_lowpart_if_possible (op_mode, value1);
              if (! tmp)
                tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
            }
          value1 = tmp;
        }
      else if (CONST_INT_P (value))
        value1 = gen_int_mode (INTVAL (value), op_mode);
      else
        /* Parse phase is supposed to make VALUE's data type
           match that of the component reference, which is a type
           at least as wide as the field; so VALUE should have
           a mode that corresponds to that type.  */
        gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
        convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                   machine_mode fieldmode,
                   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  machine_mode outermode = GET_MODE (op0);
  scalar_mode innermode = GET_MODE_INNER (outermode);
  poly_uint64 pos;
  if (VECTOR_MODE_P (outermode)
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
      && fieldmode == innermode
      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
    {
      struct expand_operand ops[3];
      enum insn_code icode = optab_handler (vec_set_optab, outermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
        return true;
    }
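
  /* For example (illustrative): storing a 32-bit VALUE at BITNUM 64 of
     a V4SImode register is handled by the vec_set pattern with element
     index 64 / 32 == 2.  */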

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.  */
  if (!MEM_P (op0)
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
         In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;
      HOST_WIDE_INT regnum;
      poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
      if (known_eq (bitnum, 0U)
          && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
        {
          sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
          if (sub)
            {
              if (reverse)
                sub = flip_storage_order (GET_MODE (op0), sub);
              emit_move_insn (op0, sub);
              return true;
            }
        }
      else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
               && multiple_p (bitsize, regsize * BITS_PER_UNIT))
        {
          sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
                                     regnum * regsize);
          if (sub)
            {
              if (reverse)
                value = flip_storage_order (fieldmode, value);
              emit_move_insn (sub, value);
              return true;
            }
        }
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  poly_uint64 bytenum;
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
      if (reverse)
        value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* It's possible we'll need to handle other cases here for
     polynomial bitnum and bitsize.  */

  /* From here on we need to be looking at a fixed-size insertion.  */
  unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
  unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
  scalar_int_mode imode;
  if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
    {
      if (MEM_P (op0))
        op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
                                            0, MEM_SIZE (op0));
      else if (!op0_mode.exists ())
        {
          if (ibitnum == 0
              && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
              && MEM_P (value)
              && !reverse)
            {
              value = adjust_address (value, GET_MODE (op0), 0);
              emit_move_insn (op0, value);
              return true;
            }
          if (!fallback_p)
            return false;
          rtx temp = assign_stack_temp (GET_MODE (op0),
                                        GET_MODE_SIZE (GET_MODE (op0)));
          emit_move_insn (temp, op0);
          store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
                             reverse, fallback_p);
          emit_move_insn (op0, temp);
          return true;
        }
      else
        op0 = gen_lowpart (op0_mode.require (), op0);
    }

  return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
                                   bitregion_start, bitregion_end,
                                   fieldmode, value, reverse, fallback_p);
}

/* Subroutine of store_bit_field_1, with the same arguments, except
   that BITSIZE and BITNUM are constant.  Handle cases specific to
   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
   otherwise OP0 is a BLKmode MEM.  */

static bool
store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                          unsigned HOST_WIDE_INT bitsize,
                          unsigned HOST_WIDE_INT bitnum,
                          poly_uint64 bitregion_start,
                          poly_uint64 bitregion_end,
                          machine_mode fieldmode,
                          rtx value, bool reverse, bool fallback_p)
{
  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
        {
          /* Else we've got some float mode source being extracted into
             a different float mode destination -- this combination of
             subregs results in Severe Tire Damage.  */
          gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
                      || GET_MODE_CLASS (fieldmode) == MODE_INT
                      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
          arg0 = SUBREG_REG (arg0);
        }

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
        {
          arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

          create_fixed_operand (&ops[0], arg0);
          /* Shrink the source operand to FIELDMODE.  */
          create_convert_operand_to (&ops[1], value, fieldmode, false);
          if (maybe_expand_insn (icode, 2, ops))
            return true;
        }
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
         in the order least significant first.
         This is because the most significant word is the one which may
         be less than full.
         However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
         subwords to extract.  Note that fieldmode will often (always?) be
         VOIDmode, because that is what store_field uses to indicate that this
         is a bit field, but passing VOIDmode to operand_subword_force
         is not allowed.

         The mode must be fixed-size, since insertions into variable-sized
         objects are meant to be handled before calling this function.  */
      fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
      if (value_mode == VOIDmode)
        value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
        {
          /* If I is 0, use the low-order word in both field and target;
             if I is 1, use the next to lowest word; and so on.  */
          unsigned int wordnum = (backwards
                                  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
                                  - i - 1
                                  : i);
          unsigned int bit_offset = (backwards ^ reverse
                                     ? MAX ((int) bitsize - ((int) i + 1)
                                            * BITS_PER_WORD,
                                            0)
                                     : (int) i * BITS_PER_WORD);
          rtx value_word = operand_subword_force (value, wordnum, value_mode);
          unsigned HOST_WIDE_INT new_bitsize =
            MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

          /* If the remaining chunk doesn't have full wordsize we have
             to make sure that for big-endian machines the higher order
             bits are used.  */
          if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
            {
              int shift = BITS_PER_WORD - new_bitsize;
              rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
              value_word = simplify_expand_binop (word_mode, lshr_optab,
                                                  value_word, shift_rtx,
                                                  NULL_RTX, true,
                                                  OPTAB_LIB_WIDEN);
            }

          if (!store_bit_field_1 (op0, new_bitsize,
                                  bitnum + bit_offset,
                                  bitregion_start, bitregion_end,
                                  word_mode,
                                  value_word, reverse, fallback_p))
            {
              delete_insns_since (last);
              return false;
            }
        }
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  rtx orig_value = value;
  scalar_int_mode value_mode;
  if (GET_MODE (value) == VOIDmode)
    /* By this point we've dealt with values that are bigger than a word,
       so word_mode is a conservatively correct choice.  */
    value_mode = word_mode;
  else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
    {
      value_mode = int_mode_for_mode (GET_MODE (value)).require ();
      value = gen_reg_rtx (value_mode);
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.
     Don't do this if op0 is a single hard register wider than word
     such as a float or vector register.  */
  if (!MEM_P (op0)
      && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
      && (!REG_P (op0)
          || !HARD_REGISTER_P (op0)
          || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
    {
      if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
        {
          if (!fallback_p)
            return false;

          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);
          return true;
        }
      op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
                                 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      op0_mode = word_mode;
      bitnum %= BITS_PER_WORD;
    }
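
  /* Worked example (illustrative): for a DImode register with
     BITS_PER_WORD == 32, a 16-bit field at bit 40 lies entirely in
     word 1, so OP0 is narrowed to that subword and BITNUM becomes 8.
     Had the field started at bit 24 it would span both words and be
     handled by store_split_bit_field above.  */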

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
                                       GET_MODE_BITSIZE (op0_mode.require ()),
                                       fieldmode)
      && store_bit_field_using_insv (&insv, op0, op0_mode,
                                     bitsize, bitnum, value, value_mode))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
                                        fieldmode)
          && store_bit_field_using_insv (&insv, op0, op0_mode,
                                         bitsize, bitnum, value, value_mode))
        return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
         into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
                                               bitregion_start, bitregion_end,
                                               fieldmode, &bitpos);
      if (xop0)
        {
          rtx tempreg = copy_to_reg (xop0);
          if (store_bit_field_1 (tempreg, bitsize, bitpos,
                                 bitregion_start, bitregion_end,
                                 fieldmode, orig_value, reverse, false))
            {
              emit_move_insn (xop0, tempreg);
              return true;
            }
          delete_insns_since (last);
        }
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
                         bitregion_end, value, value_mode, reverse);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or if we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
                 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                 machine_mode fieldmode,
                 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
  scalar_int_mode int_mode;
  if (bitsize.is_constant (&ibitsize)
      && bitnum.is_constant (&ibitnum)
      && is_a <scalar_int_mode> (fieldmode, &int_mode)
      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
                                     bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
         We know here that the field can be accessed with one single
         instruction.  For targets that support unaligned memory,
         an unaligned access may be necessary.  */
      if (ibitsize == GET_MODE_BITSIZE (int_mode))
        {
          str_rtx = adjust_bitfield_address (str_rtx, int_mode,
                                             ibitnum / BITS_PER_UNIT);
          if (reverse)
            value = flip_storage_order (int_mode, value);
          gcc_assert (ibitnum % BITS_PER_UNIT == 0);
          emit_move_insn (str_rtx, value);
        }
      else
        {
          rtx temp;

          str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
                                          ibitnum, &ibitnum);
          gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
          temp = copy_to_reg (str_rtx);
          if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
                                  int_mode, value, reverse, true))
            gcc_unreachable ();

          emit_move_insn (str_rtx, temp);
        }

      return;
    }

  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
    {
      scalar_int_mode best_mode;
      machine_mode addr_mode = VOIDmode;

      poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
      bitnum -= bitregion_start;
      poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      if (bitsize.is_constant (&ibitsize)
          && bitnum.is_constant (&ibitnum)
          && get_best_mode (ibitsize, ibitnum,
                            bitregion_start, bitregion_end,
                            MEM_ALIGN (str_rtx), INT_MAX,
                            MEM_VOLATILE_P (str_rtx), &best_mode))
        addr_mode = best_mode;
      str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
                                              offset, size);
    }
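
  /* E.g. (illustrative): for a field at BITNUM 40 in a bit region
     starting at bit 32, the address is advanced by 4 bytes and BITNUM
     becomes 8, so the store below cannot touch bytes before the
     region.  */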

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
                          bitregion_start, bitregion_end,
                          fieldmode, value, reverse, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
   it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
   the mode of VALUE.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitnum,
                       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  scalar_int_mode best_mode;
  if (MEM_P (op0))
    {
      unsigned int max_bitsize = BITS_PER_WORD;
      scalar_int_mode imode;
      if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
        max_bitsize = GET_MODE_BITSIZE (imode);

      if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
                          MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
                          &best_mode))
        {
          /* The only way this should occur is if the field spans word
             boundaries.  */
          store_split_bit_field (op0, op0_mode, bitsize, bitnum,
                                 bitregion_start, bitregion_end,
                                 value, value_mode, reverse);
          return;
        }

      op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
    }
  else
    best_mode = op0_mode.require ();

  store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
                           value, value_mode, reverse);
}

/* Helper function for store_fixed_bit_field, stores
   the bit field always using MODE, which is the mode of OP0.  The other
   arguments are as for store_fixed_bit_field.  */

static void
store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
                         unsigned HOST_WIDE_INT bitsize,
                         unsigned HOST_WIDE_INT bitnum,
                         rtx value, scalar_int_mode value_mode, bool reverse)
{
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
        v &= (HOST_WIDE_INT_1U << bitsize) - 1;

      if (v == 0)
        all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
                && v == (HOST_WIDE_INT_1U << bitsize) - 1)
               || (bitsize == HOST_BITS_PER_WIDE_INT
                   && v == HOST_WIDE_INT_M1U))
        all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
                      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (value_mode != mode)
        value = convert_to_mode (mode, value, 1);

      if (must_and)
        value = expand_binop (mode, and_optab, value,
                              mask_rtx (mode, 0, bitsize, 0),
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
        value = expand_shift (LSHIFT_EXPR, mode, value,
                              bitnum, NULL_RTX, 1);
    }

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
        mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
                           NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
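
/* For example (illustrative): storing the 3-bit value 0b101 at bit 4 of
   an SImode OP0 expands to roughly "temp = op0 & ~0x70" to clear the
   field, followed by "temp |= 0x50" to or in the shifted value; the AND
   is skipped when VALUE is all-ones (all_one) and the IOR is skipped
   when VALUE is zero (all_zero).  */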

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store, which has mode VALUE_MODE.
   If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
   a BLKmode MEM.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
                       unsigned HOST_WIDE_INT bitsize,
                       unsigned HOST_WIDE_INT bitpos,
                       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
                       rtx value, scalar_int_mode value_mode, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
     much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && op0_mode.exists ())
    unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
        value = word;
      else
        value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
      value_mode = word_mode;
    }

  total_bits = GET_MODE_BITSIZE (value_mode);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
         UNIT close to the end of the region as needed.  If op0 is a REG
         or SUBREG of REG, don't do this, as there can't be data races
         on a register and we can expand shorter code in some cases.  */
      if (maybe_ne (bitregion_end, 0U)
          && unit > BITS_PER_UNIT
          && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
          && !REG_P (op0)
          && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
        {
          unit = unit / 2;
          continue;
        }

      /* THISSIZE must not overrun a word boundary.  Otherwise,
         store_fixed_bit_field will call us again, and we will mutually
         recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
        {
          /* Fetch successively less significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> (bitsize - bitsdone - thissize))
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is little-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            bitsize - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            /* The args are chosen so that the last part includes the
               lsb.  Give extract_bit_field the value it needs (with
               endianness compensation) to fetch the piece we want.  */
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            total_bits - bitsize + bitsdone,
                                            NULL_RTX, 1, false);
        }
      else
        {
          /* Fetch successively more significant portions.  */
          if (CONST_INT_P (value))
            part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
                             >> bitsdone)
                            & ((HOST_WIDE_INT_1 << thissize) - 1));
          /* Likewise, but the source is big-endian.  */
          else if (reverse)
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize,
                                            total_bits - bitsdone - thissize,
                                            NULL_RTX, 1, false);
          else
            part = extract_fixed_bit_field (word_mode, value, value_mode,
                                            thissize, bitsdone, NULL_RTX,
                                            1, false);
        }

      /* If OP0 is a register, then handle OFFSET here.  */
      rtx op0_piece = op0;
      opt_scalar_int_mode op0_piece_mode = op0_mode;
      if (SUBREG_P (op0) || REG_P (op0))
        {
          scalar_int_mode imode;
          if (op0_mode.exists (&imode)
              && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
            {
              if (offset)
                op0_piece = const0_rtx;
            }
          else
            {
              op0_piece = operand_subword_force (op0,
                                                 offset * unit / BITS_PER_WORD,
                                                 GET_MODE (op0));
              op0_piece_mode = word_mode;
            }
          offset &= BITS_PER_WORD / unit - 1;
        }

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
         it is just an out-of-bounds access.  Ignore it.  */
      if (op0_piece != const0_rtx)
        store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
                               offset * unit + thispos, bitregion_start,
                               bitregion_end, part, word_mode, reverse);
      bitsdone += thissize;
    }
}
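
/* Worked example (illustrative): with UNIT == 32, storing an 8-bit
   field at BITPOS 28 is done in two pieces: 4 bits at offset 0,
   position 28, then the remaining 4 bits at offset 1, position 0, each
   matching portion of VALUE being fetched with extract_fixed_bit_field
   above.  */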

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
                             machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
      x = convert_to_mode (int_mode, x, unsignedp);
      x = force_reg (int_mode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
1515
1516 /* Try to use an ext(z)v pattern to extract a field from OP0.
1517 Return the extracted value on success, otherwise return null.
1518 EXTV describes the extraction instruction to use. If OP0_MODE
1519 is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1520 The other arguments are as for extract_bit_field. */
1521
1522 static rtx
extract_bit_field_using_extv(const extraction_insn * extv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode)1523 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1524 opt_scalar_int_mode op0_mode,
1525 unsigned HOST_WIDE_INT bitsize,
1526 unsigned HOST_WIDE_INT bitnum,
1527 int unsignedp, rtx target,
1528 machine_mode mode, machine_mode tmode)
1529 {
1530 struct expand_operand ops[4];
1531 rtx spec_target = target;
1532 rtx spec_target_subreg = 0;
1533 scalar_int_mode ext_mode = extv->field_mode;
1534 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1535
1536 if (bitsize == 0 || unit < bitsize)
1537 return NULL_RTX;
1538
1539 if (MEM_P (op0))
1540 /* Get a reference to the first byte of the field. */
1541 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1542 &bitnum);
1543 else
1544 {
1545 /* Convert from counting within OP0 to counting in EXT_MODE. */
1546 if (BYTES_BIG_ENDIAN)
1547 bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1548
1549 /* If op0 is a register, we need it in EXT_MODE to make it
1550 acceptable to the format of ext(z)v. */
1551 if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1552 return NULL_RTX;
1553 if (REG_P (op0) && op0_mode.require () != ext_mode)
1554 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1555 }
1556
1557 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1558 "backwards" from the size of the unit we are extracting from.
1559 Otherwise, we count bits from the most significant on a
1560 BYTES/BITS_BIG_ENDIAN machine. */
1561
1562 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1563 bitnum = unit - bitsize - bitnum;
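  /* For example, with UNIT == 32, BITSIZE == 8 and BITNUM == 4, the
     field's position becomes 32 - 8 - 4 == 20 when counted from the
     other end of the unit.  */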
1564
1565 if (target == 0)
1566 target = spec_target = gen_reg_rtx (tmode);
1567
1568 if (GET_MODE (target) != ext_mode)
1569 {
1570 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1571 between the mode of the extraction (EXT_MODE) and the target
1572 mode. Instead, create a temporary and use convert_move to set
1573 the target. */
1574 if (REG_P (target)
1575 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1576 {
1577 target = gen_lowpart (ext_mode, target);
1578 if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1579 spec_target_subreg = target;
1580 }
1581 else
1582 target = gen_reg_rtx (ext_mode);
1583 }
1584
1585 create_output_operand (&ops[0], target, ext_mode);
1586 create_fixed_operand (&ops[1], op0);
1587 create_integer_operand (&ops[2], bitsize);
1588 create_integer_operand (&ops[3], bitnum);
1589 if (maybe_expand_insn (extv->icode, 4, ops))
1590 {
1591 target = ops[0].value;
1592 if (target == spec_target)
1593 return target;
1594 if (target == spec_target_subreg)
1595 return spec_target;
1596 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1597 }
1598 return NULL_RTX;
1599 }
1600
1601 /* See whether it would be valid to extract the part of OP0 described
1602 by BITNUM and BITSIZE into a value of mode MODE using a subreg
1603 operation. Return the subreg if so, otherwise return null. */
1604
1605 static rtx
1606 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1607 poly_uint64 bitsize, poly_uint64 bitnum)
1608 {
1609 poly_uint64 bytenum;
1610 if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1611 && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1612 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1613 && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1614 return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1615 return NULL_RTX;
1616 }
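/* For example, extracting the low single-register part of a multi-register
   vector value can often be done as a lowpart SUBREG, with no instructions
   emitted, provided truncation between the two modes is a no-op.  */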
1617
1618 /* A subroutine of extract_bit_field, with the same arguments.
1619 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1620 if we can find no other means of implementing the operation.
1621 If FALLBACK_P is false, return NULL instead. */
1622
1623 static rtx
1624 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1625 int unsignedp, rtx target, machine_mode mode,
1626 machine_mode tmode, bool reverse, bool fallback_p,
1627 rtx *alt_rtl)
1628 {
1629 rtx op0 = str_rtx;
1630 machine_mode mode1;
1631
1632 if (tmode == VOIDmode)
1633 tmode = mode;
1634
1635 while (GET_CODE (op0) == SUBREG)
1636 {
1637 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1638 op0 = SUBREG_REG (op0);
1639 }
1640
1641 /* If we have an out-of-bounds access to a register, just return an
1642 uninitialized register of the required mode. This can occur if the
1643 source code contains an out-of-bounds access to a small array. */
1644 if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1645 return gen_reg_rtx (tmode);
1646
1647 if (REG_P (op0)
1648 && mode == GET_MODE (op0)
1649 && known_eq (bitnum, 0U)
1650 && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1651 {
1652 if (reverse)
1653 op0 = flip_storage_order (mode, op0);
1654 /* We're trying to extract a full register from itself. */
1655 return op0;
1656 }
1657
1658 /* First try to check for vector from vector extractions. */
1659 if (VECTOR_MODE_P (GET_MODE (op0))
1660 && !MEM_P (op0)
1661 && VECTOR_MODE_P (tmode)
1662 && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1663 && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1664 {
1665 machine_mode new_mode = GET_MODE (op0);
1666 if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1667 {
1668 scalar_mode inner_mode = GET_MODE_INNER (tmode);
1669 poly_uint64 nunits;
1670 if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1671 GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1672 || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1673 || !VECTOR_MODE_P (new_mode)
1674 || maybe_ne (GET_MODE_SIZE (new_mode),
1675 GET_MODE_SIZE (GET_MODE (op0)))
1676 || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1677 || !targetm.vector_mode_supported_p (new_mode))
1678 new_mode = VOIDmode;
1679 }
1680 poly_uint64 pos;
1681 if (new_mode != VOIDmode
1682 && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1683 != CODE_FOR_nothing)
1684 && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1685 {
1686 struct expand_operand ops[3];
1687 machine_mode outermode = new_mode;
1688 machine_mode innermode = tmode;
1689 enum insn_code icode
1690 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1691
1692 if (new_mode != GET_MODE (op0))
1693 op0 = gen_lowpart (new_mode, op0);
1694 create_output_operand (&ops[0], target, innermode);
1695 ops[0].target = 1;
1696 create_input_operand (&ops[1], op0, outermode);
1697 create_integer_operand (&ops[2], pos);
1698 if (maybe_expand_insn (icode, 3, ops))
1699 {
1700 if (alt_rtl && ops[0].target)
1701 *alt_rtl = target;
1702 target = ops[0].value;
1703 if (GET_MODE (target) != mode)
1704 return gen_lowpart (tmode, target);
1705 return target;
1706 }
1707 }
1708 }
1709
1710 /* See if we can get a better vector mode before extracting. */
1711 if (VECTOR_MODE_P (GET_MODE (op0))
1712 && !MEM_P (op0)
1713 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1714 {
1715 machine_mode new_mode;
1716
1717 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1718 new_mode = MIN_MODE_VECTOR_FLOAT;
1719 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1720 new_mode = MIN_MODE_VECTOR_FRACT;
1721 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1722 new_mode = MIN_MODE_VECTOR_UFRACT;
1723 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1724 new_mode = MIN_MODE_VECTOR_ACCUM;
1725 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1726 new_mode = MIN_MODE_VECTOR_UACCUM;
1727 else
1728 new_mode = MIN_MODE_VECTOR_INT;
1729
1730 FOR_EACH_MODE_FROM (new_mode, new_mode)
1731 if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1732 && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1733 && targetm.vector_mode_supported_p (new_mode))
1734 break;
1735 if (new_mode != VOIDmode)
1736 op0 = gen_lowpart (new_mode, op0);
1737 }
1738
1739 /* Use vec_extract patterns for extracting parts of vectors whenever
1740 available. If that fails, see whether the current modes and bitregion
1741 give a natural subreg. */
1742 machine_mode outermode = GET_MODE (op0);
1743 if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1744 {
1745 scalar_mode innermode = GET_MODE_INNER (outermode);
1746 enum insn_code icode
1747 = convert_optab_handler (vec_extract_optab, outermode, innermode);
1748 poly_uint64 pos;
1749 if (icode != CODE_FOR_nothing
1750 && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1751 && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1752 {
1753 struct expand_operand ops[3];
1754
1755 create_output_operand (&ops[0], target, innermode);
1756 ops[0].target = 1;
1757 create_input_operand (&ops[1], op0, outermode);
1758 create_integer_operand (&ops[2], pos);
1759 if (maybe_expand_insn (icode, 3, ops))
1760 {
1761 if (alt_rtl && ops[0].target)
1762 *alt_rtl = target;
1763 target = ops[0].value;
1764 if (GET_MODE (target) != mode)
1765 return gen_lowpart (tmode, target);
1766 return target;
1767 }
1768 }
1769 /* Using subregs is useful if we're extracting one register vector
1770 from a multi-register vector. extract_bit_field_as_subreg checks
1771 for valid bitsize and bitnum, so we don't need to do that here. */
1772 if (VECTOR_MODE_P (mode))
1773 {
1774 rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1775 if (sub)
1776 return sub;
1777 }
1778 }
1779
1780 /* Make sure we are playing with integral modes. Pun with subregs
1781 if we aren't. */
1782 opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1783 scalar_int_mode imode;
1784 if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1785 {
1786 if (MEM_P (op0))
1787 op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1788 0, MEM_SIZE (op0));
1789 else if (op0_mode.exists (&imode))
1790 {
1791 op0 = gen_lowpart (imode, op0);
1792
1793 /* If we got a SUBREG, force it into a register since we
1794 aren't going to be able to do another SUBREG on it. */
1795 if (GET_CODE (op0) == SUBREG)
1796 op0 = force_reg (imode, op0);
1797 }
1798 else
1799 {
1800 poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1801 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1802 emit_move_insn (mem, op0);
1803 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1804 }
1805 }
1806
1807 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1808 If that's wrong, the solution is to test for it and set TARGET to 0
1809 if needed. */
1810
1811 /* Get the mode of the field to use for atomic access or subreg
1812 conversion. */
1813 if (!SCALAR_INT_MODE_P (tmode)
1814 || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1815 mode1 = mode;
1816 gcc_assert (mode1 != BLKmode);
1817
1818 /* Extraction of a full MODE1 value can be done with a subreg as long
1819 as the least significant bit of the value is the least significant
1820 bit of either OP0 or a word of OP0. */
1821 if (!MEM_P (op0) && !reverse)
1822 {
1823 rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1824 if (sub)
1825 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1826 }
1827
1828 /* Extraction of a full MODE1 value can be done with a load as long as
1829 the field is on a byte boundary and is sufficiently aligned. */
1830 poly_uint64 bytenum;
1831 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1832 {
1833 op0 = adjust_bitfield_address (op0, mode1, bytenum);
1834 if (reverse)
1835 op0 = flip_storage_order (mode1, op0);
1836 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1837 }
1838
1839 /* If we have a memory source and a non-constant bit offset, restrict
1840 the memory to the referenced bytes. This is a worst-case fallback
1841 but is useful for things like vector booleans. */
1842 if (MEM_P (op0) && !bitnum.is_constant ())
1843 {
1844 bytenum = bits_to_bytes_round_down (bitnum);
1845 bitnum = num_trailing_bits (bitnum);
1846 poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1847 op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1848 op0_mode = opt_scalar_int_mode ();
1849 }
1850
1851 /* It's possible we'll need to handle other cases here for
1852 polynomial bitnum and bitsize. */
1853
1854 /* From here on we need to be looking at a fixed-size extraction. */
1855 return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1856 bitnum.to_constant (), unsignedp,
1857 target, mode, tmode, reverse, fallback_p);
1858 }
1859
1860 /* Subroutine of extract_bit_field_1, with the same arguments, except
1861 that BITSIZE and BITNUM are constant. Handle cases specific to
1862 integral modes. If OP0_MODE is defined, it is the mode of OP0,
1863 otherwise OP0 is a BLKmode MEM. */
1864
1865 static rtx
1866 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1867 unsigned HOST_WIDE_INT bitsize,
1868 unsigned HOST_WIDE_INT bitnum, int unsignedp,
1869 rtx target, machine_mode mode, machine_mode tmode,
1870 bool reverse, bool fallback_p)
1871 {
1872 /* Handle fields bigger than a word. */
1873
1874 if (bitsize > BITS_PER_WORD)
1875 {
1876 /* Here we transfer the words of the field
1877 in the order least significant first.
1878 This is because the most significant word is the one which may
1879 be less than full. */
1880
1881 const bool backwards = WORDS_BIG_ENDIAN;
1882 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1883 unsigned int i;
1884 rtx_insn *last;
1885
1886 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1887 target = gen_reg_rtx (mode);
1888
1889 /* In case we're about to clobber a base register or something
1890 (see gcc.c-torture/execute/20040625-1.c). */
1891 if (reg_mentioned_p (target, op0))
1892 target = gen_reg_rtx (mode);
1893
1894 /* Indicate for flow that the entire target reg is being set. */
1895 emit_clobber (target);
1896
1897 /* The mode must be fixed-size, since extract_bit_field_1 handles
1898 extractions from variable-sized objects before calling this
1899 function. */
1900 unsigned int target_size
1901 = GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1902 last = get_last_insn ();
1903 for (i = 0; i < nwords; i++)
1904 {
1905 /* If I is 0, use the low-order word in both field and target;
1906 if I is 1, use the next to lowest word; and so on. */
1907 /* Word number in TARGET to use. */
1908 unsigned int wordnum
1909 = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1910 /* Offset from start of field in OP0. */
1911 unsigned int bit_offset = (backwards ^ reverse
1912 ? MAX ((int) bitsize - ((int) i + 1)
1913 * BITS_PER_WORD,
1914 0)
1915 : (int) i * BITS_PER_WORD);
1916 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1917 rtx result_part
1918 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1919 bitsize - i * BITS_PER_WORD),
1920 bitnum + bit_offset, 1, target_part,
1921 mode, word_mode, reverse, fallback_p, NULL);
1922
1923 gcc_assert (target_part);
1924 if (!result_part)
1925 {
1926 delete_insns_since (last);
1927 return NULL;
1928 }
1929
1930 if (result_part != target_part)
1931 emit_move_insn (target_part, result_part);
1932 }
1933
1934 if (unsignedp)
1935 {
1936 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1937 need to be zeroed out. */
1938 if (target_size > nwords * UNITS_PER_WORD)
1939 {
1940 unsigned int i, total_words;
1941
1942 total_words = target_size / UNITS_PER_WORD;
1943 for (i = nwords; i < total_words; i++)
1944 emit_move_insn
1945 (operand_subword (target,
1946 backwards ? total_words - i - 1 : i,
1947 1, VOIDmode),
1948 const0_rtx);
1949 }
1950 return target;
1951 }
1952
1953 /* Signed bit field: sign-extend with two arithmetic shifts. */
1954 target = expand_shift (LSHIFT_EXPR, mode, target,
1955 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1956 return expand_shift (RSHIFT_EXPR, mode, target,
1957 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
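      /* For example, with a 64-bit MODE and BITSIZE == 40, both shift
	 counts are 64 - 40 == 24: the left shift moves the field's sign
	 bit to the mode's msb and the arithmetic right shift moves it
	 back, replicating the sign into the upper 24 bits.  */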
1958 }
1959
1960 /* If OP0 is a multi-word register, narrow it to the affected word.
1961 If the region spans two words, defer to extract_split_bit_field. */
1962 if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1963 {
1964 if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1965 {
1966 if (!fallback_p)
1967 return NULL_RTX;
1968 target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1969 unsignedp, reverse);
1970 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1971 }
1972 op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1973 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1974 op0_mode = word_mode;
1975 bitnum %= BITS_PER_WORD;
1976 }
1977
1978 /* From here on we know the desired field is smaller than a word.
1979 If OP0 is a register, it too fits within a word. */
1980 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1981 extraction_insn extv;
1982 if (!MEM_P (op0)
1983 && !reverse
1984 /* ??? We could limit the structure size to the part of OP0 that
1985 contains the field, with appropriate checks for endianness
1986 and TARGET_TRULY_NOOP_TRUNCATION. */
1987 && get_best_reg_extraction_insn (&extv, pattern,
1988 GET_MODE_BITSIZE (op0_mode.require ()),
1989 tmode))
1990 {
1991 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1992 bitsize, bitnum,
1993 unsignedp, target, mode,
1994 tmode);
1995 if (result)
1996 return result;
1997 }
1998
1999 /* If OP0 is a memory, try copying it to a register and seeing if a
2000 cheap register alternative is available. */
2001 if (MEM_P (op0) && !reverse)
2002 {
2003 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2004 tmode))
2005 {
2006 rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2007 bitsize, bitnum,
2008 unsignedp, target, mode,
2009 tmode);
2010 if (result)
2011 return result;
2012 }
2013
2014 rtx_insn *last = get_last_insn ();
2015
2016 /* Try loading part of OP0 into a register and extracting the
2017 bitfield from that. */
2018 unsigned HOST_WIDE_INT bitpos;
2019 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2020 0, 0, tmode, &bitpos);
2021 if (xop0)
2022 {
2023 xop0 = copy_to_reg (xop0);
2024 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2025 unsignedp, target,
2026 mode, tmode, reverse, false, NULL);
2027 if (result)
2028 return result;
2029 delete_insns_since (last);
2030 }
2031 }
2032
2033 if (!fallback_p)
2034 return NULL;
2035
2036 /* Find a correspondingly-sized integer field, so we can apply
2037 shifts and masks to it. */
2038 scalar_int_mode int_mode;
2039 if (!int_mode_for_mode (tmode).exists (&int_mode))
2040 /* If this fails, we should probably push op0 out to memory and then
2041 do a load. */
2042 int_mode = int_mode_for_mode (mode).require ();
2043
2044 target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2045 bitnum, target, unsignedp, reverse);
2046
2047 /* Complex values must be reversed piecewise, so we need to undo the global
2048 reversal, convert to the complex mode and reverse again. */
2049 if (reverse && COMPLEX_MODE_P (tmode))
2050 {
2051 target = flip_storage_order (int_mode, target);
2052 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2053 target = flip_storage_order (tmode, target);
2054 }
2055 else
2056 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2057
2058 return target;
2059 }
2060
2061 /* Generate code to extract a bit-field from STR_RTX
2062 containing BITSIZE bits, starting at BITNUM,
2063 and put it in TARGET if possible (if TARGET is nonzero).
2064 Regardless of TARGET, we return the rtx for where the value is placed.
2065
2066 STR_RTX is the structure containing the field (a REG or MEM).
2067 UNSIGNEDP is nonzero if this is an unsigned bit field.
2068 MODE is the natural mode of the field value once extracted.
2069 TMODE is the mode the caller would like the value to have;
2070 but the value may be returned with type MODE instead.
2071
2072 If REVERSE is true, the extraction is to be done in reverse order.
2073
2074 If a TARGET is specified and we can store in it at no extra cost,
2075 we do so, and return TARGET.
2076 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2077 if they are equally easy. */
2078
2079 rtx
2080 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2081 int unsignedp, rtx target, machine_mode mode,
2082 machine_mode tmode, bool reverse, rtx *alt_rtl)
2083 {
2084 machine_mode mode1;
2085
2086 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
2087 if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2088 mode1 = GET_MODE (str_rtx);
2089 else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2090 mode1 = GET_MODE (target);
2091 else
2092 mode1 = tmode;
2093
2094 unsigned HOST_WIDE_INT ibitsize, ibitnum;
2095 scalar_int_mode int_mode;
2096 if (bitsize.is_constant (&ibitsize)
2097 && bitnum.is_constant (&ibitnum)
2098 && is_a <scalar_int_mode> (mode1, &int_mode)
2099 && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2100 int_mode, 0, 0))
2101 {
2102 /* Extraction of a full INT_MODE value can be done with a simple load.
2103 We know here that the field can be accessed with one single
2104 instruction. For targets that support unaligned memory,
2105 an unaligned access may be necessary. */
2106 if (ibitsize == GET_MODE_BITSIZE (int_mode))
2107 {
2108 rtx result = adjust_bitfield_address (str_rtx, int_mode,
2109 ibitnum / BITS_PER_UNIT);
2110 if (reverse)
2111 result = flip_storage_order (int_mode, result);
2112 gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2113 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2114 }
2115
2116 str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2117 &ibitnum);
2118 gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2119 str_rtx = copy_to_reg (str_rtx);
2120 return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2121 target, mode, tmode, reverse, true, alt_rtl);
2122 }
2123
2124 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2125 target, mode, tmode, reverse, true, alt_rtl);
2126 }
2127
2128 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2129 from bit BITNUM of OP0. If OP0_MODE is defined, it is the mode of OP0,
2130 otherwise OP0 is a BLKmode MEM.
2131
2132 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2133 If REVERSE is true, the extraction is to be done in reverse order.
2134
2135 If TARGET is nonzero, attempts to store the value there
2136 and return TARGET, but this is not guaranteed.
2137 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
2138
2139 static rtx
2140 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2141 opt_scalar_int_mode op0_mode,
2142 unsigned HOST_WIDE_INT bitsize,
2143 unsigned HOST_WIDE_INT bitnum, rtx target,
2144 int unsignedp, bool reverse)
2145 {
2146 scalar_int_mode mode;
2147 if (MEM_P (op0))
2148 {
2149 if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2150 BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2151 /* The only way this should occur is if the field spans word
2152 boundaries. */
2153 return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2154 unsignedp, reverse);
2155
2156 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2157 }
2158 else
2159 mode = op0_mode.require ();
2160
2161 return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2162 target, unsignedp, reverse);
2163 }
2164
2165 /* Helper function for extract_fixed_bit_field, extracts
2166 the bit field always using MODE, which is the mode of OP0.
2167 The other arguments are as for extract_fixed_bit_field. */
2168
2169 static rtx
2170 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2171 unsigned HOST_WIDE_INT bitsize,
2172 unsigned HOST_WIDE_INT bitnum, rtx target,
2173 int unsignedp, bool reverse)
2174 {
2175 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2176 for invalid input, such as extract equivalent of f5 from
2177 gcc.dg/pr48335-2.c. */
2178
2179 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2180 /* BITNUM is the distance between our msb and that of OP0.
2181 Convert it to the distance from the lsb. */
2182 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2183
2184 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2185 We have reduced the big-endian case to the little-endian case. */
2186 if (reverse)
2187 op0 = flip_storage_order (mode, op0);
2188
2189 if (unsignedp)
2190 {
2191 if (bitnum)
2192 {
2193 /* If the field does not already start at the lsb,
2194 shift it so it does. */
2195 /* Maybe propagate the target for the shift. */
2196 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2197 if (tmode != mode)
2198 subtarget = 0;
2199 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2200 }
2201 /* Convert the value to the desired mode. TMODE must also be a
2202 scalar integer for this conversion to make sense, since we
2203 shouldn't reinterpret the bits. */
2204 scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2205 if (mode != new_mode)
2206 op0 = convert_to_mode (new_mode, op0, 1);
2207
2208 /* Unless the msb of the field used to be the msb when we shifted,
2209 mask out the upper bits. */
2210
2211 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2212 return expand_binop (new_mode, and_optab, op0,
2213 mask_rtx (new_mode, 0, bitsize, 0),
2214 target, 1, OPTAB_LIB_WIDEN);
2215 return op0;
2216 }
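  /* For example, the unsigned path above turns a 5-bit field at bit 3
     of X into (x >> 3) & 0x1f: the shift brings the field to the lsb
     and the AND clears the bits above it.  */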
2217
2218 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2219 then arithmetic-shift its lsb to the lsb of the word. */
2220 op0 = force_reg (mode, op0);
2221
2222 /* Find the narrowest integer mode that contains the field. */
2223
2224 opt_scalar_int_mode mode_iter;
2225 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2226 if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2227 break;
2228
2229 mode = mode_iter.require ();
2230 op0 = convert_to_mode (mode, op0, 0);
2231
2232 if (mode != tmode)
2233 target = 0;
2234
2235 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2236 {
2237 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2238 /* Maybe propagate the target for the shift. */
2239 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2240 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2241 }
2242
2243 return expand_shift (RSHIFT_EXPR, mode, op0,
2244 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2245 }
2246
2247 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2248 VALUE << BITPOS. */
2249
2250 static rtx
2251 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2252 int bitpos)
2253 {
2254 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2255 }
2256
2257 /* Extract a bit field that is split across two words
2258 and return an RTX for the result.
2259
2260 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2261 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2262 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2263 If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2264 a BLKmode MEM.
2265
2266 If REVERSE is true, the extraction is to be done in reverse order. */
2267
2268 static rtx
2269 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2270 unsigned HOST_WIDE_INT bitsize,
2271 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2272 bool reverse)
2273 {
2274 unsigned int unit;
2275 unsigned int bitsdone = 0;
2276 rtx result = NULL_RTX;
2277 int first = 1;
2278
2279 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2280 much at a time. */
2281 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2282 unit = BITS_PER_WORD;
2283 else
2284 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2285
2286 while (bitsdone < bitsize)
2287 {
2288 unsigned HOST_WIDE_INT thissize;
2289 rtx part;
2290 unsigned HOST_WIDE_INT thispos;
2291 unsigned HOST_WIDE_INT offset;
2292
2293 offset = (bitpos + bitsdone) / unit;
2294 thispos = (bitpos + bitsdone) % unit;
2295
2296 /* THISSIZE must not overrun a word boundary. Otherwise,
2297 extract_fixed_bit_field will call us again, and we will mutually
2298 recurse forever. */
2299 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2300 thissize = MIN (thissize, unit - thispos);
2301
2302 /* If OP0 is a register, then handle OFFSET here. */
2303 rtx op0_piece = op0;
2304 opt_scalar_int_mode op0_piece_mode = op0_mode;
2305 if (SUBREG_P (op0) || REG_P (op0))
2306 {
2307 op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2308 op0_piece_mode = word_mode;
2309 offset = 0;
2310 }
2311
2312 /* Extract the parts in order of increasing bit position.
2313 OFFSET is in UNITs, and UNIT is in bits. */
2315 part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2316 thissize, offset * unit + thispos,
2317 0, 1, reverse);
2318 bitsdone += thissize;
2319
2320 /* Shift this part into place for the result. */
2321 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2322 {
2323 if (bitsize != bitsdone)
2324 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2325 bitsize - bitsdone, 0, 1);
2326 }
2327 else
2328 {
2329 if (bitsdone != thissize)
2330 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2331 bitsdone - thissize, 0, 1);
2332 }
2333
2334 if (first)
2335 result = part;
2336 else
2337 /* Combine the parts with bitwise or. This works
2338 because we extracted each part as an unsigned bit field. */
2339 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2340 OPTAB_LIB_WIDEN);
2341
2342 first = 0;
2343 }
2344
2345 /* Unsigned bit field: we are done. */
2346 if (unsignedp)
2347 return result;
2348 /* Signed bit field: sign-extend with two arithmetic shifts. */
2349 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2350 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2351 return expand_shift (RSHIFT_EXPR, word_mode, result,
2352 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2353 }
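#if 0
/* Illustrative sketch only, not part of GCC: what the loop above computes
   for a 10-bit unsigned field starting at bit 28 of a little-endian
   64-bit object stored as two 32-bit words W0 and W1.  */
static unsigned int
extract_split_10_at_28 (unsigned int w0, unsigned int w1)
{
  unsigned int lo = (w0 >> 28) & 0xf;	/* 4 bits from the first word.  */
  unsigned int hi = w1 & 0x3f;		/* 6 bits from the second word.  */
  return (hi << 4) | lo;		/* Combine the parts with IOR.  */
}
#endif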
2354
2355 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2356 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2357 MODE, fill the upper bits with zeros. Fail if the layout of either
2358 mode is unknown (as for CC modes) or if the extraction would involve
2359 unprofitable mode punning. Return the value on success, otherwise
2360 return null.
2361
2362 This is different from gen_lowpart* in these respects:
2363
2364 - the returned value must always be considered an rvalue
2365
2366 - when MODE is wider than SRC_MODE, the extraction involves
2367 a zero extension
2368
2369 - when MODE is smaller than SRC_MODE, the extraction involves
2370 a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2371
2372 In other words, this routine performs a computation, whereas the
2373 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2374 operations. */
2375
2376 rtx
2377 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2378 {
2379 scalar_int_mode int_mode, src_int_mode;
2380
2381 if (mode == src_mode)
2382 return src;
2383
2384 if (CONSTANT_P (src))
2385 {
2386 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2387 fails, it will happily create (subreg (symbol_ref)) or similar
2388 invalid SUBREGs. */
2389 poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2390 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2391 if (ret)
2392 return ret;
2393
2394 if (GET_MODE (src) == VOIDmode
2395 || !validate_subreg (mode, src_mode, src, byte))
2396 return NULL_RTX;
2397
2398 src = force_reg (GET_MODE (src), src);
2399 return gen_rtx_SUBREG (mode, src, byte);
2400 }
2401
2402 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2403 return NULL_RTX;
2404
2405 if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2406 && targetm.modes_tieable_p (mode, src_mode))
2407 {
2408 rtx x = gen_lowpart_common (mode, src);
2409 if (x)
2410 return x;
2411 }
2412
2413 if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2414 || !int_mode_for_mode (mode).exists (&int_mode))
2415 return NULL_RTX;
2416
2417 if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2418 return NULL_RTX;
2419 if (!targetm.modes_tieable_p (int_mode, mode))
2420 return NULL_RTX;
2421
2422 src = gen_lowpart (src_int_mode, src);
2423 src = convert_modes (int_mode, src_int_mode, src, true);
2424 src = gen_lowpart (mode, src);
2425 return src;
2426 }
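/* For example, extract_low_bits (HImode, SImode, x) reads the low 16 bits
   of the 32-bit value X as an HImode rvalue, while
   extract_low_bits (SImode, HImode, x) zero-extends the 16-bit value
   to 32 bits.  */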
2427
2428 /* Add INC into TARGET. */
2429
2430 void
2431 expand_inc (rtx target, rtx inc)
2432 {
2433 rtx value = expand_binop (GET_MODE (target), add_optab,
2434 target, inc,
2435 target, 0, OPTAB_LIB_WIDEN);
2436 if (value != target)
2437 emit_move_insn (target, value);
2438 }
2439
2440 /* Subtract DEC from TARGET. */
2441
2442 void
2443 expand_dec (rtx target, rtx dec)
2444 {
2445 rtx value = expand_binop (GET_MODE (target), sub_optab,
2446 target, dec,
2447 target, 0, OPTAB_LIB_WIDEN);
2448 if (value != target)
2449 emit_move_insn (target, value);
2450 }
2451
2452 /* Output a shift instruction for expression code CODE,
2453 with SHIFTED being the rtx for the value to shift,
2454 and AMOUNT the rtx for the amount to shift by.
2455 Store the result in the rtx TARGET, if that is convenient.
2456 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457 Return the rtx for where the value is.
2458 If that cannot be done, abort the compilation unless MAY_FAIL is true,
2459 in which case 0 is returned. */
2460
2461 static rtx
2462 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2463 rtx amount, rtx target, int unsignedp, bool may_fail = false)
2464 {
2465 rtx op1, temp = 0;
2466 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2467 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2468 optab lshift_optab = ashl_optab;
2469 optab rshift_arith_optab = ashr_optab;
2470 optab rshift_uns_optab = lshr_optab;
2471 optab lrotate_optab = rotl_optab;
2472 optab rrotate_optab = rotr_optab;
2473 machine_mode op1_mode;
2474 scalar_mode scalar_mode = GET_MODE_INNER (mode);
2475 int attempt;
2476 bool speed = optimize_insn_for_speed_p ();
2477
2478 op1 = amount;
2479 op1_mode = GET_MODE (op1);
2480
2481 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2482 shift amount is a vector, use the vector/vector shift patterns. */
2483 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2484 {
2485 lshift_optab = vashl_optab;
2486 rshift_arith_optab = vashr_optab;
2487 rshift_uns_optab = vlshr_optab;
2488 lrotate_optab = vrotl_optab;
2489 rrotate_optab = vrotr_optab;
2490 }
2491
2492 /* We used to detect shift counts computed by NEGATE_EXPR and shift
2493 in the other direction, but that does not work on all machines. */
2495
2496 if (SHIFT_COUNT_TRUNCATED)
2497 {
2498 if (CONST_INT_P (op1)
2499 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2500 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2501 op1 = gen_int_shift_amount (mode,
2502 (unsigned HOST_WIDE_INT) INTVAL (op1)
2503 % GET_MODE_BITSIZE (scalar_mode));
2504 else if (GET_CODE (op1) == SUBREG
2505 && subreg_lowpart_p (op1)
2506 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2507 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2508 op1 = SUBREG_REG (op1);
2509 }
2510
2511 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2512 prefer left rotation; if op1 is from bitsize / 2 + 1 to
2513 bitsize - 1, rotate in the other direction instead, by an
2514 amount of 1 .. bitsize / 2 - 1. */
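  /* For example, in SImode a rotate left by 30 becomes a rotate right
     by 2.  */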
2515 if (rotate
2516 && CONST_INT_P (op1)
2517 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2518 GET_MODE_BITSIZE (scalar_mode) - 1))
2519 {
2520 op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2521 - INTVAL (op1)));
2522 left = !left;
2523 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2524 }
2525
2526 /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2527 Note that this is not the case for bigger values. For instance a rotation
2528 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2529 0x04030201 (bswapsi). */
2530 if (rotate
2531 && CONST_INT_P (op1)
2532 && INTVAL (op1) == BITS_PER_UNIT
2533 && GET_MODE_SIZE (scalar_mode) == 2
2534 && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2535 return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2536
2537 if (op1 == const0_rtx)
2538 return shifted;
2539
2540 /* Check whether it's cheaper to implement a left shift by a constant
2541 bit count as a sequence of additions. */
2542 if (code == LSHIFT_EXPR
2543 && CONST_INT_P (op1)
2544 && INTVAL (op1) > 0
2545 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2546 && INTVAL (op1) < MAX_BITS_PER_WORD
2547 && (shift_cost (speed, mode, INTVAL (op1))
2548 > INTVAL (op1) * add_cost (speed, mode))
2549 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2550 {
2551 int i;
2552 for (i = 0; i < INTVAL (op1); i++)
2553 {
2554 temp = force_reg (mode, shifted);
2555 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2556 unsignedp, OPTAB_LIB_WIDEN);
2557 }
2558 return shifted;
2559 }
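  /* For example, with INTVAL (op1) == 3 the loop above emits three
     self-additions, t1 = x + x; t2 = t1 + t1; t3 = t2 + t2;
     computing x << 3 by repeated doubling.  */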
2560
2561 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2562 {
2563 enum optab_methods methods;
2564
2565 if (attempt == 0)
2566 methods = OPTAB_DIRECT;
2567 else if (attempt == 1)
2568 methods = OPTAB_WIDEN;
2569 else
2570 methods = OPTAB_LIB_WIDEN;
2571
2572 if (rotate)
2573 {
2574 /* Widening does not work for rotation. */
2575 if (methods == OPTAB_WIDEN)
2576 continue;
2577 else if (methods == OPTAB_LIB_WIDEN)
2578 {
2579 /* If we have been unable to open-code this by a rotation,
2580 do it as the IOR of two shifts. I.e., to rotate A
2581 by N bits, compute
2582 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2583 where C is the bitsize of A.
2584
2585 It is theoretically possible that the target machine might
2586 not be able to perform either shift and hence we would
2587 be making two libcalls rather than just the one for the
2588 shift (similarly if IOR could not be done). We will allow
2589 this extremely unlikely lossage to avoid complicating the
2590 code below. */
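	      /* For example, rotating an 8-bit A left by N computes
		 (A << N) | (A >> ((-N) & 7)); for N == 0 the right-shift
		 amount is 0 rather than 8, so neither shift amount can
		 reach the bitsize.  */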
2591
2592 rtx subtarget = target == shifted ? 0 : target;
2593 rtx new_amount, other_amount;
2594 rtx temp1;
2595
2596 new_amount = op1;
2597 if (op1 == const0_rtx)
2598 return shifted;
2599 else if (CONST_INT_P (op1))
2600 other_amount = gen_int_shift_amount
2601 (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2602 else
2603 {
2604 other_amount
2605 = simplify_gen_unary (NEG, GET_MODE (op1),
2606 op1, GET_MODE (op1));
2607 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2608 other_amount
2609 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2610 gen_int_mode (mask, GET_MODE (op1)));
2611 }
2612
2613 shifted = force_reg (mode, shifted);
2614
2615 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2616 mode, shifted, new_amount, 0, 1);
2617 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2618 mode, shifted, other_amount,
2619 subtarget, 1);
2620 return expand_binop (mode, ior_optab, temp, temp1, target,
2621 unsignedp, methods);
2622 }
2623
2624 temp = expand_binop (mode,
2625 left ? lrotate_optab : rrotate_optab,
2626 shifted, op1, target, unsignedp, methods);
2627 }
2628 else if (unsignedp)
2629 temp = expand_binop (mode,
2630 left ? lshift_optab : rshift_uns_optab,
2631 shifted, op1, target, unsignedp, methods);
2632
2633 /* Do arithmetic shifts.
2634 Also, if we are going to widen the operand, we can just as well
2635 use an arithmetic right-shift instead of a logical one. */
2636 if (temp == 0 && ! rotate
2637 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2638 {
2639 enum optab_methods methods1 = methods;
2640
2641 /* If trying to widen a log shift to an arithmetic shift,
2642 don't accept an arithmetic shift of the same size. */
2643 if (unsignedp)
2644 methods1 = OPTAB_MUST_WIDEN;
2645
2646 /* Arithmetic shift */
2647
2648 temp = expand_binop (mode,
2649 left ? lshift_optab : rshift_arith_optab,
2650 shifted, op1, target, unsignedp, methods1);
2651 }
2652
2653 /* We used to try extzv here for logical right shifts, but that was
2654 only useful for one machine, the VAX, and caused poor code
2655 generation there for lshrdi3, so the code was deleted and a
2656 define_expand for lshrsi3 was added to vax.md. */
2657 }
2658
2659 gcc_assert (temp != NULL_RTX || may_fail);
2660 return temp;
2661 }
2662
2663 /* Output a shift instruction for expression code CODE,
2664 with SHIFTED being the rtx for the value to shift,
2665 and AMOUNT the amount to shift by.
2666 Store the result in the rtx TARGET, if that is convenient.
2667 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2668 Return the rtx for where the value is. */
2669
2670 rtx
2671 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2672 poly_int64 amount, rtx target, int unsignedp)
2673 {
2674 return expand_shift_1 (code, mode, shifted,
2675 gen_int_shift_amount (mode, amount),
2676 target, unsignedp);
2677 }
2678
2679 /* Likewise, but return 0 if that cannot be done. */
2680
2681 static rtx
2682 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2683 int amount, rtx target, int unsignedp)
2684 {
2685 return expand_shift_1 (code, mode,
2686 shifted, GEN_INT (amount), target, unsignedp, true);
2687 }
2688
2689 /* Output a shift instruction for expression code CODE,
2690 with SHIFTED being the rtx for the value to shift,
2691 and AMOUNT the tree for the amount to shift by.
2692 Store the result in the rtx TARGET, if that is convenient.
2693 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2694 Return the rtx for where the value is. */
2695
2696 rtx
2697 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2698 tree amount, rtx target, int unsignedp)
2699 {
2700 return expand_shift_1 (code, mode,
2701 shifted, expand_normal (amount), target, unsignedp);
2702 }
2703
2704
2705 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2706 const struct mult_cost *, machine_mode mode);
2707 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2708 const struct algorithm *, enum mult_variant);
2709 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2710 static rtx extract_high_half (scalar_int_mode, rtx);
2711 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2712 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2713 int, int);
2714 /* Compute and return the best algorithm for multiplying by T.
2715 The algorithm must cost less than COST_LIMIT.
2716 If retval.cost >= COST_LIMIT, no algorithm was found and all
2717 other fields of the returned struct are undefined.
2718 MODE is the machine mode of the multiplication. */
2719
2720 static void
2721 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2722 const struct mult_cost *cost_limit, machine_mode mode)
2723 {
2724 int m;
2725 struct algorithm *alg_in, *best_alg;
2726 struct mult_cost best_cost;
2727 struct mult_cost new_limit;
2728 int op_cost, op_latency;
2729 unsigned HOST_WIDE_INT orig_t = t;
2730 unsigned HOST_WIDE_INT q;
2731 int maxm, hash_index;
2732 bool cache_hit = false;
2733 enum alg_code cache_alg = alg_zero;
2734 bool speed = optimize_insn_for_speed_p ();
2735 scalar_int_mode imode;
2736 struct alg_hash_entry *entry_ptr;
2737
2738 /* Indicate that no algorithm is yet found. If no algorithm
2739 is found, this value will be returned and indicate failure. */
2740 alg_out->cost.cost = cost_limit->cost + 1;
2741 alg_out->cost.latency = cost_limit->latency + 1;
2742
2743 if (cost_limit->cost < 0
2744 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2745 return;
2746
2747 /* Be prepared for vector modes. */
2748 imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2749
2750 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2751
2752 /* Restrict the bits of "t" to the multiplication's mode. */
2753 t &= GET_MODE_MASK (imode);
2754
2755 /* t == 1 can be done in zero cost. */
2756 if (t == 1)
2757 {
2758 alg_out->ops = 1;
2759 alg_out->cost.cost = 0;
2760 alg_out->cost.latency = 0;
2761 alg_out->op[0] = alg_m;
2762 return;
2763 }
2764
2765 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2766 fail now. */
2767 if (t == 0)
2768 {
2769 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2770 return;
2771 else
2772 {
2773 alg_out->ops = 1;
2774 alg_out->cost.cost = zero_cost (speed);
2775 alg_out->cost.latency = zero_cost (speed);
2776 alg_out->op[0] = alg_zero;
2777 return;
2778 }
2779 }
2780
2781 /* We'll be needing a couple extra algorithm structures now. */
2782
2783 alg_in = XALLOCA (struct algorithm);
2784 best_alg = XALLOCA (struct algorithm);
2785 best_cost = *cost_limit;
2786
2787 /* Compute the hash index. */
2788 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2789
2790 /* See if we already know what to do for T. */
2791 entry_ptr = alg_hash_entry_ptr (hash_index);
2792 if (entry_ptr->t == t
2793 && entry_ptr->mode == mode
2794 && entry_ptr->speed == speed
2795 && entry_ptr->alg != alg_unknown)
2796 {
2797 cache_alg = entry_ptr->alg;
2798
2799 if (cache_alg == alg_impossible)
2800 {
2801 /* The cache tells us that it's impossible to synthesize
2802 multiplication by T within entry_ptr->cost. */
2803 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2804 /* COST_LIMIT is at least as restrictive as the one
2805 recorded in the hash table, in which case we have no
2806 hope of synthesizing a multiplication. Just
2807 return. */
2808 return;
2809
2810 /* If we get here, COST_LIMIT is less restrictive than the
2811 one recorded in the hash table, so we may be able to
2812 synthesize a multiplication. Proceed as if we didn't
2813 have the cache entry. */
2814 }
2815 else
2816 {
2817 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2818 /* The cached algorithm shows that this multiplication
2819 requires more cost than COST_LIMIT. Just return. This
2820 way, we don't clobber this cache entry with
2821 alg_impossible but retain useful information. */
2822 return;
2823
2824 cache_hit = true;
2825
2826 switch (cache_alg)
2827 {
2828 case alg_shift:
2829 goto do_alg_shift;
2830
2831 case alg_add_t_m2:
2832 case alg_sub_t_m2:
2833 goto do_alg_addsub_t_m2;
2834
2835 case alg_add_factor:
2836 case alg_sub_factor:
2837 goto do_alg_addsub_factor;
2838
2839 case alg_add_t2_m:
2840 goto do_alg_add_t2_m;
2841
2842 case alg_sub_t2_m:
2843 goto do_alg_sub_t2_m;
2844
2845 default:
2846 gcc_unreachable ();
2847 }
2848 }
2849 }
2850
2851 /* If we have a group of zero bits at the low-order part of T, try
2852 multiplying by the remaining bits and then doing a shift. */
2853
2854 if ((t & 1) == 0)
2855 {
2856 do_alg_shift:
2857 m = ctz_or_zero (t); /* m = number of low zero bits */
2858 if (m < maxm)
2859 {
2860 q = t >> m;
2861 /* The function expand_shift will choose between a shift and
2862 a sequence of additions, so the observed cost is given as
2863 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2864 op_cost = m * add_cost (speed, mode);
2865 if (shift_cost (speed, mode, m) < op_cost)
2866 op_cost = shift_cost (speed, mode, m);
2867 new_limit.cost = best_cost.cost - op_cost;
2868 new_limit.latency = best_cost.latency - op_cost;
2869 synth_mult (alg_in, q, &new_limit, mode);
2870
2871 alg_in->cost.cost += op_cost;
2872 alg_in->cost.latency += op_cost;
2873 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2874 {
2875 best_cost = alg_in->cost;
2876 std::swap (alg_in, best_alg);
2877 best_alg->log[best_alg->ops] = m;
2878 best_alg->op[best_alg->ops] = alg_shift;
2879 }
2880
2881 /* See if treating ORIG_T as a signed number yields a better
2882 sequence. Try this sequence only for a negative ORIG_T
2883 as it would be useless for a non-negative ORIG_T. */
2884 if ((HOST_WIDE_INT) orig_t < 0)
2885 {
2886 /* Shift ORIG_T as follows because a right shift of a
2887 negative-valued signed type is implementation
2888 defined. */
2889 q = ~(~orig_t >> m);
2890 /* The function expand_shift will choose between a shift
2891 and a sequence of additions, so the observed cost is
2892 given as MIN (m * add_cost(speed, mode),
2893 shift_cost(speed, mode, m)). */
2894 op_cost = m * add_cost (speed, mode);
2895 if (shift_cost (speed, mode, m) < op_cost)
2896 op_cost = shift_cost (speed, mode, m);
2897 new_limit.cost = best_cost.cost - op_cost;
2898 new_limit.latency = best_cost.latency - op_cost;
2899 synth_mult (alg_in, q, &new_limit, mode);
2900
2901 alg_in->cost.cost += op_cost;
2902 alg_in->cost.latency += op_cost;
2903 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2904 {
2905 best_cost = alg_in->cost;
2906 std::swap (alg_in, best_alg);
2907 best_alg->log[best_alg->ops] = m;
2908 best_alg->op[best_alg->ops] = alg_shift;
2909 }
2910 }
2911 }
2912 if (cache_hit)
2913 goto done;
2914 }
2915
2916 /* If we have an odd number, add or subtract one. */
2917 if ((t & 1) != 0)
2918 {
2919 unsigned HOST_WIDE_INT w;
2920
2921 do_alg_addsub_t_m2:
2922 for (w = 1; (w & t) != 0; w <<= 1)
2923 ;
2924 /* If T was -1, then W will be zero after the loop. This is another
2925 case where T ends with ...111. Handling it as (T + 1) - 1 produces
2926 slightly better code and makes algorithm selection much faster
2927 than treating it like the ...0111 case
2928 below. */
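      /* For example, t == 7 is handled as 8 - 1: synthesize x * 8 as
	 x << 3, then subtract x.  */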
2929 if (w == 0
2930 || (w > 2
2931 /* Reject the case where t is 3.
2932 Thus we prefer addition in that case. */
2933 && t != 3))
2934 {
2935 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2936
2937 op_cost = add_cost (speed, mode);
2938 new_limit.cost = best_cost.cost - op_cost;
2939 new_limit.latency = best_cost.latency - op_cost;
2940 synth_mult (alg_in, t + 1, &new_limit, mode);
2941
2942 alg_in->cost.cost += op_cost;
2943 alg_in->cost.latency += op_cost;
2944 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2945 {
2946 best_cost = alg_in->cost;
2947 std::swap (alg_in, best_alg);
2948 best_alg->log[best_alg->ops] = 0;
2949 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2950 }
2951 }
2952 else
2953 {
2954 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2955
2956 op_cost = add_cost (speed, mode);
2957 new_limit.cost = best_cost.cost - op_cost;
2958 new_limit.latency = best_cost.latency - op_cost;
2959 synth_mult (alg_in, t - 1, &new_limit, mode);
2960
2961 alg_in->cost.cost += op_cost;
2962 alg_in->cost.latency += op_cost;
2963 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2964 {
2965 best_cost = alg_in->cost;
2966 std::swap (alg_in, best_alg);
2967 best_alg->log[best_alg->ops] = 0;
2968 best_alg->op[best_alg->ops] = alg_add_t_m2;
2969 }
2970 }
2971
2972 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2973 quickly with a - a * n for some appropriate constant n. */
2974 m = exact_log2 (-orig_t + 1);
2975 if (m >= 0 && m < maxm)
2976 {
2977 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2978 /* If the target has a cheap shift-and-subtract insn use
2979 that in preference to a shift insn followed by a sub insn.
2980 Assume that the shift-and-sub is "atomic" with a latency
2981 equal to its cost, otherwise assume that on superscalar
2982 hardware the shift may be executed concurrently with the
2983 earlier steps in the algorithm. */
2984 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2985 {
2986 op_cost = shiftsub1_cost (speed, mode, m);
2987 op_latency = op_cost;
2988 }
2989 else
2990 op_latency = add_cost (speed, mode);
2991
2992 new_limit.cost = best_cost.cost - op_cost;
2993 new_limit.latency = best_cost.latency - op_latency;
2994 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2995 &new_limit, mode);
2996
2997 alg_in->cost.cost += op_cost;
2998 alg_in->cost.latency += op_latency;
2999 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3000 {
3001 best_cost = alg_in->cost;
3002 std::swap (alg_in, best_alg);
3003 best_alg->log[best_alg->ops] = m;
3004 best_alg->op[best_alg->ops] = alg_sub_t_m2;
3005 }
3006 }
3007
3008 if (cache_hit)
3009 goto done;
3010 }
3011
3012 /* Look for factors of t of the form
3013 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3014 If we find such a factor, we can multiply by t using an algorithm that
3015 multiplies by q, shift the result by m and add/subtract it to itself.
3016
3017 We search for large factors first and loop down, even if large factors
3018 are less probable than small; if we find a large factor we will find a
3019 good sequence quickly, and therefore be able to prune (by decreasing
3020 COST_LIMIT) the search. */
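  /* For example, t == 45 factors as (2**3 + 1) * 5, so x * 45 can be
     synthesized as ((x * 5) << 3) + x * 5, with x * 5 in turn
     synthesized as (x << 2) + x.  */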
3021
3022 do_alg_addsub_factor:
3023 for (m = floor_log2 (t - 1); m >= 2; m--)
3024 {
3025 unsigned HOST_WIDE_INT d;
3026
3027 d = (HOST_WIDE_INT_1U << m) + 1;
3028 if (t % d == 0 && t > d && m < maxm
3029 && (!cache_hit || cache_alg == alg_add_factor))
3030 {
3031 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3032 if (shiftadd_cost (speed, mode, m) <= op_cost)
3033 op_cost = shiftadd_cost (speed, mode, m);
3034
3035 op_latency = op_cost;
3036
3038 new_limit.cost = best_cost.cost - op_cost;
3039 new_limit.latency = best_cost.latency - op_latency;
3040 synth_mult (alg_in, t / d, &new_limit, mode);
3041
3042 alg_in->cost.cost += op_cost;
3043 alg_in->cost.latency += op_latency;
3044 if (alg_in->cost.latency < op_cost)
3045 alg_in->cost.latency = op_cost;
3046 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3047 {
3048 best_cost = alg_in->cost;
3049 std::swap (alg_in, best_alg);
3050 best_alg->log[best_alg->ops] = m;
3051 best_alg->op[best_alg->ops] = alg_add_factor;
3052 }
3053 /* Other factors will have been taken care of in the recursion. */
3054 break;
3055 }
3056
3057 d = (HOST_WIDE_INT_1U << m) - 1;
3058 if (t % d == 0 && t > d && m < maxm
3059 && (!cache_hit || cache_alg == alg_sub_factor))
3060 {
3061 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3062 if (shiftsub0_cost (speed, mode, m) <= op_cost)
3063 op_cost = shiftsub0_cost (speed, mode, m);
3064
3065 op_latency = op_cost;
3066
3067 new_limit.cost = best_cost.cost - op_cost;
3068 new_limit.latency = best_cost.latency - op_latency;
3069 synth_mult (alg_in, t / d, &new_limit, mode);
3070
3071 alg_in->cost.cost += op_cost;
3072 alg_in->cost.latency += op_latency;
3073 if (alg_in->cost.latency < op_cost)
3074 alg_in->cost.latency = op_cost;
3075 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3076 {
3077 best_cost = alg_in->cost;
3078 std::swap (alg_in, best_alg);
3079 best_alg->log[best_alg->ops] = m;
3080 best_alg->op[best_alg->ops] = alg_sub_factor;
3081 }
3082 break;
3083 }
3084 }
3085 if (cache_hit)
3086 goto done;
3087
3088 /* Try shift-and-add (load effective address) instructions,
3089 i.e. do a*3, a*5, a*9. */
3090 if ((t & 1) != 0)
3091 {
3092 do_alg_add_t2_m:
3093 q = t - 1;
3094 m = ctz_hwi (q);
3095 if (q && m < maxm)
3096 {
3097 op_cost = shiftadd_cost (speed, mode, m);
3098 new_limit.cost = best_cost.cost - op_cost;
3099 new_limit.latency = best_cost.latency - op_cost;
3100 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3101
3102 alg_in->cost.cost += op_cost;
3103 alg_in->cost.latency += op_cost;
3104 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3105 {
3106 best_cost = alg_in->cost;
3107 std::swap (alg_in, best_alg);
3108 best_alg->log[best_alg->ops] = m;
3109 best_alg->op[best_alg->ops] = alg_add_t2_m;
3110 }
3111 }
3112 if (cache_hit)
3113 goto done;
3114
3115 do_alg_sub_t2_m:
3116 q = t + 1;
3117 m = ctz_hwi (q);
3118 if (q && m < maxm)
3119 {
3120 op_cost = shiftsub0_cost (speed, mode, m);
3121 new_limit.cost = best_cost.cost - op_cost;
3122 new_limit.latency = best_cost.latency - op_cost;
3123 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3124
3125 alg_in->cost.cost += op_cost;
3126 alg_in->cost.latency += op_cost;
3127 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3128 {
3129 best_cost = alg_in->cost;
3130 std::swap (alg_in, best_alg);
3131 best_alg->log[best_alg->ops] = m;
3132 best_alg->op[best_alg->ops] = alg_sub_t2_m;
3133 }
3134 }
3135 if (cache_hit)
3136 goto done;
3137 }
3138
3139 done:
3140 /* If best_cost has not decreased, we have not found any algorithm. */
3141 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3142 {
3143 /* We failed to find an algorithm. Record alg_impossible for
3144 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3145 we are asked to find an algorithm for T within the same or
3146 lower COST_LIMIT, we can immediately return to the
3147 caller. */
3148 entry_ptr->t = t;
3149 entry_ptr->mode = mode;
3150 entry_ptr->speed = speed;
3151 entry_ptr->alg = alg_impossible;
3152 entry_ptr->cost = *cost_limit;
3153 return;
3154 }
3155
3156 /* Cache the result. */
3157 if (!cache_hit)
3158 {
3159 entry_ptr->t = t;
3160 entry_ptr->mode = mode;
3161 entry_ptr->speed = speed;
3162 entry_ptr->alg = best_alg->op[best_alg->ops];
3163 entry_ptr->cost.cost = best_cost.cost;
3164 entry_ptr->cost.latency = best_cost.latency;
3165 }
3166
3167 /* If we are getting a too long sequence for `struct algorithm'
3168 to record, make this search fail. */
3169 if (best_alg->ops == MAX_BITS_PER_WORD)
3170 return;
3171
3172 /* Copy the algorithm from temporary space to the space at alg_out.
3173 We avoid using structure assignment because the majority of
3174 best_alg is normally undefined, and this is a critical function. */
3175 alg_out->ops = best_alg->ops + 1;
3176 alg_out->cost = best_cost;
3177 memcpy (alg_out->op, best_alg->op,
3178 alg_out->ops * sizeof *alg_out->op);
3179 memcpy (alg_out->log, best_alg->log,
3180 alg_out->ops * sizeof *alg_out->log);
3181 }
3182
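/* Illustrative example (not part of the algorithm itself): for T = 45
   synth_mult can factor 45 = 9 * 5 and record
       y = x + (x << 3)      -- x*9 via a shift-add step
       y = y + (y << 2)      -- *5 via alg_add_factor with log 2
   so the whole multiplication costs two shift-add operations on
   targets where shiftadd_cost is cheap. */
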
/* Find the cheapest way of multiplying a value of mode MODE by VAL.
   Try three variations:

       - a shift/add sequence based on VAL itself
       - a shift/add sequence based on -VAL, followed by a negation
       - a shift/add sequence based on VAL - 1, followed by an addition.

   Return true if the cheapest of these costs less than MULT_COST,
   describing the algorithm in *ALG and final fixup in *VARIANT. */

bool
choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
		     struct algorithm *alg, enum mult_variant *variant,
		     int mult_cost)
{
  struct algorithm alg2;
  struct mult_cost limit;
  int op_cost;
  bool speed = optimize_insn_for_speed_p ();

  /* Fail quickly for impossible bounds. */
  if (mult_cost < 0)
    return false;

  /* Ensure that mult_cost provides a reasonable upper bound.
     Any constant multiplication can be performed with less
     than 2 * bits additions. */
  op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
  if (mult_cost > op_cost)
    mult_cost = op_cost;

  *variant = basic_variant;
  limit.cost = mult_cost;
  limit.latency = mult_cost;
  synth_mult (alg, val, &limit, mode);

  /* This works only if the inverted value actually fits in an
     `unsigned int'. */
  if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
    {
      op_cost = neg_cost (speed, mode);
      if (MULT_COST_LESS (&alg->cost, mult_cost))
	{
	  limit.cost = alg->cost.cost - op_cost;
	  limit.latency = alg->cost.latency - op_cost;
	}
      else
	{
	  limit.cost = mult_cost - op_cost;
	  limit.latency = mult_cost - op_cost;
	}

      synth_mult (&alg2, -val, &limit, mode);
      alg2.cost.cost += op_cost;
      alg2.cost.latency += op_cost;
      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
	*alg = alg2, *variant = negate_variant;
    }

  /* This proves very useful for division-by-constant. */
  op_cost = add_cost (speed, mode);
  if (MULT_COST_LESS (&alg->cost, mult_cost))
    {
      limit.cost = alg->cost.cost - op_cost;
      limit.latency = alg->cost.latency - op_cost;
    }
  else
    {
      limit.cost = mult_cost - op_cost;
      limit.latency = mult_cost - op_cost;
    }

  synth_mult (&alg2, val - 1, &limit, mode);
  alg2.cost.cost += op_cost;
  alg2.cost.latency += op_cost;
  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
    *alg = alg2, *variant = add_variant;

  return MULT_COST_LESS (&alg->cost, mult_cost);
}

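/* Illustrative example (cost-dependent, so only a sketch): for
   VAL = -3 the basic variant would have to synthesize -3 directly,
   while the negation variant synthesizes 3*x = x + (x << 1) and
   appends one negation, computing -(x + (x << 1)); that is usually
   cheaper.  Likewise the addition variant helps whenever VAL - 1 is
   much simpler to synthesize than VAL itself. */
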
/* A subroutine of expand_mult, used for constant multiplications.
   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
   convenient.  Use the shift/add sequence described by ALG and apply
   the final fixup specified by VARIANT. */

static rtx
expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
		   rtx target, const struct algorithm *alg,
		   enum mult_variant variant)
{
  unsigned HOST_WIDE_INT val_so_far;
  rtx_insn *insn;
  rtx accum, tem;
  int opno;
  machine_mode nmode;

  /* Avoid referencing memory over and over and invalid sharing
     on SUBREGs. */
  op0 = force_reg (mode, op0);

  /* ACCUM starts out either as OP0 or as a zero, depending on
     the first operation. */

  if (alg->op[0] == alg_zero)
    {
      accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
      val_so_far = 0;
    }
  else if (alg->op[0] == alg_m)
    {
      accum = copy_to_mode_reg (mode, op0);
      val_so_far = 1;
    }
  else
    gcc_unreachable ();

  for (opno = 1; opno < alg->ops; opno++)
    {
      int log = alg->log[opno];
      rtx shift_subtarget = optimize ? 0 : accum;
      rtx add_target
	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
	   && !optimize)
	  ? target : 0;
      rtx accum_target = optimize ? 0 : accum;
      rtx accum_inner;

      switch (alg->op[opno])
	{
	case alg_shift:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  /* REG_EQUAL note will be attached to the following insn. */
	  emit_move_insn (accum, tem);
	  val_so_far <<= log;
	  break;

	case alg_add_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += HOST_WIDE_INT_1U << log;
	  break;

	case alg_sub_t_m2:
	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far -= HOST_WIDE_INT_1U << log;
	  break;

	case alg_add_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) + 1;
	  break;

	case alg_sub_t2_m:
	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
				log, shift_subtarget, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
				 add_target ? add_target : accum_target);
	  val_so_far = (val_so_far << log) - 1;
	  break;

	case alg_add_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
				 add_target ? add_target : accum_target);
	  val_so_far += val_so_far << log;
	  break;

	case alg_sub_factor:
	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
				 (add_target
				  ? add_target : (optimize ? 0 : tem)));
	  val_so_far = (val_so_far << log) - val_so_far;
	  break;

	default:
	  gcc_unreachable ();
	}

      if (SCALAR_INT_MODE_P (mode))
	{
	  /* Write a REG_EQUAL note on the last insn so that we can cse
	     multiplication sequences.  Note that if ACCUM is a SUBREG,
	     we've set the inner register and must properly indicate that. */
	  tem = op0, nmode = mode;
	  accum_inner = accum;
	  if (GET_CODE (accum) == SUBREG)
	    {
	      accum_inner = SUBREG_REG (accum);
	      nmode = GET_MODE (accum_inner);
	      tem = gen_lowpart (nmode, op0);
	    }

	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
	     In that case, only the low bits of accum would be guaranteed to
	     be equal to the content of the REG_EQUAL note, the upper bits
	     can be anything. */
	  if (!paradoxical_subreg_p (tem))
	    {
	      insn = get_last_insn ();
	      wide_int wval_so_far
		= wi::uhwi (val_so_far,
			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
	      rtx c = immed_wide_int_const (wval_so_far, nmode);
	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
				accum_inner);
	    }
	}
    }

  if (variant == negate_variant)
    {
      val_so_far = -val_so_far;
      accum = expand_unop (mode, neg_optab, accum, target, 0);
    }
  else if (variant == add_variant)
    {
      val_so_far = val_so_far + 1;
      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
    }

  /* Compare only the bits of val and val_so_far that are significant
     in the result mode, to avoid sign-/zero-extension confusion. */
  nmode = GET_MODE_INNER (mode);
  val &= GET_MODE_MASK (nmode);
  val_so_far &= GET_MODE_MASK (nmode);
  gcc_assert (val == (HOST_WIDE_INT) val_so_far);

  return accum;
}

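/* Worked example (illustrative only): for VAL = 10 with ALG
   { alg_m, alg_add_factor (log 2), alg_shift (log 1) } the loop
   above emits
       accum = op0;                    val_so_far = 1
       accum = accum + (accum << 2);   val_so_far = 5
       accum = accum << 1;             val_so_far = 10
   and the final assertion checks that val_so_far matches VAL. */
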
/* Perform a multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).

   We check specially for a constant integer as OP1.
   If you want this check for OP0 as well, then before calling
   you should swap the two operands if OP0 would be constant. */

rtx
expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
	     int unsignedp, bool no_libcall)
{
  enum mult_variant variant;
  struct algorithm algorithm;
  rtx scalar_op1;
  int max_cost;
  bool speed = optimize_insn_for_speed_p ();
  bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;

  if (CONSTANT_P (op0))
    std::swap (op0, op1);

  /* For vectors, there are several simplifications that can be made if
     all elements of the vector constant are identical. */
  scalar_op1 = unwrap_const_vec_duplicate (op1);

  if (INTEGRAL_MODE_P (mode))
    {
      rtx fake_reg;
      HOST_WIDE_INT coeff;
      bool is_neg;
      int mode_bitsize;

      if (op1 == CONST0_RTX (mode))
	return op1;
      if (op1 == CONST1_RTX (mode))
	return op0;
      if (op1 == CONSTM1_RTX (mode))
	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
			    op0, target, 0);

      if (do_trapv)
	goto skip_synth;

      /* If MODE is an integer vector mode, check whether the backend
	 supports vector left shift (by scalar or vector) at all.  If
	 not, we can't use a synthesized multiply. */
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
	goto skip_synth;

      /* These are the operations that are potentially turned into
	 a sequence of shifts and additions. */
      mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

      /* synth_mult does an `unsigned int' multiply.  As long as the mode is
	 less than or equal in size to `unsigned int' this doesn't matter.
	 If the mode is larger than `unsigned int', then synth_mult works
	 only if the constant value exactly fits in an `unsigned int' without
	 any truncation.  This means that multiplying by negative values does
	 not work; results are off by 2^32 on a 32-bit machine. */
      if (CONST_INT_P (scalar_op1))
	{
	  coeff = INTVAL (scalar_op1);
	  is_neg = coeff < 0;
	}
#if TARGET_SUPPORTS_WIDE_INT
      else if (CONST_WIDE_INT_P (scalar_op1))
#else
      else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
#endif
	{
	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
	  /* Perfect power of 2 (other than 1, which is handled above). */
	  if (shift > 0)
	    return expand_shift (LSHIFT_EXPR, mode, op0,
				 shift, target, unsignedp);
	  else
	    goto skip_synth;
	}
      else
	goto skip_synth;

      /* We used to test optimize here, on the grounds that it's better to
	 produce a smaller program when -O is not used.  But this causes
	 such a terrible slowdown sometimes that it seems better to always
	 use synth_mult. */

      /* Special case powers of two. */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
	return expand_shift (LSHIFT_EXPR, mode, op0,
			     floor_log2 (coeff), target, unsignedp);

      fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);

      /* Attempt to handle multiplication of DImode values by negative
	 coefficients, by performing the multiplication by a positive
	 multiplier and then inverting the result. */
      if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
	{
	  /* It's safe to use -coeff even for INT_MIN, as the
	     result is interpreted as an unsigned coefficient.
	     Exclude cost of op0 from max_cost to match the cost
	     calculation of the synth_mult. */
	  coeff = -(unsigned HOST_WIDE_INT) coeff;
	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
				    mode, speed)
		      - neg_cost (speed, mode));
	  if (max_cost <= 0)
	    goto skip_synth;

	  /* Special case powers of two. */
	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	    {
	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
				       floor_log2 (coeff), target, unsignedp);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }

	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
				   max_cost))
	    {
	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
					    &algorithm, variant);
	      return expand_unop (mode, neg_optab, temp, target, 0);
	    }
	  goto skip_synth;
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult. */
      max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
	return expand_mult_const (mode, op0, coeff, target,
				  &algorithm, variant);
    }
 skip_synth:

  /* Expand x*2.0 as x+x. */
  if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
      && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
    {
      op0 = force_reg (GET_MODE (op0), op0);
      return expand_binop (mode, add_optab, op0, op0,
			   target, unsignedp,
			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

  /* This used to use umul_optab if unsigned, but for non-widening multiply
     there is no difference between signed and unsigned. */
  op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
		      op0, op1, target, unsignedp,
		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
  gcc_assert (op0 || no_libcall);
  return op0;
}

/* Return a cost estimate for multiplying a register by the given
   COEFFicient in the given MODE and SPEED. */

int
mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
{
  int max_cost;
  struct algorithm algorithm;
  enum mult_variant variant;

  rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
			   mode, speed);
  if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
    return algorithm.cost.cost;
  else
    return max_cost;
}

/* Perform a widening multiplication and return an rtx for the result.
   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
   TARGET is a suggestion for where to store the result (an rtx).
   THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
   or smul_widen_optab.

   We check specially for a constant integer as OP1, comparing the
   cost of a widening multiply against the cost of a sequence of shifts
   and adds. */

rtx
expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
		      int unsignedp, optab this_optab)
{
  bool speed = optimize_insn_for_speed_p ();
  rtx cop1;

  if (CONST_INT_P (op1)
      && GET_MODE (op0) != VOIDmode
      && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
				this_optab == umul_widen_optab))
      && CONST_INT_P (cop1)
      && (INTVAL (cop1) >= 0
	  || HWI_COMPUTABLE_MODE_P (mode)))
    {
      HOST_WIDE_INT coeff = INTVAL (cop1);
      int max_cost;
      enum mult_variant variant;
      struct algorithm algorithm;

      if (coeff == 0)
	return CONST0_RTX (mode);

      /* Special case powers of two. */
      if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_shift (LSHIFT_EXPR, mode, op0,
			       floor_log2 (coeff), target, unsignedp);
	}

      /* Exclude cost of op0 from max_cost to match the cost
	 calculation of the synth_mult. */
      max_cost = mul_widen_cost (speed, mode);
      if (choose_mult_variant (mode, coeff, &algorithm, &variant,
			       max_cost))
	{
	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
	  return expand_mult_const (mode, op0, coeff, target,
				    &algorithm, variant);
	}
    }
  return expand_binop (mode, this_optab, op0, op1, target,
		       unsignedp, OPTAB_LIB_WIDEN);
}

/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
   replace division by D, and put the least significant N bits of the result
   in *MULTIPLIER_PTR and return the most significant bit.

   The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
   needed precision is in PRECISION (should be <= N).

   PRECISION should be as small as possible so this function can choose
   the multiplier more freely.

   The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
   is to be used for a final right shift is placed in *POST_SHIFT_PTR.

   Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
   where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */

unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
		   unsigned HOST_WIDE_INT *multiplier_ptr,
		   int *post_shift_ptr, int *lgup_ptr)
{
  int lgup, post_shift;
  int pow, pow2;

  /* lgup = ceil(log2(divisor)); */
  lgup = ceil_log2 (d);

  gcc_assert (lgup <= n);

  pow = n + lgup;
  pow2 = n + lgup - precision;

  /* mlow = 2^(N + lgup)/d */
  wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
  wide_int mlow = wi::udiv_trunc (val, d);

  /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
  val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
  wide_int mhigh = wi::udiv_trunc (val, d);

  /* If precision == N, then mlow, mhigh exceed 2^N
     (but they do not exceed 2^(N+1)). */

  /* Reduce to lowest terms. */
  for (post_shift = lgup; post_shift > 0; post_shift--)
    {
      unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
						       HOST_BITS_PER_WIDE_INT);
      unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
						       HOST_BITS_PER_WIDE_INT);
      if (ml_lo >= mh_lo)
	break;

      mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
      mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
    }

  *post_shift_ptr = post_shift;
  *lgup_ptr = lgup;
  if (n < HOST_BITS_PER_WIDE_INT)
    {
      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
      *multiplier_ptr = mhigh.to_uhwi () & mask;
      return mhigh.to_uhwi () >= mask;
    }
  else
    {
      *multiplier_ptr = mhigh.to_uhwi ();
      return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
    }
}

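/* Worked example (illustrative): for D = 7 with N = PRECISION = 32,
   lgup = 3, mlow = floor(2^35 / 7) = 0x124924924 and
   mhigh = floor((2^35 + 2^3) / 7) = 0x124924925; no reduction is
   possible, so *POST_SHIFT_PTR = 3, *MULTIPLIER_PTR = 0x24924925 and
   the return value is 1 (the full multiplier needs bit N).  The
   caller then uses the "multiplier too big" sequence: with
   hi = highpart (x * 0x24924925), x / 7 = (((x - hi) >> 1) + hi) >> 2. */
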
/* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
   congruent to 1 (mod 2**N). */

static unsigned HOST_WIDE_INT
invert_mod2n (unsigned HOST_WIDE_INT x, int n)
{
  /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y. */

  /* The algorithm notes that the choice y = x satisfies
     x*y == 1 mod 2^3, since x is assumed odd.
     Each iteration doubles the number of bits of significance in y. */

  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT y = x;
  int nbit = 3;

  mask = (n == HOST_BITS_PER_WIDE_INT
	  ? HOST_WIDE_INT_M1U
	  : (HOST_WIDE_INT_1U << n) - 1);

  while (nbit < n)
    {
      y = y * (2 - x*y) & mask;		/* Modulo 2^N */
      nbit *= 2;
    }
  return y;
}

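/* Worked example (illustrative): for x = 3, n = 8 the Newton
   iteration above computes y = 3 -> 235 -> 171, and indeed
   3 * 171 = 513 = 2 * 256 + 1 == 1 (mod 2^8). */
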
/* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
   with the wrong signedness.  ADJ_OPERAND is already the high half of the
   product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
   to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
   become signed.

   The result is put in TARGET if that is convenient.

   MODE is the mode of operation. */

rtx
expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
			     rtx op1, rtx target, int unsignedp)
{
  rtx tem;
  enum rtx_code adj_code = unsignedp ? PLUS : MINUS;

  tem = expand_shift (RSHIFT_EXPR, mode, op0,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op1, NULL_RTX);
  adj_operand
    = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
		     adj_operand);

  tem = expand_shift (RSHIFT_EXPR, mode, op1,
		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
  tem = expand_and (mode, tem, op0, NULL_RTX);
  target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
			  target);

  return target;
}

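/* The identity behind the adjustment (for N-bit two's complement
   values, working mod 2^N):
       uhigh (x, y) = shigh (x, y) + (x < 0 ? y : 0) + (y < 0 ? x : 0).
   The arithmetic right shifts above produce all-ones masks exactly
   when the corresponding operand is negative, so the two AND terms
   are those correction terms; ADJ_CODE selects the direction. */
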
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP. */

static rtx
extract_high_half (scalar_int_mode mode, rtx op)
{
  if (mode == word_mode)
    return gen_highpart (mode, op);

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
		     GET_MODE_BITSIZE (mode), 0, 1);
  return convert_modes (mode, wider_mode, op, 0);
}

/* Like expmed_mult_highpart, but only consider using a multiplication
   optab.  OP1 is an rtx for the constant operand. */

static rtx
expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
			    rtx target, int unsignedp, int max_cost)
{
  rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
  optab moptab;
  rtx tem;
  int size;
  bool speed = optimize_insn_for_speed_p ();

  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();

  size = GET_MODE_BITSIZE (mode);

  /* Firstly, try using a multiplication insn that only generates the needed
     high part of the product, with the signedness given by UNSIGNEDP. */
  if (mul_highpart_cost (speed, mode) < max_cost)
    {
      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	return tem;
    }

  /* Secondly, the same as above, but with the signedness opposite to
     UNSIGNEDP; the result then needs adjusting after the multiplication. */
  if (size - 1 < BITS_PER_WORD
      && (mul_highpart_cost (speed, mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
      tem = expand_binop (mode, moptab, op0, narrow_op1, target,
			  unsignedp, OPTAB_DIRECT);
      if (tem)
	/* We used the wrong signedness.  Adjust the result. */
	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					    tem, unsignedp);
    }

  /* Try widening multiplication. */
  moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && mul_widen_cost (speed, wider_mode) < max_cost)
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
			  unsignedp, OPTAB_WIDEN);
      if (tem)
	return extract_high_half (mode, tem);
    }

  /* Try widening the mode and performing a non-widening multiplication. */
  if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
	  < max_cost))
    {
      rtx_insn *insns;
      rtx wop0, wop1;

      /* We need to widen the operands, for example to ensure the
	 constant multiplier is correctly sign or zero extended.
	 Use a sequence to clean up any instructions emitted by
	 the conversions if things don't work out. */
      start_sequence ();
      wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
      wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
      tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
			  unsignedp, OPTAB_WIDEN);
      insns = get_insns ();
      end_sequence ();

      if (tem)
	{
	  emit_insn (insns);
	  return extract_high_half (mode, tem);
	}
    }

  /* Try widening multiplication of opposite signedness, and adjust. */
  moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
  if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
      && size - 1 < BITS_PER_WORD
      && (mul_widen_cost (speed, wider_mode)
	  + 2 * shift_cost (speed, mode, size-1)
	  + 4 * add_cost (speed, mode) < max_cost))
    {
      tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
      if (tem != 0)
	{
	  tem = extract_high_half (mode, tem);
	  /* We used the wrong signedness.  Adjust the result. */
	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
					      target, unsignedp);
	}
    }

  return 0;
}

/* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
   putting the high half of the result in TARGET if that is convenient,
   and return where the result is.  If the operation cannot be performed,
   0 is returned.

   MODE is the mode of operation and result.

   UNSIGNEDP nonzero means unsigned multiply.

   MAX_COST is the total allowed cost for the expanded RTL. */

static rtx
expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
		      rtx target, int unsignedp, int max_cost)
{
  unsigned HOST_WIDE_INT cnst1;
  int extra_cost;
  bool sign_adjust = false;
  enum mult_variant variant;
  struct algorithm alg;
  rtx tem;
  bool speed = optimize_insn_for_speed_p ();

  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
  gcc_assert (HWI_COMPUTABLE_MODE_P (mode));

  cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);

  /* We can't optimize modes wider than BITS_PER_WORD.
     ??? We might be able to perform double-word arithmetic if
     mode == word_mode, however all the cost calculations in
     synth_mult etc. assume single-word operations. */
  scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
  if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
    return expmed_mult_highpart_optab (mode, op0, op1, target,
				       unsignedp, max_cost);

  extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);

  /* Check whether we try to multiply by a negative constant. */
  if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
    {
      sign_adjust = true;
      extra_cost += add_cost (speed, mode);
    }

  /* See whether shift/add multiplication is cheap enough. */
  if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
			   max_cost - extra_cost))
    {
      /* See whether the specialized multiplication optabs are
	 cheaper than the shift/add version. */
      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
					alg.cost.cost + extra_cost);
      if (tem)
	return tem;

      tem = convert_to_mode (wider_mode, op0, unsignedp);
      tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
      tem = extract_high_half (mode, tem);

      /* Adjust result for signedness. */
      if (sign_adjust)
	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);

      return tem;
    }
  return expmed_mult_highpart_optab (mode, op0, op1, target,
				     unsignedp, max_cost);
}

/* Expand signed modulus of OP0 by a power of two D in mode MODE. */

static rtx
expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx result, temp, shift;
  rtx_code_label *label;
  int logd;
  int prec = GET_MODE_PRECISION (mode);

  logd = floor_log2 (d);
  result = gen_reg_rtx (mode);

  /* Avoid conditional branches when they're expensive. */
  if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
      && optimize_insn_for_speed_p ())
    {
      rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
				      mode, 0, -1);
      if (signmask)
	{
	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
	  signmask = force_reg (mode, signmask);
	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);

	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
	     which instruction sequence to use.  If logical right shifts
	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */

	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
		  > COSTS_N_INSNS (2)))
	    {
	      temp = expand_binop (mode, xor_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, xor_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  else
	    {
	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      signmask = force_reg (mode, signmask);

	      temp = expand_binop (mode, add_optab, op0, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, and_optab, temp,
				   gen_int_mode (masklow, mode),
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	      temp = expand_binop (mode, sub_optab, temp, signmask,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
	    }
	  return temp;
	}
    }

  /* Mask contains the mode's signbit and the significant bits of the
     modulus.  By including the signbit in the operation, many targets
     can avoid an explicit compare operation in the following comparison
     against zero. */
  wide_int mask = wi::mask (logd, false, prec);
  mask = wi::set_bit (mask, prec - 1);

  temp = expand_binop (mode, and_optab, op0,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);

  label = gen_label_rtx ();
  do_cmp_and_jump (result, const0_rtx, GE, mode, label);

  temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);

  mask = wi::mask (logd, true, prec);
  temp = expand_binop (mode, ior_optab, temp,
		       immed_wide_int_const (mask, mode),
		       result, 1, OPTAB_LIB_WIDEN);
  temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
		       0, OPTAB_LIB_WIDEN);
  if (temp != result)
    emit_move_insn (result, temp);
  emit_label (label);
  return result;
}

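/* Worked example of the 2-XOR sequence above (illustrative): with
   d = 8 and op0 = -11, signmask = -1, so
       ((-11 ^ -1) - -1) = 11,  11 & 7 = 3,  (3 ^ -1) - -1 = -3,
   which is indeed -11 % 8 under truncating (C) semantics. */
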
/* Expand signed division of OP0 by a power of two D in mode MODE.
   This routine is only called for positive values of D. */

static rtx
expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
{
  rtx temp;
  rtx_code_label *label;
  int logd;

  logd = floor_log2 (d);

  if (d == 2
      && BRANCH_COST (optimize_insn_for_speed_p (),
		      false) >= 1)
    {
      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  if (HAVE_conditional_move
      && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
    {
      rtx temp2;

      start_sequence ();
      temp2 = copy_to_mode_reg (mode, op0);
      temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);

      /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
      temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
				     mode, temp, temp2, mode, 0);
      if (temp2)
	{
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn (seq);
	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
	}
      end_sequence ();
    }

  if (BRANCH_COST (optimize_insn_for_speed_p (),
		   false) >= 2)
    {
      int ushift = GET_MODE_BITSIZE (mode) - logd;

      temp = gen_reg_rtx (mode);
      temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
      if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
	     > COSTS_N_INSNS (1))
	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
      else
	temp = expand_shift (RSHIFT_EXPR, mode, temp,
			     ushift, NULL_RTX, 1);
      temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
			   0, OPTAB_LIB_WIDEN);
      return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
    }

  label = gen_label_rtx ();
  temp = copy_to_mode_reg (mode, op0);
  do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
  expand_inc (temp, gen_int_mode (d - 1, mode));
  emit_label (label);
  return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
}

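/* All of the variants above compute the same correction
   (illustrative): signed x / 4 becomes (x + (x < 0 ? 3 : 0)) >> 2,
   so for x = -7 we get (-7 + 3) >> 2 = -1, the truncated quotient,
   instead of the -2 an uncorrected arithmetic shift would give. */
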
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
   if that is convenient, and returning where the result is.
   You may request either the quotient or the remainder as the result;
   specify REM_FLAG nonzero to get the remainder.

   CODE is the expression code for which kind of division this is;
   it controls how rounding is done.  MODE is the machine mode to use.
   UNSIGNEDP nonzero means do unsigned division. */

/* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
   and then correct it by or'ing in missing high bits
   if result of ANDI is nonzero.
   For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
   This could optimize to a bfexts instruction.
   But C doesn't use these operations, so their optimizations are
   left for later. */
/* ??? For modulo, we don't actually need the highpart of the first product,
   the low part will do nicely.  And for small divisors, the second multiply
   can also be a low-part only multiply or even be completely left out.
   E.g. to calculate the remainder of a division by 3 with a 32-bit
   multiply, multiply by 0x55555556 and extract the upper two bits;
   the result is exact for inputs up to 0x1fffffff.
   The input range can be reduced by using cross-sum rules.
   For odd divisors >= 3, the following table gives right shift counts
   so that if a number is shifted by an integer multiple of the given
   amount, the remainder stays the same:
   2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
   14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
   0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
   20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
   0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12

   Cross-sum rules for even numbers can be derived by leaving as many bits
   to the right alone as the divisor has zeros to the right.
   E.g. if x is an unsigned 32-bit number:
   (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
   */
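
/* For instance (an illustrative check of the claim above): for x = 5
   the low 32 bits of 5 * 0x55555556 are 0xAAAAAAAE, whose upper two
   bits are 0b10 = 2 = 5 mod 3. */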

rtx
expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
	       rtx op0, rtx op1, rtx target, int unsignedp)
{
  machine_mode compute_mode;
  rtx tquotient;
  rtx quotient = 0, remainder = 0;
  rtx_insn *last;
  rtx_insn *insn;
  optab optab1, optab2;
  int op1_is_constant, op1_is_pow2 = 0;
  int max_cost, extra_cost;
  static HOST_WIDE_INT last_div_const = 0;
  bool speed = optimize_insn_for_speed_p ();

  op1_is_constant = CONST_INT_P (op1);
  if (op1_is_constant)
    {
      wide_int ext_op1 = rtx_mode_t (op1, mode);
      op1_is_pow2 = (wi::popcount (ext_op1) == 1
		     || (! unsignedp
			 && wi::popcount (wi::neg (ext_op1)) == 1));
    }

  /*
     This is the structure of expand_divmod:

     First comes code to fix up the operands so we can perform the operations
     correctly and efficiently.

     Second comes a switch statement with code specific for each rounding mode.
     For some special operands this code emits all RTL for the desired
     operation, for other cases, it generates only a quotient and stores it in
     QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
     to indicate that it has not done anything.

     Last comes code that finishes the operation.  If QUOTIENT is set and
     REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
     QUOTIENT is not set, it is computed using trunc rounding.

     We try to generate special code for division and remainder when OP1 is a
     constant.  If |OP1| = 2**n we can use shifts and some other fast
     operations.  For other values of OP1, we compute a carefully selected
     fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
     by m.

     In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
     half of the product.  Different strategies for generating the product are
     implemented in expmed_mult_highpart.

     If what we actually want is the remainder, we generate that by another
     by-constant multiplication and a subtraction. */

  /* We shouldn't be called with OP1 == const1_rtx, but some of the
     code below will malfunction if we are, so check here and handle
     the special case if so. */
  if (op1 == const1_rtx)
    return rem_flag ? const0_rtx : op0;

  /* When dividing by -1, we could get an overflow.
     negv_optab can handle overflows. */
  if (! unsignedp && op1 == constm1_rtx)
    {
      if (rem_flag)
	return const0_rtx;
      return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
			  ? negv_optab : neg_optab, op0, target, 0);
    }

  if (target
      /* Don't use the function value register as a target
	 since we have to read it as well as write it,
	 and function-inlining gets confused by this. */
      && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
	  /* Don't clobber an operand while doing a multi-step calculation. */
	  || ((rem_flag || op1_is_constant)
	      && (reg_mentioned_p (target, op0)
		  || (MEM_P (op0) && MEM_P (target))))
	  || reg_mentioned_p (target, op1)
	  || (MEM_P (op1) && MEM_P (target))))
    target = 0;

  /* Get the mode in which to perform this computation.  Normally it will
     be MODE, but sometimes we can't do the desired operation in MODE.
     If so, pick a wider mode in which we can do the operation.  Convert
     to that mode at the start to avoid repeated conversions.

     First see what operations we need.  These depend on the expression
     we are evaluating.  (We assume that divxx3 insns exist under the
     same conditions as modxx3 insns, and that these insns don't normally
     fail.  If these assumptions are not correct, we may generate less
     efficient code in some cases.)

     Then see if we find a mode in which we can open-code that operation
     (either a division, modulus, or shift).  Finally, check for the smallest
     mode for which we can do the operation with a library call. */

  /* We might want to refine this now that we have division-by-constant
     optimization.  Since expmed_mult_highpart tries so many variants, it is
     not straightforward to generalize this.  Maybe we should make an array
     of possible modes in init_expmed?  Save this for GCC 2.7. */

  optab1 = (op1_is_pow2
	    ? (unsignedp ? lshr_optab : ashr_optab)
	    : (unsignedp ? udiv_optab : sdiv_optab));
  optab2 = (op1_is_pow2 ? optab1
	    : (unsignedp ? udivmod_optab : sdivmod_optab));

  FOR_EACH_MODE_FROM (compute_mode, mode)
    if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
      break;

  if (compute_mode == VOIDmode)
    FOR_EACH_MODE_FROM (compute_mode, mode)
      if (optab_libfunc (optab1, compute_mode)
	  || optab_libfunc (optab2, compute_mode))
	break;

  /* If we still couldn't find a mode, use MODE, but expand_binop will
     probably die. */
  if (compute_mode == VOIDmode)
    compute_mode = mode;

  if (target && GET_MODE (target) == compute_mode)
    tquotient = target;
  else
    tquotient = gen_reg_rtx (compute_mode);

#if 0
  /* It should be possible to restrict the precision to GET_MODE_BITSIZE
     (mode), and thereby get better code when OP1 is a constant.  Do that
     later.  It will require going over all usages of SIZE below. */
  size = GET_MODE_BITSIZE (mode);
#endif

  /* Only deduct something for a REM if the last divide done was
     for a different constant.  Then set the constant of the last
     divide. */
  max_cost = (unsignedp
	      ? udiv_cost (speed, compute_mode)
	      : sdiv_cost (speed, compute_mode));
  if (rem_flag && ! (last_div_const != 0 && op1_is_constant
		     && INTVAL (op1) == last_div_const))
    max_cost -= (mul_cost (speed, compute_mode)
		 + add_cost (speed, compute_mode));

  last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;

  /* Now convert to the best mode to use. */
  if (compute_mode != mode)
    {
      op0 = convert_modes (compute_mode, mode, op0, unsignedp);
      op1 = convert_modes (compute_mode, mode, op1, unsignedp);

      /* convert_modes may have placed op1 into a register, so we
	 must recompute the following. */
      op1_is_constant = CONST_INT_P (op1);
      if (op1_is_constant)
	{
	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
			 || (! unsignedp
			     && wi::popcount (wi::neg (ext_op1)) == 1));
	}
      else
	op1_is_pow2 = 0;
    }

  /* If one of the operands is a volatile MEM, copy it into a register. */

  if (MEM_P (op0) && MEM_VOLATILE_P (op0))
    op0 = force_reg (compute_mode, op0);
  if (MEM_P (op1) && MEM_VOLATILE_P (op1))
    op1 = force_reg (compute_mode, op1);

  /* If we need the remainder or if OP1 is constant, we need to
     put OP0 in a register in case it has any queued subexpressions. */
  if (rem_flag || op1_is_constant)
    op0 = force_reg (compute_mode, op0);

  last = get_last_insn ();

  /* Promote floor rounding to trunc rounding for unsigned operations. */
  if (unsignedp)
    {
      if (code == FLOOR_DIV_EXPR)
	code = TRUNC_DIV_EXPR;
      if (code == FLOOR_MOD_EXPR)
	code = TRUNC_MOD_EXPR;
      if (code == EXACT_DIV_EXPR && op1_is_pow2)
	code = TRUNC_DIV_EXPR;
    }

  if (op1 != const0_rtx)
    switch (code)
      {
      case TRUNC_MOD_EXPR:
      case TRUNC_DIV_EXPR:
	if (op1_is_constant)
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    if (unsignedp)
	      {
		unsigned HOST_WIDE_INT mh, ml;
		int pre_shift, post_shift;
		int dummy;
		wide_int wd = rtx_mode_t (op1, int_mode);
		unsigned HOST_WIDE_INT d = wd.to_uhwi ();

		if (wi::popcount (wd) == 1)
		  {
		    pre_shift = floor_log2 (d);
		    if (rem_flag)
		      {
			unsigned HOST_WIDE_INT mask
			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
			remainder
			  = expand_binop (int_mode, and_optab, op0,
					  gen_int_mode (mask, int_mode),
					  remainder, 1,
					  OPTAB_LIB_WIDEN);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }
		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
					     pre_shift, tquotient, 1);
		  }
		else if (size <= HOST_BITS_PER_WIDE_INT)
		  {
		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
		      {
			/* Most significant bit of divisor is set; emit an scc
			   insn. */
			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
							  int_mode, 1, 1);
		      }
		    else
		      {
			/* Find a suitable multiplier and right shift count
			   instead of multiplying with D. */

			mh = choose_multiplier (d, size, size,
						&ml, &post_shift, &dummy);

			/* If the suggested multiplier is more than SIZE bits,
			   we can do better for even divisors, using an
			   initial right shift. */
			if (mh != 0 && (d & 1) == 0)
			  {
			    pre_shift = ctz_or_zero (d);
			    mh = choose_multiplier (d >> pre_shift, size,
						    size - pre_shift,
						    &ml, &post_shift, &dummy);
			    gcc_assert (!mh);
			  }
			else
			  pre_shift = 0;

			if (mh != 0)
			  {
			    rtx t1, t2, t3, t4;

			    if (post_shift - 1 >= BITS_PER_WORD)
			      goto fail1;

			    extra_cost
			      = (shift_cost (speed, int_mode, post_shift - 1)
				 + shift_cost (speed, int_mode, 1)
				 + 2 * add_cost (speed, int_mode));
			    t1 = expmed_mult_highpart
			      (int_mode, op0, gen_int_mode (ml, int_mode),
			       NULL_RTX, 1, max_cost - extra_cost);
			    if (t1 == 0)
			      goto fail1;
			    t2 = force_operand (gen_rtx_MINUS (int_mode,
							       op0, t1),
						NULL_RTX);
			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
					       t2, 1, NULL_RTX, 1);
			    t4 = force_operand (gen_rtx_PLUS (int_mode,
							      t1, t3),
						NULL_RTX);
			    quotient = expand_shift
			      (RSHIFT_EXPR, int_mode, t4,
			       post_shift - 1, tquotient, 1);
			  }
			else
			  {
			    rtx t1, t2;

			    if (pre_shift >= BITS_PER_WORD
				|| post_shift >= BITS_PER_WORD)
			      goto fail1;

			    t1 = expand_shift
			      (RSHIFT_EXPR, int_mode, op0,
			       pre_shift, NULL_RTX, 1);
			    extra_cost
			      = (shift_cost (speed, int_mode, pre_shift)
				 + shift_cost (speed, int_mode, post_shift));
			    t2 = expmed_mult_highpart
			      (int_mode, t1,
			       gen_int_mode (ml, int_mode),
			       NULL_RTX, 1, max_cost - extra_cost);
			    if (t2 == 0)
			      goto fail1;
			    quotient = expand_shift
			      (RSHIFT_EXPR, int_mode, t2,
			       post_shift, tquotient, 1);
			  }
		      }
		  }
		else /* Too wide a mode to use the tricky code. */
		  break;

		insn = get_last_insn ();
		if (insn != last)
		  set_dst_reg_note (insn, REG_EQUAL,
				    gen_rtx_UDIV (int_mode, op0, op1),
				    quotient);
	      }
	    else /* TRUNC_DIV, signed */
	      {
		unsigned HOST_WIDE_INT ml;
		int lgup, post_shift;
		rtx mlr;
		HOST_WIDE_INT d = INTVAL (op1);
		unsigned HOST_WIDE_INT abs_d;

		/* Not prepared to handle division/remainder by
		   0xffffffffffffffff8000000000000000 etc. */
		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
		  break;

		/* Since d might be INT_MIN, we have to cast to
		   unsigned HOST_WIDE_INT before negating to avoid
		   undefined signed overflow. */
		abs_d = (d >= 0
			 ? (unsigned HOST_WIDE_INT) d
			 : - (unsigned HOST_WIDE_INT) d);

		/* n rem d = n rem -d */
		if (rem_flag && d < 0)
		  {
		    d = abs_d;
		    op1 = gen_int_mode (abs_d, int_mode);
		  }

		if (d == 1)
		  quotient = op0;
		else if (d == -1)
		  quotient = expand_unop (int_mode, neg_optab, op0,
					  tquotient, 0);
		else if (size <= HOST_BITS_PER_WIDE_INT
			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
		  {
		    /* This case is not handled correctly below. */
		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
						int_mode, 1, 1);
		    if (quotient == 0)
		      goto fail1;
		  }
		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
			 && (rem_flag
			     ? smod_pow2_cheap (speed, int_mode)
			     : sdiv_pow2_cheap (speed, int_mode))
			 /* We assume that the cheap metric is true if the
			    optab has an expander for this mode. */
			 && ((optab_handler ((rem_flag ? smod_optab
					      : sdiv_optab),
					     int_mode)
			      != CODE_FOR_nothing)
			     || (optab_handler (sdivmod_optab, int_mode)
				 != CODE_FOR_nothing)))
		  ;
		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
		  {
		    if (rem_flag)
		      {
			remainder = expand_smod_pow2 (int_mode, op0, d);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }

		    if (sdiv_pow2_cheap (speed, int_mode)
			&& ((optab_handler (sdiv_optab, int_mode)
			     != CODE_FOR_nothing)
			    || (optab_handler (sdivmod_optab, int_mode)
				!= CODE_FOR_nothing)))
		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
						int_mode, op0,
						gen_int_mode (abs_d,
							      int_mode),
						NULL_RTX, 0);
		    else
		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);

		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
		       negate the quotient. */
		    if (d < 0)
		      {
			insn = get_last_insn ();
			if (insn != last
			    && abs_d < (HOST_WIDE_INT_1U
					<< (HOST_BITS_PER_WIDE_INT - 1)))
			  set_dst_reg_note (insn, REG_EQUAL,
					    gen_rtx_DIV (int_mode, op0,
							 gen_int_mode
							 (abs_d,
							  int_mode)),
					    quotient);

			quotient = expand_unop (int_mode, neg_optab,
						quotient, quotient, 0);
		      }
		  }
		else if (size <= HOST_BITS_PER_WIDE_INT)
		  {
		    choose_multiplier (abs_d, size, size - 1,
				       &ml, &post_shift, &lgup);
		    if (ml < HOST_WIDE_INT_1U << (size - 1))
		      {
			rtx t1, t2, t3;

			if (post_shift >= BITS_PER_WORD
			    || size - 1 >= BITS_PER_WORD)
			  goto fail1;

			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + add_cost (speed, int_mode));
			t1 = expmed_mult_highpart
			  (int_mode, op0, gen_int_mode (ml, int_mode),
			   NULL_RTX, 0, max_cost - extra_cost);
			if (t1 == 0)
			  goto fail1;
			t2 = expand_shift
			  (RSHIFT_EXPR, int_mode, t1,
			   post_shift, NULL_RTX, 0);
			t3 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			if (d < 0)
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
					     tquotient);
			else
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
					     tquotient);
		      }
		    else
		      {
			rtx t1, t2, t3, t4;

			if (post_shift >= BITS_PER_WORD
			    || size - 1 >= BITS_PER_WORD)
			  goto fail1;

			ml |= HOST_WIDE_INT_M1U << (size - 1);
			mlr = gen_int_mode (ml, int_mode);
			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + 2 * add_cost (speed, int_mode));
			t1 = expmed_mult_highpart (int_mode, op0, mlr,
						   NULL_RTX, 0,
						   max_cost - extra_cost);
			if (t1 == 0)
			  goto fail1;
			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
					    NULL_RTX);
			t3 = expand_shift
			  (RSHIFT_EXPR, int_mode, t2,
			   post_shift, NULL_RTX, 0);
			t4 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			if (d < 0)
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
					     tquotient);
			else
			  quotient
			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
					     tquotient);
		      }
		  }
		else /* Too wide a mode to use the tricky code. */
		  break;

		insn = get_last_insn ();
		if (insn != last)
		  set_dst_reg_note (insn, REG_EQUAL,
				    gen_rtx_DIV (int_mode, op0, op1),
				    quotient);
	      }
	    break;
	  }
      fail1:
	delete_insns_since (last);
	break;

      case FLOOR_DIV_EXPR:
      case FLOOR_MOD_EXPR:
	/* We will come here only for signed operations. */
	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
	  {
	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
	    int size = GET_MODE_BITSIZE (int_mode);
	    unsigned HOST_WIDE_INT mh, ml;
	    int pre_shift, lgup, post_shift;
	    HOST_WIDE_INT d = INTVAL (op1);

	    if (d > 0)
	      {
		/* We could just as easily deal with negative constants here,
		   but it does not seem worth the trouble for GCC 2.6. */
		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
		  {
		    pre_shift = floor_log2 (d);
		    if (rem_flag)
		      {
			unsigned HOST_WIDE_INT mask
			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
			remainder = expand_binop
			  (int_mode, and_optab, op0,
			   gen_int_mode (mask, int_mode),
			   remainder, 0, OPTAB_LIB_WIDEN);
			if (remainder)
			  return gen_lowpart (mode, remainder);
		      }
		    quotient = expand_shift
		      (RSHIFT_EXPR, int_mode, op0,
		       pre_shift, tquotient, 0);
		  }
		else
		  {
		    rtx t1, t2, t3, t4;

		    mh = choose_multiplier (d, size, size - 1,
					    &ml, &post_shift, &lgup);
		    gcc_assert (!mh);

		    if (post_shift < BITS_PER_WORD
			&& size - 1 < BITS_PER_WORD)
		      {
			t1 = expand_shift
			  (RSHIFT_EXPR, int_mode, op0,
			   size - 1, NULL_RTX, 0);
			t2 = expand_binop (int_mode, xor_optab, op0, t1,
					   NULL_RTX, 0, OPTAB_WIDEN);
			extra_cost = (shift_cost (speed, int_mode, post_shift)
				      + shift_cost (speed, int_mode, size - 1)
				      + 2 * add_cost (speed, int_mode));
			t3 = expmed_mult_highpart
			  (int_mode, t2, gen_int_mode (ml, int_mode),
			   NULL_RTX, 1, max_cost - extra_cost);
			if (t3 != 0)
			  {
			    t4 = expand_shift
			      (RSHIFT_EXPR, int_mode, t3,
			       post_shift, NULL_RTX, 1);
			    quotient = expand_binop (int_mode, xor_optab,
						     t4, t1, tquotient, 0,
						     OPTAB_WIDEN);
			  }
		      }
		  }
	      }
	    else
	      {
		rtx nsign, t1, t2, t3, t4;
		t1 = force_operand (gen_rtx_PLUS (int_mode,
						  op0, constm1_rtx), NULL_RTX);
		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
				   0, OPTAB_WIDEN);
		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
				      size - 1, NULL_RTX, 0);
		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
				    NULL_RTX);
		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
				    NULL_RTX, 0);
		if (t4)
		  {
		    rtx t5;
		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
				      NULL_RTX, 0);
		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
					      tquotient);
		  }
	      }
	  }

	if (quotient != 0)
	  break;
	delete_insns_since (last);

	/* Try using an instruction that produces both the quotient and
	   remainder, using truncation.  We can easily compensate the quotient
	   or remainder to get floor rounding, once we have the remainder.
	   Notice that we compute also the final remainder value here,
	   and return the result right away. */
	if (target == 0 || GET_MODE (target) != compute_mode)
	  target = gen_reg_rtx (compute_mode);

	if (rem_flag)
	  {
	    remainder
	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
	    quotient = gen_reg_rtx (compute_mode);
	  }
	else
	  {
	    quotient
	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
	    remainder = gen_reg_rtx (compute_mode);
	  }

	if (expand_twoval_binop (sdivmod_optab, op0, op1,
				 quotient, remainder, 0))
	  {
	    /* This could be computed with a branch-less sequence.
	       Save that for later. */
	    rtx tem;
	    rtx_code_label *label = gen_label_rtx ();
	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
				NULL_RTX, 0, OPTAB_WIDEN);
	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
	    expand_dec (quotient, const1_rtx);
	    expand_inc (remainder, op1);
	    emit_label (label);
	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
	  }

	/* No luck with division elimination or divmod.  Have to do it
	   by conditionally adjusting op0 *and* the result. */
	{
	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
	  rtx adjusted_op0;
	  rtx tem;

	  quotient = gen_reg_rtx (compute_mode);
	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
	  label1 = gen_label_rtx ();
	  label2 = gen_label_rtx ();
	  label3 = gen_label_rtx ();
	  label4 = gen_label_rtx ();
	  label5 = gen_label_rtx ();
	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  emit_jump_insn (targetm.gen_jump (label5));
	  emit_barrier ();
	  emit_label (label1);
	  expand_inc (adjusted_op0, const1_rtx);
	  emit_jump_insn (targetm.gen_jump (label4));
	  emit_barrier ();
	  emit_label (label2);
	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  emit_jump_insn (targetm.gen_jump (label5));
	  emit_barrier ();
	  emit_label (label3);
	  expand_dec (adjusted_op0, const1_rtx);
	  emit_label (label4);
	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
			      quotient, 0, OPTAB_LIB_WIDEN);
	  if (tem != quotient)
	    emit_move_insn (quotient, tem);
	  expand_dec (quotient, const1_rtx);
	  emit_label (label5);
	}
	break;
4872
4873 case CEIL_DIV_EXPR:
4874 case CEIL_MOD_EXPR:
4875 if (unsignedp)
4876 {
4877 if (op1_is_constant
4878 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4879 && (HWI_COMPUTABLE_MODE_P (compute_mode)
4880 || INTVAL (op1) >= 0))
4881 {
4882 scalar_int_mode int_mode
4883 = as_a <scalar_int_mode> (compute_mode);
4884 rtx t1, t2, t3;
4885 unsigned HOST_WIDE_INT d = INTVAL (op1);
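/* For unsigned op0 and d = 2^k, the ceiling quotient is the
   truncating shift plus one whenever any low bits are set:
   e.g. ceil (9/4) = (9 >> 2) + ((9 & 3) != 0) = 2 + 1 = 3.  */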
4886 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4887 floor_log2 (d), tquotient, 1);
4888 t2 = expand_binop (int_mode, and_optab, op0,
4889 gen_int_mode (d - 1, int_mode),
4890 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4891 t3 = gen_reg_rtx (int_mode);
4892 t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4893 if (t3 == 0)
4894 {
4895 rtx_code_label *lab;
4896 lab = gen_label_rtx ();
4897 do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4898 expand_inc (t1, const1_rtx);
4899 emit_label (lab);
4900 quotient = t1;
4901 }
4902 else
4903 quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4904 tquotient);
4905 break;
4906 }
4907
4908 /* Try using an instruction that produces both the quotient and
4909 remainder, using truncation. We can easily compensate the
4910 quotient or remainder to get ceiling rounding, once we have the
4911 remainder. Notice that we also compute the final remainder
4912 value here, and return the result right away. */
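/* For example, op0 = 9, op1 = 4 truncates to q = 2, r = 1; since
   r is nonzero, the ceiling result is q + 1 = 3 with remainder
   r - op1 = -3, preserving op0 = op1 * 3 + -3.  */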
4913 if (target == 0 || GET_MODE (target) != compute_mode)
4914 target = gen_reg_rtx (compute_mode);
4915
4916 if (rem_flag)
4917 {
4918 remainder = (REG_P (target)
4919 ? target : gen_reg_rtx (compute_mode));
4920 quotient = gen_reg_rtx (compute_mode);
4921 }
4922 else
4923 {
4924 quotient = (REG_P (target)
4925 ? target : gen_reg_rtx (compute_mode));
4926 remainder = gen_reg_rtx (compute_mode);
4927 }
4928
4929 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4930 remainder, 1))
4931 {
4932 /* This could be computed with a branch-less sequence.
4933 Save that for later. */
4934 rtx_code_label *label = gen_label_rtx ();
4935 do_cmp_and_jump (remainder, const0_rtx, EQ,
4936 compute_mode, label);
4937 expand_inc (quotient, const1_rtx);
4938 expand_dec (remainder, op1);
4939 emit_label (label);
4940 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4941 }
4942
4943 /* No luck with division elimination or divmod. Have to do it
4944 by conditionally adjusting op0 *and* the result. */
4945 {
4946 rtx_code_label *label1, *label2;
4947 rtx adjusted_op0, tem;
4948
4949 quotient = gen_reg_rtx (compute_mode);
4950 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4951 label1 = gen_label_rtx ();
4952 label2 = gen_label_rtx ();
4953 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4954 compute_mode, label1);
4955 emit_move_insn (quotient, const0_rtx);
4956 emit_jump_insn (targetm.gen_jump (label2));
4957 emit_barrier ();
4958 emit_label (label1);
4959 expand_dec (adjusted_op0, const1_rtx);
4960 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4961 quotient, 1, OPTAB_LIB_WIDEN);
4962 if (tem != quotient)
4963 emit_move_insn (quotient, tem);
4964 expand_inc (quotient, const1_rtx);
4965 emit_label (label2);
4966 }
4967 }
4968 else /* signed */
4969 {
4970 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4971 && INTVAL (op1) >= 0)
4972 {
4973 /* This is extremely similar to the code for the unsigned case
4974 above. For 2.7 we should merge these variants, but for
4975 2.6.1 I don't want to touch the code for unsigned since that
4976 get used in C. The signed case will only be used by other
4977 languages (Ada). */
4978
4979 rtx t1, t2, t3;
4980 unsigned HOST_WIDE_INT d = INTVAL (op1);
4981 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4982 floor_log2 (d), tquotient, 0);
4983 t2 = expand_binop (compute_mode, and_optab, op0,
4984 gen_int_mode (d - 1, compute_mode),
4985 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4986 t3 = gen_reg_rtx (compute_mode);
4987 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4988 compute_mode, 1, 1);
4989 if (t3 == 0)
4990 {
4991 rtx_code_label *lab;
4992 lab = gen_label_rtx ();
4993 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4994 expand_inc (t1, const1_rtx);
4995 emit_label (lab);
4996 quotient = t1;
4997 }
4998 else
4999 quotient = force_operand (gen_rtx_PLUS (compute_mode,
5000 t1, t3),
5001 tquotient);
5002 break;
5003 }
5004
5005 /* Try using an instruction that produces both the quotient and
5006 remainder, using truncation. We can easily compensate the
5007 quotient or remainder to get ceiling rounding, once we have the
5008 remainder. Notice that we also compute the final remainder
5009 value here, and return the result right away. */
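/* In the signed case the adjustment is only made when op0 and op1
   have the same sign (their XOR is nonnegative): e.g. op0 = -9,
   op1 = 4 truncates to q = -2, r = -1, and -2 is already
   ceil (-2.25), so nothing is adjusted.  */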
5010 if (target == 0 || GET_MODE (target) != compute_mode)
5011 target = gen_reg_rtx (compute_mode);
5012 if (rem_flag)
5013 {
5014 remainder = (REG_P (target)
5015 ? target : gen_reg_rtx (compute_mode));
5016 quotient = gen_reg_rtx (compute_mode);
5017 }
5018 else
5019 {
5020 quotient = (REG_P (target)
5021 ? target : gen_reg_rtx (compute_mode));
5022 remainder = gen_reg_rtx (compute_mode);
5023 }
5024
5025 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5026 remainder, 0))
5027 {
5028 /* This could be computed with a branch-less sequence.
5029 Save that for later. */
5030 rtx tem;
5031 rtx_code_label *label = gen_label_rtx ();
5032 do_cmp_and_jump (remainder, const0_rtx, EQ,
5033 compute_mode, label);
5034 tem = expand_binop (compute_mode, xor_optab, op0, op1,
5035 NULL_RTX, 0, OPTAB_WIDEN);
5036 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5037 expand_inc (quotient, const1_rtx);
5038 expand_dec (remainder, op1);
5039 emit_label (label);
5040 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5041 }
5042
5043 /* No luck with division elimination or divmod. Have to do it
5044 by conditionally adjusting op0 *and* the result. */
5045 {
5046 rtx_code_label *label1, *label2, *label3, *label4, *label5;
5047 rtx adjusted_op0;
5048 rtx tem;
5049
5050 quotient = gen_reg_rtx (compute_mode);
5051 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5052 label1 = gen_label_rtx ();
5053 label2 = gen_label_rtx ();
5054 label3 = gen_label_rtx ();
5055 label4 = gen_label_rtx ();
5056 label5 = gen_label_rtx ();
5057 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5058 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5059 compute_mode, label1);
5060 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5061 quotient, 0, OPTAB_LIB_WIDEN);
5062 if (tem != quotient)
5063 emit_move_insn (quotient, tem);
5064 emit_jump_insn (targetm.gen_jump (label5));
5065 emit_barrier ();
5066 emit_label (label1);
5067 expand_dec (adjusted_op0, const1_rtx);
5068 emit_jump_insn (targetm.gen_jump (label4));
5069 emit_barrier ();
5070 emit_label (label2);
5071 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5072 compute_mode, label3);
5073 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5074 quotient, 0, OPTAB_LIB_WIDEN);
5075 if (tem != quotient)
5076 emit_move_insn (quotient, tem);
5077 emit_jump_insn (targetm.gen_jump (label5));
5078 emit_barrier ();
5079 emit_label (label3);
5080 expand_inc (adjusted_op0, const1_rtx);
5081 emit_label (label4);
5082 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5083 quotient, 0, OPTAB_LIB_WIDEN);
5084 if (tem != quotient)
5085 emit_move_insn (quotient, tem);
5086 expand_inc (quotient, const1_rtx);
5087 emit_label (label5);
5088 }
5089 }
5090 break;
5091
5092 case EXACT_DIV_EXPR:
5093 if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5094 {
5095 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5096 int size = GET_MODE_BITSIZE (int_mode);
5097 HOST_WIDE_INT d = INTVAL (op1);
5098 unsigned HOST_WIDE_INT ml;
5099 int pre_shift;
5100 rtx t1;
5101
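/* An exact division by odd d is a single multiplication, because
   odd d has a multiplicative inverse modulo 2^size; trailing zero
   bits of d are shifted out first.  E.g. for size = 32 and d = 5,
   ml = 0xcccccccd, since 5 * 0xcccccccd = 4 * 2^32 + 1; thus
   35 * 0xcccccccd mod 2^32 = 7.  */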
5102 pre_shift = ctz_or_zero (d);
5103 ml = invert_mod2n (d >> pre_shift, size);
5104 t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5105 pre_shift, NULL_RTX, unsignedp);
5106 quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5107 NULL_RTX, 1);
5108
5109 insn = get_last_insn ();
5110 set_dst_reg_note (insn, REG_EQUAL,
5111 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5112 int_mode, op0, op1),
5113 quotient);
5114 }
5115 break;
5116
5117 case ROUND_DIV_EXPR:
5118 case ROUND_MOD_EXPR:
5119 if (unsignedp)
5120 {
5121 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5122 rtx tem;
5123 rtx_code_label *label;
5124 label = gen_label_rtx ();
5125 quotient = gen_reg_rtx (int_mode);
5126 remainder = gen_reg_rtx (int_mode);
5127 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5128 {
5129 rtx tem;
5130 quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5131 quotient, 1, OPTAB_LIB_WIDEN);
5132 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5133 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5134 remainder, 1, OPTAB_LIB_WIDEN);
5135 }
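/* Round to nearest: bump the quotient when the remainder exceeds
   (op1 - 1) / 2.  E.g. 7/4 gives q = 1, r = 3 > 1, so q becomes 2
   and r becomes -1, preserving 7 = 4*2 - 1.  */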
5136 tem = plus_constant (int_mode, op1, -1);
5137 tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5138 do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5139 expand_inc (quotient, const1_rtx);
5140 expand_dec (remainder, op1);
5141 emit_label (label);
5142 }
5143 else
5144 {
5145 scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146 int size = GET_MODE_BITSIZE (int_mode);
5147 rtx abs_rem, abs_op1, tem, mask;
5148 rtx_code_label *label;
5149 label = gen_label_rtx ();
5150 quotient = gen_reg_rtx (int_mode);
5151 remainder = gen_reg_rtx (int_mode);
5152 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5153 {
5154 rtx tem;
5155 quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5156 quotient, 0, OPTAB_LIB_WIDEN);
5157 tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5158 remainder = expand_binop (int_mode, sub_optab, op0, tem,
5159 remainder, 0, OPTAB_LIB_WIDEN);
5160 }
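/* Signed round to nearest: adjust when 2 * abs (r) >= abs (d).
   mask is all-ones when op0 and op1 have opposite signs; the two
   xor/sub pairs below then turn the constant 1 and op1 into -1
   and -op1, so the quotient moves away from zero and the
   remainder moves toward zero by abs (op1).  */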
5161 abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5162 abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5163 tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5164 1, NULL_RTX, 1);
5165 do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5166 tem = expand_binop (int_mode, xor_optab, op0, op1,
5167 NULL_RTX, 0, OPTAB_WIDEN);
5168 mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5169 size - 1, NULL_RTX, 0);
5170 tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5171 NULL_RTX, 0, OPTAB_WIDEN);
5172 tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 NULL_RTX, 0, OPTAB_WIDEN);
5174 expand_inc (quotient, tem);
5175 tem = expand_binop (int_mode, xor_optab, mask, op1,
5176 NULL_RTX, 0, OPTAB_WIDEN);
5177 tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 NULL_RTX, 0, OPTAB_WIDEN);
5179 expand_dec (remainder, tem);
5180 emit_label (label);
5181 }
5182 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5183
5184 default:
5185 gcc_unreachable ();
5186 }
5187
5188 if (quotient == 0)
5189 {
5190 if (target && GET_MODE (target) != compute_mode)
5191 target = 0;
5192
5193 if (rem_flag)
5194 {
5195 /* Try to produce the remainder without producing the quotient.
5196 If we seem to have a divmod pattern that does not require widening,
5197 don't try widening here. We should really have a WIDEN argument
5198 to expand_twoval_binop, since what we'd really like to do here is
5199 1) try a mod insn in compute_mode
5200 2) try a divmod insn in compute_mode
5201 3) try a div insn in compute_mode and multiply-subtract to get
5202 remainder
5203 4) try the same things with widening allowed. */
5204 remainder
5205 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5206 op0, op1, target,
5207 unsignedp,
5208 ((optab_handler (optab2, compute_mode)
5209 != CODE_FOR_nothing)
5210 ? OPTAB_DIRECT : OPTAB_WIDEN));
5211 if (remainder == 0)
5212 {
5213 /* No luck there. Can we do remainder and divide at once
5214 without a library call? */
5215 remainder = gen_reg_rtx (compute_mode);
5216 if (! expand_twoval_binop ((unsignedp
5217 ? udivmod_optab
5218 : sdivmod_optab),
5219 op0, op1,
5220 NULL_RTX, remainder, unsignedp))
5221 remainder = 0;
5222 }
5223
5224 if (remainder)
5225 return gen_lowpart (mode, remainder);
5226 }
5227
5228 /* Produce the quotient. Try a quotient insn, but not a library call.
5229 If we have a divmod in this mode, use it in preference to widening
5230 the div (for this test we assume it will not fail). Note that optab2
5231 is set to the one of the two optabs that the call below will use. */
5232 quotient
5233 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5234 op0, op1, rem_flag ? NULL_RTX : target,
5235 unsignedp,
5236 ((optab_handler (optab2, compute_mode)
5237 != CODE_FOR_nothing)
5238 ? OPTAB_DIRECT : OPTAB_WIDEN));
5239
5240 if (quotient == 0)
5241 {
5242 /* No luck there. Try a quotient-and-remainder insn,
5243 keeping the quotient alone. */
5244 quotient = gen_reg_rtx (compute_mode);
5245 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5246 op0, op1,
5247 quotient, NULL_RTX, unsignedp))
5248 {
5249 quotient = 0;
5250 if (! rem_flag)
5251 /* Still no luck. If we are not computing the remainder,
5252 use a library call for the quotient. */
5253 quotient = sign_expand_binop (compute_mode,
5254 udiv_optab, sdiv_optab,
5255 op0, op1, target,
5256 unsignedp, OPTAB_LIB_WIDEN);
5257 }
5258 }
5259 }
5260
5261 if (rem_flag)
5262 {
5263 if (target && GET_MODE (target) != compute_mode)
5264 target = 0;
5265
5266 if (quotient == 0)
5267 {
5268 /* No divide instruction either. Use library for remainder. */
5269 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5270 op0, op1, target,
5271 unsignedp, OPTAB_LIB_WIDEN);
5272 /* No remainder function. Try a quotient-and-remainder
5273 function, keeping the remainder. */
5274 if (!remainder)
5275 {
5276 remainder = gen_reg_rtx (compute_mode);
5277 if (!expand_twoval_binop_libfunc
5278 (unsignedp ? udivmod_optab : sdivmod_optab,
5279 op0, op1,
5280 NULL_RTX, remainder,
5281 unsignedp ? UMOD : MOD))
5282 remainder = NULL_RTX;
5283 }
5284 }
5285 else
5286 {
5287 /* We divided. Now finish doing X - Y * (X / Y). */
5288 remainder = expand_mult (compute_mode, quotient, op1,
5289 NULL_RTX, unsignedp);
5290 remainder = expand_binop (compute_mode, sub_optab, op0,
5291 remainder, target, unsignedp,
5292 OPTAB_LIB_WIDEN);
5293 }
5294 }
5295
5296 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5297 }
5298
5299 /* Return a tree node with data type TYPE, describing the value of X.
5300 Usually this is a VAR_DECL, if there is no obvious better choice.
5301 X may be an expression, although we only support those expressions
5302 generated by loop.c. */
5303
5304 tree
5305 make_tree (tree type, rtx x)
5306 {
5307 tree t;
5308
5309 switch (GET_CODE (x))
5310 {
5311 case CONST_INT:
5312 case CONST_WIDE_INT:
5313 t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5314 return t;
5315
5316 case CONST_DOUBLE:
5317 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5318 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5319 t = wide_int_to_tree (type,
5320 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5321 HOST_BITS_PER_WIDE_INT * 2));
5322 else
5323 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5324
5325 return t;
5326
5327 case CONST_VECTOR:
5328 {
5329 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5330 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5331 tree itype = TREE_TYPE (type);
5332
5333 /* Build a tree with vector elements. */
5334 tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5335 unsigned int count = elts.encoded_nelts ();
5336 for (unsigned int i = 0; i < count; ++i)
5337 {
5338 rtx elt = CONST_VECTOR_ELT (x, i);
5339 elts.quick_push (make_tree (itype, elt));
5340 }
5341
5342 return elts.build ();
5343 }
5344
5345 case PLUS:
5346 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5347 make_tree (type, XEXP (x, 1)));
5348
5349 case MINUS:
5350 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5351 make_tree (type, XEXP (x, 1)));
5352
5353 case NEG:
5354 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5355
5356 case MULT:
5357 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5358 make_tree (type, XEXP (x, 1)));
5359
5360 case ASHIFT:
5361 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5362 make_tree (type, XEXP (x, 1)));
5363
5364 case LSHIFTRT:
5365 t = unsigned_type_for (type);
5366 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5367 make_tree (t, XEXP (x, 0)),
5368 make_tree (type, XEXP (x, 1))));
5369
5370 case ASHIFTRT:
5371 t = signed_type_for (type);
5372 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5373 make_tree (t, XEXP (x, 0)),
5374 make_tree (type, XEXP (x, 1))));
5375
5376 case DIV:
5377 if (TREE_CODE (type) != REAL_TYPE)
5378 t = signed_type_for (type);
5379 else
5380 t = type;
5381
5382 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 make_tree (t, XEXP (x, 0)),
5384 make_tree (t, XEXP (x, 1))));
5385 case UDIV:
5386 t = unsigned_type_for (type);
5387 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 make_tree (t, XEXP (x, 0)),
5389 make_tree (t, XEXP (x, 1))));
5390
5391 case SIGN_EXTEND:
5392 case ZERO_EXTEND:
5393 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5394 GET_CODE (x) == ZERO_EXTEND);
5395 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5396
5397 case CONST:
5398 return make_tree (type, XEXP (x, 0));
5399
5400 case SYMBOL_REF:
5401 t = SYMBOL_REF_DECL (x);
5402 if (t)
5403 return fold_convert (type, build_fold_addr_expr (t));
5404 /* fall through. */
5405
5406 default:
5407 if (CONST_POLY_INT_P (x))
5408 return wide_int_to_tree (type, const_poly_int_value (x));
5409
5410 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5411
5412 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5413 address mode to pointer mode. */
5414 if (POINTER_TYPE_P (type))
5415 x = convert_memory_address_addr_space
5416 (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5417
5418 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5419 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5420 t->decl_with_rtl.rtl = x;
5421
5422 return t;
5423 }
5424 }
5425
5426 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5427 and returning TARGET.
5428
5429 If TARGET is 0, a pseudo-register or constant is returned. */
5430
5431 rtx
5432 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5433 {
5434 rtx tem = 0;
5435
5436 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5437 tem = simplify_binary_operation (AND, mode, op0, op1);
5438 if (tem == 0)
5439 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5440
5441 if (target == 0)
5442 target = tem;
5443 else if (tem != target)
5444 emit_move_insn (target, tem);
5445 return target;
5446 }
5447
5448 /* Helper function for emit_store_flag. */
5449 rtx
5450 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5451 machine_mode mode, machine_mode compare_mode,
5452 int unsignedp, rtx x, rtx y, int normalizep,
5453 machine_mode target_mode)
5454 {
5455 struct expand_operand ops[4];
5456 rtx op0, comparison, subtarget;
5457 rtx_insn *last;
5458 scalar_int_mode result_mode = targetm.cstore_mode (icode);
5459 scalar_int_mode int_target_mode;
5460
5461 last = get_last_insn ();
5462 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5463 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5464 if (!x || !y)
5465 {
5466 delete_insns_since (last);
5467 return NULL_RTX;
5468 }
5469
5470 if (target_mode == VOIDmode)
5471 int_target_mode = result_mode;
5472 else
5473 int_target_mode = as_a <scalar_int_mode> (target_mode);
5474 if (!target)
5475 target = gen_reg_rtx (int_target_mode);
5476
5477 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5478
5479 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5480 create_fixed_operand (&ops[1], comparison);
5481 create_fixed_operand (&ops[2], x);
5482 create_fixed_operand (&ops[3], y);
5483 if (!maybe_expand_insn (icode, 4, ops))
5484 {
5485 delete_insns_since (last);
5486 return NULL_RTX;
5487 }
5488 subtarget = ops[0].value;
5489
5490 /* If we are converting to a wider mode, first convert to
5491 INT_TARGET_MODE, then normalize. This produces better combining
5492 opportunities on machines that have a SIGN_EXTRACT when we are
5493 testing a single bit. This mostly benefits the 68k.
5494
5495 If STORE_FLAG_VALUE does not have the sign bit set when
5496 interpreted in MODE, we can do this conversion as unsigned, which
5497 is usually more efficient. */
5498 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5499 {
5500 convert_move (target, subtarget,
5501 val_signbit_known_clear_p (result_mode,
5502 STORE_FLAG_VALUE));
5503 op0 = target;
5504 result_mode = int_target_mode;
5505 }
5506 else
5507 op0 = subtarget;
5508
5509 /* If we want to keep subexpressions around, don't reuse our last
5510 target. */
5511 if (optimize)
5512 subtarget = 0;
5513
5514 /* Now normalize to the proper value in MODE. Sometimes we don't
5515 have to do anything. */
5516 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5517 ;
5518 /* STORE_FLAG_VALUE might be the most negative number, so write
5519 the comparison this way to avoid a compile-time warning. */
5520 else if (- normalizep == STORE_FLAG_VALUE)
5521 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5522
5523 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5524 it hard to use a value of just the sign bit due to ANSI integer
5525 constant typing rules. */
5526 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5527 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5528 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5529 normalizep == 1);
5530 else
5531 {
5532 gcc_assert (STORE_FLAG_VALUE & 1);
5533
5534 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5535 if (normalizep == -1)
5536 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5537 }
5538
5539 /* If we were converting to a smaller mode, do the conversion now. */
5540 if (int_target_mode != result_mode)
5541 {
5542 convert_move (target, op0, 0);
5543 return target;
5544 }
5545 else
5546 return op0;
5547 }
5548
5549
5550 /* A subroutine of emit_store_flag only including "tricks" that do not
5551 need a recursive call. These are kept separate to avoid infinite
5552 loops. */
5553
5554 static rtx
5555 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5556 machine_mode mode, int unsignedp, int normalizep,
5557 machine_mode target_mode)
5558 {
5559 rtx subtarget;
5560 enum insn_code icode;
5561 machine_mode compare_mode;
5562 enum mode_class mclass;
5563 enum rtx_code scode;
5564
5565 if (unsignedp)
5566 code = unsigned_condition (code);
5567 scode = swap_condition (code);
5568
5569 /* If one operand is constant, make it the second one. Only do this
5570 if the other operand is not constant as well. */
5571
5572 if (swap_commutative_operands_p (op0, op1))
5573 {
5574 std::swap (op0, op1);
5575 code = swap_condition (code);
5576 }
5577
5578 if (mode == VOIDmode)
5579 mode = GET_MODE (op0);
5580
5581 /* For some comparisons with 1 and -1, we can convert this to
5582 comparisons with zero. This will often produce more opportunities for
5583 store-flag insns. */
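/* E.g. "x < 1" becomes "x <= 0", and the unsigned "x >= 1" (GEU)
   becomes "x != 0", which more often matches a store-flag
   pattern.  */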
5584
5585 switch (code)
5586 {
5587 case LT:
5588 if (op1 == const1_rtx)
5589 op1 = const0_rtx, code = LE;
5590 break;
5591 case LE:
5592 if (op1 == constm1_rtx)
5593 op1 = const0_rtx, code = LT;
5594 break;
5595 case GE:
5596 if (op1 == const1_rtx)
5597 op1 = const0_rtx, code = GT;
5598 break;
5599 case GT:
5600 if (op1 == constm1_rtx)
5601 op1 = const0_rtx, code = GE;
5602 break;
5603 case GEU:
5604 if (op1 == const1_rtx)
5605 op1 = const0_rtx, code = NE;
5606 break;
5607 case LTU:
5608 if (op1 == const1_rtx)
5609 op1 = const0_rtx, code = EQ;
5610 break;
5611 default:
5612 break;
5613 }
5614
5615 /* If we are comparing a double-word integer with zero or -1, we can
5616 convert the comparison into one involving a single word. */
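/* E.g. a double-word x == 0 becomes (xlow | xhigh) == 0, x == -1
   becomes (xlow & xhigh) == -1, and x < 0 just tests the sign of
   the high word.  */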
5617 scalar_int_mode int_mode;
5618 if (is_int_mode (mode, &int_mode)
5619 && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5620 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5621 {
5622 rtx tem;
5623 if ((code == EQ || code == NE)
5624 && (op1 == const0_rtx || op1 == constm1_rtx))
5625 {
5626 rtx op00, op01;
5627
5628 /* Do a logical OR or AND of the two words and compare the
5629 result. */
5630 op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5631 op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5632 tem = expand_binop (word_mode,
5633 op1 == const0_rtx ? ior_optab : and_optab,
5634 op00, op01, NULL_RTX, unsignedp,
5635 OPTAB_DIRECT);
5636
5637 if (tem != 0)
5638 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5639 unsignedp, normalizep);
5640 }
5641 else if ((code == LT || code == GE) && op1 == const0_rtx)
5642 {
5643 rtx op0h;
5644
5645 /* If testing the sign bit, can just test on high word. */
5646 op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5647 subreg_highpart_offset (word_mode,
5648 int_mode));
5649 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5650 unsignedp, normalizep);
5651 }
5652 else
5653 tem = NULL_RTX;
5654
5655 if (tem)
5656 {
5657 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5658 return tem;
5659 if (!target)
5660 target = gen_reg_rtx (target_mode);
5661
5662 convert_move (target, tem,
5663 !val_signbit_known_set_p (word_mode,
5664 (normalizep ? normalizep
5665 : STORE_FLAG_VALUE)));
5666 return target;
5667 }
5668 }
5669
5670 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5671 complement of A (for GE) and shifting the sign bit to the low bit. */
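/* E.g. for a 32-bit mode, "x < 0" as a 0/1 value is the logical
   shift (unsigned) x >> 31, and "x >= 0" is (unsigned) ~x >> 31;
   an arithmetic shift gives the -1/0 forms instead.  */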
5672 if (op1 == const0_rtx && (code == LT || code == GE)
5673 && is_int_mode (mode, &int_mode)
5674 && (normalizep || STORE_FLAG_VALUE == 1
5675 || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5676 {
5677 scalar_int_mode int_target_mode;
5678 subtarget = target;
5679
5680 if (!target)
5681 int_target_mode = int_mode;
5682 else
5683 {
5684 /* If the result is to be wider than OP0, it is best to convert it
5685 first. If it is to be narrower, it is *incorrect* to convert it
5686 first. */
5687 int_target_mode = as_a <scalar_int_mode> (target_mode);
5688 if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5689 {
5690 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5691 int_mode = int_target_mode;
5692 }
5693 }
5694
5695 if (int_target_mode != int_mode)
5696 subtarget = 0;
5697
5698 if (code == GE)
5699 op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5700 ((STORE_FLAG_VALUE == 1 || normalizep)
5701 ? 0 : subtarget), 0);
5702
5703 if (STORE_FLAG_VALUE == 1 || normalizep)
5704 /* If we are supposed to produce a 0/1 value, we want to do
5705 a logical shift from the sign bit to the low-order bit; for
5706 a -1/0 value, we do an arithmetic shift. */
5707 op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5708 GET_MODE_BITSIZE (int_mode) - 1,
5709 subtarget, normalizep != -1);
5710
5711 if (int_mode != int_target_mode)
5712 op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5713
5714 return op0;
5715 }
5716
5717 mclass = GET_MODE_CLASS (mode);
5718 FOR_EACH_MODE_FROM (compare_mode, mode)
5719 {
5720 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5721 icode = optab_handler (cstore_optab, optab_mode);
5722 if (icode != CODE_FOR_nothing)
5723 {
5724 do_pending_stack_adjust ();
5725 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5726 unsignedp, op0, op1, normalizep, target_mode);
5727 if (tem)
5728 return tem;
5729
5730 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5731 {
5732 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5733 unsignedp, op1, op0, normalizep, target_mode);
5734 if (tem)
5735 return tem;
5736 }
5737 break;
5738 }
5739 }
5740
5741 return 0;
5742 }
5743
5744 /* Subroutine of emit_store_flag that handles cases in which the operands
5745 are scalar integers. SUBTARGET is the target to use for temporary
5746 operations and TRUEVAL is the value to store when the condition is
5747 true. All other arguments are as for emit_store_flag. */
5748
5749 rtx
5750 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5751 rtx op1, scalar_int_mode mode, int unsignedp,
5752 int normalizep, rtx trueval)
5753 {
5754 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5755 rtx_insn *last = get_last_insn ();
5756
5757 /* If this is an equality comparison of integers, we can try to exclusive-or
5758 (or subtract) the two operands and use a recursive call to try the
5759 comparison with zero. Don't do any of these cases if branches are
5760 very cheap. */
5761
5762 if ((code == EQ || code == NE) && op1 != const0_rtx)
5763 {
5764 rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5765 OPTAB_WIDEN);
5766
5767 if (tem == 0)
5768 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5769 OPTAB_WIDEN);
5770 if (tem != 0)
5771 tem = emit_store_flag (target, code, tem, const0_rtx,
5772 mode, unsignedp, normalizep);
5773 if (tem != 0)
5774 return tem;
5775
5776 delete_insns_since (last);
5777 }
5778
5779 /* For integer comparisons, try the reverse comparison. However, for
5780 small X, if we'd have to extend anyway, implementing "X != 0"
5781 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5782 rtx_code rcode = reverse_condition (code);
5783 if (can_compare_p (rcode, mode, ccp_store_flag)
5784 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5785 && code == NE
5786 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5787 && op1 == const0_rtx))
5788 {
5789 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5790 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5791
5792 /* Again, for the reverse comparison, use either an addition or a XOR. */
5793 if (want_add
5794 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5795 optimize_insn_for_speed_p ()) == 0)
5796 {
5797 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5798 STORE_FLAG_VALUE, target_mode);
5799 if (tem != 0)
5800 tem = expand_binop (target_mode, add_optab, tem,
5801 gen_int_mode (normalizep, target_mode),
5802 target, 0, OPTAB_WIDEN);
5803 if (tem != 0)
5804 return tem;
5805 }
5806 else if (!want_add
5807 && rtx_cost (trueval, mode, XOR, 1,
5808 optimize_insn_for_speed_p ()) == 0)
5809 {
5810 rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5811 normalizep, target_mode);
5812 if (tem != 0)
5813 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5814 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5815 if (tem != 0)
5816 return tem;
5817 }
5818
5819 delete_insns_since (last);
5820 }
5821
5822 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5823 the constant zero. Reject all other comparisons at this point. Only
5824 do LE and GT if branches are expensive since they are expensive on
5825 2-operand machines. */
5826
5827 if (op1 != const0_rtx
5828 || (code != EQ && code != NE
5829 && (BRANCH_COST (optimize_insn_for_speed_p (),
5830 false) <= 1 || (code != LE && code != GT))))
5831 return 0;
5832
5833 /* Try to put the result of the comparison in the sign bit. Assume we can't
5834 do the necessary operation below. */
5835
5836 rtx tem = 0;
5837
5838 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5839 the sign bit set. */
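/* E.g. A = 0: 0 | -1 = -1 (sign bit set); A = 3: 3 | 2 = 3
   (clear); any negative A keeps its sign bit through the OR.  */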
5840
5841 if (code == LE)
5842 {
5843 /* This is destructive, so SUBTARGET can't be OP0. */
5844 if (rtx_equal_p (subtarget, op0))
5845 subtarget = 0;
5846
5847 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5848 OPTAB_WIDEN);
5849 if (tem)
5850 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5851 OPTAB_WIDEN);
5852 }
5853
5854 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5855 number of bits in the mode of OP0, minus one. */
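/* The arithmetic shift yields 0 for A >= 0 and -1 for A < 0, so
   the difference has its sign bit set exactly when A > 0:
   A = 5: 0 - 5 = -5 (set); A = -3: -1 - -3 = 2 (clear);
   A = 0: 0 - 0 = 0 (clear).  */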
5856
5857 if (code == GT)
5858 {
5859 if (rtx_equal_p (subtarget, op0))
5860 subtarget = 0;
5861
5862 tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5863 GET_MODE_BITSIZE (mode) - 1,
5864 subtarget, 0);
5865 if (tem)
5866 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5867 OPTAB_WIDEN);
5868 }
5869
5870 if (code == EQ || code == NE)
5871 {
5872 /* For EQ or NE, one way to do the comparison is to apply an operation
5873 that converts the operand into a positive number if it is nonzero
5874 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5875 for NE we negate. This puts the result in the sign bit. Then we
5876 normalize with a shift, if needed.
5877
5878 Two operations that can do the above actions are ABS and FFS, so try
5879 them. If that doesn't work, and MODE is smaller than a full word,
5880 we can use zero-extension to the wider mode (an unsigned conversion)
5881 as the operation. */
5882
5883 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5884 that is compensated by the subsequent overflow when subtracting
5885 one / negating. */
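/* E.g. with FFS: ffs (x) is 0 iff x == 0, so ffs (x) - 1 is
   negative exactly for x == 0 (the EQ case) and -ffs (x) is
   negative exactly for x != 0 (the NE case).  */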
5886
5887 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5888 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5889 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5890 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5891 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5892 {
5893 tem = convert_modes (word_mode, mode, op0, 1);
5894 mode = word_mode;
5895 }
5896
5897 if (tem != 0)
5898 {
5899 if (code == EQ)
5900 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5901 0, OPTAB_WIDEN);
5902 else
5903 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5904 }
5905
5906 /* If we couldn't do it that way, for NE we can "or" the two's complement
5907 of the value with itself. For EQ, we take the one's complement of
5908 that "or", which is an extra insn, so we only handle EQ if branches
5909 are expensive. */
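/* E.g. x = 4: -4 | 4 = -4 (sign bit set, so NE holds); x = 0:
   0 | 0 = 0 (clear).  Complementing that OR turns the NE test
   into EQ.  */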
5910
5911 if (tem == 0
5912 && (code == NE
5913 || BRANCH_COST (optimize_insn_for_speed_p (),
5914 false) > 1))
5915 {
5916 if (rtx_equal_p (subtarget, op0))
5917 subtarget = 0;
5918
5919 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5920 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5921 OPTAB_WIDEN);
5922
5923 if (tem && code == EQ)
5924 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5925 }
5926 }
5927
5928 if (tem && normalizep)
5929 tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5930 GET_MODE_BITSIZE (mode) - 1,
5931 subtarget, normalizep == 1);
5932
5933 if (tem)
5934 {
5935 if (!target)
5936 ;
5937 else if (GET_MODE (tem) != target_mode)
5938 {
5939 convert_move (target, tem, 0);
5940 tem = target;
5941 }
5942 else if (!subtarget)
5943 {
5944 emit_move_insn (target, tem);
5945 tem = target;
5946 }
5947 }
5948 else
5949 delete_insns_since (last);
5950
5951 return tem;
5952 }
5953
5954 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5955 and storing in TARGET. Normally return TARGET.
5956 Return 0 if that cannot be done.
5957
5958 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5959 it is VOIDmode, they cannot both be CONST_INT.
5960
5961 UNSIGNEDP is for the case where we have to widen the operands
5962 to perform the operation. It says to use zero-extension.
5963
5964 NORMALIZEP is 1 if we should convert the result to be either zero
5965 or one. NORMALIZEP is -1 if we should convert the result to be
5966 either zero or -1. If NORMALIZEP is zero, the result will be left
5967 "raw" out of the scc insn. */
5968
5969 rtx
5970 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5971 machine_mode mode, int unsignedp, int normalizep)
5972 {
5973 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5974 enum rtx_code rcode;
5975 rtx subtarget;
5976 rtx tem, trueval;
5977 rtx_insn *last;
5978
5979 /* If we compare constants, we shouldn't use a store-flag operation,
5980 but a constant load. We can get there via the vanilla route that
5981 usually generates a compare-branch sequence, but will in this case
5982 fold the comparison to a constant, and thus elide the branch. */
5983 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5984 return NULL_RTX;
5985
5986 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5987 target_mode);
5988 if (tem)
5989 return tem;
5990
5991 /* If we reached here, we can't do this with a scc insn, however there
5992 are some comparisons that can be done in other ways. Don't do any
5993 of these cases if branches are very cheap. */
5994 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5995 return 0;
5996
5997 /* See what we need to return. We can only return a 1, -1, or the
5998 sign bit. */
5999
6000 if (normalizep == 0)
6001 {
6002 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6003 normalizep = STORE_FLAG_VALUE;
6004
6005 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6006 ;
6007 else
6008 return 0;
6009 }
6010
6011 last = get_last_insn ();
6012
6013 /* If optimizing, use different pseudo registers for each insn, instead
6014 of reusing the same pseudo. This leads to better CSE, but slows
6015 down the compiler, since there are more pseudos. */
6016 subtarget = (!optimize
6017 && (target_mode == mode)) ? target : NULL_RTX;
6018 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6019
6020 /* For floating-point comparisons, try the reverse comparison or try
6021 changing the "orderedness" of the comparison. */
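/* E.g. when only UNORDERED has a usable cstore pattern,
   ORDERED (x, y) can be computed as UNORDERED (x, y) followed by
   an XOR with the true value (or by adding the normalization
   constant, whichever is free).  */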
6022 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6023 {
6024 enum rtx_code first_code;
6025 bool and_them;
6026
6027 rcode = reverse_condition_maybe_unordered (code);
6028 if (can_compare_p (rcode, mode, ccp_store_flag)
6029 && (code == ORDERED || code == UNORDERED
6030 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6031 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6032 {
6033 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6034 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6035
6036 /* For the reverse comparison, use either an addition or a XOR. */
6037 if (want_add
6038 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6039 optimize_insn_for_speed_p ()) == 0)
6040 {
6041 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6042 STORE_FLAG_VALUE, target_mode);
6043 if (tem)
6044 return expand_binop (target_mode, add_optab, tem,
6045 gen_int_mode (normalizep, target_mode),
6046 target, 0, OPTAB_WIDEN);
6047 }
6048 else if (!want_add
6049 && rtx_cost (trueval, mode, XOR, 1,
6050 optimize_insn_for_speed_p ()) == 0)
6051 {
6052 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6053 normalizep, target_mode);
6054 if (tem)
6055 return expand_binop (target_mode, xor_optab, tem, trueval,
6056 target, INTVAL (trueval) >= 0,
6057 OPTAB_WIDEN);
6058 }
6059 }
6060
6061 delete_insns_since (last);
6062
6063 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
6064 if (code == ORDERED || code == UNORDERED)
6065 return 0;
6066
6067 and_them = split_comparison (code, mode, &first_code, &code);
6068
6069 /* If there are no NaNs, the first comparison should always fall through.
6070 Effectively change the comparison to the other one. */
6071 if (!HONOR_NANS (mode))
6072 {
6073 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6074 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6075 target_mode);
6076 }
6077
6078 if (!HAVE_conditional_move)
6079 return 0;
6080
6081 /* Do not turn a trapping comparison into a non-trapping one. */
6082 if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6083 && flag_trapping_math)
6084 return 0;
6085
6086 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6087 conditional move. */
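/* E.g. an ordered "x < y" decomposes into "x and y are ordered"
   AND "x < y allowing unordered operands", while "x UNLT y"
   decomposes into "x and y are unordered" OR "x < y"; the setcc
   computes the ORDERED/UNORDERED half and the conditional move
   merges in the other half.  */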
6088 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6089 normalizep, target_mode);
6090 if (tem == 0)
6091 return 0;
6092
6093 if (and_them)
6094 tem = emit_conditional_move (target, code, op0, op1, mode,
6095 tem, const0_rtx, GET_MODE (tem), 0);
6096 else
6097 tem = emit_conditional_move (target, code, op0, op1, mode,
6098 trueval, tem, GET_MODE (tem), 0);
6099
6100 if (tem == 0)
6101 delete_insns_since (last);
6102 return tem;
6103 }
6104
6105 /* The remaining tricks only apply to integer comparisons. */
6106
6107 scalar_int_mode int_mode;
6108 if (is_int_mode (mode, &int_mode))
6109 return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6110 unsignedp, normalizep, trueval);
6111
6112 return 0;
6113 }
6114
6115 /* Like emit_store_flag, but always succeeds. */
6116
6117 rtx
6118 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6119 machine_mode mode, int unsignedp, int normalizep)
6120 {
6121 rtx tem;
6122 rtx_code_label *label;
6123 rtx trueval, falseval;
6124
6125 /* First see if emit_store_flag can do the job. */
6126 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6127 if (tem != 0)
6128 return tem;
6129
6130 /* If one operand is constant, make it the second one. Only do this
6131 if the other operand is not constant as well. */
6132 if (swap_commutative_operands_p (op0, op1))
6133 {
6134 std::swap (op0, op1);
6135 code = swap_condition (code);
6136 }
6137
6138 if (mode == VOIDmode)
6139 mode = GET_MODE (op0);
6140
6141 if (!target)
6142 target = gen_reg_rtx (word_mode);
6143
6144 /* If this failed, we have to do this with set/compare/jump/set code.
6145 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
6146 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6147 if (code == NE
6148 && GET_MODE_CLASS (mode) == MODE_INT
6149 && REG_P (target)
6150 && op0 == target
6151 && op1 == const0_rtx)
6152 {
6153 label = gen_label_rtx ();
6154 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6155 NULL_RTX, NULL, label,
6156 profile_probability::uninitialized ());
6157 emit_move_insn (target, trueval);
6158 emit_label (label);
6159 return target;
6160 }
6161
6162 if (!REG_P (target)
6163 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6164 target = gen_reg_rtx (GET_MODE (target));
6165
6166 /* Jump in the right direction if the target cannot implement CODE
6167 but can jump on its reverse condition. */
6168 falseval = const0_rtx;
6169 if (! can_compare_p (code, mode, ccp_jump)
6170 && (! FLOAT_MODE_P (mode)
6171 || code == ORDERED || code == UNORDERED
6172 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6173 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6174 {
6175 enum rtx_code rcode;
6176 if (FLOAT_MODE_P (mode))
6177 rcode = reverse_condition_maybe_unordered (code);
6178 else
6179 rcode = reverse_condition (code);
6180
6181 /* Canonicalize to UNORDERED for the libcall. */
6182 if (can_compare_p (rcode, mode, ccp_jump)
6183 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6184 {
6185 falseval = trueval;
6186 trueval = const0_rtx;
6187 code = rcode;
6188 }
6189 }
6190
6191 emit_move_insn (target, trueval);
6192 label = gen_label_rtx ();
6193 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6194 label, profile_probability::uninitialized ());
6195
6196 emit_move_insn (target, falseval);
6197 emit_label (label);
6198
6199 return target;
6200 }
6201
6202 /* Perform possibly multi-word comparison and conditional jump to LABEL
6203 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
6204 now a thin wrapper around do_compare_rtx_and_jump. */
6205
6206 static void
6207 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6208 rtx_code_label *label)
6209 {
6210 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6211 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6212 NULL, label, profile_probability::uninitialized ());
6213 }
6214