1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2020 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Work around tree-optimization/91825.  */
22 #pragma GCC diagnostic warning "-Wmaybe-uninitialized"
23 
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "predict.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "dojump.h"
42 #include "explow.h"
43 #include "expr.h"
44 #include "langhooks.h"
45 #include "tree-vector-builder.h"
46 
47 struct target_expmed default_target_expmed;
48 #if SWITCHABLE_TARGET
49 struct target_expmed *this_target_expmed = &default_target_expmed;
50 #endif
51 
52 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
53 				      unsigned HOST_WIDE_INT,
54 				      unsigned HOST_WIDE_INT,
55 				      poly_uint64, poly_uint64,
56 				      machine_mode, rtx, bool, bool);
57 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
58 				   unsigned HOST_WIDE_INT,
59 				   unsigned HOST_WIDE_INT,
60 				   poly_uint64, poly_uint64,
61 				   rtx, scalar_int_mode, bool);
62 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
63 				     unsigned HOST_WIDE_INT,
64 				     unsigned HOST_WIDE_INT,
65 				     rtx, scalar_int_mode, bool);
66 static void store_split_bit_field (rtx, opt_scalar_int_mode,
67 				   unsigned HOST_WIDE_INT,
68 				   unsigned HOST_WIDE_INT,
69 				   poly_uint64, poly_uint64,
70 				   rtx, scalar_int_mode, bool);
71 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
72 				       unsigned HOST_WIDE_INT,
73 				       unsigned HOST_WIDE_INT, int, rtx,
74 				       machine_mode, machine_mode, bool, bool);
75 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
76 				    unsigned HOST_WIDE_INT,
77 				    unsigned HOST_WIDE_INT, rtx, int, bool);
78 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
79 				      unsigned HOST_WIDE_INT,
80 				      unsigned HOST_WIDE_INT, rtx, int, bool);
81 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
82 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
83 				    unsigned HOST_WIDE_INT,
84 				    unsigned HOST_WIDE_INT, int, bool);
85 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
86 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
87 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
88 
89 /* Return a constant integer mask value of mode MODE with BITSIZE ones
90    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
91    The mask is truncated if necessary to the width of mode MODE.  The
92    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
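/* For example, mask_rtx (SImode, 8, 4, false) yields a CONST_INT with
   value 0xf00 (four ones followed by eight zeros), and passing true for
   COMPLEMENT instead yields the 32-bit complement 0xfffff0ff.  */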
93 
94 static inline rtx
95 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
96 {
97   return immed_wide_int_const
98     (wi::shifted_mask (bitpos, bitsize, complement,
99 		       GET_MODE_PRECISION (mode)), mode);
100 }
101 
102 /* Test whether a value is zero or a power of two.  */
103 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
104   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
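/* For example, 0, 1, 8 and 0x8000 all satisfy the test, while 12 does
   not (12 & 11 == 8).  Note that X is evaluated twice, so the argument
   should be free of side effects.  */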
105 
106 struct init_expmed_rtl
107 {
108   rtx reg;
109   rtx plus;
110   rtx neg;
111   rtx mult;
112   rtx sdiv;
113   rtx udiv;
114   rtx sdiv_32;
115   rtx smod_32;
116   rtx wide_mult;
117   rtx wide_lshr;
118   rtx wide_trunc;
119   rtx shift;
120   rtx shift_mult;
121   rtx shift_add;
122   rtx shift_sub0;
123   rtx shift_sub1;
124   rtx zext;
125   rtx trunc;
126 
127   rtx pow2[MAX_BITS_PER_WORD];
128   rtx cint[MAX_BITS_PER_WORD];
129 };
130 
131 static void
132 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
133 		      scalar_int_mode from_mode, bool speed)
134 {
135   int to_size, from_size;
136   rtx which;
137 
138   to_size = GET_MODE_PRECISION (to_mode);
139   from_size = GET_MODE_PRECISION (from_mode);
140 
141   /* Most partial integers have a precision less than the "full"
142      integer they require for storage.  If one doesn't, reduce its
143      bit size by one for the comparison here.  */
145   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
146       && pow2p_hwi (to_size))
147     to_size --;
148   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
149       && pow2p_hwi (from_size))
150     from_size --;
151 
152   /* Assume cost of zero-extend and sign-extend is the same.  */
153   which = (to_size < from_size ? all->trunc : all->zext);
154 
155   PUT_MODE (all->reg, from_mode);
156   set_convert_cost (to_mode, from_mode, speed,
157 		    set_src_cost (which, to_mode, speed));
158 }
159 
160 static void
161 init_expmed_one_mode (struct init_expmed_rtl *all,
162 		      machine_mode mode, int speed)
163 {
164   int m, n, mode_bitsize;
165   machine_mode mode_from;
166 
167   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
168 
169   PUT_MODE (all->reg, mode);
170   PUT_MODE (all->plus, mode);
171   PUT_MODE (all->neg, mode);
172   PUT_MODE (all->mult, mode);
173   PUT_MODE (all->sdiv, mode);
174   PUT_MODE (all->udiv, mode);
175   PUT_MODE (all->sdiv_32, mode);
176   PUT_MODE (all->smod_32, mode);
177   PUT_MODE (all->wide_trunc, mode);
178   PUT_MODE (all->shift, mode);
179   PUT_MODE (all->shift_mult, mode);
180   PUT_MODE (all->shift_add, mode);
181   PUT_MODE (all->shift_sub0, mode);
182   PUT_MODE (all->shift_sub1, mode);
183   PUT_MODE (all->zext, mode);
184   PUT_MODE (all->trunc, mode);
185 
186   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
187   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
188   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
189   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
190   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
191 
192   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
193 				     <= 2 * add_cost (speed, mode)));
194   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
195 				     <= 4 * add_cost (speed, mode)));
196 
197   set_shift_cost (speed, mode, 0, 0);
198   {
199     int cost = add_cost (speed, mode);
200     set_shiftadd_cost (speed, mode, 0, cost);
201     set_shiftsub0_cost (speed, mode, 0, cost);
202     set_shiftsub1_cost (speed, mode, 0, cost);
203   }
204 
205   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
206   for (m = 1; m < n; m++)
207     {
208       XEXP (all->shift, 1) = all->cint[m];
209       XEXP (all->shift_mult, 1) = all->pow2[m];
210 
211       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
212       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
213 						       speed));
214       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
215 							speed));
216       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
217 							speed));
218     }
219 
220   scalar_int_mode int_mode_to;
221   if (is_a <scalar_int_mode> (mode, &int_mode_to))
222     {
223       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
224 	   mode_from = (machine_mode)(mode_from + 1))
225 	init_expmed_one_conv (all, int_mode_to,
226 			      as_a <scalar_int_mode> (mode_from), speed);
227 
228       scalar_int_mode wider_mode;
229       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
230 	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
231 	{
232 	  PUT_MODE (all->zext, wider_mode);
233 	  PUT_MODE (all->wide_mult, wider_mode);
234 	  PUT_MODE (all->wide_lshr, wider_mode);
235 	  XEXP (all->wide_lshr, 1)
236 	    = gen_int_shift_amount (wider_mode, mode_bitsize);
237 
238 	  set_mul_widen_cost (speed, wider_mode,
239 			      set_src_cost (all->wide_mult, wider_mode, speed));
240 	  set_mul_highpart_cost (speed, int_mode_to,
241 				 set_src_cost (all->wide_trunc,
242 					       int_mode_to, speed));
243 	}
244     }
245 }
246 
247 void
248 init_expmed (void)
249 {
250   struct init_expmed_rtl all;
251   machine_mode mode = QImode;
252   int m, speed;
253 
254   memset (&all, 0, sizeof all);
255   for (m = 1; m < MAX_BITS_PER_WORD; m++)
256     {
257       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
258       all.cint[m] = GEN_INT (m);
259     }
260 
261   /* Avoid using hard regs in ways which may be unsupported.  */
262   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
263   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
264   all.neg = gen_rtx_NEG (mode, all.reg);
265   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
266   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
267   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
268   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
269   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
270   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
271   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
272   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
273   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
274   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
275   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
276   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
277   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
278   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
279   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
280 
281   for (speed = 0; speed < 2; speed++)
282     {
283       crtl->maybe_hot_insn_p = speed;
284       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
285 
286       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
287 	   mode = (machine_mode)(mode + 1))
288 	init_expmed_one_mode (&all, mode, speed);
289 
290       if (MIN_MODE_PARTIAL_INT != VOIDmode)
291 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
292 	     mode = (machine_mode)(mode + 1))
293 	  init_expmed_one_mode (&all, mode, speed);
294 
295       if (MIN_MODE_VECTOR_INT != VOIDmode)
296 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
297 	     mode = (machine_mode)(mode + 1))
298 	  init_expmed_one_mode (&all, mode, speed);
299     }
300 
301   if (alg_hash_used_p ())
302     {
303       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
304       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
305     }
306   else
307     set_alg_hash_used_p (true);
308   default_rtl_profile ();
309 
310   ggc_free (all.trunc);
311   ggc_free (all.shift_sub1);
312   ggc_free (all.shift_sub0);
313   ggc_free (all.shift_add);
314   ggc_free (all.shift_mult);
315   ggc_free (all.shift);
316   ggc_free (all.wide_trunc);
317   ggc_free (all.wide_lshr);
318   ggc_free (all.wide_mult);
319   ggc_free (all.zext);
320   ggc_free (all.smod_32);
321   ggc_free (all.sdiv_32);
322   ggc_free (all.udiv);
323   ggc_free (all.sdiv);
324   ggc_free (all.mult);
325   ggc_free (all.neg);
326   ggc_free (all.plus);
327   ggc_free (all.reg);
328 }
329 
330 /* Return an rtx representing minus the value of X.
331    MODE is the intended mode of the result,
332    useful if X is a CONST_INT.  */
333 
334 rtx
335 negate_rtx (machine_mode mode, rtx x)
336 {
337   rtx result = simplify_unary_operation (NEG, mode, x, mode);
338 
339   if (result == 0)
340     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
341 
342   return result;
343 }
344 
345 /* Whether reverse storage order is supported on the target.  */
346 static int reverse_storage_order_supported = -1;
347 
348 /* Check whether reverse storage order is supported on the target.  */
349 
350 static void
351 check_reverse_storage_order_support (void)
352 {
353   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
354     {
355       reverse_storage_order_supported = 0;
356       sorry ("reverse scalar storage order");
357     }
358   else
359     reverse_storage_order_supported = 1;
360 }
361 
362 /* Whether reverse FP storage order is supported on the target.  */
363 static int reverse_float_storage_order_supported = -1;
364 
365 /* Check whether reverse FP storage order is supported on the target.  */
366 
367 static void
368 check_reverse_float_storage_order_support (void)
369 {
370   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
371     {
372       reverse_float_storage_order_supported = 0;
373       sorry ("reverse floating-point scalar storage order");
374     }
375   else
376     reverse_float_storage_order_supported = 1;
377 }
378 
379 /* Return an rtx representing value of X with reverse storage order.
380    MODE is the intended mode of the result,
381    useful if X is a CONST_INT.  */
382 
383 rtx
384 flip_storage_order (machine_mode mode, rtx x)
385 {
386   scalar_int_mode int_mode;
387   rtx result;
388 
389   if (mode == QImode)
390     return x;
391 
392   if (COMPLEX_MODE_P (mode))
393     {
394       rtx real = read_complex_part (x, false);
395       rtx imag = read_complex_part (x, true);
396 
397       real = flip_storage_order (GET_MODE_INNER (mode), real);
398       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
399 
400       return gen_rtx_CONCAT (mode, real, imag);
401     }
402 
403   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
404     check_reverse_storage_order_support ();
405 
406   if (!is_a <scalar_int_mode> (mode, &int_mode))
407     {
408       if (FLOAT_MODE_P (mode)
409 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
410 	check_reverse_float_storage_order_support ();
411 
412       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
413 	{
414 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
415 	  return x;
416 	}
417       x = gen_lowpart (int_mode, x);
418     }
419 
420   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
421   if (result == 0)
422     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
423 
424   if (int_mode != mode)
425     result = gen_lowpart (mode, result);
426 
427   return result;
428 }
429 
430 /* If MODE is set, adjust bitfield memory MEM so that it points to the
431    first unit of mode MODE that contains a bitfield of size BITSIZE at
432    bit position BITNUM.  If MODE is not set, return a BLKmode reference
433    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
434    of the field within the new memory.  */
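/* For example, narrowing to SImode with BITNUM == 69 rebases the MEM by
   8 bytes and sets *NEW_BITNUM to 5, since bit 69 is bit 5 of the third
   32-bit unit (assuming BITS_PER_UNIT == 8).  */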
435 
436 static rtx
437 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
438 		      unsigned HOST_WIDE_INT bitsize,
439 		      unsigned HOST_WIDE_INT bitnum,
440 		      unsigned HOST_WIDE_INT *new_bitnum)
441 {
442   scalar_int_mode imode;
443   if (mode.exists (&imode))
444     {
445       unsigned int unit = GET_MODE_BITSIZE (imode);
446       *new_bitnum = bitnum % unit;
447       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
448       return adjust_bitfield_address (mem, imode, offset);
449     }
450   else
451     {
452       *new_bitnum = bitnum % BITS_PER_UNIT;
453       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
454       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
455 			    / BITS_PER_UNIT);
456       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
457     }
458 }
459 
460 /* The caller wants to perform insertion or extraction PATTERN on a
461    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
462    BITREGION_START and BITREGION_END are as for store_bit_field
463    and FIELDMODE is the natural mode of the field.
464 
465    Search for a mode that is compatible with the memory access
466    restrictions and (where applicable) with a register insertion or
467    extraction.  Return the new memory on success, storing the adjusted
468    bit position in *NEW_BITNUM.  Return null otherwise.  */
469 
470 static rtx
471 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
472 			      rtx op0, HOST_WIDE_INT bitsize,
473 			      HOST_WIDE_INT bitnum,
474 			      poly_uint64 bitregion_start,
475 			      poly_uint64 bitregion_end,
476 			      machine_mode fieldmode,
477 			      unsigned HOST_WIDE_INT *new_bitnum)
478 {
479   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
480 				bitregion_end, MEM_ALIGN (op0),
481 				MEM_VOLATILE_P (op0));
482   scalar_int_mode best_mode;
483   if (iter.next_mode (&best_mode))
484     {
485       /* We can use a memory in BEST_MODE.  See whether this is true for
486 	 any wider modes.  All other things being equal, we prefer to
487 	 use the widest mode possible because it tends to expose more
488 	 CSE opportunities.  */
489       if (!iter.prefer_smaller_modes ())
490 	{
491 	  /* Limit the search to the mode required by the corresponding
492 	     register insertion or extraction instruction, if any.  */
493 	  scalar_int_mode limit_mode = word_mode;
494 	  extraction_insn insn;
495 	  if (get_best_reg_extraction_insn (&insn, pattern,
496 					    GET_MODE_BITSIZE (best_mode),
497 					    fieldmode))
498 	    limit_mode = insn.field_mode;
499 
500 	  scalar_int_mode wider_mode;
501 	  while (iter.next_mode (&wider_mode)
502 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
503 	    best_mode = wider_mode;
504 	}
505       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
506 				   new_bitnum);
507     }
508   return NULL_RTX;
509 }
510 
511 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
512    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
513    offset is then BITNUM / BITS_PER_UNIT.  */
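/* For example, on a typical little-endian 64-bit target a 64-bit field
   at bit position 64 of a TImode register satisfies this test and can
   be accessed as a subreg at byte offset 8.  */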
514 
515 static bool
516 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
517 		     machine_mode struct_mode)
518 {
519   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
520   if (BYTES_BIG_ENDIAN)
521     return (multiple_p (bitnum, BITS_PER_UNIT)
522 	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
523 		|| multiple_p (bitnum + bitsize,
524 			       regsize * BITS_PER_UNIT)));
525   else
526     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
527 }
528 
529 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
530    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
531    Return false if the access would touch memory outside the range
532    BITREGION_START to BITREGION_END for conformance to the C++ memory
533    model.  */
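/* For instance, with -fstrict-volatile-bitfields a 3-bit field at bit 4
   of a 32-bit-aligned volatile SImode location passes these checks, so
   callers extract it with a single SImode load and store it with a
   single SImode read-modify-write sequence.  */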
534 
535 static bool
536 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
537 			    unsigned HOST_WIDE_INT bitnum,
538 			    scalar_int_mode fieldmode,
539 			    poly_uint64 bitregion_start,
540 			    poly_uint64 bitregion_end)
541 {
542   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
543 
544   /* -fstrict-volatile-bitfields must be enabled and we must have a
545      volatile MEM.  */
546   if (!MEM_P (op0)
547       || !MEM_VOLATILE_P (op0)
548       || flag_strict_volatile_bitfields <= 0)
549     return false;
550 
551   /* The bit size must not be larger than the field mode, and
552      the field mode must not be larger than a word.  */
553   if (bitsize > modesize || modesize > BITS_PER_WORD)
554     return false;
555 
556   /* Check for cases of unaligned fields that must be split.  */
557   if (bitnum % modesize + bitsize > modesize)
558     return false;
559 
560   /* The memory must be sufficiently aligned for a MODESIZE access.
561      This condition guarantees that the memory access will not
562      touch anything after the end of the structure.  */
563   if (MEM_ALIGN (op0) < modesize)
564     return false;
565 
566   /* Check for cases where the C++ memory model applies.  */
567   if (maybe_ne (bitregion_end, 0U)
568       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
569 	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
570 		       bitregion_end)))
571     return false;
572 
573   return true;
574 }
575 
576 /* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
577    bit number BITNUM can be treated as a simple value of mode MODE.
578    Store the byte offset in *BYTENUM if so.  */
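/* For example, a 32-bit field starting at bit 64 of a sufficiently
   aligned MEM satisfies this test for SImode with *BYTENUM set to 8,
   so it can be accessed as an ordinary SImode memory operand.  */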
579 
580 static bool
581 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
582 		       machine_mode mode, poly_uint64 *bytenum)
583 {
584   return (MEM_P (op0)
585 	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
586 	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
587 	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
588 	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
589 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
590 }
591 
592 /* Try to use instruction INSV to store VALUE into a field of OP0.
593    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
594    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
595    are as for store_bit_field.  */
596 
597 static bool
598 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
599 			    opt_scalar_int_mode op0_mode,
600 			    unsigned HOST_WIDE_INT bitsize,
601 			    unsigned HOST_WIDE_INT bitnum,
602 			    rtx value, scalar_int_mode value_mode)
603 {
604   class expand_operand ops[4];
605   rtx value1;
606   rtx xop0 = op0;
607   rtx_insn *last = get_last_insn ();
608   bool copy_back = false;
609 
610   scalar_int_mode op_mode = insv->field_mode;
611   unsigned int unit = GET_MODE_BITSIZE (op_mode);
612   if (bitsize == 0 || bitsize > unit)
613     return false;
614 
615   if (MEM_P (xop0))
616     /* Get a reference to the first byte of the field.  */
617     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
618 				 &bitnum);
619   else
620     {
621       /* Convert from counting within OP0 to counting in OP_MODE.  */
622       if (BYTES_BIG_ENDIAN)
623 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
624 
625       /* If xop0 is a register, we need it in OP_MODE
626 	 to make it acceptable to the format of insv.  */
627       if (GET_CODE (xop0) == SUBREG)
628 	/* We can't just change the mode, because this might clobber op0,
629 	   and we will need the original value of op0 if insv fails.  */
630 	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
631       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
632 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
633     }
634 
635   /* If the destination is a paradoxical subreg such that we need a
636      truncate to the inner mode, perform the insertion on a temporary and
637      truncate the result to the original destination.  Note that we can't
638      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
639      X) 0)) is (reg:N X).  */
640   if (GET_CODE (xop0) == SUBREG
641       && REG_P (SUBREG_REG (xop0))
642       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
643 					 op_mode))
644     {
645       rtx tem = gen_reg_rtx (op_mode);
646       emit_move_insn (tem, xop0);
647       xop0 = tem;
648       copy_back = true;
649     }
650 
651   /* There is a similar overflow check at the start of store_bit_field_1,
652      but it only handles the case where the field lies completely outside
653      the register.  The field can also lie partially inside the register,
654      and in that case we need to adjust BITSIZE for the partial overflow.
655      Without this fix, pr48335-2.c would be broken on big-endian targets
656      that have a bit-insert instruction, such as arm and aarch64.  */
658   if (bitsize + bitnum > unit && bitnum < unit)
659     {
660       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
661 	       "destination object, data truncated into %wu-bit",
662 	       bitsize, unit - bitnum);
663       bitsize = unit - bitnum;
664     }
665 
666   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
667      "backwards" from the size of the unit we are inserting into.
668      Otherwise, we count bits from the most significant on a
669      BYTES/BITS_BIG_ENDIAN machine.  */
670 
671   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
672     bitnum = unit - bitsize - bitnum;
673 
674   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
675   value1 = value;
676   if (value_mode != op_mode)
677     {
678       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
679 	{
680 	  rtx tmp;
681 	  /* Optimization: Don't bother really extending VALUE
682 	     if it has all the bits we will actually use.  However,
683 	     if we must narrow it, be sure we do it correctly.  */
684 
685 	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
686 	    {
687 	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
688 	      if (! tmp)
689 		tmp = simplify_gen_subreg (op_mode,
690 					   force_reg (value_mode, value1),
691 					   value_mode, 0);
692 	    }
693 	  else
694 	    {
695 	      tmp = gen_lowpart_if_possible (op_mode, value1);
696 	      if (! tmp)
697 		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
698 	    }
699 	  value1 = tmp;
700 	}
701       else if (CONST_INT_P (value))
702 	value1 = gen_int_mode (INTVAL (value), op_mode);
703       else
704 	/* Parse phase is supposed to make VALUE's data type
705 	   match that of the component reference, which is a type
706 	   at least as wide as the field; so VALUE should have
707 	   a mode that corresponds to that type.  */
708 	gcc_assert (CONSTANT_P (value));
709     }
710 
711   create_fixed_operand (&ops[0], xop0);
712   create_integer_operand (&ops[1], bitsize);
713   create_integer_operand (&ops[2], bitnum);
714   create_input_operand (&ops[3], value1, op_mode);
715   if (maybe_expand_insn (insv->icode, 4, ops))
716     {
717       if (copy_back)
718 	convert_move (op0, xop0, true);
719       return true;
720     }
721   delete_insns_since (last);
722   return false;
723 }
724 
725 /* A subroutine of store_bit_field, with the same arguments.  Return true
726    if the operation could be implemented.
727 
728    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
729    no other way of implementing the operation.  If FALLBACK_P is false,
730    return false instead.  */
731 
732 static bool
733 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
734 		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
735 		   machine_mode fieldmode,
736 		   rtx value, bool reverse, bool fallback_p)
737 {
738   rtx op0 = str_rtx;
739 
740   while (GET_CODE (op0) == SUBREG)
741     {
742       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
743       op0 = SUBREG_REG (op0);
744     }
745 
746   /* No action is needed if the target is a register and if the field
747      lies completely outside that register.  This can occur if the source
748      code contains an out-of-bounds access to a small array.  */
749   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
750     return true;
751 
752   /* Use vec_set patterns for inserting parts of vectors whenever
753      available.  */
754   machine_mode outermode = GET_MODE (op0);
755   scalar_mode innermode = GET_MODE_INNER (outermode);
756   poly_uint64 pos;
757   if (VECTOR_MODE_P (outermode)
758       && !MEM_P (op0)
759       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
760       && fieldmode == innermode
761       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
762       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
763     {
764       class expand_operand ops[3];
765       enum insn_code icode = optab_handler (vec_set_optab, outermode);
766 
767       create_fixed_operand (&ops[0], op0);
768       create_input_operand (&ops[1], value, innermode);
769       create_integer_operand (&ops[2], pos);
770       if (maybe_expand_insn (icode, 3, ops))
771 	return true;
772     }
773 
774   /* If the target is a register, overwriting the entire object or storing
775      a full-word or multi-word field can be done with just a SUBREG.  */
776   if (!MEM_P (op0)
777       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
778     {
779       /* Use the subreg machinery either to narrow OP0 to the required
780 	 words or to cope with mode punning between equal-sized modes.
781 	 In the latter case, use subreg on the rhs side, not lhs.  */
782       rtx sub;
783       HOST_WIDE_INT regnum;
784       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
785       if (known_eq (bitnum, 0U)
786 	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
787 	{
788 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
789 	  if (sub)
790 	    {
791 	      if (reverse)
792 		sub = flip_storage_order (GET_MODE (op0), sub);
793 	      emit_move_insn (op0, sub);
794 	      return true;
795 	    }
796 	}
797       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
798 	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
799 	{
800 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
801 				     regnum * regsize);
802 	  if (sub)
803 	    {
804 	      if (reverse)
805 		value = flip_storage_order (fieldmode, value);
806 	      emit_move_insn (sub, value);
807 	      return true;
808 	    }
809 	}
810     }
811 
812   /* If the target is memory, storing any naturally aligned field can be
813      done with a simple store.  For targets that support fast unaligned
814      memory, any naturally sized, unit aligned field can be done directly.  */
815   poly_uint64 bytenum;
816   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
817     {
818       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
819       if (reverse)
820 	value = flip_storage_order (fieldmode, value);
821       emit_move_insn (op0, value);
822       return true;
823     }
824 
825   /* It's possible we'll need to handle other cases here for
826      polynomial bitnum and bitsize.  */
827 
828   /* From here on we need to be looking at a fixed-size insertion.  */
829   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
830   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
831 
832   /* Make sure we are playing with integral modes.  Pun with subregs
833      if we aren't.  This must come after the entire register case above,
834      since that case is valid for any mode.  The following cases are only
835      valid for integral modes.  */
836   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
837   scalar_int_mode imode;
838   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
839     {
840       if (MEM_P (op0))
841 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
842 					    0, MEM_SIZE (op0));
843       else if (!op0_mode.exists ())
844 	{
845 	  if (ibitnum == 0
846 	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
847 	      && MEM_P (value)
848 	      && !reverse)
849 	    {
850 	      value = adjust_address (value, GET_MODE (op0), 0);
851 	      emit_move_insn (op0, value);
852 	      return true;
853 	    }
854 	  if (!fallback_p)
855 	    return false;
856 	  rtx temp = assign_stack_temp (GET_MODE (op0),
857 					GET_MODE_SIZE (GET_MODE (op0)));
858 	  emit_move_insn (temp, op0);
859 	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
860 			     reverse, fallback_p);
861 	  emit_move_insn (op0, temp);
862 	  return true;
863 	}
864       else
865 	op0 = gen_lowpart (op0_mode.require (), op0);
866     }
867 
868   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
869 				   bitregion_start, bitregion_end,
870 				   fieldmode, value, reverse, fallback_p);
871 }
872 
873 /* Subroutine of store_bit_field_1, with the same arguments, except
874    that BITSIZE and BITNUM are constant.  Handle cases specific to
875    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
876    otherwise OP0 is a BLKmode MEM.  */
877 
878 static bool
879 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
880 			  unsigned HOST_WIDE_INT bitsize,
881 			  unsigned HOST_WIDE_INT bitnum,
882 			  poly_uint64 bitregion_start,
883 			  poly_uint64 bitregion_end,
884 			  machine_mode fieldmode,
885 			  rtx value, bool reverse, bool fallback_p)
886 {
887   /* Storing an lsb-aligned field in a register
888      can be done with a movstrict instruction.  */
889 
890   if (!MEM_P (op0)
891       && !reverse
892       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
893       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
894       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
895     {
896       class expand_operand ops[2];
897       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
898       rtx arg0 = op0;
899       unsigned HOST_WIDE_INT subreg_off;
900 
901       if (GET_CODE (arg0) == SUBREG)
902 	{
903 	  /* Else we've got some float mode source being extracted into
904 	     a different float mode destination -- this combination of
905 	     subregs results in Severe Tire Damage.  */
906 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
907 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
908 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
909 	  arg0 = SUBREG_REG (arg0);
910 	}
911 
912       subreg_off = bitnum / BITS_PER_UNIT;
913       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
914 	{
915 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
916 
917 	  create_fixed_operand (&ops[0], arg0);
918 	  /* Shrink the source operand to FIELDMODE.  */
919 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
920 	  if (maybe_expand_insn (icode, 2, ops))
921 	    return true;
922 	}
923     }
924 
925   /* Handle fields bigger than a word.  */
926 
927   if (bitsize > BITS_PER_WORD)
928     {
929       /* Here we transfer the words of the field
930 	 in the order least significant first.
931 	 This is because the most significant word is the one which may
932 	 be less than full.
933 	 However, only do that if the value is not BLKmode.  */
934 
935       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
936       const int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
937       rtx_insn *last;
938 
939       /* This is the mode we must force value to, so that there will be enough
940 	 subwords to extract.  Note that fieldmode will often (always?) be
941 	 VOIDmode, because that is what store_field uses to indicate that this
942 	 is a bit field, but passing VOIDmode to operand_subword_force
943 	 is not allowed.
944 
945 	 The mode must be fixed-size, since insertions into variable-sized
946 	 objects are meant to be handled before calling this function.  */
947       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
948       if (value_mode == VOIDmode)
949 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
950 
951       last = get_last_insn ();
952       for (int i = 0; i < nwords; i++)
953 	{
954 	  /* Number of bits to be stored in this iteration, i.e. BITS_PER_WORD
955 	     except maybe for the last iteration.  */
956 	  const unsigned HOST_WIDE_INT new_bitsize
957 	    = MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
958 	  /* Bit offset from the starting bit number in the target.  */
959 	  const unsigned int bit_offset
960 	    = backwards ^ reverse
961 	      ? MAX ((int) bitsize - (i + 1) * BITS_PER_WORD, 0)
962 	      : i * BITS_PER_WORD;
963 	  /* Starting word number in the value.  */
964 	  const unsigned int wordnum
965 	    = backwards
966 	      ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD - (i + 1)
967 	      : i;
968 	  /* The chunk of the value in word_mode.  We use bit-field extraction
969 	      in BLKmode to handle unaligned memory references and to shift the
970 	      last chunk right on big-endian machines if need be.  */
971 	  rtx value_word
972 	    = fieldmode == BLKmode
973 	      ? extract_bit_field (value, new_bitsize, wordnum * BITS_PER_WORD,
974 				   1, NULL_RTX, word_mode, word_mode, false,
975 				   NULL)
976 	      : operand_subword_force (value, wordnum, value_mode);
977 
978 	  if (!store_bit_field_1 (op0, new_bitsize,
979 				  bitnum + bit_offset,
980 				  bitregion_start, bitregion_end,
981 				  word_mode,
982 				  value_word, reverse, fallback_p))
983 	    {
984 	      delete_insns_since (last);
985 	      return false;
986 	    }
987 	}
988       return true;
989     }
990 
991   /* If VALUE has a floating-point or complex mode, access it as an
992      integer of the corresponding size.  This can occur on a machine
993      with 64 bit registers that uses SFmode for float.  It can also
994      occur for unaligned float or complex fields.  */
995   rtx orig_value = value;
996   scalar_int_mode value_mode;
997   if (GET_MODE (value) == VOIDmode)
998     /* By this point we've dealt with values that are bigger than a word,
999        so word_mode is a conservatively correct choice.  */
1000     value_mode = word_mode;
1001   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1002     {
1003       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1004       value = gen_reg_rtx (value_mode);
1005       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1006     }
1007 
1008   /* If OP0 is a multi-word register, narrow it to the affected word.
1009      If the region spans two words, defer to store_split_bit_field.
1010      Don't do this if op0 is a single hard register wider than word
1011      such as a float or vector register.  */
1012   if (!MEM_P (op0)
1013       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1014       && (!REG_P (op0)
1015 	  || !HARD_REGISTER_P (op0)
1016 	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1017     {
1018       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1019 	{
1020 	  if (!fallback_p)
1021 	    return false;
1022 
1023 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1024 				 bitregion_start, bitregion_end,
1025 				 value, value_mode, reverse);
1026 	  return true;
1027 	}
1028       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1029 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1030       gcc_assert (op0);
1031       op0_mode = word_mode;
1032       bitnum %= BITS_PER_WORD;
1033     }
1034 
1035   /* From here on we can assume that the field to be stored in fits
1036      within a word.  If the destination is a register, it too fits
1037      in a word.  */
1038 
1039   extraction_insn insv;
1040   if (!MEM_P (op0)
1041       && !reverse
1042       && get_best_reg_extraction_insn (&insv, EP_insv,
1043 				       GET_MODE_BITSIZE (op0_mode.require ()),
1044 				       fieldmode)
1045       && store_bit_field_using_insv (&insv, op0, op0_mode,
1046 				     bitsize, bitnum, value, value_mode))
1047     return true;
1048 
1049   /* If OP0 is a memory, try copying it to a register and seeing if a
1050      cheap register alternative is available.  */
1051   if (MEM_P (op0) && !reverse)
1052     {
1053       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1054 					fieldmode)
1055 	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1056 					 bitsize, bitnum, value, value_mode))
1057 	return true;
1058 
1059       rtx_insn *last = get_last_insn ();
1060 
1061       /* Try loading part of OP0 into a register, inserting the bitfield
1062 	 into that, and then copying the result back to OP0.  */
1063       unsigned HOST_WIDE_INT bitpos;
1064       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1065 					       bitregion_start, bitregion_end,
1066 					       fieldmode, &bitpos);
1067       if (xop0)
1068 	{
1069 	  rtx tempreg = copy_to_reg (xop0);
1070 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1071 				 bitregion_start, bitregion_end,
1072 				 fieldmode, orig_value, reverse, false))
1073 	    {
1074 	      emit_move_insn (xop0, tempreg);
1075 	      return true;
1076 	    }
1077 	  delete_insns_since (last);
1078 	}
1079     }
1080 
1081   if (!fallback_p)
1082     return false;
1083 
1084   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1085 			 bitregion_end, value, value_mode, reverse);
1086   return true;
1087 }
1088 
1089 /* Generate code to store value from rtx VALUE
1090    into a bit-field within structure STR_RTX
1091    containing BITSIZE bits starting at bit BITNUM.
1092 
1093    BITREGION_START is bitpos of the first bitfield in this region.
1094    BITREGION_END is the bitpos of the ending bitfield in this region.
1095    These two fields are 0 if the C++ memory model does not apply,
1096    or we are not interested in keeping track of bitfield regions.
1097 
1098    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1099 
1100    If REVERSE is true, the store is to be done in reverse order.  */
1101 
1102 void
1103 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1104 		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1105 		 machine_mode fieldmode,
1106 		 rtx value, bool reverse)
1107 {
1108   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1109   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1110   scalar_int_mode int_mode;
1111   if (bitsize.is_constant (&ibitsize)
1112       && bitnum.is_constant (&ibitnum)
1113       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1114       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1115 				     bitregion_start, bitregion_end))
1116     {
1117       /* Storing a full word can be done with a simple store.
1118 	 We know here that the field can be accessed with one single
1119 	 instruction.  For targets that support unaligned memory,
1120 	 an unaligned access may be necessary.  */
1121       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1122 	{
1123 	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1124 					     ibitnum / BITS_PER_UNIT);
1125 	  if (reverse)
1126 	    value = flip_storage_order (int_mode, value);
1127 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1128 	  emit_move_insn (str_rtx, value);
1129 	}
1130       else
1131 	{
1132 	  rtx temp;
1133 
1134 	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1135 					  ibitnum, &ibitnum);
1136 	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1137 	  temp = copy_to_reg (str_rtx);
1138 	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1139 				  int_mode, value, reverse, true))
1140 	    gcc_unreachable ();
1141 
1142 	  emit_move_insn (str_rtx, temp);
1143 	}
1144 
1145       return;
1146     }
1147 
1148   /* Under the C++0x memory model, we must not touch bits outside the
1149      bit region.  Adjust the address to start at the beginning of the
1150      bit region.  */
1151   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1152     {
1153       scalar_int_mode best_mode;
1154       machine_mode addr_mode = VOIDmode;
1155 
1156       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1157       bitnum -= bitregion_start;
1158       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1159       bitregion_end -= bitregion_start;
1160       bitregion_start = 0;
1161       if (bitsize.is_constant (&ibitsize)
1162 	  && bitnum.is_constant (&ibitnum)
1163 	  && get_best_mode (ibitsize, ibitnum,
1164 			    bitregion_start, bitregion_end,
1165 			    MEM_ALIGN (str_rtx), INT_MAX,
1166 			    MEM_VOLATILE_P (str_rtx), &best_mode))
1167 	addr_mode = best_mode;
1168       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1169 					      offset, size);
1170     }
1171 
1172   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1173 			  bitregion_start, bitregion_end,
1174 			  fieldmode, value, reverse, true))
1175     gcc_unreachable ();
1176 }
1177 
1178 /* Use shifts and boolean operations to store VALUE into a bit field of
1179    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1180    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1181    the mode of VALUE.
1182 
1183    If REVERSE is true, the store is to be done in reverse order.  */
1184 
1185 static void
1186 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1187 		       unsigned HOST_WIDE_INT bitsize,
1188 		       unsigned HOST_WIDE_INT bitnum,
1189 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1190 		       rtx value, scalar_int_mode value_mode, bool reverse)
1191 {
1192   /* There is a case not handled here:
1193      a structure with a known alignment of just a halfword
1194      and a field split across two aligned halfwords within the structure.
1195      Or likewise a structure with a known alignment of just a byte
1196      and a field split across two bytes.
1197      Such cases are not supposed to be able to occur.  */
1198 
1199   scalar_int_mode best_mode;
1200   if (MEM_P (op0))
1201     {
1202       unsigned int max_bitsize = BITS_PER_WORD;
1203       scalar_int_mode imode;
1204       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1205 	max_bitsize = GET_MODE_BITSIZE (imode);
1206 
1207       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1208 			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1209 			  &best_mode))
1210 	{
1211 	  /* The only way this should occur is if the field spans word
1212 	     boundaries.  */
1213 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1214 				 bitregion_start, bitregion_end,
1215 				 value, value_mode, reverse);
1216 	  return;
1217 	}
1218 
1219       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1220     }
1221   else
1222     best_mode = op0_mode.require ();
1223 
1224   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1225 			   value, value_mode, reverse);
1226 }
1227 
1228 /* Helper function for store_fixed_bit_field, stores
1229    the bit field always using MODE, which is the mode of OP0.  The other
1230    arguments are as for store_fixed_bit_field.  */
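/* Conceptually the code below computes

     op0 = (op0 & ~(mask << bitnum)) | ((value & mask) << bitnum)

   where MASK has the low BITSIZE bits set, skipping the clearing step
   when VALUE is known to be all ones and the IOR when it is known to
   be zero.  */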
1231 
1232 static void
1233 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1234 			 unsigned HOST_WIDE_INT bitsize,
1235 			 unsigned HOST_WIDE_INT bitnum,
1236 			 rtx value, scalar_int_mode value_mode, bool reverse)
1237 {
1238   rtx temp;
1239   int all_zero = 0;
1240   int all_one = 0;
1241 
1242   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1243      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1244 
1245   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1246     /* BITNUM is the distance between our msb
1247        and that of the containing datum.
1248        Convert it to the distance from the lsb.  */
1249     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1250 
1251   /* Now BITNUM is always the distance between our lsb
1252      and that of OP0.  */
1253 
1254   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1255      we must first convert its mode to MODE.  */
1256 
1257   if (CONST_INT_P (value))
1258     {
1259       unsigned HOST_WIDE_INT v = UINTVAL (value);
1260 
1261       if (bitsize < HOST_BITS_PER_WIDE_INT)
1262 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1263 
1264       if (v == 0)
1265 	all_zero = 1;
1266       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1267 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1268 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1269 		   && v == HOST_WIDE_INT_M1U))
1270 	all_one = 1;
1271 
1272       value = lshift_value (mode, v, bitnum);
1273     }
1274   else
1275     {
1276       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1277 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1278 
1279       if (value_mode != mode)
1280 	value = convert_to_mode (mode, value, 1);
1281 
1282       if (must_and)
1283 	value = expand_binop (mode, and_optab, value,
1284 			      mask_rtx (mode, 0, bitsize, 0),
1285 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1286       if (bitnum > 0)
1287 	value = expand_shift (LSHIFT_EXPR, mode, value,
1288 			      bitnum, NULL_RTX, 1);
1289     }
1290 
1291   if (reverse)
1292     value = flip_storage_order (mode, value);
1293 
1294   /* Now clear the chosen bits in OP0,
1295      except that if VALUE is -1 we need not bother.  */
1296   /* We keep the intermediates in registers to allow CSE to combine
1297      consecutive bitfield assignments.  */
1298 
1299   temp = force_reg (mode, op0);
1300 
1301   if (! all_one)
1302     {
1303       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1304       if (reverse)
1305 	mask = flip_storage_order (mode, mask);
1306       temp = expand_binop (mode, and_optab, temp, mask,
1307 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1308       temp = force_reg (mode, temp);
1309     }
1310 
1311   /* Now logical-or VALUE into OP0, unless it is zero.  */
1312 
1313   if (! all_zero)
1314     {
1315       temp = expand_binop (mode, ior_optab, temp, value,
1316 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1317       temp = force_reg (mode, temp);
1318     }
1319 
1320   if (op0 != temp)
1321     {
1322       op0 = copy_rtx (op0);
1323       emit_move_insn (op0, temp);
1324     }
1325 }
1326 
1327 /* Store a bit field that is split across multiple accessible memory objects.
1328 
1329    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1330    BITSIZE is the field width; BITPOS the position of its first bit
1331    (within the word).
1332    VALUE is the value to store, which has mode VALUE_MODE.
1333    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1334    a BLKmode MEM.
1335 
1336    If REVERSE is true, the store is to be done in reverse order.
1337 
1338    This does not yet handle fields wider than BITS_PER_WORD.  */
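/* For example, on a little-endian target with 32-bit words, a 20-bit
   field starting at bit 58 of a multi-word register is stored as a
   6-bit piece at bit 26 of word 1 followed by a 14-bit piece at bit 0
   of word 2 (counting words from zero).  */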
1339 
1340 static void
1341 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1342 		       unsigned HOST_WIDE_INT bitsize,
1343 		       unsigned HOST_WIDE_INT bitpos,
1344 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1345 		       rtx value, scalar_int_mode value_mode, bool reverse)
1346 {
1347   unsigned int unit, total_bits, bitsdone = 0;
1348 
1349   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1350      much at a time.  */
1351   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1352     unit = BITS_PER_WORD;
1353   else
1354     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1355 
1356   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1357      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1358      again, and we will mutually recurse forever.  */
1359   if (MEM_P (op0) && op0_mode.exists ())
1360     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1361 
1362   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1363      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1364      that VALUE might be a floating-point constant.  */
1365   if (CONSTANT_P (value) && !CONST_INT_P (value))
1366     {
1367       rtx word = gen_lowpart_common (word_mode, value);
1368 
1369       if (word && (value != word))
1370 	value = word;
1371       else
1372 	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1373       value_mode = word_mode;
1374     }
1375 
1376   total_bits = GET_MODE_BITSIZE (value_mode);
1377 
1378   while (bitsdone < bitsize)
1379     {
1380       unsigned HOST_WIDE_INT thissize;
1381       unsigned HOST_WIDE_INT thispos;
1382       unsigned HOST_WIDE_INT offset;
1383       rtx part;
1384 
1385       offset = (bitpos + bitsdone) / unit;
1386       thispos = (bitpos + bitsdone) % unit;
1387 
1388       /* When the region of bytes we can touch is restricted, decrease
1389 	 UNIT close to the end of the region as needed.  If op0 is a REG
1390 	 or SUBREG of REG, don't do this, as there can't be data races
1391 	 on a register and we can expand shorter code in some cases.  */
1392       if (maybe_ne (bitregion_end, 0U)
1393 	  && unit > BITS_PER_UNIT
1394 	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1395 	  && !REG_P (op0)
1396 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1397 	{
1398 	  unit = unit / 2;
1399 	  continue;
1400 	}
1401 
1402       /* THISSIZE must not overrun a word boundary.  Otherwise,
1403 	 store_fixed_bit_field will call us again, and we will mutually
1404 	 recurse forever.  */
1405       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1406       thissize = MIN (thissize, unit - thispos);
1407 
1408       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1409 	{
1410 	  /* Fetch successively less significant portions.  */
1411 	  if (CONST_INT_P (value))
1412 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1413 			     >> (bitsize - bitsdone - thissize))
1414 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1415           /* Likewise, but the source is little-endian.  */
1416           else if (reverse)
1417 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1418 					    thissize,
1419 					    bitsize - bitsdone - thissize,
1420 					    NULL_RTX, 1, false);
1421 	  else
1422 	    /* The args are chosen so that the last part includes the
1423 	       lsb.  Give extract_bit_field the value it needs (with
1424 	       endianness compensation) to fetch the piece we want.  */
1425 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1426 					    thissize,
1427 					    total_bits - bitsize + bitsdone,
1428 					    NULL_RTX, 1, false);
1429 	}
1430       else
1431 	{
1432 	  /* Fetch successively more significant portions.  */
1433 	  if (CONST_INT_P (value))
1434 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1435 			     >> bitsdone)
1436 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1437 	  /* Likewise, but the source is big-endian.  */
1438           else if (reverse)
1439 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1440 					    thissize,
1441 					    total_bits - bitsdone - thissize,
1442 					    NULL_RTX, 1, false);
1443 	  else
1444 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1445 					    thissize, bitsdone, NULL_RTX,
1446 					    1, false);
1447 	}
1448 
1449       /* If OP0 is a register, then handle OFFSET here.  */
1450       rtx op0_piece = op0;
1451       opt_scalar_int_mode op0_piece_mode = op0_mode;
1452       if (SUBREG_P (op0) || REG_P (op0))
1453 	{
1454 	  scalar_int_mode imode;
1455 	  if (op0_mode.exists (&imode)
1456 	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1457 	    {
1458 	      if (offset)
1459 		op0_piece = const0_rtx;
1460 	    }
1461 	  else
1462 	    {
1463 	      op0_piece = operand_subword_force (op0,
1464 						 offset * unit / BITS_PER_WORD,
1465 						 GET_MODE (op0));
1466 	      op0_piece_mode = word_mode;
1467 	    }
1468 	  offset &= BITS_PER_WORD / unit - 1;
1469 	}
1470 
1471       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1472 	 it is just an out-of-bounds access.  Ignore it.  */
1473       if (op0_piece != const0_rtx)
1474 	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1475 			       offset * unit + thispos, bitregion_start,
1476 			       bitregion_end, part, word_mode, reverse);
1477       bitsdone += thissize;
1478     }
1479 }
1480 
1481 /* A subroutine of extract_bit_field_1 that converts return value X
1482    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1483    to extract_bit_field.  */
1484 
1485 static rtx
convert_extracted_bit_field(rtx x,machine_mode mode,machine_mode tmode,bool unsignedp)1486 convert_extracted_bit_field (rtx x, machine_mode mode,
1487 			     machine_mode tmode, bool unsignedp)
1488 {
1489   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1490     return x;
1491 
1492   /* If the x mode is not a scalar integral, first convert to the
1493      integer mode of that size and then access it as a floating-point
1494      value via a SUBREG.  */
1495   if (!SCALAR_INT_MODE_P (tmode))
1496     {
1497       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1498       x = convert_to_mode (int_mode, x, unsignedp);
1499       x = force_reg (int_mode, x);
1500       return gen_lowpart (tmode, x);
1501     }
1502 
1503   return convert_to_mode (tmode, x, unsignedp);
1504 }
1505 
1506 /* Try to use an ext(z)v pattern to extract a field from OP0.
1507    Return the extracted value on success, otherwise return null.
1508    EXTV describes the extraction instruction to use.  If OP0_MODE
1509    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1510    The other arguments are as for extract_bit_field.  */
1511 
1512 static rtx
extract_bit_field_using_extv(const extraction_insn * extv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode)1513 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1514 			      opt_scalar_int_mode op0_mode,
1515 			      unsigned HOST_WIDE_INT bitsize,
1516 			      unsigned HOST_WIDE_INT bitnum,
1517 			      int unsignedp, rtx target,
1518 			      machine_mode mode, machine_mode tmode)
1519 {
1520   class expand_operand ops[4];
1521   rtx spec_target = target;
1522   rtx spec_target_subreg = 0;
1523   scalar_int_mode ext_mode = extv->field_mode;
1524   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1525 
1526   if (bitsize == 0 || unit < bitsize)
1527     return NULL_RTX;
1528 
1529   if (MEM_P (op0))
1530     /* Get a reference to the first byte of the field.  */
1531     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1532 				&bitnum);
1533   else
1534     {
1535       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1536       if (BYTES_BIG_ENDIAN)
1537 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1538 
1539       /* If op0 is a register, we need it in EXT_MODE to make it
1540 	 acceptable to the format of ext(z)v.  */
1541       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1542 	return NULL_RTX;
1543       if (REG_P (op0) && op0_mode.require () != ext_mode)
1544 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1545     }
1546 
1547   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1548      "backwards" from the size of the unit we are extracting from.
1549      Otherwise, we count bits from the most significant on a
1550      BYTES/BITS_BIG_ENDIAN machine.  */
1551 
1552   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1553     bitnum = unit - bitsize - bitnum;
1554 
1555   if (target == 0)
1556     target = spec_target = gen_reg_rtx (tmode);
1557 
1558   if (GET_MODE (target) != ext_mode)
1559     {
1560       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1561 	 between the mode of the extraction (word_mode) and the target
1562 	 mode.  Instead, create a temporary and use convert_move to set
1563 	 the target.  */
1564       if (REG_P (target)
1565 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1566 	{
1567 	  target = gen_lowpart (ext_mode, target);
1568 	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1569 	    spec_target_subreg = target;
1570 	}
1571       else
1572 	target = gen_reg_rtx (ext_mode);
1573     }
1574 
1575   create_output_operand (&ops[0], target, ext_mode);
1576   create_fixed_operand (&ops[1], op0);
1577   create_integer_operand (&ops[2], bitsize);
1578   create_integer_operand (&ops[3], bitnum);
1579   if (maybe_expand_insn (extv->icode, 4, ops))
1580     {
1581       target = ops[0].value;
1582       if (target == spec_target)
1583 	return target;
1584       if (target == spec_target_subreg)
1585 	return spec_target;
1586       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1587     }
1588   return NULL_RTX;
1589 }
1590 
1591 /* See whether it would be valid to extract the part of OP0 described
1592    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1593    operation.  Return the subreg if so, otherwise return null.  */
1594 
1595 static rtx
extract_bit_field_as_subreg(machine_mode mode,rtx op0,poly_uint64 bitsize,poly_uint64 bitnum)1596 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1597 			     poly_uint64 bitsize, poly_uint64 bitnum)
1598 {
1599   poly_uint64 bytenum;
1600   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1601       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1602       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1603       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1604     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1605   return NULL_RTX;
1606 }
1607 
1608 /* A subroutine of extract_bit_field, with the same arguments.
1609    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1610    if we can find no other means of implementing the operation.
1611    if FALLBACK_P is false, return NULL instead.  */
1612 
1613 static rtx
extract_bit_field_1(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p,rtx * alt_rtl)1614 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1615 		     int unsignedp, rtx target, machine_mode mode,
1616 		     machine_mode tmode, bool reverse, bool fallback_p,
1617 		     rtx *alt_rtl)
1618 {
1619   rtx op0 = str_rtx;
1620   machine_mode mode1;
1621 
1622   if (tmode == VOIDmode)
1623     tmode = mode;
1624 
1625   while (GET_CODE (op0) == SUBREG)
1626     {
1627       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1628       op0 = SUBREG_REG (op0);
1629     }
1630 
1631   /* If we have an out-of-bounds access to a register, just return an
1632      uninitialized register of the required mode.  This can occur if the
1633      source code contains an out-of-bounds access to a small array.  */
1634   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1635     return gen_reg_rtx (tmode);
1636 
1637   if (REG_P (op0)
1638       && mode == GET_MODE (op0)
1639       && known_eq (bitnum, 0U)
1640       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1641     {
1642       if (reverse)
1643 	op0 = flip_storage_order (mode, op0);
1644       /* We're trying to extract a full register from itself.  */
1645       return op0;
1646     }
1647 
1648   /* First try to check for vector from vector extractions.  */
1649   if (VECTOR_MODE_P (GET_MODE (op0))
1650       && !MEM_P (op0)
1651       && VECTOR_MODE_P (tmode)
1652       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1653       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1654     {
1655       machine_mode new_mode = GET_MODE (op0);
1656       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1657 	{
1658 	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1659 	  poly_uint64 nunits;
1660 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1661 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1662 	      || !related_vector_mode (tmode, inner_mode,
1663 				       nunits).exists (&new_mode)
1664 	      || maybe_ne (GET_MODE_SIZE (new_mode),
1665 			   GET_MODE_SIZE (GET_MODE (op0))))
1666 	    new_mode = VOIDmode;
1667 	}
1668       poly_uint64 pos;
1669       if (new_mode != VOIDmode
1670 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1671 	      != CODE_FOR_nothing)
1672 	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1673 	{
1674 	  class expand_operand ops[3];
1675 	  machine_mode outermode = new_mode;
1676 	  machine_mode innermode = tmode;
1677 	  enum insn_code icode
1678 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1679 
1680 	  if (new_mode != GET_MODE (op0))
1681 	    op0 = gen_lowpart (new_mode, op0);
1682 	  create_output_operand (&ops[0], target, innermode);
1683 	  ops[0].target = 1;
1684 	  create_input_operand (&ops[1], op0, outermode);
1685 	  create_integer_operand (&ops[2], pos);
1686 	  if (maybe_expand_insn (icode, 3, ops))
1687 	    {
1688 	      if (alt_rtl && ops[0].target)
1689 		*alt_rtl = target;
1690 	      target = ops[0].value;
1691 	      if (GET_MODE (target) != mode)
1692 		return gen_lowpart (tmode, target);
1693 	      return target;
1694 	    }
1695 	}
1696     }
1697 
1698   /* See if we can get a better vector mode before extracting.  */
1699   if (VECTOR_MODE_P (GET_MODE (op0))
1700       && !MEM_P (op0)
1701       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1702     {
1703       machine_mode new_mode;
1704 
1705       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1706 	new_mode = MIN_MODE_VECTOR_FLOAT;
1707       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1708 	new_mode = MIN_MODE_VECTOR_FRACT;
1709       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1710 	new_mode = MIN_MODE_VECTOR_UFRACT;
1711       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1712 	new_mode = MIN_MODE_VECTOR_ACCUM;
1713       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1714 	new_mode = MIN_MODE_VECTOR_UACCUM;
1715       else
1716 	new_mode = MIN_MODE_VECTOR_INT;
1717 
1718       FOR_EACH_MODE_FROM (new_mode, new_mode)
1719 	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1720 	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1721 	    && targetm.vector_mode_supported_p (new_mode))
1722 	  break;
1723       if (new_mode != VOIDmode)
1724 	op0 = gen_lowpart (new_mode, op0);
1725     }
1726 
1727   /* Use vec_extract patterns for extracting parts of vectors whenever
1728      available.  If that fails, see whether the current modes and bitregion
1729      give a natural subreg.  */
1730   machine_mode outermode = GET_MODE (op0);
1731   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1732     {
1733       scalar_mode innermode = GET_MODE_INNER (outermode);
1734       enum insn_code icode
1735 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1736       poly_uint64 pos;
1737       if (icode != CODE_FOR_nothing
1738 	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1739 	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1740 	{
1741 	  class expand_operand ops[3];
1742 
1743 	  create_output_operand (&ops[0], target, innermode);
1744 	  ops[0].target = 1;
1745 	  create_input_operand (&ops[1], op0, outermode);
1746 	  create_integer_operand (&ops[2], pos);
1747 	  if (maybe_expand_insn (icode, 3, ops))
1748 	    {
1749 	      if (alt_rtl && ops[0].target)
1750 		*alt_rtl = target;
1751 	      target = ops[0].value;
1752 	      if (GET_MODE (target) != mode)
1753 		return gen_lowpart (tmode, target);
1754 	      return target;
1755 	    }
1756 	}
1757       /* Using subregs is useful if we're extracting one register vector
1758 	 from a multi-register vector.  extract_bit_field_as_subreg checks
1759 	 for valid bitsize and bitnum, so we don't need to do that here.  */
1760       if (VECTOR_MODE_P (mode))
1761 	{
1762 	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1763 	  if (sub)
1764 	    return sub;
1765 	}
1766     }
1767 
1768   /* Make sure we are playing with integral modes.  Pun with subregs
1769      if we aren't.  */
1770   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1771   scalar_int_mode imode;
1772   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1773     {
1774       if (MEM_P (op0))
1775 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1776 					    0, MEM_SIZE (op0));
1777       else if (op0_mode.exists (&imode))
1778 	{
1779 	  op0 = gen_lowpart (imode, op0);
1780 
1781 	  /* If we got a SUBREG, force it into a register since we
1782 	     aren't going to be able to do another SUBREG on it.  */
1783 	  if (GET_CODE (op0) == SUBREG)
1784 	    op0 = force_reg (imode, op0);
1785 	}
1786       else
1787 	{
1788 	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1789 	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1790 	  emit_move_insn (mem, op0);
1791 	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1792 	}
1793     }
1794 
1795   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1796      If that's wrong, the solution is to test for it and set TARGET to 0
1797      if needed.  */
1798 
1799   /* Get the mode of the field to use for atomic access or subreg
1800      conversion.  */
1801   if (!SCALAR_INT_MODE_P (tmode)
1802       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1803     mode1 = mode;
1804   gcc_assert (mode1 != BLKmode);
1805 
1806   /* Extraction of a full MODE1 value can be done with a subreg as long
1807      as the least significant bit of the value is the least significant
1808      bit of either OP0 or a word of OP0.  */
1809   if (!MEM_P (op0) && !reverse)
1810     {
1811       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1812       if (sub)
1813 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1814     }
1815 
1816   /* Extraction of a full MODE1 value can be done with a load as long as
1817      the field is on a byte boundary and is sufficiently aligned.  */
1818   poly_uint64 bytenum;
1819   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1820     {
1821       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1822       if (reverse)
1823 	op0 = flip_storage_order (mode1, op0);
1824       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1825     }
1826 
1827   /* If we have a memory source and a non-constant bit offset, restrict
1828      the memory to the referenced bytes.  This is a worst-case fallback
1829      but is useful for things like vector booleans.  */
1830   if (MEM_P (op0) && !bitnum.is_constant ())
1831     {
1832       bytenum = bits_to_bytes_round_down (bitnum);
1833       bitnum = num_trailing_bits (bitnum);
1834       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1835       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1836       op0_mode = opt_scalar_int_mode ();
1837     }
1838 
1839   /* It's possible we'll need to handle other cases here for
1840      polynomial bitnum and bitsize.  */
1841 
1842   /* From here on we need to be looking at a fixed-size insertion.  */
1843   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1844 				     bitnum.to_constant (), unsignedp,
1845 				     target, mode, tmode, reverse, fallback_p);
1846 }
1847 
1848 /* Subroutine of extract_bit_field_1, with the same arguments, except
1849    that BITSIZE and BITNUM are constant.  Handle cases specific to
1850    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1851    otherwise OP0 is a BLKmode MEM.  */
1852 
1853 static rtx
extract_integral_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p)1854 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1855 			    unsigned HOST_WIDE_INT bitsize,
1856 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1857 			    rtx target, machine_mode mode, machine_mode tmode,
1858 			    bool reverse, bool fallback_p)
1859 {
1860   /* Handle fields bigger than a word.  */
1861 
1862   if (bitsize > BITS_PER_WORD)
1863     {
1864       /* Here we transfer the words of the field
1865 	 in the order least significant first.
1866 	 This is because the most significant word is the one which may
1867 	 be less than full.  */
1868 
1869       const bool backwards = WORDS_BIG_ENDIAN;
1870       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1871       unsigned int i;
1872       rtx_insn *last;
1873 
1874       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1875 	target = gen_reg_rtx (mode);
1876 
1877       /* In case we're about to clobber a base register or something
1878 	 (see gcc.c-torture/execute/20040625-1.c).   */
1879       if (reg_mentioned_p (target, op0))
1880 	target = gen_reg_rtx (mode);
1881 
1882       /* Indicate for flow that the entire target reg is being set.  */
1883       emit_clobber (target);
1884 
1885       /* The mode must be fixed-size, since extract_bit_field_1 handles
1886 	 extractions from variable-sized objects before calling this
1887 	 function.  */
1888       unsigned int target_size
1889 	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1890       last = get_last_insn ();
1891       for (i = 0; i < nwords; i++)
1892 	{
1893 	  /* If I is 0, use the low-order word in both field and target;
1894 	     if I is 1, use the next to lowest word; and so on.  */
1895 	  /* Word number in TARGET to use.  */
1896 	  unsigned int wordnum
1897 	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1898 	  /* Offset from start of field in OP0.  */
1899 	  unsigned int bit_offset = (backwards ^ reverse
1900 				     ? MAX ((int) bitsize - ((int) i + 1)
1901 					    * BITS_PER_WORD,
1902 					    0)
1903 				     : (int) i * BITS_PER_WORD);
1904 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1905 	  rtx result_part
1906 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1907 					     bitsize - i * BITS_PER_WORD),
1908 				   bitnum + bit_offset, 1, target_part,
1909 				   mode, word_mode, reverse, fallback_p, NULL);
1910 
1911 	  gcc_assert (target_part);
1912 	  if (!result_part)
1913 	    {
1914 	      delete_insns_since (last);
1915 	      return NULL;
1916 	    }
1917 
1918 	  if (result_part != target_part)
1919 	    emit_move_insn (target_part, result_part);
1920 	}
1921 
1922       if (unsignedp)
1923 	{
1924 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1925 	     need to be zero'd out.  */
1926 	  if (target_size > nwords * UNITS_PER_WORD)
1927 	    {
1928 	      unsigned int i, total_words;
1929 
1930 	      total_words = target_size / UNITS_PER_WORD;
1931 	      for (i = nwords; i < total_words; i++)
1932 		emit_move_insn
1933 		  (operand_subword (target,
1934 				    backwards ? total_words - i - 1 : i,
1935 				    1, VOIDmode),
1936 		   const0_rtx);
1937 	    }
1938 	  return target;
1939 	}
1940 
1941       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1942       target = expand_shift (LSHIFT_EXPR, mode, target,
1943 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1944       return expand_shift (RSHIFT_EXPR, mode, target,
1945 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1946     }
1947 
1948   /* If OP0 is a multi-word register, narrow it to the affected word.
1949      If the region spans two words, defer to extract_split_bit_field.  */
1950   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1951     {
1952       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1953 	{
1954 	  if (!fallback_p)
1955 	    return NULL_RTX;
1956 	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1957 					    unsignedp, reverse);
1958 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1959 	}
1960       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1961 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1962       op0_mode = word_mode;
1963       bitnum %= BITS_PER_WORD;
1964     }
1965 
1966   /* From here on we know the desired field is smaller than a word.
1967      If OP0 is a register, it too fits within a word.  */
1968   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1969   extraction_insn extv;
1970   if (!MEM_P (op0)
1971       && !reverse
1972       /* ??? We could limit the structure size to the part of OP0 that
1973 	 contains the field, with appropriate checks for endianness
1974 	 and TARGET_TRULY_NOOP_TRUNCATION.  */
1975       && get_best_reg_extraction_insn (&extv, pattern,
1976 				       GET_MODE_BITSIZE (op0_mode.require ()),
1977 				       tmode))
1978     {
1979       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1980 						 bitsize, bitnum,
1981 						 unsignedp, target, mode,
1982 						 tmode);
1983       if (result)
1984 	return result;
1985     }
1986 
1987   /* If OP0 is a memory, try copying it to a register and seeing if a
1988      cheap register alternative is available.  */
1989   if (MEM_P (op0) & !reverse)
1990     {
1991       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1992 					tmode))
1993 	{
1994 	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1995 						     bitsize, bitnum,
1996 						     unsignedp, target, mode,
1997 						     tmode);
1998 	  if (result)
1999 	    return result;
2000 	}
2001 
2002       rtx_insn *last = get_last_insn ();
2003 
2004       /* Try loading part of OP0 into a register and extracting the
2005 	 bitfield from that.  */
2006       unsigned HOST_WIDE_INT bitpos;
2007       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2008 					       0, 0, tmode, &bitpos);
2009       if (xop0)
2010 	{
2011 	  xop0 = copy_to_reg (xop0);
2012 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2013 					    unsignedp, target,
2014 					    mode, tmode, reverse, false, NULL);
2015 	  if (result)
2016 	    return result;
2017 	  delete_insns_since (last);
2018 	}
2019     }
2020 
2021   if (!fallback_p)
2022     return NULL;
2023 
2024   /* Find a correspondingly-sized integer field, so we can apply
2025      shifts and masks to it.  */
2026   scalar_int_mode int_mode;
2027   if (!int_mode_for_mode (tmode).exists (&int_mode))
2028     /* If this fails, we should probably push op0 out to memory and then
2029        do a load.  */
2030     int_mode = int_mode_for_mode (mode).require ();
2031 
2032   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2033 				    bitnum, target, unsignedp, reverse);
2034 
2035   /* Complex values must be reversed piecewise, so we need to undo the global
2036      reversal, convert to the complex mode and reverse again.  */
2037   if (reverse && COMPLEX_MODE_P (tmode))
2038     {
2039       target = flip_storage_order (int_mode, target);
2040       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2041       target = flip_storage_order (tmode, target);
2042     }
2043   else
2044     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2045 
2046   return target;
2047 }
2048 
2049 /* Generate code to extract a byte-field from STR_RTX
2050    containing BITSIZE bits, starting at BITNUM,
2051    and put it in TARGET if possible (if TARGET is nonzero).
2052    Regardless of TARGET, we return the rtx for where the value is placed.
2053 
2054    STR_RTX is the structure containing the byte (a REG or MEM).
2055    UNSIGNEDP is nonzero if this is an unsigned bit field.
2056    MODE is the natural mode of the field value once extracted.
2057    TMODE is the mode the caller would like the value to have;
2058    but the value may be returned with type MODE instead.
2059 
2060    If REVERSE is true, the extraction is to be done in reverse order.
2061 
2062    If a TARGET is specified and we can store in it at no extra cost,
2063    we do so, and return TARGET.
2064    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2065    if they are equally easy.
2066 
2067    If the result can be stored at TARGET, and ALT_RTL is non-NULL,
2068    then *ALT_RTL is set to TARGET (before legitimziation).  */
2069 
2070 rtx
extract_bit_field(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,rtx * alt_rtl)2071 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2072 		   int unsignedp, rtx target, machine_mode mode,
2073 		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2074 {
2075   machine_mode mode1;
2076 
2077   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2078   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2079     mode1 = GET_MODE (str_rtx);
2080   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2081     mode1 = GET_MODE (target);
2082   else
2083     mode1 = tmode;
2084 
2085   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2086   scalar_int_mode int_mode;
2087   if (bitsize.is_constant (&ibitsize)
2088       && bitnum.is_constant (&ibitnum)
2089       && is_a <scalar_int_mode> (mode1, &int_mode)
2090       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2091 				     int_mode, 0, 0))
2092     {
2093       /* Extraction of a full INT_MODE value can be done with a simple load.
2094 	 We know here that the field can be accessed with one single
2095 	 instruction.  For targets that support unaligned memory,
2096 	 an unaligned access may be necessary.  */
2097       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2098 	{
2099 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2100 						ibitnum / BITS_PER_UNIT);
2101 	  if (reverse)
2102 	    result = flip_storage_order (int_mode, result);
2103 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2104 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2105 	}
2106 
2107       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2108 				      &ibitnum);
2109       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2110       str_rtx = copy_to_reg (str_rtx);
2111       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2112 				  target, mode, tmode, reverse, true, alt_rtl);
2113     }
2114 
2115   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2116 			      target, mode, tmode, reverse, true, alt_rtl);
2117 }
2118 
2119 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2120    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2121    otherwise OP0 is a BLKmode MEM.
2122 
2123    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2124    If REVERSE is true, the extraction is to be done in reverse order.
2125 
2126    If TARGET is nonzero, attempts to store the value there
2127    and return TARGET, but this is not guaranteed.
2128    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2129 
2130 static rtx
extract_fixed_bit_field(machine_mode tmode,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2131 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2132 			 opt_scalar_int_mode op0_mode,
2133 			 unsigned HOST_WIDE_INT bitsize,
2134 			 unsigned HOST_WIDE_INT bitnum, rtx target,
2135 			 int unsignedp, bool reverse)
2136 {
2137   scalar_int_mode mode;
2138   if (MEM_P (op0))
2139     {
2140       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2141 			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2142 	/* The only way this should occur is if the field spans word
2143 	   boundaries.  */
2144 	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2145 					unsignedp, reverse);
2146 
2147       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2148     }
2149   else
2150     mode = op0_mode.require ();
2151 
2152   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2153 				    target, unsignedp, reverse);
2154 }
2155 
2156 /* Helper function for extract_fixed_bit_field, extracts
2157    the bit field always using MODE, which is the mode of OP0.
2158    The other arguments are as for extract_fixed_bit_field.  */
2159 
2160 static rtx
extract_fixed_bit_field_1(machine_mode tmode,rtx op0,scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2161 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2162 			   unsigned HOST_WIDE_INT bitsize,
2163 			   unsigned HOST_WIDE_INT bitnum, rtx target,
2164 			   int unsignedp, bool reverse)
2165 {
2166   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2167      for invalid input, such as extract equivalent of f5 from
2168      gcc.dg/pr48335-2.c.  */
2169 
2170   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2171     /* BITNUM is the distance between our msb and that of OP0.
2172        Convert it to the distance from the lsb.  */
2173     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2174 
2175   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2176      We have reduced the big-endian case to the little-endian case.  */
2177   if (reverse)
2178     op0 = flip_storage_order (mode, op0);
2179 
2180   if (unsignedp)
2181     {
2182       if (bitnum)
2183 	{
2184 	  /* If the field does not already start at the lsb,
2185 	     shift it so it does.  */
2186 	  /* Maybe propagate the target for the shift.  */
2187 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2188 	  if (tmode != mode)
2189 	    subtarget = 0;
2190 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2191 	}
2192       /* Convert the value to the desired mode.  TMODE must also be a
2193 	 scalar integer for this conversion to make sense, since we
2194 	 shouldn't reinterpret the bits.  */
2195       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2196       if (mode != new_mode)
2197 	op0 = convert_to_mode (new_mode, op0, 1);
2198 
2199       /* Unless the msb of the field used to be the msb when we shifted,
2200 	 mask out the upper bits.  */
2201 
2202       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2203 	return expand_binop (new_mode, and_optab, op0,
2204 			     mask_rtx (new_mode, 0, bitsize, 0),
2205 			     target, 1, OPTAB_LIB_WIDEN);
2206       return op0;
2207     }
2208 
2209   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2210      then arithmetic-shift its lsb to the lsb of the word.  */
2211   op0 = force_reg (mode, op0);
2212 
2213   /* Find the narrowest integer mode that contains the field.  */
2214 
2215   opt_scalar_int_mode mode_iter;
2216   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2217     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2218       break;
2219 
2220   mode = mode_iter.require ();
2221   op0 = convert_to_mode (mode, op0, 0);
2222 
2223   if (mode != tmode)
2224     target = 0;
2225 
2226   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2227     {
2228       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2229       /* Maybe propagate the target for the shift.  */
2230       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2231       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2232     }
2233 
2234   return expand_shift (RSHIFT_EXPR, mode, op0,
2235 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2236 }
2237 
2238 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2239    VALUE << BITPOS.  */
2240 
2241 static rtx
lshift_value(machine_mode mode,unsigned HOST_WIDE_INT value,int bitpos)2242 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2243 	      int bitpos)
2244 {
2245   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2246 }
2247 
2248 /* Extract a bit field that is split across two words
2249    and return an RTX for the result.
2250 
2251    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2252    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2253    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2254    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2255    a BLKmode MEM.
2256 
2257    If REVERSE is true, the extraction is to be done in reverse order.  */
2258 
2259 static rtx
extract_split_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp,bool reverse)2260 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2261 			 unsigned HOST_WIDE_INT bitsize,
2262 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2263 			 bool reverse)
2264 {
2265   unsigned int unit;
2266   unsigned int bitsdone = 0;
2267   rtx result = NULL_RTX;
2268   int first = 1;
2269 
2270   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2271      much at a time.  */
2272   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2273     unit = BITS_PER_WORD;
2274   else
2275     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2276 
2277   while (bitsdone < bitsize)
2278     {
2279       unsigned HOST_WIDE_INT thissize;
2280       rtx part;
2281       unsigned HOST_WIDE_INT thispos;
2282       unsigned HOST_WIDE_INT offset;
2283 
2284       offset = (bitpos + bitsdone) / unit;
2285       thispos = (bitpos + bitsdone) % unit;
2286 
2287       /* THISSIZE must not overrun a word boundary.  Otherwise,
2288 	 extract_fixed_bit_field will call us again, and we will mutually
2289 	 recurse forever.  */
2290       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2291       thissize = MIN (thissize, unit - thispos);
2292 
2293       /* If OP0 is a register, then handle OFFSET here.  */
2294       rtx op0_piece = op0;
2295       opt_scalar_int_mode op0_piece_mode = op0_mode;
2296       if (SUBREG_P (op0) || REG_P (op0))
2297 	{
2298 	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2299 	  op0_piece_mode = word_mode;
2300 	  offset = 0;
2301 	}
2302 
2303       /* Extract the parts in bit-counting order,
2304 	 whose meaning is determined by BYTES_PER_UNIT.
2305 	 OFFSET is in UNITs, and UNIT is in bits.  */
2306       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2307 				      thissize, offset * unit + thispos,
2308 				      0, 1, reverse);
2309       bitsdone += thissize;
2310 
2311       /* Shift this part into place for the result.  */
2312       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2313 	{
2314 	  if (bitsize != bitsdone)
2315 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2316 				 bitsize - bitsdone, 0, 1);
2317 	}
2318       else
2319 	{
2320 	  if (bitsdone != thissize)
2321 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2322 				 bitsdone - thissize, 0, 1);
2323 	}
2324 
2325       if (first)
2326 	result = part;
2327       else
2328 	/* Combine the parts with bitwise or.  This works
2329 	   because we extracted each part as an unsigned bit field.  */
2330 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2331 			       OPTAB_LIB_WIDEN);
2332 
2333       first = 0;
2334     }
2335 
2336   /* Unsigned bit field: we are done.  */
2337   if (unsignedp)
2338     return result;
2339   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2340   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2341 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2342   return expand_shift (RSHIFT_EXPR, word_mode, result,
2343 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2344 }
2345 
2346 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2347    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2348    MODE, fill the upper bits with zeros.  Fail if the layout of either
2349    mode is unknown (as for CC modes) or if the extraction would involve
2350    unprofitable mode punning.  Return the value on success, otherwise
2351    return null.
2352 
2353    This is different from gen_lowpart* in these respects:
2354 
2355      - the returned value must always be considered an rvalue
2356 
2357      - when MODE is wider than SRC_MODE, the extraction involves
2358        a zero extension
2359 
2360      - when MODE is smaller than SRC_MODE, the extraction involves
2361        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2362 
2363    In other words, this routine performs a computation, whereas the
2364    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2365    operations.  */
2366 
2367 rtx
extract_low_bits(machine_mode mode,machine_mode src_mode,rtx src)2368 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2369 {
2370   scalar_int_mode int_mode, src_int_mode;
2371 
2372   if (mode == src_mode)
2373     return src;
2374 
2375   if (CONSTANT_P (src))
2376     {
2377       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2378 	 fails, it will happily create (subreg (symbol_ref)) or similar
2379 	 invalid SUBREGs.  */
2380       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2381       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2382       if (ret)
2383 	return ret;
2384 
2385       if (GET_MODE (src) == VOIDmode
2386 	  || !validate_subreg (mode, src_mode, src, byte))
2387 	return NULL_RTX;
2388 
2389       src = force_reg (GET_MODE (src), src);
2390       return gen_rtx_SUBREG (mode, src, byte);
2391     }
2392 
2393   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2394     return NULL_RTX;
2395 
2396   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2397       && targetm.modes_tieable_p (mode, src_mode))
2398     {
2399       rtx x = gen_lowpart_common (mode, src);
2400       if (x)
2401         return x;
2402     }
2403 
2404   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2405       || !int_mode_for_mode (mode).exists (&int_mode))
2406     return NULL_RTX;
2407 
2408   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2409     return NULL_RTX;
2410   if (!targetm.modes_tieable_p (int_mode, mode))
2411     return NULL_RTX;
2412 
2413   src = gen_lowpart (src_int_mode, src);
2414   if (!validate_subreg (int_mode, src_int_mode, src,
2415 			subreg_lowpart_offset (int_mode, src_int_mode)))
2416     return NULL_RTX;
2417 
2418   src = convert_modes (int_mode, src_int_mode, src, true);
2419   src = gen_lowpart (mode, src);
2420   return src;
2421 }
2422 
2423 /* Add INC into TARGET.  */
2424 
2425 void
expand_inc(rtx target,rtx inc)2426 expand_inc (rtx target, rtx inc)
2427 {
2428   rtx value = expand_binop (GET_MODE (target), add_optab,
2429 			    target, inc,
2430 			    target, 0, OPTAB_LIB_WIDEN);
2431   if (value != target)
2432     emit_move_insn (target, value);
2433 }
2434 
2435 /* Subtract DEC from TARGET.  */
2436 
2437 void
expand_dec(rtx target,rtx dec)2438 expand_dec (rtx target, rtx dec)
2439 {
2440   rtx value = expand_binop (GET_MODE (target), sub_optab,
2441 			    target, dec,
2442 			    target, 0, OPTAB_LIB_WIDEN);
2443   if (value != target)
2444     emit_move_insn (target, value);
2445 }
2446 
2447 /* Output a shift instruction for expression code CODE,
2448    with SHIFTED being the rtx for the value to shift,
2449    and AMOUNT the rtx for the amount to shift by.
2450    Store the result in the rtx TARGET, if that is convenient.
2451    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2452    Return the rtx for where the value is.
2453    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2454    in which case 0 is returned.  */
2455 
2456 static rtx
2457 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2458 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2459 {
2460   rtx op1, temp = 0;
2461   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2462   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2463   optab lshift_optab = ashl_optab;
2464   optab rshift_arith_optab = ashr_optab;
2465   optab rshift_uns_optab = lshr_optab;
2466   optab lrotate_optab = rotl_optab;
2467   optab rrotate_optab = rotr_optab;
2468   machine_mode op1_mode;
2469   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2470   int attempt;
2471   bool speed = optimize_insn_for_speed_p ();
2472 
2473   op1 = amount;
2474   op1_mode = GET_MODE (op1);
2475 
2476   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2477      shift amount is a vector, use the vector/vector shift patterns.  */
2478   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2479     {
2480       lshift_optab = vashl_optab;
2481       rshift_arith_optab = vashr_optab;
2482       rshift_uns_optab = vlshr_optab;
2483       lrotate_optab = vrotl_optab;
2484       rrotate_optab = vrotr_optab;
2485     }
2486 
2487   /* Previously detected shift-counts computed by NEGATE_EXPR
2488      and shifted in the other direction; but that does not work
2489      on all machines.  */
2490 
2491   if (SHIFT_COUNT_TRUNCATED)
2492     {
2493       if (CONST_INT_P (op1)
2494 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2495 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2496 	op1 = gen_int_shift_amount (mode,
2497 				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2498 				    % GET_MODE_BITSIZE (scalar_mode));
2499       else if (GET_CODE (op1) == SUBREG
2500 	       && subreg_lowpart_p (op1)
2501 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2502 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2503 	op1 = SUBREG_REG (op1);
2504     }
2505 
2506   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2507      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2508      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2509      amount instead.  */
2510   if (rotate
2511       && CONST_INT_P (op1)
2512       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2513 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2514     {
2515       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2516 					 - INTVAL (op1)));
2517       left = !left;
2518       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2519     }
2520 
2521   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2522      Note that this is not the case for bigger values.  For instance a rotation
2523      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2524      0x04030201 (bswapsi).  */
2525   if (rotate
2526       && CONST_INT_P (op1)
2527       && INTVAL (op1) == BITS_PER_UNIT
2528       && GET_MODE_SIZE (scalar_mode) == 2
2529       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2530     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2531 
2532   if (op1 == const0_rtx)
2533     return shifted;
2534 
2535   /* Check whether its cheaper to implement a left shift by a constant
2536      bit count by a sequence of additions.  */
2537   if (code == LSHIFT_EXPR
2538       && CONST_INT_P (op1)
2539       && INTVAL (op1) > 0
2540       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2541       && INTVAL (op1) < MAX_BITS_PER_WORD
2542       && (shift_cost (speed, mode, INTVAL (op1))
2543 	  > INTVAL (op1) * add_cost (speed, mode))
2544       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2545     {
2546       int i;
2547       for (i = 0; i < INTVAL (op1); i++)
2548 	{
2549 	  temp = force_reg (mode, shifted);
2550 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2551 				  unsignedp, OPTAB_LIB_WIDEN);
2552 	}
2553       return shifted;
2554     }
2555 
2556   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2557     {
2558       enum optab_methods methods;
2559 
2560       if (attempt == 0)
2561 	methods = OPTAB_DIRECT;
2562       else if (attempt == 1)
2563 	methods = OPTAB_WIDEN;
2564       else
2565 	methods = OPTAB_LIB_WIDEN;
2566 
2567       if (rotate)
2568 	{
2569 	  /* Widening does not work for rotation.  */
2570 	  if (methods == OPTAB_WIDEN)
2571 	    continue;
2572 	  else if (methods == OPTAB_LIB_WIDEN)
2573 	    {
2574 	      /* If we have been unable to open-code this by a rotation,
2575 		 do it as the IOR of two shifts.  I.e., to rotate A
2576 		 by N bits, compute
2577 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2578 		 where C is the bitsize of A.
2579 
2580 		 It is theoretically possible that the target machine might
2581 		 not be able to perform either shift and hence we would
2582 		 be making two libcalls rather than just the one for the
2583 		 shift (similarly if IOR could not be done).  We will allow
2584 		 this extremely unlikely lossage to avoid complicating the
2585 		 code below.  */
2586 
2587 	      rtx subtarget = target == shifted ? 0 : target;
2588 	      rtx new_amount, other_amount;
2589 	      rtx temp1;
2590 
2591 	      new_amount = op1;
2592 	      if (op1 == const0_rtx)
2593 		return shifted;
2594 	      else if (CONST_INT_P (op1))
2595 		other_amount = gen_int_shift_amount
2596 		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2597 	      else
2598 		{
2599 		  other_amount
2600 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2601 					  op1, GET_MODE (op1));
2602 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2603 		  other_amount
2604 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2605 					   gen_int_mode (mask, GET_MODE (op1)));
2606 		}
2607 
2608 	      shifted = force_reg (mode, shifted);
2609 
2610 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2611 				     mode, shifted, new_amount, 0, 1);
2612 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2613 				      mode, shifted, other_amount,
2614 				      subtarget, 1);
2615 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2616 				   unsignedp, methods);
2617 	    }
2618 
2619 	  temp = expand_binop (mode,
2620 			       left ? lrotate_optab : rrotate_optab,
2621 			       shifted, op1, target, unsignedp, methods);
2622 	}
2623       else if (unsignedp)
2624 	temp = expand_binop (mode,
2625 			     left ? lshift_optab : rshift_uns_optab,
2626 			     shifted, op1, target, unsignedp, methods);
2627 
2628       /* Do arithmetic shifts.
2629 	 Also, if we are going to widen the operand, we can just as well
2630 	 use an arithmetic right-shift instead of a logical one.  */
2631       if (temp == 0 && ! rotate
2632 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2633 	{
2634 	  enum optab_methods methods1 = methods;
2635 
2636 	  /* If trying to widen a log shift to an arithmetic shift,
2637 	     don't accept an arithmetic shift of the same size.  */
2638 	  if (unsignedp)
2639 	    methods1 = OPTAB_MUST_WIDEN;
2640 
2641 	  /* Arithmetic shift */
2642 
2643 	  temp = expand_binop (mode,
2644 			       left ? lshift_optab : rshift_arith_optab,
2645 			       shifted, op1, target, unsignedp, methods1);
2646 	}
2647 
2648       /* We used to try extzv here for logical right shifts, but that was
2649 	 only useful for one machine, the VAX, and caused poor code
2650 	 generation there for lshrdi3, so the code was deleted and a
2651 	 define_expand for lshrsi3 was added to vax.md.  */
2652     }
2653 
2654   gcc_assert (temp != NULL_RTX || may_fail);
2655   return temp;
2656 }
2657 
2658 /* Output a shift instruction for expression code CODE,
2659    with SHIFTED being the rtx for the value to shift,
2660    and AMOUNT the amount to shift by.
2661    Store the result in the rtx TARGET, if that is convenient.
2662    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2663    Return the rtx for where the value is.  */
2664 
2665 rtx
expand_shift(enum tree_code code,machine_mode mode,rtx shifted,poly_int64 amount,rtx target,int unsignedp)2666 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2667 	      poly_int64 amount, rtx target, int unsignedp)
2668 {
2669   return expand_shift_1 (code, mode, shifted,
2670 			 gen_int_shift_amount (mode, amount),
2671 			 target, unsignedp);
2672 }
2673 
2674 /* Likewise, but return 0 if that cannot be done.  */
2675 
2676 static rtx
maybe_expand_shift(enum tree_code code,machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2677 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2678 		    int amount, rtx target, int unsignedp)
2679 {
2680   return expand_shift_1 (code, mode,
2681 			 shifted, GEN_INT (amount), target, unsignedp, true);
2682 }
2683 
2684 /* Output a shift instruction for expression code CODE,
2685    with SHIFTED being the rtx for the value to shift,
2686    and AMOUNT the tree for the amount to shift by.
2687    Store the result in the rtx TARGET, if that is convenient.
2688    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2689    Return the rtx for where the value is.  */
2690 
2691 rtx
expand_variable_shift(enum tree_code code,machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2692 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2693 		       tree amount, rtx target, int unsignedp)
2694 {
2695   return expand_shift_1 (code, mode,
2696 			 shifted, expand_normal (amount), target, unsignedp);
2697 }
2698 
2699 
2700 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2701 			const struct mult_cost *, machine_mode mode);
2702 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2703 			      const struct algorithm *, enum mult_variant);
2704 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2705 static rtx extract_high_half (scalar_int_mode, rtx);
2706 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2707 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2708 				       int, int);
2709 /* Compute and return the best algorithm for multiplying by T.
2710    The algorithm must cost less than cost_limit
2711    If retval.cost >= COST_LIMIT, no algorithm was found and all
2712    other field of the returned struct are undefined.
2713    MODE is the machine mode of the multiplication.  */
2714 
2715 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,machine_mode mode)2716 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2717 	    const struct mult_cost *cost_limit, machine_mode mode)
2718 {
2719   int m;
2720   struct algorithm *alg_in, *best_alg;
2721   struct mult_cost best_cost;
2722   struct mult_cost new_limit;
2723   int op_cost, op_latency;
2724   unsigned HOST_WIDE_INT orig_t = t;
2725   unsigned HOST_WIDE_INT q;
2726   int maxm, hash_index;
2727   bool cache_hit = false;
2728   enum alg_code cache_alg = alg_zero;
2729   bool speed = optimize_insn_for_speed_p ();
2730   scalar_int_mode imode;
2731   struct alg_hash_entry *entry_ptr;
2732 
2733   /* Indicate that no algorithm is yet found.  If no algorithm
2734      is found, this value will be returned and indicate failure.  */
2735   alg_out->cost.cost = cost_limit->cost + 1;
2736   alg_out->cost.latency = cost_limit->latency + 1;
2737 
2738   if (cost_limit->cost < 0
2739       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2740     return;
2741 
2742   /* Be prepared for vector modes.  */
2743   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2744 
2745   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2746 
2747   /* Restrict the bits of "t" to the multiplication's mode.  */
2748   t &= GET_MODE_MASK (imode);
2749 
2750   /* t == 1 can be done in zero cost.  */
2751   if (t == 1)
2752     {
2753       alg_out->ops = 1;
2754       alg_out->cost.cost = 0;
2755       alg_out->cost.latency = 0;
2756       alg_out->op[0] = alg_m;
2757       return;
2758     }
2759 
2760   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2761      fail now.  */
2762   if (t == 0)
2763     {
2764       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2765 	return;
2766       else
2767 	{
2768 	  alg_out->ops = 1;
2769 	  alg_out->cost.cost = zero_cost (speed);
2770 	  alg_out->cost.latency = zero_cost (speed);
2771 	  alg_out->op[0] = alg_zero;
2772 	  return;
2773 	}
2774     }
2775 
2776   /* We'll be needing a couple extra algorithm structures now.  */
2777 
2778   alg_in = XALLOCA (struct algorithm);
2779   best_alg = XALLOCA (struct algorithm);
2780   best_cost = *cost_limit;
2781 
2782   /* Compute the hash index.  */
2783   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2784 
2785   /* See if we already know what to do for T.  */
2786   entry_ptr = alg_hash_entry_ptr (hash_index);
2787   if (entry_ptr->t == t
2788       && entry_ptr->mode == mode
2789       && entry_ptr->speed == speed
2790       && entry_ptr->alg != alg_unknown)
2791     {
2792       cache_alg = entry_ptr->alg;
2793 
2794       if (cache_alg == alg_impossible)
2795 	{
2796 	  /* The cache tells us that it's impossible to synthesize
2797 	     multiplication by T within entry_ptr->cost.  */
2798 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2799 	    /* COST_LIMIT is at least as restrictive as the one
2800 	       recorded in the hash table, in which case we have no
2801 	       hope of synthesizing a multiplication.  Just
2802 	       return.  */
2803 	    return;
2804 
2805 	  /* If we get here, COST_LIMIT is less restrictive than the
2806 	     one recorded in the hash table, so we may be able to
2807 	     synthesize a multiplication.  Proceed as if we didn't
2808 	     have the cache entry.  */
2809 	}
2810       else
2811 	{
2812 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2813 	    /* The cached algorithm shows that this multiplication
2814 	       requires more cost than COST_LIMIT.  Just return.  This
2815 	       way, we don't clobber this cache entry with
2816 	       alg_impossible but retain useful information.  */
2817 	    return;
2818 
2819 	  cache_hit = true;
2820 
2821 	  switch (cache_alg)
2822 	    {
2823 	    case alg_shift:
2824 	      goto do_alg_shift;
2825 
2826 	    case alg_add_t_m2:
2827 	    case alg_sub_t_m2:
2828 	      goto do_alg_addsub_t_m2;
2829 
2830 	    case alg_add_factor:
2831 	    case alg_sub_factor:
2832 	      goto do_alg_addsub_factor;
2833 
2834 	    case alg_add_t2_m:
2835 	      goto do_alg_add_t2_m;
2836 
2837 	    case alg_sub_t2_m:
2838 	      goto do_alg_sub_t2_m;
2839 
2840 	    default:
2841 	      gcc_unreachable ();
2842 	    }
2843 	}
2844     }
2845 
2846   /* If we have a group of zero bits at the low-order part of T, try
2847      multiplying by the remaining bits and then doing a shift.  */
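  /* For example, t = 20 (binary 10100) gives m = 2 and q = 5, so the
     multiplication is synthesized as x * 5 followed by a left shift by 2.  */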
2848 
2849   if ((t & 1) == 0)
2850     {
2851     do_alg_shift:
2852       m = ctz_or_zero (t); /* m = number of low zero bits */
2853       if (m < maxm)
2854 	{
2855 	  q = t >> m;
2856 	  /* The function expand_shift will choose between a shift and
2857 	     a sequence of additions, so the observed cost is given as
2858 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2859 	  op_cost = m * add_cost (speed, mode);
2860 	  if (shift_cost (speed, mode, m) < op_cost)
2861 	    op_cost = shift_cost (speed, mode, m);
2862 	  new_limit.cost = best_cost.cost - op_cost;
2863 	  new_limit.latency = best_cost.latency - op_cost;
2864 	  synth_mult (alg_in, q, &new_limit, mode);
2865 
2866 	  alg_in->cost.cost += op_cost;
2867 	  alg_in->cost.latency += op_cost;
2868 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869 	    {
2870 	      best_cost = alg_in->cost;
2871 	      std::swap (alg_in, best_alg);
2872 	      best_alg->log[best_alg->ops] = m;
2873 	      best_alg->op[best_alg->ops] = alg_shift;
2874 	    }
2875 
2876 	  /* See if treating ORIG_T as a signed number yields a better
2877 	     sequence.  Try this sequence only for a negative ORIG_T
2878 	     as it would be useless for a non-negative ORIG_T.  */
2879 	  if ((HOST_WIDE_INT) orig_t < 0)
2880 	    {
2881 	      /* Shift ORIG_T as follows because a right shift of a
2882 		 negative-valued signed type is implementation
2883 		 defined.  */
2884 	      q = ~(~orig_t >> m);
2885 	      /* The function expand_shift will choose between a shift
2886 		 and a sequence of additions, so the observed cost is
2887 		 given as MIN (m * add_cost(speed, mode),
2888 		 shift_cost(speed, mode, m)).  */
2889 	      op_cost = m * add_cost (speed, mode);
2890 	      if (shift_cost (speed, mode, m) < op_cost)
2891 		op_cost = shift_cost (speed, mode, m);
2892 	      new_limit.cost = best_cost.cost - op_cost;
2893 	      new_limit.latency = best_cost.latency - op_cost;
2894 	      synth_mult (alg_in, q, &new_limit, mode);
2895 
2896 	      alg_in->cost.cost += op_cost;
2897 	      alg_in->cost.latency += op_cost;
2898 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2899 		{
2900 		  best_cost = alg_in->cost;
2901 		  std::swap (alg_in, best_alg);
2902 		  best_alg->log[best_alg->ops] = m;
2903 		  best_alg->op[best_alg->ops] = alg_shift;
2904 		}
2905 	    }
2906 	}
2907       if (cache_hit)
2908 	goto done;
2909     }
2910 
2911   /* If we have an odd number, add or subtract one.  */
2912   if ((t & 1) != 0)
2913     {
2914       unsigned HOST_WIDE_INT w;
2915 
2916     do_alg_addsub_t_m2:
2917       for (w = 1; (w & t) != 0; w <<= 1)
2918 	;
2919       /* If T was -1, then W will be zero after the loop.  This is another
2920 	 case where T ends with ...111.  Handling it as (T + 1) followed by
2921 	 a subtraction produces slightly better code and makes algorithm
2922 	 selection much faster than treating it like the ...0111 case
2923 	 below.  */
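      /* For example, t = 7 (binary 111) is handled by synthesizing x * 8
         (a single shift left by 3) and then subtracting x, i.e. the result
         is computed as (x << 3) - x.  */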
2924       if (w == 0
2925 	  || (w > 2
2926 	      /* Reject the case where t is 3.
2927 		 Thus we prefer addition in that case.  */
2928 	      && t != 3))
2929 	{
2930 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2931 
2932 	  op_cost = add_cost (speed, mode);
2933 	  new_limit.cost = best_cost.cost - op_cost;
2934 	  new_limit.latency = best_cost.latency - op_cost;
2935 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2936 
2937 	  alg_in->cost.cost += op_cost;
2938 	  alg_in->cost.latency += op_cost;
2939 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2940 	    {
2941 	      best_cost = alg_in->cost;
2942 	      std::swap (alg_in, best_alg);
2943 	      best_alg->log[best_alg->ops] = 0;
2944 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2945 	    }
2946 	}
2947       else
2948 	{
2949 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2950 
2951 	  op_cost = add_cost (speed, mode);
2952 	  new_limit.cost = best_cost.cost - op_cost;
2953 	  new_limit.latency = best_cost.latency - op_cost;
2954 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2955 
2956 	  alg_in->cost.cost += op_cost;
2957 	  alg_in->cost.latency += op_cost;
2958 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2959 	    {
2960 	      best_cost = alg_in->cost;
2961 	      std::swap (alg_in, best_alg);
2962 	      best_alg->log[best_alg->ops] = 0;
2963 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2964 	    }
2965 	}
2966 
2967       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2968 	 quickly with a - a * n for some appropriate constant n.  */
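      /* For example, ORIG_T = -7 gives m = 3 here, so x * -7 is synthesized
         as x - (x << 3).  */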
2969       m = exact_log2 (-orig_t + 1);
2970       if (m >= 0 && m < maxm)
2971 	{
2972 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2973 	  /* If the target has a cheap shift-and-subtract insn, use
2974 	     that in preference to a shift insn followed by a sub insn.
2975 	     Assume that the shift-and-sub is "atomic" with a latency
2976 	     equal to its cost; otherwise assume that on superscalar
2977 	     hardware the shift may be executed concurrently with the
2978 	     earlier steps in the algorithm.  */
2979 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2980 	    {
2981 	      op_cost = shiftsub1_cost (speed, mode, m);
2982 	      op_latency = op_cost;
2983 	    }
2984 	  else
2985 	    op_latency = add_cost (speed, mode);
2986 
2987 	  new_limit.cost = best_cost.cost - op_cost;
2988 	  new_limit.latency = best_cost.latency - op_latency;
2989 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2990 		      &new_limit, mode);
2991 
2992 	  alg_in->cost.cost += op_cost;
2993 	  alg_in->cost.latency += op_latency;
2994 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2995 	    {
2996 	      best_cost = alg_in->cost;
2997 	      std::swap (alg_in, best_alg);
2998 	      best_alg->log[best_alg->ops] = m;
2999 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
3000 	    }
3001 	}
3002 
3003       if (cache_hit)
3004 	goto done;
3005     }
3006 
3007   /* Look for factors of t of the form
3008      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3009      If we find such a factor, we can multiply by t using an algorithm that
3010      multiplies by q, shifts the result left by m and adds/subtracts it to/from itself.
3011 
3012      We search for large factors first and loop down, even if large factors
3013      are less probable than small; if we find a large factor we will find a
3014      good sequence quickly, and therefore be able to prune (by decreasing
3015      COST_LIMIT) the search.  */
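  /* For example, t = 45 = 5 * (2**3 + 1), so with m = 3 and d = 9 the
     alg_add_factor step rewrites x * 45 as (x*5 << 3) + x*5, leaving the
     multiplication by 5 to the recursive call.  */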
3016 
3017  do_alg_addsub_factor:
3018   for (m = floor_log2 (t - 1); m >= 2; m--)
3019     {
3020       unsigned HOST_WIDE_INT d;
3021 
3022       d = (HOST_WIDE_INT_1U << m) + 1;
3023       if (t % d == 0 && t > d && m < maxm
3024 	  && (!cache_hit || cache_alg == alg_add_factor))
3025 	{
3026 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3027 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3028 	    op_cost = shiftadd_cost (speed, mode, m);
3029 
3030 	  op_latency = op_cost;
3031 
3032 
3033 	  new_limit.cost = best_cost.cost - op_cost;
3034 	  new_limit.latency = best_cost.latency - op_latency;
3035 	  synth_mult (alg_in, t / d, &new_limit, mode);
3036 
3037 	  alg_in->cost.cost += op_cost;
3038 	  alg_in->cost.latency += op_latency;
3039 	  if (alg_in->cost.latency < op_cost)
3040 	    alg_in->cost.latency = op_cost;
3041 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3042 	    {
3043 	      best_cost = alg_in->cost;
3044 	      std::swap (alg_in, best_alg);
3045 	      best_alg->log[best_alg->ops] = m;
3046 	      best_alg->op[best_alg->ops] = alg_add_factor;
3047 	    }
3048 	  /* Other factors will have been taken care of in the recursion.  */
3049 	  break;
3050 	}
3051 
3052       d = (HOST_WIDE_INT_1U << m) - 1;
3053       if (t % d == 0 && t > d && m < maxm
3054 	  && (!cache_hit || cache_alg == alg_sub_factor))
3055 	{
3056 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3057 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3058 	    op_cost = shiftsub0_cost (speed, mode, m);
3059 
3060 	  op_latency = op_cost;
3061 
3062 	  new_limit.cost = best_cost.cost - op_cost;
3063 	  new_limit.latency = best_cost.latency - op_latency;
3064 	  synth_mult (alg_in, t / d, &new_limit, mode);
3065 
3066 	  alg_in->cost.cost += op_cost;
3067 	  alg_in->cost.latency += op_latency;
3068 	  if (alg_in->cost.latency < op_cost)
3069 	    alg_in->cost.latency = op_cost;
3070 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3071 	    {
3072 	      best_cost = alg_in->cost;
3073 	      std::swap (alg_in, best_alg);
3074 	      best_alg->log[best_alg->ops] = m;
3075 	      best_alg->op[best_alg->ops] = alg_sub_factor;
3076 	    }
3077 	  break;
3078 	}
3079     }
3080   if (cache_hit)
3081     goto done;
3082 
3083   /* Try shift-and-add (load effective address) instructions,
3084      i.e. do a*3, a*5, a*9.  */
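  /* For example, t = 9 gives q = 8 and m = 3 here, so x * 9 becomes
     (x << 3) + x, which targets with a shift-and-add (LEA-like) insn can
     emit as a single instruction.  */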
3085   if ((t & 1) != 0)
3086     {
3087     do_alg_add_t2_m:
3088       q = t - 1;
3089       m = ctz_hwi (q);
3090       if (q && m < maxm)
3091 	{
3092 	  op_cost = shiftadd_cost (speed, mode, m);
3093 	  new_limit.cost = best_cost.cost - op_cost;
3094 	  new_limit.latency = best_cost.latency - op_cost;
3095 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3096 
3097 	  alg_in->cost.cost += op_cost;
3098 	  alg_in->cost.latency += op_cost;
3099 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3100 	    {
3101 	      best_cost = alg_in->cost;
3102 	      std::swap (alg_in, best_alg);
3103 	      best_alg->log[best_alg->ops] = m;
3104 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3105 	    }
3106 	}
3107       if (cache_hit)
3108 	goto done;
3109 
3110     do_alg_sub_t2_m:
3111       q = t + 1;
3112       m = ctz_hwi (q);
3113       if (q && m < maxm)
3114 	{
3115 	  op_cost = shiftsub0_cost (speed, mode, m);
3116 	  new_limit.cost = best_cost.cost - op_cost;
3117 	  new_limit.latency = best_cost.latency - op_cost;
3118 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3119 
3120 	  alg_in->cost.cost += op_cost;
3121 	  alg_in->cost.latency += op_cost;
3122 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3123 	    {
3124 	      best_cost = alg_in->cost;
3125 	      std::swap (alg_in, best_alg);
3126 	      best_alg->log[best_alg->ops] = m;
3127 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3128 	    }
3129 	}
3130       if (cache_hit)
3131 	goto done;
3132     }
3133 
3134  done:
3135   /* If best_cost has not decreased, we have not found any algorithm.  */
3136   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3137     {
3138       /* We failed to find an algorithm.  Record alg_impossible for
3139 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3140 	 we are asked to find an algorithm for T within the same or
3141 	 lower COST_LIMIT, we can immediately return to the
3142 	 caller.  */
3143       entry_ptr->t = t;
3144       entry_ptr->mode = mode;
3145       entry_ptr->speed = speed;
3146       entry_ptr->alg = alg_impossible;
3147       entry_ptr->cost = *cost_limit;
3148       return;
3149     }
3150 
3151   /* Cache the result.  */
3152   if (!cache_hit)
3153     {
3154       entry_ptr->t = t;
3155       entry_ptr->mode = mode;
3156       entry_ptr->speed = speed;
3157       entry_ptr->alg = best_alg->op[best_alg->ops];
3158       entry_ptr->cost.cost = best_cost.cost;
3159       entry_ptr->cost.latency = best_cost.latency;
3160     }
3161 
3162   /* If the sequence is too long for `struct algorithm' to record,
3163      make this search fail.  */
3164   if (best_alg->ops == MAX_BITS_PER_WORD)
3165     return;
3166 
3167   /* Copy the algorithm from temporary space to the space at alg_out.
3168      We avoid using structure assignment because the majority of
3169      best_alg is normally undefined, and this is a critical function.  */
3170   alg_out->ops = best_alg->ops + 1;
3171   alg_out->cost = best_cost;
3172   memcpy (alg_out->op, best_alg->op,
3173 	  alg_out->ops * sizeof *alg_out->op);
3174   memcpy (alg_out->log, best_alg->log,
3175 	  alg_out->ops * sizeof *alg_out->log);
3176 }
3177 
3178 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3179    Try three variations:
3180 
3181        - a shift/add sequence based on VAL itself
3182        - a shift/add sequence based on -VAL, followed by a negation
3183        - a shift/add sequence based on VAL - 1, followed by an addition.
3184 
3185    Return true if the cheapest of these cost less than MULT_COST,
3186    describing the algorithm in *ALG and final fixup in *VARIANT.  */
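/* For example, VAL = -5 is typically cheapest via the second variant:
   synthesize x * 5 as (x << 2) + x and then negate the result.  */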
3187 
3188 bool
3189 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3190 		     struct algorithm *alg, enum mult_variant *variant,
3191 		     int mult_cost)
3192 {
3193   struct algorithm alg2;
3194   struct mult_cost limit;
3195   int op_cost;
3196   bool speed = optimize_insn_for_speed_p ();
3197 
3198   /* Fail quickly for impossible bounds.  */
3199   if (mult_cost < 0)
3200     return false;
3201 
3202   /* Ensure that mult_cost provides a reasonable upper bound.
3203      Any constant multiplication can be performed with fewer
3204      than 2 * bits additions.  */
3205   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3206   if (mult_cost > op_cost)
3207     mult_cost = op_cost;
3208 
3209   *variant = basic_variant;
3210   limit.cost = mult_cost;
3211   limit.latency = mult_cost;
3212   synth_mult (alg, val, &limit, mode);
3213 
3214   /* This works only if the negated value actually fits in an
3215      `unsigned int'.  */
3216   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3217     {
3218       op_cost = neg_cost (speed, mode);
3219       if (MULT_COST_LESS (&alg->cost, mult_cost))
3220 	{
3221 	  limit.cost = alg->cost.cost - op_cost;
3222 	  limit.latency = alg->cost.latency - op_cost;
3223 	}
3224       else
3225 	{
3226 	  limit.cost = mult_cost - op_cost;
3227 	  limit.latency = mult_cost - op_cost;
3228 	}
3229 
3230       synth_mult (&alg2, -val, &limit, mode);
3231       alg2.cost.cost += op_cost;
3232       alg2.cost.latency += op_cost;
3233       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3234 	*alg = alg2, *variant = negate_variant;
3235     }
3236 
3237   /* This proves very useful for division-by-constant.  */
3238   op_cost = add_cost (speed, mode);
3239   if (MULT_COST_LESS (&alg->cost, mult_cost))
3240     {
3241       limit.cost = alg->cost.cost - op_cost;
3242       limit.latency = alg->cost.latency - op_cost;
3243     }
3244   else
3245     {
3246       limit.cost = mult_cost - op_cost;
3247       limit.latency = mult_cost - op_cost;
3248     }
3249 
3250   synth_mult (&alg2, val - 1, &limit, mode);
3251   alg2.cost.cost += op_cost;
3252   alg2.cost.latency += op_cost;
3253   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3254     *alg = alg2, *variant = add_variant;
3255 
3256   return MULT_COST_LESS (&alg->cost, mult_cost);
3257 }
3258 
3259 /* A subroutine of expand_mult, used for constant multiplications.
3260    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3261    convenient.  Use the shift/add sequence described by ALG and apply
3262    the final fixup specified by VARIANT.  */
3263 
3264 static rtx
3265 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3266 		   rtx target, const struct algorithm *alg,
3267 		   enum mult_variant variant)
3268 {
3269   unsigned HOST_WIDE_INT val_so_far;
3270   rtx_insn *insn;
3271   rtx accum, tem;
3272   int opno;
3273   machine_mode nmode;
3274 
3275   /* Avoid referencing memory over and over and invalid sharing
3276      on SUBREGs.  */
3277   op0 = force_reg (mode, op0);
3278 
3279   /* ACCUM starts out either as OP0 or as a zero, depending on
3280      the first operation.  */
3281 
3282   if (alg->op[0] == alg_zero)
3283     {
3284       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3285       val_so_far = 0;
3286     }
3287   else if (alg->op[0] == alg_m)
3288     {
3289       accum = copy_to_mode_reg (mode, op0);
3290       val_so_far = 1;
3291     }
3292   else
3293     gcc_unreachable ();
3294 
3295   for (opno = 1; opno < alg->ops; opno++)
3296     {
3297       int log = alg->log[opno];
3298       rtx shift_subtarget = optimize ? 0 : accum;
3299       rtx add_target
3300 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3301 	   && !optimize)
3302 	  ? target : 0;
3303       rtx accum_target = optimize ? 0 : accum;
3304       rtx accum_inner;
3305 
3306       switch (alg->op[opno])
3307 	{
3308 	case alg_shift:
3309 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3310 	  /* REG_EQUAL note will be attached to the following insn.  */
3311 	  emit_move_insn (accum, tem);
3312 	  val_so_far <<= log;
3313 	  break;
3314 
3315 	case alg_add_t_m2:
3316 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3317 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3318 				 add_target ? add_target : accum_target);
3319 	  val_so_far += HOST_WIDE_INT_1U << log;
3320 	  break;
3321 
3322 	case alg_sub_t_m2:
3323 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3324 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3325 				 add_target ? add_target : accum_target);
3326 	  val_so_far -= HOST_WIDE_INT_1U << log;
3327 	  break;
3328 
3329 	case alg_add_t2_m:
3330 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3331 				log, shift_subtarget, 0);
3332 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3333 				 add_target ? add_target : accum_target);
3334 	  val_so_far = (val_so_far << log) + 1;
3335 	  break;
3336 
3337 	case alg_sub_t2_m:
3338 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3339 				log, shift_subtarget, 0);
3340 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3341 				 add_target ? add_target : accum_target);
3342 	  val_so_far = (val_so_far << log) - 1;
3343 	  break;
3344 
3345 	case alg_add_factor:
3346 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3347 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3348 				 add_target ? add_target : accum_target);
3349 	  val_so_far += val_so_far << log;
3350 	  break;
3351 
3352 	case alg_sub_factor:
3353 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3354 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3355 				 (add_target
3356 				  ? add_target : (optimize ? 0 : tem)));
3357 	  val_so_far = (val_so_far << log) - val_so_far;
3358 	  break;
3359 
3360 	default:
3361 	  gcc_unreachable ();
3362 	}
3363 
3364       if (SCALAR_INT_MODE_P (mode))
3365 	{
3366 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3367 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3368 	     we've set the inner register and must properly indicate that.  */
3369 	  tem = op0, nmode = mode;
3370 	  accum_inner = accum;
3371 	  if (GET_CODE (accum) == SUBREG)
3372 	    {
3373 	      accum_inner = SUBREG_REG (accum);
3374 	      nmode = GET_MODE (accum_inner);
3375 	      tem = gen_lowpart (nmode, op0);
3376 	    }
3377 
3378 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3379 	     In that case, only the low bits of accum would be guaranteed to
3380 	     be equal to the content of the REG_EQUAL note, the upper bits
3381 	     can be anything.  */
3382 	  if (!paradoxical_subreg_p (tem))
3383 	    {
3384 	      insn = get_last_insn ();
3385 	      wide_int wval_so_far
3386 		= wi::uhwi (val_so_far,
3387 			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3388 	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3389 	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3390 				accum_inner);
3391 	    }
3392 	}
3393     }
3394 
3395   if (variant == negate_variant)
3396     {
3397       val_so_far = -val_so_far;
3398       accum = expand_unop (mode, neg_optab, accum, target, 0);
3399     }
3400   else if (variant == add_variant)
3401     {
3402       val_so_far = val_so_far + 1;
3403       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3404     }
3405 
3406   /* Compare only the bits of val and val_so_far that are significant
3407      in the result mode, to avoid sign-/zero-extension confusion.  */
3408   nmode = GET_MODE_INNER (mode);
3409   val &= GET_MODE_MASK (nmode);
3410   val_so_far &= GET_MODE_MASK (nmode);
3411   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3412 
3413   return accum;
3414 }
3415 
3416 /* Perform a multiplication and return an rtx for the result.
3417    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3418    TARGET is a suggestion for where to store the result (an rtx).
3419 
3420    We check specially for a constant integer as OP1.
3421    If you want this check for OP0 as well, then before calling
3422    you should swap the two operands if OP0 would be constant.  */
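/* For example, multiplying by the constant 10 is usually expanded as
   ((x << 2) + x) << 1 when that shift/add sequence is cheaper than the
   target's multiply insn.  */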
3423 
3424 rtx
3425 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3426 	     int unsignedp, bool no_libcall)
3427 {
3428   enum mult_variant variant;
3429   struct algorithm algorithm;
3430   rtx scalar_op1;
3431   int max_cost;
3432   bool speed = optimize_insn_for_speed_p ();
3433   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3434 
3435   if (CONSTANT_P (op0))
3436     std::swap (op0, op1);
3437 
3438   /* For vectors, there are several simplifications that can be made if
3439      all elements of the vector constant are identical.  */
3440   scalar_op1 = unwrap_const_vec_duplicate (op1);
3441 
3442   if (INTEGRAL_MODE_P (mode))
3443     {
3444       rtx fake_reg;
3445       HOST_WIDE_INT coeff;
3446       bool is_neg;
3447       int mode_bitsize;
3448 
3449       if (op1 == CONST0_RTX (mode))
3450 	return op1;
3451       if (op1 == CONST1_RTX (mode))
3452 	return op0;
3453       if (op1 == CONSTM1_RTX (mode))
3454 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3455 			    op0, target, 0);
3456 
3457       if (do_trapv)
3458 	goto skip_synth;
3459 
3460       /* If mode is integer vector mode, check if the backend supports
3461 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3462 	 synthesized multiply.  */
3463       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3464 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3465 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3466 	goto skip_synth;
3467 
3468       /* These are the operations that are potentially turned into
3469 	 a sequence of shifts and additions.  */
3470       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3471 
3472       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3473 	 less than or equal in size to `unsigned int' this doesn't matter.
3474 	 If the mode is larger than `unsigned int', then synth_mult works
3475 	 only if the constant value exactly fits in an `unsigned int' without
3476 	 any truncation.  This means that multiplying by negative values does
3477 	 not work; results are off by 2^32 on a 32 bit machine.  */
3478       if (CONST_INT_P (scalar_op1))
3479 	{
3480 	  coeff = INTVAL (scalar_op1);
3481 	  is_neg = coeff < 0;
3482 	}
3483 #if TARGET_SUPPORTS_WIDE_INT
3484       else if (CONST_WIDE_INT_P (scalar_op1))
3485 #else
3486       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3487 #endif
3488 	{
3489 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3490 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3491 	  if (shift > 0)
3492 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3493 				 shift, target, unsignedp);
3494 	  else
3495 	    goto skip_synth;
3496 	}
3497       else
3498 	goto skip_synth;
3499 
3500       /* We used to test optimize here, on the grounds that it's better to
3501 	 produce a smaller program when -O is not used.  But this causes
3502 	 such a terrible slowdown sometimes that it seems better to always
3503 	 use synth_mult.  */
3504 
3505       /* Special case powers of two.  */
3506       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3507 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3508 	return expand_shift (LSHIFT_EXPR, mode, op0,
3509 			     floor_log2 (coeff), target, unsignedp);
3510 
3511       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3512 
3513       /* Attempt to handle multiplication of DImode values by negative
3514 	 coefficients, by performing the multiplication by a positive
3515 	 multiplier and then inverting the result.  */
3516       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3517 	{
3518 	  /* It's safe to use -coeff even for INT_MIN, as the
3519 	     result is interpreted as an unsigned coefficient.
3520 	     Exclude cost of op0 from max_cost to match the cost
3521 	     calculation of the synth_mult.  */
3522 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3523 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3524 				    mode, speed)
3525 		      - neg_cost (speed, mode));
3526 	  if (max_cost <= 0)
3527 	    goto skip_synth;
3528 
3529 	  /* Special case powers of two.  */
3530 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3531 	    {
3532 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3533 				       floor_log2 (coeff), target, unsignedp);
3534 	      return expand_unop (mode, neg_optab, temp, target, 0);
3535 	    }
3536 
3537 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3538 				   max_cost))
3539 	    {
3540 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3541 					    &algorithm, variant);
3542 	      return expand_unop (mode, neg_optab, temp, target, 0);
3543 	    }
3544 	  goto skip_synth;
3545 	}
3546 
3547       /* Exclude cost of op0 from max_cost to match the cost
3548 	 calculation of the synth_mult.  */
3549       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3550       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3551 	return expand_mult_const (mode, op0, coeff, target,
3552 				  &algorithm, variant);
3553     }
3554  skip_synth:
3555 
3556   /* Expand x*2.0 as x+x.  */
3557   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3558       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3559     {
3560       op0 = force_reg (GET_MODE (op0), op0);
3561       return expand_binop (mode, add_optab, op0, op0,
3562 			   target, unsignedp,
3563 			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3564     }
3565 
3566   /* This used to use umul_optab if unsigned, but for non-widening multiply
3567      there is no difference between signed and unsigned.  */
3568   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3569 		      op0, op1, target, unsignedp,
3570 		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3571   gcc_assert (op0 || no_libcall);
3572   return op0;
3573 }
3574 
3575 /* Return a cost estimate for multiplying a register by the given
3576    COEFFicient in the given MODE and SPEED.  */
3577 
3578 int
3579 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3580 {
3581   int max_cost;
3582   struct algorithm algorithm;
3583   enum mult_variant variant;
3584 
3585   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3586   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3587 			   mode, speed);
3588   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3589     return algorithm.cost.cost;
3590   else
3591     return max_cost;
3592 }
3593 
3594 /* Perform a widening multiplication and return an rtx for the result.
3595    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3596    TARGET is a suggestion for where to store the result (an rtx).
3597    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3598    or smul_widen_optab.
3599 
3600    We check specially for a constant integer as OP1, comparing the
3601    cost of a widening multiply against the cost of a sequence of shifts
3602    and adds.  */
3603 
3604 rtx
3605 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3606 		      int unsignedp, optab this_optab)
3607 {
3608   bool speed = optimize_insn_for_speed_p ();
3609   rtx cop1;
3610 
3611   if (CONST_INT_P (op1)
3612       && GET_MODE (op0) != VOIDmode
3613       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3614 				this_optab == umul_widen_optab))
3615       && CONST_INT_P (cop1)
3616       && (INTVAL (cop1) >= 0
3617 	  || HWI_COMPUTABLE_MODE_P (mode)))
3618     {
3619       HOST_WIDE_INT coeff = INTVAL (cop1);
3620       int max_cost;
3621       enum mult_variant variant;
3622       struct algorithm algorithm;
3623 
3624       if (coeff == 0)
3625 	return CONST0_RTX (mode);
3626 
3627       /* Special case powers of two.  */
3628       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3629 	{
3630 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3631 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3632 			       floor_log2 (coeff), target, unsignedp);
3633 	}
3634 
3635       /* Exclude cost of op0 from max_cost to match the cost
3636 	 calculation of the synth_mult.  */
3637       max_cost = mul_widen_cost (speed, mode);
3638       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3639 			       max_cost))
3640 	{
3641 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3642 	  return expand_mult_const (mode, op0, coeff, target,
3643 				    &algorithm, variant);
3644 	}
3645     }
3646   return expand_binop (mode, this_optab, op0, op1, target,
3647 		       unsignedp, OPTAB_LIB_WIDEN);
3648 }
3649 
3650 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3651    replace division by D, and put the least significant N bits of the result
3652    in *MULTIPLIER_PTR and return the most significant bit.
3653 
3654    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3655    needed precision is in PRECISION (should be <= N).
3656 
3657    PRECISION should be as small as possible so this function can choose
3658    a multiplier more freely.
3659 
3660    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3661    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3662 
3663    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3664    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
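/* For example, with D = 5, N = 32 and PRECISION = 32 this yields
   *MULTIPLIER_PTR = 0xCCCCCCCD, *POST_SHIFT_PTR = 2 and *LGUP_PTR = 3 and
   returns 0, so for unsigned 32-bit x the quotient x / 5 is the high 32 bits
   of x * 0xCCCCCCCD, shifted right a further 2 bits.  */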
3665 
3666 unsigned HOST_WIDE_INT
3667 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3668 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3669 		   int *post_shift_ptr, int *lgup_ptr)
3670 {
3671   int lgup, post_shift;
3672   int pow, pow2;
3673 
3674   /* lgup = ceil(log2(divisor)); */
3675   lgup = ceil_log2 (d);
3676 
3677   gcc_assert (lgup <= n);
3678 
3679   pow = n + lgup;
3680   pow2 = n + lgup - precision;
3681 
3682   /* mlow = 2^(N + lgup)/d */
3683   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3684   wide_int mlow = wi::udiv_trunc (val, d);
3685 
3686   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3687   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3688   wide_int mhigh = wi::udiv_trunc (val, d);
3689 
3690   /* If precision == N, then mlow, mhigh exceed 2^N
3691      (but they do not exceed 2^(N+1)).  */
3692 
3693   /* Reduce to lowest terms.  */
3694   for (post_shift = lgup; post_shift > 0; post_shift--)
3695     {
3696       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3697 						       HOST_BITS_PER_WIDE_INT);
3698       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3699 						       HOST_BITS_PER_WIDE_INT);
3700       if (ml_lo >= mh_lo)
3701 	break;
3702 
3703       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3704       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3705     }
3706 
3707   *post_shift_ptr = post_shift;
3708   *lgup_ptr = lgup;
3709   if (n < HOST_BITS_PER_WIDE_INT)
3710     {
3711       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3712       *multiplier_ptr = mhigh.to_uhwi () & mask;
3713       return mhigh.to_uhwi () > mask;
3714     }
3715   else
3716     {
3717       *multiplier_ptr = mhigh.to_uhwi ();
3718       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3719     }
3720 }
3721 
3722 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3723    congruent to 1 (mod 2**N).  */
3724 
3725 static unsigned HOST_WIDE_INT
3726 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3727 {
3728   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3729 
3730   /* The algorithm notes that the choice y = x satisfies
3731      x*y == 1 mod 2^3, since x is assumed odd.
3732      Each iteration doubles the number of bits of significance in y.  */
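  /* For example, x = 3 and n = 8: starting from y = 3 (valid mod 2**3),
     one step gives y = 235 (valid mod 2**6) and the next gives y = 171,
     and indeed 3 * 171 = 513 == 1 (mod 256).  */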
3733 
3734   unsigned HOST_WIDE_INT mask;
3735   unsigned HOST_WIDE_INT y = x;
3736   int nbit = 3;
3737 
3738   mask = (n == HOST_BITS_PER_WIDE_INT
3739 	  ? HOST_WIDE_INT_M1U
3740 	  : (HOST_WIDE_INT_1U << n) - 1);
3741 
3742   while (nbit < n)
3743     {
3744       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3745       nbit *= 2;
3746     }
3747   return y;
3748 }
3749 
3750 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3751    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3752    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3753    to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3754    become signed.
3755 
3756    The result is put in TARGET if that is convenient.
3757 
3758    MODE is the mode of operation.  */
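/* The adjustment follows from reinterpreting the same N-bit patterns as
   signed or unsigned: modulo 2**N, the unsigned high part equals the signed
   high part plus (OP1 if OP0 is negative) plus (OP0 if OP1 is negative);
   the signed high part is obtained from the unsigned one by the
   corresponding subtractions.  */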
3759 
3760 rtx
3761 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3762 			     rtx op1, rtx target, int unsignedp)
3763 {
3764   rtx tem;
3765   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3766 
3767   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3768 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3769   tem = expand_and (mode, tem, op1, NULL_RTX);
3770   adj_operand
3771     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3772 		     adj_operand);
3773 
3774   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3775 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3776   tem = expand_and (mode, tem, op0, NULL_RTX);
3777   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3778 			  target);
3779 
3780   return target;
3781 }
3782 
3783 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3784 
3785 static rtx
3786 extract_high_half (scalar_int_mode mode, rtx op)
3787 {
3788   if (mode == word_mode)
3789     return gen_highpart (mode, op);
3790 
3791   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3792 
3793   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3794 		     GET_MODE_BITSIZE (mode), 0, 1);
3795   return convert_modes (mode, wider_mode, op, 0);
3796 }
3797 
3798 /* Like expmed_mult_highpart, but only consider using a multiplication
3799    optab.  OP1 is an rtx for the constant operand.  */
3800 
3801 static rtx
3802 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3803 			    rtx target, int unsignedp, int max_cost)
3804 {
3805   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3806   optab moptab;
3807   rtx tem;
3808   int size;
3809   bool speed = optimize_insn_for_speed_p ();
3810 
3811   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3812 
3813   size = GET_MODE_BITSIZE (mode);
3814 
3815   /* First, try using a multiplication insn that generates only the needed
3816      high part of the product, in the sign flavor of unsignedp.  */
3817   if (mul_highpart_cost (speed, mode) < max_cost)
3818     {
3819       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3820       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3821 			  unsignedp, OPTAB_DIRECT);
3822       if (tem)
3823 	return tem;
3824     }
3825 
3826   /* Second, same as above, but use the sign flavor opposite of unsignedp.
3827      We need to adjust the result after the multiplication.  */
3828   if (size - 1 < BITS_PER_WORD
3829       && (mul_highpart_cost (speed, mode)
3830 	  + 2 * shift_cost (speed, mode, size-1)
3831 	  + 4 * add_cost (speed, mode) < max_cost))
3832     {
3833       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3834       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3835 			  unsignedp, OPTAB_DIRECT);
3836       if (tem)
3837 	/* We used the wrong signedness.  Adjust the result.  */
3838 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3839 					    tem, unsignedp);
3840     }
3841 
3842   /* Try widening multiplication.  */
3843   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3844   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3845       && mul_widen_cost (speed, wider_mode) < max_cost)
3846     {
3847       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3848 			  unsignedp, OPTAB_WIDEN);
3849       if (tem)
3850 	return extract_high_half (mode, tem);
3851     }
3852 
3853   /* Try widening the mode and perform a non-widening multiplication.  */
3854   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3855       && size - 1 < BITS_PER_WORD
3856       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3857 	  < max_cost))
3858     {
3859       rtx_insn *insns;
3860       rtx wop0, wop1;
3861 
3862       /* We need to widen the operands, for example to ensure the
3863 	 constant multiplier is correctly sign or zero extended.
3864 	 Use a sequence to clean up any instructions emitted by
3865 	 the conversions if things don't work out.  */
3866       start_sequence ();
3867       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3868       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3869       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3870 			  unsignedp, OPTAB_WIDEN);
3871       insns = get_insns ();
3872       end_sequence ();
3873 
3874       if (tem)
3875 	{
3876 	  emit_insn (insns);
3877 	  return extract_high_half (mode, tem);
3878 	}
3879     }
3880 
3881   /* Try widening multiplication of opposite signedness, and adjust.  */
3882   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3883   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3884       && size - 1 < BITS_PER_WORD
3885       && (mul_widen_cost (speed, wider_mode)
3886 	  + 2 * shift_cost (speed, mode, size-1)
3887 	  + 4 * add_cost (speed, mode) < max_cost))
3888     {
3889       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3890 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3891       if (tem != 0)
3892 	{
3893 	  tem = extract_high_half (mode, tem);
3894 	  /* We used the wrong signedness.  Adjust the result.  */
3895 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3896 					      target, unsignedp);
3897 	}
3898     }
3899 
3900   return 0;
3901 }
3902 
3903 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3904    putting the high half of the result in TARGET if that is convenient,
3905    and return where the result is.  If the operation cannot be performed,
3906    0 is returned.
3907 
3908    MODE is the mode of operation and result.
3909 
3910    UNSIGNEDP nonzero means unsigned multiply.
3911 
3912    MAX_COST is the total allowed cost for the expanded RTL.  */
3913 
3914 static rtx
3915 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3916 		      rtx target, int unsignedp, int max_cost)
3917 {
3918   unsigned HOST_WIDE_INT cnst1;
3919   int extra_cost;
3920   bool sign_adjust = false;
3921   enum mult_variant variant;
3922   struct algorithm alg;
3923   rtx tem;
3924   bool speed = optimize_insn_for_speed_p ();
3925 
3926   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3927   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3928 
3929   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3930 
3931   /* We can't optimize modes wider than BITS_PER_WORD.
3932      ??? We might be able to perform double-word arithmetic if
3933      mode == word_mode, however all the cost calculations in
3934      synth_mult etc. assume single-word operations.  */
3935   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3936   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3937     return expmed_mult_highpart_optab (mode, op0, op1, target,
3938 				       unsignedp, max_cost);
3939 
3940   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3941 
3942   /* Check whether we try to multiply by a negative constant.  */
3943   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3944     {
3945       sign_adjust = true;
3946       extra_cost += add_cost (speed, mode);
3947     }
3948 
3949   /* See whether shift/add multiplication is cheap enough.  */
3950   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3951 			   max_cost - extra_cost))
3952     {
3953       /* See whether the specialized multiplication optabs are
3954 	 cheaper than the shift/add version.  */
3955       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3956 					alg.cost.cost + extra_cost);
3957       if (tem)
3958 	return tem;
3959 
3960       tem = convert_to_mode (wider_mode, op0, unsignedp);
3961       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3962       tem = extract_high_half (mode, tem);
3963 
3964       /* Adjust result for signedness.  */
3965       if (sign_adjust)
3966 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3967 
3968       return tem;
3969     }
3970   return expmed_mult_highpart_optab (mode, op0, op1, target,
3971 				     unsignedp, max_cost);
3972 }
3973 
3974 
3975 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3976 
3977 static rtx
3978 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3979 {
3980   rtx result, temp, shift;
3981   rtx_code_label *label;
3982   int logd;
3983   int prec = GET_MODE_PRECISION (mode);
3984 
3985   logd = floor_log2 (d);
3986   result = gen_reg_rtx (mode);
3987 
3988   /* Avoid conditional branches when they're expensive.  */
3989   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3990       && optimize_insn_for_speed_p ())
3991     {
3992       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3993 				      mode, 0, -1);
3994       if (signmask)
3995 	{
3996 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
3997 	  signmask = force_reg (mode, signmask);
3998 	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
3999 
4000 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
4001 	     which instruction sequence to use.  If logical right shifts
4002 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
4003 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
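	  /* For example, with D = 4 in a 32-bit mode, s = SIGNMASK behaves
	     like x >> 31 (arithmetic).  The first sequence computes the
	     remainder as ((((x ^ s) - s) & 3) ^ s) - s; the second first
	     shifts s logically right by 30 (giving 3 for negative x, else 0)
	     and computes ((x + s) & 3) - s with that value.  */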
4004 
4005 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4006 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4007 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4008 		  > COSTS_N_INSNS (2)))
4009 	    {
4010 	      temp = expand_binop (mode, xor_optab, op0, signmask,
4011 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4012 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4013 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4014 	      temp = expand_binop (mode, and_optab, temp,
4015 				   gen_int_mode (masklow, mode),
4016 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 	      temp = expand_binop (mode, xor_optab, temp, signmask,
4018 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4020 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4021 	    }
4022 	  else
4023 	    {
4024 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4025 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4026 	      signmask = force_reg (mode, signmask);
4027 
4028 	      temp = expand_binop (mode, add_optab, op0, signmask,
4029 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4030 	      temp = expand_binop (mode, and_optab, temp,
4031 				   gen_int_mode (masklow, mode),
4032 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4033 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4034 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035 	    }
4036 	  return temp;
4037 	}
4038     }
4039 
4040   /* Mask contains the mode's signbit and the significant bits of the
4041      modulus.  By including the signbit in the operation, many targets
4042      can avoid an explicit compare operation in the following comparison
4043      against zero.  */
4044   wide_int mask = wi::mask (logd, false, prec);
4045   mask = wi::set_bit (mask, prec - 1);
4046 
4047   temp = expand_binop (mode, and_optab, op0,
4048 		       immed_wide_int_const (mask, mode),
4049 		       result, 1, OPTAB_LIB_WIDEN);
4050   if (temp != result)
4051     emit_move_insn (result, temp);
4052 
4053   label = gen_label_rtx ();
4054   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4055 
4056   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4057 		       0, OPTAB_LIB_WIDEN);
4058 
4059   mask = wi::mask (logd, true, prec);
4060   temp = expand_binop (mode, ior_optab, temp,
4061 		       immed_wide_int_const (mask, mode),
4062 		       result, 1, OPTAB_LIB_WIDEN);
4063   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4064 		       0, OPTAB_LIB_WIDEN);
4065   if (temp != result)
4066     emit_move_insn (result, temp);
4067   emit_label (label);
4068   return result;
4069 }
4070 
4071 /* Expand signed division of OP0 by a power of two D in mode MODE.
4072    This routine is only called for positive values of D.  */
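/* For example, for D = 8 in a 32-bit mode the branch-free path below adds
   D - 1 = 7 to X only when X is negative (derived from the sign mask
   x >> 31) and then performs an arithmetic shift right by 3.  */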
4073 
4074 static rtx
4075 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4076 {
4077   rtx temp;
4078   rtx_code_label *label;
4079   int logd;
4080 
4081   logd = floor_log2 (d);
4082 
4083   if (d == 2
4084       && BRANCH_COST (optimize_insn_for_speed_p (),
4085 		      false) >= 1)
4086     {
4087       temp = gen_reg_rtx (mode);
4088       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4089       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4090 			   0, OPTAB_LIB_WIDEN);
4091       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4092     }
4093 
4094   if (HAVE_conditional_move
4095       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4096     {
4097       rtx temp2;
4098 
4099       start_sequence ();
4100       temp2 = copy_to_mode_reg (mode, op0);
4101       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4102 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4103       temp = force_reg (mode, temp);
4104 
4105       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4106       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4107 				     mode, temp, temp2, mode, 0);
4108       if (temp2)
4109 	{
4110 	  rtx_insn *seq = get_insns ();
4111 	  end_sequence ();
4112 	  emit_insn (seq);
4113 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4114 	}
4115       end_sequence ();
4116     }
4117 
4118   if (BRANCH_COST (optimize_insn_for_speed_p (),
4119 		   false) >= 2)
4120     {
4121       int ushift = GET_MODE_BITSIZE (mode) - logd;
4122 
4123       temp = gen_reg_rtx (mode);
4124       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4125       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4126 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4127 	     > COSTS_N_INSNS (1))
4128 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4129 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
4130       else
4131 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
4132 			     ushift, NULL_RTX, 1);
4133       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4134 			   0, OPTAB_LIB_WIDEN);
4135       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4136     }
4137 
4138   label = gen_label_rtx ();
4139   temp = copy_to_mode_reg (mode, op0);
4140   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4141   expand_inc (temp, gen_int_mode (d - 1, mode));
4142   emit_label (label);
4143   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4144 }
4145 
4146 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4147    if that is convenient, and returning where the result is.
4148    You may request either the quotient or the remainder as the result;
4149    specify REM_FLAG nonzero to get the remainder.
4150 
4151    CODE is the expression code for which kind of division this is;
4152    it controls how rounding is done.  MODE is the machine mode to use.
4153    UNSIGNEDP nonzero means do unsigned division.  */
4154 
4155 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4156    and then correct it by or'ing in missing high bits
4157    if result of ANDI is nonzero.
4158    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4159    This could optimize to a bfexts instruction.
4160    But C doesn't use these operations, so their optimizations are
4161    left for later.  */
4162 /* ??? For modulo, we don't actually need the highpart of the first product,
4163    the low part will do nicely.  And for small divisors, the second multiply
4164    can also be a low-part only multiply or even be completely left out.
4165    E.g. to calculate the remainder of a division by 3 with a 32 bit
4166    multiply, multiply with 0x55555556 and extract the upper two bits;
4167    the result is exact for inputs up to 0x1fffffff.
4168    The input range can be reduced by using cross-sum rules.
4169    For odd divisors >= 3, the following table gives right shift counts
4170    so that if a number is shifted by an integer multiple of the given
4171    amount, the remainder stays the same:
4172    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4173    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4174    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4175    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4176    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4177 
4178    Cross-sum rules for even numbers can be derived by leaving as many bits
4179    to the right alone as the divisor has zeros to the right.
4180    E.g. if x is an unsigned 32 bit number:
4181    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4182    */
4183 
4184 rtx
4185 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4186 	       rtx op0, rtx op1, rtx target, int unsignedp)
4187 {
4188   machine_mode compute_mode;
4189   rtx tquotient;
4190   rtx quotient = 0, remainder = 0;
4191   rtx_insn *last;
4192   rtx_insn *insn;
4193   optab optab1, optab2;
4194   int op1_is_constant, op1_is_pow2 = 0;
4195   int max_cost, extra_cost;
4196   static HOST_WIDE_INT last_div_const = 0;
4197   bool speed = optimize_insn_for_speed_p ();
4198 
4199   op1_is_constant = CONST_INT_P (op1);
4200   if (op1_is_constant)
4201     {
4202       wide_int ext_op1 = rtx_mode_t (op1, mode);
4203       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4204 		     || (! unsignedp
4205 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4206     }
4207 
4208   /*
4209      This is the structure of expand_divmod:
4210 
4211      First comes code to fix up the operands so we can perform the operations
4212      correctly and efficiently.
4213 
4214      Second comes a switch statement with code specific for each rounding mode.
4215      For some special operands this code emits all RTL for the desired
4216      operation, for other cases, it generates only a quotient and stores it in
4217      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4218      to indicate that it has not done anything.
4219 
4220      Last comes code that finishes the operation.  If QUOTIENT is set and
4221      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4222      QUOTIENT is not set, it is computed using trunc rounding.
4223 
4224      We try to generate special code for division and remainder when OP1 is a
4225      constant.  If |OP1| = 2**n we can use shifts and some other fast
4226      operations.  For other values of OP1, we compute a carefully selected
4227      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4228      by m.
4229 
4230      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4231      half of the product.  Different strategies for generating the product are
4232      implemented in expmed_mult_highpart.
4233 
4234      If what we actually want is the remainder, we generate that by another
4235      by-constant multiplication and a subtraction.  */
4236 
4237   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4238      code below will malfunction if we are, so check here and handle
4239      the special case if so.  */
4240   if (op1 == const1_rtx)
4241     return rem_flag ? const0_rtx : op0;
4242 
4243   /* When dividing by -1, we could get an overflow.
4244      negv_optab can handle overflows.  */
4245   if (! unsignedp && op1 == constm1_rtx)
4246     {
4247       if (rem_flag)
4248 	return const0_rtx;
4249       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4250 			  ? negv_optab : neg_optab, op0, target, 0);
4251     }
4252 
4253   if (target
4254       /* Don't use the function value register as a target
4255 	 since we have to read it as well as write it,
4256 	 and function-inlining gets confused by this.  */
4257       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4258 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4259 	  || ((rem_flag || op1_is_constant)
4260 	      && (reg_mentioned_p (target, op0)
4261 		  || (MEM_P (op0) && MEM_P (target))))
4262 	  || reg_mentioned_p (target, op1)
4263 	  || (MEM_P (op1) && MEM_P (target))))
4264     target = 0;
4265 
4266   /* Get the mode in which to perform this computation.  Normally it will
4267      be MODE, but sometimes we can't do the desired operation in MODE.
4268      If so, pick a wider mode in which we can do the operation.  Convert
4269      to that mode at the start to avoid repeated conversions.
4270 
4271      First see what operations we need.  These depend on the expression
4272      we are evaluating.  (We assume that divxx3 insns exist under the
4273      same conditions as modxx3 insns do, and that these insns don't normally
4274      fail.  If these assumptions are not correct, we may generate less
4275      efficient code in some cases.)
4276 
4277      Then see if we find a mode in which we can open-code that operation
4278      (either a division, modulus, or shift).  Finally, check for the smallest
4279      mode for which we can do the operation with a library call.  */
4280 
4281   /* We might want to refine this now that we have division-by-constant
4282      optimization.  Since expmed_mult_highpart tries so many variants, it is
4283      not straightforward to generalize this.  Maybe we should make an array
4284      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4285 
4286   optab1 = (op1_is_pow2
4287 	    ? (unsignedp ? lshr_optab : ashr_optab)
4288 	    : (unsignedp ? udiv_optab : sdiv_optab));
4289   optab2 = (op1_is_pow2 ? optab1
4290 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4291 
4292   FOR_EACH_MODE_FROM (compute_mode, mode)
4293     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4294 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4295       break;
4296 
4297   if (compute_mode == VOIDmode)
4298     FOR_EACH_MODE_FROM (compute_mode, mode)
4299       if (optab_libfunc (optab1, compute_mode)
4300 	  || optab_libfunc (optab2, compute_mode))
4301 	break;
4302 
4303   /* If we still couldn't find a mode, use MODE, but expand_binop will
4304      probably die.  */
4305   if (compute_mode == VOIDmode)
4306     compute_mode = mode;
4307 
4308   if (target && GET_MODE (target) == compute_mode)
4309     tquotient = target;
4310   else
4311     tquotient = gen_reg_rtx (compute_mode);
4312 
4313 #if 0
4314   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4315      (mode), and thereby get better code when OP1 is a constant.  Do that
4316      later.  It will require going over all usages of SIZE below.  */
4317   size = GET_MODE_BITSIZE (mode);
4318 #endif
4319 
4320   /* Only deduct something for a REM if the last divide done was
4321      for a different constant.   Then set the constant of the last
4322      divide.  */
4323   max_cost = (unsignedp
4324 	      ? udiv_cost (speed, compute_mode)
4325 	      : sdiv_cost (speed, compute_mode));
4326   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4327 		     && INTVAL (op1) == last_div_const))
4328     max_cost -= (mul_cost (speed, compute_mode)
4329 		 + add_cost (speed, compute_mode));
4330 
4331   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4332 
4333   /* Now convert to the best mode to use.  */
4334   if (compute_mode != mode)
4335     {
4336       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4337       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4338 
4339       /* convert_modes may have placed op1 into a register, so we
4340 	 must recompute the following.  */
4341       op1_is_constant = CONST_INT_P (op1);
4342       if (op1_is_constant)
4343 	{
4344 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4345 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4346 			 || (! unsignedp
4347 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4348 	}
4349       else
4350 	op1_is_pow2 = 0;
4351     }
4352 
4353   /* If one of the operands is a volatile MEM, copy it into a register.  */
4354 
4355   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4356     op0 = force_reg (compute_mode, op0);
4357   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4358     op1 = force_reg (compute_mode, op1);
4359 
4360   /* If we need the remainder or if OP1 is constant, we need to
4361      put OP0 in a register in case it has any queued subexpressions.  */
4362   if (rem_flag || op1_is_constant)
4363     op0 = force_reg (compute_mode, op0);
4364 
4365   last = get_last_insn ();
4366 
4367   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4368   if (unsignedp)
4369     {
4370       if (code == FLOOR_DIV_EXPR)
4371 	code = TRUNC_DIV_EXPR;
4372       if (code == FLOOR_MOD_EXPR)
4373 	code = TRUNC_MOD_EXPR;
4374       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4375 	code = TRUNC_DIV_EXPR;
4376     }
4377 
4378   if (op1 != const0_rtx)
4379     switch (code)
4380       {
4381       case TRUNC_MOD_EXPR:
4382       case TRUNC_DIV_EXPR:
4383 	if (op1_is_constant)
4384 	  {
4385 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4386 	    int size = GET_MODE_BITSIZE (int_mode);
4387 	    if (unsignedp)
4388 	      {
4389 		unsigned HOST_WIDE_INT mh, ml;
4390 		int pre_shift, post_shift;
4391 		int dummy;
4392 		wide_int wd = rtx_mode_t (op1, int_mode);
4393 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4394 
4395 		if (wi::popcount (wd) == 1)
4396 		  {
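		    /* Power-of-two divisor: e.g. for d == 8 this reduces to
		       remainder = op0 & 7 and quotient = op0 >> 3.  */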
4397 		    pre_shift = floor_log2 (d);
4398 		    if (rem_flag)
4399 		      {
4400 			unsigned HOST_WIDE_INT mask
4401 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4402 			remainder
4403 			  = expand_binop (int_mode, and_optab, op0,
4404 					  gen_int_mode (mask, int_mode),
4405 					  remainder, 1,
4406 					  OPTAB_LIB_WIDEN);
4407 			if (remainder)
4408 			  return gen_lowpart (mode, remainder);
4409 		      }
4410 		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4411 					     pre_shift, tquotient, 1);
4412 		  }
4413 		else if (size <= HOST_BITS_PER_WIDE_INT)
4414 		  {
4415 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4416 		      {
4417 			/* Most significant bit of divisor is set; emit an scc
4418 			   insn.  */
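			/* (The quotient can then only be 0 or 1, and it is 1
			   exactly when op0 >= op1 as unsigned values.)  */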
4419 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4420 							  int_mode, 1, 1);
4421 		      }
4422 		    else
4423 		      {
4424 			/* Find a suitable multiplier and right shift count
4425 			   instead of multiplying with D.  */
4426 
4427 			mh = choose_multiplier (d, size, size,
4428 						&ml, &post_shift, &dummy);
4429 
4430 			/* If the suggested multiplier is more than SIZE bits,
4431 			   we can do better for even divisors, using an
4432 			   initial right shift.  */
4433 			if (mh != 0 && (d & 1) == 0)
4434 			  {
4435 			    pre_shift = ctz_or_zero (d);
4436 			    mh = choose_multiplier (d >> pre_shift, size,
4437 						    size - pre_shift,
4438 						    &ml, &post_shift, &dummy);
4439 			    gcc_assert (!mh);
4440 			  }
4441 			else
4442 			  pre_shift = 0;
4443 
4444 			if (mh != 0)
4445 			  {
4446 			    rtx t1, t2, t3, t4;
4447 
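			    /* Sketch of the sequence built below when the
			       multiplier needs more than SIZE bits:
				   t1 = high part of (op0 * ml);
				   q  = (((op0 - t1) >> 1) + t1)
					>> (post_shift - 1);
			       the classic "round-up" variant of
			       multiply-high division.  */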
4448 			    if (post_shift - 1 >= BITS_PER_WORD)
4449 			      goto fail1;
4450 
4451 			    extra_cost
4452 			      = (shift_cost (speed, int_mode, post_shift - 1)
4453 				 + shift_cost (speed, int_mode, 1)
4454 				 + 2 * add_cost (speed, int_mode));
4455 			    t1 = expmed_mult_highpart
4456 			      (int_mode, op0, gen_int_mode (ml, int_mode),
4457 			       NULL_RTX, 1, max_cost - extra_cost);
4458 			    if (t1 == 0)
4459 			      goto fail1;
4460 			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4461 							       op0, t1),
4462 						NULL_RTX);
4463 			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4464 					       t2, 1, NULL_RTX, 1);
4465 			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4466 							      t1, t3),
4467 						NULL_RTX);
4468 			    quotient = expand_shift
4469 			      (RSHIFT_EXPR, int_mode, t4,
4470 			       post_shift - 1, tquotient, 1);
4471 			  }
4472 			else
4473 			  {
4474 			    rtx t1, t2;
4475 
4476 			    if (pre_shift >= BITS_PER_WORD
4477 				|| post_shift >= BITS_PER_WORD)
4478 			      goto fail1;
4479 
4480 			    t1 = expand_shift
4481 			      (RSHIFT_EXPR, int_mode, op0,
4482 			       pre_shift, NULL_RTX, 1);
4483 			    extra_cost
4484 			      = (shift_cost (speed, int_mode, pre_shift)
4485 				 + shift_cost (speed, int_mode, post_shift));
4486 			    t2 = expmed_mult_highpart
4487 			      (int_mode, t1,
4488 			       gen_int_mode (ml, int_mode),
4489 			       NULL_RTX, 1, max_cost - extra_cost);
4490 			    if (t2 == 0)
4491 			      goto fail1;
4492 			    quotient = expand_shift
4493 			      (RSHIFT_EXPR, int_mode, t2,
4494 			       post_shift, tquotient, 1);
4495 			  }
4496 		      }
4497 		  }
4498 		else		/* Mode too wide to use the tricky code */
4499 		  break;
4500 
4501 		insn = get_last_insn ();
4502 		if (insn != last)
4503 		  set_dst_reg_note (insn, REG_EQUAL,
4504 				    gen_rtx_UDIV (int_mode, op0, op1),
4505 				    quotient);
4506 	      }
4507 	    else		/* TRUNC_DIV, signed */
4508 	      {
4509 		unsigned HOST_WIDE_INT ml;
4510 		int lgup, post_shift;
4511 		rtx mlr;
4512 		HOST_WIDE_INT d = INTVAL (op1);
4513 		unsigned HOST_WIDE_INT abs_d;
4514 
4515 		/* Not prepared to handle division/remainder by
4516 		   0xffffffffffffffff8000000000000000 etc.  */
4517 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4518 		  break;
4519 
4520 		/* Since d might be INT_MIN, we have to cast to
4521 		   unsigned HOST_WIDE_INT before negating to avoid
4522 		   undefined signed overflow.  */
4523 		abs_d = (d >= 0
4524 			 ? (unsigned HOST_WIDE_INT) d
4525 			 : - (unsigned HOST_WIDE_INT) d);
4526 
4527 		/* n rem d = n rem -d */
4528 		if (rem_flag && d < 0)
4529 		  {
4530 		    d = abs_d;
4531 		    op1 = gen_int_mode (abs_d, int_mode);
4532 		  }
4533 
4534 		if (d == 1)
4535 		  quotient = op0;
4536 		else if (d == -1)
4537 		  quotient = expand_unop (int_mode, neg_optab, op0,
4538 					  tquotient, 0);
4539 		else if (size <= HOST_BITS_PER_WIDE_INT
4540 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4541 		  {
4542 		    /* This case is not handled correctly below.  */
4543 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4544 						int_mode, 1, 1);
4545 		    if (quotient == 0)
4546 		      goto fail1;
4547 		  }
4548 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4549 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4550 			 && (rem_flag
4551 			     ? smod_pow2_cheap (speed, int_mode)
4552 			     : sdiv_pow2_cheap (speed, int_mode))
4553 			 /* We assume that the cheap metric is true if the
4554 			    optab has an expander for this mode.  */
4555 			 && ((optab_handler ((rem_flag ? smod_optab
4556 					      : sdiv_optab),
4557 					     int_mode)
4558 			      != CODE_FOR_nothing)
4559 			     || (optab_handler (sdivmod_optab, int_mode)
4560 				 != CODE_FOR_nothing)))
4561 		  ;
4562 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4563 		  {
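		    /* One common branch-free expansion for this case (not
		       necessarily the exact one chosen here), with 32-bit x
		       and abs_d == 1 << k:
			   q = (x + ((x >> 31) & (abs_d - 1))) >> k;
		       followed by a negation when d < 0.  */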
4564 		    if (rem_flag)
4565 		      {
4566 			remainder = expand_smod_pow2 (int_mode, op0, d);
4567 			if (remainder)
4568 			  return gen_lowpart (mode, remainder);
4569 		      }
4570 
4571 		    if (sdiv_pow2_cheap (speed, int_mode)
4572 			&& ((optab_handler (sdiv_optab, int_mode)
4573 			     != CODE_FOR_nothing)
4574 			    || (optab_handler (sdivmod_optab, int_mode)
4575 				!= CODE_FOR_nothing)))
4576 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4577 						int_mode, op0,
4578 						gen_int_mode (abs_d,
4579 							      int_mode),
4580 						NULL_RTX, 0);
4581 		    else
4582 		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4583 
4584 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4585 		       negate the quotient.  */
4586 		    if (d < 0)
4587 		      {
4588 			insn = get_last_insn ();
4589 			if (insn != last
4590 			    && abs_d < (HOST_WIDE_INT_1U
4591 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4592 			  set_dst_reg_note (insn, REG_EQUAL,
4593 					    gen_rtx_DIV (int_mode, op0,
4594 							 gen_int_mode
4595 							   (abs_d,
4596 							    int_mode)),
4597 					    quotient);
4598 
4599 			quotient = expand_unop (int_mode, neg_optab,
4600 						quotient, quotient, 0);
4601 		      }
4602 		  }
4603 		else if (size <= HOST_BITS_PER_WIDE_INT)
4604 		  {
4605 		    choose_multiplier (abs_d, size, size - 1,
4606 				       &ml, &post_shift, &lgup);
4607 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4608 		      {
4609 			rtx t1, t2, t3;
4610 
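			/* Sketch of the sequence built here (signed
			   multiply-high):
			       t1 = high part of (op0 * ml);
			       q  = (t1 >> post_shift) - (op0 >> (size - 1));
			   with the operands of the subtraction swapped when
			   d < 0.  */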
4611 			if (post_shift >= BITS_PER_WORD
4612 			    || size - 1 >= BITS_PER_WORD)
4613 			  goto fail1;
4614 
4615 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4616 				      + shift_cost (speed, int_mode, size - 1)
4617 				      + add_cost (speed, int_mode));
4618 			t1 = expmed_mult_highpart
4619 			  (int_mode, op0, gen_int_mode (ml, int_mode),
4620 			   NULL_RTX, 0, max_cost - extra_cost);
4621 			if (t1 == 0)
4622 			  goto fail1;
4623 			t2 = expand_shift
4624 			  (RSHIFT_EXPR, int_mode, t1,
4625 			   post_shift, NULL_RTX, 0);
4626 			t3 = expand_shift
4627 			  (RSHIFT_EXPR, int_mode, op0,
4628 			   size - 1, NULL_RTX, 0);
4629 			if (d < 0)
4630 			  quotient
4631 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4632 					     tquotient);
4633 			else
4634 			  quotient
4635 			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4636 					     tquotient);
4637 		      }
4638 		    else
4639 		      {
4640 			rtx t1, t2, t3, t4;
4641 
4642 			if (post_shift >= BITS_PER_WORD
4643 			    || size - 1 >= BITS_PER_WORD)
4644 			  goto fail1;
4645 
4646 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4647 			mlr = gen_int_mode (ml, int_mode);
4648 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4649 				      + shift_cost (speed, int_mode, size - 1)
4650 				      + 2 * add_cost (speed, int_mode));
4651 			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4652 						   NULL_RTX, 0,
4653 						   max_cost - extra_cost);
4654 			if (t1 == 0)
4655 			  goto fail1;
4656 			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4657 					    NULL_RTX);
4658 			t3 = expand_shift
4659 			  (RSHIFT_EXPR, int_mode, t2,
4660 			   post_shift, NULL_RTX, 0);
4661 			t4 = expand_shift
4662 			  (RSHIFT_EXPR, int_mode, op0,
4663 			   size - 1, NULL_RTX, 0);
4664 			if (d < 0)
4665 			  quotient
4666 			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4667 					     tquotient);
4668 			else
4669 			  quotient
4670 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4671 					     tquotient);
4672 		      }
4673 		  }
4674 		else		/* Mode too wide to use the tricky code */
4675 		  break;
4676 
4677 		insn = get_last_insn ();
4678 		if (insn != last)
4679 		  set_dst_reg_note (insn, REG_EQUAL,
4680 				    gen_rtx_DIV (int_mode, op0, op1),
4681 				    quotient);
4682 	      }
4683 	    break;
4684 	  }
4685       fail1:
4686 	delete_insns_since (last);
4687 	break;
4688 
4689       case FLOOR_DIV_EXPR:
4690       case FLOOR_MOD_EXPR:
4691       /* We will come here only for signed operations.  */
4692 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4693 	  {
4694 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4695 	    int size = GET_MODE_BITSIZE (int_mode);
4696 	    unsigned HOST_WIDE_INT mh, ml;
4697 	    int pre_shift, lgup, post_shift;
4698 	    HOST_WIDE_INT d = INTVAL (op1);
4699 
4700 	    if (d > 0)
4701 	      {
4702 		/* We could just as easily deal with negative constants here,
4703 		   but it does not seem worth the trouble for GCC 2.6.  */
4704 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4705 		  {
4706 		    pre_shift = floor_log2 (d);
4707 		    if (rem_flag)
4708 		      {
4709 			unsigned HOST_WIDE_INT mask
4710 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4711 			remainder = expand_binop
4712 			  (int_mode, and_optab, op0,
4713 			   gen_int_mode (mask, int_mode),
4714 			   remainder, 0, OPTAB_LIB_WIDEN);
4715 			if (remainder)
4716 			  return gen_lowpart (mode, remainder);
4717 		      }
4718 		    quotient = expand_shift
4719 		      (RSHIFT_EXPR, int_mode, op0,
4720 		       pre_shift, tquotient, 0);
4721 		  }
4722 		else
4723 		  {
4724 		    rtx t1, t2, t3, t4;
4725 
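		    /* Sketch of the branch-free sequence attempted below,
		       where sgn is 0 for non-negative op0 and -1 otherwise:
			   sgn = op0 >> (size - 1);
			   q = (high part of ((op0 ^ sgn) * ml)
				>> post_shift) ^ sgn;
		       i.e. conditionally complement op0, divide, and undo
		       the complement, which gives floor rounding for
		       d > 0.  */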
4726 		    mh = choose_multiplier (d, size, size - 1,
4727 					    &ml, &post_shift, &lgup);
4728 		    gcc_assert (!mh);
4729 
4730 		    if (post_shift < BITS_PER_WORD
4731 			&& size - 1 < BITS_PER_WORD)
4732 		      {
4733 			t1 = expand_shift
4734 			  (RSHIFT_EXPR, int_mode, op0,
4735 			   size - 1, NULL_RTX, 0);
4736 			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4737 					   NULL_RTX, 0, OPTAB_WIDEN);
4738 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4739 				      + shift_cost (speed, int_mode, size - 1)
4740 				      + 2 * add_cost (speed, int_mode));
4741 			t3 = expmed_mult_highpart
4742 			  (int_mode, t2, gen_int_mode (ml, int_mode),
4743 			   NULL_RTX, 1, max_cost - extra_cost);
4744 			if (t3 != 0)
4745 			  {
4746 			    t4 = expand_shift
4747 			      (RSHIFT_EXPR, int_mode, t3,
4748 			       post_shift, NULL_RTX, 1);
4749 			    quotient = expand_binop (int_mode, xor_optab,
4750 						     t4, t1, tquotient, 0,
4751 						     OPTAB_WIDEN);
4752 			  }
4753 		      }
4754 		  }
4755 	      }
4756 	    else
4757 	      {
4758 		rtx nsign, t1, t2, t3, t4;
4759 		t1 = force_operand (gen_rtx_PLUS (int_mode,
4760 						  op0, constm1_rtx), NULL_RTX);
4761 		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4762 				   0, OPTAB_WIDEN);
4763 		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4764 				      size - 1, NULL_RTX, 0);
4765 		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4766 				    NULL_RTX);
4767 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4768 				    NULL_RTX, 0);
4769 		if (t4)
4770 		  {
4771 		    rtx t5;
4772 		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4773 				      NULL_RTX, 0);
4774 		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4775 					      tquotient);
4776 		  }
4777 	      }
4778 	  }
4779 
4780 	if (quotient != 0)
4781 	  break;
4782 	delete_insns_since (last);
4783 
4784 	/* Try using an instruction that produces both the quotient and
4785 	   remainder, using truncation.  We can easily compensate the quotient
4786 	   or remainder to get floor rounding, once we have the remainder.
4787 	   Notice that we compute also the final remainder value here,
4788 	   and return the result right away.  */
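	/* For example, for op0 = -7 and op1 = 2, truncating division gives
	   q = -3, r = -1; the operands have opposite signs and r != 0, so
	   the adjustment below produces the floor result q = -4, r = 1.  */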
4789 	if (target == 0 || GET_MODE (target) != compute_mode)
4790 	  target = gen_reg_rtx (compute_mode);
4791 
4792 	if (rem_flag)
4793 	  {
4794 	    remainder
4795 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4796 	    quotient = gen_reg_rtx (compute_mode);
4797 	  }
4798 	else
4799 	  {
4800 	    quotient
4801 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4802 	    remainder = gen_reg_rtx (compute_mode);
4803 	  }
4804 
4805 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4806 				 quotient, remainder, 0))
4807 	  {
4808 	    /* This could be computed with a branch-less sequence.
4809 	       Save that for later.  */
4810 	    rtx tem;
4811 	    rtx_code_label *label = gen_label_rtx ();
4812 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4813 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4814 				NULL_RTX, 0, OPTAB_WIDEN);
4815 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4816 	    expand_dec (quotient, const1_rtx);
4817 	    expand_inc (remainder, op1);
4818 	    emit_label (label);
4819 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4820 	  }
4821 
4822 	/* No luck with division elimination or divmod.  Have to do it
4823 	   by conditionally adjusting op0 *and* the result.  */
4824 	{
4825 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4826 	  rtx adjusted_op0;
4827 	  rtx tem;
4828 
4829 	  quotient = gen_reg_rtx (compute_mode);
4830 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4831 	  label1 = gen_label_rtx ();
4832 	  label2 = gen_label_rtx ();
4833 	  label3 = gen_label_rtx ();
4834 	  label4 = gen_label_rtx ();
4835 	  label5 = gen_label_rtx ();
4836 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4837 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4838 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4839 			      quotient, 0, OPTAB_LIB_WIDEN);
4840 	  if (tem != quotient)
4841 	    emit_move_insn (quotient, tem);
4842 	  emit_jump_insn (targetm.gen_jump (label5));
4843 	  emit_barrier ();
4844 	  emit_label (label1);
4845 	  expand_inc (adjusted_op0, const1_rtx);
4846 	  emit_jump_insn (targetm.gen_jump (label4));
4847 	  emit_barrier ();
4848 	  emit_label (label2);
4849 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4850 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4851 			      quotient, 0, OPTAB_LIB_WIDEN);
4852 	  if (tem != quotient)
4853 	    emit_move_insn (quotient, tem);
4854 	  emit_jump_insn (targetm.gen_jump (label5));
4855 	  emit_barrier ();
4856 	  emit_label (label3);
4857 	  expand_dec (adjusted_op0, const1_rtx);
4858 	  emit_label (label4);
4859 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4860 			      quotient, 0, OPTAB_LIB_WIDEN);
4861 	  if (tem != quotient)
4862 	    emit_move_insn (quotient, tem);
4863 	  expand_dec (quotient, const1_rtx);
4864 	  emit_label (label5);
4865 	}
4866 	break;
4867 
4868       case CEIL_DIV_EXPR:
4869       case CEIL_MOD_EXPR:
4870 	if (unsignedp)
4871 	  {
4872 	    if (op1_is_constant
4873 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4874 		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4875 		    || INTVAL (op1) >= 0))
4876 	      {
4877 		scalar_int_mode int_mode
4878 		  = as_a <scalar_int_mode> (compute_mode);
4879 		rtx t1, t2, t3;
4880 		unsigned HOST_WIDE_INT d = INTVAL (op1);
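		/* E.g. for op0 == 13, d == 4: (13 >> 2) + ((13 & 3) != 0)
		   gives 3 + 1 = 4, the ceiling of 13/4.  */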
4881 		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4882 				   floor_log2 (d), tquotient, 1);
4883 		t2 = expand_binop (int_mode, and_optab, op0,
4884 				   gen_int_mode (d - 1, int_mode),
4885 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4886 		t3 = gen_reg_rtx (int_mode);
4887 		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4888 		if (t3 == 0)
4889 		  {
4890 		    rtx_code_label *lab;
4891 		    lab = gen_label_rtx ();
4892 		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4893 		    expand_inc (t1, const1_rtx);
4894 		    emit_label (lab);
4895 		    quotient = t1;
4896 		  }
4897 		else
4898 		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4899 					    tquotient);
4900 		break;
4901 	      }
4902 
4903 	    /* Try using an instruction that produces both the quotient and
4904 	       remainder, using truncation.  We can easily compensate the
4905 	       quotient or remainder to get ceiling rounding, once we have the
4906 	       remainder.  Notice that we compute also the final remainder
4907 	       value here, and return the result right away.  */
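	    /* E.g. for op0 == 13, op1 == 4 the truncating divmod yields
	       q = 3, r = 1; since r != 0 the adjustment below produces the
	       ceiling result q = 4 and r = 1 - 4 = -3 (so op0 == q * op1 + r
	       still holds, modulo the word size).  */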
4908 	    if (target == 0 || GET_MODE (target) != compute_mode)
4909 	      target = gen_reg_rtx (compute_mode);
4910 
4911 	    if (rem_flag)
4912 	      {
4913 		remainder = (REG_P (target)
4914 			     ? target : gen_reg_rtx (compute_mode));
4915 		quotient = gen_reg_rtx (compute_mode);
4916 	      }
4917 	    else
4918 	      {
4919 		quotient = (REG_P (target)
4920 			    ? target : gen_reg_rtx (compute_mode));
4921 		remainder = gen_reg_rtx (compute_mode);
4922 	      }
4923 
4924 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4925 				     remainder, 1))
4926 	      {
4927 		/* This could be computed with a branch-less sequence.
4928 		   Save that for later.  */
4929 		rtx_code_label *label = gen_label_rtx ();
4930 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4931 				 compute_mode, label);
4932 		expand_inc (quotient, const1_rtx);
4933 		expand_dec (remainder, op1);
4934 		emit_label (label);
4935 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4936 	      }
4937 
4938 	    /* No luck with division elimination or divmod.  Have to do it
4939 	       by conditionally adjusting op0 *and* the result.  */
4940 	    {
4941 	      rtx_code_label *label1, *label2;
4942 	      rtx adjusted_op0, tem;
4943 
4944 	      quotient = gen_reg_rtx (compute_mode);
4945 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4946 	      label1 = gen_label_rtx ();
4947 	      label2 = gen_label_rtx ();
4948 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4949 			       compute_mode, label1);
4950 	      emit_move_insn  (quotient, const0_rtx);
4951 	      emit_jump_insn (targetm.gen_jump (label2));
4952 	      emit_barrier ();
4953 	      emit_label (label1);
4954 	      expand_dec (adjusted_op0, const1_rtx);
4955 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4956 				  quotient, 1, OPTAB_LIB_WIDEN);
4957 	      if (tem != quotient)
4958 		emit_move_insn (quotient, tem);
4959 	      expand_inc (quotient, const1_rtx);
4960 	      emit_label (label2);
4961 	    }
4962 	  }
4963 	else /* signed */
4964 	  {
4965 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4966 		&& INTVAL (op1) >= 0)
4967 	      {
4968 		/* This is extremely similar to the code for the unsigned case
4969 		   above.  For 2.7 we should merge these variants, but for
4970 		   2.6.1 I don't want to touch the code for unsigned since that
4971 		   get used in C.  The signed case will only be used by other
4972 		   languages (Ada).  */
4973 
4974 		rtx t1, t2, t3;
4975 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4976 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4977 				   floor_log2 (d), tquotient, 0);
4978 		t2 = expand_binop (compute_mode, and_optab, op0,
4979 				   gen_int_mode (d - 1, compute_mode),
4980 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4981 		t3 = gen_reg_rtx (compute_mode);
4982 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4983 				      compute_mode, 1, 1);
4984 		if (t3 == 0)
4985 		  {
4986 		    rtx_code_label *lab;
4987 		    lab = gen_label_rtx ();
4988 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4989 		    expand_inc (t1, const1_rtx);
4990 		    emit_label (lab);
4991 		    quotient = t1;
4992 		  }
4993 		else
4994 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4995 							  t1, t3),
4996 					    tquotient);
4997 		break;
4998 	      }
4999 
5000 	    /* Try using an instruction that produces both the quotient and
5001 	       remainder, using truncation.  We can easily compensate the
5002 	       quotient or remainder to get ceiling rounding, once we have the
5003 	       remainder.  Notice that we compute also the final remainder
5004 	       value here, and return the result right away.  */
5005 	    if (target == 0 || GET_MODE (target) != compute_mode)
5006 	      target = gen_reg_rtx (compute_mode);
5007 	    if (rem_flag)
5008 	      {
5009 		remainder = (REG_P (target)
5010 			    ? target : gen_reg_rtx (compute_mode));
5011 		quotient = gen_reg_rtx (compute_mode);
5012 	      }
5013 	    else
5014 	      {
5015 		quotient = (REG_P (target)
5016 			    ? target : gen_reg_rtx (compute_mode));
5017 		remainder = gen_reg_rtx (compute_mode);
5018 	      }
5019 
5020 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5021 				     remainder, 0))
5022 	      {
5023 		/* This could be computed with a branch-less sequence.
5024 		   Save that for later.  */
5025 		rtx tem;
5026 		rtx_code_label *label = gen_label_rtx ();
5027 		do_cmp_and_jump (remainder, const0_rtx, EQ,
5028 				 compute_mode, label);
5029 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5030 				    NULL_RTX, 0, OPTAB_WIDEN);
5031 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5032 		expand_inc (quotient, const1_rtx);
5033 		expand_dec (remainder, op1);
5034 		emit_label (label);
5035 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5036 	      }
5037 
5038 	    /* No luck with division elimination or divmod.  Have to do it
5039 	       by conditionally adjusting op0 *and* the result.  */
5040 	    {
5041 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5042 	      rtx adjusted_op0;
5043 	      rtx tem;
5044 
5045 	      quotient = gen_reg_rtx (compute_mode);
5046 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5047 	      label1 = gen_label_rtx ();
5048 	      label2 = gen_label_rtx ();
5049 	      label3 = gen_label_rtx ();
5050 	      label4 = gen_label_rtx ();
5051 	      label5 = gen_label_rtx ();
5052 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5053 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5054 			       compute_mode, label1);
5055 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5056 				  quotient, 0, OPTAB_LIB_WIDEN);
5057 	      if (tem != quotient)
5058 		emit_move_insn (quotient, tem);
5059 	      emit_jump_insn (targetm.gen_jump (label5));
5060 	      emit_barrier ();
5061 	      emit_label (label1);
5062 	      expand_dec (adjusted_op0, const1_rtx);
5063 	      emit_jump_insn (targetm.gen_jump (label4));
5064 	      emit_barrier ();
5065 	      emit_label (label2);
5066 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5067 			       compute_mode, label3);
5068 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5069 				  quotient, 0, OPTAB_LIB_WIDEN);
5070 	      if (tem != quotient)
5071 		emit_move_insn (quotient, tem);
5072 	      emit_jump_insn (targetm.gen_jump (label5));
5073 	      emit_barrier ();
5074 	      emit_label (label3);
5075 	      expand_inc (adjusted_op0, const1_rtx);
5076 	      emit_label (label4);
5077 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5078 				  quotient, 0, OPTAB_LIB_WIDEN);
5079 	      if (tem != quotient)
5080 		emit_move_insn (quotient, tem);
5081 	      expand_inc (quotient, const1_rtx);
5082 	      emit_label (label5);
5083 	    }
5084 	  }
5085 	break;
5086 
5087       case EXACT_DIV_EXPR:
5088 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5089 	  {
5090 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5091 	    int size = GET_MODE_BITSIZE (int_mode);
5092 	    HOST_WIDE_INT d = INTVAL (op1);
5093 	    unsigned HOST_WIDE_INT ml;
5094 	    int pre_shift;
5095 	    rtx t1;
5096 
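	    /* EXACT_DIV relies on op0 being a multiple of op1, so the
	       quotient can be formed by shifting out op1's factor of two and
	       multiplying by the multiplicative inverse of its odd part
	       modulo 2**size.  E.g. for a 32-bit mode and op1 == 3 the
	       inverse is 0xAAAAAAAB, and op0 * 0xAAAAAAAB (mod 2**32) equals
	       op0 / 3 for every multiple of 3.  */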
5097 	    pre_shift = ctz_or_zero (d);
5098 	    ml = invert_mod2n (d >> pre_shift, size);
5099 	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5100 			       pre_shift, NULL_RTX, unsignedp);
5101 	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5102 				    NULL_RTX, 1);
5103 
5104 	    insn = get_last_insn ();
5105 	    set_dst_reg_note (insn, REG_EQUAL,
5106 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5107 					      int_mode, op0, op1),
5108 			      quotient);
5109 	  }
5110 	break;
5111 
5112       case ROUND_DIV_EXPR:
5113       case ROUND_MOD_EXPR:
5114 	if (unsignedp)
5115 	  {
5116 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5117 	    rtx tem;
5118 	    rtx_code_label *label;
5119 	    label = gen_label_rtx ();
5120 	    quotient = gen_reg_rtx (int_mode);
5121 	    remainder = gen_reg_rtx (int_mode);
5122 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5123 	      {
5124 		rtx tem;
5125 		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5126 					 quotient, 1, OPTAB_LIB_WIDEN);
5127 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5128 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5129 					  remainder, 1, OPTAB_LIB_WIDEN);
5130 	      }
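	    /* The adjustment below rounds up when the remainder exceeds half
	       the divisor: e.g. 15 / 4 gives q = 3, r = 3, and since
	       3 > (4 - 1) / 2 the result becomes q = 4, r = -1.  */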
5131 	    tem = plus_constant (int_mode, op1, -1);
5132 	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5133 	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5134 	    expand_inc (quotient, const1_rtx);
5135 	    expand_dec (remainder, op1);
5136 	    emit_label (label);
5137 	  }
5138 	else
5139 	  {
5140 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5141 	    int size = GET_MODE_BITSIZE (int_mode);
5142 	    rtx abs_rem, abs_op1, tem, mask;
5143 	    rtx_code_label *label;
5144 	    label = gen_label_rtx ();
5145 	    quotient = gen_reg_rtx (int_mode);
5146 	    remainder = gen_reg_rtx (int_mode);
5147 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5148 	      {
5149 		rtx tem;
5150 		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5151 					 quotient, 0, OPTAB_LIB_WIDEN);
5152 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5153 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5154 					  remainder, 0, OPTAB_LIB_WIDEN);
5155 	      }
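	    /* When 2 * |r| >= |op1| the quotient is adjusted away from zero
	       and the remainder compensated, branch-free:
		   mask = (op0 ^ op1) >> (size - 1);   0 or -1
		   q += (mask ^ 1) - mask;             +1 or -1
		   r -= (mask ^ op1) - mask;           +op1 or -op1
	       so e.g. -15 / 4 becomes q = -4, r = 1.  */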
5156 	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5157 	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5158 	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5159 				1, NULL_RTX, 1);
5160 	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5161 	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5162 				NULL_RTX, 0, OPTAB_WIDEN);
5163 	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5164 				 size - 1, NULL_RTX, 0);
5165 	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5166 				NULL_RTX, 0, OPTAB_WIDEN);
5167 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5168 				NULL_RTX, 0, OPTAB_WIDEN);
5169 	    expand_inc (quotient, tem);
5170 	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5171 				NULL_RTX, 0, OPTAB_WIDEN);
5172 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 				NULL_RTX, 0, OPTAB_WIDEN);
5174 	    expand_dec (remainder, tem);
5175 	    emit_label (label);
5176 	  }
5177 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5178 
5179       default:
5180 	gcc_unreachable ();
5181       }
5182 
5183   if (quotient == 0)
5184     {
5185       if (target && GET_MODE (target) != compute_mode)
5186 	target = 0;
5187 
5188       if (rem_flag)
5189 	{
5190 	  /* Try to produce the remainder without producing the quotient.
5191 	     If we seem to have a divmod pattern that does not require widening,
5192 	     don't try widening here.  We should really have a WIDEN argument
5193 	     to expand_twoval_binop, since what we'd really like to do here is
5194 	     1) try a mod insn in compute_mode
5195 	     2) try a divmod insn in compute_mode
5196 	     3) try a div insn in compute_mode and multiply-subtract to get
5197 	        remainder
5198 	     4) try the same things with widening allowed.  */
5199 	  remainder
5200 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5201 				 op0, op1, target,
5202 				 unsignedp,
5203 				 ((optab_handler (optab2, compute_mode)
5204 				   != CODE_FOR_nothing)
5205 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5206 	  if (remainder == 0)
5207 	    {
5208 	      /* No luck there.  Can we do remainder and divide at once
5209 		 without a library call?  */
5210 	      remainder = gen_reg_rtx (compute_mode);
5211 	      if (! expand_twoval_binop ((unsignedp
5212 					  ? udivmod_optab
5213 					  : sdivmod_optab),
5214 					 op0, op1,
5215 					 NULL_RTX, remainder, unsignedp))
5216 		remainder = 0;
5217 	    }
5218 
5219 	  if (remainder)
5220 	    return gen_lowpart (mode, remainder);
5221 	}
5222 
5223       /* Produce the quotient.  Try a quotient insn, but not a library call.
5224 	 If we have a divmod in this mode, use it in preference to widening
5225 	 the div (for this test we assume it will not fail). Note that optab2
5226 	 is set to the one of the two optabs that the call below will use.  */
5227       quotient
5228 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5229 			     op0, op1, rem_flag ? NULL_RTX : target,
5230 			     unsignedp,
5231 			     ((optab_handler (optab2, compute_mode)
5232 			       != CODE_FOR_nothing)
5233 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5234 
5235       if (quotient == 0)
5236 	{
5237 	  /* No luck there.  Try a quotient-and-remainder insn,
5238 	     keeping the quotient alone.  */
5239 	  quotient = gen_reg_rtx (compute_mode);
5240 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5241 				     op0, op1,
5242 				     quotient, NULL_RTX, unsignedp))
5243 	    {
5244 	      quotient = 0;
5245 	      if (! rem_flag)
5246 		/* Still no luck.  If we are not computing the remainder,
5247 		   use a library call for the quotient.  */
5248 		quotient = sign_expand_binop (compute_mode,
5249 					      udiv_optab, sdiv_optab,
5250 					      op0, op1, target,
5251 					      unsignedp, OPTAB_LIB_WIDEN);
5252 	    }
5253 	}
5254     }
5255 
5256   if (rem_flag)
5257     {
5258       if (target && GET_MODE (target) != compute_mode)
5259 	target = 0;
5260 
5261       if (quotient == 0)
5262 	{
5263 	  /* No divide instruction either.  Use library for remainder.  */
5264 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5265 					 op0, op1, target,
5266 					 unsignedp, OPTAB_LIB_WIDEN);
5267 	  /* No remainder function.  Try a quotient-and-remainder
5268 	     function, keeping the remainder.  */
5269 	  if (!remainder)
5270 	    {
5271 	      remainder = gen_reg_rtx (compute_mode);
5272 	      if (!expand_twoval_binop_libfunc
5273 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5274 		   op0, op1,
5275 		   NULL_RTX, remainder,
5276 		   unsignedp ? UMOD : MOD))
5277 		remainder = NULL_RTX;
5278 	    }
5279 	}
5280       else
5281 	{
5282 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5283 	  remainder = expand_mult (compute_mode, quotient, op1,
5284 				   NULL_RTX, unsignedp);
5285 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5286 				    remainder, target, unsignedp,
5287 				    OPTAB_LIB_WIDEN);
5288 	}
5289     }
5290 
5291   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5292 }
5293 
5294 /* Return a tree node with data type TYPE, describing the value of X.
5295    Usually this is a VAR_DECL, if there is no obvious better choice.
5296    X may be an expression; however, we only support those expressions
5297    generated by loop.c.  */
5298 
5299 tree
5300 make_tree (tree type, rtx x)
5301 {
5302   tree t;
5303 
5304   switch (GET_CODE (x))
5305     {
5306     case CONST_INT:
5307     case CONST_WIDE_INT:
5308       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5309       return t;
5310 
5311     case CONST_DOUBLE:
5312       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5313       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5314 	t = wide_int_to_tree (type,
5315 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5316 						    HOST_BITS_PER_WIDE_INT * 2));
5317       else
5318 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5319 
5320       return t;
5321 
5322     case CONST_VECTOR:
5323       {
5324 	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5325 	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5326 	tree itype = TREE_TYPE (type);
5327 
5328 	/* Build a tree with vector elements.  */
5329 	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5330 	unsigned int count = elts.encoded_nelts ();
5331 	for (unsigned int i = 0; i < count; ++i)
5332 	  {
5333 	    rtx elt = CONST_VECTOR_ELT (x, i);
5334 	    elts.quick_push (make_tree (itype, elt));
5335 	  }
5336 
5337 	return elts.build ();
5338       }
5339 
5340     case PLUS:
5341       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5342 			  make_tree (type, XEXP (x, 1)));
5343 
5344     case MINUS:
5345       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5346 			  make_tree (type, XEXP (x, 1)));
5347 
5348     case NEG:
5349       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5350 
5351     case MULT:
5352       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5353 			  make_tree (type, XEXP (x, 1)));
5354 
5355     case ASHIFT:
5356       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5357 			  make_tree (type, XEXP (x, 1)));
5358 
5359     case LSHIFTRT:
5360       t = unsigned_type_for (type);
5361       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5362 			    		 make_tree (t, XEXP (x, 0)),
5363 				    	 make_tree (type, XEXP (x, 1))));
5364 
5365     case ASHIFTRT:
5366       t = signed_type_for (type);
5367       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5368 					 make_tree (t, XEXP (x, 0)),
5369 				    	 make_tree (type, XEXP (x, 1))));
5370 
5371     case DIV:
5372       if (TREE_CODE (type) != REAL_TYPE)
5373 	t = signed_type_for (type);
5374       else
5375 	t = type;
5376 
5377       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5378 				    	 make_tree (t, XEXP (x, 0)),
5379 				    	 make_tree (t, XEXP (x, 1))));
5380     case UDIV:
5381       t = unsigned_type_for (type);
5382       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 				    	 make_tree (t, XEXP (x, 0)),
5384 				    	 make_tree (t, XEXP (x, 1))));
5385 
5386     case SIGN_EXTEND:
5387     case ZERO_EXTEND:
5388       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5389 					  GET_CODE (x) == ZERO_EXTEND);
5390       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5391 
5392     case CONST:
5393       return make_tree (type, XEXP (x, 0));
5394 
5395     case SYMBOL_REF:
5396       t = SYMBOL_REF_DECL (x);
5397       if (t)
5398 	return fold_convert (type, build_fold_addr_expr (t));
5399       /* fall through.  */
5400 
5401     default:
5402       if (CONST_POLY_INT_P (x))
5403 	return wide_int_to_tree (t, const_poly_int_value (x));
5404 
5405       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5406 
5407       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5408 	 address mode to pointer mode.  */
5409       if (POINTER_TYPE_P (type))
5410 	x = convert_memory_address_addr_space
5411 	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5412 
5413       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5414 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5415       t->decl_with_rtl.rtl = x;
5416 
5417       return t;
5418     }
5419 }
5420 
5421 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5422    and returning TARGET.
5423 
5424    If TARGET is 0, a pseudo-register or constant is returned.  */
5425 
5426 rtx
5427 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5428 {
5429   rtx tem = 0;
5430 
5431   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5432     tem = simplify_binary_operation (AND, mode, op0, op1);
5433   if (tem == 0)
5434     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5435 
5436   if (target == 0)
5437     target = tem;
5438   else if (tem != target)
5439     emit_move_insn (target, tem);
5440   return target;
5441 }
5442 
5443 /* Helper function for emit_store_flag.  */
5444 rtx
5445 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5446 	     machine_mode mode, machine_mode compare_mode,
5447 	     int unsignedp, rtx x, rtx y, int normalizep,
5448 	     machine_mode target_mode)
5449 {
5450   class expand_operand ops[4];
5451   rtx op0, comparison, subtarget;
5452   rtx_insn *last;
5453   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5454   scalar_int_mode int_target_mode;
5455 
5456   last = get_last_insn ();
5457   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5458   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5459   if (!x || !y)
5460     {
5461       delete_insns_since (last);
5462       return NULL_RTX;
5463     }
5464 
5465   if (target_mode == VOIDmode)
5466     int_target_mode = result_mode;
5467   else
5468     int_target_mode = as_a <scalar_int_mode> (target_mode);
5469   if (!target)
5470     target = gen_reg_rtx (int_target_mode);
5471 
5472   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5473 
5474   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5475   create_fixed_operand (&ops[1], comparison);
5476   create_fixed_operand (&ops[2], x);
5477   create_fixed_operand (&ops[3], y);
5478   if (!maybe_expand_insn (icode, 4, ops))
5479     {
5480       delete_insns_since (last);
5481       return NULL_RTX;
5482     }
5483   subtarget = ops[0].value;
5484 
5485   /* If we are converting to a wider mode, first convert to
5486      INT_TARGET_MODE, then normalize.  This produces better combining
5487      opportunities on machines that have a SIGN_EXTRACT when we are
5488      testing a single bit.  This mostly benefits the 68k.
5489 
5490      If STORE_FLAG_VALUE does not have the sign bit set when
5491      interpreted in MODE, we can do this conversion as unsigned, which
5492      is usually more efficient.  */
5493   if (GET_MODE_PRECISION (int_target_mode) > GET_MODE_PRECISION (result_mode))
5494     {
5495       gcc_assert (GET_MODE_PRECISION (result_mode) != 1
5496 		  || STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1);
5497 
5498       bool unsignedp = (STORE_FLAG_VALUE >= 0);
5499       convert_move (target, subtarget, unsignedp);
5500 
5501       op0 = target;
5502       result_mode = int_target_mode;
5503     }
5504   else
5505     op0 = subtarget;
5506 
5507   /* If we want to keep subexpressions around, don't reuse our last
5508      target.  */
5509   if (optimize)
5510     subtarget = 0;
5511 
5512   /* Now normalize to the proper value in MODE.  Sometimes we don't
5513      have to do anything.  */
5514   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5515     ;
5516   /* STORE_FLAG_VALUE might be the most negative number, so write
5517      the comparison this way to avoid a compiler-time warning.  */
5518   else if (- normalizep == STORE_FLAG_VALUE)
5519     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5520 
5521   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5522      it hard to use a value of just the sign bit due to ANSI integer
5523      constant typing rules.  */
5524   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5525     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5526 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5527 			normalizep == 1);
5528   else
5529     {
5530       gcc_assert (STORE_FLAG_VALUE & 1);
5531 
5532       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5533       if (normalizep == -1)
5534 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5535     }
5536 
5537   /* If we were converting to a smaller mode, do the conversion now.  */
5538   if (int_target_mode != result_mode)
5539     {
5540       convert_move (target, op0, 0);
5541       return target;
5542     }
5543   else
5544     return op0;
5545 }
5546 
5547 
5548 /* A subroutine of emit_store_flag only including "tricks" that do not
5549    need a recursive call.  These are kept separate to avoid infinite
5550    loops.  */
5551 
5552 static rtx
5553 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5554 		   machine_mode mode, int unsignedp, int normalizep,
5555 		   machine_mode target_mode)
5556 {
5557   rtx subtarget;
5558   enum insn_code icode;
5559   machine_mode compare_mode;
5560   enum mode_class mclass;
5561   enum rtx_code scode;
5562 
5563   if (unsignedp)
5564     code = unsigned_condition (code);
5565   scode = swap_condition (code);
5566 
5567   /* If one operand is constant, make it the second one.  Only do this
5568      if the other operand is not constant as well.  */
5569 
5570   if (swap_commutative_operands_p (op0, op1))
5571     {
5572       std::swap (op0, op1);
5573       code = swap_condition (code);
5574     }
5575 
5576   if (mode == VOIDmode)
5577     mode = GET_MODE (op0);
5578 
5579   if (CONST_SCALAR_INT_P (op1))
5580     canonicalize_comparison (mode, &code, &op1);
5581 
5582   /* For some comparisons with 1 and -1, we can convert this to
5583      comparisons with zero.  This will often produce more opportunities for
5584      store-flag insns.  */
5585 
5586   switch (code)
5587     {
5588     case LT:
5589       if (op1 == const1_rtx)
5590 	op1 = const0_rtx, code = LE;
5591       break;
5592     case LE:
5593       if (op1 == constm1_rtx)
5594 	op1 = const0_rtx, code = LT;
5595       break;
5596     case GE:
5597       if (op1 == const1_rtx)
5598 	op1 = const0_rtx, code = GT;
5599       break;
5600     case GT:
5601       if (op1 == constm1_rtx)
5602 	op1 = const0_rtx, code = GE;
5603       break;
5604     case GEU:
5605       if (op1 == const1_rtx)
5606 	op1 = const0_rtx, code = NE;
5607       break;
5608     case LTU:
5609       if (op1 == const1_rtx)
5610 	op1 = const0_rtx, code = EQ;
5611       break;
5612     default:
5613       break;
5614     }
5615 
5616   /* If we are comparing a double-word integer with zero or -1, we can
5617      convert the comparison into one involving a single word.  */
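  /* For instance, with 32-bit words a DImode "x == 0" becomes
     "(lo | hi) == 0", "x == -1" becomes "(lo & hi) == -1", and "x < 0"
     only needs to test the sign of the high word.  */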
5618   scalar_int_mode int_mode;
5619   if (is_int_mode (mode, &int_mode)
5620       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5621       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5622     {
5623       rtx tem;
5624       if ((code == EQ || code == NE)
5625 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5626 	{
5627 	  rtx op00, op01;
5628 
5629 	  /* Do a logical OR or AND of the two words and compare the
5630 	     result.  */
5631 	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5632 	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5633 	  tem = expand_binop (word_mode,
5634 			      op1 == const0_rtx ? ior_optab : and_optab,
5635 			      op00, op01, NULL_RTX, unsignedp,
5636 			      OPTAB_DIRECT);
5637 
5638 	  if (tem != 0)
5639 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5640 				   unsignedp, normalizep);
5641 	}
5642       else if ((code == LT || code == GE) && op1 == const0_rtx)
5643 	{
5644 	  rtx op0h;
5645 
5646 	  /* If testing the sign bit, can just test on high word.  */
5647 	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5648 				      subreg_highpart_offset (word_mode,
5649 							      int_mode));
5650 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5651 				 unsignedp, normalizep);
5652 	}
5653       else
5654 	tem = NULL_RTX;
5655 
5656       if (tem)
5657 	{
5658 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5659 	    return tem;
5660 	  if (!target)
5661 	    target = gen_reg_rtx (target_mode);
5662 
5663 	  convert_move (target, tem,
5664 			!val_signbit_known_set_p (word_mode,
5665 						  (normalizep ? normalizep
5666 						   : STORE_FLAG_VALUE)));
5667 	  return target;
5668 	}
5669     }
5670 
5671   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5672      complement of A (for GE) and shifting the sign bit to the low bit.  */
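  /* E.g. for a 32-bit A with STORE_FLAG_VALUE == 1, "A < 0" is just
     "(unsigned) A >> 31" and "A >= 0" is "(unsigned) ~A >> 31".  */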
5673   if (op1 == const0_rtx && (code == LT || code == GE)
5674       && is_int_mode (mode, &int_mode)
5675       && (normalizep || STORE_FLAG_VALUE == 1
5676 	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5677     {
5678       scalar_int_mode int_target_mode;
5679       subtarget = target;
5680 
5681       if (!target)
5682 	int_target_mode = int_mode;
5683       else
5684 	{
5685 	  /* If the result is to be wider than OP0, it is best to convert it
5686 	     first.  If it is to be narrower, it is *incorrect* to convert it
5687 	     first.  */
5688 	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5689 	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5690 	    {
5691 	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5692 	      int_mode = int_target_mode;
5693 	    }
5694 	}
5695 
5696       if (int_target_mode != int_mode)
5697 	subtarget = 0;
5698 
5699       if (code == GE)
5700 	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5701 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5702 			    ? 0 : subtarget), 0);
5703 
5704       if (STORE_FLAG_VALUE == 1 || normalizep)
5705 	/* If we are supposed to produce a 0/1 value, we want to do
5706 	   a logical shift from the sign bit to the low-order bit; for
5707 	   a -1/0 value, we do an arithmetic shift.  */
5708 	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5709 			    GET_MODE_BITSIZE (int_mode) - 1,
5710 			    subtarget, normalizep != -1);
5711 
5712       if (int_mode != int_target_mode)
5713 	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5714 
5715       return op0;
5716     }
5717 
5718   mclass = GET_MODE_CLASS (mode);
5719   FOR_EACH_MODE_FROM (compare_mode, mode)
5720     {
5721      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5722      icode = optab_handler (cstore_optab, optab_mode);
5723      if (icode != CODE_FOR_nothing)
5724 	{
5725 	  do_pending_stack_adjust ();
5726 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5727 				 unsignedp, op0, op1, normalizep, target_mode);
5728 	  if (tem)
5729 	    return tem;
5730 
5731 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5732 	    {
5733 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5734 				 unsignedp, op1, op0, normalizep, target_mode);
5735 	      if (tem)
5736 	        return tem;
5737 	    }
5738 	  break;
5739 	}
5740     }
5741 
5742   return 0;
5743 }
5744 
5745 /* Subroutine of emit_store_flag that handles cases in which the operands
5746    are scalar integers.  SUBTARGET is the target to use for temporary
5747    operations and TRUEVAL is the value to store when the condition is
5748    true.  All other arguments are as for emit_store_flag.  */
5749 
5750 rtx
5751 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5752 		     rtx op1, scalar_int_mode mode, int unsignedp,
5753 		     int normalizep, rtx trueval)
5754 {
5755   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5756   rtx_insn *last = get_last_insn ();
5757 
5758   /* If this is an equality comparison of integers, we can try to exclusive-or
5759      (or subtract) the two operands and use a recursive call to try the
5760      comparison with zero.  Don't do any of these cases if branches are
5761      very cheap.  */
5762 
5763   if ((code == EQ || code == NE) && op1 != const0_rtx)
5764     {
5765       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5766 			      OPTAB_WIDEN);
5767 
5768       if (tem == 0)
5769 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5770 			    OPTAB_WIDEN);
5771       if (tem != 0)
5772 	tem = emit_store_flag (target, code, tem, const0_rtx,
5773 			       mode, unsignedp, normalizep);
5774       if (tem != 0)
5775 	return tem;
5776 
5777       delete_insns_since (last);
5778     }
5779 
5780   /* For integer comparisons, try the reverse comparison.  However, for
5781      small X, and if we'd have to extend anyway, implementing "X != 0"
5782      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5783   rtx_code rcode = reverse_condition (code);
5784   if (can_compare_p (rcode, mode, ccp_store_flag)
5785       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5786 	    && code == NE
5787 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5788 	    && op1 == const0_rtx))
5789     {
5790       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5791 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5792 
5793       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5794       if (want_add
5795 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5796 		       optimize_insn_for_speed_p ()) == 0)
5797 	{
5798 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5799 				       STORE_FLAG_VALUE, target_mode);
5800 	  if (tem != 0)
5801 	    tem = expand_binop (target_mode, add_optab, tem,
5802 				gen_int_mode (normalizep, target_mode),
5803 				target, 0, OPTAB_WIDEN);
5804 	  if (tem != 0)
5805 	    return tem;
5806 	}
5807       else if (!want_add
5808 	       && rtx_cost (trueval, mode, XOR, 1,
5809 			    optimize_insn_for_speed_p ()) == 0)
5810 	{
5811 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5812 				       normalizep, target_mode);
5813 	  if (tem != 0)
5814 	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5815 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5816 	  if (tem != 0)
5817 	    return tem;
5818 	}
5819 
5820       delete_insns_since (last);
5821     }
5822 
5823   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5824      the constant zero.  Reject all other comparisons at this point.  Only
5825      do LE and GT if branches are expensive since they are expensive on
5826      2-operand machines.  */
5827 
5828   if (op1 != const0_rtx
5829       || (code != EQ && code != NE
5830 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5831 			   false) <= 1 || (code != LE && code != GT))))
5832     return 0;
5833 
5834   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5835      do the necessary operation below.  */
5836 
5837   rtx tem = 0;
5838 
5839   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5840      the sign bit set.  */
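  /* E.g. A == 0 gives 0 | -1 == -1 (sign bit set), while any A > 0 keeps
     both A and A - 1 non-negative, so the sign bit stays clear.  */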
5841 
5842   if (code == LE)
5843     {
5844       /* This is destructive, so SUBTARGET can't be OP0.  */
5845       if (rtx_equal_p (subtarget, op0))
5846 	subtarget = 0;
5847 
5848       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5849 			  OPTAB_WIDEN);
5850       if (tem)
5851 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5852 			    OPTAB_WIDEN);
5853     }
5854 
5855   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5856      number of bits in the mode of OP0, minus one.  */
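  /* For instance, in 8-bit arithmetic: A == 5 gives (5 >> 7) - 5 = -5
     (sign bit set), A == -3 gives (-3 >> 7) - (-3) = 2 (sign bit clear)
     and A == 0 gives 0 - 0 = 0 (sign bit clear).  */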
5857 
5858   if (code == GT)
5859     {
5860       if (rtx_equal_p (subtarget, op0))
5861 	subtarget = 0;
5862 
5863       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5864 				GET_MODE_BITSIZE (mode) - 1,
5865 				subtarget, 0);
5866       if (tem)
5867 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5868 			    OPTAB_WIDEN);
5869     }
5870 
5871   if (code == EQ || code == NE)
5872     {
5873       /* For EQ or NE, one way to do the comparison is to apply an operation
5874 	 that converts the operand into a positive number if it is nonzero
5875 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5876 	 for NE we negate.  This puts the result in the sign bit.  Then we
5877 	 normalize with a shift, if needed.
5878 
5879 	 Two operations that can do the above actions are ABS and FFS, so try
5880 	 them.  If that doesn't work, and MODE is smaller than a full word,
5881 	 we can use zero-extension to the wider mode (an unsigned conversion)
5882 	 as the operation.  */
5883 
5884       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5885 	 that is compensated by the subsequent overflow when subtracting
5886 	 one / negating.  */
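      /* For instance, with EQ: abs (0) - 1 is -1, which has the sign bit
         set, while abs (X) - 1 has the sign bit clear for any nonzero X;
         with NE: -abs (0) is 0, while -abs (X) has the sign bit set for
         any nonzero X.  */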
5887 
5888       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5889 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5890       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5891 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5892       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5893 	{
5894 	  tem = convert_modes (word_mode, mode, op0, 1);
5895 	  mode = word_mode;
5896 	}
5897 
5898       if (tem != 0)
5899 	{
5900 	  if (code == EQ)
5901 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5902 				0, OPTAB_WIDEN);
5903 	  else
5904 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5905 	}
5906 
5907       /* If we couldn't do it that way, for NE we can "or" the two's complement
5908 	 of the value with itself.  For EQ, we take the one's complement of
5909 	 that "or", which is an extra insn, so we only handle EQ if branches
5910 	 are expensive.  */
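      /* For instance, -X | X has the sign bit set for any nonzero X and
         is zero for X == 0, so it encodes "X != 0" directly in the sign
         bit; complementing it encodes "X == 0" instead.  */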
5911 
5912       if (tem == 0
5913 	  && (code == NE
5914 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5915 			      false) > 1))
5916 	{
5917 	  if (rtx_equal_p (subtarget, op0))
5918 	    subtarget = 0;
5919 
5920 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5921 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5922 			      OPTAB_WIDEN);
5923 
5924 	  if (tem && code == EQ)
5925 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5926 	}
5927     }
5928 
5929   if (tem && normalizep)
5930     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5931 			      GET_MODE_BITSIZE (mode) - 1,
5932 			      subtarget, normalizep == 1);
5933 
5934   if (tem)
5935     {
5936       if (!target)
5937 	;
5938       else if (GET_MODE (tem) != target_mode)
5939 	{
5940 	  convert_move (target, tem, 0);
5941 	  tem = target;
5942 	}
5943       else if (!subtarget)
5944 	{
5945 	  emit_move_insn (target, tem);
5946 	  tem = target;
5947 	}
5948     }
5949   else
5950     delete_insns_since (last);
5951 
5952   return tem;
5953 }
5954 
5955 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5956    and storing in TARGET.  Normally return TARGET.
5957    Return 0 if that cannot be done.
5958 
5959    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5960    it is VOIDmode, they cannot both be CONST_INT.
5961 
5962    UNSIGNEDP is for the case where we have to widen the operands
5963    to perform the operation.  It says to use zero-extension.
5964 
5965    NORMALIZEP is 1 if we should convert the result to be either zero
5966    or one.  NORMALIZEP is -1 if we should convert the result to be
5967    either zero or -1.  If NORMALIZEP is zero, the result will be left
5968    "raw" out of the scc insn.  */
5969 
5970 rtx
5971 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5972 		 machine_mode mode, int unsignedp, int normalizep)
5973 {
5974   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5975   enum rtx_code rcode;
5976   rtx subtarget;
5977   rtx tem, trueval;
5978   rtx_insn *last;
5979 
5980   /* If we compare constants, we shouldn't use a store-flag operation,
5981      but a constant load.  We can get there via the vanilla route that
5982      usually generates a compare-branch sequence, but will in this case
5983      fold the comparison to a constant, and thus elide the branch.  */
5984   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5985     return NULL_RTX;
5986 
5987   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5988 			   target_mode);
5989   if (tem)
5990     return tem;
5991 
5992   /* If we reached here, we can't do this with a scc insn; however, there
5993      are some comparisons that can be done in other ways.  Don't do any
5994      of these cases if branches are very cheap.  */
5995   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5996     return 0;
5997 
5998   /* See what we need to return.  We can only return a 1, -1, or the
5999      sign bit.  */
6000 
6001   if (normalizep == 0)
6002     {
6003       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6004 	normalizep = STORE_FLAG_VALUE;
6005 
6006       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6007 	;
6008       else
6009 	return 0;
6010     }
6011 
6012   last = get_last_insn ();
6013 
6014   /* If optimizing, use different pseudo registers for each insn, instead
6015      of reusing the same pseudo.  This leads to better CSE, but slows
6016      down the compiler, since there are more pseudos.  */
6017   subtarget = (!optimize
6018 	       && (target_mode == mode)) ? target : NULL_RTX;
6019   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6020 
6021   /* For floating-point comparisons, try the reverse comparison or try
6022      changing the "orderedness" of the comparison.  */
6023   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6024     {
6025       enum rtx_code first_code;
6026       bool and_them;
6027 
6028       rcode = reverse_condition_maybe_unordered (code);
6029       if (can_compare_p (rcode, mode, ccp_store_flag)
6030 	  && (code == ORDERED || code == UNORDERED
6031 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6032 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6033 	{
6034 	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6035 			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6036 
6037 	  /* For the reverse comparison, use either an addition or a XOR.  */
6038 	  if (want_add
6039 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6040 			   optimize_insn_for_speed_p ()) == 0)
6041 	    {
6042 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6043 				       STORE_FLAG_VALUE, target_mode);
6044 	      if (tem)
6045 		return expand_binop (target_mode, add_optab, tem,
6046 				     gen_int_mode (normalizep, target_mode),
6047 				     target, 0, OPTAB_WIDEN);
6048 	    }
6049 	  else if (!want_add
6050 		   && rtx_cost (trueval, mode, XOR, 1,
6051 				optimize_insn_for_speed_p ()) == 0)
6052 	    {
6053 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6054 				       normalizep, target_mode);
6055 	      if (tem)
6056 		return expand_binop (target_mode, xor_optab, tem, trueval,
6057 				     target, INTVAL (trueval) >= 0,
6058 				     OPTAB_WIDEN);
6059 	    }
6060 	}
6061 
6062       delete_insns_since (last);
6063 
6064       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6065       if (code == ORDERED || code == UNORDERED)
6066 	return 0;
6067 
6068       and_them = split_comparison (code, mode, &first_code, &code);
6069 
6070       /* If there are no NaNs, the first comparison should always fall through.
6071 	 Effectively change the comparison to the other one.  */
6072       if (!HONOR_NANS (mode))
6073 	{
6074 	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6075 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6076 				    target_mode);
6077 	}
6078 
6079       if (!HAVE_conditional_move)
6080 	return 0;
6081 
6082       /* Do not turn a trapping comparison into a non-trapping one.  */
6083       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6084 	  && flag_trapping_math)
6085 	return 0;
6086 
6087       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6088 	 conditional move.  */
6089       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6090 			       normalizep, target_mode);
6091       if (tem == 0)
6092 	return 0;
6093 
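      /* AND the two half-comparisons by forcing the result to zero when
         the second comparison fails, or OR them by forcing the result to
         TRUEVAL when the second comparison holds.  */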
6094       if (and_them)
6095 	tem = emit_conditional_move (target, code, op0, op1, mode,
6096 				     tem, const0_rtx, GET_MODE (tem), 0);
6097       else
6098 	tem = emit_conditional_move (target, code, op0, op1, mode,
6099 				     trueval, tem, GET_MODE (tem), 0);
6100 
6101       if (tem == 0)
6102 	delete_insns_since (last);
6103       return tem;
6104     }
6105 
6106   /* The remaining tricks only apply to integer comparisons.  */
6107 
6108   scalar_int_mode int_mode;
6109   if (is_int_mode (mode, &int_mode))
6110     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6111 				unsignedp, normalizep, trueval);
6112 
6113   return 0;
6114 }
6115 
6116 /* Like emit_store_flag, but always succeeds.  */
6117 
6118 rtx
6119 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6120 		       machine_mode mode, int unsignedp, int normalizep)
6121 {
6122   rtx tem;
6123   rtx_code_label *label;
6124   rtx trueval, falseval;
6125 
6126   /* First see if emit_store_flag can do the job.  */
6127   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6128   if (tem != 0)
6129     return tem;
6130 
6131   /* If one operand is constant, make it the second one.  Only do this
6132      if the other operand is not constant as well.  */
6133   if (swap_commutative_operands_p (op0, op1))
6134     {
6135       std::swap (op0, op1);
6136       code = swap_condition (code);
6137     }
6138 
6139   if (mode == VOIDmode)
6140     mode = GET_MODE (op0);
6141 
6142   if (!target)
6143     target = gen_reg_rtx (word_mode);
6144 
6145   /* If this failed, we have to do this with set/compare/jump/set code.
6146      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6147   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
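  /* For the "foo != 0" case just described, a single conditional jump
     around the store of TRUEVAL is enough: if OP0 is zero, TARGET already
     holds the desired result.  */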
6148   if (code == NE
6149       && GET_MODE_CLASS (mode) == MODE_INT
6150       && REG_P (target)
6151       && op0 == target
6152       && op1 == const0_rtx)
6153     {
6154       label = gen_label_rtx ();
6155       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6156 			       NULL_RTX, NULL, label,
6157 			       profile_probability::uninitialized ());
6158       emit_move_insn (target, trueval);
6159       emit_label (label);
6160       return target;
6161     }
6162 
6163   if (!REG_P (target)
6164       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6165     target = gen_reg_rtx (GET_MODE (target));
6166 
6167   /* Jump in the right direction if the target cannot implement CODE
6168      but can jump on its reverse condition.  */
6169   falseval = const0_rtx;
6170   if (! can_compare_p (code, mode, ccp_jump)
6171       && (! FLOAT_MODE_P (mode)
6172           || code == ORDERED || code == UNORDERED
6173           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6174           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6175     {
6176       enum rtx_code rcode;
6177       if (FLOAT_MODE_P (mode))
6178         rcode = reverse_condition_maybe_unordered (code);
6179       else
6180         rcode = reverse_condition (code);
6181 
6182       /* Canonicalize to UNORDERED for the libcall.  */
6183       if (can_compare_p (rcode, mode, ccp_jump)
6184           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6185 	{
6186 	  falseval = trueval;
6187 	  trueval = const0_rtx;
6188 	  code = rcode;
6189 	}
6190     }
6191 
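  /* Set TARGET to TRUEVAL up front, then overwrite it with FALSEVAL
     unless the comparison holds.  */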
6192   emit_move_insn (target, trueval);
6193   label = gen_label_rtx ();
6194   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6195 			   label, profile_probability::uninitialized ());
6196 
6197   emit_move_insn (target, falseval);
6198   emit_label (label);
6199 
6200   return target;
6201 }
6202 
6203 /* Helper function for canonicalize_cmp_for_target.  Swap between inclusive
6204    and exclusive ranges in order to create an equivalent comparison.  See
6205    canonicalize_cmp_for_target for the possible cases.  */
6206 
6207 static enum rtx_code
6208 equivalent_cmp_code (enum rtx_code code)
6209 {
6210   switch (code)
6211     {
6212     case GT:
6213       return GE;
6214     case GE:
6215       return GT;
6216     case LT:
6217       return LE;
6218     case LE:
6219       return LT;
6220     case GTU:
6221       return GEU;
6222     case GEU:
6223       return GTU;
6224     case LTU:
6225       return LEU;
6226     case LEU:
6227       return LTU;
6228 
6229     default:
6230       return code;
6231     }
6232 }
6233 
6234 /* Choose the more appropriate immediate in scalar integer comparisons.  The
6235    purpose of this is to end up with an immediate which can be loaded into a
6236    register in fewer moves, if possible.
6237 
6238    For each integer comparison there exists an equivalent choice:
6239      i)   a >  b or a >= b + 1
6240      ii)  a <= b or a <  b + 1
6241      iii) a >= b or a >  b - 1
6242      iv)  a <  b or a <= b - 1
6243 
6244    MODE is the mode of the first operand.
6245    CODE points to the comparison code.
6246    IMM points to the rtx containing the immediate.  *IMM must satisfy
6247    CONST_SCALAR_INT_P on entry and continues to satisfy CONST_SCALAR_INT_P
6248    on exit.  */
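/* For instance, on a target where (say) 0x1000000 is cheaper to load into
   a register than 0xffffff, "a > 0xffffff" can be rewritten as
   "a >= 0x1000000"; insn_cost decides below which form wins.  */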
6249 
6250 void
6251 canonicalize_comparison (machine_mode mode, enum rtx_code *code, rtx *imm)
6252 {
6253   if (!SCALAR_INT_MODE_P (mode))
6254     return;
6255 
6256   int to_add = 0;
6257   enum signop sgn = unsigned_condition_p (*code) ? UNSIGNED : SIGNED;
6258 
6259   /* Extract the immediate value from the rtx.  */
6260   wide_int imm_val = rtx_mode_t (*imm, mode);
6261 
6262   if (*code == GT || *code == GTU || *code == LE || *code == LEU)
6263     to_add = 1;
6264   else if (*code == GE || *code == GEU || *code == LT || *code == LTU)
6265     to_add = -1;
6266   else
6267     return;
6268 
6269   /* Check for overflow/underflow in the case of signed values and
6270      wrapping around in the case of unsigned values.  If any occur,
6271      cancel the optimization.  */
6272   wi::overflow_type overflow = wi::OVF_NONE;
6273   wide_int imm_modif;
6274 
6275   if (to_add == 1)
6276     imm_modif = wi::add (imm_val, 1, sgn, &overflow);
6277   else
6278     imm_modif = wi::sub (imm_val, 1, sgn, &overflow);
6279 
6280   if (overflow)
6281     return;
6282 
6283   /* The following creates a pseudo; if we cannot do that, bail out.  */
6284   if (!can_create_pseudo_p ())
6285     return;
6286 
6287   rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
6288   rtx new_imm = immed_wide_int_const (imm_modif, mode);
6289 
6290   rtx_insn *old_rtx = gen_move_insn (reg, *imm);
6291   rtx_insn *new_rtx = gen_move_insn (reg, new_imm);
6292 
6293   /* Update the immediate and the code.  */
6294   if (insn_cost (old_rtx, true) > insn_cost (new_rtx, true))
6295     {
6296       *code = equivalent_cmp_code (*code);
6297       *imm = new_imm;
6298     }
6299 }
6300 
6301 
6302 
6303 /* Perform possibly multi-word comparison and conditional jump to LABEL
6304    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6305    now a thin wrapper around do_compare_rtx_and_jump.  */
6306 
6307 static void
6308 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6309 		 rtx_code_label *label)
6310 {
6311   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6312   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6313 			   NULL, label, profile_probability::uninitialized ());
6314 }
6315