1 /* Medium-level subroutines: convert bit-field store and extract
2    and shifts, multiplies and divides to rtl instructions.
3    Copyright (C) 1987-2018 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "predict.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "expr.h"
42 #include "langhooks.h"
43 #include "tree-vector-builder.h"
44 
45 struct target_expmed default_target_expmed;
46 #if SWITCHABLE_TARGET
47 struct target_expmed *this_target_expmed = &default_target_expmed;
48 #endif
49 
50 static bool store_integral_bit_field (rtx, opt_scalar_int_mode,
51 				      unsigned HOST_WIDE_INT,
52 				      unsigned HOST_WIDE_INT,
53 				      poly_uint64, poly_uint64,
54 				      machine_mode, rtx, bool, bool);
55 static void store_fixed_bit_field (rtx, opt_scalar_int_mode,
56 				   unsigned HOST_WIDE_INT,
57 				   unsigned HOST_WIDE_INT,
58 				   poly_uint64, poly_uint64,
59 				   rtx, scalar_int_mode, bool);
60 static void store_fixed_bit_field_1 (rtx, scalar_int_mode,
61 				     unsigned HOST_WIDE_INT,
62 				     unsigned HOST_WIDE_INT,
63 				     rtx, scalar_int_mode, bool);
64 static void store_split_bit_field (rtx, opt_scalar_int_mode,
65 				   unsigned HOST_WIDE_INT,
66 				   unsigned HOST_WIDE_INT,
67 				   poly_uint64, poly_uint64,
68 				   rtx, scalar_int_mode, bool);
69 static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
70 				       unsigned HOST_WIDE_INT,
71 				       unsigned HOST_WIDE_INT, int, rtx,
72 				       machine_mode, machine_mode, bool, bool);
73 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
74 				    unsigned HOST_WIDE_INT,
75 				    unsigned HOST_WIDE_INT, rtx, int, bool);
76 static rtx extract_fixed_bit_field_1 (machine_mode, rtx, scalar_int_mode,
77 				      unsigned HOST_WIDE_INT,
78 				      unsigned HOST_WIDE_INT, rtx, int, bool);
79 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
80 static rtx extract_split_bit_field (rtx, opt_scalar_int_mode,
81 				    unsigned HOST_WIDE_INT,
82 				    unsigned HOST_WIDE_INT, int, bool);
83 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
84 static rtx expand_smod_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
85 static rtx expand_sdiv_pow2 (scalar_int_mode, rtx, HOST_WIDE_INT);
86 
87 /* Return a constant integer mask value of mode MODE with BITSIZE ones
88    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
89    The mask is truncated if necessary to the width of mode MODE.  The
90    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
91 
92 static inline rtx
mask_rtx(scalar_int_mode mode,int bitpos,int bitsize,bool complement)93 mask_rtx (scalar_int_mode mode, int bitpos, int bitsize, bool complement)
94 {
95   return immed_wide_int_const
96     (wi::shifted_mask (bitpos, bitsize, complement,
97 		       GET_MODE_PRECISION (mode)), mode);
98 }
99 
100 /* Test whether a value is zero of a power of two.  */
101 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
102   (((x) & ((x) - HOST_WIDE_INT_1U)) == 0)
103 
104 struct init_expmed_rtl
105 {
106   rtx reg;
107   rtx plus;
108   rtx neg;
109   rtx mult;
110   rtx sdiv;
111   rtx udiv;
112   rtx sdiv_32;
113   rtx smod_32;
114   rtx wide_mult;
115   rtx wide_lshr;
116   rtx wide_trunc;
117   rtx shift;
118   rtx shift_mult;
119   rtx shift_add;
120   rtx shift_sub0;
121   rtx shift_sub1;
122   rtx zext;
123   rtx trunc;
124 
125   rtx pow2[MAX_BITS_PER_WORD];
126   rtx cint[MAX_BITS_PER_WORD];
127 };
128 
129 static void
init_expmed_one_conv(struct init_expmed_rtl * all,scalar_int_mode to_mode,scalar_int_mode from_mode,bool speed)130 init_expmed_one_conv (struct init_expmed_rtl *all, scalar_int_mode to_mode,
131 		      scalar_int_mode from_mode, bool speed)
132 {
133   int to_size, from_size;
134   rtx which;
135 
136   to_size = GET_MODE_PRECISION (to_mode);
137   from_size = GET_MODE_PRECISION (from_mode);
138 
139   /* Most partial integers have a precision less than the "full"
140      integer it requires for storage.  In case one doesn't, for
141      comparison purposes here, reduce the bit size by one in that
142      case.  */
143   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
144       && pow2p_hwi (to_size))
145     to_size --;
146   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
147       && pow2p_hwi (from_size))
148     from_size --;
149 
150   /* Assume cost of zero-extend and sign-extend is the same.  */
151   which = (to_size < from_size ? all->trunc : all->zext);
152 
153   PUT_MODE (all->reg, from_mode);
154   set_convert_cost (to_mode, from_mode, speed,
155 		    set_src_cost (which, to_mode, speed));
156 }
157 
158 static void
init_expmed_one_mode(struct init_expmed_rtl * all,machine_mode mode,int speed)159 init_expmed_one_mode (struct init_expmed_rtl *all,
160 		      machine_mode mode, int speed)
161 {
162   int m, n, mode_bitsize;
163   machine_mode mode_from;
164 
165   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
166 
167   PUT_MODE (all->reg, mode);
168   PUT_MODE (all->plus, mode);
169   PUT_MODE (all->neg, mode);
170   PUT_MODE (all->mult, mode);
171   PUT_MODE (all->sdiv, mode);
172   PUT_MODE (all->udiv, mode);
173   PUT_MODE (all->sdiv_32, mode);
174   PUT_MODE (all->smod_32, mode);
175   PUT_MODE (all->wide_trunc, mode);
176   PUT_MODE (all->shift, mode);
177   PUT_MODE (all->shift_mult, mode);
178   PUT_MODE (all->shift_add, mode);
179   PUT_MODE (all->shift_sub0, mode);
180   PUT_MODE (all->shift_sub1, mode);
181   PUT_MODE (all->zext, mode);
182   PUT_MODE (all->trunc, mode);
183 
184   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
185   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
186   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
187   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
188   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
189 
190   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
191 				     <= 2 * add_cost (speed, mode)));
192   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
193 				     <= 4 * add_cost (speed, mode)));
194 
195   set_shift_cost (speed, mode, 0, 0);
196   {
197     int cost = add_cost (speed, mode);
198     set_shiftadd_cost (speed, mode, 0, cost);
199     set_shiftsub0_cost (speed, mode, 0, cost);
200     set_shiftsub1_cost (speed, mode, 0, cost);
201   }
202 
203   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
204   for (m = 1; m < n; m++)
205     {
206       XEXP (all->shift, 1) = all->cint[m];
207       XEXP (all->shift_mult, 1) = all->pow2[m];
208 
209       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
210       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
211 						       speed));
212       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
213 							speed));
214       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
215 							speed));
216     }
217 
218   scalar_int_mode int_mode_to;
219   if (is_a <scalar_int_mode> (mode, &int_mode_to))
220     {
221       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
222 	   mode_from = (machine_mode)(mode_from + 1))
223 	init_expmed_one_conv (all, int_mode_to,
224 			      as_a <scalar_int_mode> (mode_from), speed);
225 
226       scalar_int_mode wider_mode;
227       if (GET_MODE_CLASS (int_mode_to) == MODE_INT
228 	  && GET_MODE_WIDER_MODE (int_mode_to).exists (&wider_mode))
229 	{
230 	  PUT_MODE (all->zext, wider_mode);
231 	  PUT_MODE (all->wide_mult, wider_mode);
232 	  PUT_MODE (all->wide_lshr, wider_mode);
233 	  XEXP (all->wide_lshr, 1)
234 	    = gen_int_shift_amount (wider_mode, mode_bitsize);
235 
236 	  set_mul_widen_cost (speed, wider_mode,
237 			      set_src_cost (all->wide_mult, wider_mode, speed));
238 	  set_mul_highpart_cost (speed, int_mode_to,
239 				 set_src_cost (all->wide_trunc,
240 					       int_mode_to, speed));
241 	}
242     }
243 }
244 
245 void
init_expmed(void)246 init_expmed (void)
247 {
248   struct init_expmed_rtl all;
249   machine_mode mode = QImode;
250   int m, speed;
251 
252   memset (&all, 0, sizeof all);
253   for (m = 1; m < MAX_BITS_PER_WORD; m++)
254     {
255       all.pow2[m] = GEN_INT (HOST_WIDE_INT_1 << m);
256       all.cint[m] = GEN_INT (m);
257     }
258 
259   /* Avoid using hard regs in ways which may be unsupported.  */
260   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
261   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
262   all.neg = gen_rtx_NEG (mode, all.reg);
263   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
264   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
265   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
266   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
267   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
268   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
269   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
270   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
271   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
272   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
273   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
274   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
275   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
276   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
277   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
278 
279   for (speed = 0; speed < 2; speed++)
280     {
281       crtl->maybe_hot_insn_p = speed;
282       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
283 
284       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
285 	   mode = (machine_mode)(mode + 1))
286 	init_expmed_one_mode (&all, mode, speed);
287 
288       if (MIN_MODE_PARTIAL_INT != VOIDmode)
289 	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
290 	     mode = (machine_mode)(mode + 1))
291 	  init_expmed_one_mode (&all, mode, speed);
292 
293       if (MIN_MODE_VECTOR_INT != VOIDmode)
294 	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
295 	     mode = (machine_mode)(mode + 1))
296 	  init_expmed_one_mode (&all, mode, speed);
297     }
298 
299   if (alg_hash_used_p ())
300     {
301       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
302       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
303     }
304   else
305     set_alg_hash_used_p (true);
306   default_rtl_profile ();
307 
308   ggc_free (all.trunc);
309   ggc_free (all.shift_sub1);
310   ggc_free (all.shift_sub0);
311   ggc_free (all.shift_add);
312   ggc_free (all.shift_mult);
313   ggc_free (all.shift);
314   ggc_free (all.wide_trunc);
315   ggc_free (all.wide_lshr);
316   ggc_free (all.wide_mult);
317   ggc_free (all.zext);
318   ggc_free (all.smod_32);
319   ggc_free (all.sdiv_32);
320   ggc_free (all.udiv);
321   ggc_free (all.sdiv);
322   ggc_free (all.mult);
323   ggc_free (all.neg);
324   ggc_free (all.plus);
325   ggc_free (all.reg);
326 }
327 
328 /* Return an rtx representing minus the value of X.
329    MODE is the intended mode of the result,
330    useful if X is a CONST_INT.  */
331 
332 rtx
negate_rtx(machine_mode mode,rtx x)333 negate_rtx (machine_mode mode, rtx x)
334 {
335   rtx result = simplify_unary_operation (NEG, mode, x, mode);
336 
337   if (result == 0)
338     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
339 
340   return result;
341 }
342 
343 /* Whether reverse storage order is supported on the target.  */
344 static int reverse_storage_order_supported = -1;
345 
346 /* Check whether reverse storage order is supported on the target.  */
347 
348 static void
check_reverse_storage_order_support(void)349 check_reverse_storage_order_support (void)
350 {
351   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
352     {
353       reverse_storage_order_supported = 0;
354       sorry ("reverse scalar storage order");
355     }
356   else
357     reverse_storage_order_supported = 1;
358 }
359 
360 /* Whether reverse FP storage order is supported on the target.  */
361 static int reverse_float_storage_order_supported = -1;
362 
363 /* Check whether reverse FP storage order is supported on the target.  */
364 
365 static void
check_reverse_float_storage_order_support(void)366 check_reverse_float_storage_order_support (void)
367 {
368   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
369     {
370       reverse_float_storage_order_supported = 0;
371       sorry ("reverse floating-point scalar storage order");
372     }
373   else
374     reverse_float_storage_order_supported = 1;
375 }
376 
377 /* Return an rtx representing value of X with reverse storage order.
378    MODE is the intended mode of the result,
379    useful if X is a CONST_INT.  */
380 
381 rtx
flip_storage_order(machine_mode mode,rtx x)382 flip_storage_order (machine_mode mode, rtx x)
383 {
384   scalar_int_mode int_mode;
385   rtx result;
386 
387   if (mode == QImode)
388     return x;
389 
390   if (COMPLEX_MODE_P (mode))
391     {
392       rtx real = read_complex_part (x, false);
393       rtx imag = read_complex_part (x, true);
394 
395       real = flip_storage_order (GET_MODE_INNER (mode), real);
396       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
397 
398       return gen_rtx_CONCAT (mode, real, imag);
399     }
400 
401   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
402     check_reverse_storage_order_support ();
403 
404   if (!is_a <scalar_int_mode> (mode, &int_mode))
405     {
406       if (FLOAT_MODE_P (mode)
407 	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
408 	check_reverse_float_storage_order_support ();
409 
410       if (!int_mode_for_size (GET_MODE_PRECISION (mode), 0).exists (&int_mode))
411 	{
412 	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
413 	  return x;
414 	}
415       x = gen_lowpart (int_mode, x);
416     }
417 
418   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
419   if (result == 0)
420     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
421 
422   if (int_mode != mode)
423     result = gen_lowpart (mode, result);
424 
425   return result;
426 }
427 
428 /* If MODE is set, adjust bitfield memory MEM so that it points to the
429    first unit of mode MODE that contains a bitfield of size BITSIZE at
430    bit position BITNUM.  If MODE is not set, return a BLKmode reference
431    to every byte in the bitfield.  Set *NEW_BITNUM to the bit position
432    of the field within the new memory.  */
433 
434 static rtx
narrow_bit_field_mem(rtx mem,opt_scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,unsigned HOST_WIDE_INT * new_bitnum)435 narrow_bit_field_mem (rtx mem, opt_scalar_int_mode mode,
436 		      unsigned HOST_WIDE_INT bitsize,
437 		      unsigned HOST_WIDE_INT bitnum,
438 		      unsigned HOST_WIDE_INT *new_bitnum)
439 {
440   scalar_int_mode imode;
441   if (mode.exists (&imode))
442     {
443       unsigned int unit = GET_MODE_BITSIZE (imode);
444       *new_bitnum = bitnum % unit;
445       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
446       return adjust_bitfield_address (mem, imode, offset);
447     }
448   else
449     {
450       *new_bitnum = bitnum % BITS_PER_UNIT;
451       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
452       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
453 			    / BITS_PER_UNIT);
454       return adjust_bitfield_address_size (mem, BLKmode, offset, size);
455     }
456 }
457 
458 /* The caller wants to perform insertion or extraction PATTERN on a
459    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
460    BITREGION_START and BITREGION_END are as for store_bit_field
461    and FIELDMODE is the natural mode of the field.
462 
463    Search for a mode that is compatible with the memory access
464    restrictions and (where applicable) with a register insertion or
465    extraction.  Return the new memory on success, storing the adjusted
466    bit position in *NEW_BITNUM.  Return null otherwise.  */
467 
468 static rtx
adjust_bit_field_mem_for_reg(enum extraction_pattern pattern,rtx op0,HOST_WIDE_INT bitsize,HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,unsigned HOST_WIDE_INT * new_bitnum)469 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
470 			      rtx op0, HOST_WIDE_INT bitsize,
471 			      HOST_WIDE_INT bitnum,
472 			      poly_uint64 bitregion_start,
473 			      poly_uint64 bitregion_end,
474 			      machine_mode fieldmode,
475 			      unsigned HOST_WIDE_INT *new_bitnum)
476 {
477   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
478 				bitregion_end, MEM_ALIGN (op0),
479 				MEM_VOLATILE_P (op0));
480   scalar_int_mode best_mode;
481   if (iter.next_mode (&best_mode))
482     {
483       /* We can use a memory in BEST_MODE.  See whether this is true for
484 	 any wider modes.  All other things being equal, we prefer to
485 	 use the widest mode possible because it tends to expose more
486 	 CSE opportunities.  */
487       if (!iter.prefer_smaller_modes ())
488 	{
489 	  /* Limit the search to the mode required by the corresponding
490 	     register insertion or extraction instruction, if any.  */
491 	  scalar_int_mode limit_mode = word_mode;
492 	  extraction_insn insn;
493 	  if (get_best_reg_extraction_insn (&insn, pattern,
494 					    GET_MODE_BITSIZE (best_mode),
495 					    fieldmode))
496 	    limit_mode = insn.field_mode;
497 
498 	  scalar_int_mode wider_mode;
499 	  while (iter.next_mode (&wider_mode)
500 		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
501 	    best_mode = wider_mode;
502 	}
503       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
504 				   new_bitnum);
505     }
506   return NULL_RTX;
507 }
508 
509 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
510    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
511    offset is then BITNUM / BITS_PER_UNIT.  */
512 
513 static bool
lowpart_bit_field_p(poly_uint64 bitnum,poly_uint64 bitsize,machine_mode struct_mode)514 lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
515 		     machine_mode struct_mode)
516 {
517   poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
518   if (BYTES_BIG_ENDIAN)
519     return (multiple_p (bitnum, BITS_PER_UNIT)
520 	    && (known_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
521 		|| multiple_p (bitnum + bitsize,
522 			       regsize * BITS_PER_UNIT)));
523   else
524     return multiple_p (bitnum, regsize * BITS_PER_UNIT);
525 }
526 
527 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
528    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
529    Return false if the access would touch memory outside the range
530    BITREGION_START to BITREGION_END for conformance to the C++ memory
531    model.  */
532 
533 static bool
strict_volatile_bitfield_p(rtx op0,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,scalar_int_mode fieldmode,poly_uint64 bitregion_start,poly_uint64 bitregion_end)534 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
535 			    unsigned HOST_WIDE_INT bitnum,
536 			    scalar_int_mode fieldmode,
537 			    poly_uint64 bitregion_start,
538 			    poly_uint64 bitregion_end)
539 {
540   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
541 
542   /* -fstrict-volatile-bitfields must be enabled and we must have a
543      volatile MEM.  */
544   if (!MEM_P (op0)
545       || !MEM_VOLATILE_P (op0)
546       || flag_strict_volatile_bitfields <= 0)
547     return false;
548 
549   /* The bit size must not be larger than the field mode, and
550      the field mode must not be larger than a word.  */
551   if (bitsize > modesize || modesize > BITS_PER_WORD)
552     return false;
553 
554   /* Check for cases of unaligned fields that must be split.  */
555   if (bitnum % modesize + bitsize > modesize)
556     return false;
557 
558   /* The memory must be sufficiently aligned for a MODESIZE access.
559      This condition guarantees, that the memory access will not
560      touch anything after the end of the structure.  */
561   if (MEM_ALIGN (op0) < modesize)
562     return false;
563 
564   /* Check for cases where the C++ memory model applies.  */
565   if (maybe_ne (bitregion_end, 0U)
566       && (maybe_lt (bitnum - bitnum % modesize, bitregion_start)
567 	  || maybe_gt (bitnum - bitnum % modesize + modesize - 1,
568 		       bitregion_end)))
569     return false;
570 
571   return true;
572 }
573 
574 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
575    bit number BITNUM can be treated as a simple value of mode MODE.
576    Store the byte offset in *BYTENUM if so.  */
577 
578 static bool
simple_mem_bitfield_p(rtx op0,poly_uint64 bitsize,poly_uint64 bitnum,machine_mode mode,poly_uint64 * bytenum)579 simple_mem_bitfield_p (rtx op0, poly_uint64 bitsize, poly_uint64 bitnum,
580 		       machine_mode mode, poly_uint64 *bytenum)
581 {
582   return (MEM_P (op0)
583 	  && multiple_p (bitnum, BITS_PER_UNIT, bytenum)
584 	  && known_eq (bitsize, GET_MODE_BITSIZE (mode))
585 	  && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (op0))
586 	      || (multiple_p (bitnum, GET_MODE_ALIGNMENT (mode))
587 		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
588 }
589 
590 /* Try to use instruction INSV to store VALUE into a field of OP0.
591    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is a
592    BLKmode MEM.  VALUE_MODE is the mode of VALUE.  BITSIZE and BITNUM
593    are as for store_bit_field.  */
594 
595 static bool
store_bit_field_using_insv(const extraction_insn * insv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx value,scalar_int_mode value_mode)596 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
597 			    opt_scalar_int_mode op0_mode,
598 			    unsigned HOST_WIDE_INT bitsize,
599 			    unsigned HOST_WIDE_INT bitnum,
600 			    rtx value, scalar_int_mode value_mode)
601 {
602   struct expand_operand ops[4];
603   rtx value1;
604   rtx xop0 = op0;
605   rtx_insn *last = get_last_insn ();
606   bool copy_back = false;
607 
608   scalar_int_mode op_mode = insv->field_mode;
609   unsigned int unit = GET_MODE_BITSIZE (op_mode);
610   if (bitsize == 0 || bitsize > unit)
611     return false;
612 
613   if (MEM_P (xop0))
614     /* Get a reference to the first byte of the field.  */
615     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
616 				 &bitnum);
617   else
618     {
619       /* Convert from counting within OP0 to counting in OP_MODE.  */
620       if (BYTES_BIG_ENDIAN)
621 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
622 
623       /* If xop0 is a register, we need it in OP_MODE
624 	 to make it acceptable to the format of insv.  */
625       if (GET_CODE (xop0) == SUBREG)
626 	{
627 	  /* If such a SUBREG can't be created, give up.  */
628 	  if (!validate_subreg (op_mode, GET_MODE (SUBREG_REG (xop0)),
629 				SUBREG_REG (xop0), SUBREG_BYTE (xop0)))
630 	    return false;
631 	  /* We can't just change the mode, because this might clobber op0,
632 	     and we will need the original value of op0 if insv fails.  */
633 	  xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0),
634 				 SUBREG_BYTE (xop0));
635 	}
636       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
637 	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
638     }
639 
640   /* If the destination is a paradoxical subreg such that we need a
641      truncate to the inner mode, perform the insertion on a temporary and
642      truncate the result to the original destination.  Note that we can't
643      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
644      X) 0)) is (reg:N X).  */
645   if (GET_CODE (xop0) == SUBREG
646       && REG_P (SUBREG_REG (xop0))
647       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
648 					 op_mode))
649     {
650       rtx tem = gen_reg_rtx (op_mode);
651       emit_move_insn (tem, xop0);
652       xop0 = tem;
653       copy_back = true;
654     }
655 
656   /* There are similar overflow check at the start of store_bit_field_1,
657      but that only check the situation where the field lies completely
658      outside the register, while there do have situation where the field
659      lies partialy in the register, we need to adjust bitsize for this
660      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
661      will broken on those arch support bit insert instruction, like arm, aarch64
662      etc.  */
663   if (bitsize + bitnum > unit && bitnum < unit)
664     {
665       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
666 	       "destination object, data truncated into %wu-bit",
667 	       bitsize, unit - bitnum);
668       bitsize = unit - bitnum;
669     }
670 
671   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
672      "backwards" from the size of the unit we are inserting into.
673      Otherwise, we count bits from the most significant on a
674      BYTES/BITS_BIG_ENDIAN machine.  */
675 
676   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
677     bitnum = unit - bitsize - bitnum;
678 
679   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
680   value1 = value;
681   if (value_mode != op_mode)
682     {
683       if (GET_MODE_BITSIZE (value_mode) >= bitsize)
684 	{
685 	  rtx tmp;
686 	  /* Optimization: Don't bother really extending VALUE
687 	     if it has all the bits we will actually use.  However,
688 	     if we must narrow it, be sure we do it correctly.  */
689 
690 	  if (GET_MODE_SIZE (value_mode) < GET_MODE_SIZE (op_mode))
691 	    {
692 	      tmp = simplify_subreg (op_mode, value1, value_mode, 0);
693 	      if (! tmp)
694 		tmp = simplify_gen_subreg (op_mode,
695 					   force_reg (value_mode, value1),
696 					   value_mode, 0);
697 	    }
698 	  else
699 	    {
700 	      tmp = gen_lowpart_if_possible (op_mode, value1);
701 	      if (! tmp)
702 		tmp = gen_lowpart (op_mode, force_reg (value_mode, value1));
703 	    }
704 	  value1 = tmp;
705 	}
706       else if (CONST_INT_P (value))
707 	value1 = gen_int_mode (INTVAL (value), op_mode);
708       else
709 	/* Parse phase is supposed to make VALUE's data type
710 	   match that of the component reference, which is a type
711 	   at least as wide as the field; so VALUE should have
712 	   a mode that corresponds to that type.  */
713 	gcc_assert (CONSTANT_P (value));
714     }
715 
716   create_fixed_operand (&ops[0], xop0);
717   create_integer_operand (&ops[1], bitsize);
718   create_integer_operand (&ops[2], bitnum);
719   create_input_operand (&ops[3], value1, op_mode);
720   if (maybe_expand_insn (insv->icode, 4, ops))
721     {
722       if (copy_back)
723 	convert_move (op0, xop0, true);
724       return true;
725     }
726   delete_insns_since (last);
727   return false;
728 }
729 
730 /* A subroutine of store_bit_field, with the same arguments.  Return true
731    if the operation could be implemented.
732 
733    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
734    no other way of implementing the operation.  If FALLBACK_P is false,
735    return false instead.  */
736 
737 static bool
store_bit_field_1(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse,bool fallback_p)738 store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
739 		   poly_uint64 bitregion_start, poly_uint64 bitregion_end,
740 		   machine_mode fieldmode,
741 		   rtx value, bool reverse, bool fallback_p)
742 {
743   rtx op0 = str_rtx;
744 
745   while (GET_CODE (op0) == SUBREG)
746     {
747       bitnum += subreg_memory_offset (op0) * BITS_PER_UNIT;
748       op0 = SUBREG_REG (op0);
749     }
750 
751   /* No action is needed if the target is a register and if the field
752      lies completely outside that register.  This can occur if the source
753      code contains an out-of-bounds access to a small array.  */
754   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
755     return true;
756 
757   /* Use vec_set patterns for inserting parts of vectors whenever
758      available.  */
759   machine_mode outermode = GET_MODE (op0);
760   scalar_mode innermode = GET_MODE_INNER (outermode);
761   poly_uint64 pos;
762   if (VECTOR_MODE_P (outermode)
763       && !MEM_P (op0)
764       && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
765       && fieldmode == innermode
766       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
767       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
768     {
769       struct expand_operand ops[3];
770       enum insn_code icode = optab_handler (vec_set_optab, outermode);
771 
772       create_fixed_operand (&ops[0], op0);
773       create_input_operand (&ops[1], value, innermode);
774       create_integer_operand (&ops[2], pos);
775       if (maybe_expand_insn (icode, 3, ops))
776 	return true;
777     }
778 
779   /* If the target is a register, overwriting the entire object, or storing
780      a full-word or multi-word field can be done with just a SUBREG.  */
781   if (!MEM_P (op0)
782       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
783     {
784       /* Use the subreg machinery either to narrow OP0 to the required
785 	 words or to cope with mode punning between equal-sized modes.
786 	 In the latter case, use subreg on the rhs side, not lhs.  */
787       rtx sub;
788       HOST_WIDE_INT regnum;
789       poly_uint64 regsize = REGMODE_NATURAL_SIZE (GET_MODE (op0));
790       if (known_eq (bitnum, 0U)
791 	  && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
792 	{
793 	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
794 	  if (sub)
795 	    {
796 	      if (reverse)
797 		sub = flip_storage_order (GET_MODE (op0), sub);
798 	      emit_move_insn (op0, sub);
799 	      return true;
800 	    }
801 	}
802       else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
803 	       && multiple_p (bitsize, regsize * BITS_PER_UNIT))
804 	{
805 	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
806 				     regnum * regsize);
807 	  if (sub)
808 	    {
809 	      if (reverse)
810 		value = flip_storage_order (fieldmode, value);
811 	      emit_move_insn (sub, value);
812 	      return true;
813 	    }
814 	}
815     }
816 
817   /* If the target is memory, storing any naturally aligned field can be
818      done with a simple store.  For targets that support fast unaligned
819      memory, any naturally sized, unit aligned field can be done directly.  */
820   poly_uint64 bytenum;
821   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode, &bytenum))
822     {
823       op0 = adjust_bitfield_address (op0, fieldmode, bytenum);
824       if (reverse)
825 	value = flip_storage_order (fieldmode, value);
826       emit_move_insn (op0, value);
827       return true;
828     }
829 
830   /* It's possible we'll need to handle other cases here for
831      polynomial bitnum and bitsize.  */
832 
833   /* From here on we need to be looking at a fixed-size insertion.  */
834   unsigned HOST_WIDE_INT ibitsize = bitsize.to_constant ();
835   unsigned HOST_WIDE_INT ibitnum = bitnum.to_constant ();
836 
837   /* Make sure we are playing with integral modes.  Pun with subregs
838      if we aren't.  This must come after the entire register case above,
839      since that case is valid for any mode.  The following cases are only
840      valid for integral modes.  */
841   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
842   scalar_int_mode imode;
843   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
844     {
845       if (MEM_P (op0))
846 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
847 					    0, MEM_SIZE (op0));
848       else if (!op0_mode.exists ())
849 	{
850 	  if (ibitnum == 0
851 	      && known_eq (ibitsize, GET_MODE_BITSIZE (GET_MODE (op0)))
852 	      && MEM_P (value)
853 	      && !reverse)
854 	    {
855 	      value = adjust_address (value, GET_MODE (op0), 0);
856 	      emit_move_insn (op0, value);
857 	      return true;
858 	    }
859 	  if (!fallback_p)
860 	    return false;
861 	  rtx temp = assign_stack_temp (GET_MODE (op0),
862 					GET_MODE_SIZE (GET_MODE (op0)));
863 	  emit_move_insn (temp, op0);
864 	  store_bit_field_1 (temp, bitsize, bitnum, 0, 0, fieldmode, value,
865 			     reverse, fallback_p);
866 	  emit_move_insn (op0, temp);
867 	  return true;
868 	}
869       else
870 	op0 = gen_lowpart (op0_mode.require (), op0);
871     }
872 
873   return store_integral_bit_field (op0, op0_mode, ibitsize, ibitnum,
874 				   bitregion_start, bitregion_end,
875 				   fieldmode, value, reverse, fallback_p);
876 }
877 
878 /* Subroutine of store_bit_field_1, with the same arguments, except
879    that BITSIZE and BITNUM are constant.  Handle cases specific to
880    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
881    otherwise OP0 is a BLKmode MEM.  */
882 
883 static bool
store_integral_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse,bool fallback_p)884 store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
885 			  unsigned HOST_WIDE_INT bitsize,
886 			  unsigned HOST_WIDE_INT bitnum,
887 			  poly_uint64 bitregion_start,
888 			  poly_uint64 bitregion_end,
889 			  machine_mode fieldmode,
890 			  rtx value, bool reverse, bool fallback_p)
891 {
892   /* Storing an lsb-aligned field in a register
893      can be done with a movstrict instruction.  */
894 
895   if (!MEM_P (op0)
896       && !reverse
897       && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
898       && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
899       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
900     {
901       struct expand_operand ops[2];
902       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
903       rtx arg0 = op0;
904       unsigned HOST_WIDE_INT subreg_off;
905 
906       if (GET_CODE (arg0) == SUBREG)
907 	{
908 	  /* Else we've got some float mode source being extracted into
909 	     a different float mode destination -- this combination of
910 	     subregs results in Severe Tire Damage.  */
911 	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
912 		      || GET_MODE_CLASS (fieldmode) == MODE_INT
913 		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
914 	  arg0 = SUBREG_REG (arg0);
915 	}
916 
917       subreg_off = bitnum / BITS_PER_UNIT;
918       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
919 	{
920 	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
921 
922 	  create_fixed_operand (&ops[0], arg0);
923 	  /* Shrink the source operand to FIELDMODE.  */
924 	  create_convert_operand_to (&ops[1], value, fieldmode, false);
925 	  if (maybe_expand_insn (icode, 2, ops))
926 	    return true;
927 	}
928     }
929 
930   /* Handle fields bigger than a word.  */
931 
932   if (bitsize > BITS_PER_WORD)
933     {
934       /* Here we transfer the words of the field
935 	 in the order least significant first.
936 	 This is because the most significant word is the one which may
937 	 be less than full.
938 	 However, only do that if the value is not BLKmode.  */
939 
940       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
941       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
942       unsigned int i;
943       rtx_insn *last;
944 
945       /* This is the mode we must force value to, so that there will be enough
946 	 subwords to extract.  Note that fieldmode will often (always?) be
947 	 VOIDmode, because that is what store_field uses to indicate that this
948 	 is a bit field, but passing VOIDmode to operand_subword_force
949 	 is not allowed.
950 
951 	 The mode must be fixed-size, since insertions into variable-sized
952 	 objects are meant to be handled before calling this function.  */
953       fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
954       if (value_mode == VOIDmode)
955 	value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
956 
957       last = get_last_insn ();
958       for (i = 0; i < nwords; i++)
959 	{
960 	  /* If I is 0, use the low-order word in both field and target;
961 	     if I is 1, use the next to lowest word; and so on.  */
962 	  unsigned int wordnum = (backwards
963 				  ? GET_MODE_SIZE (value_mode) / UNITS_PER_WORD
964 				  - i - 1
965 				  : i);
966 	  unsigned int bit_offset = (backwards ^ reverse
967 				     ? MAX ((int) bitsize - ((int) i + 1)
968 					    * BITS_PER_WORD,
969 					    0)
970 				     : (int) i * BITS_PER_WORD);
971 	  rtx value_word = operand_subword_force (value, wordnum, value_mode);
972 	  unsigned HOST_WIDE_INT new_bitsize =
973 	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
974 
975 	  /* If the remaining chunk doesn't have full wordsize we have
976 	     to make sure that for big-endian machines the higher order
977 	     bits are used.  */
978 	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
979 	    {
980 	      int shift = BITS_PER_WORD - new_bitsize;
981 	      rtx shift_rtx = gen_int_shift_amount (word_mode, shift);
982 	      value_word = simplify_expand_binop (word_mode, lshr_optab,
983 						  value_word, shift_rtx,
984 						  NULL_RTX, true,
985 						  OPTAB_LIB_WIDEN);
986 	    }
987 
988 	  if (!store_bit_field_1 (op0, new_bitsize,
989 				  bitnum + bit_offset,
990 				  bitregion_start, bitregion_end,
991 				  word_mode,
992 				  value_word, reverse, fallback_p))
993 	    {
994 	      delete_insns_since (last);
995 	      return false;
996 	    }
997 	}
998       return true;
999     }
1000 
1001   /* If VALUE has a floating-point or complex mode, access it as an
1002      integer of the corresponding size.  This can occur on a machine
1003      with 64 bit registers that uses SFmode for float.  It can also
1004      occur for unaligned float or complex fields.  */
1005   rtx orig_value = value;
1006   scalar_int_mode value_mode;
1007   if (GET_MODE (value) == VOIDmode)
1008     /* By this point we've dealt with values that are bigger than a word,
1009        so word_mode is a conservatively correct choice.  */
1010     value_mode = word_mode;
1011   else if (!is_a <scalar_int_mode> (GET_MODE (value), &value_mode))
1012     {
1013       value_mode = int_mode_for_mode (GET_MODE (value)).require ();
1014       value = gen_reg_rtx (value_mode);
1015       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
1016     }
1017 
1018   /* If OP0 is a multi-word register, narrow it to the affected word.
1019      If the region spans two words, defer to store_split_bit_field.
1020      Don't do this if op0 is a single hard register wider than word
1021      such as a float or vector register.  */
1022   if (!MEM_P (op0)
1023       && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD
1024       && (!REG_P (op0)
1025 	  || !HARD_REGISTER_P (op0)
1026 	  || hard_regno_nregs (REGNO (op0), op0_mode.require ()) != 1))
1027     {
1028       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1029 	{
1030 	  if (!fallback_p)
1031 	    return false;
1032 
1033 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1034 				 bitregion_start, bitregion_end,
1035 				 value, value_mode, reverse);
1036 	  return true;
1037 	}
1038       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1039 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1040       gcc_assert (op0);
1041       op0_mode = word_mode;
1042       bitnum %= BITS_PER_WORD;
1043     }
1044 
1045   /* From here on we can assume that the field to be stored in fits
1046      within a word.  If the destination is a register, it too fits
1047      in a word.  */
1048 
1049   extraction_insn insv;
1050   if (!MEM_P (op0)
1051       && !reverse
1052       && get_best_reg_extraction_insn (&insv, EP_insv,
1053 				       GET_MODE_BITSIZE (op0_mode.require ()),
1054 				       fieldmode)
1055       && store_bit_field_using_insv (&insv, op0, op0_mode,
1056 				     bitsize, bitnum, value, value_mode))
1057     return true;
1058 
1059   /* If OP0 is a memory, try copying it to a register and seeing if a
1060      cheap register alternative is available.  */
1061   if (MEM_P (op0) && !reverse)
1062     {
1063       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1064 					fieldmode)
1065 	  && store_bit_field_using_insv (&insv, op0, op0_mode,
1066 					 bitsize, bitnum, value, value_mode))
1067 	return true;
1068 
1069       rtx_insn *last = get_last_insn ();
1070 
1071       /* Try loading part of OP0 into a register, inserting the bitfield
1072 	 into that, and then copying the result back to OP0.  */
1073       unsigned HOST_WIDE_INT bitpos;
1074       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1075 					       bitregion_start, bitregion_end,
1076 					       fieldmode, &bitpos);
1077       if (xop0)
1078 	{
1079 	  rtx tempreg = copy_to_reg (xop0);
1080 	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
1081 				 bitregion_start, bitregion_end,
1082 				 fieldmode, orig_value, reverse, false))
1083 	    {
1084 	      emit_move_insn (xop0, tempreg);
1085 	      return true;
1086 	    }
1087 	  delete_insns_since (last);
1088 	}
1089     }
1090 
1091   if (!fallback_p)
1092     return false;
1093 
1094   store_fixed_bit_field (op0, op0_mode, bitsize, bitnum, bitregion_start,
1095 			 bitregion_end, value, value_mode, reverse);
1096   return true;
1097 }
1098 
1099 /* Generate code to store value from rtx VALUE
1100    into a bit-field within structure STR_RTX
1101    containing BITSIZE bits starting at bit BITNUM.
1102 
1103    BITREGION_START is bitpos of the first bitfield in this region.
1104    BITREGION_END is the bitpos of the ending bitfield in this region.
1105    These two fields are 0, if the C++ memory model does not apply,
1106    or we are not interested in keeping track of bitfield regions.
1107 
1108    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1109 
1110    If REVERSE is true, the store is to be done in reverse order.  */
1111 
1112 void
store_bit_field(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,machine_mode fieldmode,rtx value,bool reverse)1113 store_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1114 		 poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1115 		 machine_mode fieldmode,
1116 		 rtx value, bool reverse)
1117 {
1118   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1119   unsigned HOST_WIDE_INT ibitsize = 0, ibitnum = 0;
1120   scalar_int_mode int_mode;
1121   if (bitsize.is_constant (&ibitsize)
1122       && bitnum.is_constant (&ibitnum)
1123       && is_a <scalar_int_mode> (fieldmode, &int_mode)
1124       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum, int_mode,
1125 				     bitregion_start, bitregion_end))
1126     {
1127       /* Storing of a full word can be done with a simple store.
1128 	 We know here that the field can be accessed with one single
1129 	 instruction.  For targets that support unaligned memory,
1130 	 an unaligned access may be necessary.  */
1131       if (ibitsize == GET_MODE_BITSIZE (int_mode))
1132 	{
1133 	  str_rtx = adjust_bitfield_address (str_rtx, int_mode,
1134 					     ibitnum / BITS_PER_UNIT);
1135 	  if (reverse)
1136 	    value = flip_storage_order (int_mode, value);
1137 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
1138 	  emit_move_insn (str_rtx, value);
1139 	}
1140       else
1141 	{
1142 	  rtx temp;
1143 
1144 	  str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize,
1145 					  ibitnum, &ibitnum);
1146 	  gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
1147 	  temp = copy_to_reg (str_rtx);
1148 	  if (!store_bit_field_1 (temp, ibitsize, ibitnum, 0, 0,
1149 				  int_mode, value, reverse, true))
1150 	    gcc_unreachable ();
1151 
1152 	  emit_move_insn (str_rtx, temp);
1153 	}
1154 
1155       return;
1156     }
1157 
1158   /* Under the C++0x memory model, we must not touch bits outside the
1159      bit region.  Adjust the address to start at the beginning of the
1160      bit region.  */
1161   if (MEM_P (str_rtx) && maybe_ne (bitregion_start, 0U))
1162     {
1163       scalar_int_mode best_mode;
1164       machine_mode addr_mode = VOIDmode;
1165 
1166       poly_uint64 offset = exact_div (bitregion_start, BITS_PER_UNIT);
1167       bitnum -= bitregion_start;
1168       poly_int64 size = bits_to_bytes_round_up (bitnum + bitsize);
1169       bitregion_end -= bitregion_start;
1170       bitregion_start = 0;
1171       if (bitsize.is_constant (&ibitsize)
1172 	  && bitnum.is_constant (&ibitnum)
1173 	  && get_best_mode (ibitsize, ibitnum,
1174 			    bitregion_start, bitregion_end,
1175 			    MEM_ALIGN (str_rtx), INT_MAX,
1176 			    MEM_VOLATILE_P (str_rtx), &best_mode))
1177 	addr_mode = best_mode;
1178       str_rtx = adjust_bitfield_address_size (str_rtx, addr_mode,
1179 					      offset, size);
1180     }
1181 
1182   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1183 			  bitregion_start, bitregion_end,
1184 			  fieldmode, value, reverse, true))
1185     gcc_unreachable ();
1186 }
1187 
1188 /* Use shifts and boolean operations to store VALUE into a bit field of
1189    width BITSIZE in OP0, starting at bit BITNUM.  If OP0_MODE is defined,
1190    it is the mode of OP0, otherwise OP0 is a BLKmode MEM.  VALUE_MODE is
1191    the mode of VALUE.
1192 
1193    If REVERSE is true, the store is to be done in reverse order.  */
1194 
1195 static void
store_fixed_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,poly_uint64 bitregion_start,poly_uint64 bitregion_end,rtx value,scalar_int_mode value_mode,bool reverse)1196 store_fixed_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1197 		       unsigned HOST_WIDE_INT bitsize,
1198 		       unsigned HOST_WIDE_INT bitnum,
1199 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1200 		       rtx value, scalar_int_mode value_mode, bool reverse)
1201 {
1202   /* There is a case not handled here:
1203      a structure with a known alignment of just a halfword
1204      and a field split across two aligned halfwords within the structure.
1205      Or likewise a structure with a known alignment of just a byte
1206      and a field split across two bytes.
1207      Such cases are not supposed to be able to occur.  */
1208 
1209   scalar_int_mode best_mode;
1210   if (MEM_P (op0))
1211     {
1212       unsigned int max_bitsize = BITS_PER_WORD;
1213       scalar_int_mode imode;
1214       if (op0_mode.exists (&imode) && GET_MODE_BITSIZE (imode) < max_bitsize)
1215 	max_bitsize = GET_MODE_BITSIZE (imode);
1216 
1217       if (!get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1218 			  MEM_ALIGN (op0), max_bitsize, MEM_VOLATILE_P (op0),
1219 			  &best_mode))
1220 	{
1221 	  /* The only way this should occur is if the field spans word
1222 	     boundaries.  */
1223 	  store_split_bit_field (op0, op0_mode, bitsize, bitnum,
1224 				 bitregion_start, bitregion_end,
1225 				 value, value_mode, reverse);
1226 	  return;
1227 	}
1228 
1229       op0 = narrow_bit_field_mem (op0, best_mode, bitsize, bitnum, &bitnum);
1230     }
1231   else
1232     best_mode = op0_mode.require ();
1233 
1234   store_fixed_bit_field_1 (op0, best_mode, bitsize, bitnum,
1235 			   value, value_mode, reverse);
1236 }
1237 
1238 /* Helper function for store_fixed_bit_field, stores
1239    the bit field always using MODE, which is the mode of OP0.  The other
1240    arguments are as for store_fixed_bit_field.  */
1241 
1242 static void
store_fixed_bit_field_1(rtx op0,scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx value,scalar_int_mode value_mode,bool reverse)1243 store_fixed_bit_field_1 (rtx op0, scalar_int_mode mode,
1244 			 unsigned HOST_WIDE_INT bitsize,
1245 			 unsigned HOST_WIDE_INT bitnum,
1246 			 rtx value, scalar_int_mode value_mode, bool reverse)
1247 {
1248   rtx temp;
1249   int all_zero = 0;
1250   int all_one = 0;
1251 
1252   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1253      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1254 
1255   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1256     /* BITNUM is the distance between our msb
1257        and that of the containing datum.
1258        Convert it to the distance from the lsb.  */
1259     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1260 
1261   /* Now BITNUM is always the distance between our lsb
1262      and that of OP0.  */
1263 
1264   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1265      we must first convert its mode to MODE.  */
1266 
1267   if (CONST_INT_P (value))
1268     {
1269       unsigned HOST_WIDE_INT v = UINTVAL (value);
1270 
1271       if (bitsize < HOST_BITS_PER_WIDE_INT)
1272 	v &= (HOST_WIDE_INT_1U << bitsize) - 1;
1273 
1274       if (v == 0)
1275 	all_zero = 1;
1276       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1277 		&& v == (HOST_WIDE_INT_1U << bitsize) - 1)
1278 	       || (bitsize == HOST_BITS_PER_WIDE_INT
1279 		   && v == HOST_WIDE_INT_M1U))
1280 	all_one = 1;
1281 
1282       value = lshift_value (mode, v, bitnum);
1283     }
1284   else
1285     {
1286       int must_and = (GET_MODE_BITSIZE (value_mode) != bitsize
1287 		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1288 
1289       if (value_mode != mode)
1290 	value = convert_to_mode (mode, value, 1);
1291 
1292       if (must_and)
1293 	value = expand_binop (mode, and_optab, value,
1294 			      mask_rtx (mode, 0, bitsize, 0),
1295 			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
1296       if (bitnum > 0)
1297 	value = expand_shift (LSHIFT_EXPR, mode, value,
1298 			      bitnum, NULL_RTX, 1);
1299     }
1300 
1301   if (reverse)
1302     value = flip_storage_order (mode, value);
1303 
1304   /* Now clear the chosen bits in OP0,
1305      except that if VALUE is -1 we need not bother.  */
1306   /* We keep the intermediates in registers to allow CSE to combine
1307      consecutive bitfield assignments.  */
1308 
1309   temp = force_reg (mode, op0);
1310 
1311   if (! all_one)
1312     {
1313       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1314       if (reverse)
1315 	mask = flip_storage_order (mode, mask);
1316       temp = expand_binop (mode, and_optab, temp, mask,
1317 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1318       temp = force_reg (mode, temp);
1319     }
1320 
1321   /* Now logical-or VALUE into OP0, unless it is zero.  */
1322 
1323   if (! all_zero)
1324     {
1325       temp = expand_binop (mode, ior_optab, temp, value,
1326 			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
1327       temp = force_reg (mode, temp);
1328     }
1329 
1330   if (op0 != temp)
1331     {
1332       op0 = copy_rtx (op0);
1333       emit_move_insn (op0, temp);
1334     }
1335 }
1336 
1337 /* Store a bit field that is split across multiple accessible memory objects.
1338 
1339    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1340    BITSIZE is the field width; BITPOS the position of its first bit
1341    (within the word).
1342    VALUE is the value to store, which has mode VALUE_MODE.
1343    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
1344    a BLKmode MEM.
1345 
1346    If REVERSE is true, the store is to be done in reverse order.
1347 
1348    This does not yet handle fields wider than BITS_PER_WORD.  */
1349 
1350 static void
store_split_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,poly_uint64 bitregion_start,poly_uint64 bitregion_end,rtx value,scalar_int_mode value_mode,bool reverse)1351 store_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1352 		       unsigned HOST_WIDE_INT bitsize,
1353 		       unsigned HOST_WIDE_INT bitpos,
1354 		       poly_uint64 bitregion_start, poly_uint64 bitregion_end,
1355 		       rtx value, scalar_int_mode value_mode, bool reverse)
1356 {
1357   unsigned int unit, total_bits, bitsdone = 0;
1358 
1359   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1360      much at a time.  */
1361   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1362     unit = BITS_PER_WORD;
1363   else
1364     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1365 
1366   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1367      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1368      again, and we will mutually recurse forever.  */
1369   if (MEM_P (op0) && op0_mode.exists ())
1370     unit = MIN (unit, GET_MODE_BITSIZE (op0_mode.require ()));
1371 
1372   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1373      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1374      that VALUE might be a floating-point constant.  */
1375   if (CONSTANT_P (value) && !CONST_INT_P (value))
1376     {
1377       rtx word = gen_lowpart_common (word_mode, value);
1378 
1379       if (word && (value != word))
1380 	value = word;
1381       else
1382 	value = gen_lowpart_common (word_mode, force_reg (value_mode, value));
1383       value_mode = word_mode;
1384     }
1385 
1386   total_bits = GET_MODE_BITSIZE (value_mode);
1387 
1388   while (bitsdone < bitsize)
1389     {
1390       unsigned HOST_WIDE_INT thissize;
1391       unsigned HOST_WIDE_INT thispos;
1392       unsigned HOST_WIDE_INT offset;
1393       rtx part;
1394 
1395       offset = (bitpos + bitsdone) / unit;
1396       thispos = (bitpos + bitsdone) % unit;
1397 
1398       /* When region of bytes we can touch is restricted, decrease
1399 	 UNIT close to the end of the region as needed.  If op0 is a REG
1400 	 or SUBREG of REG, don't do this, as there can't be data races
1401 	 on a register and we can expand shorter code in some cases.  */
1402       if (maybe_ne (bitregion_end, 0U)
1403 	  && unit > BITS_PER_UNIT
1404 	  && maybe_gt (bitpos + bitsdone - thispos + unit, bitregion_end + 1)
1405 	  && !REG_P (op0)
1406 	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1407 	{
1408 	  unit = unit / 2;
1409 	  continue;
1410 	}
1411 
1412       /* THISSIZE must not overrun a word boundary.  Otherwise,
1413 	 store_fixed_bit_field will call us again, and we will mutually
1414 	 recurse forever.  */
1415       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1416       thissize = MIN (thissize, unit - thispos);
1417 
1418       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1419 	{
1420 	  /* Fetch successively less significant portions.  */
1421 	  if (CONST_INT_P (value))
1422 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1423 			     >> (bitsize - bitsdone - thissize))
1424 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1425           /* Likewise, but the source is little-endian.  */
1426           else if (reverse)
1427 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1428 					    thissize,
1429 					    bitsize - bitsdone - thissize,
1430 					    NULL_RTX, 1, false);
1431 	  else
1432 	    /* The args are chosen so that the last part includes the
1433 	       lsb.  Give extract_bit_field the value it needs (with
1434 	       endianness compensation) to fetch the piece we want.  */
1435 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1436 					    thissize,
1437 					    total_bits - bitsize + bitsdone,
1438 					    NULL_RTX, 1, false);
1439 	}
1440       else
1441 	{
1442 	  /* Fetch successively more significant portions.  */
1443 	  if (CONST_INT_P (value))
1444 	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1445 			     >> bitsdone)
1446 			    & ((HOST_WIDE_INT_1 << thissize) - 1));
1447 	  /* Likewise, but the source is big-endian.  */
1448           else if (reverse)
1449 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1450 					    thissize,
1451 					    total_bits - bitsdone - thissize,
1452 					    NULL_RTX, 1, false);
1453 	  else
1454 	    part = extract_fixed_bit_field (word_mode, value, value_mode,
1455 					    thissize, bitsdone, NULL_RTX,
1456 					    1, false);
1457 	}
1458 
1459       /* If OP0 is a register, then handle OFFSET here.  */
1460       rtx op0_piece = op0;
1461       opt_scalar_int_mode op0_piece_mode = op0_mode;
1462       if (SUBREG_P (op0) || REG_P (op0))
1463 	{
1464 	  scalar_int_mode imode;
1465 	  if (op0_mode.exists (&imode)
1466 	      && GET_MODE_SIZE (imode) < UNITS_PER_WORD)
1467 	    {
1468 	      if (offset)
1469 		op0_piece = const0_rtx;
1470 	    }
1471 	  else
1472 	    {
1473 	      op0_piece = operand_subword_force (op0,
1474 						 offset * unit / BITS_PER_WORD,
1475 						 GET_MODE (op0));
1476 	      op0_piece_mode = word_mode;
1477 	    }
1478 	  offset &= BITS_PER_WORD / unit - 1;
1479 	}
1480 
1481       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1482 	 it is just an out-of-bounds access.  Ignore it.  */
1483       if (op0_piece != const0_rtx)
1484 	store_fixed_bit_field (op0_piece, op0_piece_mode, thissize,
1485 			       offset * unit + thispos, bitregion_start,
1486 			       bitregion_end, part, word_mode, reverse);
1487       bitsdone += thissize;
1488     }
1489 }
1490 
1491 /* A subroutine of extract_bit_field_1 that converts return value X
1492    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1493    to extract_bit_field.  */
1494 
1495 static rtx
convert_extracted_bit_field(rtx x,machine_mode mode,machine_mode tmode,bool unsignedp)1496 convert_extracted_bit_field (rtx x, machine_mode mode,
1497 			     machine_mode tmode, bool unsignedp)
1498 {
1499   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1500     return x;
1501 
1502   /* If the x mode is not a scalar integral, first convert to the
1503      integer mode of that size and then access it as a floating-point
1504      value via a SUBREG.  */
1505   if (!SCALAR_INT_MODE_P (tmode))
1506     {
1507       scalar_int_mode int_mode = int_mode_for_mode (tmode).require ();
1508       x = convert_to_mode (int_mode, x, unsignedp);
1509       x = force_reg (int_mode, x);
1510       return gen_lowpart (tmode, x);
1511     }
1512 
1513   return convert_to_mode (tmode, x, unsignedp);
1514 }
1515 
1516 /* Try to use an ext(z)v pattern to extract a field from OP0.
1517    Return the extracted value on success, otherwise return null.
1518    EXTV describes the extraction instruction to use.  If OP0_MODE
1519    is defined, it is the mode of OP0, otherwise OP0 is a BLKmode MEM.
1520    The other arguments are as for extract_bit_field.  */
1521 
1522 static rtx
extract_bit_field_using_extv(const extraction_insn * extv,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode)1523 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1524 			      opt_scalar_int_mode op0_mode,
1525 			      unsigned HOST_WIDE_INT bitsize,
1526 			      unsigned HOST_WIDE_INT bitnum,
1527 			      int unsignedp, rtx target,
1528 			      machine_mode mode, machine_mode tmode)
1529 {
1530   struct expand_operand ops[4];
1531   rtx spec_target = target;
1532   rtx spec_target_subreg = 0;
1533   scalar_int_mode ext_mode = extv->field_mode;
1534   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1535 
1536   if (bitsize == 0 || unit < bitsize)
1537     return NULL_RTX;
1538 
1539   if (MEM_P (op0))
1540     /* Get a reference to the first byte of the field.  */
1541     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1542 				&bitnum);
1543   else
1544     {
1545       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1546       if (BYTES_BIG_ENDIAN)
1547 	bitnum += unit - GET_MODE_BITSIZE (op0_mode.require ());
1548 
1549       /* If op0 is a register, we need it in EXT_MODE to make it
1550 	 acceptable to the format of ext(z)v.  */
1551       if (GET_CODE (op0) == SUBREG && op0_mode.require () != ext_mode)
1552 	return NULL_RTX;
1553       if (REG_P (op0) && op0_mode.require () != ext_mode)
1554 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1555     }
1556 
1557   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1558      "backwards" from the size of the unit we are extracting from.
1559      Otherwise, we count bits from the most significant on a
1560      BYTES/BITS_BIG_ENDIAN machine.  */
1561 
1562   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1563     bitnum = unit - bitsize - bitnum;
1564 
1565   if (target == 0)
1566     target = spec_target = gen_reg_rtx (tmode);
1567 
1568   if (GET_MODE (target) != ext_mode)
1569     {
1570       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1571 	 between the mode of the extraction (word_mode) and the target
1572 	 mode.  Instead, create a temporary and use convert_move to set
1573 	 the target.  */
1574       if (REG_P (target)
1575 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1576 	{
1577 	  target = gen_lowpart (ext_mode, target);
1578 	  if (partial_subreg_p (GET_MODE (spec_target), ext_mode))
1579 	    spec_target_subreg = target;
1580 	}
1581       else
1582 	target = gen_reg_rtx (ext_mode);
1583     }
1584 
1585   create_output_operand (&ops[0], target, ext_mode);
1586   create_fixed_operand (&ops[1], op0);
1587   create_integer_operand (&ops[2], bitsize);
1588   create_integer_operand (&ops[3], bitnum);
1589   if (maybe_expand_insn (extv->icode, 4, ops))
1590     {
1591       target = ops[0].value;
1592       if (target == spec_target)
1593 	return target;
1594       if (target == spec_target_subreg)
1595 	return spec_target;
1596       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1597     }
1598   return NULL_RTX;
1599 }
1600 
1601 /* See whether it would be valid to extract the part of OP0 described
1602    by BITNUM and BITSIZE into a value of mode MODE using a subreg
1603    operation.  Return the subreg if so, otherwise return null.  */
1604 
1605 static rtx
extract_bit_field_as_subreg(machine_mode mode,rtx op0,poly_uint64 bitsize,poly_uint64 bitnum)1606 extract_bit_field_as_subreg (machine_mode mode, rtx op0,
1607 			     poly_uint64 bitsize, poly_uint64 bitnum)
1608 {
1609   poly_uint64 bytenum;
1610   if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
1611       && known_eq (bitsize, GET_MODE_BITSIZE (mode))
1612       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1613       && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
1614     return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
1615   return NULL_RTX;
1616 }
1617 
1618 /* A subroutine of extract_bit_field, with the same arguments.
1619    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1620    if we can find no other means of implementing the operation.
1621    if FALLBACK_P is false, return NULL instead.  */
1622 
1623 static rtx
extract_bit_field_1(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p,rtx * alt_rtl)1624 extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
1625 		     int unsignedp, rtx target, machine_mode mode,
1626 		     machine_mode tmode, bool reverse, bool fallback_p,
1627 		     rtx *alt_rtl)
1628 {
1629   rtx op0 = str_rtx;
1630   machine_mode mode1;
1631 
1632   if (tmode == VOIDmode)
1633     tmode = mode;
1634 
1635   while (GET_CODE (op0) == SUBREG)
1636     {
1637       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1638       op0 = SUBREG_REG (op0);
1639     }
1640 
1641   /* If we have an out-of-bounds access to a register, just return an
1642      uninitialized register of the required mode.  This can occur if the
1643      source code contains an out-of-bounds access to a small array.  */
1644   if (REG_P (op0) && known_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
1645     return gen_reg_rtx (tmode);
1646 
1647   if (REG_P (op0)
1648       && mode == GET_MODE (op0)
1649       && known_eq (bitnum, 0U)
1650       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
1651     {
1652       if (reverse)
1653 	op0 = flip_storage_order (mode, op0);
1654       /* We're trying to extract a full register from itself.  */
1655       return op0;
1656     }
1657 
1658   /* First try to check for vector from vector extractions.  */
1659   if (VECTOR_MODE_P (GET_MODE (op0))
1660       && !MEM_P (op0)
1661       && VECTOR_MODE_P (tmode)
1662       && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
1663       && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
1664     {
1665       machine_mode new_mode = GET_MODE (op0);
1666       if (GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode))
1667 	{
1668 	  scalar_mode inner_mode = GET_MODE_INNER (tmode);
1669 	  poly_uint64 nunits;
1670 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
1671 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
1672 	      || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
1673 	      || !VECTOR_MODE_P (new_mode)
1674 	      || maybe_ne (GET_MODE_SIZE (new_mode),
1675 			   GET_MODE_SIZE (GET_MODE (op0)))
1676 	      || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
1677 	      || !targetm.vector_mode_supported_p (new_mode))
1678 	    new_mode = VOIDmode;
1679 	}
1680       poly_uint64 pos;
1681       if (new_mode != VOIDmode
1682 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
1683 	      != CODE_FOR_nothing)
1684 	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
1685 	{
1686 	  struct expand_operand ops[3];
1687 	  machine_mode outermode = new_mode;
1688 	  machine_mode innermode = tmode;
1689 	  enum insn_code icode
1690 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
1691 
1692 	  if (new_mode != GET_MODE (op0))
1693 	    op0 = gen_lowpart (new_mode, op0);
1694 	  create_output_operand (&ops[0], target, innermode);
1695 	  ops[0].target = 1;
1696 	  create_input_operand (&ops[1], op0, outermode);
1697 	  create_integer_operand (&ops[2], pos);
1698 	  if (maybe_expand_insn (icode, 3, ops))
1699 	    {
1700 	      if (alt_rtl && ops[0].target)
1701 		*alt_rtl = target;
1702 	      target = ops[0].value;
1703 	      if (GET_MODE (target) != mode)
1704 		return gen_lowpart (tmode, target);
1705 	      return target;
1706 	    }
1707 	}
1708     }
1709 
1710   /* See if we can get a better vector mode before extracting.  */
1711   if (VECTOR_MODE_P (GET_MODE (op0))
1712       && !MEM_P (op0)
1713       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1714     {
1715       machine_mode new_mode;
1716 
1717       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1718 	new_mode = MIN_MODE_VECTOR_FLOAT;
1719       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1720 	new_mode = MIN_MODE_VECTOR_FRACT;
1721       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1722 	new_mode = MIN_MODE_VECTOR_UFRACT;
1723       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1724 	new_mode = MIN_MODE_VECTOR_ACCUM;
1725       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1726 	new_mode = MIN_MODE_VECTOR_UACCUM;
1727       else
1728 	new_mode = MIN_MODE_VECTOR_INT;
1729 
1730       FOR_EACH_MODE_FROM (new_mode, new_mode)
1731 	if (known_eq (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (GET_MODE (op0)))
1732 	    && known_eq (GET_MODE_UNIT_SIZE (new_mode), GET_MODE_SIZE (tmode))
1733 	    && targetm.vector_mode_supported_p (new_mode))
1734 	  break;
1735       if (new_mode != VOIDmode)
1736 	op0 = gen_lowpart (new_mode, op0);
1737     }
1738 
1739   /* Use vec_extract patterns for extracting parts of vectors whenever
1740      available.  If that fails, see whether the current modes and bitregion
1741      give a natural subreg.  */
1742   machine_mode outermode = GET_MODE (op0);
1743   if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
1744     {
1745       scalar_mode innermode = GET_MODE_INNER (outermode);
1746       enum insn_code icode
1747 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
1748       poly_uint64 pos;
1749       if (icode != CODE_FOR_nothing
1750 	  && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
1751 	  && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
1752 	{
1753 	  struct expand_operand ops[3];
1754 
1755 	  create_output_operand (&ops[0], target, innermode);
1756 	  ops[0].target = 1;
1757 	  create_input_operand (&ops[1], op0, outermode);
1758 	  create_integer_operand (&ops[2], pos);
1759 	  if (maybe_expand_insn (icode, 3, ops))
1760 	    {
1761 	      if (alt_rtl && ops[0].target)
1762 		*alt_rtl = target;
1763 	      target = ops[0].value;
1764 	      if (GET_MODE (target) != mode)
1765 		return gen_lowpart (tmode, target);
1766 	      return target;
1767 	    }
1768 	}
1769       /* Using subregs is useful if we're extracting one register vector
1770 	 from a multi-register vector.  extract_bit_field_as_subreg checks
1771 	 for valid bitsize and bitnum, so we don't need to do that here.  */
1772       if (VECTOR_MODE_P (mode))
1773 	{
1774 	  rtx sub = extract_bit_field_as_subreg (mode, op0, bitsize, bitnum);
1775 	  if (sub)
1776 	    return sub;
1777 	}
1778     }
1779 
1780   /* Make sure we are playing with integral modes.  Pun with subregs
1781      if we aren't.  */
1782   opt_scalar_int_mode op0_mode = int_mode_for_mode (GET_MODE (op0));
1783   scalar_int_mode imode;
1784   if (!op0_mode.exists (&imode) || imode != GET_MODE (op0))
1785     {
1786       if (MEM_P (op0))
1787 	op0 = adjust_bitfield_address_size (op0, op0_mode.else_blk (),
1788 					    0, MEM_SIZE (op0));
1789       else if (op0_mode.exists (&imode))
1790 	{
1791 	  op0 = gen_lowpart (imode, op0);
1792 
1793 	  /* If we got a SUBREG, force it into a register since we
1794 	     aren't going to be able to do another SUBREG on it.  */
1795 	  if (GET_CODE (op0) == SUBREG)
1796 	    op0 = force_reg (imode, op0);
1797 	}
1798       else
1799 	{
1800 	  poly_int64 size = GET_MODE_SIZE (GET_MODE (op0));
1801 	  rtx mem = assign_stack_temp (GET_MODE (op0), size);
1802 	  emit_move_insn (mem, op0);
1803 	  op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1804 	}
1805     }
1806 
1807   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1808      If that's wrong, the solution is to test for it and set TARGET to 0
1809      if needed.  */
1810 
1811   /* Get the mode of the field to use for atomic access or subreg
1812      conversion.  */
1813   if (!SCALAR_INT_MODE_P (tmode)
1814       || !mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0).exists (&mode1))
1815     mode1 = mode;
1816   gcc_assert (mode1 != BLKmode);
1817 
1818   /* Extraction of a full MODE1 value can be done with a subreg as long
1819      as the least significant bit of the value is the least significant
1820      bit of either OP0 or a word of OP0.  */
1821   if (!MEM_P (op0) && !reverse)
1822     {
1823       rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
1824       if (sub)
1825 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1826     }
1827 
1828   /* Extraction of a full MODE1 value can be done with a load as long as
1829      the field is on a byte boundary and is sufficiently aligned.  */
1830   poly_uint64 bytenum;
1831   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1, &bytenum))
1832     {
1833       op0 = adjust_bitfield_address (op0, mode1, bytenum);
1834       if (reverse)
1835 	op0 = flip_storage_order (mode1, op0);
1836       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1837     }
1838 
1839   /* If we have a memory source and a non-constant bit offset, restrict
1840      the memory to the referenced bytes.  This is a worst-case fallback
1841      but is useful for things like vector booleans.  */
1842   if (MEM_P (op0) && !bitnum.is_constant ())
1843     {
1844       bytenum = bits_to_bytes_round_down (bitnum);
1845       bitnum = num_trailing_bits (bitnum);
1846       poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
1847       op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
1848       op0_mode = opt_scalar_int_mode ();
1849     }
1850 
1851   /* It's possible we'll need to handle other cases here for
1852      polynomial bitnum and bitsize.  */
1853 
1854   /* From here on we need to be looking at a fixed-size insertion.  */
1855   return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
1856 				     bitnum.to_constant (), unsignedp,
1857 				     target, mode, tmode, reverse, fallback_p);
1858 }
1859 
1860 /* Subroutine of extract_bit_field_1, with the same arguments, except
1861    that BITSIZE and BITNUM are constant.  Handle cases specific to
1862    integral modes.  If OP0_MODE is defined, it is the mode of OP0,
1863    otherwise OP0 is a BLKmode MEM.  */
1864 
1865 static rtx
extract_integral_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,bool fallback_p)1866 extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
1867 			    unsigned HOST_WIDE_INT bitsize,
1868 			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
1869 			    rtx target, machine_mode mode, machine_mode tmode,
1870 			    bool reverse, bool fallback_p)
1871 {
1872   /* Handle fields bigger than a word.  */
1873 
1874   if (bitsize > BITS_PER_WORD)
1875     {
1876       /* Here we transfer the words of the field
1877 	 in the order least significant first.
1878 	 This is because the most significant word is the one which may
1879 	 be less than full.  */
1880 
1881       const bool backwards = WORDS_BIG_ENDIAN;
1882       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1883       unsigned int i;
1884       rtx_insn *last;
1885 
1886       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1887 	target = gen_reg_rtx (mode);
1888 
1889       /* In case we're about to clobber a base register or something
1890 	 (see gcc.c-torture/execute/20040625-1.c).   */
1891       if (reg_mentioned_p (target, op0))
1892 	target = gen_reg_rtx (mode);
1893 
1894       /* Indicate for flow that the entire target reg is being set.  */
1895       emit_clobber (target);
1896 
1897       /* The mode must be fixed-size, since extract_bit_field_1 handles
1898 	 extractions from variable-sized objects before calling this
1899 	 function.  */
1900       unsigned int target_size
1901 	= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
1902       last = get_last_insn ();
1903       for (i = 0; i < nwords; i++)
1904 	{
1905 	  /* If I is 0, use the low-order word in both field and target;
1906 	     if I is 1, use the next to lowest word; and so on.  */
1907 	  /* Word number in TARGET to use.  */
1908 	  unsigned int wordnum
1909 	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
1910 	  /* Offset from start of field in OP0.  */
1911 	  unsigned int bit_offset = (backwards ^ reverse
1912 				     ? MAX ((int) bitsize - ((int) i + 1)
1913 					    * BITS_PER_WORD,
1914 					    0)
1915 				     : (int) i * BITS_PER_WORD);
1916 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1917 	  rtx result_part
1918 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1919 					     bitsize - i * BITS_PER_WORD),
1920 				   bitnum + bit_offset, 1, target_part,
1921 				   mode, word_mode, reverse, fallback_p, NULL);
1922 
1923 	  gcc_assert (target_part);
1924 	  if (!result_part)
1925 	    {
1926 	      delete_insns_since (last);
1927 	      return NULL;
1928 	    }
1929 
1930 	  if (result_part != target_part)
1931 	    emit_move_insn (target_part, result_part);
1932 	}
1933 
1934       if (unsignedp)
1935 	{
1936 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1937 	     need to be zero'd out.  */
1938 	  if (target_size > nwords * UNITS_PER_WORD)
1939 	    {
1940 	      unsigned int i, total_words;
1941 
1942 	      total_words = target_size / UNITS_PER_WORD;
1943 	      for (i = nwords; i < total_words; i++)
1944 		emit_move_insn
1945 		  (operand_subword (target,
1946 				    backwards ? total_words - i - 1 : i,
1947 				    1, VOIDmode),
1948 		   const0_rtx);
1949 	    }
1950 	  return target;
1951 	}
1952 
1953       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1954       target = expand_shift (LSHIFT_EXPR, mode, target,
1955 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1956       return expand_shift (RSHIFT_EXPR, mode, target,
1957 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1958     }
1959 
1960   /* If OP0 is a multi-word register, narrow it to the affected word.
1961      If the region spans two words, defer to extract_split_bit_field.  */
1962   if (!MEM_P (op0) && GET_MODE_SIZE (op0_mode.require ()) > UNITS_PER_WORD)
1963     {
1964       if (bitnum % BITS_PER_WORD + bitsize > BITS_PER_WORD)
1965 	{
1966 	  if (!fallback_p)
1967 	    return NULL_RTX;
1968 	  target = extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
1969 					    unsignedp, reverse);
1970 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1971 	}
1972       op0 = simplify_gen_subreg (word_mode, op0, op0_mode.require (),
1973 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1974       op0_mode = word_mode;
1975       bitnum %= BITS_PER_WORD;
1976     }
1977 
1978   /* From here on we know the desired field is smaller than a word.
1979      If OP0 is a register, it too fits within a word.  */
1980   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1981   extraction_insn extv;
1982   if (!MEM_P (op0)
1983       && !reverse
1984       /* ??? We could limit the structure size to the part of OP0 that
1985 	 contains the field, with appropriate checks for endianness
1986 	 and TARGET_TRULY_NOOP_TRUNCATION.  */
1987       && get_best_reg_extraction_insn (&extv, pattern,
1988 				       GET_MODE_BITSIZE (op0_mode.require ()),
1989 				       tmode))
1990     {
1991       rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
1992 						 bitsize, bitnum,
1993 						 unsignedp, target, mode,
1994 						 tmode);
1995       if (result)
1996 	return result;
1997     }
1998 
1999   /* If OP0 is a memory, try copying it to a register and seeing if a
2000      cheap register alternative is available.  */
2001   if (MEM_P (op0) & !reverse)
2002     {
2003       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
2004 					tmode))
2005 	{
2006 	  rtx result = extract_bit_field_using_extv (&extv, op0, op0_mode,
2007 						     bitsize, bitnum,
2008 						     unsignedp, target, mode,
2009 						     tmode);
2010 	  if (result)
2011 	    return result;
2012 	}
2013 
2014       rtx_insn *last = get_last_insn ();
2015 
2016       /* Try loading part of OP0 into a register and extracting the
2017 	 bitfield from that.  */
2018       unsigned HOST_WIDE_INT bitpos;
2019       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
2020 					       0, 0, tmode, &bitpos);
2021       if (xop0)
2022 	{
2023 	  xop0 = copy_to_reg (xop0);
2024 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
2025 					    unsignedp, target,
2026 					    mode, tmode, reverse, false, NULL);
2027 	  if (result)
2028 	    return result;
2029 	  delete_insns_since (last);
2030 	}
2031     }
2032 
2033   if (!fallback_p)
2034     return NULL;
2035 
2036   /* Find a correspondingly-sized integer field, so we can apply
2037      shifts and masks to it.  */
2038   scalar_int_mode int_mode;
2039   if (!int_mode_for_mode (tmode).exists (&int_mode))
2040     /* If this fails, we should probably push op0 out to memory and then
2041        do a load.  */
2042     int_mode = int_mode_for_mode (mode).require ();
2043 
2044   target = extract_fixed_bit_field (int_mode, op0, op0_mode, bitsize,
2045 				    bitnum, target, unsignedp, reverse);
2046 
2047   /* Complex values must be reversed piecewise, so we need to undo the global
2048      reversal, convert to the complex mode and reverse again.  */
2049   if (reverse && COMPLEX_MODE_P (tmode))
2050     {
2051       target = flip_storage_order (int_mode, target);
2052       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2053       target = flip_storage_order (tmode, target);
2054     }
2055   else
2056     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
2057 
2058   return target;
2059 }
2060 
2061 /* Generate code to extract a byte-field from STR_RTX
2062    containing BITSIZE bits, starting at BITNUM,
2063    and put it in TARGET if possible (if TARGET is nonzero).
2064    Regardless of TARGET, we return the rtx for where the value is placed.
2065 
2066    STR_RTX is the structure containing the byte (a REG or MEM).
2067    UNSIGNEDP is nonzero if this is an unsigned bit field.
2068    MODE is the natural mode of the field value once extracted.
2069    TMODE is the mode the caller would like the value to have;
2070    but the value may be returned with type MODE instead.
2071 
2072    If REVERSE is true, the extraction is to be done in reverse order.
2073 
2074    If a TARGET is specified and we can store in it at no extra cost,
2075    we do so, and return TARGET.
2076    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
2077    if they are equally easy.  */
2078 
2079 rtx
extract_bit_field(rtx str_rtx,poly_uint64 bitsize,poly_uint64 bitnum,int unsignedp,rtx target,machine_mode mode,machine_mode tmode,bool reverse,rtx * alt_rtl)2080 extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
2081 		   int unsignedp, rtx target, machine_mode mode,
2082 		   machine_mode tmode, bool reverse, rtx *alt_rtl)
2083 {
2084   machine_mode mode1;
2085 
2086   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
2087   if (maybe_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
2088     mode1 = GET_MODE (str_rtx);
2089   else if (target && maybe_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
2090     mode1 = GET_MODE (target);
2091   else
2092     mode1 = tmode;
2093 
2094   unsigned HOST_WIDE_INT ibitsize, ibitnum;
2095   scalar_int_mode int_mode;
2096   if (bitsize.is_constant (&ibitsize)
2097       && bitnum.is_constant (&ibitnum)
2098       && is_a <scalar_int_mode> (mode1, &int_mode)
2099       && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
2100 				     int_mode, 0, 0))
2101     {
2102       /* Extraction of a full INT_MODE value can be done with a simple load.
2103 	 We know here that the field can be accessed with one single
2104 	 instruction.  For targets that support unaligned memory,
2105 	 an unaligned access may be necessary.  */
2106       if (ibitsize == GET_MODE_BITSIZE (int_mode))
2107 	{
2108 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
2109 						ibitnum / BITS_PER_UNIT);
2110 	  if (reverse)
2111 	    result = flip_storage_order (int_mode, result);
2112 	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
2113 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
2114 	}
2115 
2116       str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
2117 				      &ibitnum);
2118       gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
2119       str_rtx = copy_to_reg (str_rtx);
2120       return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
2121 				  target, mode, tmode, reverse, true, alt_rtl);
2122     }
2123 
2124   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
2125 			      target, mode, tmode, reverse, true, alt_rtl);
2126 }
2127 
2128 /* Use shifts and boolean operations to extract a field of BITSIZE bits
2129    from bit BITNUM of OP0.  If OP0_MODE is defined, it is the mode of OP0,
2130    otherwise OP0 is a BLKmode MEM.
2131 
2132    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
2133    If REVERSE is true, the extraction is to be done in reverse order.
2134 
2135    If TARGET is nonzero, attempts to store the value there
2136    and return TARGET, but this is not guaranteed.
2137    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
2138 
2139 static rtx
extract_fixed_bit_field(machine_mode tmode,rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2140 extract_fixed_bit_field (machine_mode tmode, rtx op0,
2141 			 opt_scalar_int_mode op0_mode,
2142 			 unsigned HOST_WIDE_INT bitsize,
2143 			 unsigned HOST_WIDE_INT bitnum, rtx target,
2144 			 int unsignedp, bool reverse)
2145 {
2146   scalar_int_mode mode;
2147   if (MEM_P (op0))
2148     {
2149       if (!get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
2150 			  BITS_PER_WORD, MEM_VOLATILE_P (op0), &mode))
2151 	/* The only way this should occur is if the field spans word
2152 	   boundaries.  */
2153 	return extract_split_bit_field (op0, op0_mode, bitsize, bitnum,
2154 					unsignedp, reverse);
2155 
2156       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
2157     }
2158   else
2159     mode = op0_mode.require ();
2160 
2161   return extract_fixed_bit_field_1 (tmode, op0, mode, bitsize, bitnum,
2162 				    target, unsignedp, reverse);
2163 }
2164 
2165 /* Helper function for extract_fixed_bit_field, extracts
2166    the bit field always using MODE, which is the mode of OP0.
2167    The other arguments are as for extract_fixed_bit_field.  */
2168 
2169 static rtx
extract_fixed_bit_field_1(machine_mode tmode,rtx op0,scalar_int_mode mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitnum,rtx target,int unsignedp,bool reverse)2170 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0, scalar_int_mode mode,
2171 			   unsigned HOST_WIDE_INT bitsize,
2172 			   unsigned HOST_WIDE_INT bitnum, rtx target,
2173 			   int unsignedp, bool reverse)
2174 {
2175   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2176      for invalid input, such as extract equivalent of f5 from
2177      gcc.dg/pr48335-2.c.  */
2178 
2179   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2180     /* BITNUM is the distance between our msb and that of OP0.
2181        Convert it to the distance from the lsb.  */
2182     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2183 
2184   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2185      We have reduced the big-endian case to the little-endian case.  */
2186   if (reverse)
2187     op0 = flip_storage_order (mode, op0);
2188 
2189   if (unsignedp)
2190     {
2191       if (bitnum)
2192 	{
2193 	  /* If the field does not already start at the lsb,
2194 	     shift it so it does.  */
2195 	  /* Maybe propagate the target for the shift.  */
2196 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2197 	  if (tmode != mode)
2198 	    subtarget = 0;
2199 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2200 	}
2201       /* Convert the value to the desired mode.  TMODE must also be a
2202 	 scalar integer for this conversion to make sense, since we
2203 	 shouldn't reinterpret the bits.  */
2204       scalar_int_mode new_mode = as_a <scalar_int_mode> (tmode);
2205       if (mode != new_mode)
2206 	op0 = convert_to_mode (new_mode, op0, 1);
2207 
2208       /* Unless the msb of the field used to be the msb when we shifted,
2209 	 mask out the upper bits.  */
2210 
2211       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2212 	return expand_binop (new_mode, and_optab, op0,
2213 			     mask_rtx (new_mode, 0, bitsize, 0),
2214 			     target, 1, OPTAB_LIB_WIDEN);
2215       return op0;
2216     }
2217 
2218   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2219      then arithmetic-shift its lsb to the lsb of the word.  */
2220   op0 = force_reg (mode, op0);
2221 
2222   /* Find the narrowest integer mode that contains the field.  */
2223 
2224   opt_scalar_int_mode mode_iter;
2225   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
2226     if (GET_MODE_BITSIZE (mode_iter.require ()) >= bitsize + bitnum)
2227       break;
2228 
2229   mode = mode_iter.require ();
2230   op0 = convert_to_mode (mode, op0, 0);
2231 
2232   if (mode != tmode)
2233     target = 0;
2234 
2235   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2236     {
2237       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2238       /* Maybe propagate the target for the shift.  */
2239       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2240       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2241     }
2242 
2243   return expand_shift (RSHIFT_EXPR, mode, op0,
2244 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2245 }
2246 
2247 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2248    VALUE << BITPOS.  */
2249 
2250 static rtx
lshift_value(machine_mode mode,unsigned HOST_WIDE_INT value,int bitpos)2251 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2252 	      int bitpos)
2253 {
2254   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2255 }
2256 
2257 /* Extract a bit field that is split across two words
2258    and return an RTX for the result.
2259 
2260    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2261    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2262    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2263    If OP0_MODE is defined, it is the mode of OP0, otherwise OP0 is
2264    a BLKmode MEM.
2265 
2266    If REVERSE is true, the extraction is to be done in reverse order.  */
2267 
2268 static rtx
extract_split_bit_field(rtx op0,opt_scalar_int_mode op0_mode,unsigned HOST_WIDE_INT bitsize,unsigned HOST_WIDE_INT bitpos,int unsignedp,bool reverse)2269 extract_split_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
2270 			 unsigned HOST_WIDE_INT bitsize,
2271 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2272 			 bool reverse)
2273 {
2274   unsigned int unit;
2275   unsigned int bitsdone = 0;
2276   rtx result = NULL_RTX;
2277   int first = 1;
2278 
2279   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2280      much at a time.  */
2281   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2282     unit = BITS_PER_WORD;
2283   else
2284     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2285 
2286   while (bitsdone < bitsize)
2287     {
2288       unsigned HOST_WIDE_INT thissize;
2289       rtx part;
2290       unsigned HOST_WIDE_INT thispos;
2291       unsigned HOST_WIDE_INT offset;
2292 
2293       offset = (bitpos + bitsdone) / unit;
2294       thispos = (bitpos + bitsdone) % unit;
2295 
2296       /* THISSIZE must not overrun a word boundary.  Otherwise,
2297 	 extract_fixed_bit_field will call us again, and we will mutually
2298 	 recurse forever.  */
2299       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2300       thissize = MIN (thissize, unit - thispos);
2301 
2302       /* If OP0 is a register, then handle OFFSET here.  */
2303       rtx op0_piece = op0;
2304       opt_scalar_int_mode op0_piece_mode = op0_mode;
2305       if (SUBREG_P (op0) || REG_P (op0))
2306 	{
2307 	  op0_piece = operand_subword_force (op0, offset, op0_mode.require ());
2308 	  op0_piece_mode = word_mode;
2309 	  offset = 0;
2310 	}
2311 
2312       /* Extract the parts in bit-counting order,
2313 	 whose meaning is determined by BYTES_PER_UNIT.
2314 	 OFFSET is in UNITs, and UNIT is in bits.  */
2315       part = extract_fixed_bit_field (word_mode, op0_piece, op0_piece_mode,
2316 				      thissize, offset * unit + thispos,
2317 				      0, 1, reverse);
2318       bitsdone += thissize;
2319 
2320       /* Shift this part into place for the result.  */
2321       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2322 	{
2323 	  if (bitsize != bitsdone)
2324 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2325 				 bitsize - bitsdone, 0, 1);
2326 	}
2327       else
2328 	{
2329 	  if (bitsdone != thissize)
2330 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2331 				 bitsdone - thissize, 0, 1);
2332 	}
2333 
2334       if (first)
2335 	result = part;
2336       else
2337 	/* Combine the parts with bitwise or.  This works
2338 	   because we extracted each part as an unsigned bit field.  */
2339 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2340 			       OPTAB_LIB_WIDEN);
2341 
2342       first = 0;
2343     }
2344 
2345   /* Unsigned bit field: we are done.  */
2346   if (unsignedp)
2347     return result;
2348   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2349   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2350 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2351   return expand_shift (RSHIFT_EXPR, word_mode, result,
2352 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2353 }
2354 
2355 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2356    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2357    MODE, fill the upper bits with zeros.  Fail if the layout of either
2358    mode is unknown (as for CC modes) or if the extraction would involve
2359    unprofitable mode punning.  Return the value on success, otherwise
2360    return null.
2361 
2362    This is different from gen_lowpart* in these respects:
2363 
2364      - the returned value must always be considered an rvalue
2365 
2366      - when MODE is wider than SRC_MODE, the extraction involves
2367        a zero extension
2368 
2369      - when MODE is smaller than SRC_MODE, the extraction involves
2370        a truncation (and is thus subject to TARGET_TRULY_NOOP_TRUNCATION).
2371 
2372    In other words, this routine performs a computation, whereas the
2373    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2374    operations.  */
2375 
2376 rtx
extract_low_bits(machine_mode mode,machine_mode src_mode,rtx src)2377 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2378 {
2379   scalar_int_mode int_mode, src_int_mode;
2380 
2381   if (mode == src_mode)
2382     return src;
2383 
2384   if (CONSTANT_P (src))
2385     {
2386       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2387 	 fails, it will happily create (subreg (symbol_ref)) or similar
2388 	 invalid SUBREGs.  */
2389       poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
2390       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2391       if (ret)
2392 	return ret;
2393 
2394       if (GET_MODE (src) == VOIDmode
2395 	  || !validate_subreg (mode, src_mode, src, byte))
2396 	return NULL_RTX;
2397 
2398       src = force_reg (GET_MODE (src), src);
2399       return gen_rtx_SUBREG (mode, src, byte);
2400     }
2401 
2402   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2403     return NULL_RTX;
2404 
2405   if (known_eq (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
2406       && targetm.modes_tieable_p (mode, src_mode))
2407     {
2408       rtx x = gen_lowpart_common (mode, src);
2409       if (x)
2410         return x;
2411     }
2412 
2413   if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
2414       || !int_mode_for_mode (mode).exists (&int_mode))
2415     return NULL_RTX;
2416 
2417   if (!targetm.modes_tieable_p (src_int_mode, src_mode))
2418     return NULL_RTX;
2419   if (!targetm.modes_tieable_p (int_mode, mode))
2420     return NULL_RTX;
2421 
2422   src = gen_lowpart (src_int_mode, src);
2423   src = convert_modes (int_mode, src_int_mode, src, true);
2424   src = gen_lowpart (mode, src);
2425   return src;
2426 }
2427 
2428 /* Add INC into TARGET.  */
2429 
2430 void
expand_inc(rtx target,rtx inc)2431 expand_inc (rtx target, rtx inc)
2432 {
2433   rtx value = expand_binop (GET_MODE (target), add_optab,
2434 			    target, inc,
2435 			    target, 0, OPTAB_LIB_WIDEN);
2436   if (value != target)
2437     emit_move_insn (target, value);
2438 }
2439 
2440 /* Subtract DEC from TARGET.  */
2441 
2442 void
expand_dec(rtx target,rtx dec)2443 expand_dec (rtx target, rtx dec)
2444 {
2445   rtx value = expand_binop (GET_MODE (target), sub_optab,
2446 			    target, dec,
2447 			    target, 0, OPTAB_LIB_WIDEN);
2448   if (value != target)
2449     emit_move_insn (target, value);
2450 }
2451 
2452 /* Output a shift instruction for expression code CODE,
2453    with SHIFTED being the rtx for the value to shift,
2454    and AMOUNT the rtx for the amount to shift by.
2455    Store the result in the rtx TARGET, if that is convenient.
2456    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2457    Return the rtx for where the value is.
2458    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2459    in which case 0 is returned.  */
2460 
2461 static rtx
2462 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2463 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2464 {
2465   rtx op1, temp = 0;
2466   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2467   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2468   optab lshift_optab = ashl_optab;
2469   optab rshift_arith_optab = ashr_optab;
2470   optab rshift_uns_optab = lshr_optab;
2471   optab lrotate_optab = rotl_optab;
2472   optab rrotate_optab = rotr_optab;
2473   machine_mode op1_mode;
2474   scalar_mode scalar_mode = GET_MODE_INNER (mode);
2475   int attempt;
2476   bool speed = optimize_insn_for_speed_p ();
2477 
2478   op1 = amount;
2479   op1_mode = GET_MODE (op1);
2480 
2481   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2482      shift amount is a vector, use the vector/vector shift patterns.  */
2483   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2484     {
2485       lshift_optab = vashl_optab;
2486       rshift_arith_optab = vashr_optab;
2487       rshift_uns_optab = vlshr_optab;
2488       lrotate_optab = vrotl_optab;
2489       rrotate_optab = vrotr_optab;
2490     }
2491 
2492   /* Previously detected shift-counts computed by NEGATE_EXPR
2493      and shifted in the other direction; but that does not work
2494      on all machines.  */
2495 
2496   if (SHIFT_COUNT_TRUNCATED)
2497     {
2498       if (CONST_INT_P (op1)
2499 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2500 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2501 	op1 = gen_int_shift_amount (mode,
2502 				    (unsigned HOST_WIDE_INT) INTVAL (op1)
2503 				    % GET_MODE_BITSIZE (scalar_mode));
2504       else if (GET_CODE (op1) == SUBREG
2505 	       && subreg_lowpart_p (op1)
2506 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2507 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2508 	op1 = SUBREG_REG (op1);
2509     }
2510 
2511   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2512      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2513      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2514      amount instead.  */
2515   if (rotate
2516       && CONST_INT_P (op1)
2517       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2518 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2519     {
2520       op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode)
2521 					 - INTVAL (op1)));
2522       left = !left;
2523       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2524     }
2525 
2526   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2527      Note that this is not the case for bigger values.  For instance a rotation
2528      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2529      0x04030201 (bswapsi).  */
2530   if (rotate
2531       && CONST_INT_P (op1)
2532       && INTVAL (op1) == BITS_PER_UNIT
2533       && GET_MODE_SIZE (scalar_mode) == 2
2534       && optab_handler (bswap_optab, mode) != CODE_FOR_nothing)
2535     return expand_unop (mode, bswap_optab, shifted, NULL_RTX, unsignedp);
2536 
2537   if (op1 == const0_rtx)
2538     return shifted;
2539 
2540   /* Check whether its cheaper to implement a left shift by a constant
2541      bit count by a sequence of additions.  */
2542   if (code == LSHIFT_EXPR
2543       && CONST_INT_P (op1)
2544       && INTVAL (op1) > 0
2545       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2546       && INTVAL (op1) < MAX_BITS_PER_WORD
2547       && (shift_cost (speed, mode, INTVAL (op1))
2548 	  > INTVAL (op1) * add_cost (speed, mode))
2549       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2550     {
2551       int i;
2552       for (i = 0; i < INTVAL (op1); i++)
2553 	{
2554 	  temp = force_reg (mode, shifted);
2555 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2556 				  unsignedp, OPTAB_LIB_WIDEN);
2557 	}
2558       return shifted;
2559     }
2560 
2561   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2562     {
2563       enum optab_methods methods;
2564 
2565       if (attempt == 0)
2566 	methods = OPTAB_DIRECT;
2567       else if (attempt == 1)
2568 	methods = OPTAB_WIDEN;
2569       else
2570 	methods = OPTAB_LIB_WIDEN;
2571 
2572       if (rotate)
2573 	{
2574 	  /* Widening does not work for rotation.  */
2575 	  if (methods == OPTAB_WIDEN)
2576 	    continue;
2577 	  else if (methods == OPTAB_LIB_WIDEN)
2578 	    {
2579 	      /* If we have been unable to open-code this by a rotation,
2580 		 do it as the IOR of two shifts.  I.e., to rotate A
2581 		 by N bits, compute
2582 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2583 		 where C is the bitsize of A.
2584 
2585 		 It is theoretically possible that the target machine might
2586 		 not be able to perform either shift and hence we would
2587 		 be making two libcalls rather than just the one for the
2588 		 shift (similarly if IOR could not be done).  We will allow
2589 		 this extremely unlikely lossage to avoid complicating the
2590 		 code below.  */
2591 
2592 	      rtx subtarget = target == shifted ? 0 : target;
2593 	      rtx new_amount, other_amount;
2594 	      rtx temp1;
2595 
2596 	      new_amount = op1;
2597 	      if (op1 == const0_rtx)
2598 		return shifted;
2599 	      else if (CONST_INT_P (op1))
2600 		other_amount = gen_int_shift_amount
2601 		  (mode, GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2602 	      else
2603 		{
2604 		  other_amount
2605 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2606 					  op1, GET_MODE (op1));
2607 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2608 		  other_amount
2609 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2610 					   gen_int_mode (mask, GET_MODE (op1)));
2611 		}
2612 
2613 	      shifted = force_reg (mode, shifted);
2614 
2615 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2616 				     mode, shifted, new_amount, 0, 1);
2617 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2618 				      mode, shifted, other_amount,
2619 				      subtarget, 1);
2620 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2621 				   unsignedp, methods);
2622 	    }
2623 
2624 	  temp = expand_binop (mode,
2625 			       left ? lrotate_optab : rrotate_optab,
2626 			       shifted, op1, target, unsignedp, methods);
2627 	}
2628       else if (unsignedp)
2629 	temp = expand_binop (mode,
2630 			     left ? lshift_optab : rshift_uns_optab,
2631 			     shifted, op1, target, unsignedp, methods);
2632 
2633       /* Do arithmetic shifts.
2634 	 Also, if we are going to widen the operand, we can just as well
2635 	 use an arithmetic right-shift instead of a logical one.  */
2636       if (temp == 0 && ! rotate
2637 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2638 	{
2639 	  enum optab_methods methods1 = methods;
2640 
2641 	  /* If trying to widen a log shift to an arithmetic shift,
2642 	     don't accept an arithmetic shift of the same size.  */
2643 	  if (unsignedp)
2644 	    methods1 = OPTAB_MUST_WIDEN;
2645 
2646 	  /* Arithmetic shift */
2647 
2648 	  temp = expand_binop (mode,
2649 			       left ? lshift_optab : rshift_arith_optab,
2650 			       shifted, op1, target, unsignedp, methods1);
2651 	}
2652 
2653       /* We used to try extzv here for logical right shifts, but that was
2654 	 only useful for one machine, the VAX, and caused poor code
2655 	 generation there for lshrdi3, so the code was deleted and a
2656 	 define_expand for lshrsi3 was added to vax.md.  */
2657     }
2658 
2659   gcc_assert (temp != NULL_RTX || may_fail);
2660   return temp;
2661 }
2662 
2663 /* Output a shift instruction for expression code CODE,
2664    with SHIFTED being the rtx for the value to shift,
2665    and AMOUNT the amount to shift by.
2666    Store the result in the rtx TARGET, if that is convenient.
2667    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2668    Return the rtx for where the value is.  */
2669 
2670 rtx
expand_shift(enum tree_code code,machine_mode mode,rtx shifted,poly_int64 amount,rtx target,int unsignedp)2671 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2672 	      poly_int64 amount, rtx target, int unsignedp)
2673 {
2674   return expand_shift_1 (code, mode, shifted,
2675 			 gen_int_shift_amount (mode, amount),
2676 			 target, unsignedp);
2677 }
2678 
2679 /* Likewise, but return 0 if that cannot be done.  */
2680 
2681 static rtx
maybe_expand_shift(enum tree_code code,machine_mode mode,rtx shifted,int amount,rtx target,int unsignedp)2682 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2683 		    int amount, rtx target, int unsignedp)
2684 {
2685   return expand_shift_1 (code, mode,
2686 			 shifted, GEN_INT (amount), target, unsignedp, true);
2687 }
2688 
2689 /* Output a shift instruction for expression code CODE,
2690    with SHIFTED being the rtx for the value to shift,
2691    and AMOUNT the tree for the amount to shift by.
2692    Store the result in the rtx TARGET, if that is convenient.
2693    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2694    Return the rtx for where the value is.  */
2695 
2696 rtx
expand_variable_shift(enum tree_code code,machine_mode mode,rtx shifted,tree amount,rtx target,int unsignedp)2697 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2698 		       tree amount, rtx target, int unsignedp)
2699 {
2700   return expand_shift_1 (code, mode,
2701 			 shifted, expand_normal (amount), target, unsignedp);
2702 }
2703 
2704 
2705 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2706 			const struct mult_cost *, machine_mode mode);
2707 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2708 			      const struct algorithm *, enum mult_variant);
2709 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2710 static rtx extract_high_half (scalar_int_mode, rtx);
2711 static rtx expmed_mult_highpart (scalar_int_mode, rtx, rtx, rtx, int, int);
2712 static rtx expmed_mult_highpart_optab (scalar_int_mode, rtx, rtx, rtx,
2713 				       int, int);
2714 /* Compute and return the best algorithm for multiplying by T.
2715    The algorithm must cost less than cost_limit
2716    If retval.cost >= COST_LIMIT, no algorithm was found and all
2717    other field of the returned struct are undefined.
2718    MODE is the machine mode of the multiplication.  */
2719 
2720 static void
synth_mult(struct algorithm * alg_out,unsigned HOST_WIDE_INT t,const struct mult_cost * cost_limit,machine_mode mode)2721 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2722 	    const struct mult_cost *cost_limit, machine_mode mode)
2723 {
2724   int m;
2725   struct algorithm *alg_in, *best_alg;
2726   struct mult_cost best_cost;
2727   struct mult_cost new_limit;
2728   int op_cost, op_latency;
2729   unsigned HOST_WIDE_INT orig_t = t;
2730   unsigned HOST_WIDE_INT q;
2731   int maxm, hash_index;
2732   bool cache_hit = false;
2733   enum alg_code cache_alg = alg_zero;
2734   bool speed = optimize_insn_for_speed_p ();
2735   scalar_int_mode imode;
2736   struct alg_hash_entry *entry_ptr;
2737 
2738   /* Indicate that no algorithm is yet found.  If no algorithm
2739      is found, this value will be returned and indicate failure.  */
2740   alg_out->cost.cost = cost_limit->cost + 1;
2741   alg_out->cost.latency = cost_limit->latency + 1;
2742 
2743   if (cost_limit->cost < 0
2744       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2745     return;
2746 
2747   /* Be prepared for vector modes.  */
2748   imode = as_a <scalar_int_mode> (GET_MODE_INNER (mode));
2749 
2750   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2751 
2752   /* Restrict the bits of "t" to the multiplication's mode.  */
2753   t &= GET_MODE_MASK (imode);
2754 
2755   /* t == 1 can be done in zero cost.  */
2756   if (t == 1)
2757     {
2758       alg_out->ops = 1;
2759       alg_out->cost.cost = 0;
2760       alg_out->cost.latency = 0;
2761       alg_out->op[0] = alg_m;
2762       return;
2763     }
2764 
2765   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2766      fail now.  */
2767   if (t == 0)
2768     {
2769       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2770 	return;
2771       else
2772 	{
2773 	  alg_out->ops = 1;
2774 	  alg_out->cost.cost = zero_cost (speed);
2775 	  alg_out->cost.latency = zero_cost (speed);
2776 	  alg_out->op[0] = alg_zero;
2777 	  return;
2778 	}
2779     }
2780 
2781   /* We'll be needing a couple extra algorithm structures now.  */
2782 
2783   alg_in = XALLOCA (struct algorithm);
2784   best_alg = XALLOCA (struct algorithm);
2785   best_cost = *cost_limit;
2786 
2787   /* Compute the hash index.  */
2788   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2789 
2790   /* See if we already know what to do for T.  */
2791   entry_ptr = alg_hash_entry_ptr (hash_index);
2792   if (entry_ptr->t == t
2793       && entry_ptr->mode == mode
2794       && entry_ptr->speed == speed
2795       && entry_ptr->alg != alg_unknown)
2796     {
2797       cache_alg = entry_ptr->alg;
2798 
2799       if (cache_alg == alg_impossible)
2800 	{
2801 	  /* The cache tells us that it's impossible to synthesize
2802 	     multiplication by T within entry_ptr->cost.  */
2803 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2804 	    /* COST_LIMIT is at least as restrictive as the one
2805 	       recorded in the hash table, in which case we have no
2806 	       hope of synthesizing a multiplication.  Just
2807 	       return.  */
2808 	    return;
2809 
2810 	  /* If we get here, COST_LIMIT is less restrictive than the
2811 	     one recorded in the hash table, so we may be able to
2812 	     synthesize a multiplication.  Proceed as if we didn't
2813 	     have the cache entry.  */
2814 	}
2815       else
2816 	{
2817 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2818 	    /* The cached algorithm shows that this multiplication
2819 	       requires more cost than COST_LIMIT.  Just return.  This
2820 	       way, we don't clobber this cache entry with
2821 	       alg_impossible but retain useful information.  */
2822 	    return;
2823 
2824 	  cache_hit = true;
2825 
2826 	  switch (cache_alg)
2827 	    {
2828 	    case alg_shift:
2829 	      goto do_alg_shift;
2830 
2831 	    case alg_add_t_m2:
2832 	    case alg_sub_t_m2:
2833 	      goto do_alg_addsub_t_m2;
2834 
2835 	    case alg_add_factor:
2836 	    case alg_sub_factor:
2837 	      goto do_alg_addsub_factor;
2838 
2839 	    case alg_add_t2_m:
2840 	      goto do_alg_add_t2_m;
2841 
2842 	    case alg_sub_t2_m:
2843 	      goto do_alg_sub_t2_m;
2844 
2845 	    default:
2846 	      gcc_unreachable ();
2847 	    }
2848 	}
2849     }
2850 
2851   /* If we have a group of zero bits at the low-order part of T, try
2852      multiplying by the remaining bits and then doing a shift.  */
2853 
2854   if ((t & 1) == 0)
2855     {
2856     do_alg_shift:
2857       m = ctz_or_zero (t); /* m = number of low zero bits */
2858       if (m < maxm)
2859 	{
2860 	  q = t >> m;
2861 	  /* The function expand_shift will choose between a shift and
2862 	     a sequence of additions, so the observed cost is given as
2863 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2864 	  op_cost = m * add_cost (speed, mode);
2865 	  if (shift_cost (speed, mode, m) < op_cost)
2866 	    op_cost = shift_cost (speed, mode, m);
2867 	  new_limit.cost = best_cost.cost - op_cost;
2868 	  new_limit.latency = best_cost.latency - op_cost;
2869 	  synth_mult (alg_in, q, &new_limit, mode);
2870 
2871 	  alg_in->cost.cost += op_cost;
2872 	  alg_in->cost.latency += op_cost;
2873 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2874 	    {
2875 	      best_cost = alg_in->cost;
2876 	      std::swap (alg_in, best_alg);
2877 	      best_alg->log[best_alg->ops] = m;
2878 	      best_alg->op[best_alg->ops] = alg_shift;
2879 	    }
2880 
2881 	  /* See if treating ORIG_T as a signed number yields a better
2882 	     sequence.  Try this sequence only for a negative ORIG_T
2883 	     as it would be useless for a non-negative ORIG_T.  */
2884 	  if ((HOST_WIDE_INT) orig_t < 0)
2885 	    {
2886 	      /* Shift ORIG_T as follows because a right shift of a
2887 		 negative-valued signed type is implementation
2888 		 defined.  */
2889 	      q = ~(~orig_t >> m);
2890 	      /* The function expand_shift will choose between a shift
2891 		 and a sequence of additions, so the observed cost is
2892 		 given as MIN (m * add_cost(speed, mode),
2893 		 shift_cost(speed, mode, m)).  */
2894 	      op_cost = m * add_cost (speed, mode);
2895 	      if (shift_cost (speed, mode, m) < op_cost)
2896 		op_cost = shift_cost (speed, mode, m);
2897 	      new_limit.cost = best_cost.cost - op_cost;
2898 	      new_limit.latency = best_cost.latency - op_cost;
2899 	      synth_mult (alg_in, q, &new_limit, mode);
2900 
2901 	      alg_in->cost.cost += op_cost;
2902 	      alg_in->cost.latency += op_cost;
2903 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2904 		{
2905 		  best_cost = alg_in->cost;
2906 		  std::swap (alg_in, best_alg);
2907 		  best_alg->log[best_alg->ops] = m;
2908 		  best_alg->op[best_alg->ops] = alg_shift;
2909 		}
2910 	    }
2911 	}
2912       if (cache_hit)
2913 	goto done;
2914     }
2915 
2916   /* If we have an odd number, add or subtract one.  */
2917   if ((t & 1) != 0)
2918     {
2919       unsigned HOST_WIDE_INT w;
2920 
2921     do_alg_addsub_t_m2:
2922       for (w = 1; (w & t) != 0; w <<= 1)
2923 	;
2924       /* If T was -1, then W will be zero after the loop.  This is another
2925 	 case where T ends with ...111.  Handling this with (T + 1) and
2926 	 subtract 1 produces slightly better code and results in algorithm
2927 	 selection much faster than treating it like the ...0111 case
2928 	 below.  */
2929       if (w == 0
2930 	  || (w > 2
2931 	      /* Reject the case where t is 3.
2932 		 Thus we prefer addition in that case.  */
2933 	      && t != 3))
2934 	{
2935 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2936 
2937 	  op_cost = add_cost (speed, mode);
2938 	  new_limit.cost = best_cost.cost - op_cost;
2939 	  new_limit.latency = best_cost.latency - op_cost;
2940 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2941 
2942 	  alg_in->cost.cost += op_cost;
2943 	  alg_in->cost.latency += op_cost;
2944 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2945 	    {
2946 	      best_cost = alg_in->cost;
2947 	      std::swap (alg_in, best_alg);
2948 	      best_alg->log[best_alg->ops] = 0;
2949 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2950 	    }
2951 	}
2952       else
2953 	{
2954 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2955 
2956 	  op_cost = add_cost (speed, mode);
2957 	  new_limit.cost = best_cost.cost - op_cost;
2958 	  new_limit.latency = best_cost.latency - op_cost;
2959 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2960 
2961 	  alg_in->cost.cost += op_cost;
2962 	  alg_in->cost.latency += op_cost;
2963 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2964 	    {
2965 	      best_cost = alg_in->cost;
2966 	      std::swap (alg_in, best_alg);
2967 	      best_alg->log[best_alg->ops] = 0;
2968 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2969 	    }
2970 	}
2971 
2972       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2973 	 quickly with a - a * n for some appropriate constant n.  */
2974       m = exact_log2 (-orig_t + 1);
2975       if (m >= 0 && m < maxm)
2976 	{
2977 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2978 	  /* If the target has a cheap shift-and-subtract insn use
2979 	     that in preference to a shift insn followed by a sub insn.
2980 	     Assume that the shift-and-sub is "atomic" with a latency
2981 	     equal to it's cost, otherwise assume that on superscalar
2982 	     hardware the shift may be executed concurrently with the
2983 	     earlier steps in the algorithm.  */
2984 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2985 	    {
2986 	      op_cost = shiftsub1_cost (speed, mode, m);
2987 	      op_latency = op_cost;
2988 	    }
2989 	  else
2990 	    op_latency = add_cost (speed, mode);
2991 
2992 	  new_limit.cost = best_cost.cost - op_cost;
2993 	  new_limit.latency = best_cost.latency - op_latency;
2994 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2995 		      &new_limit, mode);
2996 
2997 	  alg_in->cost.cost += op_cost;
2998 	  alg_in->cost.latency += op_latency;
2999 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3000 	    {
3001 	      best_cost = alg_in->cost;
3002 	      std::swap (alg_in, best_alg);
3003 	      best_alg->log[best_alg->ops] = m;
3004 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
3005 	    }
3006 	}
3007 
3008       if (cache_hit)
3009 	goto done;
3010     }
3011 
3012   /* Look for factors of t of the form
3013      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
3014      If we find such a factor, we can multiply by t using an algorithm that
3015      multiplies by q, shift the result by m and add/subtract it to itself.
3016 
3017      We search for large factors first and loop down, even if large factors
3018      are less probable than small; if we find a large factor we will find a
3019      good sequence quickly, and therefore be able to prune (by decreasing
3020      COST_LIMIT) the search.  */
3021 
3022  do_alg_addsub_factor:
3023   for (m = floor_log2 (t - 1); m >= 2; m--)
3024     {
3025       unsigned HOST_WIDE_INT d;
3026 
3027       d = (HOST_WIDE_INT_1U << m) + 1;
3028       if (t % d == 0 && t > d && m < maxm
3029 	  && (!cache_hit || cache_alg == alg_add_factor))
3030 	{
3031 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3032 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
3033 	    op_cost = shiftadd_cost (speed, mode, m);
3034 
3035 	  op_latency = op_cost;
3036 
3037 
3038 	  new_limit.cost = best_cost.cost - op_cost;
3039 	  new_limit.latency = best_cost.latency - op_latency;
3040 	  synth_mult (alg_in, t / d, &new_limit, mode);
3041 
3042 	  alg_in->cost.cost += op_cost;
3043 	  alg_in->cost.latency += op_latency;
3044 	  if (alg_in->cost.latency < op_cost)
3045 	    alg_in->cost.latency = op_cost;
3046 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3047 	    {
3048 	      best_cost = alg_in->cost;
3049 	      std::swap (alg_in, best_alg);
3050 	      best_alg->log[best_alg->ops] = m;
3051 	      best_alg->op[best_alg->ops] = alg_add_factor;
3052 	    }
3053 	  /* Other factors will have been taken care of in the recursion.  */
3054 	  break;
3055 	}
3056 
3057       d = (HOST_WIDE_INT_1U << m) - 1;
3058       if (t % d == 0 && t > d && m < maxm
3059 	  && (!cache_hit || cache_alg == alg_sub_factor))
3060 	{
3061 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3062 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
3063 	    op_cost = shiftsub0_cost (speed, mode, m);
3064 
3065 	  op_latency = op_cost;
3066 
3067 	  new_limit.cost = best_cost.cost - op_cost;
3068 	  new_limit.latency = best_cost.latency - op_latency;
3069 	  synth_mult (alg_in, t / d, &new_limit, mode);
3070 
3071 	  alg_in->cost.cost += op_cost;
3072 	  alg_in->cost.latency += op_latency;
3073 	  if (alg_in->cost.latency < op_cost)
3074 	    alg_in->cost.latency = op_cost;
3075 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3076 	    {
3077 	      best_cost = alg_in->cost;
3078 	      std::swap (alg_in, best_alg);
3079 	      best_alg->log[best_alg->ops] = m;
3080 	      best_alg->op[best_alg->ops] = alg_sub_factor;
3081 	    }
3082 	  break;
3083 	}
3084     }
3085   if (cache_hit)
3086     goto done;
3087 
3088   /* Try shift-and-add (load effective address) instructions,
3089      i.e. do a*3, a*5, a*9.  */
3090   if ((t & 1) != 0)
3091     {
3092     do_alg_add_t2_m:
3093       q = t - 1;
3094       m = ctz_hwi (q);
3095       if (q && m < maxm)
3096 	{
3097 	  op_cost = shiftadd_cost (speed, mode, m);
3098 	  new_limit.cost = best_cost.cost - op_cost;
3099 	  new_limit.latency = best_cost.latency - op_cost;
3100 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
3101 
3102 	  alg_in->cost.cost += op_cost;
3103 	  alg_in->cost.latency += op_cost;
3104 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3105 	    {
3106 	      best_cost = alg_in->cost;
3107 	      std::swap (alg_in, best_alg);
3108 	      best_alg->log[best_alg->ops] = m;
3109 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
3110 	    }
3111 	}
3112       if (cache_hit)
3113 	goto done;
3114 
3115     do_alg_sub_t2_m:
3116       q = t + 1;
3117       m = ctz_hwi (q);
3118       if (q && m < maxm)
3119 	{
3120 	  op_cost = shiftsub0_cost (speed, mode, m);
3121 	  new_limit.cost = best_cost.cost - op_cost;
3122 	  new_limit.latency = best_cost.latency - op_cost;
3123 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
3124 
3125 	  alg_in->cost.cost += op_cost;
3126 	  alg_in->cost.latency += op_cost;
3127 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
3128 	    {
3129 	      best_cost = alg_in->cost;
3130 	      std::swap (alg_in, best_alg);
3131 	      best_alg->log[best_alg->ops] = m;
3132 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
3133 	    }
3134 	}
3135       if (cache_hit)
3136 	goto done;
3137     }
3138 
3139  done:
3140   /* If best_cost has not decreased, we have not found any algorithm.  */
3141   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
3142     {
3143       /* We failed to find an algorithm.  Record alg_impossible for
3144 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
3145 	 we are asked to find an algorithm for T within the same or
3146 	 lower COST_LIMIT, we can immediately return to the
3147 	 caller.  */
3148       entry_ptr->t = t;
3149       entry_ptr->mode = mode;
3150       entry_ptr->speed = speed;
3151       entry_ptr->alg = alg_impossible;
3152       entry_ptr->cost = *cost_limit;
3153       return;
3154     }
3155 
3156   /* Cache the result.  */
3157   if (!cache_hit)
3158     {
3159       entry_ptr->t = t;
3160       entry_ptr->mode = mode;
3161       entry_ptr->speed = speed;
3162       entry_ptr->alg = best_alg->op[best_alg->ops];
3163       entry_ptr->cost.cost = best_cost.cost;
3164       entry_ptr->cost.latency = best_cost.latency;
3165     }
3166 
3167   /* If we are getting a too long sequence for `struct algorithm'
3168      to record, make this search fail.  */
3169   if (best_alg->ops == MAX_BITS_PER_WORD)
3170     return;
3171 
3172   /* Copy the algorithm from temporary space to the space at alg_out.
3173      We avoid using structure assignment because the majority of
3174      best_alg is normally undefined, and this is a critical function.  */
3175   alg_out->ops = best_alg->ops + 1;
3176   alg_out->cost = best_cost;
3177   memcpy (alg_out->op, best_alg->op,
3178 	  alg_out->ops * sizeof *alg_out->op);
3179   memcpy (alg_out->log, best_alg->log,
3180 	  alg_out->ops * sizeof *alg_out->log);
3181 }
3182 
3183 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3184    Try three variations:
3185 
3186        - a shift/add sequence based on VAL itself
3187        - a shift/add sequence based on -VAL, followed by a negation
3188        - a shift/add sequence based on VAL - 1, followed by an addition.
3189 
3190    Return true if the cheapest of these cost less than MULT_COST,
3191    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3192 
3193 bool
choose_mult_variant(machine_mode mode,HOST_WIDE_INT val,struct algorithm * alg,enum mult_variant * variant,int mult_cost)3194 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3195 		     struct algorithm *alg, enum mult_variant *variant,
3196 		     int mult_cost)
3197 {
3198   struct algorithm alg2;
3199   struct mult_cost limit;
3200   int op_cost;
3201   bool speed = optimize_insn_for_speed_p ();
3202 
3203   /* Fail quickly for impossible bounds.  */
3204   if (mult_cost < 0)
3205     return false;
3206 
3207   /* Ensure that mult_cost provides a reasonable upper bound.
3208      Any constant multiplication can be performed with less
3209      than 2 * bits additions.  */
3210   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3211   if (mult_cost > op_cost)
3212     mult_cost = op_cost;
3213 
3214   *variant = basic_variant;
3215   limit.cost = mult_cost;
3216   limit.latency = mult_cost;
3217   synth_mult (alg, val, &limit, mode);
3218 
3219   /* This works only if the inverted value actually fits in an
3220      `unsigned int' */
3221   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3222     {
3223       op_cost = neg_cost (speed, mode);
3224       if (MULT_COST_LESS (&alg->cost, mult_cost))
3225 	{
3226 	  limit.cost = alg->cost.cost - op_cost;
3227 	  limit.latency = alg->cost.latency - op_cost;
3228 	}
3229       else
3230 	{
3231 	  limit.cost = mult_cost - op_cost;
3232 	  limit.latency = mult_cost - op_cost;
3233 	}
3234 
3235       synth_mult (&alg2, -val, &limit, mode);
3236       alg2.cost.cost += op_cost;
3237       alg2.cost.latency += op_cost;
3238       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3239 	*alg = alg2, *variant = negate_variant;
3240     }
3241 
3242   /* This proves very useful for division-by-constant.  */
3243   op_cost = add_cost (speed, mode);
3244   if (MULT_COST_LESS (&alg->cost, mult_cost))
3245     {
3246       limit.cost = alg->cost.cost - op_cost;
3247       limit.latency = alg->cost.latency - op_cost;
3248     }
3249   else
3250     {
3251       limit.cost = mult_cost - op_cost;
3252       limit.latency = mult_cost - op_cost;
3253     }
3254 
3255   synth_mult (&alg2, val - 1, &limit, mode);
3256   alg2.cost.cost += op_cost;
3257   alg2.cost.latency += op_cost;
3258   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3259     *alg = alg2, *variant = add_variant;
3260 
3261   return MULT_COST_LESS (&alg->cost, mult_cost);
3262 }
3263 
3264 /* A subroutine of expand_mult, used for constant multiplications.
3265    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3266    convenient.  Use the shift/add sequence described by ALG and apply
3267    the final fixup specified by VARIANT.  */
3268 
3269 static rtx
expand_mult_const(machine_mode mode,rtx op0,HOST_WIDE_INT val,rtx target,const struct algorithm * alg,enum mult_variant variant)3270 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3271 		   rtx target, const struct algorithm *alg,
3272 		   enum mult_variant variant)
3273 {
3274   unsigned HOST_WIDE_INT val_so_far;
3275   rtx_insn *insn;
3276   rtx accum, tem;
3277   int opno;
3278   machine_mode nmode;
3279 
3280   /* Avoid referencing memory over and over and invalid sharing
3281      on SUBREGs.  */
3282   op0 = force_reg (mode, op0);
3283 
3284   /* ACCUM starts out either as OP0 or as a zero, depending on
3285      the first operation.  */
3286 
3287   if (alg->op[0] == alg_zero)
3288     {
3289       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3290       val_so_far = 0;
3291     }
3292   else if (alg->op[0] == alg_m)
3293     {
3294       accum = copy_to_mode_reg (mode, op0);
3295       val_so_far = 1;
3296     }
3297   else
3298     gcc_unreachable ();
3299 
3300   for (opno = 1; opno < alg->ops; opno++)
3301     {
3302       int log = alg->log[opno];
3303       rtx shift_subtarget = optimize ? 0 : accum;
3304       rtx add_target
3305 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3306 	   && !optimize)
3307 	  ? target : 0;
3308       rtx accum_target = optimize ? 0 : accum;
3309       rtx accum_inner;
3310 
3311       switch (alg->op[opno])
3312 	{
3313 	case alg_shift:
3314 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3315 	  /* REG_EQUAL note will be attached to the following insn.  */
3316 	  emit_move_insn (accum, tem);
3317 	  val_so_far <<= log;
3318 	  break;
3319 
3320 	case alg_add_t_m2:
3321 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3322 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3323 				 add_target ? add_target : accum_target);
3324 	  val_so_far += HOST_WIDE_INT_1U << log;
3325 	  break;
3326 
3327 	case alg_sub_t_m2:
3328 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3329 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3330 				 add_target ? add_target : accum_target);
3331 	  val_so_far -= HOST_WIDE_INT_1U << log;
3332 	  break;
3333 
3334 	case alg_add_t2_m:
3335 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3336 				log, shift_subtarget, 0);
3337 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3338 				 add_target ? add_target : accum_target);
3339 	  val_so_far = (val_so_far << log) + 1;
3340 	  break;
3341 
3342 	case alg_sub_t2_m:
3343 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3344 				log, shift_subtarget, 0);
3345 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3346 				 add_target ? add_target : accum_target);
3347 	  val_so_far = (val_so_far << log) - 1;
3348 	  break;
3349 
3350 	case alg_add_factor:
3351 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3352 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3353 				 add_target ? add_target : accum_target);
3354 	  val_so_far += val_so_far << log;
3355 	  break;
3356 
3357 	case alg_sub_factor:
3358 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3359 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3360 				 (add_target
3361 				  ? add_target : (optimize ? 0 : tem)));
3362 	  val_so_far = (val_so_far << log) - val_so_far;
3363 	  break;
3364 
3365 	default:
3366 	  gcc_unreachable ();
3367 	}
3368 
3369       if (SCALAR_INT_MODE_P (mode))
3370 	{
3371 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3372 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3373 	     we've set the inner register and must properly indicate that.  */
3374 	  tem = op0, nmode = mode;
3375 	  accum_inner = accum;
3376 	  if (GET_CODE (accum) == SUBREG)
3377 	    {
3378 	      accum_inner = SUBREG_REG (accum);
3379 	      nmode = GET_MODE (accum_inner);
3380 	      tem = gen_lowpart (nmode, op0);
3381 	    }
3382 
3383 	  /* Don't add a REG_EQUAL note if tem is a paradoxical SUBREG.
3384 	     In that case, only the low bits of accum would be guaranteed to
3385 	     be equal to the content of the REG_EQUAL note, the upper bits
3386 	     can be anything.  */
3387 	  if (!paradoxical_subreg_p (tem))
3388 	    {
3389 	      insn = get_last_insn ();
3390 	      wide_int wval_so_far
3391 		= wi::uhwi (val_so_far,
3392 			    GET_MODE_PRECISION (as_a <scalar_mode> (nmode)));
3393 	      rtx c = immed_wide_int_const (wval_so_far, nmode);
3394 	      set_dst_reg_note (insn, REG_EQUAL, gen_rtx_MULT (nmode, tem, c),
3395 				accum_inner);
3396 	    }
3397 	}
3398     }
3399 
3400   if (variant == negate_variant)
3401     {
3402       val_so_far = -val_so_far;
3403       accum = expand_unop (mode, neg_optab, accum, target, 0);
3404     }
3405   else if (variant == add_variant)
3406     {
3407       val_so_far = val_so_far + 1;
3408       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3409     }
3410 
3411   /* Compare only the bits of val and val_so_far that are significant
3412      in the result mode, to avoid sign-/zero-extension confusion.  */
3413   nmode = GET_MODE_INNER (mode);
3414   val &= GET_MODE_MASK (nmode);
3415   val_so_far &= GET_MODE_MASK (nmode);
3416   gcc_assert (val == (HOST_WIDE_INT) val_so_far);
3417 
3418   return accum;
3419 }
3420 
3421 /* Perform a multiplication and return an rtx for the result.
3422    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3423    TARGET is a suggestion for where to store the result (an rtx).
3424 
3425    We check specially for a constant integer as OP1.
3426    If you want this check for OP0 as well, then before calling
3427    you should swap the two operands if OP0 would be constant.  */
3428 
3429 rtx
expand_mult(machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,bool no_libcall)3430 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3431 	     int unsignedp, bool no_libcall)
3432 {
3433   enum mult_variant variant;
3434   struct algorithm algorithm;
3435   rtx scalar_op1;
3436   int max_cost;
3437   bool speed = optimize_insn_for_speed_p ();
3438   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3439 
3440   if (CONSTANT_P (op0))
3441     std::swap (op0, op1);
3442 
3443   /* For vectors, there are several simplifications that can be made if
3444      all elements of the vector constant are identical.  */
3445   scalar_op1 = unwrap_const_vec_duplicate (op1);
3446 
3447   if (INTEGRAL_MODE_P (mode))
3448     {
3449       rtx fake_reg;
3450       HOST_WIDE_INT coeff;
3451       bool is_neg;
3452       int mode_bitsize;
3453 
3454       if (op1 == CONST0_RTX (mode))
3455 	return op1;
3456       if (op1 == CONST1_RTX (mode))
3457 	return op0;
3458       if (op1 == CONSTM1_RTX (mode))
3459 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3460 			    op0, target, 0);
3461 
3462       if (do_trapv)
3463 	goto skip_synth;
3464 
3465       /* If mode is integer vector mode, check if the backend supports
3466 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3467 	 synthetized multiply.  */
3468       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3469 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3470 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3471 	goto skip_synth;
3472 
3473       /* These are the operations that are potentially turned into
3474 	 a sequence of shifts and additions.  */
3475       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3476 
3477       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3478 	 less than or equal in size to `unsigned int' this doesn't matter.
3479 	 If the mode is larger than `unsigned int', then synth_mult works
3480 	 only if the constant value exactly fits in an `unsigned int' without
3481 	 any truncation.  This means that multiplying by negative values does
3482 	 not work; results are off by 2^32 on a 32 bit machine.  */
3483       if (CONST_INT_P (scalar_op1))
3484 	{
3485 	  coeff = INTVAL (scalar_op1);
3486 	  is_neg = coeff < 0;
3487 	}
3488 #if TARGET_SUPPORTS_WIDE_INT
3489       else if (CONST_WIDE_INT_P (scalar_op1))
3490 #else
3491       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3492 #endif
3493 	{
3494 	  int shift = wi::exact_log2 (rtx_mode_t (scalar_op1, mode));
3495 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3496 	  if (shift > 0)
3497 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3498 				 shift, target, unsignedp);
3499 	  else
3500 	    goto skip_synth;
3501 	}
3502       else
3503 	goto skip_synth;
3504 
3505       /* We used to test optimize here, on the grounds that it's better to
3506 	 produce a smaller program when -O is not used.  But this causes
3507 	 such a terrible slowdown sometimes that it seems better to always
3508 	 use synth_mult.  */
3509 
3510       /* Special case powers of two.  */
3511       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3512 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3513 	return expand_shift (LSHIFT_EXPR, mode, op0,
3514 			     floor_log2 (coeff), target, unsignedp);
3515 
3516       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3517 
3518       /* Attempt to handle multiplication of DImode values by negative
3519 	 coefficients, by performing the multiplication by a positive
3520 	 multiplier and then inverting the result.  */
3521       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3522 	{
3523 	  /* Its safe to use -coeff even for INT_MIN, as the
3524 	     result is interpreted as an unsigned coefficient.
3525 	     Exclude cost of op0 from max_cost to match the cost
3526 	     calculation of the synth_mult.  */
3527 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3528 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3529 				    mode, speed)
3530 		      - neg_cost (speed, mode));
3531 	  if (max_cost <= 0)
3532 	    goto skip_synth;
3533 
3534 	  /* Special case powers of two.  */
3535 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3536 	    {
3537 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3538 				       floor_log2 (coeff), target, unsignedp);
3539 	      return expand_unop (mode, neg_optab, temp, target, 0);
3540 	    }
3541 
3542 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3543 				   max_cost))
3544 	    {
3545 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3546 					    &algorithm, variant);
3547 	      return expand_unop (mode, neg_optab, temp, target, 0);
3548 	    }
3549 	  goto skip_synth;
3550 	}
3551 
3552       /* Exclude cost of op0 from max_cost to match the cost
3553 	 calculation of the synth_mult.  */
3554       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3555       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3556 	return expand_mult_const (mode, op0, coeff, target,
3557 				  &algorithm, variant);
3558     }
3559  skip_synth:
3560 
3561   /* Expand x*2.0 as x+x.  */
3562   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3563       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3564     {
3565       op0 = force_reg (GET_MODE (op0), op0);
3566       return expand_binop (mode, add_optab, op0, op0,
3567 			   target, unsignedp,
3568 			   no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3569     }
3570 
3571   /* This used to use umul_optab if unsigned, but for non-widening multiply
3572      there is no difference between signed and unsigned.  */
3573   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3574 		      op0, op1, target, unsignedp,
3575 		      no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
3576   gcc_assert (op0 || no_libcall);
3577   return op0;
3578 }
3579 
3580 /* Return a cost estimate for multiplying a register by the given
3581    COEFFicient in the given MODE and SPEED.  */
3582 
3583 int
mult_by_coeff_cost(HOST_WIDE_INT coeff,machine_mode mode,bool speed)3584 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3585 {
3586   int max_cost;
3587   struct algorithm algorithm;
3588   enum mult_variant variant;
3589 
3590   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3591   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3592 			   mode, speed);
3593   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3594     return algorithm.cost.cost;
3595   else
3596     return max_cost;
3597 }
3598 
3599 /* Perform a widening multiplication and return an rtx for the result.
3600    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3601    TARGET is a suggestion for where to store the result (an rtx).
3602    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3603    or smul_widen_optab.
3604 
3605    We check specially for a constant integer as OP1, comparing the
3606    cost of a widening multiply against the cost of a sequence of shifts
3607    and adds.  */
3608 
3609 rtx
expand_widening_mult(machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,optab this_optab)3610 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3611 		      int unsignedp, optab this_optab)
3612 {
3613   bool speed = optimize_insn_for_speed_p ();
3614   rtx cop1;
3615 
3616   if (CONST_INT_P (op1)
3617       && GET_MODE (op0) != VOIDmode
3618       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3619 				this_optab == umul_widen_optab))
3620       && CONST_INT_P (cop1)
3621       && (INTVAL (cop1) >= 0
3622 	  || HWI_COMPUTABLE_MODE_P (mode)))
3623     {
3624       HOST_WIDE_INT coeff = INTVAL (cop1);
3625       int max_cost;
3626       enum mult_variant variant;
3627       struct algorithm algorithm;
3628 
3629       if (coeff == 0)
3630 	return CONST0_RTX (mode);
3631 
3632       /* Special case powers of two.  */
3633       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3634 	{
3635 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3636 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3637 			       floor_log2 (coeff), target, unsignedp);
3638 	}
3639 
3640       /* Exclude cost of op0 from max_cost to match the cost
3641 	 calculation of the synth_mult.  */
3642       max_cost = mul_widen_cost (speed, mode);
3643       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3644 			       max_cost))
3645 	{
3646 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3647 	  return expand_mult_const (mode, op0, coeff, target,
3648 				    &algorithm, variant);
3649 	}
3650     }
3651   return expand_binop (mode, this_optab, op0, op1, target,
3652 		       unsignedp, OPTAB_LIB_WIDEN);
3653 }
3654 
3655 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3656    replace division by D, and put the least significant N bits of the result
3657    in *MULTIPLIER_PTR and return the most significant bit.
3658 
3659    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3660    needed precision is in PRECISION (should be <= N).
3661 
3662    PRECISION should be as small as possible so this function can choose
3663    multiplier more freely.
3664 
3665    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3666    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3667 
3668    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3669    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3670 
3671 unsigned HOST_WIDE_INT
choose_multiplier(unsigned HOST_WIDE_INT d,int n,int precision,unsigned HOST_WIDE_INT * multiplier_ptr,int * post_shift_ptr,int * lgup_ptr)3672 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3673 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3674 		   int *post_shift_ptr, int *lgup_ptr)
3675 {
3676   int lgup, post_shift;
3677   int pow, pow2;
3678 
3679   /* lgup = ceil(log2(divisor)); */
3680   lgup = ceil_log2 (d);
3681 
3682   gcc_assert (lgup <= n);
3683 
3684   pow = n + lgup;
3685   pow2 = n + lgup - precision;
3686 
3687   /* mlow = 2^(N + lgup)/d */
3688   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3689   wide_int mlow = wi::udiv_trunc (val, d);
3690 
3691   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3692   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3693   wide_int mhigh = wi::udiv_trunc (val, d);
3694 
3695   /* If precision == N, then mlow, mhigh exceed 2^N
3696      (but they do not exceed 2^(N+1)).  */
3697 
3698   /* Reduce to lowest terms.  */
3699   for (post_shift = lgup; post_shift > 0; post_shift--)
3700     {
3701       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3702 						       HOST_BITS_PER_WIDE_INT);
3703       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3704 						       HOST_BITS_PER_WIDE_INT);
3705       if (ml_lo >= mh_lo)
3706 	break;
3707 
3708       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3709       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3710     }
3711 
3712   *post_shift_ptr = post_shift;
3713   *lgup_ptr = lgup;
3714   if (n < HOST_BITS_PER_WIDE_INT)
3715     {
3716       unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << n) - 1;
3717       *multiplier_ptr = mhigh.to_uhwi () & mask;
3718       return mhigh.to_uhwi () >= mask;
3719     }
3720   else
3721     {
3722       *multiplier_ptr = mhigh.to_uhwi ();
3723       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3724     }
3725 }
3726 
3727 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3728    congruent to 1 (mod 2**N).  */
3729 
3730 static unsigned HOST_WIDE_INT
invert_mod2n(unsigned HOST_WIDE_INT x,int n)3731 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3732 {
3733   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3734 
3735   /* The algorithm notes that the choice y = x satisfies
3736      x*y == 1 mod 2^3, since x is assumed odd.
3737      Each iteration doubles the number of bits of significance in y.  */
3738 
3739   unsigned HOST_WIDE_INT mask;
3740   unsigned HOST_WIDE_INT y = x;
3741   int nbit = 3;
3742 
3743   mask = (n == HOST_BITS_PER_WIDE_INT
3744 	  ? HOST_WIDE_INT_M1U
3745 	  : (HOST_WIDE_INT_1U << n) - 1);
3746 
3747   while (nbit < n)
3748     {
3749       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3750       nbit *= 2;
3751     }
3752   return y;
3753 }
3754 
3755 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3756    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3757    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3758    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3759    become signed.
3760 
3761    The result is put in TARGET if that is convenient.
3762 
3763    MODE is the mode of operation.  */
3764 
3765 rtx
expand_mult_highpart_adjust(scalar_int_mode mode,rtx adj_operand,rtx op0,rtx op1,rtx target,int unsignedp)3766 expand_mult_highpart_adjust (scalar_int_mode mode, rtx adj_operand, rtx op0,
3767 			     rtx op1, rtx target, int unsignedp)
3768 {
3769   rtx tem;
3770   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3771 
3772   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3773 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3774   tem = expand_and (mode, tem, op1, NULL_RTX);
3775   adj_operand
3776     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3777 		     adj_operand);
3778 
3779   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3780 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3781   tem = expand_and (mode, tem, op0, NULL_RTX);
3782   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3783 			  target);
3784 
3785   return target;
3786 }
3787 
3788 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3789 
3790 static rtx
extract_high_half(scalar_int_mode mode,rtx op)3791 extract_high_half (scalar_int_mode mode, rtx op)
3792 {
3793   if (mode == word_mode)
3794     return gen_highpart (mode, op);
3795 
3796   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3797 
3798   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3799 		     GET_MODE_BITSIZE (mode), 0, 1);
3800   return convert_modes (mode, wider_mode, op, 0);
3801 }
3802 
3803 /* Like expmed_mult_highpart, but only consider using a multiplication
3804    optab.  OP1 is an rtx for the constant operand.  */
3805 
3806 static rtx
expmed_mult_highpart_optab(scalar_int_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3807 expmed_mult_highpart_optab (scalar_int_mode mode, rtx op0, rtx op1,
3808 			    rtx target, int unsignedp, int max_cost)
3809 {
3810   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3811   optab moptab;
3812   rtx tem;
3813   int size;
3814   bool speed = optimize_insn_for_speed_p ();
3815 
3816   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3817 
3818   size = GET_MODE_BITSIZE (mode);
3819 
3820   /* Firstly, try using a multiplication insn that only generates the needed
3821      high part of the product, and in the sign flavor of unsignedp.  */
3822   if (mul_highpart_cost (speed, mode) < max_cost)
3823     {
3824       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3825       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3826 			  unsignedp, OPTAB_DIRECT);
3827       if (tem)
3828 	return tem;
3829     }
3830 
3831   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3832      Need to adjust the result after the multiplication.  */
3833   if (size - 1 < BITS_PER_WORD
3834       && (mul_highpart_cost (speed, mode)
3835 	  + 2 * shift_cost (speed, mode, size-1)
3836 	  + 4 * add_cost (speed, mode) < max_cost))
3837     {
3838       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3839       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3840 			  unsignedp, OPTAB_DIRECT);
3841       if (tem)
3842 	/* We used the wrong signedness.  Adjust the result.  */
3843 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3844 					    tem, unsignedp);
3845     }
3846 
3847   /* Try widening multiplication.  */
3848   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3849   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3850       && mul_widen_cost (speed, wider_mode) < max_cost)
3851     {
3852       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3853 			  unsignedp, OPTAB_WIDEN);
3854       if (tem)
3855 	return extract_high_half (mode, tem);
3856     }
3857 
3858   /* Try widening the mode and perform a non-widening multiplication.  */
3859   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3860       && size - 1 < BITS_PER_WORD
3861       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3862 	  < max_cost))
3863     {
3864       rtx_insn *insns;
3865       rtx wop0, wop1;
3866 
3867       /* We need to widen the operands, for example to ensure the
3868 	 constant multiplier is correctly sign or zero extended.
3869 	 Use a sequence to clean-up any instructions emitted by
3870 	 the conversions if things don't work out.  */
3871       start_sequence ();
3872       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3873       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3874       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3875 			  unsignedp, OPTAB_WIDEN);
3876       insns = get_insns ();
3877       end_sequence ();
3878 
3879       if (tem)
3880 	{
3881 	  emit_insn (insns);
3882 	  return extract_high_half (mode, tem);
3883 	}
3884     }
3885 
3886   /* Try widening multiplication of opposite signedness, and adjust.  */
3887   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3888   if (convert_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3889       && size - 1 < BITS_PER_WORD
3890       && (mul_widen_cost (speed, wider_mode)
3891 	  + 2 * shift_cost (speed, mode, size-1)
3892 	  + 4 * add_cost (speed, mode) < max_cost))
3893     {
3894       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3895 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3896       if (tem != 0)
3897 	{
3898 	  tem = extract_high_half (mode, tem);
3899 	  /* We used the wrong signedness.  Adjust the result.  */
3900 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3901 					      target, unsignedp);
3902 	}
3903     }
3904 
3905   return 0;
3906 }
3907 
3908 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3909    putting the high half of the result in TARGET if that is convenient,
3910    and return where the result is.  If the operation can not be performed,
3911    0 is returned.
3912 
3913    MODE is the mode of operation and result.
3914 
3915    UNSIGNEDP nonzero means unsigned multiply.
3916 
3917    MAX_COST is the total allowed cost for the expanded RTL.  */
3918 
3919 static rtx
expmed_mult_highpart(scalar_int_mode mode,rtx op0,rtx op1,rtx target,int unsignedp,int max_cost)3920 expmed_mult_highpart (scalar_int_mode mode, rtx op0, rtx op1,
3921 		      rtx target, int unsignedp, int max_cost)
3922 {
3923   unsigned HOST_WIDE_INT cnst1;
3924   int extra_cost;
3925   bool sign_adjust = false;
3926   enum mult_variant variant;
3927   struct algorithm alg;
3928   rtx tem;
3929   bool speed = optimize_insn_for_speed_p ();
3930 
3931   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3932   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3933 
3934   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3935 
3936   /* We can't optimize modes wider than BITS_PER_WORD.
3937      ??? We might be able to perform double-word arithmetic if
3938      mode == word_mode, however all the cost calculations in
3939      synth_mult etc. assume single-word operations.  */
3940   scalar_int_mode wider_mode = GET_MODE_WIDER_MODE (mode).require ();
3941   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3942     return expmed_mult_highpart_optab (mode, op0, op1, target,
3943 				       unsignedp, max_cost);
3944 
3945   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3946 
3947   /* Check whether we try to multiply by a negative constant.  */
3948   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3949     {
3950       sign_adjust = true;
3951       extra_cost += add_cost (speed, mode);
3952     }
3953 
3954   /* See whether shift/add multiplication is cheap enough.  */
3955   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3956 			   max_cost - extra_cost))
3957     {
3958       /* See whether the specialized multiplication optabs are
3959 	 cheaper than the shift/add version.  */
3960       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3961 					alg.cost.cost + extra_cost);
3962       if (tem)
3963 	return tem;
3964 
3965       tem = convert_to_mode (wider_mode, op0, unsignedp);
3966       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3967       tem = extract_high_half (mode, tem);
3968 
3969       /* Adjust result for signedness.  */
3970       if (sign_adjust)
3971 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3972 
3973       return tem;
3974     }
3975   return expmed_mult_highpart_optab (mode, op0, op1, target,
3976 				     unsignedp, max_cost);
3977 }
3978 
3979 
3980 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3981 
3982 static rtx
expand_smod_pow2(scalar_int_mode mode,rtx op0,HOST_WIDE_INT d)3983 expand_smod_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
3984 {
3985   rtx result, temp, shift;
3986   rtx_code_label *label;
3987   int logd;
3988   int prec = GET_MODE_PRECISION (mode);
3989 
3990   logd = floor_log2 (d);
3991   result = gen_reg_rtx (mode);
3992 
3993   /* Avoid conditional branches when they're expensive.  */
3994   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3995       && optimize_insn_for_speed_p ())
3996     {
3997       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3998 				      mode, 0, -1);
3999       if (signmask)
4000 	{
4001 	  HOST_WIDE_INT masklow = (HOST_WIDE_INT_1 << logd) - 1;
4002 	  signmask = force_reg (mode, signmask);
4003 	  shift = gen_int_shift_amount (mode, GET_MODE_BITSIZE (mode) - logd);
4004 
4005 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
4006 	     which instruction sequence to use.  If logical right shifts
4007 	     are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
4008 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
4009 
4010 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
4011 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
4012 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
4013 		  > COSTS_N_INSNS (2)))
4014 	    {
4015 	      temp = expand_binop (mode, xor_optab, op0, signmask,
4016 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4017 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4018 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4019 	      temp = expand_binop (mode, and_optab, temp,
4020 				   gen_int_mode (masklow, mode),
4021 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4022 	      temp = expand_binop (mode, xor_optab, temp, signmask,
4023 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4024 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4025 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4026 	    }
4027 	  else
4028 	    {
4029 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
4030 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
4031 	      signmask = force_reg (mode, signmask);
4032 
4033 	      temp = expand_binop (mode, add_optab, op0, signmask,
4034 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4035 	      temp = expand_binop (mode, and_optab, temp,
4036 				   gen_int_mode (masklow, mode),
4037 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4038 	      temp = expand_binop (mode, sub_optab, temp, signmask,
4039 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4040 	    }
4041 	  return temp;
4042 	}
4043     }
4044 
4045   /* Mask contains the mode's signbit and the significant bits of the
4046      modulus.  By including the signbit in the operation, many targets
4047      can avoid an explicit compare operation in the following comparison
4048      against zero.  */
4049   wide_int mask = wi::mask (logd, false, prec);
4050   mask = wi::set_bit (mask, prec - 1);
4051 
4052   temp = expand_binop (mode, and_optab, op0,
4053 		       immed_wide_int_const (mask, mode),
4054 		       result, 1, OPTAB_LIB_WIDEN);
4055   if (temp != result)
4056     emit_move_insn (result, temp);
4057 
4058   label = gen_label_rtx ();
4059   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
4060 
4061   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
4062 		       0, OPTAB_LIB_WIDEN);
4063 
4064   mask = wi::mask (logd, true, prec);
4065   temp = expand_binop (mode, ior_optab, temp,
4066 		       immed_wide_int_const (mask, mode),
4067 		       result, 1, OPTAB_LIB_WIDEN);
4068   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
4069 		       0, OPTAB_LIB_WIDEN);
4070   if (temp != result)
4071     emit_move_insn (result, temp);
4072   emit_label (label);
4073   return result;
4074 }
4075 
4076 /* Expand signed division of OP0 by a power of two D in mode MODE.
4077    This routine is only called for positive values of D.  */
4078 
4079 static rtx
expand_sdiv_pow2(scalar_int_mode mode,rtx op0,HOST_WIDE_INT d)4080 expand_sdiv_pow2 (scalar_int_mode mode, rtx op0, HOST_WIDE_INT d)
4081 {
4082   rtx temp;
4083   rtx_code_label *label;
4084   int logd;
4085 
4086   logd = floor_log2 (d);
4087 
4088   if (d == 2
4089       && BRANCH_COST (optimize_insn_for_speed_p (),
4090 		      false) >= 1)
4091     {
4092       temp = gen_reg_rtx (mode);
4093       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
4094       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4095 			   0, OPTAB_LIB_WIDEN);
4096       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4097     }
4098 
4099   if (HAVE_conditional_move
4100       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
4101     {
4102       rtx temp2;
4103 
4104       start_sequence ();
4105       temp2 = copy_to_mode_reg (mode, op0);
4106       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
4107 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
4108       temp = force_reg (mode, temp);
4109 
4110       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
4111       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
4112 				     mode, temp, temp2, mode, 0);
4113       if (temp2)
4114 	{
4115 	  rtx_insn *seq = get_insns ();
4116 	  end_sequence ();
4117 	  emit_insn (seq);
4118 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
4119 	}
4120       end_sequence ();
4121     }
4122 
4123   if (BRANCH_COST (optimize_insn_for_speed_p (),
4124 		   false) >= 2)
4125     {
4126       int ushift = GET_MODE_BITSIZE (mode) - logd;
4127 
4128       temp = gen_reg_rtx (mode);
4129       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
4130       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
4131 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
4132 	     > COSTS_N_INSNS (1))
4133 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
4134 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
4135       else
4136 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
4137 			     ushift, NULL_RTX, 1);
4138       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
4139 			   0, OPTAB_LIB_WIDEN);
4140       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4141     }
4142 
4143   label = gen_label_rtx ();
4144   temp = copy_to_mode_reg (mode, op0);
4145   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
4146   expand_inc (temp, gen_int_mode (d - 1, mode));
4147   emit_label (label);
4148   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
4149 }
4150 
4151 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
4152    if that is convenient, and returning where the result is.
4153    You may request either the quotient or the remainder as the result;
4154    specify REM_FLAG nonzero to get the remainder.
4155 
4156    CODE is the expression code for which kind of division this is;
4157    it controls how rounding is done.  MODE is the machine mode to use.
4158    UNSIGNEDP nonzero means do unsigned division.  */
4159 
4160 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
4161    and then correct it by or'ing in missing high bits
4162    if result of ANDI is nonzero.
4163    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4164    This could optimize to a bfexts instruction.
4165    But C doesn't use these operations, so their optimizations are
4166    left for later.  */
4167 /* ??? For modulo, we don't actually need the highpart of the first product,
4168    the low part will do nicely.  And for small divisors, the second multiply
4169    can also be a low-part only multiply or even be completely left out.
4170    E.g. to calculate the remainder of a division by 3 with a 32 bit
4171    multiply, multiply with 0x55555556 and extract the upper two bits;
4172    the result is exact for inputs up to 0x1fffffff.
4173    The input range can be reduced by using cross-sum rules.
4174    For odd divisors >= 3, the following table gives right shift counts
4175    so that if a number is shifted by an integer multiple of the given
4176    amount, the remainder stays the same:
4177    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4178    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4179    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4180    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4181    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4182 
4183    Cross-sum rules for even numbers can be derived by leaving as many bits
4184    to the right alone as the divisor has zeros to the right.
4185    E.g. if x is an unsigned 32 bit number:
4186    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4187    */
4188 
4189 rtx
expand_divmod(int rem_flag,enum tree_code code,machine_mode mode,rtx op0,rtx op1,rtx target,int unsignedp)4190 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4191 	       rtx op0, rtx op1, rtx target, int unsignedp)
4192 {
4193   machine_mode compute_mode;
4194   rtx tquotient;
4195   rtx quotient = 0, remainder = 0;
4196   rtx_insn *last;
4197   rtx_insn *insn;
4198   optab optab1, optab2;
4199   int op1_is_constant, op1_is_pow2 = 0;
4200   int max_cost, extra_cost;
4201   static HOST_WIDE_INT last_div_const = 0;
4202   bool speed = optimize_insn_for_speed_p ();
4203 
4204   op1_is_constant = CONST_INT_P (op1);
4205   if (op1_is_constant)
4206     {
4207       wide_int ext_op1 = rtx_mode_t (op1, mode);
4208       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4209 		     || (! unsignedp
4210 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4211     }
4212 
4213   /*
4214      This is the structure of expand_divmod:
4215 
4216      First comes code to fix up the operands so we can perform the operations
4217      correctly and efficiently.
4218 
4219      Second comes a switch statement with code specific for each rounding mode.
4220      For some special operands this code emits all RTL for the desired
4221      operation, for other cases, it generates only a quotient and stores it in
4222      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4223      to indicate that it has not done anything.
4224 
4225      Last comes code that finishes the operation.  If QUOTIENT is set and
4226      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4227      QUOTIENT is not set, it is computed using trunc rounding.
4228 
4229      We try to generate special code for division and remainder when OP1 is a
4230      constant.  If |OP1| = 2**n we can use shifts and some other fast
4231      operations.  For other values of OP1, we compute a carefully selected
4232      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4233      by m.
4234 
4235      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4236      half of the product.  Different strategies for generating the product are
4237      implemented in expmed_mult_highpart.
4238 
4239      If what we actually want is the remainder, we generate that by another
4240      by-constant multiplication and a subtraction.  */
4241 
4242   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4243      code below will malfunction if we are, so check here and handle
4244      the special case if so.  */
4245   if (op1 == const1_rtx)
4246     return rem_flag ? const0_rtx : op0;
4247 
4248     /* When dividing by -1, we could get an overflow.
4249      negv_optab can handle overflows.  */
4250   if (! unsignedp && op1 == constm1_rtx)
4251     {
4252       if (rem_flag)
4253 	return const0_rtx;
4254       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4255 			  ? negv_optab : neg_optab, op0, target, 0);
4256     }
4257 
4258   if (target
4259       /* Don't use the function value register as a target
4260 	 since we have to read it as well as write it,
4261 	 and function-inlining gets confused by this.  */
4262       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4263 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4264 	  || ((rem_flag || op1_is_constant)
4265 	      && (reg_mentioned_p (target, op0)
4266 		  || (MEM_P (op0) && MEM_P (target))))
4267 	  || reg_mentioned_p (target, op1)
4268 	  || (MEM_P (op1) && MEM_P (target))))
4269     target = 0;
4270 
4271   /* Get the mode in which to perform this computation.  Normally it will
4272      be MODE, but sometimes we can't do the desired operation in MODE.
4273      If so, pick a wider mode in which we can do the operation.  Convert
4274      to that mode at the start to avoid repeated conversions.
4275 
4276      First see what operations we need.  These depend on the expression
4277      we are evaluating.  (We assume that divxx3 insns exist under the
4278      same conditions that modxx3 insns and that these insns don't normally
4279      fail.  If these assumptions are not correct, we may generate less
4280      efficient code in some cases.)
4281 
4282      Then see if we find a mode in which we can open-code that operation
4283      (either a division, modulus, or shift).  Finally, check for the smallest
4284      mode for which we can do the operation with a library call.  */
4285 
4286   /* We might want to refine this now that we have division-by-constant
4287      optimization.  Since expmed_mult_highpart tries so many variants, it is
4288      not straightforward to generalize this.  Maybe we should make an array
4289      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4290 
4291   optab1 = (op1_is_pow2
4292 	    ? (unsignedp ? lshr_optab : ashr_optab)
4293 	    : (unsignedp ? udiv_optab : sdiv_optab));
4294   optab2 = (op1_is_pow2 ? optab1
4295 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4296 
4297   FOR_EACH_MODE_FROM (compute_mode, mode)
4298     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4299 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4300       break;
4301 
4302   if (compute_mode == VOIDmode)
4303     FOR_EACH_MODE_FROM (compute_mode, mode)
4304       if (optab_libfunc (optab1, compute_mode)
4305 	  || optab_libfunc (optab2, compute_mode))
4306 	break;
4307 
4308   /* If we still couldn't find a mode, use MODE, but expand_binop will
4309      probably die.  */
4310   if (compute_mode == VOIDmode)
4311     compute_mode = mode;
4312 
4313   if (target && GET_MODE (target) == compute_mode)
4314     tquotient = target;
4315   else
4316     tquotient = gen_reg_rtx (compute_mode);
4317 
4318 #if 0
4319   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4320      (mode), and thereby get better code when OP1 is a constant.  Do that
4321      later.  It will require going over all usages of SIZE below.  */
4322   size = GET_MODE_BITSIZE (mode);
4323 #endif
4324 
4325   /* Only deduct something for a REM if the last divide done was
4326      for a different constant.   Then set the constant of the last
4327      divide.  */
4328   max_cost = (unsignedp
4329 	      ? udiv_cost (speed, compute_mode)
4330 	      : sdiv_cost (speed, compute_mode));
4331   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4332 		     && INTVAL (op1) == last_div_const))
4333     max_cost -= (mul_cost (speed, compute_mode)
4334 		 + add_cost (speed, compute_mode));
4335 
4336   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4337 
4338   /* Now convert to the best mode to use.  */
4339   if (compute_mode != mode)
4340     {
4341       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4342       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4343 
4344       /* convert_modes may have placed op1 into a register, so we
4345 	 must recompute the following.  */
4346       op1_is_constant = CONST_INT_P (op1);
4347       if (op1_is_constant)
4348 	{
4349 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4350 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4351 			 || (! unsignedp
4352 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4353 	}
4354       else
4355 	op1_is_pow2 = 0;
4356     }
4357 
4358   /* If one of the operands is a volatile MEM, copy it into a register.  */
4359 
4360   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4361     op0 = force_reg (compute_mode, op0);
4362   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4363     op1 = force_reg (compute_mode, op1);
4364 
4365   /* If we need the remainder or if OP1 is constant, we need to
4366      put OP0 in a register in case it has any queued subexpressions.  */
4367   if (rem_flag || op1_is_constant)
4368     op0 = force_reg (compute_mode, op0);
4369 
4370   last = get_last_insn ();
4371 
4372   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4373   if (unsignedp)
4374     {
4375       if (code == FLOOR_DIV_EXPR)
4376 	code = TRUNC_DIV_EXPR;
4377       if (code == FLOOR_MOD_EXPR)
4378 	code = TRUNC_MOD_EXPR;
4379       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4380 	code = TRUNC_DIV_EXPR;
4381     }
4382 
4383   if (op1 != const0_rtx)
4384     switch (code)
4385       {
4386       case TRUNC_MOD_EXPR:
4387       case TRUNC_DIV_EXPR:
4388 	if (op1_is_constant)
4389 	  {
4390 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4391 	    int size = GET_MODE_BITSIZE (int_mode);
4392 	    if (unsignedp)
4393 	      {
4394 		unsigned HOST_WIDE_INT mh, ml;
4395 		int pre_shift, post_shift;
4396 		int dummy;
4397 		wide_int wd = rtx_mode_t (op1, int_mode);
4398 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4399 
4400 		if (wi::popcount (wd) == 1)
4401 		  {
4402 		    pre_shift = floor_log2 (d);
4403 		    if (rem_flag)
4404 		      {
4405 			unsigned HOST_WIDE_INT mask
4406 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4407 			remainder
4408 			  = expand_binop (int_mode, and_optab, op0,
4409 					  gen_int_mode (mask, int_mode),
4410 					  remainder, 1,
4411 					  OPTAB_LIB_WIDEN);
4412 			if (remainder)
4413 			  return gen_lowpart (mode, remainder);
4414 		      }
4415 		    quotient = expand_shift (RSHIFT_EXPR, int_mode, op0,
4416 					     pre_shift, tquotient, 1);
4417 		  }
4418 		else if (size <= HOST_BITS_PER_WIDE_INT)
4419 		  {
4420 		    if (d >= (HOST_WIDE_INT_1U << (size - 1)))
4421 		      {
4422 			/* Most significant bit of divisor is set; emit an scc
4423 			   insn.  */
4424 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4425 							  int_mode, 1, 1);
4426 		      }
4427 		    else
4428 		      {
4429 			/* Find a suitable multiplier and right shift count
4430 			   instead of multiplying with D.  */
4431 
4432 			mh = choose_multiplier (d, size, size,
4433 						&ml, &post_shift, &dummy);
4434 
4435 			/* If the suggested multiplier is more than SIZE bits,
4436 			   we can do better for even divisors, using an
4437 			   initial right shift.  */
4438 			if (mh != 0 && (d & 1) == 0)
4439 			  {
4440 			    pre_shift = ctz_or_zero (d);
4441 			    mh = choose_multiplier (d >> pre_shift, size,
4442 						    size - pre_shift,
4443 						    &ml, &post_shift, &dummy);
4444 			    gcc_assert (!mh);
4445 			  }
4446 			else
4447 			  pre_shift = 0;
4448 
4449 			if (mh != 0)
4450 			  {
4451 			    rtx t1, t2, t3, t4;
4452 
4453 			    if (post_shift - 1 >= BITS_PER_WORD)
4454 			      goto fail1;
4455 
4456 			    extra_cost
4457 			      = (shift_cost (speed, int_mode, post_shift - 1)
4458 				 + shift_cost (speed, int_mode, 1)
4459 				 + 2 * add_cost (speed, int_mode));
4460 			    t1 = expmed_mult_highpart
4461 			      (int_mode, op0, gen_int_mode (ml, int_mode),
4462 			       NULL_RTX, 1, max_cost - extra_cost);
4463 			    if (t1 == 0)
4464 			      goto fail1;
4465 			    t2 = force_operand (gen_rtx_MINUS (int_mode,
4466 							       op0, t1),
4467 						NULL_RTX);
4468 			    t3 = expand_shift (RSHIFT_EXPR, int_mode,
4469 					       t2, 1, NULL_RTX, 1);
4470 			    t4 = force_operand (gen_rtx_PLUS (int_mode,
4471 							      t1, t3),
4472 						NULL_RTX);
4473 			    quotient = expand_shift
4474 			      (RSHIFT_EXPR, int_mode, t4,
4475 			       post_shift - 1, tquotient, 1);
4476 			  }
4477 			else
4478 			  {
4479 			    rtx t1, t2;
4480 
4481 			    if (pre_shift >= BITS_PER_WORD
4482 				|| post_shift >= BITS_PER_WORD)
4483 			      goto fail1;
4484 
4485 			    t1 = expand_shift
4486 			      (RSHIFT_EXPR, int_mode, op0,
4487 			       pre_shift, NULL_RTX, 1);
4488 			    extra_cost
4489 			      = (shift_cost (speed, int_mode, pre_shift)
4490 				 + shift_cost (speed, int_mode, post_shift));
4491 			    t2 = expmed_mult_highpart
4492 			      (int_mode, t1,
4493 			       gen_int_mode (ml, int_mode),
4494 			       NULL_RTX, 1, max_cost - extra_cost);
4495 			    if (t2 == 0)
4496 			      goto fail1;
4497 			    quotient = expand_shift
4498 			      (RSHIFT_EXPR, int_mode, t2,
4499 			       post_shift, tquotient, 1);
4500 			  }
4501 		      }
4502 		  }
4503 		else		/* Too wide mode to use tricky code */
4504 		  break;
4505 
4506 		insn = get_last_insn ();
4507 		if (insn != last)
4508 		  set_dst_reg_note (insn, REG_EQUAL,
4509 				    gen_rtx_UDIV (int_mode, op0, op1),
4510 				    quotient);
4511 	      }
4512 	    else		/* TRUNC_DIV, signed */
4513 	      {
4514 		unsigned HOST_WIDE_INT ml;
4515 		int lgup, post_shift;
4516 		rtx mlr;
4517 		HOST_WIDE_INT d = INTVAL (op1);
4518 		unsigned HOST_WIDE_INT abs_d;
4519 
4520 		/* Not prepared to handle division/remainder by
4521 		   0xffffffffffffffff8000000000000000 etc.  */
4522 		if (d == HOST_WIDE_INT_MIN && size > HOST_BITS_PER_WIDE_INT)
4523 		  break;
4524 
4525 		/* Since d might be INT_MIN, we have to cast to
4526 		   unsigned HOST_WIDE_INT before negating to avoid
4527 		   undefined signed overflow.  */
4528 		abs_d = (d >= 0
4529 			 ? (unsigned HOST_WIDE_INT) d
4530 			 : - (unsigned HOST_WIDE_INT) d);
4531 
4532 		/* n rem d = n rem -d */
4533 		if (rem_flag && d < 0)
4534 		  {
4535 		    d = abs_d;
4536 		    op1 = gen_int_mode (abs_d, int_mode);
4537 		  }
4538 
4539 		if (d == 1)
4540 		  quotient = op0;
4541 		else if (d == -1)
4542 		  quotient = expand_unop (int_mode, neg_optab, op0,
4543 					  tquotient, 0);
4544 		else if (size <= HOST_BITS_PER_WIDE_INT
4545 			 && abs_d == HOST_WIDE_INT_1U << (size - 1))
4546 		  {
4547 		    /* This case is not handled correctly below.  */
4548 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4549 						int_mode, 1, 1);
4550 		    if (quotient == 0)
4551 		      goto fail1;
4552 		  }
4553 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4554 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4555 			 && (rem_flag
4556 			     ? smod_pow2_cheap (speed, int_mode)
4557 			     : sdiv_pow2_cheap (speed, int_mode))
4558 			 /* We assume that cheap metric is true if the
4559 			    optab has an expander for this mode.  */
4560 			 && ((optab_handler ((rem_flag ? smod_optab
4561 					      : sdiv_optab),
4562 					     int_mode)
4563 			      != CODE_FOR_nothing)
4564 			     || (optab_handler (sdivmod_optab, int_mode)
4565 				 != CODE_FOR_nothing)))
4566 		  ;
4567 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4568 		  {
4569 		    if (rem_flag)
4570 		      {
4571 			remainder = expand_smod_pow2 (int_mode, op0, d);
4572 			if (remainder)
4573 			  return gen_lowpart (mode, remainder);
4574 		      }
4575 
4576 		    if (sdiv_pow2_cheap (speed, int_mode)
4577 			&& ((optab_handler (sdiv_optab, int_mode)
4578 			     != CODE_FOR_nothing)
4579 			    || (optab_handler (sdivmod_optab, int_mode)
4580 				!= CODE_FOR_nothing)))
4581 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4582 						int_mode, op0,
4583 						gen_int_mode (abs_d,
4584 							      int_mode),
4585 						NULL_RTX, 0);
4586 		    else
4587 		      quotient = expand_sdiv_pow2 (int_mode, op0, abs_d);
4588 
4589 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4590 		       negate the quotient.  */
4591 		    if (d < 0)
4592 		      {
4593 			insn = get_last_insn ();
4594 			if (insn != last
4595 			    && abs_d < (HOST_WIDE_INT_1U
4596 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4597 			  set_dst_reg_note (insn, REG_EQUAL,
4598 					    gen_rtx_DIV (int_mode, op0,
4599 							 gen_int_mode
4600 							   (abs_d,
4601 							    int_mode)),
4602 					    quotient);
4603 
4604 			quotient = expand_unop (int_mode, neg_optab,
4605 						quotient, quotient, 0);
4606 		      }
4607 		  }
4608 		else if (size <= HOST_BITS_PER_WIDE_INT)
4609 		  {
4610 		    choose_multiplier (abs_d, size, size - 1,
4611 				       &ml, &post_shift, &lgup);
4612 		    if (ml < HOST_WIDE_INT_1U << (size - 1))
4613 		      {
4614 			rtx t1, t2, t3;
4615 
4616 			if (post_shift >= BITS_PER_WORD
4617 			    || size - 1 >= BITS_PER_WORD)
4618 			  goto fail1;
4619 
4620 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4621 				      + shift_cost (speed, int_mode, size - 1)
4622 				      + add_cost (speed, int_mode));
4623 			t1 = expmed_mult_highpart
4624 			  (int_mode, op0, gen_int_mode (ml, int_mode),
4625 			   NULL_RTX, 0, max_cost - extra_cost);
4626 			if (t1 == 0)
4627 			  goto fail1;
4628 			t2 = expand_shift
4629 			  (RSHIFT_EXPR, int_mode, t1,
4630 			   post_shift, NULL_RTX, 0);
4631 			t3 = expand_shift
4632 			  (RSHIFT_EXPR, int_mode, op0,
4633 			   size - 1, NULL_RTX, 0);
4634 			if (d < 0)
4635 			  quotient
4636 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t2),
4637 					     tquotient);
4638 			else
4639 			  quotient
4640 			    = force_operand (gen_rtx_MINUS (int_mode, t2, t3),
4641 					     tquotient);
4642 		      }
4643 		    else
4644 		      {
4645 			rtx t1, t2, t3, t4;
4646 
4647 			if (post_shift >= BITS_PER_WORD
4648 			    || size - 1 >= BITS_PER_WORD)
4649 			  goto fail1;
4650 
4651 			ml |= HOST_WIDE_INT_M1U << (size - 1);
4652 			mlr = gen_int_mode (ml, int_mode);
4653 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4654 				      + shift_cost (speed, int_mode, size - 1)
4655 				      + 2 * add_cost (speed, int_mode));
4656 			t1 = expmed_mult_highpart (int_mode, op0, mlr,
4657 						   NULL_RTX, 0,
4658 						   max_cost - extra_cost);
4659 			if (t1 == 0)
4660 			  goto fail1;
4661 			t2 = force_operand (gen_rtx_PLUS (int_mode, t1, op0),
4662 					    NULL_RTX);
4663 			t3 = expand_shift
4664 			  (RSHIFT_EXPR, int_mode, t2,
4665 			   post_shift, NULL_RTX, 0);
4666 			t4 = expand_shift
4667 			  (RSHIFT_EXPR, int_mode, op0,
4668 			   size - 1, NULL_RTX, 0);
4669 			if (d < 0)
4670 			  quotient
4671 			    = force_operand (gen_rtx_MINUS (int_mode, t4, t3),
4672 					     tquotient);
4673 			else
4674 			  quotient
4675 			    = force_operand (gen_rtx_MINUS (int_mode, t3, t4),
4676 					     tquotient);
4677 		      }
4678 		  }
4679 		else		/* Too wide mode to use tricky code */
4680 		  break;
4681 
4682 		insn = get_last_insn ();
4683 		if (insn != last)
4684 		  set_dst_reg_note (insn, REG_EQUAL,
4685 				    gen_rtx_DIV (int_mode, op0, op1),
4686 				    quotient);
4687 	      }
4688 	    break;
4689 	  }
4690       fail1:
4691 	delete_insns_since (last);
4692 	break;
4693 
4694       case FLOOR_DIV_EXPR:
4695       case FLOOR_MOD_EXPR:
4696       /* We will come here only for signed operations.  */
4697 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
4698 	  {
4699 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
4700 	    int size = GET_MODE_BITSIZE (int_mode);
4701 	    unsigned HOST_WIDE_INT mh, ml;
4702 	    int pre_shift, lgup, post_shift;
4703 	    HOST_WIDE_INT d = INTVAL (op1);
4704 
4705 	    if (d > 0)
4706 	      {
4707 		/* We could just as easily deal with negative constants here,
4708 		   but it does not seem worth the trouble for GCC 2.6.  */
4709 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4710 		  {
4711 		    pre_shift = floor_log2 (d);
4712 		    if (rem_flag)
4713 		      {
4714 			unsigned HOST_WIDE_INT mask
4715 			  = (HOST_WIDE_INT_1U << pre_shift) - 1;
4716 			remainder = expand_binop
4717 			  (int_mode, and_optab, op0,
4718 			   gen_int_mode (mask, int_mode),
4719 			   remainder, 0, OPTAB_LIB_WIDEN);
4720 			if (remainder)
4721 			  return gen_lowpart (mode, remainder);
4722 		      }
4723 		    quotient = expand_shift
4724 		      (RSHIFT_EXPR, int_mode, op0,
4725 		       pre_shift, tquotient, 0);
4726 		  }
4727 		else
4728 		  {
4729 		    rtx t1, t2, t3, t4;
4730 
4731 		    mh = choose_multiplier (d, size, size - 1,
4732 					    &ml, &post_shift, &lgup);
4733 		    gcc_assert (!mh);
4734 
4735 		    if (post_shift < BITS_PER_WORD
4736 			&& size - 1 < BITS_PER_WORD)
4737 		      {
4738 			t1 = expand_shift
4739 			  (RSHIFT_EXPR, int_mode, op0,
4740 			   size - 1, NULL_RTX, 0);
4741 			t2 = expand_binop (int_mode, xor_optab, op0, t1,
4742 					   NULL_RTX, 0, OPTAB_WIDEN);
4743 			extra_cost = (shift_cost (speed, int_mode, post_shift)
4744 				      + shift_cost (speed, int_mode, size - 1)
4745 				      + 2 * add_cost (speed, int_mode));
4746 			t3 = expmed_mult_highpart
4747 			  (int_mode, t2, gen_int_mode (ml, int_mode),
4748 			   NULL_RTX, 1, max_cost - extra_cost);
4749 			if (t3 != 0)
4750 			  {
4751 			    t4 = expand_shift
4752 			      (RSHIFT_EXPR, int_mode, t3,
4753 			       post_shift, NULL_RTX, 1);
4754 			    quotient = expand_binop (int_mode, xor_optab,
4755 						     t4, t1, tquotient, 0,
4756 						     OPTAB_WIDEN);
4757 			  }
4758 		      }
4759 		  }
4760 	      }
4761 	    else
4762 	      {
4763 		rtx nsign, t1, t2, t3, t4;
4764 		t1 = force_operand (gen_rtx_PLUS (int_mode,
4765 						  op0, constm1_rtx), NULL_RTX);
4766 		t2 = expand_binop (int_mode, ior_optab, op0, t1, NULL_RTX,
4767 				   0, OPTAB_WIDEN);
4768 		nsign = expand_shift (RSHIFT_EXPR, int_mode, t2,
4769 				      size - 1, NULL_RTX, 0);
4770 		t3 = force_operand (gen_rtx_MINUS (int_mode, t1, nsign),
4771 				    NULL_RTX);
4772 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, int_mode, t3, op1,
4773 				    NULL_RTX, 0);
4774 		if (t4)
4775 		  {
4776 		    rtx t5;
4777 		    t5 = expand_unop (int_mode, one_cmpl_optab, nsign,
4778 				      NULL_RTX, 0);
4779 		    quotient = force_operand (gen_rtx_PLUS (int_mode, t4, t5),
4780 					      tquotient);
4781 		  }
4782 	      }
4783 	  }
4784 
4785 	if (quotient != 0)
4786 	  break;
4787 	delete_insns_since (last);
4788 
4789 	/* Try using an instruction that produces both the quotient and
4790 	   remainder, using truncation.  We can easily compensate the quotient
4791 	   or remainder to get floor rounding, once we have the remainder.
4792 	   Notice that we compute also the final remainder value here,
4793 	   and return the result right away.  */
4794 	if (target == 0 || GET_MODE (target) != compute_mode)
4795 	  target = gen_reg_rtx (compute_mode);
4796 
4797 	if (rem_flag)
4798 	  {
4799 	    remainder
4800 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4801 	    quotient = gen_reg_rtx (compute_mode);
4802 	  }
4803 	else
4804 	  {
4805 	    quotient
4806 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4807 	    remainder = gen_reg_rtx (compute_mode);
4808 	  }
4809 
4810 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4811 				 quotient, remainder, 0))
4812 	  {
4813 	    /* This could be computed with a branch-less sequence.
4814 	       Save that for later.  */
4815 	    rtx tem;
4816 	    rtx_code_label *label = gen_label_rtx ();
4817 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4818 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4819 				NULL_RTX, 0, OPTAB_WIDEN);
4820 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4821 	    expand_dec (quotient, const1_rtx);
4822 	    expand_inc (remainder, op1);
4823 	    emit_label (label);
4824 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4825 	  }
4826 
4827 	/* No luck with division elimination or divmod.  Have to do it
4828 	   by conditionally adjusting op0 *and* the result.  */
4829 	{
4830 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4831 	  rtx adjusted_op0;
4832 	  rtx tem;
4833 
4834 	  quotient = gen_reg_rtx (compute_mode);
4835 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4836 	  label1 = gen_label_rtx ();
4837 	  label2 = gen_label_rtx ();
4838 	  label3 = gen_label_rtx ();
4839 	  label4 = gen_label_rtx ();
4840 	  label5 = gen_label_rtx ();
4841 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4842 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4843 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4844 			      quotient, 0, OPTAB_LIB_WIDEN);
4845 	  if (tem != quotient)
4846 	    emit_move_insn (quotient, tem);
4847 	  emit_jump_insn (targetm.gen_jump (label5));
4848 	  emit_barrier ();
4849 	  emit_label (label1);
4850 	  expand_inc (adjusted_op0, const1_rtx);
4851 	  emit_jump_insn (targetm.gen_jump (label4));
4852 	  emit_barrier ();
4853 	  emit_label (label2);
4854 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4855 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4856 			      quotient, 0, OPTAB_LIB_WIDEN);
4857 	  if (tem != quotient)
4858 	    emit_move_insn (quotient, tem);
4859 	  emit_jump_insn (targetm.gen_jump (label5));
4860 	  emit_barrier ();
4861 	  emit_label (label3);
4862 	  expand_dec (adjusted_op0, const1_rtx);
4863 	  emit_label (label4);
4864 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4865 			      quotient, 0, OPTAB_LIB_WIDEN);
4866 	  if (tem != quotient)
4867 	    emit_move_insn (quotient, tem);
4868 	  expand_dec (quotient, const1_rtx);
4869 	  emit_label (label5);
4870 	}
4871 	break;
4872 
4873       case CEIL_DIV_EXPR:
4874       case CEIL_MOD_EXPR:
4875 	if (unsignedp)
4876 	  {
4877 	    if (op1_is_constant
4878 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4879 		&& (HWI_COMPUTABLE_MODE_P (compute_mode)
4880 		    || INTVAL (op1) >= 0))
4881 	      {
4882 		scalar_int_mode int_mode
4883 		  = as_a <scalar_int_mode> (compute_mode);
4884 		rtx t1, t2, t3;
4885 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4886 		t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
4887 				   floor_log2 (d), tquotient, 1);
4888 		t2 = expand_binop (int_mode, and_optab, op0,
4889 				   gen_int_mode (d - 1, int_mode),
4890 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4891 		t3 = gen_reg_rtx (int_mode);
4892 		t3 = emit_store_flag (t3, NE, t2, const0_rtx, int_mode, 1, 1);
4893 		if (t3 == 0)
4894 		  {
4895 		    rtx_code_label *lab;
4896 		    lab = gen_label_rtx ();
4897 		    do_cmp_and_jump (t2, const0_rtx, EQ, int_mode, lab);
4898 		    expand_inc (t1, const1_rtx);
4899 		    emit_label (lab);
4900 		    quotient = t1;
4901 		  }
4902 		else
4903 		  quotient = force_operand (gen_rtx_PLUS (int_mode, t1, t3),
4904 					    tquotient);
4905 		break;
4906 	      }
4907 
4908 	    /* Try using an instruction that produces both the quotient and
4909 	       remainder, using truncation.  We can easily compensate the
4910 	       quotient or remainder to get ceiling rounding, once we have the
4911 	       remainder.  Notice that we compute also the final remainder
4912 	       value here, and return the result right away.  */
4913 	    if (target == 0 || GET_MODE (target) != compute_mode)
4914 	      target = gen_reg_rtx (compute_mode);
4915 
4916 	    if (rem_flag)
4917 	      {
4918 		remainder = (REG_P (target)
4919 			     ? target : gen_reg_rtx (compute_mode));
4920 		quotient = gen_reg_rtx (compute_mode);
4921 	      }
4922 	    else
4923 	      {
4924 		quotient = (REG_P (target)
4925 			    ? target : gen_reg_rtx (compute_mode));
4926 		remainder = gen_reg_rtx (compute_mode);
4927 	      }
4928 
4929 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4930 				     remainder, 1))
4931 	      {
4932 		/* This could be computed with a branch-less sequence.
4933 		   Save that for later.  */
4934 		rtx_code_label *label = gen_label_rtx ();
4935 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4936 				 compute_mode, label);
4937 		expand_inc (quotient, const1_rtx);
4938 		expand_dec (remainder, op1);
4939 		emit_label (label);
4940 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4941 	      }
4942 
4943 	    /* No luck with division elimination or divmod.  Have to do it
4944 	       by conditionally adjusting op0 *and* the result.  */
4945 	    {
4946 	      rtx_code_label *label1, *label2;
4947 	      rtx adjusted_op0, tem;
4948 
4949 	      quotient = gen_reg_rtx (compute_mode);
4950 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4951 	      label1 = gen_label_rtx ();
4952 	      label2 = gen_label_rtx ();
4953 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4954 			       compute_mode, label1);
4955 	      emit_move_insn  (quotient, const0_rtx);
4956 	      emit_jump_insn (targetm.gen_jump (label2));
4957 	      emit_barrier ();
4958 	      emit_label (label1);
4959 	      expand_dec (adjusted_op0, const1_rtx);
4960 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4961 				  quotient, 1, OPTAB_LIB_WIDEN);
4962 	      if (tem != quotient)
4963 		emit_move_insn (quotient, tem);
4964 	      expand_inc (quotient, const1_rtx);
4965 	      emit_label (label2);
4966 	    }
4967 	  }
4968 	else /* signed */
4969 	  {
4970 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4971 		&& INTVAL (op1) >= 0)
4972 	      {
4973 		/* This is extremely similar to the code for the unsigned case
4974 		   above.  For 2.7 we should merge these variants, but for
4975 		   2.6.1 I don't want to touch the code for unsigned since that
4976 		   get used in C.  The signed case will only be used by other
4977 		   languages (Ada).  */
4978 
4979 		rtx t1, t2, t3;
4980 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4981 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4982 				   floor_log2 (d), tquotient, 0);
4983 		t2 = expand_binop (compute_mode, and_optab, op0,
4984 				   gen_int_mode (d - 1, compute_mode),
4985 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4986 		t3 = gen_reg_rtx (compute_mode);
4987 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4988 				      compute_mode, 1, 1);
4989 		if (t3 == 0)
4990 		  {
4991 		    rtx_code_label *lab;
4992 		    lab = gen_label_rtx ();
4993 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4994 		    expand_inc (t1, const1_rtx);
4995 		    emit_label (lab);
4996 		    quotient = t1;
4997 		  }
4998 		else
4999 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
5000 							  t1, t3),
5001 					    tquotient);
5002 		break;
5003 	      }
5004 
5005 	    /* Try using an instruction that produces both the quotient and
5006 	       remainder, using truncation.  We can easily compensate the
5007 	       quotient or remainder to get ceiling rounding, once we have the
5008 	       remainder.  Notice that we compute also the final remainder
5009 	       value here, and return the result right away.  */
5010 	    if (target == 0 || GET_MODE (target) != compute_mode)
5011 	      target = gen_reg_rtx (compute_mode);
5012 	    if (rem_flag)
5013 	      {
5014 		remainder= (REG_P (target)
5015 			    ? target : gen_reg_rtx (compute_mode));
5016 		quotient = gen_reg_rtx (compute_mode);
5017 	      }
5018 	    else
5019 	      {
5020 		quotient = (REG_P (target)
5021 			    ? target : gen_reg_rtx (compute_mode));
5022 		remainder = gen_reg_rtx (compute_mode);
5023 	      }
5024 
5025 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
5026 				     remainder, 0))
5027 	      {
5028 		/* This could be computed with a branch-less sequence.
5029 		   Save that for later.  */
5030 		rtx tem;
5031 		rtx_code_label *label = gen_label_rtx ();
5032 		do_cmp_and_jump (remainder, const0_rtx, EQ,
5033 				 compute_mode, label);
5034 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
5035 				    NULL_RTX, 0, OPTAB_WIDEN);
5036 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
5037 		expand_inc (quotient, const1_rtx);
5038 		expand_dec (remainder, op1);
5039 		emit_label (label);
5040 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
5041 	      }
5042 
5043 	    /* No luck with division elimination or divmod.  Have to do it
5044 	       by conditionally adjusting op0 *and* the result.  */
5045 	    {
5046 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
5047 	      rtx adjusted_op0;
5048 	      rtx tem;
5049 
5050 	      quotient = gen_reg_rtx (compute_mode);
5051 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
5052 	      label1 = gen_label_rtx ();
5053 	      label2 = gen_label_rtx ();
5054 	      label3 = gen_label_rtx ();
5055 	      label4 = gen_label_rtx ();
5056 	      label5 = gen_label_rtx ();
5057 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
5058 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
5059 			       compute_mode, label1);
5060 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5061 				  quotient, 0, OPTAB_LIB_WIDEN);
5062 	      if (tem != quotient)
5063 		emit_move_insn (quotient, tem);
5064 	      emit_jump_insn (targetm.gen_jump (label5));
5065 	      emit_barrier ();
5066 	      emit_label (label1);
5067 	      expand_dec (adjusted_op0, const1_rtx);
5068 	      emit_jump_insn (targetm.gen_jump (label4));
5069 	      emit_barrier ();
5070 	      emit_label (label2);
5071 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
5072 			       compute_mode, label3);
5073 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5074 				  quotient, 0, OPTAB_LIB_WIDEN);
5075 	      if (tem != quotient)
5076 		emit_move_insn (quotient, tem);
5077 	      emit_jump_insn (targetm.gen_jump (label5));
5078 	      emit_barrier ();
5079 	      emit_label (label3);
5080 	      expand_inc (adjusted_op0, const1_rtx);
5081 	      emit_label (label4);
5082 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
5083 				  quotient, 0, OPTAB_LIB_WIDEN);
5084 	      if (tem != quotient)
5085 		emit_move_insn (quotient, tem);
5086 	      expand_inc (quotient, const1_rtx);
5087 	      emit_label (label5);
5088 	    }
5089 	  }
5090 	break;
5091 
5092       case EXACT_DIV_EXPR:
5093 	if (op1_is_constant && HWI_COMPUTABLE_MODE_P (compute_mode))
5094 	  {
5095 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5096 	    int size = GET_MODE_BITSIZE (int_mode);
5097 	    HOST_WIDE_INT d = INTVAL (op1);
5098 	    unsigned HOST_WIDE_INT ml;
5099 	    int pre_shift;
5100 	    rtx t1;
5101 
5102 	    pre_shift = ctz_or_zero (d);
5103 	    ml = invert_mod2n (d >> pre_shift, size);
5104 	    t1 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5105 			       pre_shift, NULL_RTX, unsignedp);
5106 	    quotient = expand_mult (int_mode, t1, gen_int_mode (ml, int_mode),
5107 				    NULL_RTX, 1);
5108 
5109 	    insn = get_last_insn ();
5110 	    set_dst_reg_note (insn, REG_EQUAL,
5111 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5112 					      int_mode, op0, op1),
5113 			      quotient);
5114 	  }
5115 	break;
5116 
5117       case ROUND_DIV_EXPR:
5118       case ROUND_MOD_EXPR:
5119 	if (unsignedp)
5120 	  {
5121 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5122 	    rtx tem;
5123 	    rtx_code_label *label;
5124 	    label = gen_label_rtx ();
5125 	    quotient = gen_reg_rtx (int_mode);
5126 	    remainder = gen_reg_rtx (int_mode);
5127 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
5128 	      {
5129 		rtx tem;
5130 		quotient = expand_binop (int_mode, udiv_optab, op0, op1,
5131 					 quotient, 1, OPTAB_LIB_WIDEN);
5132 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 1);
5133 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5134 					  remainder, 1, OPTAB_LIB_WIDEN);
5135 	      }
5136 	    tem = plus_constant (int_mode, op1, -1);
5137 	    tem = expand_shift (RSHIFT_EXPR, int_mode, tem, 1, NULL_RTX, 1);
5138 	    do_cmp_and_jump (remainder, tem, LEU, int_mode, label);
5139 	    expand_inc (quotient, const1_rtx);
5140 	    expand_dec (remainder, op1);
5141 	    emit_label (label);
5142 	  }
5143 	else
5144 	  {
5145 	    scalar_int_mode int_mode = as_a <scalar_int_mode> (compute_mode);
5146 	    int size = GET_MODE_BITSIZE (int_mode);
5147 	    rtx abs_rem, abs_op1, tem, mask;
5148 	    rtx_code_label *label;
5149 	    label = gen_label_rtx ();
5150 	    quotient = gen_reg_rtx (int_mode);
5151 	    remainder = gen_reg_rtx (int_mode);
5152 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
5153 	      {
5154 		rtx tem;
5155 		quotient = expand_binop (int_mode, sdiv_optab, op0, op1,
5156 					 quotient, 0, OPTAB_LIB_WIDEN);
5157 		tem = expand_mult (int_mode, quotient, op1, NULL_RTX, 0);
5158 		remainder = expand_binop (int_mode, sub_optab, op0, tem,
5159 					  remainder, 0, OPTAB_LIB_WIDEN);
5160 	      }
5161 	    abs_rem = expand_abs (int_mode, remainder, NULL_RTX, 1, 0);
5162 	    abs_op1 = expand_abs (int_mode, op1, NULL_RTX, 1, 0);
5163 	    tem = expand_shift (LSHIFT_EXPR, int_mode, abs_rem,
5164 				1, NULL_RTX, 1);
5165 	    do_cmp_and_jump (tem, abs_op1, LTU, int_mode, label);
5166 	    tem = expand_binop (int_mode, xor_optab, op0, op1,
5167 				NULL_RTX, 0, OPTAB_WIDEN);
5168 	    mask = expand_shift (RSHIFT_EXPR, int_mode, tem,
5169 				 size - 1, NULL_RTX, 0);
5170 	    tem = expand_binop (int_mode, xor_optab, mask, const1_rtx,
5171 				NULL_RTX, 0, OPTAB_WIDEN);
5172 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5173 				NULL_RTX, 0, OPTAB_WIDEN);
5174 	    expand_inc (quotient, tem);
5175 	    tem = expand_binop (int_mode, xor_optab, mask, op1,
5176 				NULL_RTX, 0, OPTAB_WIDEN);
5177 	    tem = expand_binop (int_mode, sub_optab, tem, mask,
5178 				NULL_RTX, 0, OPTAB_WIDEN);
5179 	    expand_dec (remainder, tem);
5180 	    emit_label (label);
5181 	  }
5182 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5183 
5184       default:
5185 	gcc_unreachable ();
5186       }
5187 
5188   if (quotient == 0)
5189     {
5190       if (target && GET_MODE (target) != compute_mode)
5191 	target = 0;
5192 
5193       if (rem_flag)
5194 	{
5195 	  /* Try to produce the remainder without producing the quotient.
5196 	     If we seem to have a divmod pattern that does not require widening,
5197 	     don't try widening here.  We should really have a WIDEN argument
5198 	     to expand_twoval_binop, since what we'd really like to do here is
5199 	     1) try a mod insn in compute_mode
5200 	     2) try a divmod insn in compute_mode
5201 	     3) try a div insn in compute_mode and multiply-subtract to get
5202 	        remainder
5203 	     4) try the same things with widening allowed.  */
5204 	  remainder
5205 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5206 				 op0, op1, target,
5207 				 unsignedp,
5208 				 ((optab_handler (optab2, compute_mode)
5209 				   != CODE_FOR_nothing)
5210 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5211 	  if (remainder == 0)
5212 	    {
5213 	      /* No luck there.  Can we do remainder and divide at once
5214 		 without a library call?  */
5215 	      remainder = gen_reg_rtx (compute_mode);
5216 	      if (! expand_twoval_binop ((unsignedp
5217 					  ? udivmod_optab
5218 					  : sdivmod_optab),
5219 					 op0, op1,
5220 					 NULL_RTX, remainder, unsignedp))
5221 		remainder = 0;
5222 	    }
5223 
5224 	  if (remainder)
5225 	    return gen_lowpart (mode, remainder);
5226 	}
5227 
5228       /* Produce the quotient.  Try a quotient insn, but not a library call.
5229 	 If we have a divmod in this mode, use it in preference to widening
5230 	 the div (for this test we assume it will not fail). Note that optab2
5231 	 is set to the one of the two optabs that the call below will use.  */
5232       quotient
5233 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5234 			     op0, op1, rem_flag ? NULL_RTX : target,
5235 			     unsignedp,
5236 			     ((optab_handler (optab2, compute_mode)
5237 			       != CODE_FOR_nothing)
5238 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5239 
5240       if (quotient == 0)
5241 	{
5242 	  /* No luck there.  Try a quotient-and-remainder insn,
5243 	     keeping the quotient alone.  */
5244 	  quotient = gen_reg_rtx (compute_mode);
5245 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5246 				     op0, op1,
5247 				     quotient, NULL_RTX, unsignedp))
5248 	    {
5249 	      quotient = 0;
5250 	      if (! rem_flag)
5251 		/* Still no luck.  If we are not computing the remainder,
5252 		   use a library call for the quotient.  */
5253 		quotient = sign_expand_binop (compute_mode,
5254 					      udiv_optab, sdiv_optab,
5255 					      op0, op1, target,
5256 					      unsignedp, OPTAB_LIB_WIDEN);
5257 	    }
5258 	}
5259     }
5260 
5261   if (rem_flag)
5262     {
5263       if (target && GET_MODE (target) != compute_mode)
5264 	target = 0;
5265 
5266       if (quotient == 0)
5267 	{
5268 	  /* No divide instruction either.  Use library for remainder.  */
5269 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5270 					 op0, op1, target,
5271 					 unsignedp, OPTAB_LIB_WIDEN);
5272 	  /* No remainder function.  Try a quotient-and-remainder
5273 	     function, keeping the remainder.  */
5274 	  if (!remainder)
5275 	    {
5276 	      remainder = gen_reg_rtx (compute_mode);
5277 	      if (!expand_twoval_binop_libfunc
5278 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5279 		   op0, op1,
5280 		   NULL_RTX, remainder,
5281 		   unsignedp ? UMOD : MOD))
5282 		remainder = NULL_RTX;
5283 	    }
5284 	}
5285       else
5286 	{
5287 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5288 	  remainder = expand_mult (compute_mode, quotient, op1,
5289 				   NULL_RTX, unsignedp);
5290 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5291 				    remainder, target, unsignedp,
5292 				    OPTAB_LIB_WIDEN);
5293 	}
5294     }
5295 
5296   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5297 }
5298 
5299 /* Return a tree node with data type TYPE, describing the value of X.
5300    Usually this is an VAR_DECL, if there is no obvious better choice.
5301    X may be an expression, however we only support those expressions
5302    generated by loop.c.  */
5303 
5304 tree
make_tree(tree type,rtx x)5305 make_tree (tree type, rtx x)
5306 {
5307   tree t;
5308 
5309   switch (GET_CODE (x))
5310     {
5311     case CONST_INT:
5312     case CONST_WIDE_INT:
5313       t = wide_int_to_tree (type, rtx_mode_t (x, TYPE_MODE (type)));
5314       return t;
5315 
5316     case CONST_DOUBLE:
5317       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5318       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5319 	t = wide_int_to_tree (type,
5320 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5321 						    HOST_BITS_PER_WIDE_INT * 2));
5322       else
5323 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5324 
5325       return t;
5326 
5327     case CONST_VECTOR:
5328       {
5329 	unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
5330 	unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
5331 	tree itype = TREE_TYPE (type);
5332 
5333 	/* Build a tree with vector elements.  */
5334 	tree_vector_builder elts (type, npatterns, nelts_per_pattern);
5335 	unsigned int count = elts.encoded_nelts ();
5336 	for (unsigned int i = 0; i < count; ++i)
5337 	  {
5338 	    rtx elt = CONST_VECTOR_ELT (x, i);
5339 	    elts.quick_push (make_tree (itype, elt));
5340 	  }
5341 
5342 	return elts.build ();
5343       }
5344 
5345     case PLUS:
5346       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5347 			  make_tree (type, XEXP (x, 1)));
5348 
5349     case MINUS:
5350       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5351 			  make_tree (type, XEXP (x, 1)));
5352 
5353     case NEG:
5354       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5355 
5356     case MULT:
5357       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5358 			  make_tree (type, XEXP (x, 1)));
5359 
5360     case ASHIFT:
5361       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5362 			  make_tree (type, XEXP (x, 1)));
5363 
5364     case LSHIFTRT:
5365       t = unsigned_type_for (type);
5366       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5367 			    		 make_tree (t, XEXP (x, 0)),
5368 				    	 make_tree (type, XEXP (x, 1))));
5369 
5370     case ASHIFTRT:
5371       t = signed_type_for (type);
5372       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5373 					 make_tree (t, XEXP (x, 0)),
5374 				    	 make_tree (type, XEXP (x, 1))));
5375 
5376     case DIV:
5377       if (TREE_CODE (type) != REAL_TYPE)
5378 	t = signed_type_for (type);
5379       else
5380 	t = type;
5381 
5382       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5383 				    	 make_tree (t, XEXP (x, 0)),
5384 				    	 make_tree (t, XEXP (x, 1))));
5385     case UDIV:
5386       t = unsigned_type_for (type);
5387       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5388 				    	 make_tree (t, XEXP (x, 0)),
5389 				    	 make_tree (t, XEXP (x, 1))));
5390 
5391     case SIGN_EXTEND:
5392     case ZERO_EXTEND:
5393       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5394 					  GET_CODE (x) == ZERO_EXTEND);
5395       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5396 
5397     case CONST:
5398       return make_tree (type, XEXP (x, 0));
5399 
5400     case SYMBOL_REF:
5401       t = SYMBOL_REF_DECL (x);
5402       if (t)
5403 	return fold_convert (type, build_fold_addr_expr (t));
5404       /* fall through.  */
5405 
5406     default:
5407       if (CONST_POLY_INT_P (x))
5408 	return wide_int_to_tree (t, const_poly_int_value (x));
5409 
5410       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5411 
5412       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5413 	 address mode to pointer mode.  */
5414       if (POINTER_TYPE_P (type))
5415 	x = convert_memory_address_addr_space
5416 	  (SCALAR_INT_TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5417 
5418       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5419 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5420       t->decl_with_rtl.rtl = x;
5421 
5422       return t;
5423     }
5424 }
5425 
5426 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5427    and returning TARGET.
5428 
5429    If TARGET is 0, a pseudo-register or constant is returned.  */
5430 
5431 rtx
expand_and(machine_mode mode,rtx op0,rtx op1,rtx target)5432 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5433 {
5434   rtx tem = 0;
5435 
5436   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5437     tem = simplify_binary_operation (AND, mode, op0, op1);
5438   if (tem == 0)
5439     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5440 
5441   if (target == 0)
5442     target = tem;
5443   else if (tem != target)
5444     emit_move_insn (target, tem);
5445   return target;
5446 }
5447 
5448 /* Helper function for emit_store_flag.  */
5449 rtx
emit_cstore(rtx target,enum insn_code icode,enum rtx_code code,machine_mode mode,machine_mode compare_mode,int unsignedp,rtx x,rtx y,int normalizep,machine_mode target_mode)5450 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5451 	     machine_mode mode, machine_mode compare_mode,
5452 	     int unsignedp, rtx x, rtx y, int normalizep,
5453 	     machine_mode target_mode)
5454 {
5455   struct expand_operand ops[4];
5456   rtx op0, comparison, subtarget;
5457   rtx_insn *last;
5458   scalar_int_mode result_mode = targetm.cstore_mode (icode);
5459   scalar_int_mode int_target_mode;
5460 
5461   last = get_last_insn ();
5462   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5463   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5464   if (!x || !y)
5465     {
5466       delete_insns_since (last);
5467       return NULL_RTX;
5468     }
5469 
5470   if (target_mode == VOIDmode)
5471     int_target_mode = result_mode;
5472   else
5473     int_target_mode = as_a <scalar_int_mode> (target_mode);
5474   if (!target)
5475     target = gen_reg_rtx (int_target_mode);
5476 
5477   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5478 
5479   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5480   create_fixed_operand (&ops[1], comparison);
5481   create_fixed_operand (&ops[2], x);
5482   create_fixed_operand (&ops[3], y);
5483   if (!maybe_expand_insn (icode, 4, ops))
5484     {
5485       delete_insns_since (last);
5486       return NULL_RTX;
5487     }
5488   subtarget = ops[0].value;
5489 
5490   /* If we are converting to a wider mode, first convert to
5491      INT_TARGET_MODE, then normalize.  This produces better combining
5492      opportunities on machines that have a SIGN_EXTRACT when we are
5493      testing a single bit.  This mostly benefits the 68k.
5494 
5495      If STORE_FLAG_VALUE does not have the sign bit set when
5496      interpreted in MODE, we can do this conversion as unsigned, which
5497      is usually more efficient.  */
5498   if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (result_mode))
5499     {
5500       convert_move (target, subtarget,
5501 		    val_signbit_known_clear_p (result_mode,
5502 					       STORE_FLAG_VALUE));
5503       op0 = target;
5504       result_mode = int_target_mode;
5505     }
5506   else
5507     op0 = subtarget;
5508 
5509   /* If we want to keep subexpressions around, don't reuse our last
5510      target.  */
5511   if (optimize)
5512     subtarget = 0;
5513 
5514   /* Now normalize to the proper value in MODE.  Sometimes we don't
5515      have to do anything.  */
5516   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5517     ;
5518   /* STORE_FLAG_VALUE might be the most negative number, so write
5519      the comparison this way to avoid a compiler-time warning.  */
5520   else if (- normalizep == STORE_FLAG_VALUE)
5521     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5522 
5523   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5524      it hard to use a value of just the sign bit due to ANSI integer
5525      constant typing rules.  */
5526   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5527     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5528 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5529 			normalizep == 1);
5530   else
5531     {
5532       gcc_assert (STORE_FLAG_VALUE & 1);
5533 
5534       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5535       if (normalizep == -1)
5536 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5537     }
5538 
5539   /* If we were converting to a smaller mode, do the conversion now.  */
5540   if (int_target_mode != result_mode)
5541     {
5542       convert_move (target, op0, 0);
5543       return target;
5544     }
5545   else
5546     return op0;
5547 }
5548 
5549 
5550 /* A subroutine of emit_store_flag only including "tricks" that do not
5551    need a recursive call.  These are kept separate to avoid infinite
5552    loops.  */
5553 
5554 static rtx
emit_store_flag_1(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep,machine_mode target_mode)5555 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5556 		   machine_mode mode, int unsignedp, int normalizep,
5557 		   machine_mode target_mode)
5558 {
5559   rtx subtarget;
5560   enum insn_code icode;
5561   machine_mode compare_mode;
5562   enum mode_class mclass;
5563   enum rtx_code scode;
5564 
5565   if (unsignedp)
5566     code = unsigned_condition (code);
5567   scode = swap_condition (code);
5568 
5569   /* If one operand is constant, make it the second one.  Only do this
5570      if the other operand is not constant as well.  */
5571 
5572   if (swap_commutative_operands_p (op0, op1))
5573     {
5574       std::swap (op0, op1);
5575       code = swap_condition (code);
5576     }
5577 
5578   if (mode == VOIDmode)
5579     mode = GET_MODE (op0);
5580 
5581   /* For some comparisons with 1 and -1, we can convert this to
5582      comparisons with zero.  This will often produce more opportunities for
5583      store-flag insns.  */
5584 
5585   switch (code)
5586     {
5587     case LT:
5588       if (op1 == const1_rtx)
5589 	op1 = const0_rtx, code = LE;
5590       break;
5591     case LE:
5592       if (op1 == constm1_rtx)
5593 	op1 = const0_rtx, code = LT;
5594       break;
5595     case GE:
5596       if (op1 == const1_rtx)
5597 	op1 = const0_rtx, code = GT;
5598       break;
5599     case GT:
5600       if (op1 == constm1_rtx)
5601 	op1 = const0_rtx, code = GE;
5602       break;
5603     case GEU:
5604       if (op1 == const1_rtx)
5605 	op1 = const0_rtx, code = NE;
5606       break;
5607     case LTU:
5608       if (op1 == const1_rtx)
5609 	op1 = const0_rtx, code = EQ;
5610       break;
5611     default:
5612       break;
5613     }
5614 
5615   /* If we are comparing a double-word integer with zero or -1, we can
5616      convert the comparison into one involving a single word.  */
5617   scalar_int_mode int_mode;
5618   if (is_int_mode (mode, &int_mode)
5619       && GET_MODE_BITSIZE (int_mode) == BITS_PER_WORD * 2
5620       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5621     {
5622       rtx tem;
5623       if ((code == EQ || code == NE)
5624 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5625 	{
5626 	  rtx op00, op01;
5627 
5628 	  /* Do a logical OR or AND of the two words and compare the
5629 	     result.  */
5630 	  op00 = simplify_gen_subreg (word_mode, op0, int_mode, 0);
5631 	  op01 = simplify_gen_subreg (word_mode, op0, int_mode, UNITS_PER_WORD);
5632 	  tem = expand_binop (word_mode,
5633 			      op1 == const0_rtx ? ior_optab : and_optab,
5634 			      op00, op01, NULL_RTX, unsignedp,
5635 			      OPTAB_DIRECT);
5636 
5637 	  if (tem != 0)
5638 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5639 				   unsignedp, normalizep);
5640 	}
5641       else if ((code == LT || code == GE) && op1 == const0_rtx)
5642 	{
5643 	  rtx op0h;
5644 
5645 	  /* If testing the sign bit, can just test on high word.  */
5646 	  op0h = simplify_gen_subreg (word_mode, op0, int_mode,
5647 				      subreg_highpart_offset (word_mode,
5648 							      int_mode));
5649 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5650 				 unsignedp, normalizep);
5651 	}
5652       else
5653 	tem = NULL_RTX;
5654 
5655       if (tem)
5656 	{
5657 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5658 	    return tem;
5659 	  if (!target)
5660 	    target = gen_reg_rtx (target_mode);
5661 
5662 	  convert_move (target, tem,
5663 			!val_signbit_known_set_p (word_mode,
5664 						  (normalizep ? normalizep
5665 						   : STORE_FLAG_VALUE)));
5666 	  return target;
5667 	}
5668     }
5669 
5670   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5671      complement of A (for GE) and shifting the sign bit to the low bit.  */
5672   if (op1 == const0_rtx && (code == LT || code == GE)
5673       && is_int_mode (mode, &int_mode)
5674       && (normalizep || STORE_FLAG_VALUE == 1
5675 	  || val_signbit_p (int_mode, STORE_FLAG_VALUE)))
5676     {
5677       scalar_int_mode int_target_mode;
5678       subtarget = target;
5679 
5680       if (!target)
5681 	int_target_mode = int_mode;
5682       else
5683 	{
5684 	  /* If the result is to be wider than OP0, it is best to convert it
5685 	     first.  If it is to be narrower, it is *incorrect* to convert it
5686 	     first.  */
5687 	  int_target_mode = as_a <scalar_int_mode> (target_mode);
5688 	  if (GET_MODE_SIZE (int_target_mode) > GET_MODE_SIZE (int_mode))
5689 	    {
5690 	      op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5691 	      int_mode = int_target_mode;
5692 	    }
5693 	}
5694 
5695       if (int_target_mode != int_mode)
5696 	subtarget = 0;
5697 
5698       if (code == GE)
5699 	op0 = expand_unop (int_mode, one_cmpl_optab, op0,
5700 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5701 			    ? 0 : subtarget), 0);
5702 
5703       if (STORE_FLAG_VALUE == 1 || normalizep)
5704 	/* If we are supposed to produce a 0/1 value, we want to do
5705 	   a logical shift from the sign bit to the low-order bit; for
5706 	   a -1/0 value, we do an arithmetic shift.  */
5707 	op0 = expand_shift (RSHIFT_EXPR, int_mode, op0,
5708 			    GET_MODE_BITSIZE (int_mode) - 1,
5709 			    subtarget, normalizep != -1);
5710 
5711       if (int_mode != int_target_mode)
5712 	op0 = convert_modes (int_target_mode, int_mode, op0, 0);
5713 
5714       return op0;
5715     }
5716 
5717   mclass = GET_MODE_CLASS (mode);
5718   FOR_EACH_MODE_FROM (compare_mode, mode)
5719     {
5720      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5721      icode = optab_handler (cstore_optab, optab_mode);
5722      if (icode != CODE_FOR_nothing)
5723 	{
5724 	  do_pending_stack_adjust ();
5725 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5726 				 unsignedp, op0, op1, normalizep, target_mode);
5727 	  if (tem)
5728 	    return tem;
5729 
5730 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5731 	    {
5732 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5733 				 unsignedp, op1, op0, normalizep, target_mode);
5734 	      if (tem)
5735 	        return tem;
5736 	    }
5737 	  break;
5738 	}
5739     }
5740 
5741   return 0;
5742 }
5743 
5744 /* Subroutine of emit_store_flag that handles cases in which the operands
5745    are scalar integers.  SUBTARGET is the target to use for temporary
5746    operations and TRUEVAL is the value to store when the condition is
5747    true.  All other arguments are as for emit_store_flag.  */
5748 
5749 rtx
emit_store_flag_int(rtx target,rtx subtarget,enum rtx_code code,rtx op0,rtx op1,scalar_int_mode mode,int unsignedp,int normalizep,rtx trueval)5750 emit_store_flag_int (rtx target, rtx subtarget, enum rtx_code code, rtx op0,
5751 		     rtx op1, scalar_int_mode mode, int unsignedp,
5752 		     int normalizep, rtx trueval)
5753 {
5754   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5755   rtx_insn *last = get_last_insn ();
5756 
5757   /* If this is an equality comparison of integers, we can try to exclusive-or
5758      (or subtract) the two operands and use a recursive call to try the
5759      comparison with zero.  Don't do any of these cases if branches are
5760      very cheap.  */
5761 
5762   if ((code == EQ || code == NE) && op1 != const0_rtx)
5763     {
5764       rtx tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5765 			      OPTAB_WIDEN);
5766 
5767       if (tem == 0)
5768 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5769 			    OPTAB_WIDEN);
5770       if (tem != 0)
5771 	tem = emit_store_flag (target, code, tem, const0_rtx,
5772 			       mode, unsignedp, normalizep);
5773       if (tem != 0)
5774 	return tem;
5775 
5776       delete_insns_since (last);
5777     }
5778 
5779   /* For integer comparisons, try the reverse comparison.  However, for
5780      small X and if we'd have anyway to extend, implementing "X != 0"
5781      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5782   rtx_code rcode = reverse_condition (code);
5783   if (can_compare_p (rcode, mode, ccp_store_flag)
5784       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5785 	    && code == NE
5786 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5787 	    && op1 == const0_rtx))
5788     {
5789       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5790 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5791 
5792       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5793       if (want_add
5794 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5795 		       optimize_insn_for_speed_p ()) == 0)
5796 	{
5797 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5798 				       STORE_FLAG_VALUE, target_mode);
5799 	  if (tem != 0)
5800 	    tem = expand_binop (target_mode, add_optab, tem,
5801 				gen_int_mode (normalizep, target_mode),
5802 				target, 0, OPTAB_WIDEN);
5803 	  if (tem != 0)
5804 	    return tem;
5805 	}
5806       else if (!want_add
5807 	       && rtx_cost (trueval, mode, XOR, 1,
5808 			    optimize_insn_for_speed_p ()) == 0)
5809 	{
5810 	  rtx tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5811 				       normalizep, target_mode);
5812 	  if (tem != 0)
5813 	    tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5814 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5815 	  if (tem != 0)
5816 	    return tem;
5817 	}
5818 
5819       delete_insns_since (last);
5820     }
5821 
5822   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5823      the constant zero.  Reject all other comparisons at this point.  Only
5824      do LE and GT if branches are expensive since they are expensive on
5825      2-operand machines.  */
5826 
5827   if (op1 != const0_rtx
5828       || (code != EQ && code != NE
5829 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5830 			   false) <= 1 || (code != LE && code != GT))))
5831     return 0;
5832 
5833   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5834      do the necessary operation below.  */
5835 
5836   rtx tem = 0;
5837 
5838   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5839      the sign bit set.  */
5840 
5841   if (code == LE)
5842     {
5843       /* This is destructive, so SUBTARGET can't be OP0.  */
5844       if (rtx_equal_p (subtarget, op0))
5845 	subtarget = 0;
5846 
5847       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5848 			  OPTAB_WIDEN);
5849       if (tem)
5850 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5851 			    OPTAB_WIDEN);
5852     }
5853 
5854   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5855      number of bits in the mode of OP0, minus one.  */
5856 
5857   if (code == GT)
5858     {
5859       if (rtx_equal_p (subtarget, op0))
5860 	subtarget = 0;
5861 
5862       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5863 				GET_MODE_BITSIZE (mode) - 1,
5864 				subtarget, 0);
5865       if (tem)
5866 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5867 			    OPTAB_WIDEN);
5868     }
5869 
5870   if (code == EQ || code == NE)
5871     {
5872       /* For EQ or NE, one way to do the comparison is to apply an operation
5873 	 that converts the operand into a positive number if it is nonzero
5874 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5875 	 for NE we negate.  This puts the result in the sign bit.  Then we
5876 	 normalize with a shift, if needed.
5877 
5878 	 Two operations that can do the above actions are ABS and FFS, so try
5879 	 them.  If that doesn't work, and MODE is smaller than a full word,
5880 	 we can use zero-extension to the wider mode (an unsigned conversion)
5881 	 as the operation.  */
5882 
5883       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5884 	 that is compensated by the subsequent overflow when subtracting
5885 	 one / negating.  */
5886 
5887       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5888 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5889       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5890 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5891       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5892 	{
5893 	  tem = convert_modes (word_mode, mode, op0, 1);
5894 	  mode = word_mode;
5895 	}
5896 
5897       if (tem != 0)
5898 	{
5899 	  if (code == EQ)
5900 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5901 				0, OPTAB_WIDEN);
5902 	  else
5903 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5904 	}
5905 
5906       /* If we couldn't do it that way, for NE we can "or" the two's complement
5907 	 of the value with itself.  For EQ, we take the one's complement of
5908 	 that "or", which is an extra insn, so we only handle EQ if branches
5909 	 are expensive.  */
5910 
5911       if (tem == 0
5912 	  && (code == NE
5913 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5914 			      false) > 1))
5915 	{
5916 	  if (rtx_equal_p (subtarget, op0))
5917 	    subtarget = 0;
5918 
5919 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5920 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5921 			      OPTAB_WIDEN);
5922 
5923 	  if (tem && code == EQ)
5924 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5925 	}
5926     }
5927 
5928   if (tem && normalizep)
5929     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5930 			      GET_MODE_BITSIZE (mode) - 1,
5931 			      subtarget, normalizep == 1);
5932 
5933   if (tem)
5934     {
5935       if (!target)
5936 	;
5937       else if (GET_MODE (tem) != target_mode)
5938 	{
5939 	  convert_move (target, tem, 0);
5940 	  tem = target;
5941 	}
5942       else if (!subtarget)
5943 	{
5944 	  emit_move_insn (target, tem);
5945 	  tem = target;
5946 	}
5947     }
5948   else
5949     delete_insns_since (last);
5950 
5951   return tem;
5952 }
5953 
5954 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5955    and storing in TARGET.  Normally return TARGET.
5956    Return 0 if that cannot be done.
5957 
5958    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5959    it is VOIDmode, they cannot both be CONST_INT.
5960 
5961    UNSIGNEDP is for the case where we have to widen the operands
5962    to perform the operation.  It says to use zero-extension.
5963 
5964    NORMALIZEP is 1 if we should convert the result to be either zero
5965    or one.  Normalize is -1 if we should convert the result to be
5966    either zero or -1.  If NORMALIZEP is zero, the result will be left
5967    "raw" out of the scc insn.  */
5968 
5969 rtx
emit_store_flag(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)5970 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5971 		 machine_mode mode, int unsignedp, int normalizep)
5972 {
5973   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5974   enum rtx_code rcode;
5975   rtx subtarget;
5976   rtx tem, trueval;
5977   rtx_insn *last;
5978 
5979   /* If we compare constants, we shouldn't use a store-flag operation,
5980      but a constant load.  We can get there via the vanilla route that
5981      usually generates a compare-branch sequence, but will in this case
5982      fold the comparison to a constant, and thus elide the branch.  */
5983   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5984     return NULL_RTX;
5985 
5986   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5987 			   target_mode);
5988   if (tem)
5989     return tem;
5990 
5991   /* If we reached here, we can't do this with a scc insn, however there
5992      are some comparisons that can be done in other ways.  Don't do any
5993      of these cases if branches are very cheap.  */
5994   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5995     return 0;
5996 
5997   /* See what we need to return.  We can only return a 1, -1, or the
5998      sign bit.  */
5999 
6000   if (normalizep == 0)
6001     {
6002       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
6003 	normalizep = STORE_FLAG_VALUE;
6004 
6005       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
6006 	;
6007       else
6008 	return 0;
6009     }
6010 
6011   last = get_last_insn ();
6012 
6013   /* If optimizing, use different pseudo registers for each insn, instead
6014      of reusing the same pseudo.  This leads to better CSE, but slows
6015      down the compiler, since there are more pseudos.  */
6016   subtarget = (!optimize
6017 	       && (target_mode == mode)) ? target : NULL_RTX;
6018   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
6019 
6020   /* For floating-point comparisons, try the reverse comparison or try
6021      changing the "orderedness" of the comparison.  */
6022   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6023     {
6024       enum rtx_code first_code;
6025       bool and_them;
6026 
6027       rcode = reverse_condition_maybe_unordered (code);
6028       if (can_compare_p (rcode, mode, ccp_store_flag)
6029 	  && (code == ORDERED || code == UNORDERED
6030 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6031 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6032 	{
6033 	  int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
6034 			  || (STORE_FLAG_VALUE == -1 && normalizep == 1));
6035 
6036 	  /* For the reverse comparison, use either an addition or a XOR.  */
6037 	  if (want_add
6038 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
6039 			   optimize_insn_for_speed_p ()) == 0)
6040 	    {
6041 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6042 				       STORE_FLAG_VALUE, target_mode);
6043 	      if (tem)
6044 		return expand_binop (target_mode, add_optab, tem,
6045 				     gen_int_mode (normalizep, target_mode),
6046 				     target, 0, OPTAB_WIDEN);
6047 	    }
6048 	  else if (!want_add
6049 		   && rtx_cost (trueval, mode, XOR, 1,
6050 				optimize_insn_for_speed_p ()) == 0)
6051 	    {
6052 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
6053 				       normalizep, target_mode);
6054 	      if (tem)
6055 		return expand_binop (target_mode, xor_optab, tem, trueval,
6056 				     target, INTVAL (trueval) >= 0,
6057 				     OPTAB_WIDEN);
6058 	    }
6059 	}
6060 
6061       delete_insns_since (last);
6062 
6063       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
6064       if (code == ORDERED || code == UNORDERED)
6065 	return 0;
6066 
6067       and_them = split_comparison (code, mode, &first_code, &code);
6068 
6069       /* If there are no NaNs, the first comparison should always fall through.
6070 	 Effectively change the comparison to the other one.  */
6071       if (!HONOR_NANS (mode))
6072 	{
6073 	  gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
6074 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
6075 				    target_mode);
6076 	}
6077 
6078       if (!HAVE_conditional_move)
6079 	return 0;
6080 
6081       /* Do not turn a trapping comparison into a non-trapping one.  */
6082       if ((code != EQ && code != NE && code != UNEQ && code != LTGT)
6083 	  && flag_trapping_math)
6084 	return 0;
6085 
6086       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
6087 	 conditional move.  */
6088       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
6089 			       normalizep, target_mode);
6090       if (tem == 0)
6091 	return 0;
6092 
6093       if (and_them)
6094 	tem = emit_conditional_move (target, code, op0, op1, mode,
6095 				     tem, const0_rtx, GET_MODE (tem), 0);
6096       else
6097 	tem = emit_conditional_move (target, code, op0, op1, mode,
6098 				     trueval, tem, GET_MODE (tem), 0);
6099 
6100       if (tem == 0)
6101 	delete_insns_since (last);
6102       return tem;
6103     }
6104 
6105   /* The remaining tricks only apply to integer comparisons.  */
6106 
6107   scalar_int_mode int_mode;
6108   if (is_int_mode (mode, &int_mode))
6109     return emit_store_flag_int (target, subtarget, code, op0, op1, int_mode,
6110 				unsignedp, normalizep, trueval);
6111 
6112   return 0;
6113 }
6114 
6115 /* Like emit_store_flag, but always succeeds.  */
6116 
6117 rtx
emit_store_flag_force(rtx target,enum rtx_code code,rtx op0,rtx op1,machine_mode mode,int unsignedp,int normalizep)6118 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
6119 		       machine_mode mode, int unsignedp, int normalizep)
6120 {
6121   rtx tem;
6122   rtx_code_label *label;
6123   rtx trueval, falseval;
6124 
6125   /* First see if emit_store_flag can do the job.  */
6126   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
6127   if (tem != 0)
6128     return tem;
6129 
6130   /* If one operand is constant, make it the second one.  Only do this
6131      if the other operand is not constant as well.  */
6132   if (swap_commutative_operands_p (op0, op1))
6133     {
6134       std::swap (op0, op1);
6135       code = swap_condition (code);
6136     }
6137 
6138   if (mode == VOIDmode)
6139     mode = GET_MODE (op0);
6140 
6141   if (!target)
6142     target = gen_reg_rtx (word_mode);
6143 
6144   /* If this failed, we have to do this with set/compare/jump/set code.
6145      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
6146   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
6147   if (code == NE
6148       && GET_MODE_CLASS (mode) == MODE_INT
6149       && REG_P (target)
6150       && op0 == target
6151       && op1 == const0_rtx)
6152     {
6153       label = gen_label_rtx ();
6154       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
6155 			       NULL_RTX, NULL, label,
6156 			       profile_probability::uninitialized ());
6157       emit_move_insn (target, trueval);
6158       emit_label (label);
6159       return target;
6160     }
6161 
6162   if (!REG_P (target)
6163       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
6164     target = gen_reg_rtx (GET_MODE (target));
6165 
6166   /* Jump in the right direction if the target cannot implement CODE
6167      but can jump on its reverse condition.  */
6168   falseval = const0_rtx;
6169   if (! can_compare_p (code, mode, ccp_jump)
6170       && (! FLOAT_MODE_P (mode)
6171           || code == ORDERED || code == UNORDERED
6172           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
6173           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
6174     {
6175       enum rtx_code rcode;
6176       if (FLOAT_MODE_P (mode))
6177         rcode = reverse_condition_maybe_unordered (code);
6178       else
6179         rcode = reverse_condition (code);
6180 
6181       /* Canonicalize to UNORDERED for the libcall.  */
6182       if (can_compare_p (rcode, mode, ccp_jump)
6183           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
6184 	{
6185 	  falseval = trueval;
6186 	  trueval = const0_rtx;
6187 	  code = rcode;
6188 	}
6189     }
6190 
6191   emit_move_insn (target, trueval);
6192   label = gen_label_rtx ();
6193   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
6194 			   label, profile_probability::uninitialized ());
6195 
6196   emit_move_insn (target, falseval);
6197   emit_label (label);
6198 
6199   return target;
6200 }
6201 
6202 /* Perform possibly multi-word comparison and conditional jump to LABEL
6203    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
6204    now a thin wrapper around do_compare_rtx_and_jump.  */
6205 
6206 static void
do_cmp_and_jump(rtx arg1,rtx arg2,enum rtx_code op,machine_mode mode,rtx_code_label * label)6207 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6208 		 rtx_code_label *label)
6209 {
6210   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6211   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6212 			   NULL, label, profile_probability::uninitialized ());
6213 }
6214