/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "dojump.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, bool);
static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
				     unsigned HOST_WIDE_INT,
				     rtx, bool);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx, bool);
static rtx extract_fixed_bit_field (machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
				      unsigned HOST_WIDE_INT,
				      unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int, bool);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);

/* Return a constant integer mask value of mode MODE with BITSIZE ones
   followed by BITPOS zeros, or the complement of that if COMPLEMENT.
   The mask is truncated if necessary to the width of mode MODE.  The
   mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */

static inline rtx
mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
{
  return immed_wide_int_const
    (wi::shifted_mask (bitpos, bitsize, complement,
		       GET_MODE_PRECISION (mode)), mode);
}
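
/* For illustration (an editorial example, not from the original source):
   with a 32-bit MODE, mask_rtx (mode, 8, 4, false) yields the constant
   0x00000f00 (four ones starting at bit 8), and mask_rtx (mode, 8, 4,
   true) yields its complement 0xfffff0ff.  */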

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
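
/* For example, 0, 1, 2, 4 and 8 all satisfy this predicate, since
   subtracting 1 from a power of two clears its single set bit; 6 does
   not, because 6 & 5 == 4.  */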

struct init_expmed_rtl
{
  rtx reg;
  rtx plus;
  rtx neg;
  rtx mult;
  rtx sdiv;
  rtx udiv;
  rtx sdiv_32;
  rtx smod_32;
  rtx wide_mult;
  rtx wide_lshr;
  rtx wide_trunc;
  rtx shift;
  rtx shift_mult;
  rtx shift_add;
  rtx shift_sub0;
  rtx shift_sub1;
  rtx zext;
  rtx trunc;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
};

static void
init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
		      machine_mode from_mode, bool speed)
{
  int to_size, from_size;
  rtx which;

  to_size = GET_MODE_PRECISION (to_mode);
  from_size = GET_MODE_PRECISION (from_mode);

  /* Most partial integers have a precision less than that of the
     smallest "full" integer mode that can store them.  If one does
     not, reduce its bit size by one for the comparison below.  */
  if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
      && exact_log2 (to_size) != -1)
    to_size --;
  if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
      && exact_log2 (from_size) != -1)
    from_size --;

  /* Assume cost of zero-extend and sign-extend is the same.  */
  which = (to_size < from_size ? all->trunc : all->zext);

  PUT_MODE (all->reg, from_mode);
  set_convert_cost (to_mode, from_mode, speed,
		    set_src_cost (which, to_mode, speed));
}

static void
init_expmed_one_mode (struct init_expmed_rtl *all,
		      machine_mode mode, int speed)
{
  int m, n, mode_bitsize;
  machine_mode mode_from;

  mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);

  PUT_MODE (all->reg, mode);
  PUT_MODE (all->plus, mode);
  PUT_MODE (all->neg, mode);
  PUT_MODE (all->mult, mode);
  PUT_MODE (all->sdiv, mode);
  PUT_MODE (all->udiv, mode);
  PUT_MODE (all->sdiv_32, mode);
  PUT_MODE (all->smod_32, mode);
  PUT_MODE (all->wide_trunc, mode);
  PUT_MODE (all->shift, mode);
  PUT_MODE (all->shift_mult, mode);
  PUT_MODE (all->shift_add, mode);
  PUT_MODE (all->shift_sub0, mode);
  PUT_MODE (all->shift_sub1, mode);
  PUT_MODE (all->zext, mode);
  PUT_MODE (all->trunc, mode);

  set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
  set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
  set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
  set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
  set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));

  set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
				     <= 2 * add_cost (speed, mode)));
  set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
				     <= 4 * add_cost (speed, mode)));
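
  /* A note on the thresholds above (hedged; the exact sequences are
     target-dependent): the open-coded shift/add expansions for a signed
     division or modulus by a power of two cost roughly two and four
     additions respectively, so the division or modulus instruction is
     preferred only when it is no more expensive than that.  */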

  set_shift_cost (speed, mode, 0, 0);
  {
    int cost = add_cost (speed, mode);
    set_shiftadd_cost (speed, mode, 0, cost);
    set_shiftsub0_cost (speed, mode, 0, cost);
    set_shiftsub1_cost (speed, mode, 0, cost);
  }

  n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
  for (m = 1; m < n; m++)
    {
      XEXP (all->shift, 1) = all->cint[m];
      XEXP (all->shift_mult, 1) = all->pow2[m];

      set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
      set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
						       speed));
      set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
							speed));
      set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
							speed));
    }

  if (SCALAR_INT_MODE_P (mode))
    {
      for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
	   mode_from = (machine_mode)(mode_from + 1))
	init_expmed_one_conv (all, mode, mode_from, speed);
    }
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  PUT_MODE (all->zext, wider_mode);
	  PUT_MODE (all->wide_mult, wider_mode);
	  PUT_MODE (all->wide_lshr, wider_mode);
	  XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);

	  set_mul_widen_cost (speed, wider_mode,
			      set_src_cost (all->wide_mult, wider_mode, speed));
	  set_mul_highpart_cost (speed, mode,
				 set_src_cost (all->wide_trunc, mode, speed));
	}
    }
}

void
init_expmed (void)
{
  struct init_expmed_rtl all;
  machine_mode mode = QImode;
  int m, speed;

  memset (&all, 0, sizeof all);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      all.cint[m] = GEN_INT (m);
    }

  /* Avoid using hard regs in ways which may be unsupported.  */
  all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
  all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
  all.neg = gen_rtx_NEG (mode, all.reg);
  all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
  all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
  all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
  all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
  all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
  all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
  all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
  all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
  all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
  all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
  all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
  all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
  all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
  all.trunc = gen_rtx_TRUNCATE (mode, all.reg);

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));

      for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
	   mode = (machine_mode)(mode + 1))
	init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_PARTIAL_INT != VOIDmode)
	for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);

      if (MIN_MODE_VECTOR_INT != VOIDmode)
	for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
	     mode = (machine_mode)(mode + 1))
	  init_expmed_one_mode (&all, mode, speed);
    }

  if (alg_hash_used_p ())
    {
      struct alg_hash_entry *p = alg_hash_entry_ptr (0);
      memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
    }
  else
    set_alg_hash_used_p (true);
  default_rtl_profile ();

  ggc_free (all.trunc);
  ggc_free (all.shift_sub1);
  ggc_free (all.shift_sub0);
  ggc_free (all.shift_add);
  ggc_free (all.shift_mult);
  ggc_free (all.shift);
  ggc_free (all.wide_trunc);
  ggc_free (all.wide_lshr);
  ggc_free (all.wide_mult);
  ggc_free (all.zext);
  ggc_free (all.smod_32);
  ggc_free (all.sdiv_32);
  ggc_free (all.udiv);
  ggc_free (all.sdiv);
  ggc_free (all.mult);
  ggc_free (all.neg);
  ggc_free (all.plus);
  ggc_free (all.reg);
}

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}

/* Whether reverse storage order is supported on the target.  */
static int reverse_storage_order_supported = -1;

/* Check whether reverse storage order is supported on the target.  */

static void
check_reverse_storage_order_support (void)
{
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_storage_order_supported = 0;
      sorry ("reverse scalar storage order");
    }
  else
    reverse_storage_order_supported = 1;
}

/* Whether reverse FP storage order is supported on the target.  */
static int reverse_float_storage_order_supported = -1;

/* Check whether reverse FP storage order is supported on the target.  */

static void
check_reverse_float_storage_order_support (void)
{
  if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    {
      reverse_float_storage_order_supported = 0;
      sorry ("reverse floating-point scalar storage order");
    }
  else
    reverse_float_storage_order_supported = 1;
}

/* Return an rtx representing the value of X with reverse storage order.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
flip_storage_order (enum machine_mode mode, rtx x)
{
  enum machine_mode int_mode;
  rtx result;

  if (mode == QImode)
    return x;

  if (COMPLEX_MODE_P (mode))
    {
      rtx real = read_complex_part (x, false);
      rtx imag = read_complex_part (x, true);

      real = flip_storage_order (GET_MODE_INNER (mode), real);
      imag = flip_storage_order (GET_MODE_INNER (mode), imag);

      return gen_rtx_CONCAT (mode, real, imag);
    }

  if (__builtin_expect (reverse_storage_order_supported < 0, 0))
    check_reverse_storage_order_support ();

  if (SCALAR_INT_MODE_P (mode))
    int_mode = mode;
  else
    {
      if (FLOAT_MODE_P (mode)
	  && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
	check_reverse_float_storage_order_support ();

      int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
      if (int_mode == BLKmode)
	{
	  sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
	  return x;
	}
      x = gen_lowpart (int_mode, x);
    }

  result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
  if (result == 0)
    result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);

  if (int_mode != mode)
    result = gen_lowpart (mode, result);

  return result;
}
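
/* For example, flipping the storage order of the SImode value 0x12345678
   yields 0x78563412: the operation is a whole-value byte swap, carried
   out in an equal-sized integer mode when MODE itself is not a scalar
   integer mode.  */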

/* Adjust bitfield memory MEM so that it points to the first unit of mode
   MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
   If MODE is BLKmode, return a reference to every byte in the bitfield.
   Set *NEW_BITNUM to the bit position of the field within the new memory.  */

static rtx
narrow_bit_field_mem (rtx mem, machine_mode mode,
		      unsigned HOST_WIDE_INT bitsize,
		      unsigned HOST_WIDE_INT bitnum,
		      unsigned HOST_WIDE_INT *new_bitnum)
{
  if (mode == BLKmode)
    {
      *new_bitnum = bitnum % BITS_PER_UNIT;
      HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
      HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
			    / BITS_PER_UNIT);
      return adjust_bitfield_address_size (mem, mode, offset, size);
    }
  else
    {
      unsigned int unit = GET_MODE_BITSIZE (mode);
      *new_bitnum = bitnum % unit;
      HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
      return adjust_bitfield_address (mem, mode, offset);
    }
}
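
/* A worked example: for a field at BITNUM 37 accessed through a MEM of
   mode HImode (16-bit units), MEM is advanced by (37 - 37 % 16) / 8 = 4
   bytes and *NEW_BITNUM becomes 37 % 16 = 5.  */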

/* The caller wants to perform insertion or extraction PATTERN on a
   bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
   BITREGION_START and BITREGION_END are as for store_bit_field
   and FIELDMODE is the natural mode of the field.

   Search for a mode that is compatible with the memory access
   restrictions and (where applicable) with a register insertion or
   extraction.  Return the new memory on success, storing the adjusted
   bit position in *NEW_BITNUM.  Return null otherwise.  */

static rtx
adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
			      rtx op0, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bitnum,
			      unsigned HOST_WIDE_INT bitregion_start,
			      unsigned HOST_WIDE_INT bitregion_end,
			      machine_mode fieldmode,
			      unsigned HOST_WIDE_INT *new_bitnum)
{
  bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
				bitregion_end, MEM_ALIGN (op0),
				MEM_VOLATILE_P (op0));
  machine_mode best_mode;
  if (iter.next_mode (&best_mode))
    {
      /* We can use a memory in BEST_MODE.  See whether this is true for
	 any wider modes.  All other things being equal, we prefer to
	 use the widest mode possible because it tends to expose more
	 CSE opportunities.  */
      if (!iter.prefer_smaller_modes ())
	{
	  /* Limit the search to the mode required by the corresponding
	     register insertion or extraction instruction, if any.  */
	  machine_mode limit_mode = word_mode;
	  extraction_insn insn;
	  if (get_best_reg_extraction_insn (&insn, pattern,
					    GET_MODE_BITSIZE (best_mode),
					    fieldmode))
	    limit_mode = insn.field_mode;

	  machine_mode wider_mode;
	  while (iter.next_mode (&wider_mode)
		 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
	    best_mode = wider_mode;
	}
      return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
				   new_bitnum);
    }
  return NULL_RTX;
}

/* Return true if a bitfield of size BITSIZE at bit number BITNUM within
   a structure of mode STRUCT_MODE represents a lowpart subreg.  The subreg
   offset is then BITNUM / BITS_PER_UNIT.  */

static bool
lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
		     unsigned HOST_WIDE_INT bitsize,
		     machine_mode struct_mode)
{
  if (BYTES_BIG_ENDIAN)
    return (bitnum % BITS_PER_UNIT == 0
	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
		|| (bitnum + bitsize) % BITS_PER_WORD == 0));
  else
    return bitnum % BITS_PER_WORD == 0;
}
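
/* For example, on a little-endian target with 32-bit words, fields
   starting at bit 0, 32, 64, ... are lowpart subregs; on a big-endian
   target the field must instead be byte-aligned and end either at the
   last bit of STRUCT_MODE or on a word boundary.  */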

/* Return true if -fstrict-volatile-bitfields applies to an access of OP0
   containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
   Return false if the access would touch memory outside the range
   BITREGION_START to BITREGION_END for conformance to the C++ memory
   model.  */

static bool
strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    machine_mode fieldmode,
			    unsigned HOST_WIDE_INT bitregion_start,
			    unsigned HOST_WIDE_INT bitregion_end)
{
  unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);

  /* -fstrict-volatile-bitfields must be enabled and we must have a
     volatile MEM.  */
  if (!MEM_P (op0)
      || !MEM_VOLATILE_P (op0)
      || flag_strict_volatile_bitfields <= 0)
    return false;

  /* Non-integral modes likely only happen with packed structures.
     Punt.  */
  if (!SCALAR_INT_MODE_P (fieldmode))
    return false;

  /* The bit size must not be larger than the field mode, and
     the field mode must not be larger than a word.  */
  if (bitsize > modesize || modesize > BITS_PER_WORD)
    return false;

  /* Check for cases of unaligned fields that must be split.  */
  if (bitnum % modesize + bitsize > modesize)
    return false;

  /* The memory must be sufficiently aligned for a MODESIZE access.
     This condition guarantees that the memory access will not
     touch anything after the end of the structure.  */
  if (MEM_ALIGN (op0) < modesize)
    return false;

  /* Check for cases where the C++ memory model applies.  */
  if (bitregion_end != 0
      && (bitnum - bitnum % modesize < bitregion_start
	  || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
    return false;

  return true;
}
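
/* As an illustrative sketch (a hypothetical example, not from the
   original source), given

     struct s { volatile unsigned int f : 8; } *p;

   an access to p->f under -fstrict-volatile-bitfields is performed with
   a single SImode access covering the whole containing word, provided
   the memory is 32-bit aligned, rather than with a narrower or wider
   access.  */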

/* Return true if OP0 is a memory and if a bitfield of size BITSIZE at
   bit number BITNUM can be treated as a simple value of mode MODE.  */

static bool
simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum, machine_mode mode)
{
  return (MEM_P (op0)
	  && bitnum % BITS_PER_UNIT == 0
	  && bitsize == GET_MODE_BITSIZE (mode)
	  && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
	      || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
		  && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
}
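
/* For instance, a 32-bit field starting on a byte boundary can be read
   or written as a single SImode value either when the target handles
   unaligned accesses cheaply or when the address is 32-bit aligned.  */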

/* Try to use instruction INSV to store VALUE into a field of OP0.
   BITSIZE and BITNUM are as for store_bit_field.  */

static bool
store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
			    unsigned HOST_WIDE_INT bitsize,
			    unsigned HOST_WIDE_INT bitnum,
			    rtx value)
{
  struct expand_operand ops[4];
  rtx value1;
  rtx xop0 = op0;
  rtx_insn *last = get_last_insn ();
  bool copy_back = false;

  machine_mode op_mode = insv->field_mode;
  unsigned int unit = GET_MODE_BITSIZE (op_mode);
  if (bitsize == 0 || bitsize > unit)
    return false;

  if (MEM_P (xop0))
    /* Get a reference to the first byte of the field.  */
    xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
				 &bitnum);
  else
    {
      /* Convert from counting within OP0 to counting in OP_MODE.  */
      if (BYTES_BIG_ENDIAN)
	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);
    }

  /* If the destination is a paradoxical subreg such that we need a
     truncate to the inner mode, perform the insertion on a temporary and
     truncate the result to the original destination.  Note that we can't
     just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
     X) 0)) is (reg:N X).  */
  if (GET_CODE (xop0) == SUBREG
      && REG_P (SUBREG_REG (xop0))
      && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					 op_mode))
    {
      rtx tem = gen_reg_rtx (op_mode);
      emit_move_insn (tem, xop0);
      xop0 = tem;
      copy_back = true;
    }

  /* There is a similar overflow check at the start of store_bit_field_1,
     but it only handles the case where the field lies completely outside
     the register.  The field can also lie partially in the register, and
     in that case we need to adjust BITSIZE for the partial overflow.
     Without this fix, pr48335-2.c would be miscompiled on big-endian
     targets that have a bit-insert instruction, such as arm and
     aarch64.  */
  if (bitsize + bitnum > unit && bitnum < unit)
    {
      warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
	       "destination object, data truncated into %wu-bit",
	       bitsize, unit - bitnum);
      bitsize = unit - bitnum;
    }

  /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
     "backwards" from the size of the unit we are inserting into.
     Otherwise, we count bits from the most significant on a
     BYTES/BITS_BIG_ENDIAN machine.  */

  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
    bitnum = unit - bitsize - bitnum;
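  /* For example, with a 32-bit UNIT, an 8-bit field at little-endian
     bit position 4 is renumbered 32 - 8 - 4 = 20.  */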

  /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
  value1 = value;
  if (GET_MODE (value) != op_mode)
    {
      if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	{
	  rtx tmp;
	  /* Optimization: Don't bother really extending VALUE
	     if it has all the bits we will actually use.  However,
	     if we must narrow it, be sure we do it correctly.  */

	  if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
	    {
	      tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
	      if (! tmp)
		tmp = simplify_gen_subreg (op_mode,
					   force_reg (GET_MODE (value),
						      value1),
					   GET_MODE (value), 0);
	    }
	  else
	    {
	      tmp = gen_lowpart_if_possible (op_mode, value1);
	      if (! tmp)
		tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
						       value1));
	    }
	  value1 = tmp;
	}
      else if (CONST_INT_P (value))
	value1 = gen_int_mode (INTVAL (value), op_mode);
      else
	/* Parse phase is supposed to make VALUE's data type
	   match that of the component reference, which is a type
	   at least as wide as the field; so VALUE should have
	   a mode that corresponds to that type.  */
	gcc_assert (CONSTANT_P (value));
    }

  create_fixed_operand (&ops[0], xop0);
  create_integer_operand (&ops[1], bitsize);
  create_integer_operand (&ops[2], bitnum);
  create_input_operand (&ops[3], value1, op_mode);
  if (maybe_expand_insn (insv->icode, 4, ops))
    {
      if (copy_back)
	convert_move (op0, xop0, true);
      return true;
    }
  delete_insns_since (last);
  return false;
}

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   machine_mode fieldmode,
		   rtx value, bool reverse, bool fallback_p)
{
  rtx op0 = str_rtx;
  rtx orig_value;

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      int byte_offset = 0;

      /* Paradoxical subregs need special handling on big-endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
      && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
    {
      struct expand_operand ops[3];
      machine_mode outermode = GET_MODE (op0);
      machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }

  /* If the target is a register, overwriting the entire object, or
     storing a full-word or multi-word field, can be done with just a
     SUBREG.  */
  if (!MEM_P (op0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
	  || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
    {
      /* Use the subreg machinery either to narrow OP0 to the required
	 words or to cope with mode punning between equal-sized modes.
	 In the latter case, use subreg on the rhs side, not lhs.  */
      rtx sub;

      if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
	{
	  sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
	  if (sub)
	    {
	      if (reverse)
		sub = flip_storage_order (GET_MODE (op0), sub);
	      emit_move_insn (op0, sub);
	      return true;
	    }
	}
      else
	{
	  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				     bitnum / BITS_PER_UNIT);
	  if (sub)
	    {
	      if (reverse)
		value = flip_storage_order (fieldmode, value);
	      emit_move_insn (sub, value);
	      return true;
	    }
	}
    }

  /* If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
    {
      op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
      if (reverse)
	value = flip_storage_order (fieldmode, value);
      emit_move_insn (op0, value);
      return true;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movstrict instruction.  */

  if (!MEM_P (op0)
      && !reverse
      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = bitnum / BITS_PER_UNIT;
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx_insn *last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards ^ reverse
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big-endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, reverse, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* If OP0 is a multi-word register, narrow it to the affected word.
     If the region spans two words, defer to store_split_bit_field.  */
  if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
    {
      op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
      gcc_assert (op0);
      bitnum %= BITS_PER_WORD;
      if (bitnum + bitsize > BITS_PER_WORD)
	{
	  if (!fallback_p)
	    return false;

	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value, reverse);
	  return true;
	}
    }

  /* From here on we can assume that the field to be stored in fits
     within a word.  If the destination is a register, it too fits
     in a word.  */

  extraction_insn insv;
  if (!MEM_P (op0)
      && !reverse
      && get_best_reg_extraction_insn (&insv, EP_insv,
				       GET_MODE_BITSIZE (GET_MODE (op0)),
				       fieldmode)
      && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
    return true;

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (MEM_P (op0) && !reverse)
    {
      if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
					fieldmode)
	  && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
	return true;

      rtx_insn *last = get_last_insn ();

      /* Try loading part of OP0 into a register, inserting the bitfield
	 into that, and then copying the result back to OP0.  */
      unsigned HOST_WIDE_INT bitpos;
      rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
					       bitregion_start, bitregion_end,
					       fieldmode, &bitpos);
      if (xop0)
	{
	  rtx tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, reverse, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
			 bitregion_end, value, reverse);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is the bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0 if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

   If REVERSE is true, the store is to be done in reverse order.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 machine_mode fieldmode,
		 rtx value, bool reverse)
{
  /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
  if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
				  bitregion_start, bitregion_end))
    {
      /* Storing of a full word can be done with a simple store.
	 We know here that the field can be accessed with one single
	 instruction.  For targets that support unaligned memory,
	 an unaligned access may be necessary.  */
      if (bitsize == GET_MODE_BITSIZE (fieldmode))
	{
	  str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
					     bitnum / BITS_PER_UNIT);
	  if (reverse)
	    value = flip_storage_order (fieldmode, value);
	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
	  emit_move_insn (str_rtx, value);
	}
      else
	{
	  rtx temp;

	  str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
					  &bitnum);
	  gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
	  temp = copy_to_reg (str_rtx);
	  if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
				  fieldmode, value, reverse, true))
	    gcc_unreachable ();

	  emit_move_insn (str_rtx, temp);
	}

      return;
    }

  /* Under the C++11 memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      machine_mode bestmode;
      HOST_WIDE_INT offset, size;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx), VOIDmode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, reverse, true))
    gcc_unreachable ();
}
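
/* A hypothetical example (the exact arguments depend on the target and
   on the caller): for the assignment X.B = V, where X is

     struct { unsigned a : 3; unsigned b : 5; } x;

   expansion might call

     store_bit_field (mem, 5, 3, 0, 7, QImode, value, false);

   i.e. store a 5-bit field at bit 3 of MEM, within the byte-wide bit
   region [0, 7], with VALUE the rtx for V.  */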

/* Use shifts and boolean operations to store VALUE into a bit field of
   width BITSIZE in OP0, starting at bit BITNUM.

   If REVERSE is true, the store is to be done in reverse order.  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitnum,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (MEM_P (op0))
    {
      machine_mode mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;
      mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
				 bitregion_end, value, reverse);
	  return;
	}

      op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
    }

  store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
}

/* Helper function for store_fixed_bit_field; stores
   the bit field always using the MODE of OP0.  */

static void
store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
			 unsigned HOST_WIDE_INT bitnum,
			 rtx value, bool reverse)
{
  machine_mode mode;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  mode = GET_MODE (op0);
  gcc_assert (SCALAR_INT_MODE_P (mode));

  /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
     for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */

  if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
    /* BITNUM is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;

  /* Now BITNUM is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT v = UINTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT
		   && v == (unsigned HOST_WIDE_INT) -1))
	all_one = 1;

      value = lshift_value (mode, v, bitnum);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitnum + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitnum > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitnum, NULL_RTX, 1);
    }
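
  /* For example, storing the constant 5 into a 3-bit field at BITNUM 8
     takes the CONST_INT path above: V becomes 5 & 7 = 5, which is
     neither all-zero nor all-one, and VALUE becomes the mode-wide
     constant 5 << 8 = 0x500.  */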

  if (reverse)
    value = flip_storage_order (mode, value);

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
      if (reverse)
	mask = flip_storage_order (mode, mask);
      temp = expand_binop (mode, and_optab, temp, mask,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   If REVERSE is true, the store is to be done in reverse order.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value, bool reverse)
{
  unsigned int unit, total_bits, bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If OP0 is a memory with a mode, then UNIT must not be larger than
     OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
     again, and we will mutually recurse forever.  */
  if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
    unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  total_bits = GET_MODE_BITSIZE (GET_MODE (value));

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      rtx part, word;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When the region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  If op0 is a REG
	 or SUBREG of REG, don't do this, as there can't be data races
	 on a register and we can expand shorter code in some cases.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1
	  && !REG_P (op0)
	  && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
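
      /* For example, storing a 20-bit field at BITPOS 28 with a 32-bit
	 UNIT takes two iterations: the first writes MIN (20, 32 - 28)
	 = 4 bits, the second the remaining 16.  */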

      if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
	{
	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  /* Likewise, but the source is little-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsize - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    {
	      int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	      /* The args are chosen so that the last part includes the
		 lsb.  Give extract_bit_field the value it needs (with
		 endianness compensation) to fetch the piece we want.  */
	      part = extract_fixed_bit_field (word_mode, value, thissize,
					      total_bits - bitsize + bitsdone,
					      NULL_RTX, 1, false);
	    }
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  /* Likewise, but the source is big-endian.  */
	  else if (reverse)
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    total_bits - bitsdone - thissize,
					    NULL_RTX, 1, false);
	  else
	    part = extract_fixed_bit_field (word_mode, value, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
			    + (offset * unit / BITS_PER_WORD);
	  machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else if (REG_P (op0))
	{
	  machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
					  GET_MODE (op0));
	  offset &= BITS_PER_WORD / unit - 1;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, thissize, offset * unit + thispos,
			       bitregion_start, bitregion_end, part,
			       reverse);
      bitsdone += thissize;
    }
}

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, machine_mode mode,
			     machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If TMODE is not a scalar integral mode, first convert to the
     integer mode of that size and then access the result as TMODE
     via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
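
/* For example, extracting a bitfield into SFmode goes through SImode:
   the bits are first converted to an SImode value and the result is
   then reinterpreted as SFmode via a lowpart SUBREG.  */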
1453 
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455    Return the extracted value on success, otherwise return null.
1456    EXT_MODE is the mode of the extraction and the other arguments
1457    are as for extract_bit_field.  */
1458 
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461 			      unsigned HOST_WIDE_INT bitsize,
1462 			      unsigned HOST_WIDE_INT bitnum,
1463 			      int unsignedp, rtx target,
1464 			      machine_mode mode, machine_mode tmode)
1465 {
1466   struct expand_operand ops[4];
1467   rtx spec_target = target;
1468   rtx spec_target_subreg = 0;
1469   machine_mode ext_mode = extv->field_mode;
1470   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1471 
1472   if (bitsize == 0 || unit < bitsize)
1473     return NULL_RTX;
1474 
1475   if (MEM_P (op0))
1476     /* Get a reference to the first byte of the field.  */
1477     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1478 				&bitnum);
1479   else
1480     {
1481       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1482       if (BYTES_BIG_ENDIAN)
1483 	bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1484 
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486 	 acceptable to the format of ext(z)v.  */
1487       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488 	return NULL_RTX;
1489       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490 	op0 = gen_lowpart_SUBREG (ext_mode, op0);
1491     }
1492 
1493   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494      "backwards" from the size of the unit we are extracting from.
1495      Otherwise, we count bits from the most significant on a
1496      BYTES/BITS_BIG_ENDIAN machine.  */
1497 
1498   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499     bitnum = unit - bitsize - bitnum;
1500 
1501   if (target == 0)
1502     target = spec_target = gen_reg_rtx (tmode);
1503 
1504   if (GET_MODE (target) != ext_mode)
1505     {
1506       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507 	 between the mode of the extraction (EXT_MODE) and the target
1508 	 mode.  Instead, create a temporary and use convert_move to set
1509 	 the target.  */
1510       if (REG_P (target)
1511 	  && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1512 	{
1513 	  target = gen_lowpart (ext_mode, target);
1514 	  if (GET_MODE_PRECISION (ext_mode)
1515 	      > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516 	    spec_target_subreg = target;
1517 	}
1518       else
1519 	target = gen_reg_rtx (ext_mode);
1520     }
1521 
1522   create_output_operand (&ops[0], target, ext_mode);
1523   create_fixed_operand (&ops[1], op0);
1524   create_integer_operand (&ops[2], bitsize);
1525   create_integer_operand (&ops[3], bitnum);
1526   if (maybe_expand_insn (extv->icode, 4, ops))
1527     {
1528       target = ops[0].value;
1529       if (target == spec_target)
1530 	return target;
1531       if (target == spec_target_subreg)
1532 	return spec_target;
1533       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1534     }
1535   return NULL_RTX;
1536 }
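
/* A successful expansion above typically emits RTL of the form
   (set (reg:EXT_MODE X) (zero_extract:EXT_MODE OP0 BITSIZE BITNUM))
   for extzv, or a sign_extract for extv, with BITSIZE and BITNUM
   adjusted for endianness as described above.  */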
1537 
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540    if we can find no other means of implementing the operation.
1541    If FALLBACK_P is false, return NULL instead.  */
1542 
1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545 		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546 		     machine_mode mode, machine_mode tmode,
1547 		     bool reverse, bool fallback_p)
1548 {
1549   rtx op0 = str_rtx;
1550   machine_mode int_mode;
1551   machine_mode mode1;
1552 
1553   if (tmode == VOIDmode)
1554     tmode = mode;
1555 
1556   while (GET_CODE (op0) == SUBREG)
1557     {
1558       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559       op0 = SUBREG_REG (op0);
1560     }
1561 
1562   /* If we have an out-of-bounds access to a register, just return an
1563      uninitialized register of the required mode.  This can occur if the
1564      source code contains an out-of-bounds access to a small array.  */
1565   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566     return gen_reg_rtx (tmode);
1567 
1568   if (REG_P (op0)
1569       && mode == GET_MODE (op0)
1570       && bitnum == 0
1571       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1572     {
1573       if (reverse)
1574 	op0 = flip_storage_order (mode, op0);
1575       /* We're trying to extract a full register from itself.  */
1576       return op0;
1577     }
1578 
1579   /* See if we can get a better vector mode before extracting.  */
1580   if (VECTOR_MODE_P (GET_MODE (op0))
1581       && !MEM_P (op0)
1582       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1583     {
1584       machine_mode new_mode;
1585 
1586       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587 	new_mode = MIN_MODE_VECTOR_FLOAT;
1588       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589 	new_mode = MIN_MODE_VECTOR_FRACT;
1590       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591 	new_mode = MIN_MODE_VECTOR_UFRACT;
1592       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593 	new_mode = MIN_MODE_VECTOR_ACCUM;
1594       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595 	new_mode = MIN_MODE_VECTOR_UACCUM;
1596       else
1597 	new_mode = MIN_MODE_VECTOR_INT;
1598 
1599       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600 	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601 	    && targetm.vector_mode_supported_p (new_mode))
1602 	  break;
1603       if (new_mode != VOIDmode)
1604 	op0 = gen_lowpart (new_mode, op0);
1605     }
1606 
1607   /* Use vec_extract patterns for extracting parts of vectors whenever
1608      available.  */
1609   if (VECTOR_MODE_P (GET_MODE (op0))
1610       && !MEM_P (op0)
1611       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613 	  == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1614     {
1615       struct expand_operand ops[3];
1616       machine_mode outermode = GET_MODE (op0);
1617       machine_mode innermode = GET_MODE_INNER (outermode);
1618       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1620 
1621       create_output_operand (&ops[0], target, innermode);
1622       create_input_operand (&ops[1], op0, outermode);
1623       create_integer_operand (&ops[2], pos);
1624       if (maybe_expand_insn (icode, 3, ops))
1625 	{
1626 	  target = ops[0].value;
1627 	  if (GET_MODE (target) != mode)
1628 	    return gen_lowpart (tmode, target);
1629 	  return target;
1630 	}
1631     }
1632 
1633   /* Make sure we are playing with integral modes.  Pun with subregs
1634      if we aren't.  */
1635   {
1636     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637     if (imode != GET_MODE (op0))
1638       {
1639 	if (MEM_P (op0))
1640 	  op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641 	else if (imode != BLKmode)
1642 	  {
1643 	    op0 = gen_lowpart (imode, op0);
1644 
1645 	    /* If we got a SUBREG, force it into a register since we
1646 	       aren't going to be able to do another SUBREG on it.  */
1647 	    if (GET_CODE (op0) == SUBREG)
1648 	      op0 = force_reg (imode, op0);
1649 	  }
1650 	else if (REG_P (op0))
1651 	  {
1652 	    rtx reg, subreg;
1653 	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1654 					    MODE_INT);
1655 	    reg = gen_reg_rtx (imode);
1656 	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657 	    emit_move_insn (subreg, op0);
1658 	    op0 = reg;
1659 	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1660 	  }
1661 	else
1662 	  {
1663 	    HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664 	    rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665 	    emit_move_insn (mem, op0);
1666 	    op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1667 	  }
1668       }
1669   }
1670 
1671   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672      If that's wrong, the solution is to test for it and set TARGET to 0
1673      if needed.  */
1674 
1675   /* Get the mode of the field to use for atomic access or subreg
1676      conversion.  */
1677   mode1 = mode;
1678   if (SCALAR_INT_MODE_P (tmode))
1679     {
1680       machine_mode try_mode = mode_for_size (bitsize,
1681 						  GET_MODE_CLASS (tmode), 0);
1682       if (try_mode != BLKmode)
1683 	mode1 = try_mode;
1684     }
1685   gcc_assert (mode1 != BLKmode);
1686 
1687   /* Extraction of a full MODE1 value can be done with a subreg as long
1688      as the least significant bit of the value is the least significant
1689      bit of either OP0 or a word of OP0.  */
1690   if (!MEM_P (op0)
1691       && !reverse
1692       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693       && bitsize == GET_MODE_BITSIZE (mode1)
1694       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1695     {
1696       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697 				     bitnum / BITS_PER_UNIT);
1698       if (sub)
1699 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1700     }
1701 
1702   /* Extraction of a full MODE1 value can be done with a load as long as
1703      the field is on a byte boundary and is sufficiently aligned.  */
1704   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1705     {
1706       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1707       if (reverse)
1708 	op0 = flip_storage_order (mode1, op0);
1709       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1710     }
1711 
1712   /* Handle fields bigger than a word.  */
1713 
1714   if (bitsize > BITS_PER_WORD)
1715     {
1716       /* Here we transfer the words of the field
1717 	 in the order least significant first.
1718 	 This is because the most significant word is the one which may
1719 	 be less than full.  */
1720 
1721       const bool backwards = WORDS_BIG_ENDIAN;
1722       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1723       unsigned int i;
1724       rtx_insn *last;
1725 
1726       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727 	target = gen_reg_rtx (mode);
1728 
1729       /* In case we're about to clobber a base register or something
1730 	 (see gcc.c-torture/execute/20040625-1.c).   */
1731       if (reg_mentioned_p (target, str_rtx))
1732 	target = gen_reg_rtx (mode);
1733 
1734       /* Indicate for flow that the entire target reg is being set.  */
1735       emit_clobber (target);
1736 
1737       last = get_last_insn ();
1738       for (i = 0; i < nwords; i++)
1739 	{
1740 	  /* If I is 0, use the low-order word in both field and target;
1741 	     if I is 1, use the next to lowest word; and so on.  */
1742 	  /* Word number in TARGET to use.  */
1743 	  unsigned int wordnum
1744 	    = (backwards
1745 	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1746 	       : i);
1747 	  /* Offset from start of field in OP0.  */
1748 	  unsigned int bit_offset = (backwards ^ reverse
1749 				     ? MAX ((int) bitsize - ((int) i + 1)
1750 					    * BITS_PER_WORD,
1751 					    0)
1752 				     : (int) i * BITS_PER_WORD);
1753 	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1754 	  rtx result_part
1755 	    = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756 					     bitsize - i * BITS_PER_WORD),
1757 				   bitnum + bit_offset, 1, target_part,
1758 				   mode, word_mode, reverse, fallback_p);
1759 
1760 	  gcc_assert (target_part);
1761 	  if (!result_part)
1762 	    {
1763 	      delete_insns_since (last);
1764 	      return NULL;
1765 	    }
1766 
1767 	  if (result_part != target_part)
1768 	    emit_move_insn (target_part, result_part);
1769 	}
1770 
1771       if (unsignedp)
1772 	{
1773 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774 	     need to be zeroed out.  */
1775 	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1776 	    {
1777 	      unsigned int i, total_words;
1778 
1779 	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780 	      for (i = nwords; i < total_words; i++)
1781 		emit_move_insn
1782 		  (operand_subword (target,
1783 				    backwards ? total_words - i - 1 : i,
1784 				    1, VOIDmode),
1785 		   const0_rtx);
1786 	    }
1787 	  return target;
1788 	}
1789 
1790       /* Signed bit field: sign-extend with two arithmetic shifts.  */
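      /* E.g. for a 12-bit field extracted into a 32-bit MODE, shift left
	 by 20 so the field's sign bit becomes bit 31, then shift right
	 arithmetically by 20 to replicate it through the upper bits.  */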
1791       target = expand_shift (LSHIFT_EXPR, mode, target,
1792 			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793       return expand_shift (RSHIFT_EXPR, mode, target,
1794 			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1795     }
1796 
1797   /* If OP0 is a multi-word register, narrow it to the affected word.
1798      If the region spans two words, defer to extract_split_bit_field.  */
1799   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1800     {
1801       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802 				 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803       bitnum %= BITS_PER_WORD;
1804       if (bitnum + bitsize > BITS_PER_WORD)
1805 	{
1806 	  if (!fallback_p)
1807 	    return NULL_RTX;
1808 	  target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1809 					    reverse);
1810 	  return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1811 	}
1812     }
1813 
1814   /* From here on we know the desired field is smaller than a word.
1815      If OP0 is a register, it too fits within a word.  */
1816   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817   extraction_insn extv;
1818   if (!MEM_P (op0)
1819       && !reverse
1820       /* ??? We could limit the structure size to the part of OP0 that
1821 	 contains the field, with appropriate checks for endianness
1822 	 and TRULY_NOOP_TRUNCATION.  */
1823       && get_best_reg_extraction_insn (&extv, pattern,
1824 				       GET_MODE_BITSIZE (GET_MODE (op0)),
1825 				       tmode))
1826     {
1827       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828 						 unsignedp, target, mode,
1829 						 tmode);
1830       if (result)
1831 	return result;
1832     }
1833 
1834   /* If OP0 is a memory, try copying it to a register and seeing if a
1835      cheap register alternative is available.  */
1836   if (MEM_P (op0) && !reverse)
1837     {
1838       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1839 					tmode))
1840 	{
1841 	  rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1842 						     bitnum, unsignedp,
1843 						     target, mode,
1844 						     tmode);
1845 	  if (result)
1846 	    return result;
1847 	}
1848 
1849       rtx_insn *last = get_last_insn ();
1850 
1851       /* Try loading part of OP0 into a register and extracting the
1852 	 bitfield from that.  */
1853       unsigned HOST_WIDE_INT bitpos;
1854       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855 					       0, 0, tmode, &bitpos);
1856       if (xop0)
1857 	{
1858 	  xop0 = copy_to_reg (xop0);
1859 	  rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1860 					    unsignedp, target,
1861 					    mode, tmode, reverse, false);
1862 	  if (result)
1863 	    return result;
1864 	  delete_insns_since (last);
1865 	}
1866     }
1867 
1868   if (!fallback_p)
1869     return NULL;
1870 
1871   /* Find a correspondingly-sized integer field, so we can apply
1872      shifts and masks to it.  */
1873   int_mode = int_mode_for_mode (tmode);
1874   if (int_mode == BLKmode)
1875     int_mode = int_mode_for_mode (mode);
1876   /* Should probably push op0 out to memory and then do a load.  */
1877   gcc_assert (int_mode != BLKmode);
1878 
1879   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880 				    unsignedp, reverse);
1881 
1882   /* Complex values must be reversed piecewise, so we need to undo the global
1883      reversal, convert to the complex mode and reverse again.  */
1884   if (reverse && COMPLEX_MODE_P (tmode))
1885     {
1886       target = flip_storage_order (int_mode, target);
1887       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888       target = flip_storage_order (tmode, target);
1889     }
1890   else
1891     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1892 
1893   return target;
1894 }
1895 
1896 /* Generate code to extract a bit-field from STR_RTX
1897    containing BITSIZE bits, starting at BITNUM,
1898    and put it in TARGET if possible (if TARGET is nonzero).
1899    Regardless of TARGET, we return the rtx for where the value is placed.
1900 
1901    STR_RTX is the structure containing the byte (a REG or MEM).
1902    UNSIGNEDP is nonzero if this is an unsigned bit field.
1903    MODE is the natural mode of the field value once extracted.
1904    TMODE is the mode the caller would like the value to have;
1905    but the value may be returned with type MODE instead.
1906 
1907    If REVERSE is true, the extraction is to be done in reverse order.
1908 
1909    If a TARGET is specified and we can store in it at no extra cost,
1910    we do so, and return TARGET.
1911    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912    if they are equally easy.  */
1913 
1914 rtx
1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917 		   machine_mode mode, machine_mode tmode, bool reverse)
1918 {
1919   machine_mode mode1;
1920 
1921   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1922   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923     mode1 = GET_MODE (str_rtx);
1924   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925     mode1 = GET_MODE (target);
1926   else
1927     mode1 = tmode;
1928 
1929   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1930     {
1931       /* Extraction of a full MODE1 value can be done with a simple load.
1932 	 We know here that the field can be accessed with one single
1933 	 instruction.  For targets that support unaligned memory,
1934 	 an unaligned access may be necessary.  */
1935       if (bitsize == GET_MODE_BITSIZE (mode1))
1936 	{
1937 	  rtx result = adjust_bitfield_address (str_rtx, mode1,
1938 						bitnum / BITS_PER_UNIT);
1939 	  if (reverse)
1940 	    result = flip_storage_order (mode1, result);
1941 	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1943 	}
1944 
1945       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1946 				      &bitnum);
1947       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948       str_rtx = copy_to_reg (str_rtx);
1949     }
1950 
1951   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952 			      target, mode, tmode, reverse, true);
1953 }
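
/* As a usage sketch with hypothetical operands, extracting an unsigned
   16-bit field starting at bit 8 of a SImode memory reference MEM:

     rtx val = extract_bit_field (mem, 16, 8, 1, NULL_RTX,
				  HImode, HImode, false);

   VAL is an HImode rtx; TARGET is reused only when that is free.  */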
1954 
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956    from bit BITNUM of OP0.
1957 
1958    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959    If REVERSE is true, the extraction is to be done in reverse order.
1960 
1961    If TARGET is nonzero, attempts to store the value there
1962    and return TARGET, but this is not guaranteed.
1963    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1964 
1965 static rtx
1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967 			 unsigned HOST_WIDE_INT bitsize,
1968 			 unsigned HOST_WIDE_INT bitnum, rtx target,
1969 			 int unsignedp, bool reverse)
1970 {
1971   if (MEM_P (op0))
1972     {
1973       machine_mode mode
1974 	= get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975 			 MEM_VOLATILE_P (op0));
1976 
1977       if (mode == VOIDmode)
1978 	/* The only way this should occur is if the field spans word
1979 	   boundaries.  */
1980 	return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1981 					reverse);
1982 
1983       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1984     }
1985 
1986   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987 				    target, unsignedp, reverse);
1988 }
1989 
1990 /* Helper function for extract_fixed_bit_field; it always extracts
1991    the bit field using the MODE of OP0.  */
1992 
1993 static rtx
1994 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1995 			   unsigned HOST_WIDE_INT bitsize,
1996 			   unsigned HOST_WIDE_INT bitnum, rtx target,
1997 			   int unsignedp, bool reverse)
1998 {
1999   machine_mode mode = GET_MODE (op0);
2000   gcc_assert (SCALAR_INT_MODE_P (mode));
2001 
2002   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003      for invalid input, such as extract equivalent of f5 from
2004      gcc.dg/pr48335-2.c.  */
2005 
2006   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2007     /* BITNUM is the distance between our msb and that of OP0.
2008        Convert it to the distance from the lsb.  */
2009     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2010 
2011   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012      We have reduced the big-endian case to the little-endian case.  */
2013   if (reverse)
2014     op0 = flip_storage_order (mode, op0);
2015 
2016   if (unsignedp)
2017     {
2018       if (bitnum)
2019 	{
2020 	  /* If the field does not already start at the lsb,
2021 	     shift it so it does.  */
2022 	  /* Maybe propagate the target for the shift.  */
2023 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2024 	  if (tmode != mode)
2025 	    subtarget = 0;
2026 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2027 	}
2028       /* Convert the value to the desired mode.  */
2029       if (mode != tmode)
2030 	op0 = convert_to_mode (tmode, op0, 1);
2031 
2032       /* Unless the msb of the field used to be the msb when we shifted,
2033 	 mask out the upper bits.  */
2034 
2035       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2036 	return expand_binop (GET_MODE (op0), and_optab, op0,
2037 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2038 			     target, 1, OPTAB_LIB_WIDEN);
2039       return op0;
2040     }
2041 
2042   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043      then arithmetic-shift its lsb to the lsb of the word.  */
2044   op0 = force_reg (mode, op0);
2045 
2046   /* Find the narrowest integer mode that contains the field.  */
2047 
2048   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2049        mode = GET_MODE_WIDER_MODE (mode))
2050     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2051       {
2052 	op0 = convert_to_mode (mode, op0, 0);
2053 	break;
2054       }
2055 
2056   if (mode != tmode)
2057     target = 0;
2058 
2059   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2060     {
2061       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2062       /* Maybe propagate the target for the shift.  */
2063       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2064       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2065     }
2066 
2067   return expand_shift (RSHIFT_EXPR, mode, op0,
2068 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2069 }
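
/* In the unsigned case above, extracting a 5-bit field at little-endian
   bit 3 therefore reduces to (OP0 >> 3) & 0x1f, and the mask is omitted
   when the field already ends at the most significant bit.  */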
2070 
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2072    VALUE << BITPOS.  */
2073 
2074 static rtx
2075 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2076 	      int bitpos)
2077 {
2078   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2079 }
2080 
2081 /* Extract a bit field that is split across two words
2082    and return an RTX for the result.
2083 
2084    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085    BITSIZE is the field width; BITPOS is its first bit's position in the word.
2086    UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend.
2087 
2088    If REVERSE is true, the extraction is to be done in reverse order.  */
2089 
2090 static rtx
2091 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2092 			 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2093 			 bool reverse)
2094 {
2095   unsigned int unit;
2096   unsigned int bitsdone = 0;
2097   rtx result = NULL_RTX;
2098   int first = 1;
2099 
2100   /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
2101      that much at a time.  */
2102   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2103     unit = BITS_PER_WORD;
2104   else
2105     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2106 
2107   while (bitsdone < bitsize)
2108     {
2109       unsigned HOST_WIDE_INT thissize;
2110       rtx part, word;
2111       unsigned HOST_WIDE_INT thispos;
2112       unsigned HOST_WIDE_INT offset;
2113 
2114       offset = (bitpos + bitsdone) / unit;
2115       thispos = (bitpos + bitsdone) % unit;
2116 
2117       /* THISSIZE must not overrun a word boundary.  Otherwise,
2118 	 extract_fixed_bit_field will call us again, and we will mutually
2119 	 recurse forever.  */
2120       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2121       thissize = MIN (thissize, unit - thispos);
2122 
2123       /* If OP0 is a register, then handle OFFSET here.
2124 
2125 	 When handling multiword bitfields, extract_bit_field may pass
2126 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
2127 	 crosses a word boundary.  Thus, for a SUBREG, we must find
2128 	 the current word starting from the base register.  */
2129       if (GET_CODE (op0) == SUBREG)
2130 	{
2131 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2132 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
2133 					GET_MODE (SUBREG_REG (op0)));
2134 	  offset = 0;
2135 	}
2136       else if (REG_P (op0))
2137 	{
2138 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2139 	  offset = 0;
2140 	}
2141       else
2142 	word = op0;
2143 
2144       /* Extract the parts in bit-counting order,
2145 	 whose meaning is determined by BYTES/BITS_BIG_ENDIAN.
2146 	 OFFSET is in UNITs, and UNIT is in bits.  */
2147       part = extract_fixed_bit_field (word_mode, word, thissize,
2148 				      offset * unit + thispos, 0, 1, reverse);
2149       bitsdone += thissize;
2150 
2151       /* Shift this part into place for the result.  */
2152       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2153 	{
2154 	  if (bitsize != bitsdone)
2155 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2156 				 bitsize - bitsdone, 0, 1);
2157 	}
2158       else
2159 	{
2160 	  if (bitsdone != thissize)
2161 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2162 				 bitsdone - thissize, 0, 1);
2163 	}
2164 
2165       if (first)
2166 	result = part;
2167       else
2168 	/* Combine the parts with bitwise or.  This works
2169 	   because we extracted each part as an unsigned bit field.  */
2170 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2171 			       OPTAB_LIB_WIDEN);
2172 
2173       first = 0;
2174     }
2175 
2176   /* Unsigned bit field: we are done.  */
2177   if (unsignedp)
2178     return result;
2179   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2180   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2181 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182   return expand_shift (RSHIFT_EXPR, word_mode, result,
2183 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2184 }
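
/* For instance, with 32-bit words on a little-endian target, a 16-bit
   field at bit position 24 is assembled from an 8-bit part at offset 24
   of word 0 and an 8-bit part at offset 0 of word 1, the latter shifted
   left by 8 and IORed in; the final two shifts supply the sign
   extension when needed.  */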
2185 
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2188    MODE, fill the upper bits with zeros.  Fail if the layout of either
2189    mode is unknown (as for CC modes) or if the extraction would involve
2190    unprofitable mode punning.  Return the value on success, otherwise
2191    return null.
2192 
2193    This is different from gen_lowpart* in these respects:
2194 
2195      - the returned value must always be considered an rvalue
2196 
2197      - when MODE is wider than SRC_MODE, the extraction involves
2198        a zero extension
2199 
2200      - when MODE is smaller than SRC_MODE, the extraction involves
2201        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2202 
2203    In other words, this routine performs a computation, whereas the
2204    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2205    operations.  */
2206 
2207 rtx
2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2209 {
2210   machine_mode int_mode, src_int_mode;
2211 
2212   if (mode == src_mode)
2213     return src;
2214 
2215   if (CONSTANT_P (src))
2216     {
2217       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218 	 fails, it will happily create (subreg (symbol_ref)) or similar
2219 	 invalid SUBREGs.  */
2220       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2222       if (ret)
2223 	return ret;
2224 
2225       if (GET_MODE (src) == VOIDmode
2226 	  || !validate_subreg (mode, src_mode, src, byte))
2227 	return NULL_RTX;
2228 
2229       src = force_reg (GET_MODE (src), src);
2230       return gen_rtx_SUBREG (mode, src, byte);
2231     }
2232 
2233   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2234     return NULL_RTX;
2235 
2236   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237       && MODES_TIEABLE_P (mode, src_mode))
2238     {
2239       rtx x = gen_lowpart_common (mode, src);
2240       if (x)
2241         return x;
2242     }
2243 
2244   src_int_mode = int_mode_for_mode (src_mode);
2245   int_mode = int_mode_for_mode (mode);
2246   if (src_int_mode == BLKmode || int_mode == BLKmode)
2247     return NULL_RTX;
2248 
2249   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2250     return NULL_RTX;
2251   if (!MODES_TIEABLE_P (int_mode, mode))
2252     return NULL_RTX;
2253 
2254   src = gen_lowpart (src_int_mode, src);
2255   src = convert_modes (int_mode, src_int_mode, src, true);
2256   src = gen_lowpart (mode, src);
2257   return src;
2258 }
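
/* For example, reading the low 32 bits of a DFmode value as SImode goes
   through the corresponding integer modes: the value is accessed as
   DImode, truncated to SImode, and the SImode lowpart is returned,
   provided both mode pairs are tieable.  */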
2259 
2260 /* Add INC into TARGET.  */
2261 
2262 void
2263 expand_inc (rtx target, rtx inc)
2264 {
2265   rtx value = expand_binop (GET_MODE (target), add_optab,
2266 			    target, inc,
2267 			    target, 0, OPTAB_LIB_WIDEN);
2268   if (value != target)
2269     emit_move_insn (target, value);
2270 }
2271 
2272 /* Subtract DEC from TARGET.  */
2273 
2274 void
2275 expand_dec (rtx target, rtx dec)
2276 {
2277   rtx value = expand_binop (GET_MODE (target), sub_optab,
2278 			    target, dec,
2279 			    target, 0, OPTAB_LIB_WIDEN);
2280   if (value != target)
2281     emit_move_insn (target, value);
2282 }
2283 
2284 /* Output a shift instruction for expression code CODE,
2285    with SHIFTED being the rtx for the value to shift,
2286    and AMOUNT the rtx for the amount to shift by.
2287    Store the result in the rtx TARGET, if that is convenient.
2288    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289    Return the rtx for where the value is.
2290    If that cannot be done, abort the compilation unless MAY_FAIL is true,
2291    in which case 0 is returned.  */
2292 
2293 static rtx
2294 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2295 		rtx amount, rtx target, int unsignedp, bool may_fail = false)
2296 {
2297   rtx op1, temp = 0;
2298   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2299   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2300   optab lshift_optab = ashl_optab;
2301   optab rshift_arith_optab = ashr_optab;
2302   optab rshift_uns_optab = lshr_optab;
2303   optab lrotate_optab = rotl_optab;
2304   optab rrotate_optab = rotr_optab;
2305   machine_mode op1_mode;
2306   machine_mode scalar_mode = mode;
2307   int attempt;
2308   bool speed = optimize_insn_for_speed_p ();
2309 
2310   if (VECTOR_MODE_P (mode))
2311     scalar_mode = GET_MODE_INNER (mode);
2312   op1 = amount;
2313   op1_mode = GET_MODE (op1);
2314 
2315   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2316      shift amount is a vector, use the vector/vector shift patterns.  */
2317   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2318     {
2319       lshift_optab = vashl_optab;
2320       rshift_arith_optab = vashr_optab;
2321       rshift_uns_optab = vlshr_optab;
2322       lrotate_optab = vrotl_optab;
2323       rrotate_optab = vrotr_optab;
2324     }
2325 
2326   /* We used to detect shift counts computed by NEGATE_EXPR
2327      and shift in the other direction, but that does not work
2328      on all machines.  */
2329 
2330   if (SHIFT_COUNT_TRUNCATED)
2331     {
2332       if (CONST_INT_P (op1)
2333 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2334 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2335 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2336 		       % GET_MODE_BITSIZE (scalar_mode));
2337       else if (GET_CODE (op1) == SUBREG
2338 	       && subreg_lowpart_p (op1)
2339 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2340 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2341 	op1 = SUBREG_REG (op1);
2342     }
2343 
2344   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2345      prefer left rotation; if op1 is from bitsize / 2 + 1 to
2346      bitsize - 1, use the other direction of rotate with an amount of
2347      1 .. bitsize / 2 - 1 instead.  */
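  /* For instance, in SImode a left rotate by 24 becomes a right rotate
     by 8, while a right rotate by 16 is canonicalized to a left rotate
     by 16.  */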
2348   if (rotate
2349       && CONST_INT_P (op1)
2350       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2351 		   GET_MODE_BITSIZE (scalar_mode) - 1))
2352     {
2353       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2354       left = !left;
2355       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2356     }
2357 
2358   /* Rotation of 16-bit values by 8 bits is effectively equivalent to a bswaphi.
2359      Note that this is not the case for bigger values.  For instance a rotation
2360      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2361      0x04030201 (bswapsi).  */
2362   if (rotate
2363       && CONST_INT_P (op1)
2364       && INTVAL (op1) == BITS_PER_UNIT
2365       && GET_MODE_SIZE (scalar_mode) == 2
2366       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2367     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2368 				  unsignedp);
2369 
2370   if (op1 == const0_rtx)
2371     return shifted;
2372 
2373   /* Check whether it's cheaper to implement a left shift by a constant
2374      bit count as a sequence of additions.  */
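  /* E.g. a left shift by 2 is then emitted as two successive doublings
     of the operand.  */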
2375   if (code == LSHIFT_EXPR
2376       && CONST_INT_P (op1)
2377       && INTVAL (op1) > 0
2378       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2379       && INTVAL (op1) < MAX_BITS_PER_WORD
2380       && (shift_cost (speed, mode, INTVAL (op1))
2381 	  > INTVAL (op1) * add_cost (speed, mode))
2382       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2383     {
2384       int i;
2385       for (i = 0; i < INTVAL (op1); i++)
2386 	{
2387 	  temp = force_reg (mode, shifted);
2388 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2389 				  unsignedp, OPTAB_LIB_WIDEN);
2390 	}
2391       return shifted;
2392     }
2393 
2394   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2395     {
2396       enum optab_methods methods;
2397 
2398       if (attempt == 0)
2399 	methods = OPTAB_DIRECT;
2400       else if (attempt == 1)
2401 	methods = OPTAB_WIDEN;
2402       else
2403 	methods = OPTAB_LIB_WIDEN;
2404 
2405       if (rotate)
2406 	{
2407 	  /* Widening does not work for rotation.  */
2408 	  if (methods == OPTAB_WIDEN)
2409 	    continue;
2410 	  else if (methods == OPTAB_LIB_WIDEN)
2411 	    {
2412 	      /* If we have been unable to open-code this by a rotation,
2413 		 do it as the IOR of two shifts.  I.e., to rotate A
2414 		 by N bits, compute
2415 		 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2416 		 where C is the bitsize of A.
2417 
2418 		 It is theoretically possible that the target machine might
2419 		 not be able to perform either shift and hence we would
2420 		 be making two libcalls rather than just the one for the
2421 		 shift (similarly if IOR could not be done).  We will allow
2422 		 this extremely unlikely lossage to avoid complicating the
2423 		 code below.  */
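	      /* E.g. for C == 32 this computes
		 (A << N) | ((unsigned) A >> ((32 - N) & 31)),
		 which remains correct for N == 0 since the masked
		 right-shift amount collapses to 0.  */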
2424 
2425 	      rtx subtarget = target == shifted ? 0 : target;
2426 	      rtx new_amount, other_amount;
2427 	      rtx temp1;
2428 
2429 	      new_amount = op1;
2430 	      if (op1 == const0_rtx)
2431 		return shifted;
2432 	      else if (CONST_INT_P (op1))
2433 		other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2434 					- INTVAL (op1));
2435 	      else
2436 		{
2437 		  other_amount
2438 		    = simplify_gen_unary (NEG, GET_MODE (op1),
2439 					  op1, GET_MODE (op1));
2440 		  HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2441 		  other_amount
2442 		    = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2443 					   gen_int_mode (mask, GET_MODE (op1)));
2444 		}
2445 
2446 	      shifted = force_reg (mode, shifted);
2447 
2448 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2449 				     mode, shifted, new_amount, 0, 1);
2450 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2451 				      mode, shifted, other_amount,
2452 				      subtarget, 1);
2453 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2454 				   unsignedp, methods);
2455 	    }
2456 
2457 	  temp = expand_binop (mode,
2458 			       left ? lrotate_optab : rrotate_optab,
2459 			       shifted, op1, target, unsignedp, methods);
2460 	}
2461       else if (unsignedp)
2462 	temp = expand_binop (mode,
2463 			     left ? lshift_optab : rshift_uns_optab,
2464 			     shifted, op1, target, unsignedp, methods);
2465 
2466       /* Do arithmetic shifts.
2467 	 Also, if we are going to widen the operand, we can just as well
2468 	 use an arithmetic right-shift instead of a logical one.  */
2469       if (temp == 0 && ! rotate
2470 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2471 	{
2472 	  enum optab_methods methods1 = methods;
2473 
2474 	  /* If trying to widen a log shift to an arithmetic shift,
2475 	     don't accept an arithmetic shift of the same size.  */
2476 	  if (unsignedp)
2477 	    methods1 = OPTAB_MUST_WIDEN;
2478 
2479 	  /* Arithmetic shift */
2480 
2481 	  temp = expand_binop (mode,
2482 			       left ? lshift_optab : rshift_arith_optab,
2483 			       shifted, op1, target, unsignedp, methods1);
2484 	}
2485 
2486       /* We used to try extzv here for logical right shifts, but that was
2487 	 only useful for one machine, the VAX, and caused poor code
2488 	 generation there for lshrdi3, so the code was deleted and a
2489 	 define_expand for lshrsi3 was added to vax.md.  */
2490     }
2491 
2492   gcc_assert (temp != NULL_RTX || may_fail);
2493   return temp;
2494 }
2495 
2496 /* Output a shift instruction for expression code CODE,
2497    with SHIFTED being the rtx for the value to shift,
2498    and AMOUNT the amount to shift by.
2499    Store the result in the rtx TARGET, if that is convenient.
2500    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2501    Return the rtx for where the value is.  */
2502 
2503 rtx
2504 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2505 	      int amount, rtx target, int unsignedp)
2506 {
2507   return expand_shift_1 (code, mode,
2508 			 shifted, GEN_INT (amount), target, unsignedp);
2509 }
2510 
2511 /* Likewise, but return 0 if that cannot be done.  */
2512 
2513 static rtx
2514 maybe_expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2515 		    int amount, rtx target, int unsignedp)
2516 {
2517   return expand_shift_1 (code, mode,
2518 			 shifted, GEN_INT (amount), target, unsignedp, true);
2519 }
2520 
2521 /* Output a shift instruction for expression code CODE,
2522    with SHIFTED being the rtx for the value to shift,
2523    and AMOUNT the tree for the amount to shift by.
2524    Store the result in the rtx TARGET, if that is convenient.
2525    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2526    Return the rtx for where the value is.  */
2527 
2528 rtx
2529 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2530 		       tree amount, rtx target, int unsignedp)
2531 {
2532   return expand_shift_1 (code, mode,
2533 			 shifted, expand_normal (amount), target, unsignedp);
2534 }
2535 
2536 
2537 /* Indicates the type of fixup needed after a constant multiplication.
2538    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2539    the result should be negated, and ADD_VARIANT means that the
2540    multiplicand should be added to the result.  */
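/* For instance, a multiply by -9 can reuse the algorithm for 9 followed
   by a negation (negate_variant), and a multiply by 10 the algorithm
   for 9 followed by an addition of the multiplicand (add_variant).  */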
2541 enum mult_variant {basic_variant, negate_variant, add_variant};
2542 
2543 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2544 			const struct mult_cost *, machine_mode mode);
2545 static bool choose_mult_variant (machine_mode, HOST_WIDE_INT,
2546 				 struct algorithm *, enum mult_variant *, int);
2547 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2548 			      const struct algorithm *, enum mult_variant);
2549 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2550 static rtx extract_high_half (machine_mode, rtx);
2551 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2552 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2553 				       int, int);
2554 /* Compute and return the best algorithm for multiplying by T.
2555    The algorithm must cost less than COST_LIMIT.
2556    If retval.cost >= COST_LIMIT, no algorithm was found and all
2557    other fields of the returned struct are undefined.
2558    MODE is the machine mode of the multiplication.  */
2559 
2560 static void
2561 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2562 	    const struct mult_cost *cost_limit, machine_mode mode)
2563 {
2564   int m;
2565   struct algorithm *alg_in, *best_alg;
2566   struct mult_cost best_cost;
2567   struct mult_cost new_limit;
2568   int op_cost, op_latency;
2569   unsigned HOST_WIDE_INT orig_t = t;
2570   unsigned HOST_WIDE_INT q;
2571   int maxm, hash_index;
2572   bool cache_hit = false;
2573   enum alg_code cache_alg = alg_zero;
2574   bool speed = optimize_insn_for_speed_p ();
2575   machine_mode imode;
2576   struct alg_hash_entry *entry_ptr;
2577 
2578   /* Indicate that no algorithm is yet found.  If no algorithm
2579      is found, this value will be returned and indicate failure.  */
2580   alg_out->cost.cost = cost_limit->cost + 1;
2581   alg_out->cost.latency = cost_limit->latency + 1;
2582 
2583   if (cost_limit->cost < 0
2584       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2585     return;
2586 
2587   /* Be prepared for vector modes.  */
2588   imode = GET_MODE_INNER (mode);
2589 
2590   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2591 
2592   /* Restrict the bits of "t" to the multiplication's mode.  */
2593   t &= GET_MODE_MASK (imode);
2594 
2595   /* t == 1 can be done at zero cost.  */
2596   if (t == 1)
2597     {
2598       alg_out->ops = 1;
2599       alg_out->cost.cost = 0;
2600       alg_out->cost.latency = 0;
2601       alg_out->op[0] = alg_m;
2602       return;
2603     }
2604 
2605   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2606      fail now.  */
2607   if (t == 0)
2608     {
2609       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2610 	return;
2611       else
2612 	{
2613 	  alg_out->ops = 1;
2614 	  alg_out->cost.cost = zero_cost (speed);
2615 	  alg_out->cost.latency = zero_cost (speed);
2616 	  alg_out->op[0] = alg_zero;
2617 	  return;
2618 	}
2619     }
2620 
2621   /* We'll be needing a couple extra algorithm structures now.  */
2622 
2623   alg_in = XALLOCA (struct algorithm);
2624   best_alg = XALLOCA (struct algorithm);
2625   best_cost = *cost_limit;
2626 
2627   /* Compute the hash index.  */
2628   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2629 
2630   /* See if we already know what to do for T.  */
2631   entry_ptr = alg_hash_entry_ptr (hash_index);
2632   if (entry_ptr->t == t
2633       && entry_ptr->mode == mode
2635       && entry_ptr->speed == speed
2636       && entry_ptr->alg != alg_unknown)
2637     {
2638       cache_alg = entry_ptr->alg;
2639 
2640       if (cache_alg == alg_impossible)
2641 	{
2642 	  /* The cache tells us that it's impossible to synthesize
2643 	     multiplication by T within entry_ptr->cost.  */
2644 	  if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2645 	    /* COST_LIMIT is at least as restrictive as the one
2646 	       recorded in the hash table, in which case we have no
2647 	       hope of synthesizing a multiplication.  Just
2648 	       return.  */
2649 	    return;
2650 
2651 	  /* If we get here, COST_LIMIT is less restrictive than the
2652 	     one recorded in the hash table, so we may be able to
2653 	     synthesize a multiplication.  Proceed as if we didn't
2654 	     have the cache entry.  */
2655 	}
2656       else
2657 	{
2658 	  if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2659 	    /* The cached algorithm shows that this multiplication
2660 	       requires more cost than COST_LIMIT.  Just return.  This
2661 	       way, we don't clobber this cache entry with
2662 	       alg_impossible but retain useful information.  */
2663 	    return;
2664 
2665 	  cache_hit = true;
2666 
2667 	  switch (cache_alg)
2668 	    {
2669 	    case alg_shift:
2670 	      goto do_alg_shift;
2671 
2672 	    case alg_add_t_m2:
2673 	    case alg_sub_t_m2:
2674 	      goto do_alg_addsub_t_m2;
2675 
2676 	    case alg_add_factor:
2677 	    case alg_sub_factor:
2678 	      goto do_alg_addsub_factor;
2679 
2680 	    case alg_add_t2_m:
2681 	      goto do_alg_add_t2_m;
2682 
2683 	    case alg_sub_t2_m:
2684 	      goto do_alg_sub_t2_m;
2685 
2686 	    default:
2687 	      gcc_unreachable ();
2688 	    }
2689 	}
2690     }
2691 
2692   /* If we have a group of zero bits at the low-order part of T, try
2693      multiplying by the remaining bits and then doing a shift.  */
2694 
2695   if ((t & 1) == 0)
2696     {
2697     do_alg_shift:
2698       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2699       if (m < maxm)
2700 	{
2701 	  q = t >> m;
2702 	  /* The function expand_shift will choose between a shift and
2703 	     a sequence of additions, so the observed cost is given as
2704 	     MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2705 	  op_cost = m * add_cost (speed, mode);
2706 	  if (shift_cost (speed, mode, m) < op_cost)
2707 	    op_cost = shift_cost (speed, mode, m);
2708 	  new_limit.cost = best_cost.cost - op_cost;
2709 	  new_limit.latency = best_cost.latency - op_cost;
2710 	  synth_mult (alg_in, q, &new_limit, mode);
2711 
2712 	  alg_in->cost.cost += op_cost;
2713 	  alg_in->cost.latency += op_cost;
2714 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2715 	    {
2716 	      best_cost = alg_in->cost;
2717 	      std::swap (alg_in, best_alg);
2718 	      best_alg->log[best_alg->ops] = m;
2719 	      best_alg->op[best_alg->ops] = alg_shift;
2720 	    }
2721 
2722 	  /* See if treating ORIG_T as a signed number yields a better
2723 	     sequence.  Try this sequence only for a negative ORIG_T
2724 	     as it would be useless for a non-negative ORIG_T.  */
2725 	  if ((HOST_WIDE_INT) orig_t < 0)
2726 	    {
2727 	      /* Shift ORIG_T as follows because a right shift of a
2728 		 negative-valued signed type is implementation
2729 		 defined.  */
2730 	      q = ~(~orig_t >> m);
2731 	      /* The function expand_shift will choose between a shift
2732 		 and a sequence of additions, so the observed cost is
2733 		 given as MIN (m * add_cost(speed, mode),
2734 		 shift_cost(speed, mode, m)).  */
2735 	      op_cost = m * add_cost (speed, mode);
2736 	      if (shift_cost (speed, mode, m) < op_cost)
2737 		op_cost = shift_cost (speed, mode, m);
2738 	      new_limit.cost = best_cost.cost - op_cost;
2739 	      new_limit.latency = best_cost.latency - op_cost;
2740 	      synth_mult (alg_in, q, &new_limit, mode);
2741 
2742 	      alg_in->cost.cost += op_cost;
2743 	      alg_in->cost.latency += op_cost;
2744 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2745 		{
2746 		  best_cost = alg_in->cost;
2747 		  std::swap (alg_in, best_alg);
2748 		  best_alg->log[best_alg->ops] = m;
2749 		  best_alg->op[best_alg->ops] = alg_shift;
2750 		}
2751 	    }
2752 	}
2753       if (cache_hit)
2754 	goto done;
2755     }
2756 
2757   /* If we have an odd number, add or subtract one.  */
2758   if ((t & 1) != 0)
2759     {
2760       unsigned HOST_WIDE_INT w;
2761 
2762     do_alg_addsub_t_m2:
2763       for (w = 1; (w & t) != 0; w <<= 1)
2764 	;
2765       /* If T was -1, then W will be zero after the loop.  This is another
2766 	 case where T ends with ...111.  Handling it via (T + 1) followed
2767 	 by a subtraction produces slightly better code and selects an
2768 	 algorithm much faster than treating it like the ...0111 case
2769 	 below.  */
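      /* E.g. for T == 15 the loop leaves W == 16, so we synthesize
	 x * 16 and subtract: x * 15 becomes (x << 4) - x.  */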
2770       if (w == 0
2771 	  || (w > 2
2772 	      /* Reject the case where t is 3.
2773 		 Thus we prefer addition in that case.  */
2774 	      && t != 3))
2775 	{
2776 	  /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2777 
2778 	  op_cost = add_cost (speed, mode);
2779 	  new_limit.cost = best_cost.cost - op_cost;
2780 	  new_limit.latency = best_cost.latency - op_cost;
2781 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2782 
2783 	  alg_in->cost.cost += op_cost;
2784 	  alg_in->cost.latency += op_cost;
2785 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2786 	    {
2787 	      best_cost = alg_in->cost;
2788 	      std::swap (alg_in, best_alg);
2789 	      best_alg->log[best_alg->ops] = 0;
2790 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2791 	    }
2792 	}
2793       else
2794 	{
2795 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2796 
2797 	  op_cost = add_cost (speed, mode);
2798 	  new_limit.cost = best_cost.cost - op_cost;
2799 	  new_limit.latency = best_cost.latency - op_cost;
2800 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2801 
2802 	  alg_in->cost.cost += op_cost;
2803 	  alg_in->cost.latency += op_cost;
2804 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2805 	    {
2806 	      best_cost = alg_in->cost;
2807 	      std::swap (alg_in, best_alg);
2808 	      best_alg->log[best_alg->ops] = 0;
2809 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2810 	    }
2811 	}
2812 
2813       /* We may be able to calculate a * -7, a * -15, a * -31, etc.,
2814 	 quickly with a - a * n for some appropriate constant n.  */
2815       m = exact_log2 (-orig_t + 1);
2816       if (m >= 0 && m < maxm)
2817 	{
2818 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2819 	  /* If the target has a cheap shift-and-subtract insn use
2820 	     that in preference to a shift insn followed by a sub insn.
2821 	     Assume that the shift-and-sub is "atomic" with a latency
2822 	     equal to its cost; otherwise assume that on superscalar
2823 	     hardware the shift may be executed concurrently with the
2824 	     earlier steps in the algorithm.  */
2825 	  if (shiftsub1_cost (speed, mode, m) <= op_cost)
2826 	    {
2827 	      op_cost = shiftsub1_cost (speed, mode, m);
2828 	      op_latency = op_cost;
2829 	    }
2830 	  else
2831 	    op_latency = add_cost (speed, mode);
2832 
2833 	  new_limit.cost = best_cost.cost - op_cost;
2834 	  new_limit.latency = best_cost.latency - op_latency;
2835 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2836 		      &new_limit, mode);
2837 
2838 	  alg_in->cost.cost += op_cost;
2839 	  alg_in->cost.latency += op_latency;
2840 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2841 	    {
2842 	      best_cost = alg_in->cost;
2843 	      std::swap (alg_in, best_alg);
2844 	      best_alg->log[best_alg->ops] = m;
2845 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2846 	    }
2847 	}
2848 
2849       if (cache_hit)
2850 	goto done;
2851     }
2852 
2853   /* Look for factors of t of the form
2854      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2855      If we find such a factor, we can multiply by t using an algorithm that
2856      multiplies by q, shifts the result by m and adds/subtracts it to/from itself.
2857 
2858      We search for large factors first and loop down, even if large factors
2859      are less probable than small; if we find a large factor we will find a
2860      good sequence quickly, and therefore be able to prune (by decreasing
2861      COST_LIMIT) the search.  */
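
  /* E.g. for t == 45 this finds d == 9 (m == 3): we first synthesize
     x * 5 as (x << 2) + x and then form ((x * 5) << 3) + x * 5 == x * 45
     with one further shift-and-add.  */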
2862 
2863  do_alg_addsub_factor:
2864   for (m = floor_log2 (t - 1); m >= 2; m--)
2865     {
2866       unsigned HOST_WIDE_INT d;
2867 
2868       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2869       if (t % d == 0 && t > d && m < maxm
2870 	  && (!cache_hit || cache_alg == alg_add_factor))
2871 	{
2872 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2873 	  if (shiftadd_cost (speed, mode, m) <= op_cost)
2874 	    op_cost = shiftadd_cost (speed, mode, m);
2875 
2876 	  op_latency = op_cost;
2877 
2879 	  new_limit.cost = best_cost.cost - op_cost;
2880 	  new_limit.latency = best_cost.latency - op_latency;
2881 	  synth_mult (alg_in, t / d, &new_limit, mode);
2882 
2883 	  alg_in->cost.cost += op_cost;
2884 	  alg_in->cost.latency += op_latency;
2885 	  if (alg_in->cost.latency < op_cost)
2886 	    alg_in->cost.latency = op_cost;
2887 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2888 	    {
2889 	      best_cost = alg_in->cost;
2890 	      std::swap (alg_in, best_alg);
2891 	      best_alg->log[best_alg->ops] = m;
2892 	      best_alg->op[best_alg->ops] = alg_add_factor;
2893 	    }
2894 	  /* Other factors will have been taken care of in the recursion.  */
2895 	  break;
2896 	}
2897 
2898       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2899       if (t % d == 0 && t > d && m < maxm
2900 	  && (!cache_hit || cache_alg == alg_sub_factor))
2901 	{
2902 	  op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2903 	  if (shiftsub0_cost (speed, mode, m) <= op_cost)
2904 	    op_cost = shiftsub0_cost (speed, mode, m);
2905 
2906 	  op_latency = op_cost;
2907 
2908 	  new_limit.cost = best_cost.cost - op_cost;
2909 	  new_limit.latency = best_cost.latency - op_latency;
2910 	  synth_mult (alg_in, t / d, &new_limit, mode);
2911 
2912 	  alg_in->cost.cost += op_cost;
2913 	  alg_in->cost.latency += op_latency;
2914 	  if (alg_in->cost.latency < op_cost)
2915 	    alg_in->cost.latency = op_cost;
2916 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2917 	    {
2918 	      best_cost = alg_in->cost;
2919 	      std::swap (alg_in, best_alg);
2920 	      best_alg->log[best_alg->ops] = m;
2921 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2922 	    }
2923 	  break;
2924 	}
2925     }
2926   if (cache_hit)
2927     goto done;
2928 
2929   /* Try shift-and-add (load effective address) instructions,
2930      i.e. do a*3, a*5, a*9.  */
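  /* E.g. t = 9 gives q = t - 1 = 8, m = 3: recurse on (t - 1) >> 3 = 1
     and finish with (accum << 3) + op0, which targets with scaled
     addressing (such as x86 LEA) can often do in one instruction.  */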
2931   if ((t & 1) != 0)
2932     {
2933     do_alg_add_t2_m:
2934       q = t - 1;
2935       q = q & -q;
2936       m = exact_log2 (q);
2937       if (m >= 0 && m < maxm)
2938 	{
2939 	  op_cost = shiftadd_cost (speed, mode, m);
2940 	  new_limit.cost = best_cost.cost - op_cost;
2941 	  new_limit.latency = best_cost.latency - op_cost;
2942 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2943 
2944 	  alg_in->cost.cost += op_cost;
2945 	  alg_in->cost.latency += op_cost;
2946 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2947 	    {
2948 	      best_cost = alg_in->cost;
2949 	      std::swap (alg_in, best_alg);
2950 	      best_alg->log[best_alg->ops] = m;
2951 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2952 	    }
2953 	}
2954       if (cache_hit)
2955 	goto done;
2956 
2957     do_alg_sub_t2_m:
2958       q = t + 1;
2959       q = q & -q;
2960       m = exact_log2 (q);
2961       if (m >= 0 && m < maxm)
2962 	{
2963 	  op_cost = shiftsub0_cost (speed, mode, m);
2964 	  new_limit.cost = best_cost.cost - op_cost;
2965 	  new_limit.latency = best_cost.latency - op_cost;
2966 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2967 
2968 	  alg_in->cost.cost += op_cost;
2969 	  alg_in->cost.latency += op_cost;
2970 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2971 	    {
2972 	      best_cost = alg_in->cost;
2973 	      std::swap (alg_in, best_alg);
2974 	      best_alg->log[best_alg->ops] = m;
2975 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2976 	    }
2977 	}
2978       if (cache_hit)
2979 	goto done;
2980     }
2981 
2982  done:
2983   /* If best_cost has not decreased, we have not found any algorithm.  */
2984   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2985     {
2986       /* We failed to find an algorithm.  Record alg_impossible for
2987 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2988 	 we are asked to find an algorithm for T within the same or
2989 	 lower COST_LIMIT, we can immediately return to the
2990 	 caller.  */
2991       entry_ptr->t = t;
2992       entry_ptr->mode = mode;
2993       entry_ptr->speed = speed;
2994       entry_ptr->alg = alg_impossible;
2995       entry_ptr->cost = *cost_limit;
2996       return;
2997     }
2998 
2999   /* Cache the result.  */
3000   if (!cache_hit)
3001     {
3002       entry_ptr->t = t;
3003       entry_ptr->mode = mode;
3004       entry_ptr->speed = speed;
3005       entry_ptr->alg = best_alg->op[best_alg->ops];
3006       entry_ptr->cost.cost = best_cost.cost;
3007       entry_ptr->cost.latency = best_cost.latency;
3008     }
3009 
3010   /* If the sequence is too long for `struct algorithm'
3011      to record, make this search fail.  */
3012   if (best_alg->ops == MAX_BITS_PER_WORD)
3013     return;
3014 
3015   /* Copy the algorithm from temporary space to the space at alg_out.
3016      We avoid using structure assignment because the majority of
3017      best_alg is normally undefined, and this is a critical function.  */
3018   alg_out->ops = best_alg->ops + 1;
3019   alg_out->cost = best_cost;
3020   memcpy (alg_out->op, best_alg->op,
3021 	  alg_out->ops * sizeof *alg_out->op);
3022   memcpy (alg_out->log, best_alg->log,
3023 	  alg_out->ops * sizeof *alg_out->log);
3024 }
3025 
3026 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3027    Try three variations:
3028 
3029        - a shift/add sequence based on VAL itself
3030        - a shift/add sequence based on -VAL, followed by a negation
3031        - a shift/add sequence based on VAL - 1, followed by an addition.
3032 
3033    Return true if the cheapest of these cost less than MULT_COST,
3034    describing the algorithm in *ALG and final fixup in *VARIANT.  */
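
/* For instance, VAL = 7 can be done directly as (x << 3) - x, via
   VAL - 1 = 6 as ((x << 1) + x) << 1 followed by a final addition,
   or (mostly useful when -VAL has fewer set bits than VAL) via -VAL
   followed by a negation; the cheapest variant wins.  */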
3035 
3036 static bool
3037 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3038 		     struct algorithm *alg, enum mult_variant *variant,
3039 		     int mult_cost)
3040 {
3041   struct algorithm alg2;
3042   struct mult_cost limit;
3043   int op_cost;
3044   bool speed = optimize_insn_for_speed_p ();
3045 
3046   /* Fail quickly for impossible bounds.  */
3047   if (mult_cost < 0)
3048     return false;
3049 
3050   /* Ensure that mult_cost provides a reasonable upper bound.
3051      Any constant multiplication can be performed with less
3052      than 2 * bits additions.  */
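  /* (The bound comes from the plain binary method: one shift plus one
     add per set bit of the constant, i.e. at most 2 * bits steps, each
     counted here at add_cost.)  */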
3053   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3054   if (mult_cost > op_cost)
3055     mult_cost = op_cost;
3056 
3057   *variant = basic_variant;
3058   limit.cost = mult_cost;
3059   limit.latency = mult_cost;
3060   synth_mult (alg, val, &limit, mode);
3061 
3062   /* This works only if the inverted value actually fits in an
3063      `unsigned int'.  */
3064   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3065     {
3066       op_cost = neg_cost (speed, mode);
3067       if (MULT_COST_LESS (&alg->cost, mult_cost))
3068 	{
3069 	  limit.cost = alg->cost.cost - op_cost;
3070 	  limit.latency = alg->cost.latency - op_cost;
3071 	}
3072       else
3073 	{
3074 	  limit.cost = mult_cost - op_cost;
3075 	  limit.latency = mult_cost - op_cost;
3076 	}
3077 
3078       synth_mult (&alg2, -val, &limit, mode);
3079       alg2.cost.cost += op_cost;
3080       alg2.cost.latency += op_cost;
3081       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3082 	*alg = alg2, *variant = negate_variant;
3083     }
3084 
3085   /* This proves very useful for division-by-constant.  */
3086   op_cost = add_cost (speed, mode);
3087   if (MULT_COST_LESS (&alg->cost, mult_cost))
3088     {
3089       limit.cost = alg->cost.cost - op_cost;
3090       limit.latency = alg->cost.latency - op_cost;
3091     }
3092   else
3093     {
3094       limit.cost = mult_cost - op_cost;
3095       limit.latency = mult_cost - op_cost;
3096     }
3097 
3098   synth_mult (&alg2, val - 1, &limit, mode);
3099   alg2.cost.cost += op_cost;
3100   alg2.cost.latency += op_cost;
3101   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3102     *alg = alg2, *variant = add_variant;
3103 
3104   return MULT_COST_LESS (&alg->cost, mult_cost);
3105 }
3106 
3107 /* A subroutine of expand_mult, used for constant multiplications.
3108    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3109    convenient.  Use the shift/add sequence described by ALG and apply
3110    the final fixup specified by VARIANT.  */
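
/* For example, with VAL = 10 synth_mult typically yields the sequence
   { alg_m, alg_add_t_m2 (log 2), alg_shift (log 1) }: ACCUM starts as
   OP0 (val_so_far = 1), becomes ACCUM + (OP0 << 2) (val_so_far = 5),
   then is shifted left by 1 (val_so_far = 10); the assertion at the
   end verifies that val_so_far reached VAL.  */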
3111 
3112 static rtx
3113 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3114 		   rtx target, const struct algorithm *alg,
3115 		   enum mult_variant variant)
3116 {
3117   HOST_WIDE_INT val_so_far;
3118   rtx_insn *insn;
3119   rtx accum, tem;
3120   int opno;
3121   machine_mode nmode;
3122 
3123   /* Avoid referencing memory over and over and invalid sharing
3124      on SUBREGs.  */
3125   op0 = force_reg (mode, op0);
3126 
3127   /* ACCUM starts out either as OP0 or as a zero, depending on
3128      the first operation.  */
3129 
3130   if (alg->op[0] == alg_zero)
3131     {
3132       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3133       val_so_far = 0;
3134     }
3135   else if (alg->op[0] == alg_m)
3136     {
3137       accum = copy_to_mode_reg (mode, op0);
3138       val_so_far = 1;
3139     }
3140   else
3141     gcc_unreachable ();
3142 
3143   for (opno = 1; opno < alg->ops; opno++)
3144     {
3145       int log = alg->log[opno];
3146       rtx shift_subtarget = optimize ? 0 : accum;
3147       rtx add_target
3148 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
3149 	   && !optimize)
3150 	  ? target : 0;
3151       rtx accum_target = optimize ? 0 : accum;
3152       rtx accum_inner;
3153 
3154       switch (alg->op[opno])
3155 	{
3156 	case alg_shift:
3157 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3158 	  /* REG_EQUAL note will be attached to the following insn.  */
3159 	  emit_move_insn (accum, tem);
3160 	  val_so_far <<= log;
3161 	  break;
3162 
3163 	case alg_add_t_m2:
3164 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3165 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3166 				 add_target ? add_target : accum_target);
3167 	  val_so_far += (HOST_WIDE_INT) 1 << log;
3168 	  break;
3169 
3170 	case alg_sub_t_m2:
3171 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3172 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3173 				 add_target ? add_target : accum_target);
3174 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
3175 	  break;
3176 
3177 	case alg_add_t2_m:
3178 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3179 				log, shift_subtarget, 0);
3180 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3181 				 add_target ? add_target : accum_target);
3182 	  val_so_far = (val_so_far << log) + 1;
3183 	  break;
3184 
3185 	case alg_sub_t2_m:
3186 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3187 				log, shift_subtarget, 0);
3188 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3189 				 add_target ? add_target : accum_target);
3190 	  val_so_far = (val_so_far << log) - 1;
3191 	  break;
3192 
3193 	case alg_add_factor:
3194 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3195 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3196 				 add_target ? add_target : accum_target);
3197 	  val_so_far += val_so_far << log;
3198 	  break;
3199 
3200 	case alg_sub_factor:
3201 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3202 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3203 				 (add_target
3204 				  ? add_target : (optimize ? 0 : tem)));
3205 	  val_so_far = (val_so_far << log) - val_so_far;
3206 	  break;
3207 
3208 	default:
3209 	  gcc_unreachable ();
3210 	}
3211 
3212       if (SCALAR_INT_MODE_P (mode))
3213 	{
3214 	  /* Write a REG_EQUAL note on the last insn so that we can cse
3215 	     multiplication sequences.  Note that if ACCUM is a SUBREG,
3216 	     we've set the inner register and must properly indicate that.  */
3217           tem = op0, nmode = mode;
3218           accum_inner = accum;
3219           if (GET_CODE (accum) == SUBREG)
3220 	    {
3221 	      accum_inner = SUBREG_REG (accum);
3222 	      nmode = GET_MODE (accum_inner);
3223 	      tem = gen_lowpart (nmode, op0);
3224 	    }
3225 
3226           insn = get_last_insn ();
3227           set_dst_reg_note (insn, REG_EQUAL,
3228 			    gen_rtx_MULT (nmode, tem,
3229 					  gen_int_mode (val_so_far, nmode)),
3230 			    accum_inner);
3231 	}
3232     }
3233 
3234   if (variant == negate_variant)
3235     {
3236       val_so_far = -val_so_far;
3237       accum = expand_unop (mode, neg_optab, accum, target, 0);
3238     }
3239   else if (variant == add_variant)
3240     {
3241       val_so_far = val_so_far + 1;
3242       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3243     }
3244 
3245   /* Compare only the bits of val and val_so_far that are significant
3246      in the result mode, to avoid sign-/zero-extension confusion.  */
3247   nmode = GET_MODE_INNER (mode);
3248   val &= GET_MODE_MASK (nmode);
3249   val_so_far &= GET_MODE_MASK (nmode);
3250   gcc_assert (val == val_so_far);
3251 
3252   return accum;
3253 }
3254 
3255 /* Perform a multiplication and return an rtx for the result.
3256    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3257    TARGET is a suggestion for where to store the result (an rtx).
3258 
3259    We check specially for a constant integer as OP1.
3260    If you want this check for OP0 as well, then before calling
3261    you should swap the two operands if OP0 would be constant.  */
3262 
3263 rtx
3264 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3265 	     int unsignedp)
3266 {
3267   enum mult_variant variant;
3268   struct algorithm algorithm;
3269   rtx scalar_op1;
3270   int max_cost;
3271   bool speed = optimize_insn_for_speed_p ();
3272   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3273 
3274   if (CONSTANT_P (op0))
3275     std::swap (op0, op1);
3276 
3277   /* For vectors, there are several simplifications that can be made if
3278      all elements of the vector constant are identical.  */
3279   scalar_op1 = unwrap_const_vec_duplicate (op1);
3280 
3281   if (INTEGRAL_MODE_P (mode))
3282     {
3283       rtx fake_reg;
3284       HOST_WIDE_INT coeff;
3285       bool is_neg;
3286       int mode_bitsize;
3287 
3288       if (op1 == CONST0_RTX (mode))
3289 	return op1;
3290       if (op1 == CONST1_RTX (mode))
3291 	return op0;
3292       if (op1 == CONSTM1_RTX (mode))
3293 	return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3294 			    op0, target, 0);
3295 
3296       if (do_trapv)
3297 	goto skip_synth;
3298 
3299       /* If mode is integer vector mode, check if the backend supports
3300 	 vector lshift (by scalar or vector) at all.  If not, we can't use
3301 	 a synthesized multiply.  */
3302       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3303 	  && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3304 	  && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3305 	goto skip_synth;
3306 
3307       /* These are the operations that are potentially turned into
3308 	 a sequence of shifts and additions.  */
3309       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3310 
3311       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3312 	 less than or equal in size to `unsigned int' this doesn't matter.
3313 	 If the mode is larger than `unsigned int', then synth_mult works
3314 	 only if the constant value exactly fits in an `unsigned int' without
3315 	 any truncation.  This means that multiplying by negative values does
3316 	 not work; results are off by 2^32 on a 32-bit machine.  */
3317       if (CONST_INT_P (scalar_op1))
3318 	{
3319 	  coeff = INTVAL (scalar_op1);
3320 	  is_neg = coeff < 0;
3321 	}
3322 #if TARGET_SUPPORTS_WIDE_INT
3323       else if (CONST_WIDE_INT_P (scalar_op1))
3324 #else
3325       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3326 #endif
3327 	{
3328 	  int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3329 	  /* Perfect power of 2 (other than 1, which is handled above).  */
3330 	  if (shift > 0)
3331 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3332 				 shift, target, unsignedp);
3333 	  else
3334 	    goto skip_synth;
3335 	}
3336       else
3337 	goto skip_synth;
3338 
3339       /* We used to test optimize here, on the grounds that it's better to
3340 	 produce a smaller program when -O is not used.  But this causes
3341 	 such a terrible slowdown sometimes that it seems better to always
3342 	 use synth_mult.  */
3343 
3344       /* Special case powers of two.  */
3345       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3346 	  && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3347 	return expand_shift (LSHIFT_EXPR, mode, op0,
3348 			     floor_log2 (coeff), target, unsignedp);
3349 
3350       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3351 
3352       /* Attempt to handle multiplication of DImode values by negative
3353 	 coefficients, by performing the multiplication by a positive
3354 	 multiplier and then inverting the result.  */
3355       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3356 	{
3357 	  /* It's safe to use -coeff even for INT_MIN, as the
3358 	     result is interpreted as an unsigned coefficient.
3359 	     Exclude cost of op0 from max_cost to match the cost
3360 	     calculation of the synth_mult.  */
3361 	  coeff = -(unsigned HOST_WIDE_INT) coeff;
3362 	  max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3363 				    mode, speed)
3364 		      - neg_cost (speed, mode));
3365 	  if (max_cost <= 0)
3366 	    goto skip_synth;
3367 
3368 	  /* Special case powers of two.  */
3369 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3370 	    {
3371 	      rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3372 				       floor_log2 (coeff), target, unsignedp);
3373 	      return expand_unop (mode, neg_optab, temp, target, 0);
3374 	    }
3375 
3376 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3377 				   max_cost))
3378 	    {
3379 	      rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3380 					    &algorithm, variant);
3381 	      return expand_unop (mode, neg_optab, temp, target, 0);
3382 	    }
3383 	  goto skip_synth;
3384 	}
3385 
3386       /* Exclude cost of op0 from max_cost to match the cost
3387 	 calculation of the synth_mult.  */
3388       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3389       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3390 	return expand_mult_const (mode, op0, coeff, target,
3391 				  &algorithm, variant);
3392     }
3393  skip_synth:
3394 
3395   /* Expand x*2.0 as x+x.  */
3396   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3397       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3398     {
3399       op0 = force_reg (GET_MODE (op0), op0);
3400       return expand_binop (mode, add_optab, op0, op0,
3401 			   target, unsignedp, OPTAB_LIB_WIDEN);
3402     }
3403 
3404   /* This used to use umul_optab if unsigned, but for non-widening multiply
3405      there is no difference between signed and unsigned.  */
3406   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3407 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3408   gcc_assert (op0);
3409   return op0;
3410 }
3411 
3412 /* Return a cost estimate for multiplying a register by the given
3413    COEFFicient in the given MODE and SPEED.  */
3414 
3415 int
3416 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3417 {
3418   int max_cost;
3419   struct algorithm algorithm;
3420   enum mult_variant variant;
3421 
3422   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3423   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3424 			   mode, speed);
3425   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3426     return algorithm.cost.cost;
3427   else
3428     return max_cost;
3429 }
3430 
3431 /* Perform a widening multiplication and return an rtx for the result.
3432    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3433    TARGET is a suggestion for where to store the result (an rtx).
3434    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3435    or smul_widen_optab.
3436 
3437    We check specially for a constant integer as OP1, comparing the
3438    cost of a widening multiply against the cost of a sequence of shifts
3439    and adds.  */
3440 
3441 rtx
3442 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3443 		      int unsignedp, optab this_optab)
3444 {
3445   bool speed = optimize_insn_for_speed_p ();
3446   rtx cop1;
3447 
3448   if (CONST_INT_P (op1)
3449       && GET_MODE (op0) != VOIDmode
3450       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3451 				this_optab == umul_widen_optab))
3452       && CONST_INT_P (cop1)
3453       && (INTVAL (cop1) >= 0
3454 	  || HWI_COMPUTABLE_MODE_P (mode)))
3455     {
3456       HOST_WIDE_INT coeff = INTVAL (cop1);
3457       int max_cost;
3458       enum mult_variant variant;
3459       struct algorithm algorithm;
3460 
3461       if (coeff == 0)
3462 	return CONST0_RTX (mode);
3463 
3464       /* Special case powers of two.  */
3465       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3466 	{
3467 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3468 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3469 			       floor_log2 (coeff), target, unsignedp);
3470 	}
3471 
3472       /* Exclude cost of op0 from max_cost to match the cost
3473 	 calculation of the synth_mult.  */
3474       max_cost = mul_widen_cost (speed, mode);
3475       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3476 			       max_cost))
3477 	{
3478 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3479 	  return expand_mult_const (mode, op0, coeff, target,
3480 				    &algorithm, variant);
3481 	}
3482     }
3483   return expand_binop (mode, this_optab, op0, op1, target,
3484 		       unsignedp, OPTAB_LIB_WIDEN);
3485 }
3486 
3487 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3488    replace division by D, and put the least significant N bits of the result
3489    in *MULTIPLIER_PTR and return the most significant bit.
3490 
3491    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3492    needed precision is in PRECISION (should be <= N).
3493 
3494    PRECISION should be as small as possible so this function can choose
3495    the multiplier more freely.  */
3496 
3497    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3498    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3499 
3500    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3501    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
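
/* Worked example, assuming N = PRECISION = 32 and D = 7: lgup = 3 and
   mhigh = floor ((2**35 + 2**3) / 7) = 0x124924925, a 33-bit value, so
   the function stores the low 32 bits 0x24924925 in *MULTIPLIER_PTR,
   sets *POST_SHIFT_PTR to 3, and returns 1 for the uppermost multiplier
   bit, which callers handle with the add-and-shift fixup sequence.  */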
3502 
3503 unsigned HOST_WIDE_INT
3504 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3505 		   unsigned HOST_WIDE_INT *multiplier_ptr,
3506 		   int *post_shift_ptr, int *lgup_ptr)
3507 {
3508   int lgup, post_shift;
3509   int pow, pow2;
3510 
3511   /* lgup = ceil(log2(divisor)); */
3512   lgup = ceil_log2 (d);
3513 
3514   gcc_assert (lgup <= n);
3515 
3516   pow = n + lgup;
3517   pow2 = n + lgup - precision;
3518 
3519   /* mlow = 2^(N + lgup)/d */
3520   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3521   wide_int mlow = wi::udiv_trunc (val, d);
3522 
3523   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3524   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3525   wide_int mhigh = wi::udiv_trunc (val, d);
3526 
3527   /* If precision == N, then mlow, mhigh exceed 2^N
3528      (but they do not exceed 2^(N+1)).  */
3529 
3530   /* Reduce to lowest terms.  */
3531   for (post_shift = lgup; post_shift > 0; post_shift--)
3532     {
3533       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3534 						       HOST_BITS_PER_WIDE_INT);
3535       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3536 						       HOST_BITS_PER_WIDE_INT);
3537       if (ml_lo >= mh_lo)
3538 	break;
3539 
3540       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3541       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3542     }
3543 
3544   *post_shift_ptr = post_shift;
3545   *lgup_ptr = lgup;
3546   if (n < HOST_BITS_PER_WIDE_INT)
3547     {
3548       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3549       *multiplier_ptr = mhigh.to_uhwi () & mask;
3550       return mhigh.to_uhwi () >= mask;
3551     }
3552   else
3553     {
3554       *multiplier_ptr = mhigh.to_uhwi ();
3555       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3556     }
3557 }
3558 
3559 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3560    congruent to 1 (mod 2**N).  */
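
/* E.g. for X = 7 and N = 8: Y starts as 7 (7*7 = 49 == 1 mod 2**3) and
   two Newton steps Y = Y * (2 - X*Y) mod 2**8 give Y = 183, for which
   7 * 183 = 1281 == 1 (mod 256).  */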
3561 
3562 static unsigned HOST_WIDE_INT
3563 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3564 {
3565   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3566 
3567   /* The algorithm notes that the choice y = x satisfies
3568      x*y == 1 mod 2^3, since x is assumed odd.
3569      Each iteration doubles the number of bits of significance in y.  */
3570 
3571   unsigned HOST_WIDE_INT mask;
3572   unsigned HOST_WIDE_INT y = x;
3573   int nbit = 3;
3574 
3575   mask = (n == HOST_BITS_PER_WIDE_INT
3576 	  ? ~(unsigned HOST_WIDE_INT) 0
3577 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3578 
3579   while (nbit < n)
3580     {
3581       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3582       nbit *= 2;
3583     }
3584   return y;
3585 }
3586 
3587 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3588    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3589    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3590    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3591    become signed.
3592 
3593    The result is put in TARGET if that is convenient.
3594 
3595    MODE is the mode of operation.  */
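
/* The underlying identity: reading an N-bit value A as unsigned adds
   2**N when A < 0, so the unsigned high part of the product equals the
   signed high part plus (A < 0 ? B : 0) + (B < 0 ? A : 0) modulo 2**N;
   the two arithmetic-shift-and-AND pairs below form exactly those
   correction terms.  */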
3596 
3597 rtx
3598 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3599 			     rtx op1, rtx target, int unsignedp)
3600 {
3601   rtx tem;
3602   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3603 
3604   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3605 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3606   tem = expand_and (mode, tem, op1, NULL_RTX);
3607   adj_operand
3608     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3609 		     adj_operand);
3610 
3611   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3612 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3613   tem = expand_and (mode, tem, op0, NULL_RTX);
3614   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3615 			  target);
3616 
3617   return target;
3618 }
3619 
3620 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3621 
3622 static rtx
3623 extract_high_half (machine_mode mode, rtx op)
3624 {
3625   machine_mode wider_mode;
3626 
3627   if (mode == word_mode)
3628     return gen_highpart (mode, op);
3629 
3630   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3631 
3632   wider_mode = GET_MODE_WIDER_MODE (mode);
3633   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3634 		     GET_MODE_BITSIZE (mode), 0, 1);
3635   return convert_modes (mode, wider_mode, op, 0);
3636 }
3637 
3638 /* Like expmed_mult_highpart, but only consider using a multiplication
3639    optab.  OP1 is an rtx for the constant operand.  */
3640 
3641 static rtx
3642 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3643 			    rtx target, int unsignedp, int max_cost)
3644 {
3645   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3646   machine_mode wider_mode;
3647   optab moptab;
3648   rtx tem;
3649   int size;
3650   bool speed = optimize_insn_for_speed_p ();
3651 
3652   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3653 
3654   wider_mode = GET_MODE_WIDER_MODE (mode);
3655   size = GET_MODE_BITSIZE (mode);
3656 
3657   /* Firstly, try using a multiplication insn that only generates the needed
3658      high part of the product, and in the sign flavor of unsignedp.  */
3659   if (mul_highpart_cost (speed, mode) < max_cost)
3660     {
3661       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3662       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3663 			  unsignedp, OPTAB_DIRECT);
3664       if (tem)
3665 	return tem;
3666     }
3667 
3668   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3669      Need to adjust the result after the multiplication.  */
3670   if (size - 1 < BITS_PER_WORD
3671       && (mul_highpart_cost (speed, mode)
3672 	  + 2 * shift_cost (speed, mode, size-1)
3673 	  + 4 * add_cost (speed, mode) < max_cost))
3674     {
3675       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3676       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3677 			  unsignedp, OPTAB_DIRECT);
3678       if (tem)
3679 	/* We used the wrong signedness.  Adjust the result.  */
3680 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3681 					    tem, unsignedp);
3682     }
3683 
3684   /* Try widening multiplication.  */
3685   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3686   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3687       && mul_widen_cost (speed, wider_mode) < max_cost)
3688     {
3689       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3690 			  unsignedp, OPTAB_WIDEN);
3691       if (tem)
3692 	return extract_high_half (mode, tem);
3693     }
3694 
3695   /* Try widening the mode and performing a non-widening multiplication.  */
3696   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3697       && size - 1 < BITS_PER_WORD
3698       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3699 	  < max_cost))
3700     {
3701       rtx_insn *insns;
3702       rtx wop0, wop1;
3703 
3704       /* We need to widen the operands, for example to ensure the
3705 	 constant multiplier is correctly sign or zero extended.
3706 	 Use a sequence to clean up any instructions emitted by
3707 	 the conversions if things don't work out.  */
3708       start_sequence ();
3709       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3710       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3711       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3712 			  unsignedp, OPTAB_WIDEN);
3713       insns = get_insns ();
3714       end_sequence ();
3715 
3716       if (tem)
3717 	{
3718 	  emit_insn (insns);
3719 	  return extract_high_half (mode, tem);
3720 	}
3721     }
3722 
3723   /* Try widening multiplication of opposite signedness, and adjust.  */
3724   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3725   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3726       && size - 1 < BITS_PER_WORD
3727       && (mul_widen_cost (speed, wider_mode)
3728 	  + 2 * shift_cost (speed, mode, size-1)
3729 	  + 4 * add_cost (speed, mode) < max_cost))
3730     {
3731       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3732 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3733       if (tem != 0)
3734 	{
3735 	  tem = extract_high_half (mode, tem);
3736 	  /* We used the wrong signedness.  Adjust the result.  */
3737 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3738 					      target, unsignedp);
3739 	}
3740     }
3741 
3742   return 0;
3743 }
3744 
3745 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3746    putting the high half of the result in TARGET if that is convenient,
3747    and returning where the result is.  If the operation cannot be performed,
3748    0 is returned.
3749 
3750    MODE is the mode of operation and result.
3751 
3752    UNSIGNEDP nonzero means unsigned multiply.
3753 
3754    MAX_COST is the total allowed cost for the expanded RTL.  */
3755 
3756 static rtx
3757 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3758 		      rtx target, int unsignedp, int max_cost)
3759 {
3760   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3761   unsigned HOST_WIDE_INT cnst1;
3762   int extra_cost;
3763   bool sign_adjust = false;
3764   enum mult_variant variant;
3765   struct algorithm alg;
3766   rtx tem;
3767   bool speed = optimize_insn_for_speed_p ();
3768 
3769   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3770   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3771   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3772 
3773   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3774 
3775   /* We can't optimize modes wider than BITS_PER_WORD.
3776      ??? We might be able to perform double-word arithmetic if
3777      mode == word_mode, however all the cost calculations in
3778      synth_mult etc. assume single-word operations.  */
3779   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3780     return expmed_mult_highpart_optab (mode, op0, op1, target,
3781 				       unsignedp, max_cost);
3782 
3783   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3784 
3785   /* Check whether we try to multiply by a negative constant.  */
3786   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3787     {
3788       sign_adjust = true;
3789       extra_cost += add_cost (speed, mode);
3790     }
3791 
3792   /* See whether shift/add multiplication is cheap enough.  */
3793   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3794 			   max_cost - extra_cost))
3795     {
3796       /* See whether the specialized multiplication optabs are
3797 	 cheaper than the shift/add version.  */
3798       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3799 					alg.cost.cost + extra_cost);
3800       if (tem)
3801 	return tem;
3802 
3803       tem = convert_to_mode (wider_mode, op0, unsignedp);
3804       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3805       tem = extract_high_half (mode, tem);
3806 
3807       /* Adjust result for signedness.  */
3808       if (sign_adjust)
3809 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3810 
3811       return tem;
3812     }
3813   return expmed_mult_highpart_optab (mode, op0, op1, target,
3814 				     unsignedp, max_cost);
3815 }
3816 
3817 
3818 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
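
/* For D = 8 (logd = 3) and cheap shifts, the branchless sequence below
   computes ((OP0 + 7) & 7) - 7 when OP0 is negative: e.g. OP0 = -10
   gives (-3 & 7) - 7 = 5 - 7 = -2, matching C's truncating -10 % 8;
   for non-negative OP0 it reduces to OP0 & 7.  */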
3819 
3820 static rtx
3821 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3822 {
3823   rtx result, temp, shift;
3824   rtx_code_label *label;
3825   int logd;
3826   int prec = GET_MODE_PRECISION (mode);
3827 
3828   logd = floor_log2 (d);
3829   result = gen_reg_rtx (mode);
3830 
3831   /* Avoid conditional branches when they're expensive.  */
3832   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3833       && optimize_insn_for_speed_p ())
3834     {
3835       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3836 				      mode, 0, -1);
3837       if (signmask)
3838 	{
3839 	  HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3840 	  signmask = force_reg (mode, signmask);
3841 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3842 
3843 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3844 	     which instruction sequence to use.  If logical right shifts
3845 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3846 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3847 
3848 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3849 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3850 	      || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3851 		  > COSTS_N_INSNS (2)))
3852 	    {
3853 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3854 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3855 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3856 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3857 	      temp = expand_binop (mode, and_optab, temp,
3858 				   gen_int_mode (masklow, mode),
3859 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3860 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3861 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3862 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3863 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3864 	    }
3865 	  else
3866 	    {
3867 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3868 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3869 	      signmask = force_reg (mode, signmask);
3870 
3871 	      temp = expand_binop (mode, add_optab, op0, signmask,
3872 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3873 	      temp = expand_binop (mode, and_optab, temp,
3874 				   gen_int_mode (masklow, mode),
3875 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3876 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3877 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3878 	    }
3879 	  return temp;
3880 	}
3881     }
3882 
3883   /* Mask contains the mode's signbit and the significant bits of the
3884      modulus.  By including the signbit in the operation, many targets
3885      can avoid an explicit compare operation in the following comparison
3886      against zero.  */
3887   wide_int mask = wi::mask (logd, false, prec);
3888   mask = wi::set_bit (mask, prec - 1);
3889 
3890   temp = expand_binop (mode, and_optab, op0,
3891 		       immed_wide_int_const (mask, mode),
3892 		       result, 1, OPTAB_LIB_WIDEN);
3893   if (temp != result)
3894     emit_move_insn (result, temp);
3895 
3896   label = gen_label_rtx ();
3897   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3898 
3899   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3900 		       0, OPTAB_LIB_WIDEN);
3901 
3902   mask = wi::mask (logd, true, prec);
3903   temp = expand_binop (mode, ior_optab, temp,
3904 		       immed_wide_int_const (mask, mode),
3905 		       result, 1, OPTAB_LIB_WIDEN);
3906   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3907 		       0, OPTAB_LIB_WIDEN);
3908   if (temp != result)
3909     emit_move_insn (result, temp);
3910   emit_label (label);
3911   return result;
3912 }
3913 
3914 /* Expand signed division of OP0 by a power of two D in mode MODE.
3915    This routine is only called for positive values of D.  */
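
/* The branch-free variants below compute OP0 / 2**logd as
   (OP0 + bias) >> logd (arithmetic shift), where bias is D - 1 when
   OP0 is negative and 0 otherwise: e.g. for D = 8 and OP0 = -10 the
   bias is 7, and (-10 + 7) >> 3 = -3 >> 3 = -1, the truncating
   quotient.  */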
3916 
3917 static rtx
3918 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3919 {
3920   rtx temp;
3921   rtx_code_label *label;
3922   int logd;
3923 
3924   logd = floor_log2 (d);
3925 
3926   if (d == 2
3927       && BRANCH_COST (optimize_insn_for_speed_p (),
3928 		      false) >= 1)
3929     {
3930       temp = gen_reg_rtx (mode);
3931       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3932       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3933 			   0, OPTAB_LIB_WIDEN);
3934       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3935     }
3936 
3937   if (HAVE_conditional_move
3938       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3939     {
3940       rtx temp2;
3941 
3942       start_sequence ();
3943       temp2 = copy_to_mode_reg (mode, op0);
3944       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3945 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3946       temp = force_reg (mode, temp);
3947 
3948       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3949       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3950 				     mode, temp, temp2, mode, 0);
3951       if (temp2)
3952 	{
3953 	  rtx_insn *seq = get_insns ();
3954 	  end_sequence ();
3955 	  emit_insn (seq);
3956 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3957 	}
3958       end_sequence ();
3959     }
3960 
3961   if (BRANCH_COST (optimize_insn_for_speed_p (),
3962 		   false) >= 2)
3963     {
3964       int ushift = GET_MODE_BITSIZE (mode) - logd;
3965 
3966       temp = gen_reg_rtx (mode);
3967       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3968       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3969 	  || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3970 	     > COSTS_N_INSNS (1))
3971 	temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3972 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3973       else
3974 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3975 			     ushift, NULL_RTX, 1);
3976       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3977 			   0, OPTAB_LIB_WIDEN);
3978       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3979     }
3980 
3981   label = gen_label_rtx ();
3982   temp = copy_to_mode_reg (mode, op0);
3983   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3984   expand_inc (temp, gen_int_mode (d - 1, mode));
3985   emit_label (label);
3986   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3987 }
3988 
3989 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3990    if that is convenient, and returning where the result is.
3991    You may request either the quotient or the remainder as the result;
3992    specify REM_FLAG nonzero to get the remainder.
3993 
3994    CODE is the expression code for which kind of division this is;
3995    it controls how rounding is done.  MODE is the machine mode to use.
3996    UNSIGNEDP nonzero means do unsigned division.  */
3997 
3998 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3999    and then correct it by or'ing in missing high bits
4000    if result of ANDI is nonzero.
4001    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
4002    This could optimize to a bfexts instruction.
4003    But C doesn't use these operations, so their optimizations are
4004    left for later.  */
4005 /* ??? For modulo, we don't actually need the highpart of the first product,
4006    the low part will do nicely.  And for small divisors, the second multiply
4007    can also be a low-part only multiply or even be completely left out.
4008    E.g. to calculate the remainder of a division by 3 with a 32 bit
4009    multiply, multiply with 0x55555556 and extract the upper two bits;
4010    the result is exact for inputs up to 0x1fffffff.
4011    The input range can be reduced by using cross-sum rules.
4012    For odd divisors >= 3, the following table gives right shift counts
4013    so that if a number is shifted by an integer multiple of the given
4014    amount, the remainder stays the same:
4015    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
4016    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
4017    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
4018    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
4019    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4020 
4021    Cross-sum rules for even numbers can be derived by leaving as many bits
4022    to the right alone as the divisor has zeros to the right.
4023    E.g. if x is an unsigned 32 bit number:
4024    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4025    */
4026 
4027 rtx
4028 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4029 	       rtx op0, rtx op1, rtx target, int unsignedp)
4030 {
4031   machine_mode compute_mode;
4032   rtx tquotient;
4033   rtx quotient = 0, remainder = 0;
4034   rtx_insn *last;
4035   int size;
4036   rtx_insn *insn;
4037   optab optab1, optab2;
4038   int op1_is_constant, op1_is_pow2 = 0;
4039   int max_cost, extra_cost;
4040   static HOST_WIDE_INT last_div_const = 0;
4041   bool speed = optimize_insn_for_speed_p ();
4042 
4043   op1_is_constant = CONST_INT_P (op1);
4044   if (op1_is_constant)
4045     {
4046       wide_int ext_op1 = rtx_mode_t (op1, mode);
4047       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4048 		     || (! unsignedp
4049 			 && wi::popcount (wi::neg (ext_op1)) == 1));
4050     }
4051 
4052   /*
4053      This is the structure of expand_divmod:
4054 
4055      First comes code to fix up the operands so we can perform the operations
4056      correctly and efficiently.
4057 
4058      Second comes a switch statement with code specific for each rounding mode.
4059      For some special operands this code emits all RTL for the desired
4060      operation, for other cases, it generates only a quotient and stores it in
4061      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4062      to indicate that it has not done anything.
4063 
4064      Last comes code that finishes the operation.  If QUOTIENT is set and
4065      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4066      QUOTIENT is not set, it is computed using trunc rounding.
4067 
4068      We try to generate special code for division and remainder when OP1 is a
4069      constant.  If |OP1| = 2**n we can use shifts and some other fast
4070      operations.  For other values of OP1, we compute a carefully selected
4071      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4072      by m.
4073 
4074      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4075      half of the product.  Different strategies for generating the product are
4076      implemented in expmed_mult_highpart.
4077 
4078      If what we actually want is the remainder, we generate that by another
4079      by-constant multiplication and a subtraction.  */
4080 
4081   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4082      code below will malfunction if we are, so check here and handle
4083      the special case if so.  */
4084   if (op1 == const1_rtx)
4085     return rem_flag ? const0_rtx : op0;
4086 
4087   /* When dividing by -1, we could get an overflow.
4088      negv_optab can handle overflows.  */
4089   if (! unsignedp && op1 == constm1_rtx)
4090     {
4091       if (rem_flag)
4092 	return const0_rtx;
4093       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4094 			  ? negv_optab : neg_optab, op0, target, 0);
4095     }
4096 
4097   if (target
4098       /* Don't use the function value register as a target
4099 	 since we have to read it as well as write it,
4100 	 and function-inlining gets confused by this.  */
4101       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4102 	  /* Don't clobber an operand while doing a multi-step calculation.  */
4103 	  || ((rem_flag || op1_is_constant)
4104 	      && (reg_mentioned_p (target, op0)
4105 		  || (MEM_P (op0) && MEM_P (target))))
4106 	  || reg_mentioned_p (target, op1)
4107 	  || (MEM_P (op1) && MEM_P (target))))
4108     target = 0;
4109 
4110   /* Get the mode in which to perform this computation.  Normally it will
4111      be MODE, but sometimes we can't do the desired operation in MODE.
4112      If so, pick a wider mode in which we can do the operation.  Convert
4113      to that mode at the start to avoid repeated conversions.
4114 
4115      First see what operations we need.  These depend on the expression
4116      we are evaluating.  (We assume that divxx3 insns exist under the
4117      same conditions as modxx3 insns, and that these insns don't normally
4118      fail.  If these assumptions are not correct, we may generate less
4119      efficient code in some cases.)
4120 
4121      Then see if we find a mode in which we can open-code that operation
4122      (either a division, modulus, or shift).  Finally, check for the smallest
4123      mode for which we can do the operation with a library call.  */
4124 
4125   /* We might want to refine this now that we have division-by-constant
4126      optimization.  Since expmed_mult_highpart tries so many variants, it is
4127      not straightforward to generalize this.  Maybe we should make an array
4128      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4129 
4130   optab1 = (op1_is_pow2
4131 	    ? (unsignedp ? lshr_optab : ashr_optab)
4132 	    : (unsignedp ? udiv_optab : sdiv_optab));
4133   optab2 = (op1_is_pow2 ? optab1
4134 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
4135 
4136   for (compute_mode = mode; compute_mode != VOIDmode;
4137        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4138     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4139 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4140       break;
4141 
4142   if (compute_mode == VOIDmode)
4143     for (compute_mode = mode; compute_mode != VOIDmode;
4144 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4145       if (optab_libfunc (optab1, compute_mode)
4146 	  || optab_libfunc (optab2, compute_mode))
4147 	break;
4148 
4149   /* If we still couldn't find a mode, use MODE, but expand_binop will
4150      probably die.  */
4151   if (compute_mode == VOIDmode)
4152     compute_mode = mode;
4153 
4154   if (target && GET_MODE (target) == compute_mode)
4155     tquotient = target;
4156   else
4157     tquotient = gen_reg_rtx (compute_mode);
4158 
4159   size = GET_MODE_BITSIZE (compute_mode);
4160 #if 0
4161   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4162      (mode), and thereby get better code when OP1 is a constant.  Do that
4163      later.  It will require going over all usages of SIZE below.  */
4164   size = GET_MODE_BITSIZE (mode);
4165 #endif
4166 
4167   /* Only deduct something for a REM if the last divide done was
4168      for a different constant.  Then set the constant of the last
4169      divide.  */
4170   max_cost = (unsignedp
4171 	      ? udiv_cost (speed, compute_mode)
4172 	      : sdiv_cost (speed, compute_mode));
4173   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4174 		     && INTVAL (op1) == last_div_const))
4175     max_cost -= (mul_cost (speed, compute_mode)
4176 		 + add_cost (speed, compute_mode));
4177 
4178   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4179 
4180   /* Now convert to the best mode to use.  */
4181   if (compute_mode != mode)
4182     {
4183       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4184       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4185 
4186       /* convert_modes may have placed op1 into a register, so we
4187 	 must recompute the following.  */
4188       op1_is_constant = CONST_INT_P (op1);
4189       if (op1_is_constant)
4190 	{
4191 	  wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4192 	  op1_is_pow2 = (wi::popcount (ext_op1) == 1
4193 			 || (! unsignedp
4194 			     && wi::popcount (wi::neg (ext_op1)) == 1));
4195 	}
4196       else
4197 	op1_is_pow2 = 0;
4198     }
4199 
4200   /* If one of the operands is a volatile MEM, copy it into a register.  */
4201 
4202   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4203     op0 = force_reg (compute_mode, op0);
4204   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4205     op1 = force_reg (compute_mode, op1);
4206 
4207   /* If we need the remainder or if OP1 is constant, we need to
4208      put OP0 in a register in case it has any queued subexpressions.  */
4209   if (rem_flag || op1_is_constant)
4210     op0 = force_reg (compute_mode, op0);
4211 
4212   last = get_last_insn ();
4213 
4214   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4215   if (unsignedp)
4216     {
4217       if (code == FLOOR_DIV_EXPR)
4218 	code = TRUNC_DIV_EXPR;
4219       if (code == FLOOR_MOD_EXPR)
4220 	code = TRUNC_MOD_EXPR;
4221       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4222 	code = TRUNC_DIV_EXPR;
4223     }
4224 
4225   if (op1 != const0_rtx)
4226     switch (code)
4227       {
4228       case TRUNC_MOD_EXPR:
4229       case TRUNC_DIV_EXPR:
4230 	if (op1_is_constant)
4231 	  {
4232 	    if (unsignedp)
4233 	      {
4234 		unsigned HOST_WIDE_INT mh, ml;
4235 		int pre_shift, post_shift;
4236 		int dummy;
4237 		wide_int wd = rtx_mode_t (op1, compute_mode);
4238 		unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4239 
4240 		if (wi::popcount (wd) == 1)
4241 		  {
4242 		    pre_shift = floor_log2 (d);
4243 		    if (rem_flag)
4244 		      {
4245 			unsigned HOST_WIDE_INT mask
4246 			  = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4247 			remainder
4248 			  = expand_binop (compute_mode, and_optab, op0,
4249 					  gen_int_mode (mask, compute_mode),
4250 					  remainder, 1,
4251 					  OPTAB_LIB_WIDEN);
4252 			if (remainder)
4253 			  return gen_lowpart (mode, remainder);
4254 		      }
4255 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4256 					     pre_shift, tquotient, 1);
4257 		  }
4258 		else if (size <= HOST_BITS_PER_WIDE_INT)
4259 		  {
4260 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4261 		      {
4262 			/* Most significant bit of divisor is set; emit an scc
4263 			   insn.  */
4264 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4265 							  compute_mode, 1, 1);
4266 		      }
4267 		    else
4268 		      {
4269 			/* Find a suitable multiplier and right shift count
4270 			   instead of multiplying with D.  */
4271 
4272 			mh = choose_multiplier (d, size, size,
4273 						&ml, &post_shift, &dummy);
4274 
4275 			/* If the suggested multiplier is more than SIZE bits,
4276 			   we can do better for even divisors, using an
4277 			   initial right shift.  */
4278 			if (mh != 0 && (d & 1) == 0)
4279 			  {
4280 			    pre_shift = floor_log2 (d & -d);
4281 			    mh = choose_multiplier (d >> pre_shift, size,
4282 						    size - pre_shift,
4283 						    &ml, &post_shift, &dummy);
4284 			    gcc_assert (!mh);
4285 			  }
4286 			else
4287 			  pre_shift = 0;
4288 
4289 			if (mh != 0)
4290 			  {
4291 			    rtx t1, t2, t3, t4;
4292 
4293 			    if (post_shift - 1 >= BITS_PER_WORD)
4294 			      goto fail1;
4295 
4296 			    extra_cost
4297 			      = (shift_cost (speed, compute_mode, post_shift - 1)
4298 				 + shift_cost (speed, compute_mode, 1)
4299 				 + 2 * add_cost (speed, compute_mode));
4300 			    t1 = expmed_mult_highpart
4301 			      (compute_mode, op0,
4302 			       gen_int_mode (ml, compute_mode),
4303 			       NULL_RTX, 1, max_cost - extra_cost);
4304 			    if (t1 == 0)
4305 			      goto fail1;
4306 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4307 							       op0, t1),
4308 						NULL_RTX);
4309 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4310 					       t2, 1, NULL_RTX, 1);
4311 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4312 							      t1, t3),
4313 						NULL_RTX);
4314 			    quotient = expand_shift
4315 			      (RSHIFT_EXPR, compute_mode, t4,
4316 			       post_shift - 1, tquotient, 1);
4317 			  }
4318 			else
4319 			  {
4320 			    rtx t1, t2;
4321 
4322 			    if (pre_shift >= BITS_PER_WORD
4323 				|| post_shift >= BITS_PER_WORD)
4324 			      goto fail1;
4325 
4326 			    t1 = expand_shift
4327 			      (RSHIFT_EXPR, compute_mode, op0,
4328 			       pre_shift, NULL_RTX, 1);
4329 			    extra_cost
4330 			      = (shift_cost (speed, compute_mode, pre_shift)
4331 				 + shift_cost (speed, compute_mode, post_shift));
4332 			    t2 = expmed_mult_highpart
4333 			      (compute_mode, t1,
4334 			       gen_int_mode (ml, compute_mode),
4335 			       NULL_RTX, 1, max_cost - extra_cost);
4336 			    if (t2 == 0)
4337 			      goto fail1;
4338 			    quotient = expand_shift
4339 			      (RSHIFT_EXPR, compute_mode, t2,
4340 			       post_shift, tquotient, 1);
4341 			  }
4342 		      }
4343 		  }
4344 		else		/* Mode too wide to use the tricky code.  */
4345 		  break;
4346 
4347 		insn = get_last_insn ();
4348 		if (insn != last)
4349 		  set_dst_reg_note (insn, REG_EQUAL,
4350 				    gen_rtx_UDIV (compute_mode, op0, op1),
4351 				    quotient);
4352 	      }
4353 	    else		/* TRUNC_DIV, signed */
4354 	      {
4355 		unsigned HOST_WIDE_INT ml;
4356 		int lgup, post_shift;
4357 		rtx mlr;
4358 		HOST_WIDE_INT d = INTVAL (op1);
4359 		unsigned HOST_WIDE_INT abs_d;
4360 
4361 		/* Since d might be INT_MIN, we have to cast to
4362 		   unsigned HOST_WIDE_INT before negating to avoid
4363 		   undefined signed overflow.  */
4364 		abs_d = (d >= 0
4365 			 ? (unsigned HOST_WIDE_INT) d
4366 			 : - (unsigned HOST_WIDE_INT) d);
4367 
4368 		/* n rem d = n rem -d */
4369 		if (rem_flag && d < 0)
4370 		  {
4371 		    d = abs_d;
4372 		    op1 = gen_int_mode (abs_d, compute_mode);
4373 		  }
4374 
4375 		if (d == 1)
4376 		  quotient = op0;
4377 		else if (d == -1)
4378 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4379 					  tquotient, 0);
4380 		else if (size <= HOST_BITS_PER_WIDE_INT
4381 			 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4382 		  {
4383 		    /* This case is not handled correctly below.  */
4384 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4385 						compute_mode, 1, 1);
4386 		    if (quotient == 0)
4387 		      goto fail1;
4388 		  }
4389 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4390 			 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4391 			 && (rem_flag
4392 			     ? smod_pow2_cheap (speed, compute_mode)
4393 			     : sdiv_pow2_cheap (speed, compute_mode))
4394 			 /* We assume that the cheap metric is true if the
4395 			    optab has an expander for this mode.  */
4396 			 && ((optab_handler ((rem_flag ? smod_optab
4397 					      : sdiv_optab),
4398 					     compute_mode)
4399 			      != CODE_FOR_nothing)
4400 			     || (optab_handler (sdivmod_optab,
4401 						compute_mode)
4402 				 != CODE_FOR_nothing)))
4403 		  ;
4404 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4405 			 && (size <= HOST_BITS_PER_WIDE_INT
4406 			     || abs_d != (unsigned HOST_WIDE_INT) d))
4407 		  {
4408 		    if (rem_flag)
4409 		      {
4410 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4411 			if (remainder)
4412 			  return gen_lowpart (mode, remainder);
4413 		      }
4414 
4415 		    if (sdiv_pow2_cheap (speed, compute_mode)
4416 			&& ((optab_handler (sdiv_optab, compute_mode)
4417 			     != CODE_FOR_nothing)
4418 			    || (optab_handler (sdivmod_optab, compute_mode)
4419 				!= CODE_FOR_nothing)))
4420 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4421 						compute_mode, op0,
4422 						gen_int_mode (abs_d,
4423 							      compute_mode),
4424 						NULL_RTX, 0);
4425 		    else
4426 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4427 
4428 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4429 		       negate the quotient.  */
4430 		    if (d < 0)
4431 		      {
4432 			insn = get_last_insn ();
4433 			if (insn != last
4434 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4435 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4436 			  set_dst_reg_note (insn, REG_EQUAL,
4437 					    gen_rtx_DIV (compute_mode, op0,
4438 							 gen_int_mode
4439 							   (abs_d,
4440 							    compute_mode)),
4441 					    quotient);
4442 
4443 			quotient = expand_unop (compute_mode, neg_optab,
4444 						quotient, quotient, 0);
4445 		      }
4446 		  }
4447 		else if (size <= HOST_BITS_PER_WIDE_INT)
4448 		  {
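		    /* A sketch of the small-multiplier branch below,
		       assuming a 32-bit compute_mode and d == 5
		       (ml == 0x66666667, post_shift == 1):

			 t1 = ((int64_t) x * 0x66666667) >> 32;
			 q  = (t1 >> 1) - (x >> 31);

		       i.e. the quotient truncated toward zero.  */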
4449 		    choose_multiplier (abs_d, size, size - 1,
4450 				       &ml, &post_shift, &lgup);
4451 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4452 		      {
4453 			rtx t1, t2, t3;
4454 
4455 			if (post_shift >= BITS_PER_WORD
4456 			    || size - 1 >= BITS_PER_WORD)
4457 			  goto fail1;
4458 
4459 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4460 				      + shift_cost (speed, compute_mode, size - 1)
4461 				      + add_cost (speed, compute_mode));
4462 			t1 = expmed_mult_highpart
4463 			  (compute_mode, op0, gen_int_mode (ml, compute_mode),
4464 			   NULL_RTX, 0, max_cost - extra_cost);
4465 			if (t1 == 0)
4466 			  goto fail1;
4467 			t2 = expand_shift
4468 			  (RSHIFT_EXPR, compute_mode, t1,
4469 			   post_shift, NULL_RTX, 0);
4470 			t3 = expand_shift
4471 			  (RSHIFT_EXPR, compute_mode, op0,
4472 			   size - 1, NULL_RTX, 0);
4473 			if (d < 0)
4474 			  quotient
4475 			    = force_operand (gen_rtx_MINUS (compute_mode,
4476 							    t3, t2),
4477 					     tquotient);
4478 			else
4479 			  quotient
4480 			    = force_operand (gen_rtx_MINUS (compute_mode,
4481 							    t2, t3),
4482 					     tquotient);
4483 		      }
4484 		    else
4485 		      {
4486 			rtx t1, t2, t3, t4;
4487 
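			/* A sketch of this large-multiplier branch,
			   assuming a 32-bit compute_mode and d == 7
			   (ml == 0x92492493, post_shift == 2):

			     t1 = ((int64_t) x * (int32_t) 0x92492493) >> 32;
			     q  = ((t1 + x) >> 2) - (x >> 31);

			   again the quotient truncated toward zero.  */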
4488 			if (post_shift >= BITS_PER_WORD
4489 			    || size - 1 >= BITS_PER_WORD)
4490 			  goto fail1;
4491 
4492 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4493 			mlr = gen_int_mode (ml, compute_mode);
4494 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4495 				      + shift_cost (speed, compute_mode, size - 1)
4496 				      + 2 * add_cost (speed, compute_mode));
4497 			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4498 						   NULL_RTX, 0,
4499 						   max_cost - extra_cost);
4500 			if (t1 == 0)
4501 			  goto fail1;
4502 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4503 							  t1, op0),
4504 					    NULL_RTX);
4505 			t3 = expand_shift
4506 			  (RSHIFT_EXPR, compute_mode, t2,
4507 			   post_shift, NULL_RTX, 0);
4508 			t4 = expand_shift
4509 			  (RSHIFT_EXPR, compute_mode, op0,
4510 			   size - 1, NULL_RTX, 0);
4511 			if (d < 0)
4512 			  quotient
4513 			    = force_operand (gen_rtx_MINUS (compute_mode,
4514 							    t4, t3),
4515 					     tquotient);
4516 			else
4517 			  quotient
4518 			    = force_operand (gen_rtx_MINUS (compute_mode,
4519 							    t3, t4),
4520 					     tquotient);
4521 		      }
4522 		  }
4523 		else		/* Too wide mode to use tricky code */
4524 		  break;
4525 
4526 		insn = get_last_insn ();
4527 		if (insn != last)
4528 		  set_dst_reg_note (insn, REG_EQUAL,
4529 				    gen_rtx_DIV (compute_mode, op0, op1),
4530 				    quotient);
4531 	      }
4532 	    break;
4533 	  }
4534       fail1:
4535 	delete_insns_since (last);
4536 	break;
4537 
4538       case FLOOR_DIV_EXPR:
4539       case FLOOR_MOD_EXPR:
4540 	/* We will come here only for signed operations.  */
4541 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4542 	  {
4543 	    unsigned HOST_WIDE_INT mh, ml;
4544 	    int pre_shift, lgup, post_shift;
4545 	    HOST_WIDE_INT d = INTVAL (op1);
4546 
4547 	    if (d > 0)
4548 	      {
4549 		/* We could just as easily deal with negative constants here,
4550 		   but it does not seem worth the trouble for GCC 2.6.  */
4551 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4552 		  {
4553 		    pre_shift = floor_log2 (d);
4554 		    if (rem_flag)
4555 		      {
4556 			unsigned HOST_WIDE_INT mask
4557 			  = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4558 			remainder = expand_binop
4559 			  (compute_mode, and_optab, op0,
4560 			   gen_int_mode (mask, compute_mode),
4561 			   remainder, 0, OPTAB_LIB_WIDEN);
4562 			if (remainder)
4563 			  return gen_lowpart (mode, remainder);
4564 		      }
4565 		    quotient = expand_shift
4566 		      (RSHIFT_EXPR, compute_mode, op0,
4567 		       pre_shift, tquotient, 0);
4568 		  }
4569 		else
4570 		  {
4571 		    rtx t1, t2, t3, t4;
4572 
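		    /* The XOR trick built below computes floor (x / d)
		       without a branch: t1 = x >> (size - 1) is 0 or -1,
		       so x ^ t1 is x or -x - 1, and XORing the unsigned
		       quotient with t1 again gives e.g. for x == -13,
		       d == 5: (12 / 5) ^ -1 == ~2 == -3 == floor (-13 / 5).  */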
4573 		    mh = choose_multiplier (d, size, size - 1,
4574 					    &ml, &post_shift, &lgup);
4575 		    gcc_assert (!mh);
4576 
4577 		    if (post_shift < BITS_PER_WORD
4578 			&& size - 1 < BITS_PER_WORD)
4579 		      {
4580 			t1 = expand_shift
4581 			  (RSHIFT_EXPR, compute_mode, op0,
4582 			   size - 1, NULL_RTX, 0);
4583 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4584 					   NULL_RTX, 0, OPTAB_WIDEN);
4585 			extra_cost = (shift_cost (speed, compute_mode, post_shift)
4586 				      + shift_cost (speed, compute_mode, size - 1)
4587 				      + 2 * add_cost (speed, compute_mode));
4588 			t3 = expmed_mult_highpart
4589 			  (compute_mode, t2, gen_int_mode (ml, compute_mode),
4590 			   NULL_RTX, 1, max_cost - extra_cost);
4591 			if (t3 != 0)
4592 			  {
4593 			    t4 = expand_shift
4594 			      (RSHIFT_EXPR, compute_mode, t3,
4595 			       post_shift, NULL_RTX, 1);
4596 			    quotient = expand_binop (compute_mode, xor_optab,
4597 						     t4, t1, tquotient, 0,
4598 						     OPTAB_WIDEN);
4599 			  }
4600 		      }
4601 		  }
4602 	      }
4603 	    else
4604 	      {
4605 		rtx nsign, t1, t2, t3, t4;
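		/* Rewrite floor (n / d) in terms of a truncating
		   division: nsign is -1 iff n <= 0, so t3 is n - 1 for
		   n > 0 and n otherwise, and adding ~nsign compensates.
		   E.g. n == 7, d == -2: trunc (6 / -2) - 1 == -4
		   == floor (7 / -2).  */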
4606 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4607 						  op0, constm1_rtx), NULL_RTX);
4608 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4609 				   0, OPTAB_WIDEN);
4610 		nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4611 				      size - 1, NULL_RTX, 0);
4612 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4613 				    NULL_RTX);
4614 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4615 				    NULL_RTX, 0);
4616 		if (t4)
4617 		  {
4618 		    rtx t5;
4619 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4620 				      NULL_RTX, 0);
4621 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4622 							    t4, t5),
4623 					      tquotient);
4624 		  }
4625 	      }
4626 	  }
4627 
4628 	if (quotient != 0)
4629 	  break;
4630 	delete_insns_since (last);
4631 
4632 	/* Try using an instruction that produces both the quotient and
4633 	   remainder, using truncation.  We can easily compensate the quotient
4634 	   or remainder to get floor rounding, once we have the remainder.
4635 	   Notice that we also compute the final remainder value here,
4636 	   and return the result right away.  */
4637 	if (target == 0 || GET_MODE (target) != compute_mode)
4638 	  target = gen_reg_rtx (compute_mode);
4639 
4640 	if (rem_flag)
4641 	  {
4642 	    remainder
4643 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4644 	    quotient = gen_reg_rtx (compute_mode);
4645 	  }
4646 	else
4647 	  {
4648 	    quotient
4649 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4650 	    remainder = gen_reg_rtx (compute_mode);
4651 	  }
4652 
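	/* E.g. for -7 / 2 the truncating divmod gives q == -3, r == -1;
	   the operands' signs differ and r != 0, so the fixup below
	   yields q == -4, r == 1, the floor-rounded pair.  */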
4653 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4654 				 quotient, remainder, 0))
4655 	  {
4656 	    /* This could be computed with a branch-less sequence.
4657 	       Save that for later.  */
4658 	    rtx tem;
4659 	    rtx_code_label *label = gen_label_rtx ();
4660 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4661 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4662 				NULL_RTX, 0, OPTAB_WIDEN);
4663 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4664 	    expand_dec (quotient, const1_rtx);
4665 	    expand_inc (remainder, op1);
4666 	    emit_label (label);
4667 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4668 	  }
4669 
4670 	/* No luck with division elimination or divmod.  Have to do it
4671 	   by conditionally adjusting op0 *and* the result.  */
4672 	{
4673 	  rtx_code_label *label1, *label2, *label3, *label4, *label5;
4674 	  rtx adjusted_op0;
4675 	  rtx tem;
4676 
4677 	  quotient = gen_reg_rtx (compute_mode);
4678 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4679 	  label1 = gen_label_rtx ();
4680 	  label2 = gen_label_rtx ();
4681 	  label3 = gen_label_rtx ();
4682 	  label4 = gen_label_rtx ();
4683 	  label5 = gen_label_rtx ();
4684 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4685 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4686 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4687 			      quotient, 0, OPTAB_LIB_WIDEN);
4688 	  if (tem != quotient)
4689 	    emit_move_insn (quotient, tem);
4690 	  emit_jump_insn (targetm.gen_jump (label5));
4691 	  emit_barrier ();
4692 	  emit_label (label1);
4693 	  expand_inc (adjusted_op0, const1_rtx);
4694 	  emit_jump_insn (targetm.gen_jump (label4));
4695 	  emit_barrier ();
4696 	  emit_label (label2);
4697 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4698 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 			      quotient, 0, OPTAB_LIB_WIDEN);
4700 	  if (tem != quotient)
4701 	    emit_move_insn (quotient, tem);
4702 	  emit_jump_insn (targetm.gen_jump (label5));
4703 	  emit_barrier ();
4704 	  emit_label (label3);
4705 	  expand_dec (adjusted_op0, const1_rtx);
4706 	  emit_label (label4);
4707 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 			      quotient, 0, OPTAB_LIB_WIDEN);
4709 	  if (tem != quotient)
4710 	    emit_move_insn (quotient, tem);
4711 	  expand_dec (quotient, const1_rtx);
4712 	  emit_label (label5);
4713 	}
4714 	break;
4715 
4716       case CEIL_DIV_EXPR:
4717       case CEIL_MOD_EXPR:
4718 	if (unsignedp)
4719 	  {
4720 	    if (op1_is_constant
4721 		&& EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4722 		&& (size <= HOST_BITS_PER_WIDE_INT
4723 		    || INTVAL (op1) >= 0))
4724 	      {
4725 		rtx t1, t2, t3;
4726 		unsigned HOST_WIDE_INT d = INTVAL (op1);
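		/* E.g. for x == 13, d == 4: t1 == 13 >> 2 == 3 and
		   13 & 3 != 0, so the quotient becomes 3 + 1 == 4
		   == ceil (13 / 4).  */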
4727 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4728 				   floor_log2 (d), tquotient, 1);
4729 		t2 = expand_binop (compute_mode, and_optab, op0,
4730 				   gen_int_mode (d - 1, compute_mode),
4731 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4732 		t3 = gen_reg_rtx (compute_mode);
4733 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4734 				      compute_mode, 1, 1);
4735 		if (t3 == 0)
4736 		  {
4737 		    rtx_code_label *lab;
4738 		    lab = gen_label_rtx ();
4739 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4740 		    expand_inc (t1, const1_rtx);
4741 		    emit_label (lab);
4742 		    quotient = t1;
4743 		  }
4744 		else
4745 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4746 							  t1, t3),
4747 					    tquotient);
4748 		break;
4749 	      }
4750 
4751 	    /* Try using an instruction that produces both the quotient and
4752 	       remainder, using truncation.  We can easily compensate the
4753 	       quotient or remainder to get ceiling rounding, once we have the
4754 	       remainder.  Notice that we also compute the final remainder
4755 	       value here, and return the result right away.  */
4756 	    if (target == 0 || GET_MODE (target) != compute_mode)
4757 	      target = gen_reg_rtx (compute_mode);
4758 
4759 	    if (rem_flag)
4760 	      {
4761 		remainder = (REG_P (target)
4762 			     ? target : gen_reg_rtx (compute_mode));
4763 		quotient = gen_reg_rtx (compute_mode);
4764 	      }
4765 	    else
4766 	      {
4767 		quotient = (REG_P (target)
4768 			    ? target : gen_reg_rtx (compute_mode));
4769 		remainder = gen_reg_rtx (compute_mode);
4770 	      }
4771 
4772 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4773 				     remainder, 1))
4774 	      {
4775 		/* This could be computed with a branch-less sequence.
4776 		   Save that for later.  */
4777 		rtx_code_label *label = gen_label_rtx ();
4778 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4779 				 compute_mode, label);
4780 		expand_inc (quotient, const1_rtx);
4781 		expand_dec (remainder, op1);
4782 		emit_label (label);
4783 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4784 	      }
4785 
4786 	    /* No luck with division elimination or divmod.  Have to do it
4787 	       by conditionally adjusting op0 *and* the result.  */
4788 	    {
4789 	      rtx_code_label *label1, *label2;
4790 	      rtx adjusted_op0, tem;
4791 
4792 	      quotient = gen_reg_rtx (compute_mode);
4793 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4794 	      label1 = gen_label_rtx ();
4795 	      label2 = gen_label_rtx ();
4796 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4797 			       compute_mode, label1);
4798 	      emit_move_insn (quotient, const0_rtx);
4799 	      emit_jump_insn (targetm.gen_jump (label2));
4800 	      emit_barrier ();
4801 	      emit_label (label1);
4802 	      expand_dec (adjusted_op0, const1_rtx);
4803 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4804 				  quotient, 1, OPTAB_LIB_WIDEN);
4805 	      if (tem != quotient)
4806 		emit_move_insn (quotient, tem);
4807 	      expand_inc (quotient, const1_rtx);
4808 	      emit_label (label2);
4809 	    }
4810 	  }
4811 	else /* signed */
4812 	  {
4813 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4814 		&& INTVAL (op1) >= 0)
4815 	      {
4816 		/* This is extremely similar to the code for the unsigned case
4817 		   above.  For 2.7 we should merge these variants, but for
4818 		   2.6.1 I don't want to touch the code for unsigned since that
4819 		   get used in C.  The signed case will only be used by other
4820 		   gets used in C.  The signed case will only be used by other
4821 
4822 		rtx t1, t2, t3;
4823 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4824 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4825 				   floor_log2 (d), tquotient, 0);
4826 		t2 = expand_binop (compute_mode, and_optab, op0,
4827 				   gen_int_mode (d - 1, compute_mode),
4828 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4829 		t3 = gen_reg_rtx (compute_mode);
4830 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4831 				      compute_mode, 1, 1);
4832 		if (t3 == 0)
4833 		  {
4834 		    rtx_code_label *lab;
4835 		    lab = gen_label_rtx ();
4836 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4837 		    expand_inc (t1, const1_rtx);
4838 		    emit_label (lab);
4839 		    quotient = t1;
4840 		  }
4841 		else
4842 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4843 							  t1, t3),
4844 					    tquotient);
4845 		break;
4846 	      }
4847 
4848 	    /* Try using an instruction that produces both the quotient and
4849 	       remainder, using truncation.  We can easily compensate the
4850 	       quotient or remainder to get ceiling rounding, once we have the
4851 	       remainder.  Notice that we also compute the final remainder
4852 	       value here, and return the result right away.  */
4853 	    if (target == 0 || GET_MODE (target) != compute_mode)
4854 	      target = gen_reg_rtx (compute_mode);
4855 	    if (rem_flag)
4856 	      {
4857 		remainder = (REG_P (target)
4858 			    ? target : gen_reg_rtx (compute_mode));
4859 		quotient = gen_reg_rtx (compute_mode);
4860 	      }
4861 	    else
4862 	      {
4863 		quotient = (REG_P (target)
4864 			    ? target : gen_reg_rtx (compute_mode));
4865 		remainder = gen_reg_rtx (compute_mode);
4866 	      }
4867 
4868 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4869 				     remainder, 0))
4870 	      {
4871 		/* This could be computed with a branch-less sequence.
4872 		   Save that for later.  */
4873 		rtx tem;
4874 		rtx_code_label *label = gen_label_rtx ();
4875 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4876 				 compute_mode, label);
4877 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4878 				    NULL_RTX, 0, OPTAB_WIDEN);
4879 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4880 		expand_inc (quotient, const1_rtx);
4881 		expand_dec (remainder, op1);
4882 		emit_label (label);
4883 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4884 	      }
4885 
4886 	    /* No luck with division elimination or divmod.  Have to do it
4887 	       by conditionally adjusting op0 *and* the result.  */
4888 	    {
4889 	      rtx_code_label *label1, *label2, *label3, *label4, *label5;
4890 	      rtx adjusted_op0;
4891 	      rtx tem;
4892 
4893 	      quotient = gen_reg_rtx (compute_mode);
4894 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4895 	      label1 = gen_label_rtx ();
4896 	      label2 = gen_label_rtx ();
4897 	      label3 = gen_label_rtx ();
4898 	      label4 = gen_label_rtx ();
4899 	      label5 = gen_label_rtx ();
4900 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4901 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4902 			       compute_mode, label1);
4903 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4904 				  quotient, 0, OPTAB_LIB_WIDEN);
4905 	      if (tem != quotient)
4906 		emit_move_insn (quotient, tem);
4907 	      emit_jump_insn (targetm.gen_jump (label5));
4908 	      emit_barrier ();
4909 	      emit_label (label1);
4910 	      expand_dec (adjusted_op0, const1_rtx);
4911 	      emit_jump_insn (targetm.gen_jump (label4));
4912 	      emit_barrier ();
4913 	      emit_label (label2);
4914 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4915 			       compute_mode, label3);
4916 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4917 				  quotient, 0, OPTAB_LIB_WIDEN);
4918 	      if (tem != quotient)
4919 		emit_move_insn (quotient, tem);
4920 	      emit_jump_insn (targetm.gen_jump (label5));
4921 	      emit_barrier ();
4922 	      emit_label (label3);
4923 	      expand_inc (adjusted_op0, const1_rtx);
4924 	      emit_label (label4);
4925 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4926 				  quotient, 0, OPTAB_LIB_WIDEN);
4927 	      if (tem != quotient)
4928 		emit_move_insn (quotient, tem);
4929 	      expand_inc (quotient, const1_rtx);
4930 	      emit_label (label5);
4931 	    }
4932 	  }
4933 	break;
4934 
4935       case EXACT_DIV_EXPR:
4936 	if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4937 	  {
4938 	    HOST_WIDE_INT d = INTVAL (op1);
4939 	    unsigned HOST_WIDE_INT ml;
4940 	    int pre_shift;
4941 	    rtx t1;
4942 
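	    /* An exact division by d == f * 2^k shifts out the k low
	       zero bits and multiplies by the inverse of the odd
	       factor f modulo 2^size.  E.g. in 32 bits the inverse of
	       7 is 0xB6DB6DB7, since 7 * 0xB6DB6DB7 == 1 (mod 2^32),
	       so 21 * 0xB6DB6DB7 == 3 (mod 2^32).  */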
4943 	    pre_shift = floor_log2 (d & -d);
4944 	    ml = invert_mod2n (d >> pre_shift, size);
4945 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4946 			       pre_shift, NULL_RTX, unsignedp);
4947 	    quotient = expand_mult (compute_mode, t1,
4948 				    gen_int_mode (ml, compute_mode),
4949 				    NULL_RTX, 1);
4950 
4951 	    insn = get_last_insn ();
4952 	    set_dst_reg_note (insn, REG_EQUAL,
4953 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4954 					      compute_mode, op0, op1),
4955 			      quotient);
4956 	  }
4957 	break;
4958 
4959       case ROUND_DIV_EXPR:
4960       case ROUND_MOD_EXPR:
4961 	if (unsignedp)
4962 	  {
4963 	    rtx tem;
4964 	    rtx_code_label *label;
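	    /* Round to nearest: after q = n / d and r = n % d,
	       increment q when r > (d - 1) / 2.  E.g. n == 10, d == 4:
	       r == 2 > 1, so q becomes 3 and r becomes -2; ties round
	       upward.  */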
4965 	    label = gen_label_rtx ();
4966 	    quotient = gen_reg_rtx (compute_mode);
4967 	    remainder = gen_reg_rtx (compute_mode);
4968 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4969 	      {
4970 		rtx tem;
4971 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4972 					 quotient, 1, OPTAB_LIB_WIDEN);
4973 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4974 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4975 					  remainder, 1, OPTAB_LIB_WIDEN);
4976 	      }
4977 	    tem = plus_constant (compute_mode, op1, -1);
4978 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4979 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4980 	    expand_inc (quotient, const1_rtx);
4981 	    expand_dec (remainder, op1);
4982 	    emit_label (label);
4983 	  }
4984 	else
4985 	  {
4986 	    rtx abs_rem, abs_op1, tem, mask;
4987 	    rtx_code_label *label;
4988 	    label = gen_label_rtx ();
4989 	    quotient = gen_reg_rtx (compute_mode);
4990 	    remainder = gen_reg_rtx (compute_mode);
4991 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4992 	      {
4993 		rtx tem;
4994 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4995 					 quotient, 0, OPTAB_LIB_WIDEN);
4996 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4997 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4998 					  remainder, 0, OPTAB_LIB_WIDEN);
4999 	      }
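	    /* The adjustment below is branch-free apart from the
	       comparison: mask = (op0 ^ op1) >> (size - 1) is 0 or -1,
	       and (x ^ mask) - mask conditionally negates x, so when
	       2 * |r| >= |d| the quotient gains (1 ^ mask) - mask == +-1
	       and the remainder loses (op1 ^ mask) - mask == +-op1.  */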
5000 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
5001 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
5002 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
5003 				1, NULL_RTX, 1);
5004 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
5005 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
5006 				NULL_RTX, 0, OPTAB_WIDEN);
5007 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
5008 				 size - 1, NULL_RTX, 0);
5009 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
5010 				NULL_RTX, 0, OPTAB_WIDEN);
5011 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
5012 				NULL_RTX, 0, OPTAB_WIDEN);
5013 	    expand_inc (quotient, tem);
5014 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
5015 				NULL_RTX, 0, OPTAB_WIDEN);
5016 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
5017 				NULL_RTX, 0, OPTAB_WIDEN);
5018 	    expand_dec (remainder, tem);
5019 	    emit_label (label);
5020 	  }
5021 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
5022 
5023       default:
5024 	gcc_unreachable ();
5025       }
5026 
5027   if (quotient == 0)
5028     {
5029       if (target && GET_MODE (target) != compute_mode)
5030 	target = 0;
5031 
5032       if (rem_flag)
5033 	{
5034 	  /* Try to produce the remainder without producing the quotient.
5035 	     If we seem to have a divmod pattern that does not require widening,
5036 	     don't try widening here.  We should really have a WIDEN argument
5037 	     to expand_twoval_binop, since what we'd really like to do here is
5038 	     1) try a mod insn in compute_mode
5039 	     2) try a divmod insn in compute_mode
5040 	     3) try a div insn in compute_mode and multiply-subtract to get
5041 	        remainder
5042 	     4) try the same things with widening allowed.  */
5043 	  remainder
5044 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5045 				 op0, op1, target,
5046 				 unsignedp,
5047 				 ((optab_handler (optab2, compute_mode)
5048 				   != CODE_FOR_nothing)
5049 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
5050 	  if (remainder == 0)
5051 	    {
5052 	      /* No luck there.  Can we do remainder and divide at once
5053 		 without a library call?  */
5054 	      remainder = gen_reg_rtx (compute_mode);
5055 	      if (! expand_twoval_binop ((unsignedp
5056 					  ? udivmod_optab
5057 					  : sdivmod_optab),
5058 					 op0, op1,
5059 					 NULL_RTX, remainder, unsignedp))
5060 		remainder = 0;
5061 	    }
5062 
5063 	  if (remainder)
5064 	    return gen_lowpart (mode, remainder);
5065 	}
5066 
5067       /* Produce the quotient.  Try a quotient insn, but not a library call.
5068 	 If we have a divmod in this mode, use it in preference to widening
5069 	 the div (for this test we assume it will not fail). Note that optab2
5070 	 is set to the one of the two optabs that the call below will use.  */
5071       quotient
5072 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5073 			     op0, op1, rem_flag ? NULL_RTX : target,
5074 			     unsignedp,
5075 			     ((optab_handler (optab2, compute_mode)
5076 			       != CODE_FOR_nothing)
5077 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
5078 
5079       if (quotient == 0)
5080 	{
5081 	  /* No luck there.  Try a quotient-and-remainder insn,
5082 	     keeping the quotient alone.  */
5083 	  quotient = gen_reg_rtx (compute_mode);
5084 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5085 				     op0, op1,
5086 				     quotient, NULL_RTX, unsignedp))
5087 	    {
5088 	      quotient = 0;
5089 	      if (! rem_flag)
5090 		/* Still no luck.  If we are not computing the remainder,
5091 		   use a library call for the quotient.  */
5092 		quotient = sign_expand_binop (compute_mode,
5093 					      udiv_optab, sdiv_optab,
5094 					      op0, op1, target,
5095 					      unsignedp, OPTAB_LIB_WIDEN);
5096 	    }
5097 	}
5098     }
5099 
5100   if (rem_flag)
5101     {
5102       if (target && GET_MODE (target) != compute_mode)
5103 	target = 0;
5104 
5105       if (quotient == 0)
5106 	{
5107 	  /* No divide instruction either.  Use library for remainder.  */
5108 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5109 					 op0, op1, target,
5110 					 unsignedp, OPTAB_LIB_WIDEN);
5111 	  /* No remainder function.  Try a quotient-and-remainder
5112 	     function, keeping the remainder.  */
5113 	  if (!remainder)
5114 	    {
5115 	      remainder = gen_reg_rtx (compute_mode);
5116 	      if (!expand_twoval_binop_libfunc
5117 		  (unsignedp ? udivmod_optab : sdivmod_optab,
5118 		   op0, op1,
5119 		   NULL_RTX, remainder,
5120 		   unsignedp ? UMOD : MOD))
5121 		remainder = NULL_RTX;
5122 	    }
5123 	}
5124       else
5125 	{
5126 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
5127 	  remainder = expand_mult (compute_mode, quotient, op1,
5128 				   NULL_RTX, unsignedp);
5129 	  remainder = expand_binop (compute_mode, sub_optab, op0,
5130 				    remainder, target, unsignedp,
5131 				    OPTAB_LIB_WIDEN);
5132 	}
5133     }
5134 
5135   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5136 }
5137 
5138 /* Return a tree node with data type TYPE, describing the value of X.
5139    Usually this is a VAR_DECL, if there is no obvious better choice.
5140    X may be an expression; however, we only support those expressions
5141    generated by loop.c.  */
5142 
5143 tree
5144 make_tree (tree type, rtx x)
5145 {
5146   tree t;
5147 
5148   switch (GET_CODE (x))
5149     {
5150     case CONST_INT:
5151     case CONST_WIDE_INT:
5152       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5153       return t;
5154 
5155     case CONST_DOUBLE:
5156       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5157       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5158 	t = wide_int_to_tree (type,
5159 			      wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5160 						    HOST_BITS_PER_WIDE_INT * 2));
5161       else
5162 	t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5163 
5164       return t;
5165 
5166     case CONST_VECTOR:
5167       {
5168 	int units = CONST_VECTOR_NUNITS (x);
5169 	tree itype = TREE_TYPE (type);
5170 	tree *elts;
5171 	int i;
5172 
5173 	/* Build a tree with vector elements.  */
5174 	elts = XALLOCAVEC (tree, units);
5175 	for (i = units - 1; i >= 0; --i)
5176 	  {
5177 	    rtx elt = CONST_VECTOR_ELT (x, i);
5178 	    elts[i] = make_tree (itype, elt);
5179 	  }
5180 
5181 	return build_vector (type, elts);
5182       }
5183 
5184     case PLUS:
5185       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5186 			  make_tree (type, XEXP (x, 1)));
5187 
5188     case MINUS:
5189       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5190 			  make_tree (type, XEXP (x, 1)));
5191 
5192     case NEG:
5193       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5194 
5195     case MULT:
5196       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5197 			  make_tree (type, XEXP (x, 1)));
5198 
5199     case ASHIFT:
5200       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5201 			  make_tree (type, XEXP (x, 1)));
5202 
5203     case LSHIFTRT:
5204       t = unsigned_type_for (type);
5205       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5206 			    		 make_tree (t, XEXP (x, 0)),
5207 				    	 make_tree (type, XEXP (x, 1))));
5208 
5209     case ASHIFTRT:
5210       t = signed_type_for (type);
5211       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5212 					 make_tree (t, XEXP (x, 0)),
5213 				    	 make_tree (type, XEXP (x, 1))));
5214 
5215     case DIV:
5216       if (TREE_CODE (type) != REAL_TYPE)
5217 	t = signed_type_for (type);
5218       else
5219 	t = type;
5220 
5221       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5222 				    	 make_tree (t, XEXP (x, 0)),
5223 				    	 make_tree (t, XEXP (x, 1))));
5224     case UDIV:
5225       t = unsigned_type_for (type);
5226       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5227 				    	 make_tree (t, XEXP (x, 0)),
5228 				    	 make_tree (t, XEXP (x, 1))));
5229 
5230     case SIGN_EXTEND:
5231     case ZERO_EXTEND:
5232       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5233 					  GET_CODE (x) == ZERO_EXTEND);
5234       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5235 
5236     case CONST:
5237       return make_tree (type, XEXP (x, 0));
5238 
5239     case SYMBOL_REF:
5240       t = SYMBOL_REF_DECL (x);
5241       if (t)
5242 	return fold_convert (type, build_fold_addr_expr (t));
5243       /* else fall through.  */
5244 
5245     default:
5246       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5247 
5248       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5249 	 address mode to pointer mode.  */
5250       if (POINTER_TYPE_P (type))
5251 	x = convert_memory_address_addr_space
5252 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5253 
5254       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5255 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5256       t->decl_with_rtl.rtl = x;
5257 
5258       return t;
5259     }
5260 }
5261 
5262 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5263    and returning TARGET.
5264 
5265    If TARGET is 0, a pseudo-register or constant is returned.  */
5266 
5267 rtx
5268 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5269 {
5270   rtx tem = 0;
5271 
5272   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5273     tem = simplify_binary_operation (AND, mode, op0, op1);
5274   if (tem == 0)
5275     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5276 
5277   if (target == 0)
5278     target = tem;
5279   else if (tem != target)
5280     emit_move_insn (target, tem);
5281   return target;
5282 }
5283 
5284 /* Helper function for emit_store_flag.  */
5285 rtx
5286 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5287 	     machine_mode mode, machine_mode compare_mode,
5288 	     int unsignedp, rtx x, rtx y, int normalizep,
5289 	     machine_mode target_mode)
5290 {
5291   struct expand_operand ops[4];
5292   rtx op0, comparison, subtarget;
5293   rtx_insn *last;
5294   machine_mode result_mode = targetm.cstore_mode (icode);
5295 
5296   last = get_last_insn ();
5297   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5298   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5299   if (!x || !y)
5300     {
5301       delete_insns_since (last);
5302       return NULL_RTX;
5303     }
5304 
5305   if (target_mode == VOIDmode)
5306     target_mode = result_mode;
5307   if (!target)
5308     target = gen_reg_rtx (target_mode);
5309 
5310   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5311 
5312   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5313   create_fixed_operand (&ops[1], comparison);
5314   create_fixed_operand (&ops[2], x);
5315   create_fixed_operand (&ops[3], y);
5316   if (!maybe_expand_insn (icode, 4, ops))
5317     {
5318       delete_insns_since (last);
5319       return NULL_RTX;
5320     }
5321   subtarget = ops[0].value;
5322 
5323   /* If we are converting to a wider mode, first convert to
5324      TARGET_MODE, then normalize.  This produces better combining
5325      opportunities on machines that have a SIGN_EXTRACT when we are
5326      testing a single bit.  This mostly benefits the 68k.
5327 
5328      If STORE_FLAG_VALUE does not have the sign bit set when
5329      interpreted in MODE, we can do this conversion as unsigned, which
5330      is usually more efficient.  */
5331   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5332     {
5333       convert_move (target, subtarget,
5334 		    val_signbit_known_clear_p (result_mode,
5335 					       STORE_FLAG_VALUE));
5336       op0 = target;
5337       result_mode = target_mode;
5338     }
5339   else
5340     op0 = subtarget;
5341 
5342   /* If we want to keep subexpressions around, don't reuse our last
5343      target.  */
5344   if (optimize)
5345     subtarget = 0;
5346 
5347   /* Now normalize to the proper value in MODE.  Sometimes we don't
5348      have to do anything.  */
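  /* E.g. a target whose scc insn stores -1 (STORE_FLAG_VALUE == -1)
     when the caller wants a 0/1 value (normalizep == 1) takes the
     negation branch below; a raw value whose sign bit carries the
     result is instead normalized with a single shift.  */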
5349   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5350     ;
5351   /* STORE_FLAG_VALUE might be the most negative number, so write
5352      the comparison this way to avoid a compile-time warning.  */
5353   else if (- normalizep == STORE_FLAG_VALUE)
5354     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5355 
5356   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5357      it hard to use a value of just the sign bit due to ANSI integer
5358      constant typing rules.  */
5359   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5360     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5361 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5362 			normalizep == 1);
5363   else
5364     {
5365       gcc_assert (STORE_FLAG_VALUE & 1);
5366 
5367       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5368       if (normalizep == -1)
5369 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5370     }
5371 
5372   /* If we were converting to a smaller mode, do the conversion now.  */
5373   if (target_mode != result_mode)
5374     {
5375       convert_move (target, op0, 0);
5376       return target;
5377     }
5378   else
5379     return op0;
5380 }
5381 
5382 
5383 /* A subroutine of emit_store_flag only including "tricks" that do not
5384    need a recursive call.  These are kept separate to avoid infinite
5385    loops.  */
5386 
5387 static rtx
5388 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5389 		   machine_mode mode, int unsignedp, int normalizep,
5390 		   machine_mode target_mode)
5391 {
5392   rtx subtarget;
5393   enum insn_code icode;
5394   machine_mode compare_mode;
5395   enum mode_class mclass;
5396   enum rtx_code scode;
5397 
5398   if (unsignedp)
5399     code = unsigned_condition (code);
5400   scode = swap_condition (code);
5401 
5402   /* If one operand is constant, make it the second one.  Only do this
5403      if the other operand is not constant as well.  */
5404 
5405   if (swap_commutative_operands_p (op0, op1))
5406     {
5407       std::swap (op0, op1);
5408       code = swap_condition (code);
5409     }
5410 
5411   if (mode == VOIDmode)
5412     mode = GET_MODE (op0);
5413 
5414   /* For some comparisons with 1 and -1, we can convert this to
5415      comparisons with zero.  This will often produce more opportunities for
5416      store-flag insns.  */
5417 
5418   switch (code)
5419     {
5420     case LT:
5421       if (op1 == const1_rtx)
5422 	op1 = const0_rtx, code = LE;
5423       break;
5424     case LE:
5425       if (op1 == constm1_rtx)
5426 	op1 = const0_rtx, code = LT;
5427       break;
5428     case GE:
5429       if (op1 == const1_rtx)
5430 	op1 = const0_rtx, code = GT;
5431       break;
5432     case GT:
5433       if (op1 == constm1_rtx)
5434 	op1 = const0_rtx, code = GE;
5435       break;
5436     case GEU:
5437       if (op1 == const1_rtx)
5438 	op1 = const0_rtx, code = NE;
5439       break;
5440     case LTU:
5441       if (op1 == const1_rtx)
5442 	op1 = const0_rtx, code = EQ;
5443       break;
5444     default:
5445       break;
5446     }
5447 
5448   /* If we are comparing a double-word integer with zero or -1, we can
5449      convert the comparison into one involving a single word.  */
5450   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5451       && GET_MODE_CLASS (mode) == MODE_INT
5452       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5453     {
5454       rtx tem;
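      /* E.g. a DImode x == 0 on a 32-bit target becomes
	 (xlow | xhigh) == 0 in word_mode, and x == -1 becomes
	 (xlow & xhigh) == -1, leaving a single word-sized
	 comparison.  */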
5455       if ((code == EQ || code == NE)
5456 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5457 	{
5458 	  rtx op00, op01;
5459 
5460 	  /* Do a logical OR or AND of the two words and compare the
5461 	     result.  */
5462 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5463 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5464 	  tem = expand_binop (word_mode,
5465 			      op1 == const0_rtx ? ior_optab : and_optab,
5466 			      op00, op01, NULL_RTX, unsignedp,
5467 			      OPTAB_DIRECT);
5468 
5469 	  if (tem != 0)
5470 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5471 				   unsignedp, normalizep);
5472 	}
5473       else if ((code == LT || code == GE) && op1 == const0_rtx)
5474 	{
5475 	  rtx op0h;
5476 
5477 	  /* If testing the sign bit, can just test on high word.  */
5478 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5479 				      subreg_highpart_offset (word_mode,
5480 							      mode));
5481 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5482 				 unsignedp, normalizep);
5483 	}
5484       else
5485 	tem = NULL_RTX;
5486 
5487       if (tem)
5488 	{
5489 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5490 	    return tem;
5491 	  if (!target)
5492 	    target = gen_reg_rtx (target_mode);
5493 
5494 	  convert_move (target, tem,
5495 			!val_signbit_known_set_p (word_mode,
5496 						  (normalizep ? normalizep
5497 						   : STORE_FLAG_VALUE)));
5498 	  return target;
5499 	}
5500     }
5501 
5502   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5503      complement of A (for GE) and shifting the sign bit to the low bit.  */
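  /* E.g. in a 32-bit mode, x < 0 becomes the logical shift
     (unsigned) x >> 31 for a 0/1 result or the arithmetic shift
     x >> 31 for a 0/-1 result; x >= 0 does the same after ~x.  */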
5504   if (op1 == const0_rtx && (code == LT || code == GE)
5505       && GET_MODE_CLASS (mode) == MODE_INT
5506       && (normalizep || STORE_FLAG_VALUE == 1
5507 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5508     {
5509       subtarget = target;
5510 
5511       if (!target)
5512 	target_mode = mode;
5513 
5514       /* If the result is to be wider than OP0, it is best to convert it
5515 	 first.  If it is to be narrower, it is *incorrect* to convert it
5516 	 first.  */
5517       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5518 	{
5519 	  op0 = convert_modes (target_mode, mode, op0, 0);
5520 	  mode = target_mode;
5521 	}
5522 
5523       if (target_mode != mode)
5524 	subtarget = 0;
5525 
5526       if (code == GE)
5527 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5528 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5529 			    ? 0 : subtarget), 0);
5530 
5531       if (STORE_FLAG_VALUE == 1 || normalizep)
5532 	/* If we are supposed to produce a 0/1 value, we want to do
5533 	   a logical shift from the sign bit to the low-order bit; for
5534 	   a -1/0 value, we do an arithmetic shift.  */
5535 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5536 			    GET_MODE_BITSIZE (mode) - 1,
5537 			    subtarget, normalizep != -1);
5538 
5539       if (mode != target_mode)
5540 	op0 = convert_modes (target_mode, mode, op0, 0);
5541 
5542       return op0;
5543     }
5544 
5545   mclass = GET_MODE_CLASS (mode);
5546   for (compare_mode = mode; compare_mode != VOIDmode;
5547        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5548     {
5549      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5550      icode = optab_handler (cstore_optab, optab_mode);
5551      if (icode != CODE_FOR_nothing)
5552 	{
5553 	  do_pending_stack_adjust ();
5554 	  rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5555 				 unsignedp, op0, op1, normalizep, target_mode);
5556 	  if (tem)
5557 	    return tem;
5558 
5559 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5560 	    {
5561 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5562 				 unsignedp, op1, op0, normalizep, target_mode);
5563 	      if (tem)
5564 	        return tem;
5565 	    }
5566 	  break;
5567 	}
5568     }
5569 
5570   return 0;
5571 }
5572 
5573 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5574    and storing in TARGET.  Normally return TARGET.
5575    Return 0 if that cannot be done.
5576 
5577    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5578    it is VOIDmode, they cannot both be CONST_INT.
5579 
5580    UNSIGNEDP is for the case where we have to widen the operands
5581    to perform the operation.  It says to use zero-extension.
5582 
5583    NORMALIZEP is 1 if we should convert the result to be either zero
5584    or one.  NORMALIZEP is -1 if we should convert the result to be
5585    either zero or -1.  If NORMALIZEP is zero, the result will be left
5586    "raw" out of the scc insn.  */
5587 
5588 rtx
5589 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5590 		 machine_mode mode, int unsignedp, int normalizep)
5591 {
5592   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5593   enum rtx_code rcode;
5594   rtx subtarget;
5595   rtx tem, trueval;
5596   rtx_insn *last;
5597 
5598   /* If we compare constants, we shouldn't use a store-flag operation,
5599      but a constant load.  We can get there via the vanilla route that
5600      usually generates a compare-branch sequence, but will in this case
5601      fold the comparison to a constant, and thus elide the branch.  */
5602   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5603     return NULL_RTX;
5604 
5605   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5606 			   target_mode);
5607   if (tem)
5608     return tem;
5609 
5610   /* If we reached here, we can't do this with a scc insn, however there
5611      are some comparisons that can be done in other ways.  Don't do any
5612      of these cases if branches are very cheap.  */
5613   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5614     return 0;
5615 
5616   /* See what we need to return.  We can only return a 1, -1, or the
5617      sign bit.  */
5618 
5619   if (normalizep == 0)
5620     {
5621       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5622 	normalizep = STORE_FLAG_VALUE;
5623 
5624       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5625 	;
5626       else
5627 	return 0;
5628     }
5629 
5630   last = get_last_insn ();
5631 
5632   /* If optimizing, use different pseudo registers for each insn, instead
5633      of reusing the same pseudo.  This leads to better CSE, but slows
5634      down the compiler, since there are more pseudos.  */
5635   subtarget = (!optimize
5636 	       && (target_mode == mode)) ? target : NULL_RTX;
5637   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5638 
5639   /* For floating-point comparisons, try the reverse comparison or try
5640      changing the "orderedness" of the comparison.  */
5641   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5642     {
5643       enum rtx_code first_code;
5644       bool and_them;
5645 
5646       rcode = reverse_condition_maybe_unordered (code);
5647       if (can_compare_p (rcode, mode, ccp_store_flag)
5648           && (code == ORDERED || code == UNORDERED
5649 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5650 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5651 	{
5652           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5653 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5654 
5655 	  /* For the reverse comparison, use either an addition or a XOR.  */
5656           if (want_add
5657 	      && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5658 			   optimize_insn_for_speed_p ()) == 0)
5659 	    {
5660 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5661 				       STORE_FLAG_VALUE, target_mode);
5662 	      if (tem)
5663                 return expand_binop (target_mode, add_optab, tem,
5664 				     gen_int_mode (normalizep, target_mode),
5665 				     target, 0, OPTAB_WIDEN);
5666 	    }
5667           else if (!want_add
5668 	           && rtx_cost (trueval, mode, XOR, 1,
5669 			        optimize_insn_for_speed_p ()) == 0)
5670 	    {
5671 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5672 				       normalizep, target_mode);
5673 	      if (tem)
5674                 return expand_binop (target_mode, xor_optab, tem, trueval,
5675 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5676 	    }
5677 	}
5678 
5679       delete_insns_since (last);
5680 
5681       /* Cannot split ORDERED and UNORDERED, only try the above trick.  */
5682       if (code == ORDERED || code == UNORDERED)
5683 	return 0;
5684 
5685       and_them = split_comparison (code, mode, &first_code, &code);
5686 
5687       /* If there are no NaNs, the first comparison should always fall through.
5688          Effectively change the comparison to the other one.  */
5689       if (!HONOR_NANS (mode))
5690 	{
5691           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5692 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5693 				    target_mode);
5694 	}
5695 
5696       if (!HAVE_conditional_move)
5697 	return 0;
5698 
5699       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5700 	 conditional move.  */
5701       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5702 			       normalizep, target_mode);
5703       if (tem == 0)
5704 	return 0;
5705 
5706       if (and_them)
5707         tem = emit_conditional_move (target, code, op0, op1, mode,
5708 				     tem, const0_rtx, GET_MODE (tem), 0);
5709       else
5710         tem = emit_conditional_move (target, code, op0, op1, mode,
5711 				     trueval, tem, GET_MODE (tem), 0);
5712 
5713       if (tem == 0)
5714         delete_insns_since (last);
5715       return tem;
5716     }
5717 
5718   /* The remaining tricks only apply to integer comparisons.  */
5719 
5720   if (GET_MODE_CLASS (mode) != MODE_INT)
5721     return 0;
5722 
5723   /* If this is an equality comparison of integers, we can try to exclusive-or
5724      (or subtract) the two operands and use a recursive call to try the
5725      comparison with zero.  Don't do any of these cases if branches are
5726      very cheap.  */
5727 
5728   if ((code == EQ || code == NE) && op1 != const0_rtx)
5729     {
5730       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5731 			  OPTAB_WIDEN);
5732 
5733       if (tem == 0)
5734 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5735 			    OPTAB_WIDEN);
5736       if (tem != 0)
5737 	tem = emit_store_flag (target, code, tem, const0_rtx,
5738 			       mode, unsignedp, normalizep);
5739       if (tem != 0)
5740 	return tem;
5741 
5742       delete_insns_since (last);
5743     }
5744 
5745   /* For integer comparisons, try the reverse comparison.  However, for
5746      small X and if we'd have to extend anyway, implementing "X != 0"
5747      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5748   rcode = reverse_condition (code);
5749   if (can_compare_p (rcode, mode, ccp_store_flag)
5750       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5751 	    && code == NE
5752 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5753 	    && op1 == const0_rtx))
5754     {
5755       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5756 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5757 
5758       /* Again, for the reverse comparison, use either an addition or a XOR.  */
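      /* E.g. with STORE_FLAG_VALUE == 1 and normalizep == -1, the
	 reverse comparison yields 0 or 1 and adding -1 maps that to
	 -1 or 0, the original condition in the requested form;
	 otherwise XORing with TRUEVAL swaps the two values.  */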
5759       if (want_add
5760 	  && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5761 		       optimize_insn_for_speed_p ()) == 0)
5762 	{
5763 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5764 				   STORE_FLAG_VALUE, target_mode);
5765 	  if (tem != 0)
5766             tem = expand_binop (target_mode, add_optab, tem,
5767 				gen_int_mode (normalizep, target_mode),
5768 				target, 0, OPTAB_WIDEN);
5769 	}
5770       else if (!want_add
5771 	       && rtx_cost (trueval, mode, XOR, 1,
5772 			    optimize_insn_for_speed_p ()) == 0)
5773 	{
5774 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5775 				   normalizep, target_mode);
5776 	  if (tem != 0)
5777             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5778 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5779 	}
5780 
5781       if (tem != 0)
5782 	return tem;
5783       delete_insns_since (last);
5784     }
5785 
5786   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5787      the constant zero.  Reject all other comparisons at this point.  Only
5788      do LE and GT if branches are expensive since they are expensive on
5789      2-operand machines.  */
5790 
5791   if (op1 != const0_rtx
5792       || (code != EQ && code != NE
5793 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5794 			   false) <= 1 || (code != LE && code != GT))))
5795     return 0;
5796 
5797   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5798      do the necessary operation below.  */
5799 
5800   tem = 0;
5801 
5802   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5803      the sign bit set.  */
5804 
5805   if (code == LE)
5806     {
5807       /* This is destructive, so SUBTARGET can't be OP0.  */
5808       if (rtx_equal_p (subtarget, op0))
5809 	subtarget = 0;
5810 
5811       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5812 			  OPTAB_WIDEN);
5813       if (tem)
5814 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5815 			    OPTAB_WIDEN);
5816     }
5817 
5818   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5819      number of bits in the mode of OP0, minus one.  */
5820 
5821   if (code == GT)
5822     {
5823       if (rtx_equal_p (subtarget, op0))
5824 	subtarget = 0;
5825 
5826       tem = maybe_expand_shift (RSHIFT_EXPR, mode, op0,
5827 				GET_MODE_BITSIZE (mode) - 1,
5828 				subtarget, 0);
5829       if (tem)
5830 	tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5831 			    OPTAB_WIDEN);
5832     }
5833 
5834   if (code == EQ || code == NE)
5835     {
5836       /* For EQ or NE, one way to do the comparison is to apply an operation
5837 	 that converts the operand into a positive number if it is nonzero
5838 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5839 	 for NE we negate.  This puts the result in the sign bit.  Then we
5840 	 normalize with a shift, if needed.
5841 
5842 	 Two operations that can do the above actions are ABS and FFS, so try
5843 	 them.  If that doesn't work, and MODE is smaller than a full word,
5844 	 we can use zero-extension to the wider mode (an unsigned conversion)
5845 	 as the operation.  */
5846 
5847       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5848 	 that is compensated by the subsequent overflow when subtracting
5849 	 one / negating.  */
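      /* E.g. with ABS available, x == 0 becomes the sign bit of
	 abs (x) - 1 and x != 0 the sign bit of -abs (x); even for
	 x == INT_MIN the wraparound leaves the sign bit correct.  */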
5850 
5851       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5852 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5853       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5854 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5855       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5856 	{
5857 	  tem = convert_modes (word_mode, mode, op0, 1);
5858 	  mode = word_mode;
5859 	}
5860 
5861       if (tem != 0)
5862 	{
5863 	  if (code == EQ)
5864 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5865 				0, OPTAB_WIDEN);
5866 	  else
5867 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5868 	}
5869 
5870       /* If we couldn't do it that way, for NE we can "or" the two's complement
5871 	 of the value with itself.  For EQ, we take the one's complement of
5872 	 that "or", which is an extra insn, so we only handle EQ if branches
5873 	 are expensive.  */
5874 
5875       if (tem == 0
5876 	  && (code == NE
5877 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5878 		      	      false) > 1))
5879 	{
5880 	  if (rtx_equal_p (subtarget, op0))
5881 	    subtarget = 0;
5882 
5883 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5884 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5885 			      OPTAB_WIDEN);
5886 
5887 	  if (tem && code == EQ)
5888 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5889 	}
5890     }
5891 
5892   if (tem && normalizep)
5893     tem = maybe_expand_shift (RSHIFT_EXPR, mode, tem,
5894 			      GET_MODE_BITSIZE (mode) - 1,
5895 			      subtarget, normalizep == 1);
5896 
5897   if (tem)
5898     {
5899       if (!target)
5900         ;
5901       else if (GET_MODE (tem) != target_mode)
5902 	{
5903 	  convert_move (target, tem, 0);
5904 	  tem = target;
5905 	}
5906       else if (!subtarget)
5907 	{
5908 	  emit_move_insn (target, tem);
5909 	  tem = target;
5910 	}
5911     }
5912   else
5913     delete_insns_since (last);
5914 
5915   return tem;
5916 }
5917 
5918 /* Like emit_store_flag, but always succeeds.  */
5919 
5920 rtx
5921 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5922 		       machine_mode mode, int unsignedp, int normalizep)
5923 {
5924   rtx tem;
5925   rtx_code_label *label;
5926   rtx trueval, falseval;
5927 
5928   /* First see if emit_store_flag can do the job.  */
5929   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5930   if (tem != 0)
5931     return tem;
5932 
5933   /* If one operand is constant, make it the second one.  Only do this
5934      if the other operand is not constant as well.  */
5935 
5936   if (swap_commutative_operands_p (op0, op1))
5937     {
5938       std::swap (op0, op1);
5939       code = swap_condition (code);
5940     }
5941 
5942   if (mode == VOIDmode)
5943     mode = GET_MODE (op0);
5944 
5945   if (!target)
5946     target = gen_reg_rtx (word_mode);
5947 
5948   /* If this failed, we have to do this with set/compare/jump/set code.
5949      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5950   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5951   if (code == NE
5952       && GET_MODE_CLASS (mode) == MODE_INT
5953       && REG_P (target)
5954       && op0 == target
5955       && op1 == const0_rtx)
5956     {
5957       label = gen_label_rtx ();
5958       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5959 			       NULL_RTX, NULL, label, -1);
5960       emit_move_insn (target, trueval);
5961       emit_label (label);
5962       return target;
5963     }
5964 
5965   if (!REG_P (target)
5966       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5967     target = gen_reg_rtx (GET_MODE (target));
5968 
5969   /* Jump in the right direction if the target cannot implement CODE
5970      but can jump on its reverse condition.  */
5971   falseval = const0_rtx;
5972   if (! can_compare_p (code, mode, ccp_jump)
5973       && (! FLOAT_MODE_P (mode)
5974           || code == ORDERED || code == UNORDERED
5975           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5976           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5977     {
5978       enum rtx_code rcode;
5979       if (FLOAT_MODE_P (mode))
5980         rcode = reverse_condition_maybe_unordered (code);
5981       else
5982         rcode = reverse_condition (code);
5983 
5984       /* Canonicalize to UNORDERED for the libcall.  */
5985       if (can_compare_p (rcode, mode, ccp_jump)
5986           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5987 	{
5988 	  falseval = trueval;
5989 	  trueval = const0_rtx;
5990 	  code = rcode;
5991 	}
5992     }
5993 
5994   emit_move_insn (target, trueval);
5995   label = gen_label_rtx ();
5996   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5997 			   label, -1);
5998 
5999   emit_move_insn (target, falseval);
6000   emit_label (label);
6001 
6002   return target;
6003 }
6004 
6005 /* Perform possibly multi-word comparison and conditional jump to LABEL
6006    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
6007    now a thin wrapper around do_compare_rtx_and_jump.  */
6008 
6009 static void
6010 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
6011 		 rtx_code_label *label)
6012 {
6013   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
6014   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
6015 			   NULL, label, -1);
6016 }
6017