/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "langhooks.h"
#include "df.h"
#include "target.h"
#include "expmed.h"

struct target_expmed default_target_expmed;
#if SWITCHABLE_TARGET
struct target_expmed *this_target_expmed = &default_target_expmed;
#endif

static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int, bool);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);

/* Test whether a value is zero or a power of two.  */
#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
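/* E.g. (8 & 7) == 0 holds, so 8 passes; (6 & 5) == 4, so 6 fails.
   X & (X - 1) clears the lowest set bit of X, which yields zero
   exactly when X has at most one bit set.  */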

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif


/* Reduce conditional compilation elsewhere.  */
#ifndef HAVE_insv
#define HAVE_insv	0
#define CODE_FOR_insv	CODE_FOR_nothing
#define gen_insv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extv
#define HAVE_extv	0
#define CODE_FOR_extv	CODE_FOR_nothing
#define gen_extv(a,b,c,d) NULL_RTX
#endif
#ifndef HAVE_extzv
#define HAVE_extzv	0
#define CODE_FOR_extzv	CODE_FOR_nothing
#define gen_extzv(a,b,c,d) NULL_RTX
#endif

void
init_expmed (void)
{
  struct
  {
    struct rtx_def reg;		rtunion reg_fld[2];
    struct rtx_def plus;	rtunion plus_fld1;
    struct rtx_def neg;
    struct rtx_def mult;	rtunion mult_fld1;
    struct rtx_def sdiv;	rtunion sdiv_fld1;
    struct rtx_def udiv;	rtunion udiv_fld1;
    struct rtx_def zext;
    struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
    struct rtx_def smod_32;	rtunion smod_32_fld1;
    struct rtx_def wide_mult;	rtunion wide_mult_fld1;
    struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
    struct rtx_def wide_trunc;
    struct rtx_def shift;	rtunion shift_fld1;
    struct rtx_def shift_mult;	rtunion shift_mult_fld1;
    struct rtx_def shift_add;	rtunion shift_add_fld1;
    struct rtx_def shift_sub0;	rtunion shift_sub0_fld1;
    struct rtx_def shift_sub1;	rtunion shift_sub1_fld1;
  } all;

  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  int m, n;
  enum machine_mode mode, wider_mode;
  int speed;


  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
    }
  memset (&all, 0, sizeof all);

  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);

  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;

  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;

  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;

  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;

  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;

  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);

  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);

  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;

  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;

  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;

  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;

  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;

  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;

  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;

  PUT_CODE (&all.shift_sub0, MINUS);
  XEXP (&all.shift_sub0, 0) = &all.shift_mult;
  XEXP (&all.shift_sub0, 1) = &all.reg;

  PUT_CODE (&all.shift_sub1, MINUS);
  XEXP (&all.shift_sub1, 0) = &all.reg;
  XEXP (&all.shift_sub1, 1) = &all.shift_mult;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      zero_cost[speed] = set_src_cost (const0_rtx, speed);

      for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
	   mode != VOIDmode;
	   mode = GET_MODE_WIDER_MODE (mode))
	{
	  PUT_MODE (&all.reg, mode);
	  PUT_MODE (&all.plus, mode);
	  PUT_MODE (&all.neg, mode);
	  PUT_MODE (&all.mult, mode);
	  PUT_MODE (&all.sdiv, mode);
	  PUT_MODE (&all.udiv, mode);
	  PUT_MODE (&all.sdiv_32, mode);
	  PUT_MODE (&all.smod_32, mode);
	  PUT_MODE (&all.wide_trunc, mode);
	  PUT_MODE (&all.shift, mode);
	  PUT_MODE (&all.shift_mult, mode);
	  PUT_MODE (&all.shift_add, mode);
	  PUT_MODE (&all.shift_sub0, mode);
	  PUT_MODE (&all.shift_sub1, mode);

	  add_cost[speed][mode] = set_src_cost (&all.plus, speed);
	  neg_cost[speed][mode] = set_src_cost (&all.neg, speed);
	  mul_cost[speed][mode] = set_src_cost (&all.mult, speed);
	  sdiv_cost[speed][mode] = set_src_cost (&all.sdiv, speed);
	  udiv_cost[speed][mode] = set_src_cost (&all.udiv, speed);

	  sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all.sdiv_32, speed)
				          <= 2 * add_cost[speed][mode]);
	  smod_pow2_cheap[speed][mode] = (set_src_cost (&all.smod_32, speed)
				          <= 4 * add_cost[speed][mode]);

	  wider_mode = GET_MODE_WIDER_MODE (mode);
	  if (wider_mode != VOIDmode)
	    {
	      PUT_MODE (&all.zext, wider_mode);
	      PUT_MODE (&all.wide_mult, wider_mode);
	      PUT_MODE (&all.wide_lshr, wider_mode);
	      XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));

	      mul_widen_cost[speed][wider_mode]
	        = set_src_cost (&all.wide_mult, speed);
	      mul_highpart_cost[speed][mode]
	        = set_src_cost (&all.wide_trunc, speed);
	    }

	  shift_cost[speed][mode][0] = 0;
	  shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
	    = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];

	  n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
	  for (m = 1; m < n; m++)
	    {
	      XEXP (&all.shift, 1) = cint[m];
	      XEXP (&all.shift_mult, 1) = pow2[m];

	      shift_cost[speed][mode][m] = set_src_cost (&all.shift, speed);
	      shiftadd_cost[speed][mode][m] = set_src_cost (&all.shift_add,
							    speed);
	      shiftsub0_cost[speed][mode][m] = set_src_cost (&all.shift_sub0,
							     speed);
	      shiftsub1_cost[speed][mode][m] = set_src_cost (&all.shift_sub1,
							     speed);
	    }
	}
    }
  if (alg_hash_used_p)
    memset (alg_hash, 0, sizeof (alg_hash));
  else
    alg_hash_used_p = true;
  default_rtl_profile ();
}
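
/* An illustrative sketch (not part of the original file) of how the
   tables filled in above are meant to be consulted by the multiply
   synthesis code later in this file: a multiply by 9 can be rewritten
   as (x << 3) + x whenever

     shiftadd_cost[speed][mode][3] < mul_cost[speed][mode]

   where SPEED comes from optimize_insn_for_speed_p ().  The skeleton
   rtxes built in ALL are never emitted; they only give set_src_cost
   something of the right shape to price, once per mode and speed.  */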

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (enum machine_mode mode, rtx x)
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
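
/* For instance, negate_rtx (SImode, const1_rtx) folds directly to
   (const_int -1) through simplify_unary_operation, while negating a
   pseudo register falls through to expand_unop and emits a NEG.  */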

/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
enum machine_mode
mode_for_extraction (enum extraction_pattern pattern, int opno)
{
  const struct insn_data_d *data;

  switch (pattern)
    {
    case EP_insv:
      if (HAVE_insv)
	{
	  data = &insn_data[CODE_FOR_insv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extv:
      if (HAVE_extv)
	{
	  data = &insn_data[CODE_FOR_extv];
	  break;
	}
      return MAX_MACHINE_MODE;

    case EP_extzv:
      if (HAVE_extzv)
	{
	  data = &insn_data[CODE_FOR_extzv];
	  break;
	}
      return MAX_MACHINE_MODE;

    default:
      gcc_unreachable ();
    }

  if (opno == -1)
    return VOIDmode;

  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
    return word_mode;
  return data->operand[opno].mode;
}
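
/* Typical use, as seen in store_bit_field_1 below: operand 3 of the
   insv pattern is the value being inserted, so

     op_mode = mode_for_extraction (EP_insv, 3);

   yields the mode that operand wants, or MAX_MACHINE_MODE when the
   target has no insv pattern at all.  */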

/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.  */

static bool
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum,
		   unsigned HOST_WIDE_INT bitregion_start,
		   unsigned HOST_WIDE_INT bitregion_end,
		   enum machine_mode fieldmode,
		   rtx value, bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  int byte_offset;
  rtx orig_value;

  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));

      byte_offset = 0;

      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	{
	  int difference = inner_mode_size - outer_mode_size;

	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	}
      else
	byte_offset = SUBREG_BYTE (op0);

      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }
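
  /* Worked example (assuming a 32-bit big-endian target): peeling the
     paradoxical subreg (subreg:DI (reg:SI X) 0) gives inner_mode_size
     == 4 and outer_mode_size == 8, so difference == -4; with
     WORDS_BIG_ENDIAN that makes byte_offset -4 and lowers BITNUM by
     32, renumbering the bits relative to X itself.  */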

  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return true;

  /* Use vec_set patterns for inserting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_set_optab, outermode);
      int pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_fixed_operand (&ops[0], op0);
      create_input_operand (&ops[1], value, innermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	return true;
    }
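
  /* E.g. storing an SImode value into element 2 of a V4SImode
     register: bitsize == 32 and bitnum == 64, so pos == 2 and the
     target's vec_set pattern receives the vector, the value and the
     constant index 2.  */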

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
                + (offset * UNITS_PER_WORD);

  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (!MEM_P (op0)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
		 || validate_subreg (fieldmode, GET_MODE (op0), op0,
				     byte_offset)))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
				   byte_offset);
      emit_move_insn (op0, value);
      return true;
    }
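
  /* E.g. on a little-endian target, storing all 32 bits at bit 0 of a
     DImode pseudo reduces to a move into (subreg:SI (reg:DI) 0); for a
     sufficiently aligned MEM it is adjust_address plus a single move.
     (Illustrative; the exact path depends on validate_subreg and the
     target's alignment rules.)  */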

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else
	  {
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  /* ?? not always for C++0x memory model ?? */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (!MEM_P (op0)
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
    {
      struct expand_operand ops[2];
      enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
      rtx arg0 = op0;
      unsigned HOST_WIDE_INT subreg_off;

      if (GET_CODE (arg0) == SUBREG)
	{
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  arg0 = SUBREG_REG (arg0);
	}

      subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		   + (offset * UNITS_PER_WORD);
      if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
	{
	  arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);

	  create_fixed_operand (&ops[0], arg0);
	  /* Shrink the source operand to FIELDMODE.  */
	  create_convert_operand_to (&ops[1], value, fieldmode, false);
	  if (maybe_expand_insn (icode, 2, ops))
	    return true;
	}
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;
      rtx last;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
	 is not allowed.  */
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards
				  ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
				  - i - 1
				  : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  unsigned HOST_WIDE_INT new_bitsize =
	    MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);

	  /* If the remaining chunk doesn't have full wordsize we have
	     to make sure that for big endian machines the higher order
	     bits are used.  */
	  if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
	    value_word = simplify_expand_binop (word_mode, lshr_optab,
						value_word,
						GEN_INT (BITS_PER_WORD
							 - new_bitsize),
						NULL_RTX, true,
						OPTAB_LIB_WIDEN);

	  if (!store_bit_field_1 (op0, new_bitsize,
				  bitnum + bit_offset,
				  bitregion_start, bitregion_end,
				  word_mode,
				  value_word, fallback_p))
	    {
	      delete_insns_since (last);
	      return false;
	    }
	}
      return true;
    }

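  /* Example for the multiword path above: storing a 64-bit DImode
     VALUE on a 32-bit little-endian target gives nwords == 2; the
     first iteration stores the low word at BITNUM, the second the
     high word at BITNUM + 32, each via a recursive word_mode call.  */
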
  /* From here on we can assume that the field to be stored in is
     no wider than a word, since larger fields were handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (!MEM_P (op0))
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (!REG_P (op0))
	    {
	      /* Since this is a destination (lvalue), we can't copy
		 it to a pseudo.  We can remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may
		 have been added above.  */
	      gcc_assert (GET_CODE (op0) == SUBREG
			  && (GET_MODE_SIZE (GET_MODE (op0))
			      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	      op0 = SUBREG_REG (op0);
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
		                op0, (offset * UNITS_PER_WORD));
	}
      offset = 0;
    }

  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  orig_value = value;
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
    {
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && bitsize > 0
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      /* Do not use insv for volatile bitfields when
         -fstrict-volatile-bitfields is in effect.  */
      && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
	   && flag_strict_volatile_bitfields > 0)
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      /* Do not use insv if the bit region is restricted and
	 op_mode integer at offset doesn't fit into the
	 restricted region.  */
      && !(MEM_P (op0) && bitregion_end
	   && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
	      > bitregion_end + 1))
    {
      struct expand_operand ops[4];
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      bool copy_back = false;

      /* Add OFFSET into OP0's address.  */
      if (MEM_P (xop0))
	xop0 = adjust_address (xop0, byte_mode, offset);

      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_lowpart_SUBREG (op_mode, xop0);

      /* If the destination is a paradoxical subreg such that we need a
	 truncate to the inner mode, perform the insertion on a temporary and
	 truncate the result to the original destination.  Note that we can't
	 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
	 X) 0)) is (reg:N X).  */
      if (GET_CODE (xop0) == SUBREG
	  && REG_P (SUBREG_REG (xop0))
	  && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
					      op_mode)))
	{
	  rtx tem = gen_reg_rtx (op_mode);
	  emit_move_insn (tem, xop0);
	  xop0 = tem;
	  copy_back = true;
	}

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;

      unit = GET_MODE_BITSIZE (op_mode);

      /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
         "backwards" from the size of the unit we are inserting into.
	 Otherwise, we count bits from the most significant on a
	 BYTES/BITS_BIG_ENDIAN machine.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;
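
      /* E.g. unit == 32, bitsize == 8, xbitpos == 4: the position is
	 remapped to 32 - 8 - 4 == 20, naming the same bits from the
	 opposite end of the unit.  */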

      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != op_mode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
		{
		  rtx tmp;

		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
		  if (! tmp)
		    tmp = simplify_gen_subreg (op_mode,
					       force_reg (GET_MODE (value),
							  value1),
					       GET_MODE (value), 0);
		  value1 = tmp;
		}
	      else
		value1 = gen_lowpart (op_mode, value1);
	    }
	  else if (CONST_INT_P (value))
	    value1 = gen_int_mode (INTVAL (value), op_mode);
	  else
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
	}

      create_fixed_operand (&ops[0], xop0);
      create_integer_operand (&ops[1], bitsize);
      create_integer_operand (&ops[2], xbitpos);
      create_input_operand (&ops[3], value1, op_mode);
      if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
	{
	  if (copy_back)
	    convert_move (op0, xop0, true);
	  return true;
	}
      delete_insns_since (last);
    }

  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
    {
      enum machine_mode bestmode;
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the mode to use for inserting into this field.  If OP0 is
	 BLKmode, get the smallest mode consistent with the alignment. If
	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
	 mode. Otherwise, use the smallest mode containing the field.  */

      if (GET_MODE (op0) == BLKmode
	  || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
	  || (op_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
	bestmode = get_best_mode (bitsize, bitnum,
				  bitregion_start, bitregion_end,
				  MEM_ALIGN (op0),
				  (op_mode == MAX_MACHINE_MODE
				   ? VOIDmode : op_mode),
				  MEM_VOLATILE_P (op0));
      else
	bestmode = GET_MODE (op0);

      if (bestmode != VOIDmode
	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	{
	  rtx last, tempreg, xop0;
	  unsigned HOST_WIDE_INT xoffset, xbitpos;

	  last = get_last_insn ();

	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;
	  xop0 = adjust_address (op0, bestmode, xoffset);

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
				 bitregion_start, bitregion_end,
				 fieldmode, orig_value, false))
	    {
	      emit_move_insn (xop0, tempreg);
	      return true;
	    }
	  delete_insns_since (last);
	}
    }

  if (!fallback_p)
    return false;

  store_fixed_bit_field (op0, offset, bitsize, bitpos,
			 bitregion_start, bitregion_end, value);
  return true;
}

/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.

   BITREGION_START is bitpos of the first bitfield in this region.
   BITREGION_END is the bitpos of the ending bitfield in this region.
   These two fields are 0, if the C++ memory model does not apply,
   or we are not interested in keeping track of bitfield regions.

   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */

void
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum,
		 unsigned HOST_WIDE_INT bitregion_start,
		 unsigned HOST_WIDE_INT bitregion_end,
		 enum machine_mode fieldmode,
		 rtx value)
{
  /* Under the C++0x memory model, we must not touch bits outside the
     bit region.  Adjust the address to start at the beginning of the
     bit region.  */
  if (MEM_P (str_rtx) && bitregion_start > 0)
    {
      enum machine_mode bestmode;
      enum machine_mode op_mode;
      unsigned HOST_WIDE_INT offset;

      op_mode = mode_for_extraction (EP_insv, 3);
      if (op_mode == MAX_MACHINE_MODE)
	op_mode = VOIDmode;

      gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);

      offset = bitregion_start / BITS_PER_UNIT;
      bitnum -= bitregion_start;
      bitregion_end -= bitregion_start;
      bitregion_start = 0;
      bestmode = get_best_mode (bitsize, bitnum,
				bitregion_start, bitregion_end,
				MEM_ALIGN (str_rtx),
				op_mode,
				MEM_VOLATILE_P (str_rtx));
      str_rtx = adjust_address (str_rtx, bestmode, offset);
    }

  if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
			  bitregion_start, bitregion_end,
			  fieldmode, value, true))
    gcc_unreachable ();
}

/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
     (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
    (If OP0 is a register, it may be a full word or a narrower mode,
     but BITPOS still counts within a full word,
     which is significant on bigendian machines.)  */

static void
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx temp;
  int all_zero = 0;
  int all_one = 0;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    {
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 bitregion_start, bitregion_end,
				 value);
	  return;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;

      if (bitregion_end)
	maxbits = bitregion_end - bitregion_start + 1;

      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */

      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
	mode = word_mode;

      if (MEM_VOLATILE_P (op0)
          && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
	  && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
	  && flag_strict_volatile_bitfields > 0)
	mode = GET_MODE (op0);
      else
	mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			      bitregion_start, bitregion_end,
			      MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
				 bitregion_start, bitregion_end, value);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
      /* BITPOS is the distance between our msb
	 and that of the containing datum.
	 Convert it to the distance from the lsb.  */
      bitpos = total_bits - bitsize - bitpos;
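
  /* E.g. total_bits == 32, bitsize == 4, bitpos == 8 from the msb
     becomes 32 - 4 - 8 == 20 from the lsb: the field occupies bits
     20..23 of OP0.  */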

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (CONST_INT_P (value))
    {
      HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	value = convert_to_mode (mode, value, 1);

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      bitpos, NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */

  temp = force_reg (mode, op0);

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    {
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
    }

  if (op0 != temp)
    {
      op0 = copy_rtx (op0);
      emit_move_insn (op0, temp);
    }
}
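
/* Net effect of store_fixed_bit_field, for a field of BITSIZE bits at
   lsb-relative BITPOS:

     mask = ((1 << BITSIZE) - 1) << BITPOS;
     op0  = (op0 & ~mask) | ((value << BITPOS) & mask);

   with the AND skipped when VALUE is all ones and the IOR skipped when
   it is zero (the all_one / all_zero shortcuts above).  */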

/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos,
		       unsigned HOST_WIDE_INT bitregion_start,
		       unsigned HOST_WIDE_INT bitregion_end,
		       rtx value)
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && !CONST_INT_P (value))
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* When region of bytes we can touch is restricted, decrease
	 UNIT close to the end of the region as needed.  */
      if (bitregion_end
	  && unit > BITS_PER_UNIT
	  && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
	{
	  unit = unit / 2;
	  continue;
	}

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
					    NULL_RTX, 1, false);
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (CONST_INT_P (value))
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1, false);
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
	  if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
	    word = word_offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (SUBREG_REG (op0), word_offset,
					  GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (REG_P (op0))
	{
	  enum machine_mode op0_mode = GET_MODE (op0);
	  if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
	    word = offset ? const0_rtx : op0;
	  else
	    word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  If WORD is const0_rtx,
	 it is just an out-of-bounds access.  Ignore it.  */
      if (word != const0_rtx)
	store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
			       thispos, bitregion_start, bitregion_end, part);
      bitsdone += thissize;
    }
}
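
/* Example of the splitting above: a 10-bit field at BITPOS 28 in a
   byte-aligned MEM (unit == 8) is stored as two pieces, 4 bits at
   offset 3 / bit 4 and then 6 bits at offset 4 / bit 0, each through
   store_fixed_bit_field.  */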

/* A subroutine of extract_bit_field_1 that converts return value X
   to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
   to extract_bit_field.  */

static rtx
convert_extracted_bit_field (rtx x, enum machine_mode mode,
			     enum machine_mode tmode, bool unsignedp)
{
  if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
    return x;

  /* If the x mode is not a scalar integral, first convert to the
     integer mode of that size and then access it as a floating-point
     value via a SUBREG.  */
  if (!SCALAR_INT_MODE_P (tmode))
    {
      enum machine_mode smode;

      smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
      x = convert_to_mode (smode, x, unsignedp);
      x = force_reg (smode, x);
      return gen_lowpart (tmode, x);
    }

  return convert_to_mode (tmode, x, unsignedp);
}
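
/* E.g. for tmode == SFmode on a target where SFmode is 32 bits wide,
   this converts X to SImode and then reinterprets the bits through
   gen_lowpart; an arithmetic int-to-float conversion would change the
   bit pattern rather than merely relabel it.  */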

/* A subroutine of extract_bit_field, with the same arguments.
   If FALLBACK_P is true, fall back to extract_fixed_bit_field
   if we can find no other means of implementing the operation.
   If FALLBACK_P is false, return NULL instead.  */

static rtx
extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		     unsigned HOST_WIDE_INT bitnum,
		     int unsignedp, bool packedp, rtx target,
		     enum machine_mode mode, enum machine_mode tmode,
		     bool fallback_p)
{
  unsigned int unit
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  rtx op0 = str_rtx;
  enum machine_mode int_mode;
  enum machine_mode ext_mode;
  enum machine_mode mode1;
  int byte_offset;

  if (tmode == VOIDmode)
    tmode = mode;

  while (GET_CODE (op0) == SUBREG)
    {
      bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
    }

  /* If we have an out-of-bounds access to a register, just return an
     uninitialized register of the required mode.  This can occur if the
     source code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
    return gen_reg_rtx (tmode);

  if (REG_P (op0)
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* See if we can get a better vector mode before extracting.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && GET_MODE_INNER (GET_MODE (op0)) != tmode)
    {
      enum machine_mode new_mode;

      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	new_mode = MIN_MODE_VECTOR_FLOAT;
      else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
	new_mode = MIN_MODE_VECTOR_FRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
	new_mode = MIN_MODE_VECTOR_UFRACT;
      else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
	new_mode = MIN_MODE_VECTOR_ACCUM;
      else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
	new_mode = MIN_MODE_VECTOR_UACCUM;
      else
	new_mode = MIN_MODE_VECTOR_INT;

      for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
	if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
	    && targetm.vector_mode_supported_p (new_mode))
	  break;
      if (new_mode != VOIDmode)
	op0 = gen_lowpart (new_mode, op0);
    }

  /* Use vec_extract patterns for extracting parts of vectors whenever
     available.  */
  if (VECTOR_MODE_P (GET_MODE (op0))
      && !MEM_P (op0)
      && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
	  == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
    {
      struct expand_operand ops[3];
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      enum insn_code icode = optab_handler (vec_extract_optab, outermode);
      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);

      create_output_operand (&ops[0], target, innermode);
      create_input_operand (&ops[1], op0, outermode);
      create_integer_operand (&ops[2], pos);
      if (maybe_expand_insn (icode, 3, ops))
	{
	  target = ops[0].value;
	  if (GET_MODE (target) != mode)
	    return gen_lowpart (tmode, target);
	  return target;
	}
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (MEM_P (op0))
	  op0 = adjust_address (op0, imode, 0);
	else if (imode != BLKmode)
	  {
	    op0 = gen_lowpart (imode, op0);

	    /* If we got a SUBREG, force it into a register since we
	       aren't going to be able to do another SUBREG on it.  */
	    if (GET_CODE (op0) == SUBREG)
	      op0 = force_reg (imode, op0);
	  }
	else if (REG_P (op0))
	  {
	    rtx reg, subreg;
	    imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
					    MODE_INT);
	    reg = gen_reg_rtx (imode);
	    subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
	    emit_move_insn (subreg, op0);
	    op0 = reg;
	    bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
	  }
	else
	  {
	    rtx mem = assign_stack_temp (GET_MODE (op0),
					 GET_MODE_SIZE (GET_MODE (op0)), 0);
	    emit_move_insn (mem, op0);
	    op0 = adjust_address (mem, BLKmode, 0);
	  }
      }
  }

  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
  if (MEM_P (op0))
    {
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
    }

  /* Extraction of a full-word or multi-word value from a structure
     in a register or aligned memory can be done with just a SUBREG.
     A subword value in the least significant part of a register
     can also be extracted with a SUBREG.  For this, we need the
     byte offset of the value in op0.  */

  bitpos = bitnum % unit;
  offset = bitnum / unit;
  byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && !MEM_P (op0)
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* Only scalar integer modes can be converted via subregs.  There is an
     additional problem for FP modes here in that they can have a precision
     which is different from the size.  mode_for_size uses precision, but
     we want a mode based on the size, so we must avoid calling it for FP
     modes.  */
  mode1  = (SCALAR_INT_MODE_P (tmode)
	    ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
	    : mode);

  /* If the bitfield is volatile, we need to make sure the access
     remains on a type-aligned boundary.  */
  if (GET_CODE (op0) == MEM
      && MEM_VOLATILE_P (op0)
      && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
      && flag_strict_volatile_bitfields > 0)
    goto no_subreg_mode_swap;

  if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	&& bitpos % BITS_PER_WORD == 0)
       || (mode1 != BLKmode
	   /* ??? The big endian test here is wrong.  This is correct
	      if the value is in a register, and if mode_for_size is not
	      the same mode as op0.  This causes us to get unnecessarily
	      inefficient code from the Thumb port when -mbig-endian.  */
	   && (BYTES_BIG_ENDIAN
	       ? bitpos + bitsize == BITS_PER_WORD
	       : bitpos == 0)))
      && ((!MEM_P (op0)
	   && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
	   && GET_MODE_SIZE (mode1) != 0
	   && byte_offset % GET_MODE_SIZE (mode1) == 0)
	  || (MEM_P (op0)
	      && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
		  || (offset * BITS_PER_UNIT % bitsize == 0
		      && MEM_ALIGN (op0) % bitsize == 0)))))
    {
      if (MEM_P (op0))
	op0 = adjust_address (op0, mode1, offset);
      else if (mode1 != GET_MODE (op0))
	{
	  rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
					 byte_offset);
	  if (sub == NULL)
	    goto no_subreg_mode_swap;
	  op0 = sub;
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }
 no_subreg_mode_swap:

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_clobber (target);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, false, target_part, mode,
				 word_mode);

	  gcc_assert (target_part);

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		emit_move_insn
		  (operand_subword (target,
				    WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
				    1, VOIDmode),
		   const0_rtx);
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
    }
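
  /* The two-shift sign extension just above: e.g. a signed 48-bit
     field extracted into DImode on a 32-bit target is shifted left by
     64 - 48 == 16 so its sign bit becomes the mode's sign bit, then
     shifted right arithmetically by 16 to replicate it downward.  */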
1514 
1515   /* From here on we know the desired field is smaller than a word.  */
1516 
1517   /* Check if there is a correspondingly-sized integer field, so we can
1518      safely extract it as one size of integer if necessary; then
1519      truncate or extend to the size that is wanted; then use SUBREGs or
1520      convert_to_mode to get one of the modes we really want.  */
1521 
1522   int_mode = int_mode_for_mode (tmode);
1523   if (int_mode == BLKmode)
1524     int_mode = int_mode_for_mode (mode);
1525   /* Should probably push op0 out to memory and then do a load.  */
1526   gcc_assert (int_mode != BLKmode);
1527 
1528   /* OFFSET is the number of words or bytes (UNIT says which)
1529      from STR_RTX to the first word or byte containing part of the field.  */
1530   if (!MEM_P (op0))
1531     {
1532       if (offset != 0
1533 	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1534 	{
1535 	  if (!REG_P (op0))
1536 	    op0 = copy_to_reg (op0);
1537 	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1538 		                op0, (offset * UNITS_PER_WORD));
1539 	}
1540       offset = 0;
1541     }
1542 
1543   /* Now OFFSET is nonzero only for memory operands.  */
1544   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1545   if (ext_mode != MAX_MACHINE_MODE
1546       && bitsize > 0
1547       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1548       /* Do not use extv/extzv for volatile bitfields when
1549          -fstrict-volatile-bitfields is in effect.  */
1550       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1551 	   && flag_strict_volatile_bitfields > 0)
1552       /* If op0 is a register, we need it in EXT_MODE to make it
1553 	 acceptable to the format of ext(z)v.  */
1554       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1555       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1556 	   && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1557     {
1558       struct expand_operand ops[4];
1559       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1560       rtx xop0 = op0;
1561       rtx xtarget = target;
1562       rtx xspec_target = target;
1563       rtx xspec_target_subreg = 0;
1564 
1565       /* If op0 is a register, we need it in EXT_MODE to make it
1566 	 acceptable to the format of ext(z)v.  */
1567       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1568 	xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1569       if (MEM_P (xop0))
1570 	/* Get ref to first byte containing part of the field.  */
1571 	xop0 = adjust_address (xop0, byte_mode, xoffset);
1572 
1573       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1574       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1575 	xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1576 
1577       unit = GET_MODE_BITSIZE (ext_mode);
1578 
1579       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1580          "backwards" from the size of the unit we are extracting from.
1581 	 Otherwise, we count bits from the most significant on a
1582 	 BYTES/BITS_BIG_ENDIAN machine.  */
1583 
1584       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1585 	xbitpos = unit - bitsize - xbitpos;
1586 
1587       if (xtarget == 0)
1588 	xtarget = xspec_target = gen_reg_rtx (tmode);
1589 
1590       if (GET_MODE (xtarget) != ext_mode)
1591 	{
1592 	  /* Don't use LHS paradoxical subreg if explicit truncation is needed
1593 	     between the mode of the extraction (EXT_MODE) and the target
1594 	     mode.  Instead, create a temporary and use convert_move to set
1595 	     the target.  */
1596 	  if (REG_P (xtarget)
1597 	      && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1598 	    {
1599 	      xtarget = gen_lowpart (ext_mode, xtarget);
1600 	      if (GET_MODE_PRECISION (ext_mode)
1601 		  > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1602 		xspec_target_subreg = xtarget;
1603 	    }
1604 	  else
1605 	    xtarget = gen_reg_rtx (ext_mode);
1606 	}
1607 
1608       create_output_operand (&ops[0], xtarget, ext_mode);
1609       create_fixed_operand (&ops[1], xop0);
1610       create_integer_operand (&ops[2], bitsize);
1611       create_integer_operand (&ops[3], xbitpos);
1612       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1613 			     4, ops))
1614 	{
1615 	  xtarget = ops[0].value;
1616 	  if (xtarget == xspec_target)
1617 	    return xtarget;
1618 	  if (xtarget == xspec_target_subreg)
1619 	    return xspec_target;
1620 	  return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1621 	}
1622     }
1623 
1624   /* If OP0 is a memory, try copying it to a register and seeing if a
1625      cheap register alternative is available.  */
1626   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1627     {
1628       enum machine_mode bestmode;
1629 
1630       /* Get the mode to use for extracting this field.  If
1631 	 OP0 is BLKmode, get the smallest mode consistent with the
1632 	 alignment. If OP0 is a non-BLKmode object that is no
1633 	 wider than EXT_MODE, use its mode. Otherwise, use the
1634 	 smallest mode containing the field.  */
1635 
1636       if (GET_MODE (op0) == BLKmode
1637 	  || (ext_mode != MAX_MACHINE_MODE
1638 	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1639 	bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1640 				  (ext_mode == MAX_MACHINE_MODE
1641 				   ? VOIDmode : ext_mode),
1642 				  MEM_VOLATILE_P (op0));
1643       else
1644 	bestmode = GET_MODE (op0);
1645 
1646       if (bestmode != VOIDmode
1647 	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1648 	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1649 	{
1650 	  unsigned HOST_WIDE_INT xoffset, xbitpos;
1651 
1652 	  /* Compute the offset as a multiple of this unit,
1653 	     counting in bytes.  */
1654 	  unit = GET_MODE_BITSIZE (bestmode);
1655 	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1656 	  xbitpos = bitnum % unit;
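	  /* For example, if BESTMODE is 32 bits wide and BITNUM is 70,
	     XOFFSET is (70 / 32) * 4 == 8 bytes and XBITPOS is
	     70 % 32 == 6.  */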
1657 
1658 	  /* Make sure the register is big enough for the whole field.  */
1659 	  if (xoffset * BITS_PER_UNIT + unit
1660 	      >= offset * BITS_PER_UNIT + bitsize)
1661 	    {
1662 	      rtx last, result, xop0;
1663 
1664 	      last = get_last_insn ();
1665 
1666 	      /* Fetch it to a register in that size.  */
1667 	      xop0 = adjust_address (op0, bestmode, xoffset);
1668 	      xop0 = force_reg (bestmode, xop0);
1669 	      result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1670 					    unsignedp, packedp, target,
1671 					    mode, tmode, false);
1672 	      if (result)
1673 		return result;
1674 
1675 	      delete_insns_since (last);
1676 	    }
1677 	}
1678     }
1679 
1680   if (!fallback_p)
1681     return NULL;
1682 
1683   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1684 				    bitpos, target, unsignedp, packedp);
1685   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1686 }
1687 
1688 /* Generate code to extract a bit-field from STR_RTX
1689    containing BITSIZE bits, starting at BITNUM,
1690    and put it in TARGET if possible (if TARGET is nonzero).
1691    Regardless of TARGET, we return the rtx for where the value is placed.
1692 
1693    STR_RTX is the structure containing the byte (a REG or MEM).
1694    UNSIGNEDP is nonzero if this is an unsigned bit field.
1695    PACKEDP is nonzero if the field has the packed attribute.
1696    MODE is the natural mode of the field value once extracted.
1697    TMODE is the mode the caller would like the value to have;
1698    but the value may be returned with type MODE instead.
1699 
1700    If a TARGET is specified and we can store in it at no extra cost,
1701    we do so, and return TARGET.
1702    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1703    if they are equally easy.  */
1704 
1705 rtx
1706 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1707 		   unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1708 		   rtx target, enum machine_mode mode, enum machine_mode tmode)
1709 {
1710   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1711 			      target, mode, tmode, true);
1712 }
1713 
1714 /* Extract a bit field using shifts and boolean operations.
1715    Returns an rtx to represent the value.
1716    OP0 addresses a register (word) or memory (byte).
1717    BITPOS says which bit within the word or byte the bit field starts in.
1718    OFFSET says how many bytes farther the bit field starts;
1719     it is 0 if OP0 is a register.
1720    BITSIZE says how many bits long the bit field is.
1721     (If OP0 is a register, it may be narrower than a full word,
1722      but BITPOS still counts within a full word,
1723      which is significant on bigendian machines.)
1724 
1725    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1726    PACKEDP is true if the field has the packed attribute.
1727 
1728    If TARGET is nonzero, attempts to store the value there
1729    and return TARGET, but this is not guaranteed.
1730    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1731 
1732 static rtx
1733 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1734 			 unsigned HOST_WIDE_INT offset,
1735 			 unsigned HOST_WIDE_INT bitsize,
1736 			 unsigned HOST_WIDE_INT bitpos, rtx target,
1737 			 int unsignedp, bool packedp)
1738 {
1739   unsigned int total_bits = BITS_PER_WORD;
1740   enum machine_mode mode;
1741 
1742   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1743     {
1744       /* Special treatment for a bit field split across two registers.  */
1745       if (bitsize + bitpos > BITS_PER_WORD)
1746 	return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1747     }
1748   else
1749     {
1750       /* Get the proper mode to use for this field.  We want a mode that
1751 	 includes the entire field.  If such a mode would be larger than
1752 	 a word, we won't be doing the extraction the normal way.  */
1753 
1754       if (MEM_VOLATILE_P (op0)
1755 	  && flag_strict_volatile_bitfields > 0)
1756 	{
1757 	  if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1758 	    mode = GET_MODE (op0);
1759 	  else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1760 	    mode = GET_MODE (target);
1761 	  else
1762 	    mode = tmode;
1763 	}
1764       else
1765 	mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1766 			      MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1767 
1768       if (mode == VOIDmode)
1769 	/* The only way this should occur is if the field spans word
1770 	   boundaries.  */
1771 	return extract_split_bit_field (op0, bitsize,
1772 					bitpos + offset * BITS_PER_UNIT,
1773 					unsignedp);
1774 
1775       total_bits = GET_MODE_BITSIZE (mode);
1776 
1777       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1778 	 be in the range 0 to total_bits-1, and put any excess bytes in
1779 	 OFFSET.  */
1780       if (bitpos >= total_bits)
1781 	{
1782 	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1783 	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1784 		     * BITS_PER_UNIT);
1785 	}
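      /* For example, with total_bits == 32 and an incoming BITPOS of 70,
	 OFFSET grows by (70 / 32) * 4 == 8 bytes and BITPOS is reduced
	 by 64 to 6.  */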
1786 
1787       /* If we're accessing a volatile MEM, we can't do the next
1788 	 alignment step if it results in a multi-word access where we
1789 	 otherwise wouldn't have one.  So, check for that case
1790 	 here.  */
1791       if (MEM_P (op0)
1792 	  && MEM_VOLATILE_P (op0)
1793 	  && flag_strict_volatile_bitfields > 0
1794 	  && bitpos + bitsize <= total_bits
1795 	  && bitpos + bitsize
	     + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT
	     > total_bits)
1796 	{
1797 	  if (STRICT_ALIGNMENT)
1798 	    {
1799 	      static bool informed_about_misalignment = false;
1800 	      bool warned;
1801 
1802 	      if (packedp)
1803 		{
1804 		  if (bitsize == total_bits)
1805 		    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1806 					 "multiple accesses to volatile structure member"
1807 					 " because of packed attribute");
1808 		  else
1809 		    warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1810 					 "multiple accesses to volatile structure bitfield"
1811 					 " because of packed attribute");
1812 
1813 		  return extract_split_bit_field (op0, bitsize,
1814 						  bitpos + offset * BITS_PER_UNIT,
1815 						  unsignedp);
1816 		}
1817 
1818 	      if (bitsize == total_bits)
1819 		warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1820 				     "mis-aligned access used for structure member");
1821 	      else
1822 		warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1823 				     "mis-aligned access used for structure bitfield");
1824 
1825 	      if (! informed_about_misalignment && warned)
1826 		{
1827 		  informed_about_misalignment = true;
1828 		  inform (input_location,
1829 			  "when a volatile object spans multiple type-sized locations,"
1830 			  " the compiler must choose between using a single mis-aligned access to"
1831 			  " preserve the volatility, or using multiple aligned accesses to avoid"
1832 			  " runtime faults; this code may fail at runtime if the hardware does"
1833 			  " not allow this access");
1834 		}
1835 	    }
1836 	}
1837       else
1838 	{
1840 	  /* Get ref to an aligned byte, halfword, or word containing the field.
1841 	     Adjust BITPOS to be position within a word,
1842 	     and OFFSET to be the offset of that word.
1843 	     Then alter OP0 to refer to that word.  */
1844 	  bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1845 	  offset -= (offset % (total_bits / BITS_PER_UNIT));
1846 	}
1847 
1848       op0 = adjust_address (op0, mode, offset);
1849     }
1850 
1851   mode = GET_MODE (op0);
1852 
1853   if (BYTES_BIG_ENDIAN)
1854     /* BITPOS is the distance between our msb and that of OP0.
1855        Convert it to the distance from the lsb.  */
1856     bitpos = total_bits - bitsize - bitpos;
1857 
1858   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1859      We have reduced the big-endian case to the little-endian case.  */
1860 
1861   if (unsignedp)
1862     {
1863       if (bitpos)
1864 	{
1865 	  /* If the field does not already start at the lsb,
1866 	     shift it so it does.  */
1867 	  /* Maybe propagate the target for the shift.  */
1868 	  /* But not if we will return it--could confuse integrate.c.  */
1869 	  rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1870 	  if (tmode != mode) subtarget = 0;
1871 	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1872 	}
1873       /* Convert the value to the desired mode.  */
1874       if (mode != tmode)
1875 	op0 = convert_to_mode (tmode, op0, 1);
1876 
1877       /* Unless the msb of the field used to be the msb when we shifted,
1878 	 mask out the upper bits.  */
1879 
1880       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1881 	return expand_binop (GET_MODE (op0), and_optab, op0,
1882 			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1883 			     target, 1, OPTAB_LIB_WIDEN);
1884       return op0;
1885     }
1886 
1887   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1888      then arithmetic-shift its lsb to the lsb of the word.  */
1889   op0 = force_reg (mode, op0);
1890 
1891   /* Find the narrowest integer mode that contains the field.  */
1892 
1893   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1894        mode = GET_MODE_WIDER_MODE (mode))
1895     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1896       {
1897 	op0 = convert_to_mode (mode, op0, 0);
1898 	break;
1899       }
1900 
1901   if (mode != tmode)
1902     target = 0;
1903 
1904   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1905     {
1906       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1907       /* Maybe propagate the target for the shift.  */
1908       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1909       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1910     }
1911 
1912   return expand_shift (RSHIFT_EXPR, mode, op0,
1913 		       GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1914 }
1915 
1916 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1917    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1918    complement of that if COMPLEMENT.  The mask is truncated if
1919    necessary to the width of mode MODE.  The mask is zero-extended if
1920    BITSIZE+BITPOS is too small for MODE.  */
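/* For example, mask_rtx (SImode, 2, 4, 0) yields a constant with value
   0x3c (four ones shifted left two places), while a nonzero COMPLEMENT
   yields 0xffffffc3 instead.  */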
1921 
1922 static rtx
1923 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1924 {
1925   double_int mask;
1926 
1927   mask = double_int_mask (bitsize);
1928   mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1929 
1930   if (complement)
1931     mask = double_int_not (mask);
1932 
1933   return immed_double_int_const (mask, mode);
1934 }
1935 
1936 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1937    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
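/* For example, lshift_value (SImode, GEN_INT (0xff), 8, 4) truncates
   0xff to the four low bits 0xf and yields 0xf00.  */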
1938 
1939 static rtx
1940 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1941 {
1942   double_int val;
1943 
1944   val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1945   val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1946 
1947   return immed_double_int_const (val, mode);
1948 }
1949 
1950 /* Extract a bit field that is split across two words
1951    and return an RTX for the result.
1952 
1953    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1954    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1955    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
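/* For example, with BITS_PER_WORD == 32, an 8-bit field at BITPOS 28 is
   fetched in two parts on a little-endian machine: four bits from bit 28
   of the first word, then four bits from bit 0 of the next word, shifted
   left by 4 and IORed into place.  */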
1956 
1957 static rtx
1958 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1959 			 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1960 {
1961   unsigned int unit;
1962   unsigned int bitsdone = 0;
1963   rtx result = NULL_RTX;
1964   int first = 1;
1965 
1966   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1967      much at a time.  */
1968   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1969     unit = BITS_PER_WORD;
1970   else
1971     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1972 
1973   while (bitsdone < bitsize)
1974     {
1975       unsigned HOST_WIDE_INT thissize;
1976       rtx part, word;
1977       unsigned HOST_WIDE_INT thispos;
1978       unsigned HOST_WIDE_INT offset;
1979 
1980       offset = (bitpos + bitsdone) / unit;
1981       thispos = (bitpos + bitsdone) % unit;
1982 
1983       /* THISSIZE must not overrun a word boundary.  Otherwise,
1984 	 extract_fixed_bit_field will call us again, and we will mutually
1985 	 recurse forever.  */
1986       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1987       thissize = MIN (thissize, unit - thispos);
1988 
1989       /* If OP0 is a register, then handle OFFSET here.
1990 
1991 	 When handling multiword bitfields, extract_bit_field may pass
1992 	 down a word_mode SUBREG of a larger REG for a bitfield that actually
1993 	 crosses a word boundary.  Thus, for a SUBREG, we must find
1994 	 the current word starting from the base register.  */
1995       if (GET_CODE (op0) == SUBREG)
1996 	{
1997 	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1998 	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
1999 					GET_MODE (SUBREG_REG (op0)));
2000 	  offset = 0;
2001 	}
2002       else if (REG_P (op0))
2003 	{
2004 	  word = operand_subword_force (op0, offset, GET_MODE (op0));
2005 	  offset = 0;
2006 	}
2007       else
2008 	word = op0;
2009 
2010       /* Extract the parts in bit-counting order,
2011 	 whose meaning is determined by BYTES_BIG_ENDIAN.
2012 	 OFFSET is in UNITs, and UNIT is in bits.
2013 	 extract_fixed_bit_field wants offset in bytes.  */
2014       part = extract_fixed_bit_field (word_mode, word,
2015 				      offset * unit / BITS_PER_UNIT,
2016 				      thissize, thispos, 0, 1, false);
2017       bitsdone += thissize;
2018 
2019       /* Shift this part into place for the result.  */
2020       if (BYTES_BIG_ENDIAN)
2021 	{
2022 	  if (bitsize != bitsdone)
2023 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2024 				 bitsize - bitsdone, 0, 1);
2025 	}
2026       else
2027 	{
2028 	  if (bitsdone != thissize)
2029 	    part = expand_shift (LSHIFT_EXPR, word_mode, part,
2030 				 bitsdone - thissize, 0, 1);
2031 	}
2032 
2033       if (first)
2034 	result = part;
2035       else
2036 	/* Combine the parts with bitwise or.  This works
2037 	   because we extracted each part as an unsigned bit field.  */
2038 	result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2039 			       OPTAB_LIB_WIDEN);
2040 
2041       first = 0;
2042     }
2043 
2044   /* Unsigned bit field: we are done.  */
2045   if (unsignedp)
2046     return result;
2047   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2048   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2049 			 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2050   return expand_shift (RSHIFT_EXPR, word_mode, result,
2051 		       BITS_PER_WORD - bitsize, NULL_RTX, 0);
2052 }
2053 
2054 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2055    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2056    MODE, fill the upper bits with zeros.  Fail if the layout of either
2057    mode is unknown (as for CC modes) or if the extraction would involve
2058    unprofitable mode punning.  Return the value on success, otherwise
2059    return null.
2060 
2061    This is different from gen_lowpart* in these respects:
2062 
2063      - the returned value must always be considered an rvalue
2064 
2065      - when MODE is wider than SRC_MODE, the extraction involves
2066        a zero extension
2067 
2068      - when MODE is smaller than SRC_MODE, the extraction involves
2069        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2070 
2071    In other words, this routine performs a computation, whereas the
2072    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2073    operations.  */
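/* For example, reading the low 32 bits of a DFmode value as SImode goes
   through the integer path below: the DFmode source is reinterpreted as
   DImode with gen_lowpart, then truncated to SImode by convert_modes.  */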
2074 
2075 rtx
2076 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2077 {
2078   enum machine_mode int_mode, src_int_mode;
2079 
2080   if (mode == src_mode)
2081     return src;
2082 
2083   if (CONSTANT_P (src))
2084     {
2085       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2086 	 fails, it will happily create (subreg (symbol_ref)) or similar
2087 	 invalid SUBREGs.  */
2088       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2089       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2090       if (ret)
2091 	return ret;
2092 
2093       if (GET_MODE (src) == VOIDmode
2094 	  || !validate_subreg (mode, src_mode, src, byte))
2095 	return NULL_RTX;
2096 
2097       src = force_reg (GET_MODE (src), src);
2098       return gen_rtx_SUBREG (mode, src, byte);
2099     }
2100 
2101   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2102     return NULL_RTX;
2103 
2104   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2105       && MODES_TIEABLE_P (mode, src_mode))
2106     {
2107       rtx x = gen_lowpart_common (mode, src);
2108       if (x)
2109         return x;
2110     }
2111 
2112   src_int_mode = int_mode_for_mode (src_mode);
2113   int_mode = int_mode_for_mode (mode);
2114   if (src_int_mode == BLKmode || int_mode == BLKmode)
2115     return NULL_RTX;
2116 
2117   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2118     return NULL_RTX;
2119   if (!MODES_TIEABLE_P (int_mode, mode))
2120     return NULL_RTX;
2121 
2122   src = gen_lowpart (src_int_mode, src);
2123   src = convert_modes (int_mode, src_int_mode, src, true);
2124   src = gen_lowpart (mode, src);
2125   return src;
2126 }
2127 
2128 /* Add INC into TARGET.  */
2129 
2130 void
2131 expand_inc (rtx target, rtx inc)
2132 {
2133   rtx value = expand_binop (GET_MODE (target), add_optab,
2134 			    target, inc,
2135 			    target, 0, OPTAB_LIB_WIDEN);
2136   if (value != target)
2137     emit_move_insn (target, value);
2138 }
2139 
2140 /* Subtract DEC from TARGET.  */
2141 
2142 void
2143 expand_dec (rtx target, rtx dec)
2144 {
2145   rtx value = expand_binop (GET_MODE (target), sub_optab,
2146 			    target, dec,
2147 			    target, 0, OPTAB_LIB_WIDEN);
2148   if (value != target)
2149     emit_move_insn (target, value);
2150 }
2151 
2152 /* Output a shift instruction for expression code CODE,
2153    with SHIFTED being the rtx for the value to shift,
2154    and AMOUNT the rtx for the amount to shift by.
2155    Store the result in the rtx TARGET, if that is convenient.
2156    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2157    Return the rtx for where the value is.  */
2158 
2159 static rtx
2160 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2161 		rtx amount, rtx target, int unsignedp)
2162 {
2163   rtx op1, temp = 0;
2164   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2165   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2166   optab lshift_optab = ashl_optab;
2167   optab rshift_arith_optab = ashr_optab;
2168   optab rshift_uns_optab = lshr_optab;
2169   optab lrotate_optab = rotl_optab;
2170   optab rrotate_optab = rotr_optab;
2171   enum machine_mode op1_mode;
2172   int attempt;
2173   bool speed = optimize_insn_for_speed_p ();
2174 
2175   op1 = amount;
2176   op1_mode = GET_MODE (op1);
2177 
2178   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2179      shift amount is a vector, use the vector/vector shift patterns.  */
2180   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2181     {
2182       lshift_optab = vashl_optab;
2183       rshift_arith_optab = vashr_optab;
2184       rshift_uns_optab = vlshr_optab;
2185       lrotate_optab = vrotl_optab;
2186       rrotate_optab = vrotr_optab;
2187     }
2188 
2189   /* We previously detected shift counts computed by NEGATE_EXPR
2190      and shifted in the other direction, but that does not work
2191      on all machines.  */
2192 
2193   if (SHIFT_COUNT_TRUNCATED)
2194     {
2195       if (CONST_INT_P (op1)
2196 	  && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2197 	      (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2198 	op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2199 		       % GET_MODE_BITSIZE (mode));
2200       else if (GET_CODE (op1) == SUBREG
2201 	       && subreg_lowpart_p (op1)
2202 	       && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2203 	       && SCALAR_INT_MODE_P (GET_MODE (op1)))
2204 	op1 = SUBREG_REG (op1);
2205     }
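  /* For example, with SHIFT_COUNT_TRUNCATED on a machine with 32-bit
     SImode, a constant shift count of 33 has just been reduced to
     33 % 32 == 1.  */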
2206 
2207   if (op1 == const0_rtx)
2208     return shifted;
2209 
2210   /* Check whether it's cheaper to implement a left shift by a constant
2211      bit count by a sequence of additions.  */
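  /* For example, a left shift by 3 becomes three doublings below:
     x + x, 2x + 2x, then 4x + 4x == 8x.  */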
2212   if (code == LSHIFT_EXPR
2213       && CONST_INT_P (op1)
2214       && INTVAL (op1) > 0
2215       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2216       && INTVAL (op1) < MAX_BITS_PER_WORD
2217       && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2218       && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2219     {
2220       int i;
2221       for (i = 0; i < INTVAL (op1); i++)
2222 	{
2223 	  temp = force_reg (mode, shifted);
2224 	  shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2225 				  unsignedp, OPTAB_LIB_WIDEN);
2226 	}
2227       return shifted;
2228     }
2229 
2230   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2231     {
2232       enum optab_methods methods;
2233 
2234       if (attempt == 0)
2235 	methods = OPTAB_DIRECT;
2236       else if (attempt == 1)
2237 	methods = OPTAB_WIDEN;
2238       else
2239 	methods = OPTAB_LIB_WIDEN;
2240 
2241       if (rotate)
2242 	{
2243 	  /* Widening does not work for rotation.  */
2244 	  if (methods == OPTAB_WIDEN)
2245 	    continue;
2246 	  else if (methods == OPTAB_LIB_WIDEN)
2247 	    {
2248 	      /* If we have been unable to open-code this by a rotation,
2249 		 do it as the IOR of two shifts.  I.e., to rotate A
2250 		 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2251 		 where C is the bitsize of A.
2252 
2253 		 It is theoretically possible that the target machine might
2254 		 not be able to perform either shift and hence we would
2255 		 be making two libcalls rather than just the one for the
2256 		 shift (similarly if IOR could not be done).  We will allow
2257 		 this extremely unlikely lossage to avoid complicating the
2258 		 code below.  */
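	      /* For example, with C == 32, rotating A == 0x80000001 left
		 by N == 1 computes (A << 1) | (A >> 31)
		 == 0x00000002 | 0x00000001 == 0x00000003.  */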
2259 
2260 	      rtx subtarget = target == shifted ? 0 : target;
2261 	      rtx new_amount, other_amount;
2262 	      rtx temp1;
2263 
2264 	      new_amount = op1;
2265 	      if (CONST_INT_P (op1))
2266 		other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2267 					- INTVAL (op1));
2268 	      else
2269 		other_amount
2270 		  = simplify_gen_binary (MINUS, GET_MODE (op1),
2271 					 GEN_INT (GET_MODE_PRECISION (mode)),
2272 					 op1);
2273 
2274 	      shifted = force_reg (mode, shifted);
2275 
2276 	      temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2277 				     mode, shifted, new_amount, 0, 1);
2278 	      temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2279 				      mode, shifted, other_amount,
2280 				      subtarget, 1);
2281 	      return expand_binop (mode, ior_optab, temp, temp1, target,
2282 				   unsignedp, methods);
2283 	    }
2284 
2285 	  temp = expand_binop (mode,
2286 			       left ? lrotate_optab : rrotate_optab,
2287 			       shifted, op1, target, unsignedp, methods);
2288 	}
2289       else if (unsignedp)
2290 	temp = expand_binop (mode,
2291 			     left ? lshift_optab : rshift_uns_optab,
2292 			     shifted, op1, target, unsignedp, methods);
2293 
2294       /* Do arithmetic shifts.
2295 	 Also, if we are going to widen the operand, we can just as well
2296 	 use an arithmetic right-shift instead of a logical one.  */
2297       if (temp == 0 && ! rotate
2298 	  && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2299 	{
2300 	  enum optab_methods methods1 = methods;
2301 
2302 	  /* If trying to widen a logical shift to an arithmetic shift,
2303 	     don't accept an arithmetic shift of the same size.  */
2304 	  if (unsignedp)
2305 	    methods1 = OPTAB_MUST_WIDEN;
2306 
2307 	  /* Arithmetic shift */
2308 
2309 	  temp = expand_binop (mode,
2310 			       left ? lshift_optab : rshift_arith_optab,
2311 			       shifted, op1, target, unsignedp, methods1);
2312 	}
2313 
2314       /* We used to try extzv here for logical right shifts, but that was
2315 	 only useful for one machine, the VAX, and caused poor code
2316 	 generation there for lshrdi3, so the code was deleted and a
2317 	 define_expand for lshrsi3 was added to vax.md.  */
2318     }
2319 
2320   gcc_assert (temp);
2321   return temp;
2322 }
2323 
2324 /* Output a shift instruction for expression code CODE,
2325    with SHIFTED being the rtx for the value to shift,
2326    and AMOUNT the amount to shift by.
2327    Store the result in the rtx TARGET, if that is convenient.
2328    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2329    Return the rtx for where the value is.  */
2330 
2331 rtx
2332 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2333 	      int amount, rtx target, int unsignedp)
2334 {
2335   return expand_shift_1 (code, mode,
2336 			 shifted, GEN_INT (amount), target, unsignedp);
2337 }
2338 
2339 /* Output a shift instruction for expression code CODE,
2340    with SHIFTED being the rtx for the value to shift,
2341    and AMOUNT the tree for the amount to shift by.
2342    Store the result in the rtx TARGET, if that is convenient.
2343    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344    Return the rtx for where the value is.  */
2345 
2346 rtx
2347 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348 		       tree amount, rtx target, int unsignedp)
2349 {
2350   return expand_shift_1 (code, mode,
2351 			 shifted, expand_normal (amount), target, unsignedp);
2352 }
2353 
2354 
2355 /* Indicates the type of fixup needed after a constant multiplication.
2356    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2357    the result should be negated, and ADD_VARIANT means that the
2358    multiplicand should be added to the result.  */
2359 enum mult_variant {basic_variant, negate_variant, add_variant};
2360 
2361 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2362 			const struct mult_cost *, enum machine_mode mode);
2363 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2364 				 struct algorithm *, enum mult_variant *, int);
2365 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2366 			      const struct algorithm *, enum mult_variant);
2367 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2368 						 int, rtx *, int *, int *);
2369 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2370 static rtx extract_high_half (enum machine_mode, rtx);
2371 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2372 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2373 				       int, int);
2374 /* Compute and return the best algorithm for multiplying by T.
2375    The algorithm must cost less than COST_LIMIT.
2376    If retval.cost >= COST_LIMIT, no algorithm was found and all
2377    other fields of the returned struct are undefined.
2378    MODE is the machine mode of the multiplication.  */
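/* For example, for T == 5 on a target with a cheap shift-and-add,
   synth_mult finds the two-operation sequence x*5 == (x << 2) + x,
   i.e. alg_m followed by alg_add_t2_m with log 2.  */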
2379 
2380 static void
2381 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2382 	    const struct mult_cost *cost_limit, enum machine_mode mode)
2383 {
2384   int m;
2385   struct algorithm *alg_in, *best_alg;
2386   struct mult_cost best_cost;
2387   struct mult_cost new_limit;
2388   int op_cost, op_latency;
2389   unsigned HOST_WIDE_INT orig_t = t;
2390   unsigned HOST_WIDE_INT q;
2391   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2392   int hash_index;
2393   bool cache_hit = false;
2394   enum alg_code cache_alg = alg_zero;
2395   bool speed = optimize_insn_for_speed_p ();
2396 
2397   /* Indicate that no algorithm is yet found.  If no algorithm
2398      is found, this value will be returned and indicate failure.  */
2399   alg_out->cost.cost = cost_limit->cost + 1;
2400   alg_out->cost.latency = cost_limit->latency + 1;
2401 
2402   if (cost_limit->cost < 0
2403       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2404     return;
2405 
2406   /* Restrict the bits of "t" to the multiplication's mode.  */
2407   t &= GET_MODE_MASK (mode);
2408 
2409   /* t == 1 can be done at zero cost.  */
2410   if (t == 1)
2411     {
2412       alg_out->ops = 1;
2413       alg_out->cost.cost = 0;
2414       alg_out->cost.latency = 0;
2415       alg_out->op[0] = alg_m;
2416       return;
2417     }
2418 
2419   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2420      fail now.  */
2421   if (t == 0)
2422     {
2423       if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2424 	return;
2425       else
2426 	{
2427 	  alg_out->ops = 1;
2428 	  alg_out->cost.cost = zero_cost[speed];
2429 	  alg_out->cost.latency = zero_cost[speed];
2430 	  alg_out->op[0] = alg_zero;
2431 	  return;
2432 	}
2433     }
2434 
2435   /* We'll be needing a couple extra algorithm structures now.  */
2436 
2437   alg_in = XALLOCA (struct algorithm);
2438   best_alg = XALLOCA (struct algorithm);
2439   best_cost = *cost_limit;
2440 
2441   /* Compute the hash index.  */
2442   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2443 
2444   /* See if we already know what to do for T.  */
2445   if (alg_hash[hash_index].t == t
2446       && alg_hash[hash_index].mode == mode
2448       && alg_hash[hash_index].speed == speed
2449       && alg_hash[hash_index].alg != alg_unknown)
2450     {
2451       cache_alg = alg_hash[hash_index].alg;
2452 
2453       if (cache_alg == alg_impossible)
2454 	{
2455 	  /* The cache tells us that it's impossible to synthesize
2456 	     multiplication by T within alg_hash[hash_index].cost.  */
2457 	  if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2458 	    /* COST_LIMIT is at least as restrictive as the one
2459 	       recorded in the hash table, in which case we have no
2460 	       hope of synthesizing a multiplication.  Just
2461 	       return.  */
2462 	    return;
2463 
2464 	  /* If we get here, COST_LIMIT is less restrictive than the
2465 	     one recorded in the hash table, so we may be able to
2466 	     synthesize a multiplication.  Proceed as if we didn't
2467 	     have the cache entry.  */
2468 	}
2469       else
2470 	{
2471 	  if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2472 	    /* The cached algorithm shows that this multiplication
2473 	       requires more cost than COST_LIMIT.  Just return.  This
2474 	       way, we don't clobber this cache entry with
2475 	       alg_impossible but retain useful information.  */
2476 	    return;
2477 
2478 	  cache_hit = true;
2479 
2480 	  switch (cache_alg)
2481 	    {
2482 	    case alg_shift:
2483 	      goto do_alg_shift;
2484 
2485 	    case alg_add_t_m2:
2486 	    case alg_sub_t_m2:
2487 	      goto do_alg_addsub_t_m2;
2488 
2489 	    case alg_add_factor:
2490 	    case alg_sub_factor:
2491 	      goto do_alg_addsub_factor;
2492 
2493 	    case alg_add_t2_m:
2494 	      goto do_alg_add_t2_m;
2495 
2496 	    case alg_sub_t2_m:
2497 	      goto do_alg_sub_t2_m;
2498 
2499 	    default:
2500 	      gcc_unreachable ();
2501 	    }
2502 	}
2503     }
2504 
2505   /* If we have a group of zero bits at the low-order part of T, try
2506      multiplying by the remaining bits and then doing a shift.  */
2507 
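  /* For example, T == 20 has two low zero bits, so the code below
     considers synthesizing x*5 and shifting the result left by 2.  */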
2508   if ((t & 1) == 0)
2509     {
2510     do_alg_shift:
2511       m = floor_log2 (t & -t);	/* m = number of low zero bits */
2512       if (m < maxm)
2513 	{
2514 	  q = t >> m;
2515 	  /* The function expand_shift will choose between a shift and
2516 	     a sequence of additions, so the observed cost is given as
2517 	     MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]).  */
2518 	  op_cost = m * add_cost[speed][mode];
2519 	  if (shift_cost[speed][mode][m] < op_cost)
2520 	    op_cost = shift_cost[speed][mode][m];
2521 	  new_limit.cost = best_cost.cost - op_cost;
2522 	  new_limit.latency = best_cost.latency - op_cost;
2523 	  synth_mult (alg_in, q, &new_limit, mode);
2524 
2525 	  alg_in->cost.cost += op_cost;
2526 	  alg_in->cost.latency += op_cost;
2527 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2528 	    {
2529 	      struct algorithm *x;
2530 	      best_cost = alg_in->cost;
2531 	      x = alg_in, alg_in = best_alg, best_alg = x;
2532 	      best_alg->log[best_alg->ops] = m;
2533 	      best_alg->op[best_alg->ops] = alg_shift;
2534 	    }
2535 
2536 	  /* See if treating ORIG_T as a signed number yields a better
2537 	     sequence.  Try this sequence only for a negative ORIG_T
2538 	     as it would be useless for a non-negative ORIG_T.  */
2539 	  if ((HOST_WIDE_INT) orig_t < 0)
2540 	    {
2541 	      /* Shift ORIG_T as follows because a right shift of a
2542 		 negative-valued signed type is implementation
2543 		 defined.  */
2544 	      q = ~(~orig_t >> m);
2545 	      /* The function expand_shift will choose between a shift
2546 		 and a sequence of additions, so the observed cost is
2547 		 given as MIN (m * add_cost[speed][mode],
2548 		 shift_cost[speed][mode][m]).  */
2549 	      op_cost = m * add_cost[speed][mode];
2550 	      if (shift_cost[speed][mode][m] < op_cost)
2551 		op_cost = shift_cost[speed][mode][m];
2552 	      new_limit.cost = best_cost.cost - op_cost;
2553 	      new_limit.latency = best_cost.latency - op_cost;
2554 	      synth_mult (alg_in, q, &new_limit, mode);
2555 
2556 	      alg_in->cost.cost += op_cost;
2557 	      alg_in->cost.latency += op_cost;
2558 	      if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2559 		{
2560 		  struct algorithm *x;
2561 		  best_cost = alg_in->cost;
2562 		  x = alg_in, alg_in = best_alg, best_alg = x;
2563 		  best_alg->log[best_alg->ops] = m;
2564 		  best_alg->op[best_alg->ops] = alg_shift;
2565 		}
2566 	    }
2567 	}
2568       if (cache_hit)
2569 	goto done;
2570     }
2571 
2572   /* If we have an odd number, add or subtract one.  */
2573   if ((t & 1) != 0)
2574     {
2575       unsigned HOST_WIDE_INT w;
2576 
2577     do_alg_addsub_t_m2:
2578       for (w = 1; (w & t) != 0; w <<= 1)
2579 	;
2580       /* If T was -1, then W will be zero after the loop.  This is another
2581 	 case where T ends with ...111.  Handling this as (T + 1) followed
2582 	 by subtracting 1 produces slightly better code and makes algorithm
2583 	 selection much faster than treating it like the ...0111 case
2584 	 below.  */
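      /* For example, T == 7 ends with three one bits, so W becomes 8 and
	 the branch below synthesizes x*8 and then subtracts x once.  */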
2585       if (w == 0
2586 	  || (w > 2
2587 	      /* Reject the case where t is 3.
2588 		 Thus we prefer addition in that case.  */
2589 	      && t != 3))
2590 	{
2591 	  /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2592 
2593 	  op_cost = add_cost[speed][mode];
2594 	  new_limit.cost = best_cost.cost - op_cost;
2595 	  new_limit.latency = best_cost.latency - op_cost;
2596 	  synth_mult (alg_in, t + 1, &new_limit, mode);
2597 
2598 	  alg_in->cost.cost += op_cost;
2599 	  alg_in->cost.latency += op_cost;
2600 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2601 	    {
2602 	      struct algorithm *x;
2603 	      best_cost = alg_in->cost;
2604 	      x = alg_in, alg_in = best_alg, best_alg = x;
2605 	      best_alg->log[best_alg->ops] = 0;
2606 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2607 	    }
2608 	}
2609       else
2610 	{
2611 	  /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2612 
2613 	  op_cost = add_cost[speed][mode];
2614 	  new_limit.cost = best_cost.cost - op_cost;
2615 	  new_limit.latency = best_cost.latency - op_cost;
2616 	  synth_mult (alg_in, t - 1, &new_limit, mode);
2617 
2618 	  alg_in->cost.cost += op_cost;
2619 	  alg_in->cost.latency += op_cost;
2620 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621 	    {
2622 	      struct algorithm *x;
2623 	      best_cost = alg_in->cost;
2624 	      x = alg_in, alg_in = best_alg, best_alg = x;
2625 	      best_alg->log[best_alg->ops] = 0;
2626 	      best_alg->op[best_alg->ops] = alg_add_t_m2;
2627 	    }
2628 	}
2629 
2630       /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2631 	 quickly with a - a * n for some appropriate constant n.  */
2632       m = exact_log2 (-orig_t + 1);
2633       if (m >= 0 && m < maxm)
2634 	{
2635 	  op_cost = shiftsub1_cost[speed][mode][m];
2636 	  new_limit.cost = best_cost.cost - op_cost;
2637 	  new_limit.latency = best_cost.latency - op_cost;
2638 	  synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2639 
2640 	  alg_in->cost.cost += op_cost;
2641 	  alg_in->cost.latency += op_cost;
2642 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2643 	    {
2644 	      struct algorithm *x;
2645 	      best_cost = alg_in->cost;
2646 	      x = alg_in, alg_in = best_alg, best_alg = x;
2647 	      best_alg->log[best_alg->ops] = m;
2648 	      best_alg->op[best_alg->ops] = alg_sub_t_m2;
2649 	    }
2650 	}
2651 
2652       if (cache_hit)
2653 	goto done;
2654     }
2655 
2656   /* Look for factors of t of the form
2657      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2658      If we find such a factor, we can multiply by t using an algorithm that
2659      multiplies by q, shift the result by m and add/subtract it to itself.
2660 
2661      We search for large factors first and loop down, even if large factors
2662      are less probable than small; if we find a large factor we will find a
2663      good sequence quickly, and therefore be able to prune (by decreasing
2664      COST_LIMIT) the search.  */
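  /* For example, T == 15 factors as 3 * (2**2 + 1), so x*15 can be
     synthesized as y = (x << 1) + x (i.e. x*3) followed by
     (y << 2) + y.  */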
2665 
2666  do_alg_addsub_factor:
2667   for (m = floor_log2 (t - 1); m >= 2; m--)
2668     {
2669       unsigned HOST_WIDE_INT d;
2670 
2671       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2672       if (t % d == 0 && t > d && m < maxm
2673 	  && (!cache_hit || cache_alg == alg_add_factor))
2674 	{
2675 	  /* If the target has a cheap shift-and-add instruction use
2676 	     that in preference to a shift insn followed by an add insn.
2677 	     Assume that the shift-and-add is "atomic" with a latency
2678 	     equal to its cost, otherwise assume that on superscalar
2679 	     hardware the shift may be executed concurrently with the
2680 	     earlier steps in the algorithm.  */
2681 	  op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2682 	  if (shiftadd_cost[speed][mode][m] < op_cost)
2683 	    {
2684 	      op_cost = shiftadd_cost[speed][mode][m];
2685 	      op_latency = op_cost;
2686 	    }
2687 	  else
2688 	    op_latency = add_cost[speed][mode];
2689 
2690 	  new_limit.cost = best_cost.cost - op_cost;
2691 	  new_limit.latency = best_cost.latency - op_latency;
2692 	  synth_mult (alg_in, t / d, &new_limit, mode);
2693 
2694 	  alg_in->cost.cost += op_cost;
2695 	  alg_in->cost.latency += op_latency;
2696 	  if (alg_in->cost.latency < op_cost)
2697 	    alg_in->cost.latency = op_cost;
2698 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2699 	    {
2700 	      struct algorithm *x;
2701 	      best_cost = alg_in->cost;
2702 	      x = alg_in, alg_in = best_alg, best_alg = x;
2703 	      best_alg->log[best_alg->ops] = m;
2704 	      best_alg->op[best_alg->ops] = alg_add_factor;
2705 	    }
2706 	  /* Other factors will have been taken care of in the recursion.  */
2707 	  break;
2708 	}
2709 
2710       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2711       if (t % d == 0 && t > d && m < maxm
2712 	  && (!cache_hit || cache_alg == alg_sub_factor))
2713 	{
2714 	  /* If the target has a cheap shift-and-subtract insn use
2715 	     that in preference to a shift insn followed by a sub insn.
2716 	     Assume that the shift-and-sub is "atomic" with a latency
2717 	     equal to its cost, otherwise assume that on superscalar
2718 	     hardware the shift may be executed concurrently with the
2719 	     earlier steps in the algorithm.  */
2720 	  op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2721 	  if (shiftsub0_cost[speed][mode][m] < op_cost)
2722 	    {
2723 	      op_cost = shiftsub0_cost[speed][mode][m];
2724 	      op_latency = op_cost;
2725 	    }
2726 	  else
2727 	    op_latency = add_cost[speed][mode];
2728 
2729 	  new_limit.cost = best_cost.cost - op_cost;
2730 	  new_limit.latency = best_cost.latency - op_latency;
2731 	  synth_mult (alg_in, t / d, &new_limit, mode);
2732 
2733 	  alg_in->cost.cost += op_cost;
2734 	  alg_in->cost.latency += op_latency;
2735 	  if (alg_in->cost.latency < op_cost)
2736 	    alg_in->cost.latency = op_cost;
2737 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2738 	    {
2739 	      struct algorithm *x;
2740 	      best_cost = alg_in->cost;
2741 	      x = alg_in, alg_in = best_alg, best_alg = x;
2742 	      best_alg->log[best_alg->ops] = m;
2743 	      best_alg->op[best_alg->ops] = alg_sub_factor;
2744 	    }
2745 	  break;
2746 	}
2747     }
2748   if (cache_hit)
2749     goto done;
2750 
2751   /* Try shift-and-add (load effective address) instructions,
2752      i.e. do a*3, a*5, a*9.  */
2753   if ((t & 1) != 0)
2754     {
2755     do_alg_add_t2_m:
2756       q = t - 1;
2757       q = q & -q;
2758       m = exact_log2 (q);
2759       if (m >= 0 && m < maxm)
2760 	{
2761 	  op_cost = shiftadd_cost[speed][mode][m];
2762 	  new_limit.cost = best_cost.cost - op_cost;
2763 	  new_limit.latency = best_cost.latency - op_cost;
2764 	  synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2765 
2766 	  alg_in->cost.cost += op_cost;
2767 	  alg_in->cost.latency += op_cost;
2768 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2769 	    {
2770 	      struct algorithm *x;
2771 	      best_cost = alg_in->cost;
2772 	      x = alg_in, alg_in = best_alg, best_alg = x;
2773 	      best_alg->log[best_alg->ops] = m;
2774 	      best_alg->op[best_alg->ops] = alg_add_t2_m;
2775 	    }
2776 	}
2777       if (cache_hit)
2778 	goto done;
2779 
2780     do_alg_sub_t2_m:
2781       q = t + 1;
2782       q = q & -q;
2783       m = exact_log2 (q);
2784       if (m >= 0 && m < maxm)
2785 	{
2786 	  op_cost = shiftsub0_cost[speed][mode][m];
2787 	  new_limit.cost = best_cost.cost - op_cost;
2788 	  new_limit.latency = best_cost.latency - op_cost;
2789 	  synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2790 
2791 	  alg_in->cost.cost += op_cost;
2792 	  alg_in->cost.latency += op_cost;
2793 	  if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2794 	    {
2795 	      struct algorithm *x;
2796 	      best_cost = alg_in->cost;
2797 	      x = alg_in, alg_in = best_alg, best_alg = x;
2798 	      best_alg->log[best_alg->ops] = m;
2799 	      best_alg->op[best_alg->ops] = alg_sub_t2_m;
2800 	    }
2801 	}
2802       if (cache_hit)
2803 	goto done;
2804     }
2805 
2806  done:
2807   /* If best_cost has not decreased, we have not found any algorithm.  */
2808   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2809     {
2810       /* We failed to find an algorithm.  Record alg_impossible for
2811 	 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2812 	 we are asked to find an algorithm for T within the same or
2813 	 lower COST_LIMIT, we can immediately return to the
2814 	 caller.  */
2815       alg_hash[hash_index].t = t;
2816       alg_hash[hash_index].mode = mode;
2817       alg_hash[hash_index].speed = speed;
2818       alg_hash[hash_index].alg = alg_impossible;
2819       alg_hash[hash_index].cost = *cost_limit;
2820       return;
2821     }
2822 
2823   /* Cache the result.  */
2824   if (!cache_hit)
2825     {
2826       alg_hash[hash_index].t = t;
2827       alg_hash[hash_index].mode = mode;
2828       alg_hash[hash_index].speed = speed;
2829       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2830       alg_hash[hash_index].cost.cost = best_cost.cost;
2831       alg_hash[hash_index].cost.latency = best_cost.latency;
2832     }
2833 
2834   /* If the sequence is getting too long for `struct algorithm'
2835      to record, make this search fail.  */
2836   if (best_alg->ops == MAX_BITS_PER_WORD)
2837     return;
2838 
2839   /* Copy the algorithm from temporary space to the space at alg_out.
2840      We avoid using structure assignment because the majority of
2841      best_alg is normally undefined, and this is a critical function.  */
2842   alg_out->ops = best_alg->ops + 1;
2843   alg_out->cost = best_cost;
2844   memcpy (alg_out->op, best_alg->op,
2845 	  alg_out->ops * sizeof *alg_out->op);
2846   memcpy (alg_out->log, best_alg->log,
2847 	  alg_out->ops * sizeof *alg_out->log);
2848 }
2849 
2850 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2851    Try three variations:
2852 
2853        - a shift/add sequence based on VAL itself
2854        - a shift/add sequence based on -VAL, followed by a negation
2855        - a shift/add sequence based on VAL - 1, followed by an addition.
2856 
2857    Return true if the cheapest of these cost less than MULT_COST,
2858    describing the algorithm in *ALG and final fixup in *VARIANT.  */
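/* For example, for VAL == -5 the negation variant typically applies:
   synth_mult finds x*5 == (x << 2) + x, and the result is then negated,
   which is usually cheaper than synthesizing the bit pattern of -5
   directly.  */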
2859 
2860 static bool
2861 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2862 		     struct algorithm *alg, enum mult_variant *variant,
2863 		     int mult_cost)
2864 {
2865   struct algorithm alg2;
2866   struct mult_cost limit;
2867   int op_cost;
2868   bool speed = optimize_insn_for_speed_p ();
2869 
2870   /* Fail quickly for impossible bounds.  */
2871   if (mult_cost < 0)
2872     return false;
2873 
2874   /* Ensure that mult_cost provides a reasonable upper bound.
2875      Any constant multiplication can be performed with fewer
2876      than 2 * bits additions.  */
2877   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2878   if (mult_cost > op_cost)
2879     mult_cost = op_cost;
2880 
2881   *variant = basic_variant;
2882   limit.cost = mult_cost;
2883   limit.latency = mult_cost;
2884   synth_mult (alg, val, &limit, mode);
2885 
2886   /* This works only if the negated value actually fits in an
2887      `unsigned int'.  */
2888   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2889     {
2890       op_cost = neg_cost[speed][mode];
2891       if (MULT_COST_LESS (&alg->cost, mult_cost))
2892 	{
2893 	  limit.cost = alg->cost.cost - op_cost;
2894 	  limit.latency = alg->cost.latency - op_cost;
2895 	}
2896       else
2897 	{
2898 	  limit.cost = mult_cost - op_cost;
2899 	  limit.latency = mult_cost - op_cost;
2900 	}
2901 
2902       synth_mult (&alg2, -val, &limit, mode);
2903       alg2.cost.cost += op_cost;
2904       alg2.cost.latency += op_cost;
2905       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2906 	*alg = alg2, *variant = negate_variant;
2907     }
2908 
2909   /* This proves very useful for division-by-constant.  */
2910   op_cost = add_cost[speed][mode];
2911   if (MULT_COST_LESS (&alg->cost, mult_cost))
2912     {
2913       limit.cost = alg->cost.cost - op_cost;
2914       limit.latency = alg->cost.latency - op_cost;
2915     }
2916   else
2917     {
2918       limit.cost = mult_cost - op_cost;
2919       limit.latency = mult_cost - op_cost;
2920     }
2921 
2922   synth_mult (&alg2, val - 1, &limit, mode);
2923   alg2.cost.cost += op_cost;
2924   alg2.cost.latency += op_cost;
2925   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2926     *alg = alg2, *variant = add_variant;
2927 
2928   return MULT_COST_LESS (&alg->cost, mult_cost);
2929 }
2930 
2931 /* A subroutine of expand_mult, used for constant multiplications.
2932    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2933    convenient.  Use the shift/add sequence described by ALG and apply
2934    the final fixup specified by VARIANT.  */
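/* For example, if ALG for VAL == 5 is { alg_m, alg_add_t2_m with log 2 },
   the loop below sets ACCUM = OP0, then ACCUM = (ACCUM << 2) + OP0, and
   VAL_SO_FAR tracks 1 -> 5, satisfying the final assertion.  */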
2935 
2936 static rtx
2937 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2938 		   rtx target, const struct algorithm *alg,
2939 		   enum mult_variant variant)
2940 {
2941   HOST_WIDE_INT val_so_far;
2942   rtx insn, accum, tem;
2943   int opno;
2944   enum machine_mode nmode;
2945 
2946   /* Avoid referencing memory over and over, and avoid invalid
2947      rtl sharing on SUBREGs.  */
2948   op0 = force_reg (mode, op0);
2949 
2950   /* ACCUM starts out either as OP0 or as a zero, depending on
2951      the first operation.  */
2952 
2953   if (alg->op[0] == alg_zero)
2954     {
2955       accum = copy_to_mode_reg (mode, const0_rtx);
2956       val_so_far = 0;
2957     }
2958   else if (alg->op[0] == alg_m)
2959     {
2960       accum = copy_to_mode_reg (mode, op0);
2961       val_so_far = 1;
2962     }
2963   else
2964     gcc_unreachable ();
2965 
2966   for (opno = 1; opno < alg->ops; opno++)
2967     {
2968       int log = alg->log[opno];
2969       rtx shift_subtarget = optimize ? 0 : accum;
2970       rtx add_target
2971 	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
2972 	   && !optimize)
2973 	  ? target : 0;
2974       rtx accum_target = optimize ? 0 : accum;
2975       rtx accum_inner;
2976 
2977       switch (alg->op[opno])
2978 	{
2979 	case alg_shift:
2980 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2981 	  /* REG_EQUAL note will be attached to the following insn.  */
2982 	  emit_move_insn (accum, tem);
2983 	  val_so_far <<= log;
2984 	  break;
2985 
2986 	case alg_add_t_m2:
2987 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2988 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2989 				 add_target ? add_target : accum_target);
2990 	  val_so_far += (HOST_WIDE_INT) 1 << log;
2991 	  break;
2992 
2993 	case alg_sub_t_m2:
2994 	  tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2995 	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2996 				 add_target ? add_target : accum_target);
2997 	  val_so_far -= (HOST_WIDE_INT) 1 << log;
2998 	  break;
2999 
3000 	case alg_add_t2_m:
3001 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3002 				log, shift_subtarget, 0);
3003 	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3004 				 add_target ? add_target : accum_target);
3005 	  val_so_far = (val_so_far << log) + 1;
3006 	  break;
3007 
3008 	case alg_sub_t2_m:
3009 	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
3010 				log, shift_subtarget, 0);
3011 	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3012 				 add_target ? add_target : accum_target);
3013 	  val_so_far = (val_so_far << log) - 1;
3014 	  break;
3015 
3016 	case alg_add_factor:
3017 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3018 	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3019 				 add_target ? add_target : accum_target);
3020 	  val_so_far += val_so_far << log;
3021 	  break;
3022 
3023 	case alg_sub_factor:
3024 	  tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3025 	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3026 				 (add_target
3027 				  ? add_target : (optimize ? 0 : tem)));
3028 	  val_so_far = (val_so_far << log) - val_so_far;
3029 	  break;
3030 
3031 	default:
3032 	  gcc_unreachable ();
3033 	}
3034 
3035       /* Write a REG_EQUAL note on the last insn so that we can cse
3036 	 multiplication sequences.  Note that if ACCUM is a SUBREG,
3037 	 we've set the inner register and must properly indicate
3038 	 that.  */
3039 
3040       tem = op0, nmode = mode;
3041       accum_inner = accum;
3042       if (GET_CODE (accum) == SUBREG)
3043 	{
3044 	  accum_inner = SUBREG_REG (accum);
3045 	  nmode = GET_MODE (accum_inner);
3046 	  tem = gen_lowpart (nmode, op0);
3047 	}
3048 
3049       insn = get_last_insn ();
3050       set_dst_reg_note (insn, REG_EQUAL,
3051 			gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3052 			accum_inner);
3053     }
3054 
3055   if (variant == negate_variant)
3056     {
3057       val_so_far = -val_so_far;
3058       accum = expand_unop (mode, neg_optab, accum, target, 0);
3059     }
3060   else if (variant == add_variant)
3061     {
3062       val_so_far = val_so_far + 1;
3063       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3064     }
3065 
3066   /* Compare only the bits of val and val_so_far that are significant
3067      in the result mode, to avoid sign-/zero-extension confusion.  */
3068   val &= GET_MODE_MASK (mode);
3069   val_so_far &= GET_MODE_MASK (mode);
3070   gcc_assert (val == val_so_far);
3071 
3072   return accum;
3073 }
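/* As an illustration of the sequences built above, a multiplication
   by the constant 10 can come out as alg_m, alg_add_t_m2 (log 2),
   alg_shift (log 1), i.e. ((x << 2) + x) << 1, with VAL_SO_FAR
   stepping through 1, 5, 10.  A minimal C model of that sequence,
   purely illustrative and not part of this file:  */
#if 0
static long
mult_by_10 (long x)
{
  long accum = x;	/* alg_m:         val_so_far = 1  */
  accum += x << 2;	/* alg_add_t_m2:  val_so_far = 5  */
  accum <<= 1;		/* alg_shift:     val_so_far = 10 */
  return accum;
}
#endif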
3074 
3075 /* Perform a multiplication and return an rtx for the result.
3076    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3077    TARGET is a suggestion for where to store the result (an rtx).
3078 
3079    We check specially for a constant integer as OP1.
3080    If you want this check for OP0 as well, then before calling
3081    you should swap the two operands if OP0 would be constant.  */
3082 
3083 rtx
3084 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3085 	     int unsignedp)
3086 {
3087   enum mult_variant variant;
3088   struct algorithm algorithm;
3089   int max_cost;
3090   bool speed = optimize_insn_for_speed_p ();
3091 
3092   /* Handling const0_rtx here allows us to use zero as a rogue value for
3093      coeff below.  */
3094   if (op1 == const0_rtx)
3095     return const0_rtx;
3096   if (op1 == const1_rtx)
3097     return op0;
3098   if (op1 == constm1_rtx)
3099     return expand_unop (mode,
3100 			GET_MODE_CLASS (mode) == MODE_INT
3101 			&& !unsignedp && flag_trapv
3102 			? negv_optab : neg_optab,
3103 			op0, target, 0);
3104 
3105   /* These are the operations that are potentially turned into a sequence
3106      of shifts and additions.  */
3107   if (SCALAR_INT_MODE_P (mode)
3108       && (unsignedp || !flag_trapv))
3109     {
3110       HOST_WIDE_INT coeff = 0;
3111       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3112 
3113       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3114 	 less than or equal in size to `unsigned int' this doesn't matter.
3115 	 If the mode is larger than `unsigned int', then synth_mult works
3116 	 only if the constant value exactly fits in an `unsigned int' without
3117 	 any truncation.  This means that multiplying by negative values does
3118 	 not work; results are off by 2^32 on a 32 bit machine.  */
3119 
3120       if (CONST_INT_P (op1))
3121 	{
3122 	  /* Attempt to handle multiplication of DImode values by negative
3123 	     coefficients, by performing the multiplication by a positive
3124 	     multiplier and then inverting the result.  */
3125 	  if (INTVAL (op1) < 0
3126 	      && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3127 	    {
3128 	      /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3129 		 result is interpreted as an unsigned coefficient.
3130 		 Exclude cost of op0 from max_cost to match the cost
3131 		 calculation of the synth_mult.  */
3132 	      max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3133 					speed)
3134 			  - neg_cost[speed][mode]);
3135 	      if (max_cost > 0
3136 		  && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3137 					  &variant, max_cost))
3138 		{
3139 		  rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3140 						NULL_RTX, &algorithm,
3141 						variant);
3142 		  return expand_unop (mode, neg_optab, temp, target, 0);
3143 		}
3144 	    }
3145 	  else coeff = INTVAL (op1);
3146 	}
3147       else if (GET_CODE (op1) == CONST_DOUBLE)
3148 	{
3149 	  /* If we are multiplying in DImode, it may still be a win
3150 	     to try to work with shifts and adds.  */
3151 	  if (CONST_DOUBLE_HIGH (op1) == 0
3152 	      && CONST_DOUBLE_LOW (op1) > 0)
3153 	    coeff = CONST_DOUBLE_LOW (op1);
3154 	  else if (CONST_DOUBLE_LOW (op1) == 0
3155 		   && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3156 	    {
3157 	      int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3158 			  + HOST_BITS_PER_WIDE_INT;
3159 	      return expand_shift (LSHIFT_EXPR, mode, op0,
3160 				   shift, target, unsignedp);
3161 	    }
3162 	}
3163 
3164       /* We used to test optimize here, on the grounds that it's better to
3165 	 produce a smaller program when -O is not used.  But this causes
3166 	 such a terrible slowdown sometimes that it seems better to always
3167 	 use synth_mult.  */
3168       if (coeff != 0)
3169 	{
3170 	  /* Special case powers of two.  */
3171 	  if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3172 	    return expand_shift (LSHIFT_EXPR, mode, op0,
3173 				 floor_log2 (coeff), target, unsignedp);
3174 
3175 	  /* Exclude cost of op0 from max_cost to match the cost
3176 	     calculation of the synth_mult.  */
3177 	  max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3178 	  if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3179 				   max_cost))
3180 	    return expand_mult_const (mode, op0, coeff, target,
3181 				      &algorithm, variant);
3182 	}
3183     }
3184 
3185   if (GET_CODE (op0) == CONST_DOUBLE)
3186     {
3187       rtx temp = op0;
3188       op0 = op1;
3189       op1 = temp;
3190     }
3191 
3192   /* Expand x*2.0 as x+x.  */
3193   if (GET_CODE (op1) == CONST_DOUBLE
3194       && SCALAR_FLOAT_MODE_P (mode))
3195     {
3196       REAL_VALUE_TYPE d;
3197       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3198 
3199       if (REAL_VALUES_EQUAL (d, dconst2))
3200 	{
3201 	  op0 = force_reg (GET_MODE (op0), op0);
3202 	  return expand_binop (mode, add_optab, op0, op0,
3203 			       target, unsignedp, OPTAB_LIB_WIDEN);
3204 	}
3205     }
3206 
3207   /* This used to use umul_optab if unsigned, but for non-widening multiply
3208      there is no difference between signed and unsigned.  */
3209   op0 = expand_binop (mode,
3210 		      ! unsignedp
3211 		      && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3212 		      ? smulv_optab : smul_optab,
3213 		      op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3214   gcc_assert (op0);
3215   return op0;
3216 }
3217 
3218 /* Perform a widening multiplication and return an rtx for the result.
3219    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3220    TARGET is a suggestion for where to store the result (an rtx).
3221    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3222    or smul_widen_optab.
3223 
3224    We check specially for a constant integer as OP1, comparing the
3225    cost of a widening multiply against the cost of a sequence of shifts
3226    and adds.  */
3227 
3228 rtx
3229 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3230 		      int unsignedp, optab this_optab)
3231 {
3232   bool speed = optimize_insn_for_speed_p ();
3233   rtx cop1;
3234 
3235   if (CONST_INT_P (op1)
3236       && GET_MODE (op0) != VOIDmode
3237       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3238 				this_optab == umul_widen_optab))
3239       && CONST_INT_P (cop1)
3240       && (INTVAL (cop1) >= 0
3241 	  || HWI_COMPUTABLE_MODE_P (mode)))
3242     {
3243       HOST_WIDE_INT coeff = INTVAL (cop1);
3244       int max_cost;
3245       enum mult_variant variant;
3246       struct algorithm algorithm;
3247 
3248       /* Special case powers of two.  */
3249       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3250 	{
3251 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3252 	  return expand_shift (LSHIFT_EXPR, mode, op0,
3253 			       floor_log2 (coeff), target, unsignedp);
3254 	}
3255 
3256       /* Exclude cost of op0 from max_cost to match the cost
3257 	 calculation of the synth_mult.  */
3258       max_cost = mul_widen_cost[speed][mode];
3259       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3260 			       max_cost))
3261 	{
3262 	  op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3263 	  return expand_mult_const (mode, op0, coeff, target,
3264 				    &algorithm, variant);
3265 	}
3266     }
3267   return expand_binop (mode, this_optab, op0, op1, target,
3268 		       unsignedp, OPTAB_LIB_WIDEN);
3269 }
3270 
3271 /* Return the smallest n such that 2**n >= X.  */
3272 
3273 int
3274 ceil_log2 (unsigned HOST_WIDE_INT x)
3275 {
3276   return floor_log2 (x - 1) + 1;
3277 }
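/* For instance, ceil_log2 (1) == 0, ceil_log2 (5) == 3 and
   ceil_log2 (8) == 3, relying on floor_log2 (0) being -1.  An
   illustrative (not compiled) self-check:  */
#if 0
static void
ceil_log2_examples (void)
{
  gcc_assert (ceil_log2 (1) == 0);	/* 2**0 >= 1 */
  gcc_assert (ceil_log2 (5) == 3);	/* 2**3 >= 5 */
  gcc_assert (ceil_log2 (8) == 3);	/* 2**3 >= 8 */
}
#endif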
3278 
3279 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3280    replace division by D, and put the least significant N bits of the result
3281    in *MULTIPLIER_PTR and return the most significant bit.
3282 
3283    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3284    needed precision is in PRECISION (should be <= N).
3285 
3286    PRECISION should be as small as possible so this function can choose
3287    multiplier more freely.
3288 
3289    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3290    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3291 
3292    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3293    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3294 
3295 static
3296 unsigned HOST_WIDE_INT
3297 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3298 		   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3299 {
3300   HOST_WIDE_INT mhigh_hi, mlow_hi;
3301   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3302   int lgup, post_shift;
3303   int pow, pow2;
3304   unsigned HOST_WIDE_INT nl, dummy1;
3305   HOST_WIDE_INT nh, dummy2;
3306 
3307   /* lgup = ceil(log2(divisor)); */
3308   lgup = ceil_log2 (d);
3309 
3310   gcc_assert (lgup <= n);
3311 
3312   pow = n + lgup;
3313   pow2 = n + lgup - precision;
3314 
3315   /* We could handle this with some effort, but this case is much
3316      better handled directly with a scc insn, so rely on caller using
3317      that.  */
3318   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3319 
3320   /* mlow = 2^(N + lgup)/d */
3321   if (pow >= HOST_BITS_PER_WIDE_INT)
3322     {
3323       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3324       nl = 0;
3325     }
3326   else
3327     {
3328       nh = 0;
3329       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3330     }
3331   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3332 			&mlow_lo, &mlow_hi, &dummy1, &dummy2);
3333 
3334   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3335   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3336     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3337   else
3338     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3339   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3340 			&mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3341 
3342   gcc_assert (!mhigh_hi || nh - d < d);
3343   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3344   /* Assert that mlow < mhigh.  */
3345   gcc_assert (mlow_hi < mhigh_hi
3346 	      || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3347 
3348   /* If precision == N, then mlow, mhigh exceed 2^N
3349      (but they do not exceed 2^(N+1)).  */
3350 
3351   /* Reduce to lowest terms.  */
3352   for (post_shift = lgup; post_shift > 0; post_shift--)
3353     {
3354       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3355       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3356       if (ml_lo >= mh_lo)
3357 	break;
3358 
3359       mlow_hi = 0;
3360       mlow_lo = ml_lo;
3361       mhigh_hi = 0;
3362       mhigh_lo = mh_lo;
3363     }
3364 
3365   *post_shift_ptr = post_shift;
3366   *lgup_ptr = lgup;
3367   if (n < HOST_BITS_PER_WIDE_INT)
3368     {
3369       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3370       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3371       return mhigh_lo >= mask;
3372     }
3373   else
3374     {
3375       *multiplier_ptr = GEN_INT (mhigh_lo);
3376       return mhigh_hi;
3377     }
3378 }
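/* As a worked example, for N == 32 and D == 3 this yields the
   multiplier 0xAAAAAAAB with *POST_SHIFT_PTR == 1 and a return value
   of 0 (the multiplier fits in N bits), so an unsigned 32-bit
   division by 3 reduces to a high-part multiply and one shift.  An
   illustrative (not compiled) model, assuming 32-bit unsigned int
   and 64-bit unsigned long long:  */
#if 0
static unsigned int
udiv_by_3 (unsigned int x)
{
  unsigned long long prod = (unsigned long long) x * 0xAAAAAAABu;
  return (unsigned int) (prod >> 32 >> 1);	/* high half, then post shift */
}
#endif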
3379 
3380 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3381    congruent to 1 (mod 2**N).  */
3382 
3383 static unsigned HOST_WIDE_INT
3384 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3385 {
3386   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3387 
3388   /* The algorithm notes that the choice y = x satisfies
3389      x*y == 1 mod 2^3, since x is assumed odd.
3390      Each iteration doubles the number of bits of significance in y.  */
3391 
3392   unsigned HOST_WIDE_INT mask;
3393   unsigned HOST_WIDE_INT y = x;
3394   int nbit = 3;
3395 
3396   mask = (n == HOST_BITS_PER_WIDE_INT
3397 	  ? ~(unsigned HOST_WIDE_INT) 0
3398 	  : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3399 
3400   while (nbit < n)
3401     {
3402       y = y * (2 - x*y) & mask;		/* Modulo 2^N */
3403       nbit *= 2;
3404     }
3405   return y;
3406 }
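/* For example, invert_mod2n (3, 32) is 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 2**33 + 1 == 1 (mod 2**32).  An illustrative
   (not compiled) model of the same iteration, assuming 32-bit
   unsigned int:  */
#if 0
static unsigned int
invert_mod2_32 (unsigned int x)		/* X must be odd.  */
{
  unsigned int y = x;			/* Correct to 3 bits.  */
  int nbit;

  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);		/* Doubles the correct bits.  */
  return y;
}
#endif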
3407 
3408 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3409    with the wrong signedness flavor.  ADJ_OPERAND is already the high half
3410    of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3411    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3412    product to become signed.
3413 
3414    The result is put in TARGET if that is convenient.
3415 
3416    MODE is the mode of operation.  */
3417 
3418 rtx
3419 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3420 			     rtx op1, rtx target, int unsignedp)
3421 {
3422   rtx tem;
3423   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3424 
3425   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3426 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3427   tem = expand_and (mode, tem, op1, NULL_RTX);
3428   adj_operand
3429     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3430 		     adj_operand);
3431 
3432   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3433 		      GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3434   tem = expand_and (mode, tem, op0, NULL_RTX);
3435   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3436 			  target);
3437 
3438   return target;
3439 }
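/* An illustrative (not compiled) model of the identity used above,
   for 32-bit operands and assuming an arithmetic right shift of
   negative values: the unsigned and signed high halves of a product
   differ exactly by the two masked terms below.  */
#if 0
static int
signed_highpart_from_unsigned (int x, int y, unsigned int uhigh)
{
  /* (x >> 31) & y is Y where X is negative and 0 otherwise, and
     symmetrically for the second term; subtracting both adjusts the
     unsigned high half to the signed one (adding goes the other
     way).  */
  return (int) uhigh - ((x >> 31) & y) - ((y >> 31) & x);
}
#endif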
3440 
3441 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3442 
3443 static rtx
3444 extract_high_half (enum machine_mode mode, rtx op)
3445 {
3446   enum machine_mode wider_mode;
3447 
3448   if (mode == word_mode)
3449     return gen_highpart (mode, op);
3450 
3451   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3452 
3453   wider_mode = GET_MODE_WIDER_MODE (mode);
3454   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3455 		     GET_MODE_BITSIZE (mode), 0, 1);
3456   return convert_modes (mode, wider_mode, op, 0);
3457 }
3458 
3459 /* Like expand_mult_highpart, but only consider using a multiplication
3460    optab.  OP1 is an rtx for the constant operand.  */
3461 
3462 static rtx
3463 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3464 			    rtx target, int unsignedp, int max_cost)
3465 {
3466   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3467   enum machine_mode wider_mode;
3468   optab moptab;
3469   rtx tem;
3470   int size;
3471   bool speed = optimize_insn_for_speed_p ();
3472 
3473   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3474 
3475   wider_mode = GET_MODE_WIDER_MODE (mode);
3476   size = GET_MODE_BITSIZE (mode);
3477 
3478   /* Firstly, try using a multiplication insn that only generates the needed
3479      high part of the product, and in the sign flavor of unsignedp.  */
3480   if (mul_highpart_cost[speed][mode] < max_cost)
3481     {
3482       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3483       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3484 			  unsignedp, OPTAB_DIRECT);
3485       if (tem)
3486 	return tem;
3487     }
3488 
3489   /* Secondly, same as above, but with the sign flavor opposite of
3490      UNSIGNEDP.  Need to adjust the result after the multiplication.  */
3491   if (size - 1 < BITS_PER_WORD
3492       && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3493 	  + 4 * add_cost[speed][mode] < max_cost))
3494     {
3495       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3496       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3497 			  unsignedp, OPTAB_DIRECT);
3498       if (tem)
3499 	/* We used the wrong signedness.  Adjust the result.  */
3500 	return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3501 					    tem, unsignedp);
3502     }
3503 
3504   /* Try widening multiplication.  */
3505   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3506   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3507       && mul_widen_cost[speed][wider_mode] < max_cost)
3508     {
3509       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3510 			  unsignedp, OPTAB_WIDEN);
3511       if (tem)
3512 	return extract_high_half (mode, tem);
3513     }
3514 
3515   /* Try widening the mode and performing a non-widening multiplication.  */
3516   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3517       && size - 1 < BITS_PER_WORD
3518       && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3519     {
3520       rtx insns, wop0, wop1;
3521 
3522       /* We need to widen the operands, for example to ensure the
3523 	 constant multiplier is correctly sign or zero extended.
3524 	 Use a sequence to clean up any instructions emitted by
3525 	 the conversions if things don't work out.  */
3526       start_sequence ();
3527       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3528       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3529       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3530 			  unsignedp, OPTAB_WIDEN);
3531       insns = get_insns ();
3532       end_sequence ();
3533 
3534       if (tem)
3535 	{
3536 	  emit_insn (insns);
3537 	  return extract_high_half (mode, tem);
3538 	}
3539     }
3540 
3541   /* Try widening multiplication of opposite signedness, and adjust.  */
3542   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3543   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3544       && size - 1 < BITS_PER_WORD
3545       && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3546 	  + 4 * add_cost[speed][mode] < max_cost))
3547     {
3548       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3549 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3550       if (tem != 0)
3551 	{
3552 	  tem = extract_high_half (mode, tem);
3553 	  /* We used the wrong signedness.  Adjust the result.  */
3554 	  return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3555 					      target, unsignedp);
3556 	}
3557     }
3558 
3559   return 0;
3560 }
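/* With a 32 -> 64 bit widening multiply, the widening strategy above
   amounts to the following illustrative (not compiled) model,
   assuming 32-bit int and 64-bit long long:  */
#if 0
static int
smul_highpart_32 (int x, int y)
{
  return (int) (((long long) x * y) >> 32);	/* extract_high_half */
}
#endif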
3561 
3562 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3563    putting the high half of the result in TARGET if that is convenient,
3564    and return where the result is.  If the operation cannot be performed,
3565    0 is returned.
3566 
3567    MODE is the mode of operation and result.
3568 
3569    UNSIGNEDP nonzero means unsigned multiply.
3570 
3571    MAX_COST is the total allowed cost for the expanded RTL.  */
3572 
3573 static rtx
3574 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3575 		      rtx target, int unsignedp, int max_cost)
3576 {
3577   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3578   unsigned HOST_WIDE_INT cnst1;
3579   int extra_cost;
3580   bool sign_adjust = false;
3581   enum mult_variant variant;
3582   struct algorithm alg;
3583   rtx tem;
3584   bool speed = optimize_insn_for_speed_p ();
3585 
3586   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3587   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3588   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3589 
3590   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3591 
3592   /* We can't optimize modes wider than BITS_PER_WORD.
3593      ??? We might be able to perform double-word arithmetic if
3594      mode == word_mode, however all the cost calculations in
3595      synth_mult etc. assume single-word operations.  */
3596   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3597     return expand_mult_highpart_optab (mode, op0, op1, target,
3598 				       unsignedp, max_cost);
3599 
3600   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3601 
3602   /* Check whether we try to multiply by a negative constant.  */
3603   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3604     {
3605       sign_adjust = true;
3606       extra_cost += add_cost[speed][mode];
3607     }
3608 
3609   /* See whether shift/add multiplication is cheap enough.  */
3610   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3611 			   max_cost - extra_cost))
3612     {
3613       /* See whether the specialized multiplication optabs are
3614 	 cheaper than the shift/add version.  */
3615       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3616 					alg.cost.cost + extra_cost);
3617       if (tem)
3618 	return tem;
3619 
3620       tem = convert_to_mode (wider_mode, op0, unsignedp);
3621       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3622       tem = extract_high_half (mode, tem);
3623 
3624       /* Adjust result for signedness.  */
3625       if (sign_adjust)
3626 	tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3627 
3628       return tem;
3629     }
3630   return expand_mult_highpart_optab (mode, op0, op1, target,
3631 				     unsignedp, max_cost);
3632 }
3633 
3634 
3635 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3636 
3637 static rtx
3638 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3639 {
3640   unsigned HOST_WIDE_INT masklow, maskhigh;
3641   rtx result, temp, shift, label;
3642   int logd;
3643 
3644   logd = floor_log2 (d);
3645   result = gen_reg_rtx (mode);
3646 
3647   /* Avoid conditional branches when they're expensive.  */
3648   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3649       && optimize_insn_for_speed_p ())
3650     {
3651       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3652 				      mode, 0, -1);
3653       if (signmask)
3654 	{
3655 	  signmask = force_reg (mode, signmask);
3656 	  masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3657 	  shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3658 
3659 	  /* Use the rtx_cost of a LSHIFTRT instruction to determine
3660 	     which instruction sequence to use.  If logical right shifts
3661 	     are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3662 	     use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3663 
3664 	  temp = gen_rtx_LSHIFTRT (mode, result, shift);
3665 	  if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3666 	      || (set_src_cost (temp, optimize_insn_for_speed_p ())
3667 		  > COSTS_N_INSNS (2)))
3668 	    {
3669 	      temp = expand_binop (mode, xor_optab, op0, signmask,
3670 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3671 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3672 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3673 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3674 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3675 	      temp = expand_binop (mode, xor_optab, temp, signmask,
3676 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3677 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3678 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3679 	    }
3680 	  else
3681 	    {
3682 	      signmask = expand_binop (mode, lshr_optab, signmask, shift,
3683 				       NULL_RTX, 1, OPTAB_LIB_WIDEN);
3684 	      signmask = force_reg (mode, signmask);
3685 
3686 	      temp = expand_binop (mode, add_optab, op0, signmask,
3687 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3688 	      temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3689 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3690 	      temp = expand_binop (mode, sub_optab, temp, signmask,
3691 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
3692 	    }
3693 	  return temp;
3694 	}
3695     }
3696 
3697   /* Mask contains the mode's signbit and the significant bits of the
3698      modulus.  By including the signbit in the operation, many targets
3699      can avoid an explicit compare operation in the following comparison
3700      against zero.  */
3701 
3702   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3703   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3704     {
3705       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3706       maskhigh = -1;
3707     }
3708   else
3709     maskhigh = (HOST_WIDE_INT) -1
3710 		 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3711 
3712   temp = expand_binop (mode, and_optab, op0,
3713 		       immed_double_const (masklow, maskhigh, mode),
3714 		       result, 1, OPTAB_LIB_WIDEN);
3715   if (temp != result)
3716     emit_move_insn (result, temp);
3717 
3718   label = gen_label_rtx ();
3719   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3720 
3721   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3722 		       0, OPTAB_LIB_WIDEN);
3723   masklow = (HOST_WIDE_INT) -1 << logd;
3724   maskhigh = -1;
3725   temp = expand_binop (mode, ior_optab, temp,
3726 		       immed_double_const (masklow, maskhigh, mode),
3727 		       result, 1, OPTAB_LIB_WIDEN);
3728   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3729 		       0, OPTAB_LIB_WIDEN);
3730   if (temp != result)
3731     emit_move_insn (result, temp);
3732   emit_label (label);
3733   return result;
3734 }
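/* An illustrative (not compiled) model of the branch-free LSHIFTRT
   variant above, computing x % 8 for a 32-bit int and assuming an
   arithmetic right shift of negative values:  */
#if 0
static int
smod_by_8 (int x)
{
  unsigned int bias = (unsigned int) (x >> 31) >> 29;	/* 7 if x < 0, else 0 */

  /* One shift, one add, one and, one sub; e.g. -11 gives
     ((-11 + 7) & 7) - 7 == -3, matching C truncating division.  */
  return (int) (((unsigned int) x + bias) & 7) - (int) bias;
}
#endif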
3735 
3736 /* Expand signed division of OP0 by a power of two D in mode MODE.
3737    This routine is only called for positive values of D.  */
3738 
3739 static rtx
3740 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3741 {
3742   rtx temp, label;
3743   int logd;
3744 
3745   logd = floor_log2 (d);
3746 
3747   if (d == 2
3748       && BRANCH_COST (optimize_insn_for_speed_p (),
3749 		      false) >= 1)
3750     {
3751       temp = gen_reg_rtx (mode);
3752       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3753       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3754 			   0, OPTAB_LIB_WIDEN);
3755       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3756     }
3757 
3758 #ifdef HAVE_conditional_move
3759   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3760       >= 2)
3761     {
3762       rtx temp2;
3763 
3764       /* ??? emit_conditional_move forces a stack adjustment via
3765 	 compare_from_rtx, so if the sequence is discarded, it will
3766 	 be lost.  Do it now instead.  */
3767       do_pending_stack_adjust ();
3768 
3769       start_sequence ();
3770       temp2 = copy_to_mode_reg (mode, op0);
3771       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3772 			   NULL_RTX, 0, OPTAB_LIB_WIDEN);
3773       temp = force_reg (mode, temp);
3774 
3775       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3776       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3777 				     mode, temp, temp2, mode, 0);
3778       if (temp2)
3779 	{
3780 	  rtx seq = get_insns ();
3781 	  end_sequence ();
3782 	  emit_insn (seq);
3783 	  return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3784 	}
3785       end_sequence ();
3786     }
3787 #endif
3788 
3789   if (BRANCH_COST (optimize_insn_for_speed_p (),
3790 		   false) >= 2)
3791     {
3792       int ushift = GET_MODE_BITSIZE (mode) - logd;
3793 
3794       temp = gen_reg_rtx (mode);
3795       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3796       if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3797 	temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3798 			     NULL_RTX, 0, OPTAB_LIB_WIDEN);
3799       else
3800 	temp = expand_shift (RSHIFT_EXPR, mode, temp,
3801 			     ushift, NULL_RTX, 1);
3802       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3803 			   0, OPTAB_LIB_WIDEN);
3804       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3805     }
3806 
3807   label = gen_label_rtx ();
3808   temp = copy_to_mode_reg (mode, op0);
3809   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3810   expand_inc (temp, GEN_INT (d - 1));
3811   emit_label (label);
3812   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3813 }
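/* An illustrative (not compiled) model of the branch-free case
   above, computing x / 8 for a 32-bit int and assuming an arithmetic
   right shift of negative values: add D - 1 only when X is negative,
   then shift.  */
#if 0
static int
sdiv_by_8 (int x)
{
  unsigned int bias = (unsigned int) (x >> 31) >> 29;	/* 7 if x < 0, else 0 */

  return (int) ((unsigned int) x + bias) >> 3;	/* e.g. -9 / 8 == -1 */
}
#endif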
3814 
3815 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3816    if that is convenient, and returning where the result is.
3817    You may request either the quotient or the remainder as the result;
3818    specify REM_FLAG nonzero to get the remainder.
3819 
3820    CODE is the expression code for which kind of division this is;
3821    it controls how rounding is done.  MODE is the machine mode to use.
3822    UNSIGNEDP nonzero means do unsigned division.  */
3823 
3824 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3825    and then correct it by or'ing in missing high bits
3826    if result of ANDI is nonzero.
3827    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3828    This could optimize to a bfexts instruction.
3829    But C doesn't use these operations, so their optimizations are
3830    left for later.  */
3831 /* ??? For modulo, we don't actually need the highpart of the first product,
3832    the low part will do nicely.  And for small divisors, the second multiply
3833    can also be a low-part only multiply or even be completely left out.
3834    E.g. to calculate the remainder of a division by 3 with a 32 bit
3835    multiply, multiply with 0x55555556 and extract the upper two bits;
3836    the result is exact for inputs up to 0x1fffffff.
3837    The input range can be reduced by using cross-sum rules.
3838    For odd divisors >= 3, the following table gives right shift counts
3839    so that if a number is shifted by an integer multiple of the given
3840    amount, the remainder stays the same:
3841    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3842    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3843    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3844    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3845    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3846 
3847    Cross-sum rules for even numbers can be derived by leaving as many bits
3848    to the right alone as the divisor has zeros to the right.
3849    E.g. if x is an unsigned 32 bit number:
3850    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3851    */
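/* As a check on the claim above, an illustrative (not compiled)
   model of the remainder-by-3 trick, assuming 32-bit unsigned int:
   0x55555556 == (2**32 + 2) / 3, so the top two bits of the
   truncated product encode the remainder.  */
#if 0
static unsigned int
urem_by_3 (unsigned int x)	/* Exact for x <= 0x1fffffff.  */
{
  return (x * 0x55555556u) >> 30;
}
#endif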
3852 
3853 rtx
3854 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3855 	       rtx op0, rtx op1, rtx target, int unsignedp)
3856 {
3857   enum machine_mode compute_mode;
3858   rtx tquotient;
3859   rtx quotient = 0, remainder = 0;
3860   rtx last;
3861   int size;
3862   rtx insn;
3863   optab optab1, optab2;
3864   int op1_is_constant, op1_is_pow2 = 0;
3865   int max_cost, extra_cost;
3866   static HOST_WIDE_INT last_div_const = 0;
3867   static HOST_WIDE_INT ext_op1;
3868   bool speed = optimize_insn_for_speed_p ();
3869 
3870   op1_is_constant = CONST_INT_P (op1);
3871   if (op1_is_constant)
3872     {
3873       ext_op1 = INTVAL (op1);
3874       if (unsignedp)
3875 	ext_op1 &= GET_MODE_MASK (mode);
3876       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3877 		     || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3878     }
3879 
3880   /*
3881      This is the structure of expand_divmod:
3882 
3883      First comes code to fix up the operands so we can perform the operations
3884      correctly and efficiently.
3885 
3886      Second comes a switch statement with code specific for each rounding mode.
3887      For some special operands this code emits all RTL for the desired
3888      operation, for other cases, it generates only a quotient and stores it in
3889      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3890      to indicate that it has not done anything.
3891 
3892      Last comes code that finishes the operation.  If QUOTIENT is set and
3893      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3894      QUOTIENT is not set, it is computed using trunc rounding.
3895 
3896      We try to generate special code for division and remainder when OP1 is a
3897      constant.  If |OP1| = 2**n we can use shifts and some other fast
3898      operations.  For other values of OP1, we compute a carefully selected
3899      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3900      by m.
3901 
3902      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3903      half of the product.  Different strategies for generating the product are
3904      implemented in expand_mult_highpart.
3905 
3906      If what we actually want is the remainder, we generate that by another
3907      by-constant multiplication and a subtraction.  */
3908 
3909   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3910      code below will malfunction if we are, so check here and handle
3911      the special case if so.  */
3912   if (op1 == const1_rtx)
3913     return rem_flag ? const0_rtx : op0;
3914 
3915   /* When dividing by -1, we could get an overflow.
3916      negv_optab can handle overflows.  */
3917   if (! unsignedp && op1 == constm1_rtx)
3918     {
3919       if (rem_flag)
3920 	return const0_rtx;
3921       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3922 			  ? negv_optab : neg_optab, op0, target, 0);
3923     }
3924 
3925   if (target
3926       /* Don't use the function value register as a target
3927 	 since we have to read it as well as write it,
3928 	 and function-inlining gets confused by this.  */
3929       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3930 	  /* Don't clobber an operand while doing a multi-step calculation.  */
3931 	  || ((rem_flag || op1_is_constant)
3932 	      && (reg_mentioned_p (target, op0)
3933 		  || (MEM_P (op0) && MEM_P (target))))
3934 	  || reg_mentioned_p (target, op1)
3935 	  || (MEM_P (op1) && MEM_P (target))))
3936     target = 0;
3937 
3938   /* Get the mode in which to perform this computation.  Normally it will
3939      be MODE, but sometimes we can't do the desired operation in MODE.
3940      If so, pick a wider mode in which we can do the operation.  Convert
3941      to that mode at the start to avoid repeated conversions.
3942 
3943      First see what operations we need.  These depend on the expression
3944      we are evaluating.  (We assume that divxx3 insns exist under the
3945      same conditions as modxx3 insns, and that these insns don't normally
3946      fail.  If these assumptions are not correct, we may generate less
3947      efficient code in some cases.)
3948 
3949      Then see if we find a mode in which we can open-code that operation
3950      (either a division, modulus, or shift).  Finally, check for the smallest
3951      mode for which we can do the operation with a library call.  */
3952 
3953   /* We might want to refine this now that we have division-by-constant
3954      optimization.  Since expand_mult_highpart tries so many variants, it is
3955      not straightforward to generalize this.  Maybe we should make an array
3956      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3957 
3958   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3959 	    ? (unsignedp ? lshr_optab : ashr_optab)
3960 	    : (unsignedp ? udiv_optab : sdiv_optab));
3961   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3962 	    ? optab1
3963 	    : (unsignedp ? udivmod_optab : sdivmod_optab));
3964 
3965   for (compute_mode = mode; compute_mode != VOIDmode;
3966        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3967     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3968 	|| optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3969       break;
3970 
3971   if (compute_mode == VOIDmode)
3972     for (compute_mode = mode; compute_mode != VOIDmode;
3973 	 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3974       if (optab_libfunc (optab1, compute_mode)
3975 	  || optab_libfunc (optab2, compute_mode))
3976 	break;
3977 
3978   /* If we still couldn't find a mode, use MODE, but expand_binop will
3979      probably die.  */
3980   if (compute_mode == VOIDmode)
3981     compute_mode = mode;
3982 
3983   if (target && GET_MODE (target) == compute_mode)
3984     tquotient = target;
3985   else
3986     tquotient = gen_reg_rtx (compute_mode);
3987 
3988   size = GET_MODE_BITSIZE (compute_mode);
3989 #if 0
3990   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3991      (mode), and thereby get better code when OP1 is a constant.  Do that
3992      later.  It will require going over all usages of SIZE below.  */
3993   size = GET_MODE_BITSIZE (mode);
3994 #endif
3995 
3996   /* Only deduct something for a REM if the last divide done was
3997      for a different constant.  Then set the constant of the last
3998      divide.  */
3999   max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4000   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4001 		     && INTVAL (op1) == last_div_const))
4002     max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4003 
4004   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4005 
4006   /* Now convert to the best mode to use.  */
4007   if (compute_mode != mode)
4008     {
4009       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4010       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4011 
4012       /* convert_modes may have placed op1 into a register, so we
4013 	 must recompute the following.  */
4014       op1_is_constant = CONST_INT_P (op1);
4015       op1_is_pow2 = (op1_is_constant
4016 		     && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4017 			  || (! unsignedp
4018 			      && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
4019     }
4020 
4021   /* If one of the operands is a volatile MEM, copy it into a register.  */
4022 
4023   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4024     op0 = force_reg (compute_mode, op0);
4025   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4026     op1 = force_reg (compute_mode, op1);
4027 
4028   /* If we need the remainder or if OP1 is constant, we need to
4029      put OP0 in a register in case it has any queued subexpressions.  */
4030   if (rem_flag || op1_is_constant)
4031     op0 = force_reg (compute_mode, op0);
4032 
4033   last = get_last_insn ();
4034 
4035   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4036   if (unsignedp)
4037     {
4038       if (code == FLOOR_DIV_EXPR)
4039 	code = TRUNC_DIV_EXPR;
4040       if (code == FLOOR_MOD_EXPR)
4041 	code = TRUNC_MOD_EXPR;
4042       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4043 	code = TRUNC_DIV_EXPR;
4044     }
4045 
4046   if (op1 != const0_rtx)
4047     switch (code)
4048       {
4049       case TRUNC_MOD_EXPR:
4050       case TRUNC_DIV_EXPR:
4051 	if (op1_is_constant)
4052 	  {
4053 	    if (unsignedp)
4054 	      {
4055 		unsigned HOST_WIDE_INT mh;
4056 		int pre_shift, post_shift;
4057 		int dummy;
4058 		rtx ml;
4059 		unsigned HOST_WIDE_INT d = (INTVAL (op1)
4060 					    & GET_MODE_MASK (compute_mode));
4061 
4062 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4063 		  {
4064 		    pre_shift = floor_log2 (d);
4065 		    if (rem_flag)
4066 		      {
4067 			remainder
4068 			  = expand_binop (compute_mode, and_optab, op0,
4069 					  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4070 					  remainder, 1,
4071 					  OPTAB_LIB_WIDEN);
4072 			if (remainder)
4073 			  return gen_lowpart (mode, remainder);
4074 		      }
4075 		    quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4076 					     pre_shift, tquotient, 1);
4077 		  }
4078 		else if (size <= HOST_BITS_PER_WIDE_INT)
4079 		  {
4080 		    if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4081 		      {
4082 			/* Most significant bit of divisor is set; emit an scc
4083 			   insn.  */
4084 			quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4085 							  compute_mode, 1, 1);
4086 		      }
4087 		    else
4088 		      {
4089 			/* Find a suitable multiplier and right shift count
4090 			   instead of multiplying with D.  */
4091 
4092 			mh = choose_multiplier (d, size, size,
4093 						&ml, &post_shift, &dummy);
4094 
4095 			/* If the suggested multiplier is more than SIZE bits,
4096 			   we can do better for even divisors, using an
4097 			   initial right shift.  */
4098 			if (mh != 0 && (d & 1) == 0)
4099 			  {
4100 			    pre_shift = floor_log2 (d & -d);
4101 			    mh = choose_multiplier (d >> pre_shift, size,
4102 						    size - pre_shift,
4103 						    &ml, &post_shift, &dummy);
4104 			    gcc_assert (!mh);
4105 			  }
4106 			else
4107 			  pre_shift = 0;
4108 
4109 			if (mh != 0)
4110 			  {
4111 			    rtx t1, t2, t3, t4;
4112 
4113 			    if (post_shift - 1 >= BITS_PER_WORD)
4114 			      goto fail1;
4115 
4116 			    extra_cost
4117 			      = (shift_cost[speed][compute_mode][post_shift - 1]
4118 				 + shift_cost[speed][compute_mode][1]
4119 				 + 2 * add_cost[speed][compute_mode]);
4120 			    t1 = expand_mult_highpart (compute_mode, op0, ml,
4121 						       NULL_RTX, 1,
4122 						       max_cost - extra_cost);
4123 			    if (t1 == 0)
4124 			      goto fail1;
4125 			    t2 = force_operand (gen_rtx_MINUS (compute_mode,
4126 							       op0, t1),
4127 						NULL_RTX);
4128 			    t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4129 					       t2, 1, NULL_RTX, 1);
4130 			    t4 = force_operand (gen_rtx_PLUS (compute_mode,
4131 							      t1, t3),
4132 						NULL_RTX);
4133 			    quotient = expand_shift
4134 			      (RSHIFT_EXPR, compute_mode, t4,
4135 			       post_shift - 1, tquotient, 1);
4136 			  }
4137 			else
4138 			  {
4139 			    rtx t1, t2;
4140 
4141 			    if (pre_shift >= BITS_PER_WORD
4142 				|| post_shift >= BITS_PER_WORD)
4143 			      goto fail1;
4144 
4145 			    t1 = expand_shift
4146 			      (RSHIFT_EXPR, compute_mode, op0,
4147 			       pre_shift, NULL_RTX, 1);
4148 			    extra_cost
4149 			      = (shift_cost[speed][compute_mode][pre_shift]
4150 				 + shift_cost[speed][compute_mode][post_shift]);
4151 			    t2 = expand_mult_highpart (compute_mode, t1, ml,
4152 						       NULL_RTX, 1,
4153 						       max_cost - extra_cost);
4154 			    if (t2 == 0)
4155 			      goto fail1;
4156 			    quotient = expand_shift
4157 			      (RSHIFT_EXPR, compute_mode, t2,
4158 			       post_shift, tquotient, 1);
4159 			  }
4160 		      }
4161 		  }
4162 		else		/* Too wide mode to use tricky code */
4163 		  break;
4164 
4165 		insn = get_last_insn ();
4166 		if (insn != last)
4167 		  set_dst_reg_note (insn, REG_EQUAL,
4168 				    gen_rtx_UDIV (compute_mode, op0, op1),
4169 				    quotient);
4170 	      }
4171 	    else		/* TRUNC_DIV, signed */
4172 	      {
4173 		unsigned HOST_WIDE_INT ml;
4174 		int lgup, post_shift;
4175 		rtx mlr;
4176 		HOST_WIDE_INT d = INTVAL (op1);
4177 		unsigned HOST_WIDE_INT abs_d;
4178 
4179 		/* Since d might be INT_MIN, we have to cast to
4180 		   unsigned HOST_WIDE_INT before negating to avoid
4181 		   undefined signed overflow.  */
4182 		abs_d = (d >= 0
4183 			 ? (unsigned HOST_WIDE_INT) d
4184 			 : - (unsigned HOST_WIDE_INT) d);
4185 
4186 		/* n rem d = n rem -d */
4187 		if (rem_flag && d < 0)
4188 		  {
4189 		    d = abs_d;
4190 		    op1 = gen_int_mode (abs_d, compute_mode);
4191 		  }
4192 
4193 		if (d == 1)
4194 		  quotient = op0;
4195 		else if (d == -1)
4196 		  quotient = expand_unop (compute_mode, neg_optab, op0,
4197 					  tquotient, 0);
4198 		else if (HOST_BITS_PER_WIDE_INT >= size
4199 			 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4200 		  {
4201 		    /* This case is not handled correctly below.  */
4202 		    quotient = emit_store_flag (tquotient, EQ, op0, op1,
4203 						compute_mode, 1, 1);
4204 		    if (quotient == 0)
4205 		      goto fail1;
4206 		  }
4207 		else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4208 			 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4209 				      : sdiv_pow2_cheap[speed][compute_mode])
4210 			 /* We assume that the cheap metric is true if the
4211 			    optab has an expander for this mode.  */
4212 			 && ((optab_handler ((rem_flag ? smod_optab
4213 					      : sdiv_optab),
4214 					     compute_mode)
4215 			      != CODE_FOR_nothing)
4216 			     || (optab_handler (sdivmod_optab,
4217 						compute_mode)
4218 				 != CODE_FOR_nothing)))
4219 		  ;
4220 		else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4221 		  {
4222 		    if (rem_flag)
4223 		      {
4224 			remainder = expand_smod_pow2 (compute_mode, op0, d);
4225 			if (remainder)
4226 			  return gen_lowpart (mode, remainder);
4227 		      }
4228 
4229 		    if (sdiv_pow2_cheap[speed][compute_mode]
4230 			&& ((optab_handler (sdiv_optab, compute_mode)
4231 			     != CODE_FOR_nothing)
4232 			    || (optab_handler (sdivmod_optab, compute_mode)
4233 				!= CODE_FOR_nothing)))
4234 		      quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4235 						compute_mode, op0,
4236 						gen_int_mode (abs_d,
4237 							      compute_mode),
4238 						NULL_RTX, 0);
4239 		    else
4240 		      quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4241 
4242 		    /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4243 		       negate the quotient.  */
4244 		    if (d < 0)
4245 		      {
4246 			insn = get_last_insn ();
4247 			if (insn != last
4248 			    && abs_d < ((unsigned HOST_WIDE_INT) 1
4249 					<< (HOST_BITS_PER_WIDE_INT - 1)))
4250 			  set_dst_reg_note (insn, REG_EQUAL,
4251 					    gen_rtx_DIV (compute_mode, op0,
4252 							 gen_int_mode
4253 							   (abs_d,
4254 							    compute_mode)),
4255 					    quotient);
4256 
4257 			quotient = expand_unop (compute_mode, neg_optab,
4258 						quotient, quotient, 0);
4259 		      }
4260 		  }
4261 		else if (size <= HOST_BITS_PER_WIDE_INT)
4262 		  {
4263 		    choose_multiplier (abs_d, size, size - 1,
4264 				       &mlr, &post_shift, &lgup);
4265 		    ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4266 		    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4267 		      {
4268 			rtx t1, t2, t3;
4269 
4270 			if (post_shift >= BITS_PER_WORD
4271 			    || size - 1 >= BITS_PER_WORD)
4272 			  goto fail1;
4273 
4274 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4275 				      + shift_cost[speed][compute_mode][size - 1]
4276 				      + add_cost[speed][compute_mode]);
4277 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4278 						   NULL_RTX, 0,
4279 						   max_cost - extra_cost);
4280 			if (t1 == 0)
4281 			  goto fail1;
4282 			t2 = expand_shift
4283 			  (RSHIFT_EXPR, compute_mode, t1,
4284 			   post_shift, NULL_RTX, 0);
4285 			t3 = expand_shift
4286 			  (RSHIFT_EXPR, compute_mode, op0,
4287 			   size - 1, NULL_RTX, 0);
4288 			if (d < 0)
4289 			  quotient
4290 			    = force_operand (gen_rtx_MINUS (compute_mode,
4291 							    t3, t2),
4292 					     tquotient);
4293 			else
4294 			  quotient
4295 			    = force_operand (gen_rtx_MINUS (compute_mode,
4296 							    t2, t3),
4297 					     tquotient);
4298 		      }
4299 		    else
4300 		      {
4301 			rtx t1, t2, t3, t4;
4302 
4303 			if (post_shift >= BITS_PER_WORD
4304 			    || size - 1 >= BITS_PER_WORD)
4305 			  goto fail1;
4306 
4307 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4308 			mlr = gen_int_mode (ml, compute_mode);
4309 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4310 				      + shift_cost[speed][compute_mode][size - 1]
4311 				      + 2 * add_cost[speed][compute_mode]);
4312 			t1 = expand_mult_highpart (compute_mode, op0, mlr,
4313 						   NULL_RTX, 0,
4314 						   max_cost - extra_cost);
4315 			if (t1 == 0)
4316 			  goto fail1;
4317 			t2 = force_operand (gen_rtx_PLUS (compute_mode,
4318 							  t1, op0),
4319 					    NULL_RTX);
4320 			t3 = expand_shift
4321 			  (RSHIFT_EXPR, compute_mode, t2,
4322 			   post_shift, NULL_RTX, 0);
4323 			t4 = expand_shift
4324 			  (RSHIFT_EXPR, compute_mode, op0,
4325 			   size - 1, NULL_RTX, 0);
4326 			if (d < 0)
4327 			  quotient
4328 			    = force_operand (gen_rtx_MINUS (compute_mode,
4329 							    t4, t3),
4330 					     tquotient);
4331 			else
4332 			  quotient
4333 			    = force_operand (gen_rtx_MINUS (compute_mode,
4334 							    t3, t4),
4335 					     tquotient);
4336 		      }
4337 		  }
4338 		else		/* Too wide mode to use tricky code */
4339 		  break;
4340 
4341 		insn = get_last_insn ();
4342 		if (insn != last)
4343 		  set_dst_reg_note (insn, REG_EQUAL,
4344 				    gen_rtx_DIV (compute_mode, op0, op1),
4345 				    quotient);
4346 	      }
4347 	    break;
4348 	  }
4349       fail1:
4350 	delete_insns_since (last);
4351 	break;
4352 
4353       case FLOOR_DIV_EXPR:
4354       case FLOOR_MOD_EXPR:
4355 	/* We will come here only for signed operations.  */
4356 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4357 	  {
4358 	    unsigned HOST_WIDE_INT mh;
4359 	    int pre_shift, lgup, post_shift;
4360 	    HOST_WIDE_INT d = INTVAL (op1);
4361 	    rtx ml;
4362 
4363 	    if (d > 0)
4364 	      {
4365 		/* We could just as easily deal with negative constants here,
4366 		   but it does not seem worth the trouble for GCC 2.6.  */
4367 		if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4368 		  {
4369 		    pre_shift = floor_log2 (d);
4370 		    if (rem_flag)
4371 		      {
4372 			remainder = expand_binop (compute_mode, and_optab, op0,
4373 						  GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4374 						  remainder, 0, OPTAB_LIB_WIDEN);
4375 			if (remainder)
4376 			  return gen_lowpart (mode, remainder);
4377 		      }
4378 		    quotient = expand_shift
4379 		      (RSHIFT_EXPR, compute_mode, op0,
4380 		       pre_shift, tquotient, 0);
4381 		  }
4382 		else
4383 		  {
4384 		    rtx t1, t2, t3, t4;
4385 
4386 		    mh = choose_multiplier (d, size, size - 1,
4387 					    &ml, &post_shift, &lgup);
4388 		    gcc_assert (!mh);
4389 
4390 		    if (post_shift < BITS_PER_WORD
4391 			&& size - 1 < BITS_PER_WORD)
4392 		      {
4393 			t1 = expand_shift
4394 			  (RSHIFT_EXPR, compute_mode, op0,
4395 			   size - 1, NULL_RTX, 0);
4396 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4397 					   NULL_RTX, 0, OPTAB_WIDEN);
4398 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
4399 				      + shift_cost[speed][compute_mode][size - 1]
4400 				      + 2 * add_cost[speed][compute_mode]);
4401 			t3 = expand_mult_highpart (compute_mode, t2, ml,
4402 						   NULL_RTX, 1,
4403 						   max_cost - extra_cost);
4404 			if (t3 != 0)
4405 			  {
4406 			    t4 = expand_shift
4407 			      (RSHIFT_EXPR, compute_mode, t3,
4408 			       post_shift, NULL_RTX, 1);
4409 			    quotient = expand_binop (compute_mode, xor_optab,
4410 						     t4, t1, tquotient, 0,
4411 						     OPTAB_WIDEN);
4412 			  }
4413 		      }
4414 		  }
4415 	      }
4416 	    else
4417 	      {
4418 		rtx nsign, t1, t2, t3, t4;
4419 		t1 = force_operand (gen_rtx_PLUS (compute_mode,
4420 						  op0, constm1_rtx), NULL_RTX);
4421 		t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4422 				   0, OPTAB_WIDEN);
4423 		nsign = expand_shift
4424 		  (RSHIFT_EXPR, compute_mode, t2,
4425 		   size - 1, NULL_RTX, 0);
4426 		t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4427 				    NULL_RTX);
4428 		t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4429 				    NULL_RTX, 0);
4430 		if (t4)
4431 		  {
4432 		    rtx t5;
4433 		    t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4434 				      NULL_RTX, 0);
4435 		    quotient = force_operand (gen_rtx_PLUS (compute_mode,
4436 							    t4, t5),
4437 					      tquotient);
4438 		  }
4439 	      }
4440 	  }
4441 
4442 	if (quotient != 0)
4443 	  break;
4444 	delete_insns_since (last);
4445 
4446 	/* Try using an instruction that produces both the quotient and
4447 	   remainder, using truncation.  We can easily compensate the quotient
4448 	   or remainder to get floor rounding, once we have the remainder.
4449 	   Notice that we also compute the final remainder value here,
4450 	   and return the result right away.  */
4451 	if (target == 0 || GET_MODE (target) != compute_mode)
4452 	  target = gen_reg_rtx (compute_mode);
4453 
4454 	if (rem_flag)
4455 	  {
4456 	    remainder
4457 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4458 	    quotient = gen_reg_rtx (compute_mode);
4459 	  }
4460 	else
4461 	  {
4462 	    quotient
4463 	      = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4464 	    remainder = gen_reg_rtx (compute_mode);
4465 	  }
4466 
4467 	if (expand_twoval_binop (sdivmod_optab, op0, op1,
4468 				 quotient, remainder, 0))
4469 	  {
4470 	    /* This could be computed with a branch-less sequence.
4471 	       Save that for later.  */
4472 	    rtx tem;
4473 	    rtx label = gen_label_rtx ();
4474 	    do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4475 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4476 				NULL_RTX, 0, OPTAB_WIDEN);
4477 	    do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4478 	    expand_dec (quotient, const1_rtx);
4479 	    expand_inc (remainder, op1);
4480 	    emit_label (label);
4481 	    return gen_lowpart (mode, rem_flag ? remainder : quotient);
4482 	  }
4483 
4484 	/* No luck with division elimination or divmod.  Have to do it
4485 	   by conditionally adjusting op0 *and* the result.  */
4486 	{
4487 	  rtx label1, label2, label3, label4, label5;
4488 	  rtx adjusted_op0;
4489 	  rtx tem;
4490 
4491 	  quotient = gen_reg_rtx (compute_mode);
4492 	  adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4493 	  label1 = gen_label_rtx ();
4494 	  label2 = gen_label_rtx ();
4495 	  label3 = gen_label_rtx ();
4496 	  label4 = gen_label_rtx ();
4497 	  label5 = gen_label_rtx ();
4498 	  do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4499 	  do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4500 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4501 			      quotient, 0, OPTAB_LIB_WIDEN);
4502 	  if (tem != quotient)
4503 	    emit_move_insn (quotient, tem);
4504 	  emit_jump_insn (gen_jump (label5));
4505 	  emit_barrier ();
4506 	  emit_label (label1);
4507 	  expand_inc (adjusted_op0, const1_rtx);
4508 	  emit_jump_insn (gen_jump (label4));
4509 	  emit_barrier ();
4510 	  emit_label (label2);
4511 	  do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4512 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4513 			      quotient, 0, OPTAB_LIB_WIDEN);
4514 	  if (tem != quotient)
4515 	    emit_move_insn (quotient, tem);
4516 	  emit_jump_insn (gen_jump (label5));
4517 	  emit_barrier ();
4518 	  emit_label (label3);
4519 	  expand_dec (adjusted_op0, const1_rtx);
4520 	  emit_label (label4);
4521 	  tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4522 			      quotient, 0, OPTAB_LIB_WIDEN);
4523 	  if (tem != quotient)
4524 	    emit_move_insn (quotient, tem);
4525 	  expand_dec (quotient, const1_rtx);
4526 	  emit_label (label5);
4527 	}
4528 	break;
4529 
4530       case CEIL_DIV_EXPR:
4531       case CEIL_MOD_EXPR:
4532 	if (unsignedp)
4533 	  {
4534 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4535 	      {
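		/* Sketch of the sequence generated here: for d == 2^k,
		   ceil (op0 / d) == (op0 >> k) + ((op0 & (d - 1)) != 0).
		   E.g. op0 = 7, d = 4: (7 >> 2) + (3 != 0) == 1 + 1 == 2.  */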
4536 		rtx t1, t2, t3;
4537 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4538 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4539 				   floor_log2 (d), tquotient, 1);
4540 		t2 = expand_binop (compute_mode, and_optab, op0,
4541 				   GEN_INT (d - 1),
4542 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4543 		t3 = gen_reg_rtx (compute_mode);
4544 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4545 				      compute_mode, 1, 1);
4546 		if (t3 == 0)
4547 		  {
4548 		    rtx lab;
4549 		    lab = gen_label_rtx ();
4550 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4551 		    expand_inc (t1, const1_rtx);
4552 		    emit_label (lab);
4553 		    quotient = t1;
4554 		  }
4555 		else
4556 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4557 							  t1, t3),
4558 					    tquotient);
4559 		break;
4560 	      }
4561 
4562 	    /* Try using an instruction that produces both the quotient and
4563 	       remainder, using truncation.  We can easily compensate the
4564 	       quotient or remainder to get ceiling rounding, once we have the
4565 	       remainder.  Note that we also compute the final remainder
4566 	       value here, and return the result right away.  */
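	    /* Illustrative trace of the compensation below: for op0 = 7,
	       op1 = 3, truncating divmod yields quotient 2, remainder 1.
	       The remainder is nonzero, so we adjust to quotient 3,
	       remainder -2; indeed 7 == 3 * 3 + (-2), the ceiling-rounded
	       result.  */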
4567 	    if (target == 0 || GET_MODE (target) != compute_mode)
4568 	      target = gen_reg_rtx (compute_mode);
4569 
4570 	    if (rem_flag)
4571 	      {
4572 		remainder = (REG_P (target)
4573 			     ? target : gen_reg_rtx (compute_mode));
4574 		quotient = gen_reg_rtx (compute_mode);
4575 	      }
4576 	    else
4577 	      {
4578 		quotient = (REG_P (target)
4579 			    ? target : gen_reg_rtx (compute_mode));
4580 		remainder = gen_reg_rtx (compute_mode);
4581 	      }
4582 
4583 	    if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4584 				     remainder, 1))
4585 	      {
4586 		/* This could be computed with a branch-less sequence.
4587 		   Save that for later.  */
4588 		rtx label = gen_label_rtx ();
4589 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4590 				 compute_mode, label);
4591 		expand_inc (quotient, const1_rtx);
4592 		expand_dec (remainder, op1);
4593 		emit_label (label);
4594 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4595 	      }
4596 
4597 	    /* No luck with division elimination or divmod.  Have to do it
4598 	       by conditionally adjusting op0 *and* the result.  */
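	    /* The sequence below relies on the identity (illustrative, for
	       unsigned a and nonzero b):
	       ceil (a / b) == (a == 0 ? 0 : (a - 1) / b + 1).
	       E.g. a = 6, b = 3: (6 - 1) / 3 + 1 == 1 + 1 == 2.  */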
4599 	    {
4600 	      rtx label1, label2;
4601 	      rtx adjusted_op0, tem;
4602 
4603 	      quotient = gen_reg_rtx (compute_mode);
4604 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4605 	      label1 = gen_label_rtx ();
4606 	      label2 = gen_label_rtx ();
4607 	      do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4608 			       compute_mode, label1);
4609 	      emit_move_insn (quotient, const0_rtx);
4610 	      emit_jump_insn (gen_jump (label2));
4611 	      emit_barrier ();
4612 	      emit_label (label1);
4613 	      expand_dec (adjusted_op0, const1_rtx);
4614 	      tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4615 				  quotient, 1, OPTAB_LIB_WIDEN);
4616 	      if (tem != quotient)
4617 		emit_move_insn (quotient, tem);
4618 	      expand_inc (quotient, const1_rtx);
4619 	      emit_label (label2);
4620 	    }
4621 	  }
4622 	else /* signed */
4623 	  {
4624 	    if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4625 		&& INTVAL (op1) >= 0)
4626 	      {
4627 		/* This is extremely similar to the code for the unsigned case
4628 		   above.  For 2.7 we should merge these variants, but for
4629 		   2.6.1 I don't want to touch the code for unsigned since that
4630 		   gets used in C.  The signed case will only be used by other
4631 		   languages (Ada).  */
4632 
4633 		rtx t1, t2, t3;
4634 		unsigned HOST_WIDE_INT d = INTVAL (op1);
4635 		t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4636 				   floor_log2 (d), tquotient, 0);
4637 		t2 = expand_binop (compute_mode, and_optab, op0,
4638 				   GEN_INT (d - 1),
4639 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
4640 		t3 = gen_reg_rtx (compute_mode);
4641 		t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4642 				      compute_mode, 1, 1);
4643 		if (t3 == 0)
4644 		  {
4645 		    rtx lab;
4646 		    lab = gen_label_rtx ();
4647 		    do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4648 		    expand_inc (t1, const1_rtx);
4649 		    emit_label (lab);
4650 		    quotient = t1;
4651 		  }
4652 		else
4653 		  quotient = force_operand (gen_rtx_PLUS (compute_mode,
4654 							  t1, t3),
4655 					    tquotient);
4656 		break;
4657 	      }
4658 
4659 	    /* Try using an instruction that produces both the quotient and
4660 	       remainder, using truncation.  We can easily compensate the
4661 	       quotient or remainder to get ceiling rounding, once we have the
4662 	       remainder.  Note that we also compute the final remainder
4663 	       value here, and return the result right away.  */
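	    /* Illustrative trace of the compensation below: for op0 = 7,
	       op1 = 2, truncating divmod yields quotient 3, remainder 1;
	       the remainder is nonzero and the operands agree in sign, so
	       we adjust to quotient 4, remainder -1.  For op0 = -7 the
	       signs differ and the truncated quotient -3 is already the
	       ceiling.  */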
4664 	    if (target == 0 || GET_MODE (target) != compute_mode)
4665 	      target = gen_reg_rtx (compute_mode);
4666 	    if (rem_flag)
4667 	      {
4668 		remainder = (REG_P (target)
4669 			    ? target : gen_reg_rtx (compute_mode));
4670 		quotient = gen_reg_rtx (compute_mode);
4671 	      }
4672 	    else
4673 	      {
4674 		quotient = (REG_P (target)
4675 			    ? target : gen_reg_rtx (compute_mode));
4676 		remainder = gen_reg_rtx (compute_mode);
4677 	      }
4678 
4679 	    if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4680 				     remainder, 0))
4681 	      {
4682 		/* This could be computed with a branch-less sequence.
4683 		   Save that for later.  */
4684 		rtx tem;
4685 		rtx label = gen_label_rtx ();
4686 		do_cmp_and_jump (remainder, const0_rtx, EQ,
4687 				 compute_mode, label);
4688 		tem = expand_binop (compute_mode, xor_optab, op0, op1,
4689 				    NULL_RTX, 0, OPTAB_WIDEN);
4690 		do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4691 		expand_inc (quotient, const1_rtx);
4692 		expand_dec (remainder, op1);
4693 		emit_label (label);
4694 		return gen_lowpart (mode, rem_flag ? remainder : quotient);
4695 	      }
4696 
4697 	    /* No luck with division elimination or divmod.  Have to do it
4698 	       by conditionally adjusting op0 *and* the result.  */
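	    /* In summary (illustrative, not part of the original comments):
	       when the operands differ in sign, truncation already rounds
	       toward the ceiling; otherwise
	       ceil (a / b) == (a - 1) / b + 1 for 0 < a, 0 < b, and
	       ceil (a / b) == (a + 1) / b + 1 for a < 0, b < 0.  E.g.
	       a = 7, b = 2: (7 - 1) / 2 + 1 == 3 + 1 == 4.  */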
4699 	    {
4700 	      rtx label1, label2, label3, label4, label5;
4701 	      rtx adjusted_op0;
4702 	      rtx tem;
4703 
4704 	      quotient = gen_reg_rtx (compute_mode);
4705 	      adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4706 	      label1 = gen_label_rtx ();
4707 	      label2 = gen_label_rtx ();
4708 	      label3 = gen_label_rtx ();
4709 	      label4 = gen_label_rtx ();
4710 	      label5 = gen_label_rtx ();
4711 	      do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4712 	      do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4713 			       compute_mode, label1);
4714 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4715 				  quotient, 0, OPTAB_LIB_WIDEN);
4716 	      if (tem != quotient)
4717 		emit_move_insn (quotient, tem);
4718 	      emit_jump_insn (gen_jump (label5));
4719 	      emit_barrier ();
4720 	      emit_label (label1);
4721 	      expand_dec (adjusted_op0, const1_rtx);
4722 	      emit_jump_insn (gen_jump (label4));
4723 	      emit_barrier ();
4724 	      emit_label (label2);
4725 	      do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4726 			       compute_mode, label3);
4727 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4728 				  quotient, 0, OPTAB_LIB_WIDEN);
4729 	      if (tem != quotient)
4730 		emit_move_insn (quotient, tem);
4731 	      emit_jump_insn (gen_jump (label5));
4732 	      emit_barrier ();
4733 	      emit_label (label3);
4734 	      expand_inc (adjusted_op0, const1_rtx);
4735 	      emit_label (label4);
4736 	      tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4737 				  quotient, 0, OPTAB_LIB_WIDEN);
4738 	      if (tem != quotient)
4739 		emit_move_insn (quotient, tem);
4740 	      expand_inc (quotient, const1_rtx);
4741 	      emit_label (label5);
4742 	    }
4743 	  }
4744 	break;
4745 
4746       case EXACT_DIV_EXPR:
4747 	if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4748 	  {
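	    /* Since the division is known to be exact, it can be done with
	       a right shift by the number of trailing zero bits of d and a
	       multiplication by the multiplicative inverse of the odd part
	       of d modulo 2^size.  Illustrative 32-bit example: d == 6
	       shifts right by 1 and multiplies by 0xAAAAAAAB, since
	       3 * 0xAAAAAAAB == 1 (mod 2^32).  */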
4749 	    HOST_WIDE_INT d = INTVAL (op1);
4750 	    unsigned HOST_WIDE_INT ml;
4751 	    int pre_shift;
4752 	    rtx t1;
4753 
4754 	    pre_shift = floor_log2 (d & -d);
4755 	    ml = invert_mod2n (d >> pre_shift, size);
4756 	    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4757 			       pre_shift, NULL_RTX, unsignedp);
4758 	    quotient = expand_mult (compute_mode, t1,
4759 				    gen_int_mode (ml, compute_mode),
4760 				    NULL_RTX, 1);
4761 
4762 	    insn = get_last_insn ();
4763 	    set_dst_reg_note (insn, REG_EQUAL,
4764 			      gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4765 					      compute_mode, op0, op1),
4766 			      quotient);
4767 	  }
4768 	break;
4769 
4770       case ROUND_DIV_EXPR:
4771       case ROUND_MOD_EXPR:
4772 	if (unsignedp)
4773 	  {
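	    /* Illustrative summary of the rounding step below: after a
	       truncating divide, the quotient is bumped whenever the
	       remainder exceeds floor ((op1 - 1) / 2), so halves round
	       upward.  E.g. op0 = 8, op1 = 3: quotient 2, remainder 2 > 1,
	       so the quotient becomes 3.  */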
4774 	    rtx tem;
4775 	    rtx label;
4776 	    label = gen_label_rtx ();
4777 	    quotient = gen_reg_rtx (compute_mode);
4778 	    remainder = gen_reg_rtx (compute_mode);
4779 	    if (expand_twoval_binop (udivmod_optab, op0, op1,
				     quotient, remainder, 1) == 0)
4780 	      {
4781 		rtx tem;
4782 		quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4783 					 quotient, 1, OPTAB_LIB_WIDEN);
4784 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4785 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4786 					  remainder, 1, OPTAB_LIB_WIDEN);
4787 	      }
4788 	    tem = plus_constant (op1, -1);
4789 	    tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4790 	    do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4791 	    expand_inc (quotient, const1_rtx);
4792 	    expand_dec (remainder, op1);
4793 	    emit_label (label);
4794 	  }
4795 	else
4796 	  {
4797 	    rtx abs_rem, abs_op1, tem, mask;
4798 	    rtx label;
4799 	    label = gen_label_rtx ();
4800 	    quotient = gen_reg_rtx (compute_mode);
4801 	    remainder = gen_reg_rtx (compute_mode);
4802 	    if (expand_twoval_binop (sdivmod_optab, op0, op1,
				     quotient, remainder, 0) == 0)
4803 	      {
4804 		rtx tem;
4805 		quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4806 					 quotient, 0, OPTAB_LIB_WIDEN);
4807 		tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4808 		remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4809 					  remainder, 0, OPTAB_LIB_WIDEN);
4810 	      }
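	    /* The branch-less adjustment below works as follows
	       (illustrative summary): MASK is (op0 ^ op1) >> (size - 1),
	       i.e. 0 when the operands agree in sign and -1 otherwise, and
	       (MASK ^ X) - MASK conditionally negates X.  Whenever
	       2 * abs (remainder) >= abs (op1), the quotient thus moves
	       one step away from zero and the remainder is compensated by
	       op1 with the matching sign.  */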
4811 	    abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4812 	    abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4813 	    tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4814 				1, NULL_RTX, 1);
4815 	    do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4816 	    tem = expand_binop (compute_mode, xor_optab, op0, op1,
4817 				NULL_RTX, 0, OPTAB_WIDEN);
4818 	    mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4819 				 size - 1, NULL_RTX, 0);
4820 	    tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4821 				NULL_RTX, 0, OPTAB_WIDEN);
4822 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4823 				NULL_RTX, 0, OPTAB_WIDEN);
4824 	    expand_inc (quotient, tem);
4825 	    tem = expand_binop (compute_mode, xor_optab, mask, op1,
4826 				NULL_RTX, 0, OPTAB_WIDEN);
4827 	    tem = expand_binop (compute_mode, sub_optab, tem, mask,
4828 				NULL_RTX, 0, OPTAB_WIDEN);
4829 	    expand_dec (remainder, tem);
4830 	    emit_label (label);
4831 	  }
4832 	return gen_lowpart (mode, rem_flag ? remainder : quotient);
4833 
4834       default:
4835 	gcc_unreachable ();
4836       }
4837 
4838   if (quotient == 0)
4839     {
4840       if (target && GET_MODE (target) != compute_mode)
4841 	target = 0;
4842 
4843       if (rem_flag)
4844 	{
4845 	  /* Try to produce the remainder without producing the quotient.
4846 	     If we seem to have a divmod pattern that does not require widening,
4847 	     don't try widening here.  We should really have a WIDEN argument
4848 	     to expand_twoval_binop, since what we'd really like to do here is
4849 	     1) try a mod insn in compute_mode
4850 	     2) try a divmod insn in compute_mode
4851 	     3) try a div insn in compute_mode and multiply-subtract to get
4852 	        remainder
4853 	     4) try the same things with widening allowed.  */
4854 	  remainder
4855 	    = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4856 				 op0, op1, target,
4857 				 unsignedp,
4858 				 ((optab_handler (optab2, compute_mode)
4859 				   != CODE_FOR_nothing)
4860 				  ? OPTAB_DIRECT : OPTAB_WIDEN));
4861 	  if (remainder == 0)
4862 	    {
4863 	      /* No luck there.  Can we do remainder and divide at once
4864 		 without a library call?  */
4865 	      remainder = gen_reg_rtx (compute_mode);
4866 	      if (! expand_twoval_binop ((unsignedp
4867 					  ? udivmod_optab
4868 					  : sdivmod_optab),
4869 					 op0, op1,
4870 					 NULL_RTX, remainder, unsignedp))
4871 		remainder = 0;
4872 	    }
4873 
4874 	  if (remainder)
4875 	    return gen_lowpart (mode, remainder);
4876 	}
4877 
4878       /* Produce the quotient.  Try a quotient insn, but not a library call.
4879 	 If we have a divmod in this mode, use it in preference to widening
4880 	 the div (for this test we assume it will not fail).  Note that optab2
4881 	 is set to whichever of the two optabs the call below will use.  */
4882       quotient
4883 	= sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4884 			     op0, op1, rem_flag ? NULL_RTX : target,
4885 			     unsignedp,
4886 			     ((optab_handler (optab2, compute_mode)
4887 			       != CODE_FOR_nothing)
4888 			      ? OPTAB_DIRECT : OPTAB_WIDEN));
4889 
4890       if (quotient == 0)
4891 	{
4892 	  /* No luck there.  Try a quotient-and-remainder insn,
4893 	     keeping the quotient alone.  */
4894 	  quotient = gen_reg_rtx (compute_mode);
4895 	  if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4896 				     op0, op1,
4897 				     quotient, NULL_RTX, unsignedp))
4898 	    {
4899 	      quotient = 0;
4900 	      if (! rem_flag)
4901 		/* Still no luck.  If we are not computing the remainder,
4902 		   use a library call for the quotient.  */
4903 		quotient = sign_expand_binop (compute_mode,
4904 					      udiv_optab, sdiv_optab,
4905 					      op0, op1, target,
4906 					      unsignedp, OPTAB_LIB_WIDEN);
4907 	    }
4908 	}
4909     }
4910 
4911   if (rem_flag)
4912     {
4913       if (target && GET_MODE (target) != compute_mode)
4914 	target = 0;
4915 
4916       if (quotient == 0)
4917 	{
4918 	  /* No divide instruction either.  Use library for remainder.  */
4919 	  remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4920 					 op0, op1, target,
4921 					 unsignedp, OPTAB_LIB_WIDEN);
4922 	  /* No remainder function.  Try a quotient-and-remainder
4923 	     function, keeping the remainder.  */
4924 	  if (!remainder)
4925 	    {
4926 	      remainder = gen_reg_rtx (compute_mode);
4927 	      if (!expand_twoval_binop_libfunc
4928 		  (unsignedp ? udivmod_optab : sdivmod_optab,
4929 		   op0, op1,
4930 		   NULL_RTX, remainder,
4931 		   unsignedp ? UMOD : MOD))
4932 		remainder = NULL_RTX;
4933 	    }
4934 	}
4935       else
4936 	{
4937 	  /* We divided.  Now finish doing X - Y * (X / Y).  */
4938 	  remainder = expand_mult (compute_mode, quotient, op1,
4939 				   NULL_RTX, unsignedp);
4940 	  remainder = expand_binop (compute_mode, sub_optab, op0,
4941 				    remainder, target, unsignedp,
4942 				    OPTAB_LIB_WIDEN);
4943 	}
4944     }
4945 
4946   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4947 }
4948 
4949 /* Return a tree node with data type TYPE, describing the value of X.
4950    Usually this is a VAR_DECL, if there is no obvious better choice.
4951    X may be an expression; however, we only support those expressions
4952    generated by loop.c.  */
4953 
4954 tree
4955 make_tree (tree type, rtx x)
4956 {
4957   tree t;
4958 
4959   switch (GET_CODE (x))
4960     {
4961     case CONST_INT:
4962       {
4963 	HOST_WIDE_INT hi = 0;
4964 
4965 	if (INTVAL (x) < 0
4966 	    && !(TYPE_UNSIGNED (type)
4967 		 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4968 		     < HOST_BITS_PER_WIDE_INT)))
4969 	  hi = -1;
4970 
4971 	t = build_int_cst_wide (type, INTVAL (x), hi);
4972 
4973 	return t;
4974       }
4975 
4976     case CONST_DOUBLE:
4977       if (GET_MODE (x) == VOIDmode)
4978 	t = build_int_cst_wide (type,
4979 				CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4980       else
4981 	{
4982 	  REAL_VALUE_TYPE d;
4983 
4984 	  REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4985 	  t = build_real (type, d);
4986 	}
4987 
4988       return t;
4989 
4990     case CONST_VECTOR:
4991       {
4992 	int units = CONST_VECTOR_NUNITS (x);
4993 	tree itype = TREE_TYPE (type);
4994 	tree t = NULL_TREE;
4995 	int i;
4996 
4997 
4998 	/* Build a tree with vector elements.  */
4999 	for (i = units - 1; i >= 0; --i)
5000 	  {
5001 	    rtx elt = CONST_VECTOR_ELT (x, i);
5002 	    t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
5003 	  }
5004 
5005 	return build_vector (type, t);
5006       }
5007 
5008     case PLUS:
5009       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5010 			  make_tree (type, XEXP (x, 1)));
5011 
5012     case MINUS:
5013       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5014 			  make_tree (type, XEXP (x, 1)));
5015 
5016     case NEG:
5017       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5018 
5019     case MULT:
5020       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5021 			  make_tree (type, XEXP (x, 1)));
5022 
5023     case ASHIFT:
5024       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5025 			  make_tree (type, XEXP (x, 1)));
5026 
5027     case LSHIFTRT:
5028       t = unsigned_type_for (type);
5029       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5030 					 make_tree (t, XEXP (x, 0)),
5031 					 make_tree (type, XEXP (x, 1))));
5032 
5033     case ASHIFTRT:
5034       t = signed_type_for (type);
5035       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5036 					 make_tree (t, XEXP (x, 0)),
5037 					 make_tree (type, XEXP (x, 1))));
5038 
5039     case DIV:
5040       if (TREE_CODE (type) != REAL_TYPE)
5041 	t = signed_type_for (type);
5042       else
5043 	t = type;
5044 
5045       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5046 					 make_tree (t, XEXP (x, 0)),
5047 					 make_tree (t, XEXP (x, 1))));
5048     case UDIV:
5049       t = unsigned_type_for (type);
5050       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5051 					 make_tree (t, XEXP (x, 0)),
5052 					 make_tree (t, XEXP (x, 1))));
5053 
5054     case SIGN_EXTEND:
5055     case ZERO_EXTEND:
5056       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5057 					  GET_CODE (x) == ZERO_EXTEND);
5058       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5059 
5060     case CONST:
5061       return make_tree (type, XEXP (x, 0));
5062 
5063     case SYMBOL_REF:
5064       t = SYMBOL_REF_DECL (x);
5065       if (t)
5066 	return fold_convert (type, build_fold_addr_expr (t));
5067       /* else fall through.  */
5068 
5069     default:
5070       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5071 
5072       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5073 	 address mode to pointer mode.  */
5074       if (POINTER_TYPE_P (type))
5075 	x = convert_memory_address_addr_space
5076 	      (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5077 
5078       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5079 	 want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5080       t->decl_with_rtl.rtl = x;
5081 
5082       return t;
5083     }
5084 }
5085 
5086 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5087    and returning TARGET.
5088 
5089    If TARGET is 0, a pseudo-register or constant is returned.  */
5090 
5091 rtx
5092 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5093 {
5094   rtx tem = 0;
5095 
5096   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5097     tem = simplify_binary_operation (AND, mode, op0, op1);
5098   if (tem == 0)
5099     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5100 
5101   if (target == 0)
5102     target = tem;
5103   else if (tem != target)
5104     emit_move_insn (target, tem);
5105   return target;
5106 }
5107 
5108 /* Helper function for emit_store_flag.  */
5109 static rtx
5110 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5111 	     enum machine_mode mode, enum machine_mode compare_mode,
5112 	     int unsignedp, rtx x, rtx y, int normalizep,
5113 	     enum machine_mode target_mode)
5114 {
5115   struct expand_operand ops[4];
5116   rtx op0, last, comparison, subtarget;
5117   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5118 
5119   last = get_last_insn ();
5120   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5121   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5122   if (!x || !y)
5123     {
5124       delete_insns_since (last);
5125       return NULL_RTX;
5126     }
5127 
5128   if (target_mode == VOIDmode)
5129     target_mode = result_mode;
5130   if (!target)
5131     target = gen_reg_rtx (target_mode);
5132 
5133   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5134 
5135   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5136   create_fixed_operand (&ops[1], comparison);
5137   create_fixed_operand (&ops[2], x);
5138   create_fixed_operand (&ops[3], y);
5139   if (!maybe_expand_insn (icode, 4, ops))
5140     {
5141       delete_insns_since (last);
5142       return NULL_RTX;
5143     }
5144   subtarget = ops[0].value;
5145 
5146   /* If we are converting to a wider mode, first convert to
5147      TARGET_MODE, then normalize.  This produces better combining
5148      opportunities on machines that have a SIGN_EXTRACT when we are
5149      testing a single bit.  This mostly benefits the 68k.
5150 
5151      If STORE_FLAG_VALUE does not have the sign bit set when
5152      interpreted in MODE, we can do this conversion as unsigned, which
5153      is usually more efficient.  */
5154   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5155     {
5156       convert_move (target, subtarget,
5157 		    val_signbit_known_clear_p (result_mode,
5158 					       STORE_FLAG_VALUE));
5159       op0 = target;
5160       result_mode = target_mode;
5161     }
5162   else
5163     op0 = subtarget;
5164 
5165   /* If we want to keep subexpressions around, don't reuse our last
5166      target.  */
5167   if (optimize)
5168     subtarget = 0;
5169 
5170   /* Now normalize to the proper value in MODE.  Sometimes we don't
5171      have to do anything.  */
5172   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5173     ;
5174   /* STORE_FLAG_VALUE might be the most negative number, so write
5175      the comparison this way to avoid a compile-time warning.  */
5176   else if (- normalizep == STORE_FLAG_VALUE)
5177     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5178 
5179   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5180      it hard to use a value of just the sign bit due to ANSI integer
5181      constant typing rules.  */
5182   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5183     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5184 			GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5185 			normalizep == 1);
5186   else
5187     {
5188       gcc_assert (STORE_FLAG_VALUE & 1);
5189 
5190       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5191       if (normalizep == -1)
5192 	op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5193     }
5194 
5195   /* If we were converting to a smaller mode, do the conversion now.  */
5196   if (target_mode != result_mode)
5197     {
5198       convert_move (target, op0, 0);
5199       return target;
5200     }
5201   else
5202     return op0;
5203 }
5204 
5205 
5206 /* A subroutine of emit_store_flag only including "tricks" that do not
5207    need a recursive call.  These are kept separate to avoid infinite
5208    loops.  */
5209 
5210 static rtx
5211 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5212 		   enum machine_mode mode, int unsignedp, int normalizep,
5213 		   enum machine_mode target_mode)
5214 {
5215   rtx subtarget;
5216   enum insn_code icode;
5217   enum machine_mode compare_mode;
5218   enum mode_class mclass;
5219   enum rtx_code scode;
5220   rtx tem;
5221 
5222   if (unsignedp)
5223     code = unsigned_condition (code);
5224   scode = swap_condition (code);
5225 
5226   /* If one operand is constant, make it the second one.  Only do this
5227      if the other operand is not constant as well.  */
5228 
5229   if (swap_commutative_operands_p (op0, op1))
5230     {
5231       tem = op0;
5232       op0 = op1;
5233       op1 = tem;
5234       code = swap_condition (code);
5235     }
5236 
5237   if (mode == VOIDmode)
5238     mode = GET_MODE (op0);
5239 
5240   /* For some comparisons with 1 and -1, we can convert this to
5241      comparisons with zero.  This will often produce more opportunities for
5242      store-flag insns.  */
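  /* E.g. (illustrative of the switch below) "x < 1" becomes "x <= 0"
     and "(unsigned) x >= 1" becomes "x != 0"; comparisons against zero
     are what most cstore patterns handle directly.  */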
5243 
5244   switch (code)
5245     {
5246     case LT:
5247       if (op1 == const1_rtx)
5248 	op1 = const0_rtx, code = LE;
5249       break;
5250     case LE:
5251       if (op1 == constm1_rtx)
5252 	op1 = const0_rtx, code = LT;
5253       break;
5254     case GE:
5255       if (op1 == const1_rtx)
5256 	op1 = const0_rtx, code = GT;
5257       break;
5258     case GT:
5259       if (op1 == constm1_rtx)
5260 	op1 = const0_rtx, code = GE;
5261       break;
5262     case GEU:
5263       if (op1 == const1_rtx)
5264 	op1 = const0_rtx, code = NE;
5265       break;
5266     case LTU:
5267       if (op1 == const1_rtx)
5268 	op1 = const0_rtx, code = EQ;
5269       break;
5270     default:
5271       break;
5272     }
5273 
5274   /* If we are comparing a double-word integer with zero or -1, we can
5275      convert the comparison into one involving a single word.  */
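  /* Illustrative: with 32-bit words, a DImode "x == 0" becomes
     "(xhi | xlo) == 0" in SImode and "x == -1" becomes
     "(xhi & xlo) == -1", while LT/GE against zero only need the sign
     bit of the high word.  */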
5276   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5277       && GET_MODE_CLASS (mode) == MODE_INT
5278       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5279     {
5280       if ((code == EQ || code == NE)
5281 	  && (op1 == const0_rtx || op1 == constm1_rtx))
5282 	{
5283 	  rtx op00, op01;
5284 
5285 	  /* Do a logical OR or AND of the two words and compare the
5286 	     result.  */
5287 	  op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5288 	  op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5289 	  tem = expand_binop (word_mode,
5290 			      op1 == const0_rtx ? ior_optab : and_optab,
5291 			      op00, op01, NULL_RTX, unsignedp,
5292 			      OPTAB_DIRECT);
5293 
5294 	  if (tem != 0)
5295 	    tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5296 				   unsignedp, normalizep);
5297 	}
5298       else if ((code == LT || code == GE) && op1 == const0_rtx)
5299 	{
5300 	  rtx op0h;
5301 
5302 	  /* If testing the sign bit, can just test on high word.  */
5303 	  op0h = simplify_gen_subreg (word_mode, op0, mode,
5304 				      subreg_highpart_offset (word_mode,
5305 							      mode));
5306 	  tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5307 				 unsignedp, normalizep);
5308 	}
5309       else
5310 	tem = NULL_RTX;
5311 
5312       if (tem)
5313 	{
5314 	  if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5315 	    return tem;
5316 	  if (!target)
5317 	    target = gen_reg_rtx (target_mode);
5318 
5319 	  convert_move (target, tem,
5320 			!val_signbit_known_set_p (word_mode,
5321 						  (normalizep ? normalizep
5322 						   : STORE_FLAG_VALUE)));
5323 	  return target;
5324 	}
5325     }
5326 
5327   /* If this is A < 0 or A >= 0, we can do this by taking the one's
5328      complement of A (for GE) and shifting the sign bit to the low bit.  */
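  /* Illustrative: in SImode, "A < 0" becomes "(unsigned) A >> 31" for
     a 0/1 result or "A >> 31" for a 0/-1 result; "A >= 0" first takes
     the one's complement of A.  */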
5329   if (op1 == const0_rtx && (code == LT || code == GE)
5330       && GET_MODE_CLASS (mode) == MODE_INT
5331       && (normalizep || STORE_FLAG_VALUE == 1
5332 	  || val_signbit_p (mode, STORE_FLAG_VALUE)))
5333     {
5334       subtarget = target;
5335 
5336       if (!target)
5337 	target_mode = mode;
5338 
5339       /* If the result is to be wider than OP0, it is best to convert it
5340 	 first.  If it is to be narrower, it is *incorrect* to convert it
5341 	 first.  */
5342       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5343 	{
5344 	  op0 = convert_modes (target_mode, mode, op0, 0);
5345 	  mode = target_mode;
5346 	}
5347 
5348       if (target_mode != mode)
5349 	subtarget = 0;
5350 
5351       if (code == GE)
5352 	op0 = expand_unop (mode, one_cmpl_optab, op0,
5353 			   ((STORE_FLAG_VALUE == 1 || normalizep)
5354 			    ? 0 : subtarget), 0);
5355 
5356       if (STORE_FLAG_VALUE == 1 || normalizep)
5357 	/* If we are supposed to produce a 0/1 value, we want to do
5358 	   a logical shift from the sign bit to the low-order bit; for
5359 	   a -1/0 value, we do an arithmetic shift.  */
5360 	op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5361 			    GET_MODE_BITSIZE (mode) - 1,
5362 			    subtarget, normalizep != -1);
5363 
5364       if (mode != target_mode)
5365 	op0 = convert_modes (target_mode, mode, op0, 0);
5366 
5367       return op0;
5368     }
5369 
5370   mclass = GET_MODE_CLASS (mode);
5371   for (compare_mode = mode; compare_mode != VOIDmode;
5372        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5373     {
5374       enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5375       icode = optab_handler (cstore_optab, optab_mode);
5376       if (icode != CODE_FOR_nothing)
5377 	{
5378 	  do_pending_stack_adjust ();
5379 	  tem = emit_cstore (target, icode, code, mode, compare_mode,
5380 			     unsignedp, op0, op1, normalizep, target_mode);
5381 	  if (tem)
5382 	    return tem;
5383 
5384 	  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5385 	    {
5386 	      tem = emit_cstore (target, icode, scode, mode, compare_mode,
5387 				 unsignedp, op1, op0, normalizep, target_mode);
5388 	      if (tem)
5389 	        return tem;
5390 	    }
5391 	  break;
5392 	}
5393     }
5394 
5395   return 0;
5396 }
5397 
5398 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5399    and storing in TARGET.  Normally return TARGET.
5400    Return 0 if that cannot be done.
5401 
5402    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5403    it is VOIDmode, they cannot both be CONST_INT.
5404 
5405    UNSIGNEDP is for the case where we have to widen the operands
5406    to perform the operation.  It says to use zero-extension.
5407 
5408    NORMALIZEP is 1 if we should convert the result to be either zero
5409    or one.  NORMALIZEP is -1 if we should convert the result to be
5410    either zero or -1.  If NORMALIZEP is zero, the result will be left
5411    "raw" out of the scc insn.  */
5412 
5413 rtx
5414 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5415 		 enum machine_mode mode, int unsignedp, int normalizep)
5416 {
5417   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5418   enum rtx_code rcode;
5419   rtx subtarget;
5420   rtx tem, last, trueval;
5421 
5422   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5423 			   target_mode);
5424   if (tem)
5425     return tem;
5426 
5427   /* If we reached here, we can't do this with a scc insn; however, there
5428      are some comparisons that can be done in other ways.  Don't do any
5429      of these cases if branches are very cheap.  */
5430   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5431     return 0;
5432 
5433   /* See what we need to return.  We can only return a 1, -1, or the
5434      sign bit.  */
5435 
5436   if (normalizep == 0)
5437     {
5438       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5439 	normalizep = STORE_FLAG_VALUE;
5440 
5441       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5442 	;
5443       else
5444 	return 0;
5445     }
5446 
5447   last = get_last_insn ();
5448 
5449   /* If optimizing, use different pseudo registers for each insn, instead
5450      of reusing the same pseudo.  This leads to better CSE, but slows
5451      down the compiler, since there are more pseudos.  */
5452   subtarget = (!optimize
5453 	       && (target_mode == mode)) ? target : NULL_RTX;
5454   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5455 
5456   /* For floating-point comparisons, try the reverse comparison or try
5457      changing the "orderedness" of the comparison.  */
5458   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5459     {
5460       enum rtx_code first_code;
5461       bool and_them;
5462 
5463       rcode = reverse_condition_maybe_unordered (code);
5464       if (can_compare_p (rcode, mode, ccp_store_flag)
5465           && (code == ORDERED || code == UNORDERED
5466 	      || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5467 	      || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5468 	{
5469           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5470 		          || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5471 
5472 	  /* For the reverse comparison, use either an addition or a XOR.  */
5473           if (want_add
5474 	      && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5475 			   optimize_insn_for_speed_p ()) == 0)
5476 	    {
5477 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5478 				       STORE_FLAG_VALUE, target_mode);
5479 	      if (tem)
5480                 return expand_binop (target_mode, add_optab, tem,
5481 				     GEN_INT (normalizep),
5482 				     target, 0, OPTAB_WIDEN);
5483 	    }
5484           else if (!want_add
5485 	           && rtx_cost (trueval, XOR, 1,
5486 			        optimize_insn_for_speed_p ()) == 0)
5487 	    {
5488 	      tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5489 				       normalizep, target_mode);
5490 	      if (tem)
5491                 return expand_binop (target_mode, xor_optab, tem, trueval,
5492 				     target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5493 	    }
5494 	}
5495 
5496       delete_insns_since (last);
5497 
5498       /* Cannot split ORDERED and UNORDERED; we can only try the trick above.  */
5499       if (code == ORDERED || code == UNORDERED)
5500 	return 0;
5501 
5502       and_them = split_comparison (code, mode, &first_code, &code);
5503 
5504       /* If there are no NaNs, the first comparison should always fall through.
5505          Effectively change the comparison to the other one.  */
5506       if (!HONOR_NANS (mode))
5507 	{
5508           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5509 	  return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5510 				    target_mode);
5511 	}
5512 
5513 #ifdef HAVE_conditional_move
5514       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5515 	 conditional move.  */
5516       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5517 			       normalizep, target_mode);
5518       if (tem == 0)
5519 	return 0;
5520 
5521       if (and_them)
5522         tem = emit_conditional_move (target, code, op0, op1, mode,
5523 				     tem, const0_rtx, GET_MODE (tem), 0);
5524       else
5525         tem = emit_conditional_move (target, code, op0, op1, mode,
5526 				     trueval, tem, GET_MODE (tem), 0);
5527 
5528       if (tem == 0)
5529         delete_insns_since (last);
5530       return tem;
5531 #else
5532       return 0;
5533 #endif
5534     }
5535 
5536   /* The remaining tricks only apply to integer comparisons.  */
5537 
5538   if (GET_MODE_CLASS (mode) != MODE_INT)
5539     return 0;
5540 
5541   /* If this is an equality comparison of integers, we can try to exclusive-or
5542      (or subtract) the two operands and use a recursive call to try the
5543      comparison with zero.  Don't do any of these cases if branches are
5544      very cheap.  */
5545 
5546   if ((code == EQ || code == NE) && op1 != const0_rtx)
5547     {
5548       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5549 			  OPTAB_WIDEN);
5550 
5551       if (tem == 0)
5552 	tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5553 			    OPTAB_WIDEN);
5554       if (tem != 0)
5555 	tem = emit_store_flag (target, code, tem, const0_rtx,
5556 			       mode, unsignedp, normalizep);
5557       if (tem != 0)
5558 	return tem;
5559 
5560       delete_insns_since (last);
5561     }
5562 
5563   /* For integer comparisons, try the reverse comparison.  However, for
5564      small X, if we would have to extend it anyway, implementing "X != 0"
5565      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5566   rcode = reverse_condition (code);
5567   if (can_compare_p (rcode, mode, ccp_store_flag)
5568       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5569 	    && code == NE
5570 	    && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5571 	    && op1 == const0_rtx))
5572     {
5573       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5574 		      || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5575 
5576       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5577       if (want_add
5578 	  && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5579 		       optimize_insn_for_speed_p ()) == 0)
5580 	{
5581 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5582 				   STORE_FLAG_VALUE, target_mode);
5583 	  if (tem != 0)
5584             tem = expand_binop (target_mode, add_optab, tem,
5585 				GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5586 	}
5587       else if (!want_add
5588 	       && rtx_cost (trueval, XOR, 1,
5589 			    optimize_insn_for_speed_p ()) == 0)
5590 	{
5591 	  tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5592 				   normalizep, target_mode);
5593 	  if (tem != 0)
5594             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5595 				INTVAL (trueval) >= 0, OPTAB_WIDEN);
5596 	}
5597 
5598       if (tem != 0)
5599 	return tem;
5600       delete_insns_since (last);
5601     }
5602 
5603   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5604      the constant zero.  Reject all other comparisons at this point.  Only
5605      do LE and GT if branches are expensive, since these sequences are costly on
5606      2-operand machines.  */
5607 
5608   if (op1 != const0_rtx
5609       || (code != EQ && code != NE
5610 	  && (BRANCH_COST (optimize_insn_for_speed_p (),
5611 			   false) <= 1 || (code != LE && code != GT))))
5612     return 0;
5613 
5614   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5615      do the necessary operation below.  */
5616 
5617   tem = 0;
5618 
5619   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5620      the sign bit set.  */
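  /* Illustrative check: A = 0 gives 0 | -1 == -1 (sign bit set);
     A = 5 gives 5 | 4 == 5 (clear); A = -3 gives -3 | -4 == -3 (set).  */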
5621 
5622   if (code == LE)
5623     {
5624       /* This is destructive, so SUBTARGET can't be OP0.  */
5625       if (rtx_equal_p (subtarget, op0))
5626 	subtarget = 0;
5627 
5628       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5629 			  OPTAB_WIDEN);
5630       if (tem)
5631 	tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5632 			    OPTAB_WIDEN);
5633     }
5634 
5635   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5636      number of bits in the mode of OP0, minus one.  */
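  /* Illustrative check in SImode: A = 5 gives (5 >> 31) - 5 == -5
     (sign bit set); A = -3 gives (-3 >> 31) - (-3) == -1 + 3 == 2
     (clear); A = 0 gives 0 (clear).  */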
5637 
5638   if (code == GT)
5639     {
5640       if (rtx_equal_p (subtarget, op0))
5641 	subtarget = 0;
5642 
5643       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5644 			  GET_MODE_BITSIZE (mode) - 1,
5645 			  subtarget, 0);
5646       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5647 			  OPTAB_WIDEN);
5648     }
5649 
5650   if (code == EQ || code == NE)
5651     {
5652       /* For EQ or NE, one way to do the comparison is to apply an operation
5653 	 that converts the operand into a positive number if it is nonzero
5654 	 or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5655 	 for NE we negate.  This puts the result in the sign bit.  Then we
5656 	 normalize with a shift, if needed.
5657 
5658 	 Two operations that can do the above actions are ABS and FFS, so try
5659 	 them.  If that doesn't work, and MODE is smaller than a full word,
5660 	 we can use zero-extension to the wider mode (an unsigned conversion)
5661 	 as the operation.  */
5662 
5663       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5664 	 that is compensated by the subsequent overflow when subtracting
5665 	 one / negating.  */
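      /* Illustrative: with ABS, "x == 0" computes abs (x) - 1, whose
	 sign bit is set exactly when x == 0, while "x != 0" computes
	 -abs (x), whose sign bit is set exactly when x != 0 (INT_MIN
	 maps to itself under both ABS and negation, keeping the sign
	 bit set).  */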
5666 
5667       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5668 	tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5669       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5670 	tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5671       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5672 	{
5673 	  tem = convert_modes (word_mode, mode, op0, 1);
5674 	  mode = word_mode;
5675 	}
5676 
5677       if (tem != 0)
5678 	{
5679 	  if (code == EQ)
5680 	    tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5681 				0, OPTAB_WIDEN);
5682 	  else
5683 	    tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5684 	}
5685 
5686       /* If we couldn't do it that way, for NE we can "or" the two's complement
5687 	 of the value with itself.  For EQ, we take the one's complement of
5688 	 that "or", which is an extra insn, so we only handle EQ if branches
5689 	 are expensive.  */
5690 
5691       if (tem == 0
5692 	  && (code == NE
5693 	      || BRANCH_COST (optimize_insn_for_speed_p (),
5694 		      	      false) > 1))
5695 	{
5696 	  if (rtx_equal_p (subtarget, op0))
5697 	    subtarget = 0;
5698 
5699 	  tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5700 	  tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5701 			      OPTAB_WIDEN);
5702 
5703 	  if (tem && code == EQ)
5704 	    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5705 	}
5706     }
5707 
5708   if (tem && normalizep)
5709     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5710 			GET_MODE_BITSIZE (mode) - 1,
5711 			subtarget, normalizep == 1);
5712 
5713   if (tem)
5714     {
5715       if (!target)
5716         ;
5717       else if (GET_MODE (tem) != target_mode)
5718 	{
5719 	  convert_move (target, tem, 0);
5720 	  tem = target;
5721 	}
5722       else if (!subtarget)
5723 	{
5724 	  emit_move_insn (target, tem);
5725 	  tem = target;
5726 	}
5727     }
5728   else
5729     delete_insns_since (last);
5730 
5731   return tem;
5732 }
5733 
5734 /* Like emit_store_flag, but always succeeds.  */
5735 
5736 rtx
5737 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5738 		       enum machine_mode mode, int unsignedp, int normalizep)
5739 {
5740   rtx tem, label;
5741   rtx trueval, falseval;
5742 
5743   /* First see if emit_store_flag can do the job.  */
5744   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5745   if (tem != 0)
5746     return tem;
5747 
5748   if (!target)
5749     target = gen_reg_rtx (word_mode);
5750 
5751   /* If this failed, we have to do this with set/compare/jump/set code.
5752      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5753   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5754   if (code == NE
5755       && GET_MODE_CLASS (mode) == MODE_INT
5756       && REG_P (target)
5757       && op0 == target
5758       && op1 == const0_rtx)
5759     {
5760       label = gen_label_rtx ();
5761       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5762 			       mode, NULL_RTX, NULL_RTX, label, -1);
5763       emit_move_insn (target, trueval);
5764       emit_label (label);
5765       return target;
5766     }
5767 
5768   if (!REG_P (target)
5769       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5770     target = gen_reg_rtx (GET_MODE (target));
5771 
5772   /* Jump in the right direction if the target cannot implement CODE
5773      but can jump on its reverse condition.  */
5774   falseval = const0_rtx;
5775   if (! can_compare_p (code, mode, ccp_jump)
5776       && (! FLOAT_MODE_P (mode)
5777           || code == ORDERED || code == UNORDERED
5778           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5779           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5780     {
5781       enum rtx_code rcode;
5782       if (FLOAT_MODE_P (mode))
5783         rcode = reverse_condition_maybe_unordered (code);
5784       else
5785         rcode = reverse_condition (code);
5786 
5787       /* Canonicalize to UNORDERED for the libcall.  */
5788       if (can_compare_p (rcode, mode, ccp_jump)
5789           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5790 	{
5791 	  falseval = trueval;
5792 	  trueval = const0_rtx;
5793 	  code = rcode;
5794 	}
5795     }
5796 
5797   emit_move_insn (target, trueval);
5798   label = gen_label_rtx ();
5799   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5800 			   NULL_RTX, label, -1);
5801 
5802   emit_move_insn (target, falseval);
5803   emit_label (label);
5804 
5805   return target;
5806 }
5807 
5808 /* Perform possibly multi-word comparison and conditional jump to LABEL
5809    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5810    if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.  This is
5811 
5812 static void
5813 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5814 		 rtx label)
5815 {
5816   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5817   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5818 			   NULL_RTX, NULL_RTX, label, -1);
5819 }
5820