1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2021 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44 
45 
46 /* Decompose multi-word pseudo-registers into individual
47    pseudo-registers when possible and profitable.  This is possible
48    when all the uses of a multi-word register are via SUBREG, or are
49    copies of the register to another location.  Breaking apart the
50    register permits more CSE and permits better register allocation.
51    This is profitable if the machine does not have move instructions
52    to do this.
53 
54    This pass only splits moves with modes that are wider than
55    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56    integer modes that are twice the width of word_mode.  The latter
57    could be generalized if there was a need to do this, but the trend in
58    architectures is to not need this.
59 
60    There are two useful preprocessor defines for use by maintainers:
61 
62    #define LOG_COSTS 1
63 
64    if you wish to see the actual cost estimates that are being used
65    for each mode wider than word mode and the cost estimates for zero
66    extension and the shifts.   This can be useful when port maintainers
67    are tuning insn rtx costs.
68 
69    #define FORCE_LOWERING 1
70 
71    if you wish to test the pass with all the transformation forced on.
72    This can be useful for finding bugs in the transformations.  */
73 
/* Maintainer knobs; see the comment block above.  Both must be 0 in
   committed code.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Used by propagate_pseudo_copies to chase
   copy chains.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/choice state; filled in by init_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the current target's lowering state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
105 
106 /* Return true if MODE is a mode we know how to lower.  When returning true,
107    store its byte size in *BYTES and its word size in *WORDS.  */
108 
109 static inline bool
interesting_mode_p(machine_mode mode,unsigned int * bytes,unsigned int * words)110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 		    unsigned int *words)
112 {
113   if (!GET_MODE_SIZE (mode).is_constant (bytes))
114     return false;
115   *words = CEIL (*bytes, UNITS_PER_WORD);
116   return true;
117 }
118 
/* Scratch RTXes used while computing costs.  These are built once in
   init_lower_subreg and mutated in place (PUT_MODE/PUT_CODE) while
   querying the cost hooks.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
134 
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
137 
138 static int
shift_cost(bool speed_p,struct cost_rtxes * rtxes,enum rtx_code code,machine_mode mode,int op1)139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 	    machine_mode mode, int op1)
141 {
142   PUT_CODE (rtxes->shift, code);
143   PUT_MODE (rtxes->shift, mode);
144   PUT_MODE (rtxes->source, mode);
145   XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146   return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148 
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150    to true if it is profitable to split a double-word CODE shift
151    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
152    for speed or size profitability.
153 
154    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
155    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
156    is the cost of moving between word registers.  */
157 
158 static void
compute_splitting_shift(bool speed_p,struct cost_rtxes * rtxes,bool * splitting,enum rtx_code code,int word_move_zero_cost,int word_move_cost)159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 			 bool *splitting, enum rtx_code code,
161 			 int word_move_zero_cost, int word_move_cost)
162 {
163   int wide_cost, narrow_cost, upper_cost, i;
164 
165   for (i = 0; i < BITS_PER_WORD; i++)
166     {
167       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 			      i + BITS_PER_WORD);
169       if (i == 0)
170 	narrow_cost = word_move_cost;
171       else
172 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173 
174       if (code != ASHIFTRT)
175 	upper_cost = word_move_zero_cost;
176       else if (i == BITS_PER_WORD - 1)
177 	upper_cost = word_move_cost;
178       else
179 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 				 BITS_PER_WORD - 1);
181 
182       if (LOG_COSTS)
183 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186 
187       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 	splitting[i] = true;
189     }
190 }
191 
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   stored in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline: cost of loading zero into a word-mode register...  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* ...and of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* Decide, for every constant-sized multi-word mode, whether a move in
     that mode is at least as expensive as the equivalent sequence of
     word moves; if so, record that moves in that mode should be split.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Per-shift-count profitability for the three double-word shifts.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
271 
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();

  /* Build the scratch rtxes used for cost queries.  The register
     numbers just need to be distinct pseudo numbers; the SET, ZERO_EXTEND
     and ASHIFT wrappers are mutated in place by compute_costs.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Fill in choices[] for both size (false) and speed (true).  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
299 
300 static bool
simple_move_operand(rtx x)301 simple_move_operand (rtx x)
302 {
303   if (GET_CODE (x) == SUBREG)
304     x = SUBREG_REG (x);
305 
306   if (!OBJECT_P (x))
307     return false;
308 
309   if (GET_CODE (x) == LABEL_REF
310       || GET_CODE (x) == SYMBOL_REF
311       || GET_CODE (x) == HIGH
312       || GET_CODE (x) == CONST)
313     return false;
314 
315   if (MEM_P (x)
316       && (MEM_VOLATILE_P (x)
317 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318     return false;
319 
320   return true;
321 }
322 
323 /* If X is an operator that can be treated as a simple move that we
324    can split, then return the operand that is operated on.  */
325 
326 static rtx
operand_for_swap_move_operator(rtx x)327 operand_for_swap_move_operator (rtx x)
328 {
329   /* A word sized rotate of a register pair is equivalent to swapping
330      the registers in the register pair.  */
331   if (GET_CODE (x) == ROTATE
332       && GET_MODE (x) == twice_word_mode
333       && simple_move_operand (XEXP (x, 0))
334       && CONST_INT_P (XEXP (x, 1))
335       && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
336     return XEXP (x, 0);
337 
338   return NULL_RTX;
339 }
340 
341 /* If INSN is a single set between two objects that we want to split,
342    return the single set.  SPEED_P says whether we are optimizing
343    INSN for speed or size.
344 
345    INSN should have been passed to recog and extract_insn before this
346    is called.  */
347 
348 static rtx
simple_move(rtx_insn * insn,bool speed_p)349 simple_move (rtx_insn *insn, bool speed_p)
350 {
351   rtx x, op;
352   rtx set;
353   machine_mode mode;
354 
355   if (recog_data.n_operands != 2)
356     return NULL_RTX;
357 
358   set = single_set (insn);
359   if (!set)
360     return NULL_RTX;
361 
362   x = SET_DEST (set);
363   if (x != recog_data.operand[0] && x != recog_data.operand[1])
364     return NULL_RTX;
365   if (!simple_move_operand (x))
366     return NULL_RTX;
367 
368   x = SET_SRC (set);
369   if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
370     x = op;
371 
372   if (x != recog_data.operand[0] && x != recog_data.operand[1])
373     return NULL_RTX;
374   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
375      things like x86 rdtsc which returns a DImode value.  */
376   if (GET_CODE (x) != ASM_OPERANDS
377       && !simple_move_operand (x))
378     return NULL_RTX;
379 
380   /* We try to decompose in integer modes, to avoid generating
381      inefficient code copying between integer and floating point
382      registers.  That means that we can't decompose if this is a
383      non-integer mode for which there is no integer mode of the same
384      size.  */
385   mode = GET_MODE (SET_DEST (set));
386   if (!SCALAR_INT_MODE_P (mode)
387       && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
388     return NULL_RTX;
389 
390   /* Reject PARTIAL_INT modes.  They are used for processor specific
391      purposes and it's probably best not to tamper with them.  */
392   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393     return NULL_RTX;
394 
395   if (!choices[speed_p].move_modes_to_split[(int) mode])
396     return NULL_RTX;
397 
398   return set;
399 }
400 
401 /* If SET is a copy from one multi-word pseudo-register to another,
402    record that in reg_copy_graph.  Return whether it is such a
403    copy.  */
404 
405 static bool
find_pseudo_copy(rtx set)406 find_pseudo_copy (rtx set)
407 {
408   rtx dest = SET_DEST (set);
409   rtx src = SET_SRC (set);
410   rtx op;
411   unsigned int rd, rs;
412   bitmap b;
413 
414   if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
415     src = op;
416 
417   if (!REG_P (dest) || !REG_P (src))
418     return false;
419 
420   rd = REGNO (dest);
421   rs = REGNO (src);
422   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
423     return false;
424 
425   b = reg_copy_graph[rs];
426   if (b == NULL)
427     {
428       b = BITMAP_ALLOC (NULL);
429       reg_copy_graph[rs] = b;
430     }
431 
432   bitmap_set_bit (b, rd);
433 
434   return true;
435 }
436 
437 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
438    where they are copied to another register, add the register to
439    which they are copied to DECOMPOSABLE_CONTEXT.  Use
440    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
441    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
442 
443 static void
propagate_pseudo_copies(void)444 propagate_pseudo_copies (void)
445 {
446   auto_bitmap queue, propagate;
447 
448   bitmap_copy (queue, decomposable_context);
449   do
450     {
451       bitmap_iterator iter;
452       unsigned int i;
453 
454       bitmap_clear (propagate);
455 
456       EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
457 	{
458 	  bitmap b = reg_copy_graph[i];
459 	  if (b)
460 	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
461 	}
462 
463       bitmap_and_compl (queue, propagate, decomposable_context);
464       bitmap_ior_into (decomposable_context, propagate);
465     }
466   while (!bitmap_empty_p (queue));
467 }
468 
/* A pointer to one of these values is passed to
   find_decomposable_subregs to describe the insn currently being
   scanned.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
481 
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Skip modes whose size is not compile-time constant.  */
	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the remaining subrtxes
	     to avoid iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1
	      && inner_words > 1
	      /* Don't allow to decompose floating point subregs of
		 multi-word pseudos if the floating point mode does
		 not have word size, because otherwise we'd generate
		 a subreg with that floating mode from a different
		 sized integral pseudo which is not allowed by
		 validate_subreg.  */
	      && (!FLOAT_MODE_P (GET_MODE (x))
		  || outer_size == UNITS_PER_WORD))
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we cannot decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's iteration.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
608 
609 /* Decompose REGNO into word-sized components.  We smash the REG node
610    in place.  This ensures that (1) something goes wrong quickly if we
611    fail to make some replacement, and (2) the debug information inside
612    the symbol table is automatically kept up to date.  */
613 
614 static void
decompose_register(unsigned int regno)615 decompose_register (unsigned int regno)
616 {
617   rtx reg;
618   unsigned int size, words, i;
619   rtvec v;
620 
621   reg = regno_reg_rtx[regno];
622 
623   regno_reg_rtx[regno] = NULL_RTX;
624 
625   if (!interesting_mode_p (GET_MODE (reg), &size, &words))
626     gcc_unreachable ();
627 
628   v = rtvec_alloc (words);
629   for (i = 0; i < words; ++i)
630     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
631 
632   PUT_CODE (reg, CONCATN);
633   XVEC (reg, 0) = v;
634 
635   if (dump_file)
636     {
637       fprintf (dump_file, "; Splitting reg %u ->", regno);
638       for (i = 0; i < words; ++i)
639 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
640       fputc ('\n', dump_file);
641     }
642 }
643 
/* Get a SUBREG of a CONCATN, i.e. the part of decomposed register OP
   that corresponds to (subreg:OUTERMODE OP ORIG_BYTE).  Return NULL_RTX
   if the requested piece does not line up with the decomposition.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* INNER_SIZE becomes the size of one CONCATN element; select the
     element containing BYTE.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* Offset of the requested piece within the selected element; fail
     if the piece straddles two elements.  */
  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
688 
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, zero-offset SUBREG: recurse straight into the
	 CONCATN with the inner mode.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve the outer SUBREG first...  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* ...and if that fails, fold both offsets into one direct
	     extraction from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
744 
745 /* Return whether we should resolve X into the registers into which it
746    was decomposed.  */
747 
748 static bool
resolve_reg_p(rtx x)749 resolve_reg_p (rtx x)
750 {
751   return GET_CODE (x) == CONCATN;
752 }
753 
754 /* Return whether X is a SUBREG of a register which we need to
755    resolve.  */
756 
757 static bool
resolve_subreg_p(rtx x)758 resolve_subreg_p (rtx x)
759 {
760   if (GET_CODE (x) != SUBREG)
761     return false;
762   return resolve_reg_p (SUBREG_REG (x));
763 }
764 
/* Look for SUBREGs in *LOC which need to be decomposed, replacing each
   with the matching piece of the decomposed register (queued via
   validate_change on INSN).  Return true if *LOC contains a reference
   that cannot be resolved in place; see below.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      /* Only notes (INSN == NULL) may fail to simplify.  */
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller commits or cancels the
	     whole group of changes.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
802 
803 /* Resolve any decomposed registers which appear in register notes on
804    INSN.  */
805 
806 static void
resolve_reg_notes(rtx_insn * insn)807 resolve_reg_notes (rtx_insn *insn)
808 {
809   rtx *pnote, note;
810 
811   note = find_reg_equal_equiv_note (insn);
812   if (note)
813     {
814       int old_count = num_validated_changes ();
815       if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
816 	remove_note (insn, note);
817       else
818 	if (old_count != num_validated_changes ())
819 	  df_notes_rescan (insn);
820     }
821 
822   pnote = &REG_NOTES (insn);
823   while (*pnote != NULL_RTX)
824     {
825       bool del = false;
826 
827       note = *pnote;
828       switch (REG_NOTE_KIND (note))
829 	{
830 	case REG_DEAD:
831 	case REG_UNUSED:
832 	  if (resolve_reg_p (XEXP (note, 0)))
833 	    del = true;
834 	  break;
835 
836 	default:
837 	  break;
838 	}
839 
840       if (del)
841 	*pnote = XEXP (note, 1);
842       else
843 	pnote = &XEXP (note, 1);
844     }
845 }
846 
847 /* Return whether X can be decomposed into subwords.  */
848 
849 static bool
can_decompose_p(rtx x)850 can_decompose_p (rtx x)
851 {
852   if (REG_P (x))
853     {
854       unsigned int regno = REGNO (x);
855 
856       if (HARD_REGISTER_NUM_P (regno))
857 	{
858 	  unsigned int byte, num_bytes, num_words;
859 
860 	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
861 	    return false;
862 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
863 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
864 	      return false;
865 	  return true;
866 	}
867       else
868 	return !bitmap_bit_p (subreg_context, regno);
869     }
870 
871   return true;
872 }
873 
874 /* OPND is a concatn operand this is used with a simple move operator.
875    Return a new rtx with the concatn's operands swapped.  */
876 
877 static rtx
resolve_operand_for_swap_move_operator(rtx opnd)878 resolve_operand_for_swap_move_operator (rtx opnd)
879 {
880   gcc_assert (GET_CODE (opnd) == CONCATN);
881   rtx concatn = copy_rtx (opnd);
882   rtx op0 = XVECEXP (concatn, 0, 0);
883   rtx op1 = XVECEXP (concatn, 0, 1);
884   XVECEXP (concatn, 0, 0) = op1;
885   XVECEXP (concatn, 0, 1) = op0;
886   return concatn;
887 }
888 
889 /* Decompose the registers used in a simple move SET within INSN.  If
890    we don't change anything, return INSN, otherwise return the start
891    of the sequence of moves.  */
892 
893 static rtx_insn *
resolve_simple_move(rtx set,rtx_insn * insn)894 resolve_simple_move (rtx set, rtx_insn *insn)
895 {
896   rtx src, dest, real_dest, src_op;
897   rtx_insn *insns;
898   machine_mode orig_mode, dest_mode;
899   unsigned int orig_size, words;
900   bool pushing;
901 
902   src = SET_SRC (set);
903   dest = SET_DEST (set);
904   orig_mode = GET_MODE (dest);
905 
906   if (!interesting_mode_p (orig_mode, &orig_size, &words))
907     gcc_unreachable ();
908   gcc_assert (words > 1);
909 
910   start_sequence ();
911 
912   /* We have to handle copying from a SUBREG of a decomposed reg where
913      the SUBREG is larger than word size.  Rather than assume that we
914      can take a word_mode SUBREG of the destination, we copy to a new
915      register and then copy that to the destination.  */
916 
917   real_dest = NULL_RTX;
918 
919   if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
920     {
921       if (resolve_reg_p (dest))
922 	{
923 	  /* DEST is a CONCATN, so swap its operands and strip
924 	     SRC's operator.  */
925 	  dest = resolve_operand_for_swap_move_operator (dest);
926 	  src = src_op;
927 	}
928       else if (resolve_reg_p (src_op))
929 	{
930 	  /* SRC is an operation on a CONCATN, so strip the operator and
931 	     swap the CONCATN's operands.  */
932 	  src = resolve_operand_for_swap_move_operator (src_op);
933 	}
934     }
935 
936   if (GET_CODE (src) == SUBREG
937       && resolve_reg_p (SUBREG_REG (src))
938       && (maybe_ne (SUBREG_BYTE (src), 0)
939 	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
940     {
941       real_dest = dest;
942       dest = gen_reg_rtx (orig_mode);
943       if (REG_P (real_dest))
944 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
945     }
946 
947   /* Similarly if we are copying to a SUBREG of a decomposed reg where
948      the SUBREG is larger than word size.  */
949 
950   if (GET_CODE (dest) == SUBREG
951       && resolve_reg_p (SUBREG_REG (dest))
952       && (maybe_ne (SUBREG_BYTE (dest), 0)
953 	  || maybe_ne (orig_size,
954 		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
955     {
956       rtx reg, smove;
957       rtx_insn *minsn;
958 
959       reg = gen_reg_rtx (orig_mode);
960       minsn = emit_move_insn (reg, src);
961       smove = single_set (minsn);
962       gcc_assert (smove != NULL_RTX);
963       resolve_simple_move (smove, minsn);
964       src = reg;
965     }
966 
967   /* If we didn't have any big SUBREGS of decomposed registers, and
968      neither side of the move is a register we are decomposing, then
969      we don't have to do anything here.  */
970 
971   if (src == SET_SRC (set)
972       && dest == SET_DEST (set)
973       && !resolve_reg_p (src)
974       && !resolve_subreg_p (src)
975       && !resolve_reg_p (dest)
976       && !resolve_subreg_p (dest))
977     {
978       end_sequence ();
979       return insn;
980     }
981 
982   /* It's possible for the code to use a subreg of a decomposed
983      register while forming an address.  We need to handle that before
984      passing the address to emit_move_insn.  We pass NULL_RTX as the
985      insn parameter to resolve_subreg_use because we cannot validate
986      the insn yet.  */
987   if (MEM_P (src) || MEM_P (dest))
988     {
989       int acg;
990 
991       if (MEM_P (src))
992 	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
993       if (MEM_P (dest))
994 	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
995       acg = apply_change_group ();
996       gcc_assert (acg);
997     }
998 
999   /* If SRC is a register which we can't decompose, or has side
1000      effects, we need to move via a temporary register.  */
1001 
1002   if (!can_decompose_p (src)
1003       || side_effects_p (src)
1004       || GET_CODE (src) == ASM_OPERANDS)
1005     {
1006       rtx reg;
1007 
1008       reg = gen_reg_rtx (orig_mode);
1009 
1010       if (AUTO_INC_DEC)
1011 	{
1012 	  rtx_insn *move = emit_move_insn (reg, src);
1013 	  if (MEM_P (src))
1014 	    {
1015 	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1016 	      if (note)
1017 		add_reg_note (move, REG_INC, XEXP (note, 0));
1018 	    }
1019 	}
1020       else
1021 	emit_move_insn (reg, src);
1022 
1023       src = reg;
1024     }
1025 
1026   /* If DEST is a register which we can't decompose, or has side
1027      effects, we need to first move to a temporary register.  We
1028      handle the common case of pushing an operand directly.  We also
1029      go through a temporary register if it holds a floating point
1030      value.  This gives us better code on systems which can't move
1031      data easily between integer and floating point registers.  */
1032 
1033   dest_mode = orig_mode;
1034   pushing = push_operand (dest, dest_mode);
1035   if (!can_decompose_p (dest)
1036       || (side_effects_p (dest) && !pushing)
1037       || (!SCALAR_INT_MODE_P (dest_mode)
1038 	  && !resolve_reg_p (dest)
1039 	  && !resolve_subreg_p (dest)))
1040     {
1041       if (real_dest == NULL_RTX)
1042 	real_dest = dest;
1043       if (!SCALAR_INT_MODE_P (dest_mode))
1044 	dest_mode = int_mode_for_mode (dest_mode).require ();
1045       dest = gen_reg_rtx (dest_mode);
1046       if (REG_P (real_dest))
1047 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
1048     }
1049 
1050   if (pushing)
1051     {
1052       unsigned int i, j, jinc;
1053 
1054       gcc_assert (orig_size % UNITS_PER_WORD == 0);
1055       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1056       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1057 
1058       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1059 	{
1060 	  j = 0;
1061 	  jinc = 1;
1062 	}
1063       else
1064 	{
1065 	  j = words - 1;
1066 	  jinc = -1;
1067 	}
1068 
1069       for (i = 0; i < words; ++i, j += jinc)
1070 	{
1071 	  rtx temp;
1072 
1073 	  temp = copy_rtx (XEXP (dest, 0));
1074 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1075 					       j * UNITS_PER_WORD);
1076 	  emit_move_insn (temp,
1077 			  simplify_gen_subreg_concatn (word_mode, src,
1078 						       orig_mode,
1079 						       j * UNITS_PER_WORD));
1080 	}
1081     }
1082   else
1083     {
1084       unsigned int i;
1085 
1086       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1087 	emit_clobber (dest);
1088 
1089       for (i = 0; i < words; ++i)
1090 	{
1091 	  rtx t = simplify_gen_subreg_concatn (word_mode, dest,
1092 					       dest_mode,
1093 					       i * UNITS_PER_WORD);
1094 	  /* simplify_gen_subreg_concatn can return (const_int 0) for
1095 	     some sub-objects of paradoxical subregs.  As a source operand,
1096 	     that's fine.  As a destination it must be avoided.  Those are
1097 	     supposed to be don't care bits, so we can just drop that store
1098 	     on the floor.  */
1099 	  if (t != CONST0_RTX (word_mode))
1100 	    emit_move_insn (t,
1101 			    simplify_gen_subreg_concatn (word_mode, src,
1102 							 orig_mode,
1103 							 i * UNITS_PER_WORD));
1104 	}
1105     }
1106 
1107   if (real_dest != NULL_RTX)
1108     {
1109       rtx mdest, smove;
1110       rtx_insn *minsn;
1111 
1112       if (dest_mode == orig_mode)
1113 	mdest = dest;
1114       else
1115 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1116       minsn = emit_move_insn (real_dest, mdest);
1117 
1118   if (AUTO_INC_DEC && MEM_P (real_dest)
1119       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1120     {
1121       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1122       if (note)
1123 	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1124     }
1125 
1126       smove = single_set (minsn);
1127       gcc_assert (smove != NULL_RTX);
1128 
1129       resolve_simple_move (smove, minsn);
1130     }
1131 
1132   insns = get_insns ();
1133   end_sequence ();
1134 
1135   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1136 
1137   emit_insn_before (insns, insn);
1138 
1139   /* If we get here via self-recursion, then INSN is not yet in the insns
1140      chain and delete_insn will fail.  We only want to remove INSN from the
1141      current sequence.  See PR56738.  */
1142   if (in_sequence_p ())
1143     remove_insn (insn);
1144   else
1145     delete_insn (insn);
1146 
1147   return insns;
1148 }
1149 
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* For clobbers we can look through paradoxical subregs which
     we do not handle in simplify_gen_subreg_concatn.  */
  if (paradoxical_subreg_p (reg))
    reg = SUBREG_REG (reg);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* Rewrite INSN in place to clobber only the first word-sized
     piece; the remaining pieces get their own CLOBBER insns below.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER after INSN for each remaining word.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1194 
1195 /* A USE of a decomposed register is no longer meaningful.  Return
1196    whether we changed something.  */
1197 
1198 static bool
resolve_use(rtx pat,rtx_insn * insn)1199 resolve_use (rtx pat, rtx_insn *insn)
1200 {
1201   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1202     {
1203       delete_insn (insn);
1204       return true;
1205     }
1206 
1207   resolve_reg_notes (insn);
1208 
1209   return false;
1210 }
1211 
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx location in the debug insn's pattern and
     rewrite references to decomposed registers in place.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* simplify_subreg_concatn may return NULL; in that case keep
	     the original expression (copied below if it is itself a
	     decomposed register).  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* Replace a decomposed register by a fresh copy of its
	 replacement so the rtx is not shared with other insns.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1240 
/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extends are handled here.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Both destination and source must be pseudo registers, and the
     operation must be in the double-word integer mode.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Zero-extends are only split from word_mode, and only when the
	 cost analysis decided splitting is worthwhile.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* The shift amount must be a constant in
	 [BITS_PER_WORD, 2 * BITS_PER_WORD - 1] for which the per-amount
	 choices table says splitting is profitable.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1297 
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word order within a multiword value is
     reversed, so flip the index when the source is multiword.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the result word that receives (part of) the
     source; offset2 addresses the other result word, which is filled
     with zeros or sign bits below.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the upper result word holds the
     sign extension: a signed shift of the source word by
     BITS_PER_WORD - 1 yields a word of copies of its sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift amount beyond BITS_PER_WORD leaves a residual shift of
     the source word by the excess amount.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting by all bits: both result words equal the sign word.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1405 
1406 /* Print to dump_file a description of what we're doing with shift code CODE.
1407    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1408 
1409 static void
dump_shift_choices(enum rtx_code code,bool * splitting)1410 dump_shift_choices (enum rtx_code code, bool *splitting)
1411 {
1412   int i;
1413   const char *sep;
1414 
1415   fprintf (dump_file,
1416 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1417 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1418   sep = "";
1419   for (i = 0; i < BITS_PER_WORD; i++)
1420     if (splitting[i])
1421       {
1422 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1423 	sep = ",";
1424       }
1425   fprintf (dump_file, "\n");
1426 }
1427 
1428 /* Print to dump_file a description of what we're doing when optimizing
1429    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1430    of the SPEED_P choice.  */
1431 
1432 static void
dump_choices(bool speed_p,const char * description)1433 dump_choices (bool speed_p, const char *description)
1434 {
1435   unsigned int size, factor, i;
1436 
1437   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1438 
1439   for (i = 0; i < MAX_MACHINE_MODE; i++)
1440     if (interesting_mode_p ((machine_mode) i, &size, &factor)
1441 	&& factor > 1)
1442       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1443 	       choices[speed_p].move_modes_to_split[i]
1444 	       ? "Splitting"
1445 	       : "Skipping",
1446 	       GET_MODE_NAME ((machine_mode) i));
1447 
1448   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1449 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1450 	   GET_MODE_NAME (twice_word_mode));
1451 
1452   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1453   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1454   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1455   fprintf (dump_file, "\n");
1456 }
1457 
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max, true);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Analysis phase: scan every insn, classify it, and record in the
     context bitmaps which pseudos may or may not be decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Registers seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Transformation phase: rewrite every insn that refers to a
	 decomposed register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep match_dup operands in sync with the
			 operands they duplicate.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1749 
/* Implement first lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* The early pass does not decompose pseudo-to-pseudo copies
	 (DECOMPOSE_COPIES is false); see decompose_multiword_subregs.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

/* Factory for the first lower-subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1791 
/* Implement second lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types
					  && flag_split_wide_types_early; }
  virtual unsigned int execute (function *)
    {
      /* This pass also decomposes pseudo-to-pseudo copies
	 (DECOMPOSE_COPIES is true).  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

/* Factory for the second lower-subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}
1834 
/* Implement third lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg3 =
{
  RTL_PASS, /* type */
  "subreg3", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg3 : public rtl_opt_pass
{
public:
  pass_lower_subreg3 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types; }
  virtual unsigned int execute (function *)
    {
      /* Like subreg2, decompose pseudo-to-pseudo copies as well.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg3

} // anon namespace

/* Factory for the third lower-subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg3 (gcc::context *ctxt)
{
  return new pass_lower_subreg3 (ctxt);
}
1876