1 /* Machine description for AArch64 architecture.
2    Copyright (C) 2009-2013 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 
49 /* Classifies an address.
50 
51    ADDRESS_REG_IMM
52        A simple base register plus immediate offset.
53 
54    ADDRESS_REG_WB
55        A base register indexed by immediate offset with writeback.
56 
57    ADDRESS_REG_REG
58        A base register indexed by (optionally scaled) register.
59 
60    ADDRESS_REG_UXTW
61        A base register indexed by (optionally scaled) zero-extended register.
62 
63    ADDRESS_REG_SXTW
64        A base register indexed by (optionally scaled) sign-extended register.
65 
66    ADDRESS_LO_SUM
67        A LO_SUM rtx with a base register and "LO12" symbol relocation.
68 
   ADDRESS_SYMBOLIC
70        A constant symbolic address, in pc-relative literal pool.  */
71 
72 enum aarch64_address_type {
73   ADDRESS_REG_IMM,
74   ADDRESS_REG_WB,
75   ADDRESS_REG_REG,
76   ADDRESS_REG_UXTW,
77   ADDRESS_REG_SXTW,
78   ADDRESS_LO_SUM,
79   ADDRESS_SYMBOLIC
80 };
81 
82 struct aarch64_address_info {
83   enum aarch64_address_type type;
84   rtx base;
85   rtx offset;
86   int shift;
87   enum aarch64_symbol_type symbol_type;
88 };
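
/* Purely as an illustration, the address forms above correspond roughly
   to the following AArch64 assembly operands:
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool load (ldr x0, <label>).  */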
89 
90 /* The current code model.  */
91 enum aarch64_code_model aarch64_cmodel;
92 
93 #ifdef HAVE_AS_TLS
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
96 #endif
97 
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 						     const_tree,
101 						     enum machine_mode *, int *,
102 						     bool *);
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 					 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 					      HOST_WIDE_INT, HOST_WIDE_INT);
112 
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 						 const unsigned char *sel);
115 
116 /* The processor for which instructions should be scheduled.  */
117 enum aarch64_processor aarch64_tune = generic;
118 
119 /* The current tuning set.  */
120 const struct tune_params *aarch64_tune_params;
121 
122 /* Mask to specify which instructions we are allowed to generate.  */
123 unsigned long aarch64_isa_flags = 0;
124 
125 /* Mask to specify which instruction scheduling options should be used.  */
126 unsigned long aarch64_tune_flags = 0;
127 
128 /* Tuning parameters.  */
129 
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132 #else
133 #define NAMED_PARAM(NAME, VAL) (VAL)
134 #endif
135 
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137 __extension__
138 #endif
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
140 {
141   NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142   NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143   NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144   NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145   NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146   NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147   NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148   NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149   NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150   NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151   NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152   NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
153 };
154 
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156 __extension__
157 #endif
158 static const struct cpu_addrcost_table generic_addrcost_table =
159 {
160   NAMED_PARAM (pre_modify, 0),
161   NAMED_PARAM (post_modify, 0),
162   NAMED_PARAM (register_offset, 0),
163   NAMED_PARAM (register_extend, 0),
164   NAMED_PARAM (imm_offset, 0)
165 };
166 
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_regmove_cost generic_regmove_cost =
171 {
172   NAMED_PARAM (GP2GP, 1),
173   NAMED_PARAM (GP2FP, 2),
174   NAMED_PARAM (FP2GP, 2),
175   /* We currently do not provide direct support for TFmode Q->Q move.
176      Therefore we need to raise the cost above 2 in order to have
177      reload handle the situation.  */
178   NAMED_PARAM (FP2FP, 4)
179 };
180 
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct tune_params generic_tunings =
185 {
186   &generic_rtx_cost_table,
187   &generic_addrcost_table,
188   &generic_regmove_cost,
189   NAMED_PARAM (memmov_cost, 4)
190 };
191 
192 /* A processor implementing AArch64.  */
193 struct processor
194 {
195   const char *const name;
196   enum aarch64_processor core;
197   const char *arch;
198   const unsigned long flags;
199   const struct tune_params *const tune;
200 };
201 
202 /* Processor cores implementing AArch64.  */
203 static const struct processor all_cores[] =
204 {
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
208 #undef AARCH64_CORE
209   {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210   {NULL, aarch64_none, NULL, 0, NULL}
211 };
212 
213 /* Architectures implementing AArch64.  */
214 static const struct processor all_architectures[] =
215 {
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217   {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
219 #undef AARCH64_ARCH
220   {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221   {NULL, aarch64_none, NULL, 0, NULL}
222 };
223 
/* Target specification.  These are populated as command-line arguments
225    are processed, or NULL if not specified.  */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
229 
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
231 
232 /* An ISA extension in the co-processor and main instruction set space.  */
233 struct aarch64_option_extension
234 {
235   const char *const name;
236   const unsigned long flags_on;
237   const unsigned long flags_off;
238 };
239 
240 /* ISA extensions in AArch64.  */
241 static const struct aarch64_option_extension all_extensions[] =
242 {
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244   {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
247   {NULL, 0, 0}
248 };
249 
250 /* Used to track the size of an address when generating a pre/post
251    increment address.  */
252 static enum machine_mode aarch64_memory_reference_mode;
253 
254 /* Used to force GTY into this file.  */
255 static GTY(()) int gty_dummy;
256 
257 /* A table of valid AArch64 "bitmask immediate" values for
258    logical instructions.  */
259 
260 #define AARCH64_NUM_BITMASKS  5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
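
/* As an illustrative example, 0x00ff00ff00ff00ff is such a bitmask
   immediate: it is the 16-bit element 0x00ff (a contiguous, rotated run
   of set bits) replicated across the 64-bit value, and so can be used
   directly by AND/ORR/EOR.  */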
262 
263 /* Did we set flag_omit_frame_pointer just so
264    aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
266 
267 typedef enum aarch64_cond_code
268 {
269   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
272 }
273 aarch64_cc;
274 
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
276 
277 /* The condition codes of the processor, and the inverse function.  */
278 static const char * const aarch64_condition_codes[] =
279 {
280   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
282 };
283 
284 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
285 unsigned
aarch64_dbx_register_number (unsigned regno)
287 {
288    if (GP_REGNUM_P (regno))
289      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290    else if (regno == SP_REGNUM)
291      return AARCH64_DWARF_SP;
292    else if (FP_REGNUM_P (regno))
293      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
294 
295    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296       equivalent DWARF register.  */
297    return DWARF_FRAME_REGISTERS;
298 }
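
/* For instance, under the mapping above, and assuming the conventional
   AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31 and AARCH64_DWARF_V0 == 64,
   x0..x30 map to DWARF registers 0..30, sp to 31 and v0..v31 to 64..95.  */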
299 
300 /* Return TRUE if MODE is any of the large INT modes.  */
301 static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
303 {
304   return mode == OImode || mode == CImode || mode == XImode;
305 }
306 
307 /* Return TRUE if MODE is any of the vector modes.  */
308 static bool
aarch64_vector_mode_p (enum machine_mode mode)
310 {
311   return aarch64_vector_mode_supported_p (mode)
312 	 || aarch64_vect_struct_mode_p (mode);
313 }
314 
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
316 static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
318 				unsigned HOST_WIDE_INT nelems)
319 {
320   if (TARGET_SIMD
321       && AARCH64_VALID_SIMD_QREG_MODE (mode)
322       && (nelems >= 2 && nelems <= 4))
323     return true;
324 
325   return false;
326 }
327 
328 /* Implement HARD_REGNO_NREGS.  */
329 
330 int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
332 {
333   switch (aarch64_regno_regclass (regno))
334     {
335     case FP_REGS:
336     case FP_LO_REGS:
337       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338     default:
339       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
340     }
341   gcc_unreachable ();
342 }
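
/* For example, with 64-bit X registers and 128-bit V registers, a TImode
   value occupies two GP registers but only one FP/SIMD register, while an
   OImode (32-byte) vector-structure value occupies two V registers.  */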
343 
344 /* Implement HARD_REGNO_MODE_OK.  */
345 
346 int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
348 {
349   if (GET_MODE_CLASS (mode) == MODE_CC)
350     return regno == CC_REGNUM;
351 
352   if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353       || regno == ARG_POINTER_REGNUM)
354     return mode == Pmode;
355 
356   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357     return 1;
358 
359   if (FP_REGNUM_P (regno))
360     {
361       if (aarch64_vect_struct_mode_p (mode))
362 	return
363 	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364       else
365 	return 1;
366     }
367 
368   return 0;
369 }
370 
371 /* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
373 static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
375 {
376   return false;
377 }
378 
379 /* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
381 bool
aarch64_is_long_call_p (rtx sym)
383 {
384   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
385 }
386 
387 /* Return true if the offsets to a zero/sign-extract operation
388    represent an expression that matches an extend operation.  The
   operands represent the parameters from
390 
391    (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
392 bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 				rtx extract_imm)
395 {
396   HOST_WIDE_INT mult_val, extract_val;
397 
398   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399     return false;
400 
401   mult_val = INTVAL (mult_imm);
402   extract_val = INTVAL (extract_imm);
403 
404   if (extract_val > 8
405       && extract_val < GET_MODE_BITSIZE (mode)
406       && exact_log2 (extract_val & ~7) > 0
407       && (extract_val & 7) <= 4
408       && mult_val == (1 << (extract_val & 7)))
409     return true;
410 
411   return false;
412 }
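
/* A minimal illustration, assuming DImode: mult_imm == 4 and
   extract_imm == 34 satisfy the checks above, because the low 34 bits of
   (reg * 4) are exactly a 32-bit value extended and shifted left by two,
   i.e. the extended-register operand form "reg, [su]xtw #2".  */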
413 
414 /* Emit an insn that's a simple single-set.  Both the operands must be
415    known to be valid.  */
416 inline static rtx
emit_set_insn (rtx x, rtx y)
418 {
419   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
420 }
421 
422 /* X and Y are two things to compare using CODE.  Emit the compare insn and
423    return the rtx for register 0 in the proper mode.  */
424 rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
426 {
427   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
429 
430   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431   return cc_reg;
432 }
433 
434 /* Build the SYMBOL_REF for __tls_get_addr.  */
435 
436 static GTY(()) rtx tls_get_addr_libfunc;
437 
438 rtx
aarch64_tls_get_addr (void)
440 {
441   if (!tls_get_addr_libfunc)
442     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443   return tls_get_addr_libfunc;
444 }
445 
446 /* Return the TLS model to use for ADDR.  */
447 
448 static enum tls_model
tls_symbolic_operand_type (rtx addr)
450 {
451   enum tls_model tls_kind = TLS_MODEL_NONE;
452   rtx sym, addend;
453 
454   if (GET_CODE (addr) == CONST)
455     {
456       split_const (addr, &sym, &addend);
457       if (GET_CODE (sym) == SYMBOL_REF)
458 	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
459     }
460   else if (GET_CODE (addr) == SYMBOL_REF)
461     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
462 
463   return tls_kind;
464 }
465 
/* We allow LO_SUMs in our legitimate addresses so that combine can
   take care of combining addresses where necessary, but for generation
   purposes we generate the address as:
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo12:foo
473                                      nop
474 
475    PIC                               TLS
476    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
477    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
478                                      bl   __tls_get_addr
479                                      nop
480 
481    Load TLS symbol, depending on TLS mechanism and TLS access model.
482 
483    Global Dynamic - Traditional TLS:
484    adrp tmp, :tlsgd:imm
485    add  dest, tmp, #:tlsgd_lo12:imm
486    bl   __tls_get_addr
487 
488    Global Dynamic - TLS Descriptors:
489    adrp dest, :tlsdesc:imm
490    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
491    add  dest, dest, #:tlsdesc_lo12:imm
492    blr  tmp
493    mrs  tp, tpidr_el0
494    add  dest, dest, tp
495 
496    Initial Exec:
497    mrs  tp, tpidr_el0
498    adrp tmp, :gottprel:imm
499    ldr  dest, [tmp, #:gottprel_lo12:imm]
500    add  dest, dest, tp
501 
502    Local Exec:
503    mrs  tp, tpidr_el0
504    add  t0, tp, #:tprel_hi12:imm
505    add  t0, #:tprel_lo12_nc:imm
506 */
507 
508 static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 				   enum aarch64_symbol_type type)
511 {
512   switch (type)
513     {
514     case SYMBOL_SMALL_ABSOLUTE:
515       {
516 	rtx tmp_reg = dest;
517 	if (can_create_pseudo_p ())
518 	  {
519 	    tmp_reg =  gen_reg_rtx (Pmode);
520 	  }
521 
522 	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 	emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 	return;
525       }
526 
527     case SYMBOL_SMALL_GOT:
528       {
529 	rtx tmp_reg = dest;
530 	if (can_create_pseudo_p ())
531 	  {
532 	    tmp_reg =  gen_reg_rtx (Pmode);
533 	  }
534 	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 	return;
537       }
538 
539     case SYMBOL_SMALL_TLSGD:
540       {
541 	rtx insns;
542 	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
543 
544 	start_sequence ();
545 	emit_call_insn (gen_tlsgd_small (result, imm));
546 	insns = get_insns ();
547 	end_sequence ();
548 
549 	RTL_CONST_CALL_P (insns) = 1;
550 	emit_libcall_block (insns, dest, result, imm);
551 	return;
552       }
553 
554     case SYMBOL_SMALL_TLSDESC:
555       {
556 	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 	rtx tp;
558 
559 	emit_insn (gen_tlsdesc_small (imm));
560 	tp = aarch64_load_tp (NULL);
561 	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 	return;
564       }
565 
566     case SYMBOL_SMALL_GOTTPREL:
567       {
568 	rtx tmp_reg = gen_reg_rtx (Pmode);
569 	rtx tp = aarch64_load_tp (NULL);
570 	emit_insn (gen_tlsie_small (tmp_reg, imm));
571 	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 	return;
574       }
575 
576     case SYMBOL_SMALL_TPREL:
577       {
578 	rtx tp = aarch64_load_tp (NULL);
579 	emit_insn (gen_tlsle_small (dest, tp, imm));
580 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 	return;
582       }
583 
584     default:
585       gcc_unreachable ();
586     }
587 }
588 
589 /* Emit a move from SRC to DEST.  Assume that the move expanders can
590    handle all moves if !can_create_pseudo_p ().  The distinction is
591    important because, unlike emit_move_insn, the move expanders know
592    how to force Pmode objects into the constant pool even when the
593    constant pool address is not itself legitimate.  */
594 static rtx
aarch64_emit_move (rtx dest, rtx src)
596 {
597   return (can_create_pseudo_p ()
598 	  ? emit_move_insn (dest, src)
599 	  : emit_move_insn_1 (dest, src));
600 }
601 
602 void
aarch64_split_128bit_move (rtx dst, rtx src)
604 {
605   rtx low_dst;
606 
607   gcc_assert (GET_MODE (dst) == TImode);
608 
609   if (REG_P (dst) && REG_P (src))
610     {
611       int src_regno = REGNO (src);
612       int dst_regno = REGNO (dst);
613 
614       gcc_assert (GET_MODE (src) == TImode);
615 
616       /* Handle r -> w, w -> r.  */
617       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
618 	{
619 	  emit_insn (gen_aarch64_movtilow_di (dst,
620 					      gen_lowpart (word_mode, src)));
621 	  emit_insn (gen_aarch64_movtihigh_di (dst,
622 					       gen_highpart (word_mode, src)));
623 	  return;
624 	}
625       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
626 	{
627 	  emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 					      src));
629 	  emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 					       src));
631 	  return;
632 	}
633       /* Fall through to r -> r cases.  */
634     }
635 
636   low_dst = gen_lowpart (word_mode, dst);
637   if (REG_P (low_dst)
638       && reg_overlap_mentioned_p (low_dst, src))
639     {
640       aarch64_emit_move (gen_highpart (word_mode, dst),
641 			 gen_highpart_mode (word_mode, TImode, src));
642       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
643     }
644   else
645     {
646       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647       aarch64_emit_move (gen_highpart (word_mode, dst),
648 			 gen_highpart_mode (word_mode, TImode, src));
649     }
650 }
651 
652 bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
654 {
655   return (! REG_P (src)
656 	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
657 }
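
/* Illustrative summary: a TImode copy between two FP/SIMD registers can be
   done as a single 128-bit register move and so is not split, whereas
   GP<->GP and GP<->FP TImode copies are split into two 64-bit moves by
   aarch64_split_128bit_move above.  */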
658 
659 static rtx
aarch64_force_temporary (rtx x, rtx value)
661 {
662   if (can_create_pseudo_p ())
663     return force_reg (Pmode, value);
664   else
665     {
666       x = aarch64_emit_move (x, value);
667       return x;
668     }
669 }
670 
671 
672 static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
674 {
675   if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
676     {
677       rtx high;
678       /* Load the full offset into a register.  This
679          might be improvable in the future.  */
680       high = GEN_INT (offset);
681       offset = 0;
682       high = aarch64_force_temporary (temp, high);
683       reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
684     }
685   return plus_constant (mode, reg, offset);
686 }
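
/* For instance, an offset such as 0x123456 cannot be encoded as an add/sub
   immediate (a 12-bit value, optionally shifted left by 12), so the code
   above first forces it into a temporary register and adds that register
   to REG (illustrative example).  */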
687 
688 void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
690 {
691   enum machine_mode mode = GET_MODE (dest);
692   unsigned HOST_WIDE_INT mask;
693   int i;
694   bool first;
695   unsigned HOST_WIDE_INT val;
696   bool subtargets;
697   rtx subtarget;
698   int one_match, zero_match;
699 
700   gcc_assert (mode == SImode || mode == DImode);
701 
702   /* Check on what type of symbol it is.  */
703   if (GET_CODE (imm) == SYMBOL_REF
704       || GET_CODE (imm) == LABEL_REF
705       || GET_CODE (imm) == CONST)
706     {
707       rtx mem, base, offset;
708       enum aarch64_symbol_type sty;
709 
710       /* If we have (const (plus symbol offset)), separate out the offset
711 	 before we start classifying the symbol.  */
712       split_const (imm, &base, &offset);
713 
714       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715       switch (sty)
716 	{
717 	case SYMBOL_FORCE_TO_MEM:
718 	  if (offset != const0_rtx
719 	      && targetm.cannot_force_const_mem (mode, imm))
720 	    {
721 	      gcc_assert(can_create_pseudo_p ());
722 	      base = aarch64_force_temporary (dest, base);
723 	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 	      aarch64_emit_move (dest, base);
725 	      return;
726 	    }
727 	  mem = force_const_mem (mode, imm);
728 	  gcc_assert (mem);
729 	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 	  return;
731 
732         case SYMBOL_SMALL_TLSGD:
733         case SYMBOL_SMALL_TLSDESC:
734         case SYMBOL_SMALL_GOTTPREL:
735 	case SYMBOL_SMALL_GOT:
736 	  if (offset != const0_rtx)
737 	    {
738 	      gcc_assert(can_create_pseudo_p ());
739 	      base = aarch64_force_temporary (dest, base);
740 	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 	      aarch64_emit_move (dest, base);
742 	      return;
743 	    }
744 	  /* FALLTHRU */
745 
746         case SYMBOL_SMALL_TPREL:
747 	case SYMBOL_SMALL_ABSOLUTE:
748 	  aarch64_load_symref_appropriately (dest, imm, sty);
749 	  return;
750 
751 	default:
752 	  gcc_unreachable ();
753 	}
754     }
755 
756   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
757     {
758       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759       return;
760     }
761 
762   if (!CONST_INT_P (imm))
763     {
764       if (GET_CODE (imm) == HIGH)
765 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766       else
767         {
768 	  rtx mem = force_const_mem (mode, imm);
769 	  gcc_assert (mem);
770 	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
771 	}
772 
773       return;
774     }
775 
776   if (mode == SImode)
777     {
778       /* We know we can't do this in 1 insn, and we must be able to do it
779 	 in two; so don't mess around looking for sequences that don't buy
780 	 us anything.  */
781       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784       return;
785     }
786 
787   /* Remaining cases are all for DImode.  */
788 
789   val = INTVAL (imm);
790   subtargets = optimize && can_create_pseudo_p ();
791 
792   one_match = 0;
793   zero_match = 0;
794   mask = 0xffff;
795 
796   for (i = 0; i < 64; i += 16, mask <<= 16)
797     {
798       if ((val & mask) == 0)
799 	zero_match++;
800       else if ((val & mask) == mask)
801 	one_match++;
802     }
803 
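  /* Illustrative values: val == 0x1234ffff5678ffff gives one_match == 2,
     so the code below builds it as a single MOVN-style constant
     (0x1234ffffffffffff) followed by one 16-bit insert of 0x5678, while
     val == 0x0000123400005678 gives zero_match == 2 and is built by the
     simple MOVZ/MOVK sequence at the end of this function.  */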
804   if (one_match == 2)
805     {
806       mask = 0xffff;
807       for (i = 0; i < 64; i += 16, mask <<= 16)
808 	{
809 	  if ((val & mask) != mask)
810 	    {
811 	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 					 GEN_INT ((val >> i) & 0xffff)));
814 	      return;
815 	    }
816 	}
817       gcc_unreachable ();
818     }
819 
820   if (zero_match == 2)
821     goto simple_sequence;
822 
823   mask = 0x0ffff0000UL;
824   for (i = 16; i < 64; i += 16, mask <<= 16)
825     {
826       HOST_WIDE_INT comp = mask & ~(mask - 1);
827 
828       if (aarch64_uimm12_shift (val - (val & mask)))
829 	{
830 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
831 
832 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 	  emit_insn (gen_adddi3 (dest, subtarget,
834 				 GEN_INT (val - (val & mask))));
835 	  return;
836 	}
837       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
838 	{
839 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
840 
841 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 				  GEN_INT ((val + comp) & mask)));
843 	  emit_insn (gen_adddi3 (dest, subtarget,
844 				 GEN_INT (val - ((val + comp) & mask))));
845 	  return;
846 	}
847       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
848 	{
849 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
850 
851 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 				  GEN_INT ((val - comp) | ~mask)));
853 	  emit_insn (gen_adddi3 (dest, subtarget,
854 				 GEN_INT (val - ((val - comp) | ~mask))));
855 	  return;
856 	}
857       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
858 	{
859 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
860 
861 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 				  GEN_INT (val | ~mask)));
863 	  emit_insn (gen_adddi3 (dest, subtarget,
864 				 GEN_INT (val - (val | ~mask))));
865 	  return;
866 	}
867     }
868 
869   /* See if we can do it by arithmetically combining two
870      immediates.  */
871   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
872     {
873       int j;
874       mask = 0xffff;
875 
876       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
878 	{
879 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 				  GEN_INT (aarch64_bitmasks[i])));
882 	  emit_insn (gen_adddi3 (dest, subtarget,
883 				 GEN_INT (val - aarch64_bitmasks[i])));
884 	  return;
885 	}
886 
887       for (j = 0; j < 64; j += 16, mask <<= 16)
888 	{
889 	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
890 	    {
891 	      emit_insn (gen_rtx_SET (VOIDmode, dest,
892 				      GEN_INT (aarch64_bitmasks[i])));
893 	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 					 GEN_INT ((val >> j) & 0xffff)));
895 	      return;
896 	    }
897 	}
898     }
899 
900   /* See if we can do it by logically combining two immediates.  */
901   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
902     {
903       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
904 	{
905 	  int j;
906 
907 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
909 	      {
910 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 					GEN_INT (aarch64_bitmasks[i])));
913 		emit_insn (gen_iordi3 (dest, subtarget,
914 				       GEN_INT (aarch64_bitmasks[j])));
915 		return;
916 	      }
917 	}
918       else if ((val & aarch64_bitmasks[i]) == val)
919 	{
920 	  int j;
921 
922 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
924 	      {
925 
926 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 					GEN_INT (aarch64_bitmasks[j])));
929 		emit_insn (gen_anddi3 (dest, subtarget,
930 				       GEN_INT (aarch64_bitmasks[i])));
931 		return;
932 	      }
933 	}
934     }
935 
936  simple_sequence:
937   first = true;
938   mask = 0xffff;
939   for (i = 0; i < 64; i += 16, mask <<= 16)
940     {
941       if ((val & mask) != 0)
942 	{
943 	  if (first)
944 	    {
945 	      emit_insn (gen_rtx_SET (VOIDmode, dest,
946 				      GEN_INT (val & mask)));
947 	      first = false;
948 	    }
949 	  else
950 	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 				       GEN_INT ((val >> i) & 0xffff)));
952 	}
953     }
954 }
955 
956 static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
958 {
959   /* Indirect calls are not currently supported.  */
960   if (decl == NULL)
961     return false;
962 
963   /* Cannot tail-call to long-calls, since these are outside of the
964      range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
966   if (aarch64_decl_is_long_call_p (decl))
967     return false;
968 
969   return true;
970 }
971 
972 /* Implement TARGET_PASS_BY_REFERENCE.  */
973 
974 static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 			   enum machine_mode mode,
977 			   const_tree type,
978 			   bool named ATTRIBUTE_UNUSED)
979 {
980   HOST_WIDE_INT size;
981   enum machine_mode dummymode;
982   int nregs;
983 
984   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
985   size = (mode == BLKmode && type)
986     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
987 
988   if (type)
989     {
990       /* Arrays always passed by reference.  */
991       if (TREE_CODE (type) == ARRAY_TYPE)
992 	return true;
993       /* Other aggregates based on their size.  */
994       if (AGGREGATE_TYPE_P (type))
995 	size = int_size_in_bytes (type);
996     }
997 
  /* Variable-sized arguments are always passed by reference.  */
999   if (size < 0)
1000     return true;
1001 
1002   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1003   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 					       &dummymode, &nregs,
1005 					       NULL))
1006     return false;
1007 
1008   /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
1010      aggregate.  */
1011   return size > 2 * UNITS_PER_WORD;
1012 }
1013 
1014 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1015 static bool
aarch64_return_in_msb (const_tree valtype)
1017 {
1018   enum machine_mode dummy_mode;
1019   int dummy_int;
1020 
1021   /* Never happens in little-endian mode.  */
1022   if (!BYTES_BIG_ENDIAN)
1023     return false;
1024 
1025   /* Only composite types smaller than or equal to 16 bytes can
1026      be potentially returned in registers.  */
1027   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028       || int_size_in_bytes (valtype) <= 0
1029       || int_size_in_bytes (valtype) > 16)
1030     return false;
1031 
1032   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034      is always passed/returned in the least significant bits of fp/simd
1035      register(s).  */
1036   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 					       &dummy_mode, &dummy_int, NULL))
1038     return false;
1039 
1040   return true;
1041 }
1042 
1043 /* Implement TARGET_FUNCTION_VALUE.
1044    Define how to find the value returned by a function.  */
1045 
1046 static rtx
aarch64_function_value (const_tree type, const_tree func,
1048 			bool outgoing ATTRIBUTE_UNUSED)
1049 {
1050   enum machine_mode mode;
1051   int unsignedp;
1052   int count;
1053   enum machine_mode ag_mode;
1054 
1055   mode = TYPE_MODE (type);
1056   if (INTEGRAL_TYPE_P (type))
1057     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058 
1059   if (aarch64_return_in_msb (type))
1060     {
1061       HOST_WIDE_INT size = int_size_in_bytes (type);
1062 
1063       if (size % UNITS_PER_WORD != 0)
1064 	{
1065 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067 	}
1068     }
1069 
1070   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 					       &ag_mode, &count, NULL))
1072     {
1073       if (!aarch64_composite_type_p (type, mode))
1074 	{
1075 	  gcc_assert (count == 1 && mode == ag_mode);
1076 	  return gen_rtx_REG (mode, V0_REGNUM);
1077 	}
1078       else
1079 	{
1080 	  int i;
1081 	  rtx par;
1082 
1083 	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 	  for (i = 0; i < count; i++)
1085 	    {
1086 	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 	      XVECEXP (par, 0, i) = tmp;
1090 	    }
1091 	  return par;
1092 	}
1093     }
1094   else
1095     return gen_rtx_REG (mode, R0_REGNUM);
1096 }
1097 
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099    Return true if REGNO is the number of a hard register in which the values
   of a called function may come back.  */
1101 
1102 static bool
aarch64_function_value_regno_p (const unsigned int regno)
1104 {
1105   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1106      of 16-byte return values are: 128-bit integers and 16-byte small
1107      structures (excluding homogeneous floating-point aggregates).  */
1108   if (regno == R0_REGNUM || regno == R1_REGNUM)
1109     return true;
1110 
1111   /* Up to four fp/simd registers can return a function value, e.g. a
1112      homogeneous floating-point aggregate having four members.  */
1113   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114     return !TARGET_GENERAL_REGS_ONLY;
1115 
1116   return false;
1117 }
1118 
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1120 
1121    If the type T of the result of a function is such that
1122      void func (T arg)
1123    would require that arg be passed as a value in a register (or set of
1124    registers) according to the parameter passing rules, then the result
1125    is returned in the same registers as would be used for such an
1126    argument.  */
1127 
1128 static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130 {
1131   HOST_WIDE_INT size;
1132   enum machine_mode ag_mode;
1133   int count;
1134 
1135   if (!AGGREGATE_TYPE_P (type)
1136       && TREE_CODE (type) != COMPLEX_TYPE
1137       && TREE_CODE (type) != VECTOR_TYPE)
1138     /* Simple scalar types always returned in registers.  */
1139     return false;
1140 
1141   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 					       type,
1143 					       &ag_mode,
1144 					       &count,
1145 					       NULL))
1146     return false;
1147 
1148   /* Types larger than 2 registers returned in memory.  */
1149   size = int_size_in_bytes (type);
1150   return (size < 0 || size > 2 * UNITS_PER_WORD);
1151 }
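
/* Illustrative consequences of the rules above: a struct of three doubles
   is an HFA and is returned in SIMD registers even though it is 24 bytes;
   a 12-byte plain struct is returned in x0/x1; a 32-byte non-HFA struct is
   returned in memory.  */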
1152 
1153 static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 			       const_tree type, int *nregs)
1156 {
1157   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158   return aarch64_vfp_is_call_or_return_candidate (mode,
1159 						  type,
1160 						  &pcum->aapcs_vfp_rmode,
1161 						  nregs,
1162 						  NULL);
1163 }
1164 
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166    bits.  The idea is to suppress any stronger alignment requested by
1167    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168    This is a helper function for local use only.  */
1169 
1170 static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172 {
1173   unsigned int alignment;
1174 
1175   if (type)
1176     {
1177       if (!integer_zerop (TYPE_SIZE (type)))
1178 	{
1179 	  if (TYPE_MODE (type) == mode)
1180 	    alignment = TYPE_ALIGN (type);
1181 	  else
1182 	    alignment = GET_MODE_ALIGNMENT (mode);
1183 	}
1184       else
1185 	alignment = 0;
1186     }
1187   else
1188     alignment = GET_MODE_ALIGNMENT (mode);
1189 
1190   return alignment;
1191 }
1192 
1193 /* Layout a function argument according to the AAPCS64 rules.  The rule
1194    numbers refer to the rule numbers in the AAPCS64.  */
1195 
1196 static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 		    const_tree type,
1199 		    bool named ATTRIBUTE_UNUSED)
1200 {
1201   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202   int ncrn, nvrn, nregs;
1203   bool allocate_ncrn, allocate_nvrn;
1204   HOST_WIDE_INT size;
1205 
1206   /* We need to do this once per argument.  */
1207   if (pcum->aapcs_arg_processed)
1208     return;
1209 
1210   pcum->aapcs_arg_processed = true;
1211 
  /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
1213   size
1214     = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1215 			UNITS_PER_WORD);
1216 
1217   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1218   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1219 						 mode,
1220 						 type,
1221 						 &nregs);
1222 
1223   /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1224      The following code thus handles passing by SIMD/FP registers first.  */
1225 
1226   nvrn = pcum->aapcs_nvrn;
1227 
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1230   if (allocate_nvrn)
1231     {
1232       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1233 	{
1234 	  pcum->aapcs_nextnvrn = nvrn + nregs;
1235 	  if (!aarch64_composite_type_p (type, mode))
1236 	    {
1237 	      gcc_assert (nregs == 1);
1238 	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1239 	    }
1240 	  else
1241 	    {
1242 	      rtx par;
1243 	      int i;
1244 	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1245 	      for (i = 0; i < nregs; i++)
1246 		{
1247 		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1248 					 V0_REGNUM + nvrn + i);
1249 		  tmp = gen_rtx_EXPR_LIST
1250 		    (VOIDmode, tmp,
1251 		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1252 		  XVECEXP (par, 0, i) = tmp;
1253 		}
1254 	      pcum->aapcs_reg = par;
1255 	    }
1256 	  return;
1257 	}
1258       else
1259 	{
1260 	  /* C.3 NSRN is set to 8.  */
1261 	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1262 	  goto on_stack;
1263 	}
1264     }
1265 
1266   ncrn = pcum->aapcs_ncrn;
1267   nregs = size / UNITS_PER_WORD;
1268 
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1272   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1273     {
1274       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1275 
1276       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1277 
1278       /* C.8 if the argument has an alignment of 16 then the NGRN is
1279          rounded up to the next even number.  */
1280       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1281 	{
1282 	  ++ncrn;
1283 	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1284 	}
1285       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1286          A reg is still generated for it, but the caller should be smart
1287 	 enough not to use it.  */
1288       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1289 	{
1290 	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1291 	}
1292       else
1293 	{
1294 	  rtx par;
1295 	  int i;
1296 
1297 	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1298 	  for (i = 0; i < nregs; i++)
1299 	    {
1300 	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1301 	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1302 				       GEN_INT (i * UNITS_PER_WORD));
1303 	      XVECEXP (par, 0, i) = tmp;
1304 	    }
1305 	  pcum->aapcs_reg = par;
1306 	}
1307 
1308       pcum->aapcs_nextncrn = ncrn + nregs;
1309       return;
1310     }
1311 
1312   /* C.11  */
1313   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1314 
1315   /* The argument is passed on stack; record the needed number of words for
1316      this argument and align the total size if necessary.  */
1317 on_stack:
1318   pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1319   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1320     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1321 					       16 / UNITS_PER_WORD);
1322   return;
1323 }
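
/* A worked example of the rules above (illustrative, following AAPCS64):
   a struct of two doubles is an HFA with two members and, while enough V
   registers remain, is passed in two consecutive FP/SIMD registers; a
   16-byte, 16-byte-aligned integer struct is passed in an even/odd X
   register pair per rule C.8; once the relevant registers are exhausted
   the argument is placed on the stack with the NSAA kept at least 8-byte
   aligned.  */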
1324 
1325 /* Implement TARGET_FUNCTION_ARG.  */
1326 
1327 static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1329 		      const_tree type, bool named)
1330 {
1331   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1332   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1333 
1334   if (mode == VOIDmode)
1335     return NULL_RTX;
1336 
1337   aarch64_layout_arg (pcum_v, mode, type, named);
1338   return pcum->aapcs_reg;
1339 }
1340 
1341 void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1343 			   const_tree fntype ATTRIBUTE_UNUSED,
1344 			   rtx libname ATTRIBUTE_UNUSED,
1345 			   const_tree fndecl ATTRIBUTE_UNUSED,
1346 			   unsigned n_named ATTRIBUTE_UNUSED)
1347 {
1348   pcum->aapcs_ncrn = 0;
1349   pcum->aapcs_nvrn = 0;
1350   pcum->aapcs_nextncrn = 0;
1351   pcum->aapcs_nextnvrn = 0;
1352   pcum->pcs_variant = ARM_PCS_AAPCS64;
1353   pcum->aapcs_reg = NULL_RTX;
1354   pcum->aapcs_arg_processed = false;
1355   pcum->aapcs_stack_words = 0;
1356   pcum->aapcs_stack_size = 0;
1357 
1358   return;
1359 }
1360 
1361 static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
1363 			      enum machine_mode mode,
1364 			      const_tree type,
1365 			      bool named)
1366 {
1367   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1368   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1369     {
1370       aarch64_layout_arg (pcum_v, mode, type, named);
1371       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1372 		  != (pcum->aapcs_stack_words != 0));
1373       pcum->aapcs_arg_processed = false;
1374       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1375       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1376       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1377       pcum->aapcs_stack_words = 0;
1378       pcum->aapcs_reg = NULL_RTX;
1379     }
1380 }
1381 
1382 bool
aarch64_function_arg_regno_p (unsigned regno)
1384 {
1385   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1386 	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1387 }
1388 
1389 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1390    PARM_BOUNDARY bits of alignment, but will be given anything up
1391    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1392    that both before and after the layout of each argument, the Next
1393    Stacked Argument Address (NSAA) will have a minimum alignment of
1394    8 bytes.  */
1395 
1396 static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1398 {
1399   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1400 
1401   if (alignment < PARM_BOUNDARY)
1402     alignment = PARM_BOUNDARY;
1403   if (alignment > STACK_BOUNDARY)
1404     alignment = STACK_BOUNDARY;
1405   return alignment;
1406 }
1407 
1408 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1409 
1410    Return true if an argument passed on the stack should be padded upwards,
1411    i.e. if the least-significant byte of the stack slot has useful data.
1412 
1413    Small aggregate types are placed in the lowest memory address.
1414 
1415    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1416 
1417 bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1419 {
1420   /* On little-endian targets, the least significant byte of every stack
1421      argument is passed at the lowest byte address of the stack slot.  */
1422   if (!BYTES_BIG_ENDIAN)
1423     return true;
1424 
1425   /* Otherwise, integral types and floating point types are padded downward:
1426      the least significant byte of a stack argument is passed at the highest
1427      byte address of the stack slot.  */
1428   if (type
1429       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1430       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1431     return false;
1432 
1433   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1434   return true;
1435 }
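
/* For example, on a big-endian target a 2-byte integer argument passed on
   the stack occupies the highest bytes of its 8-byte slot (padded
   downward), while a small structure occupies the lowest bytes (padded
   upward); on little-endian targets everything is padded upward.  This is
   an illustrative summary of the function above.  */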
1436 
1437 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1438 
   It specifies padding for the last (and possibly the only)
   element of a block move between registers and memory.  Assuming
   the block is in memory, padding upward means that the last
   element is padded after its most significant byte, while in
   downward padding, the last element is padded at its least
   significant byte side.
1445 
1446    Small aggregates and small complex types are always padded
1447    upwards.
1448 
1449    We don't need to worry about homogeneous floating-point or
1450    short-vector aggregates; their move is not affected by the
1451    padding direction determined here.  Regardless of endianness,
1452    each element of such an aggregate is put in the least
1453    significant bits of a fp/simd register.
1454 
1455    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1456    register has useful data, and return the opposite if the most
1457    significant byte does.  */
1458 
1459 bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1461 		     bool first ATTRIBUTE_UNUSED)
1462 {
1463 
1464   /* Small composite types are always padded upward.  */
1465   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1466     {
1467       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1468 			    : GET_MODE_SIZE (mode));
1469       if (size < 2 * UNITS_PER_WORD)
1470 	return true;
1471     }
1472 
1473   /* Otherwise, use the default padding.  */
1474   return !BYTES_BIG_ENDIAN;
1475 }
1476 
1477 static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
1479 {
1480   return SImode;
1481 }
1482 
1483 static bool
aarch64_frame_pointer_required (void)
1485 {
1486   /* If the function contains dynamic stack allocations, we need to
1487      use the frame pointer to access the static parts of the frame.  */
1488   if (cfun->calls_alloca)
1489     return true;
1490 
1491   /* We may have turned flag_omit_frame_pointer on in order to have this
1492      function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1493      and we'll check it here.
1494      If we really did set flag_omit_frame_pointer normally, then we return false
1495      (no frame pointer required) in all cases.  */
1496 
1497   if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1498     return false;
1499   else if (flag_omit_leaf_frame_pointer)
1500     return !crtl->is_leaf;
1501   return true;
1502 }
1503 
1504 /* Mark the registers that need to be saved by the callee and calculate
1505    the size of the callee-saved registers area and frame record (both FP
1506    and LR may be omitted).  */
1507 static void
aarch64_layout_frame (void)
1509 {
1510   HOST_WIDE_INT offset = 0;
1511   int regno;
1512 
1513   if (reload_completed && cfun->machine->frame.laid_out)
1514     return;
1515 
1516   cfun->machine->frame.fp_lr_offset = 0;
1517 
1518   /* First mark all the registers that really need to be saved...  */
1519   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1520     cfun->machine->frame.reg_offset[regno] = -1;
1521 
1522   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1523     cfun->machine->frame.reg_offset[regno] = -1;
1524 
1525   /* ... that includes the eh data registers (if needed)...  */
1526   if (crtl->calls_eh_return)
1527     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1528       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1529 
1530   /* ... and any callee saved register that dataflow says is live.  */
1531   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1532     if (df_regs_ever_live_p (regno)
1533 	&& !call_used_regs[regno])
1534       cfun->machine->frame.reg_offset[regno] = 0;
1535 
1536   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1537     if (df_regs_ever_live_p (regno)
1538 	&& !call_used_regs[regno])
1539       cfun->machine->frame.reg_offset[regno] = 0;
1540 
1541   if (frame_pointer_needed)
1542     {
1543       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1544       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1545       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1546     }
1547 
1548   /* Now assign stack slots for them.  */
1549   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1550     if (cfun->machine->frame.reg_offset[regno] != -1)
1551       {
1552 	cfun->machine->frame.reg_offset[regno] = offset;
1553 	offset += UNITS_PER_WORD;
1554       }
1555 
1556   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1557     if (cfun->machine->frame.reg_offset[regno] != -1)
1558       {
1559 	cfun->machine->frame.reg_offset[regno] = offset;
1560 	offset += UNITS_PER_WORD;
1561       }
1562 
1563   if (frame_pointer_needed)
1564     {
1565       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1566       offset += UNITS_PER_WORD;
1567       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1568     }
1569 
1570   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1571     {
1572       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1573       offset += UNITS_PER_WORD;
1574       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1575     }
1576 
1577   cfun->machine->frame.padding0 =
1578     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1579   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1580 
1581   cfun->machine->frame.saved_regs_size = offset;
1582   cfun->machine->frame.laid_out = true;
1583 }
1584 
1585 /* Make the last instruction frame-related and note that it performs
1586    the operation described by FRAME_PATTERN.  */
1587 
1588 static void
aarch64_set_frame_expr (rtx frame_pattern)
1590 {
1591   rtx insn;
1592 
1593   insn = get_last_insn ();
1594   RTX_FRAME_RELATED_P (insn) = 1;
1595   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1596   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1597 				      frame_pattern,
1598 				      REG_NOTES (insn));
1599 }
1600 
1601 static bool
aarch64_register_saved_on_entry (int regno)
1603 {
1604   return cfun->machine->frame.reg_offset[regno] != -1;
1605 }
1606 
1607 
1608 static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
1610 			      bool restore, rtx base_rtx)
1611 
1612 {
1613   unsigned regno;
1614   unsigned regno2;
1615   rtx insn;
1616   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1617 
1618 
1619   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1620     {
1621       if (aarch64_register_saved_on_entry (regno))
1622 	{
1623 	  rtx mem;
1624 	  mem = gen_mem_ref (DFmode,
1625 			     plus_constant (Pmode,
1626 					    base_rtx,
1627 					    start_offset));
1628 
1629 	  for (regno2 = regno + 1;
1630 	       regno2 <= V31_REGNUM
1631 		 && !aarch64_register_saved_on_entry (regno2);
1632 	       regno2++)
1633 	    {
1634 	      /* Empty loop.  */
1635 	    }
1636 	  if (regno2 <= V31_REGNUM &&
1637 	      aarch64_register_saved_on_entry (regno2))
1638 	    {
1639 	      rtx mem2;
1640 	      /* Next highest register to be saved.  */
1641 	      mem2 = gen_mem_ref (DFmode,
1642 				  plus_constant
1643 				  (Pmode,
1644 				   base_rtx,
1645 				   start_offset + increment));
1646 	      if (restore == false)
1647 		{
1648 		  insn = emit_insn
1649 		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1650 					mem2, gen_rtx_REG (DFmode, regno2)));
1651 
1652 		}
1653 	      else
1654 		{
1655 		  insn = emit_insn
1656 		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1657 				       gen_rtx_REG (DFmode, regno2), mem2));
1658 
1659 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1660 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1661 		}
1662 
1663 		  /* The first part of a frame-related parallel insn
1664 		     is always assumed to be relevant to the frame
		     calculations; subsequent parts are only
1666 		     frame-related if explicitly marked.  */
1667 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1668 					    1)) = 1;
1669 	      regno = regno2;
1670 	      start_offset += increment * 2;
1671 	    }
1672 	  else
1673 	    {
1674 	      if (restore == false)
1675 		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1676 	      else
1677 		{
1678 		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1679 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1680 		}
1681 	      start_offset += increment;
1682 	    }
1683 	  RTX_FRAME_RELATED_P (insn) = 1;
1684 	}
1685     }
1686 
1687 }
1688 
1689 
1690 /* Offset from the stack pointer at which the saves and
1691    restores have to happen.  */
1692 static void
1693 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1694 					    bool restore)
1695 {
1696   rtx insn;
1697   rtx base_rtx = stack_pointer_rtx;
1698   HOST_WIDE_INT start_offset = offset;
1699   HOST_WIDE_INT increment = UNITS_PER_WORD;
1700   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1701   unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1702   unsigned regno;
1703   unsigned regno2;
1704 
1705   for (regno = R0_REGNUM; regno <= limit; regno++)
1706     {
1707       if (aarch64_register_saved_on_entry (regno))
1708 	{
1709 	  rtx mem;
1710 	  mem = gen_mem_ref (Pmode,
1711 			     plus_constant (Pmode,
1712 					    base_rtx,
1713 					    start_offset));
1714 
1715 	  for (regno2 = regno + 1;
1716 	       regno2 <= limit
1717 		 && !aarch64_register_saved_on_entry (regno2);
1718 	       regno2++)
1719 	    {
1720 	      /* Empty loop.  */
1721 	    }
1722 	  if (regno2 <= limit &&
1723 	      aarch64_register_saved_on_entry (regno2))
1724 	    {
1725 	      rtx mem2;
1726 	      /* Next highest register to be saved.  */
1727 	      mem2 = gen_mem_ref (Pmode,
1728 				  plus_constant
1729 				  (Pmode,
1730 				   base_rtx,
1731 				   start_offset + increment));
1732 	      if (restore == false)
1733 		{
1734 		  insn = emit_insn
1735 		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1736 					mem2, gen_rtx_REG (DImode, regno2)));
1737 
1738 		}
1739 	      else
1740 		{
1741 		  insn = emit_insn
1742 		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1743 				     gen_rtx_REG (DImode, regno2), mem2));
1744 
1745 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1746 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1747 		}
1748 
1749 		  /* The first part of a frame-related parallel insn
1750 		     is always assumed to be relevant to the frame
1751 		     calculations; subsequent parts are only
1752 		     frame-related if explicitly marked.  */
1753 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1754 					    1)) = 1;
1755 	      regno = regno2;
1756 	      start_offset += increment * 2;
1757 	    }
1758 	  else
1759 	    {
1760 	      if (restore == false)
1761 		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1762 	      else
1763 		{
1764 		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1765 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1766 		}
1767 	      start_offset += increment;
1768 	    }
1769 	  RTX_FRAME_RELATED_P (insn) = 1;
1770 	}
1771     }
1772 
1773   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1774 
1775 }
1776 
1777 /* AArch64 stack frames generated by this compiler look like:
1778 
1779 	+-------------------------------+
1780 	|                               |
1781 	|  incoming stack arguments     |
1782 	|                               |
1783 	+-------------------------------+ <-- arg_pointer_rtx
1784 	|                               |
1785 	|  callee-allocated save area   |
1786 	|  for register varargs         |
1787 	|                               |
1788 	+-------------------------------+
1789 	|                               |
1790 	|  local variables              |
1791 	|                               |
1792 	+-------------------------------+ <-- frame_pointer_rtx
1793 	|                               |
1794 	|  callee-saved registers       |
1795 	|                               |
1796 	+-------------------------------+
1797 	|  LR'                          |
1798 	+-------------------------------+
1799 	|  FP'                          |
1800       P +-------------------------------+ <-- hard_frame_pointer_rtx
1801 	|  dynamic allocation           |
1802 	+-------------------------------+
1803 	|                               |
1804 	|  outgoing stack arguments     |
1805 	|                               |
1806 	+-------------------------------+ <-- stack_pointer_rtx
1807 
1808    Dynamic stack allocations such as alloca insert data at point P.
1809    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1810    hard_frame_pointer_rtx unchanged.  */
1811 
1812 /* Generate the prologue instructions for entry into a function.
1813    Establish the stack frame by decreasing the stack pointer with a
1814    properly calculated size and, if necessary, create a frame record
1815    filled with the values of LR and previous frame pointer.  The
1816    current FP is also set up if it is in use.  */
1817 
1818 void
1819 aarch64_expand_prologue (void)
1820 {
1821   /* sub sp, sp, #<frame_size>
1822      stp {fp, lr}, [sp, #<frame_size> - 16]
1823      add fp, sp, #<frame_size> - hardfp_offset
1824      stp {cs_reg}, [fp, #-16] etc.
1825 
1826      sub sp, sp, <final_adjustment_if_any>
1827   */
1828   HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
1829   HOST_WIDE_INT frame_size, offset;
1830   HOST_WIDE_INT fp_offset;		/* FP offset from SP */
1831   rtx insn;
1832 
1833   aarch64_layout_frame ();
1834   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1835   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1836 	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1837   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1838 		+ crtl->outgoing_args_size);
1839   offset = frame_size = AARCH64_ROUND_UP (frame_size,
1840 					  STACK_BOUNDARY / BITS_PER_UNIT);
1841 
1842   if (flag_stack_usage_info)
1843     current_function_static_stack_size = frame_size;
1844 
1845   fp_offset = (offset
1846 	       - original_frame_size
1847 	       - cfun->machine->frame.saved_regs_size);
1848 
1849   /* Store pairs and load pairs have a range of only -512 to 504.  */
1850   if (offset >= 512)
1851     {
1852       /* When the frame has a large size, an initial decrease is done on
1853 	 the stack pointer to jump over the callee-allocated save area for
1854 	 register varargs, the local variable area and/or the callee-saved
1855 	 register area.  This will allow the pre-index write-back
1856 	 store pair instructions to be used for setting up the stack frame
1857 	 efficiently.  */
1858       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1859       if (offset >= 512)
1860 	offset = cfun->machine->frame.saved_regs_size;
1861 
1862       frame_size -= (offset + crtl->outgoing_args_size);
1863       fp_offset = 0;
1864 
1865       if (frame_size >= 0x1000000)
1866 	{
1867 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1868 	  emit_move_insn (op0, GEN_INT (-frame_size));
1869 	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1870 	  aarch64_set_frame_expr (gen_rtx_SET
1871 				  (Pmode, stack_pointer_rtx,
1872 				   gen_rtx_PLUS (Pmode,
1873 						 stack_pointer_rtx,
1874 						 GEN_INT (-frame_size))));
1875 	}
1876       else if (frame_size > 0)
1877 	{
1878 	  if ((frame_size & 0xfff) != frame_size)
1879 	    {
1880 	      insn = emit_insn (gen_add2_insn
1881 				(stack_pointer_rtx,
1882 				 GEN_INT (-(frame_size
1883 					    & ~(HOST_WIDE_INT)0xfff))));
1884 	      RTX_FRAME_RELATED_P (insn) = 1;
1885 	    }
1886 	  if ((frame_size & 0xfff) != 0)
1887 	    {
1888 	      insn = emit_insn (gen_add2_insn
1889 				(stack_pointer_rtx,
1890 				 GEN_INT (-(frame_size
1891 					    & (HOST_WIDE_INT)0xfff))));
1892 	      RTX_FRAME_RELATED_P (insn) = 1;
1893 	    }
1894 	}
1895     }
1896   else
1897     frame_size = -1;
1898 
1899   if (offset > 0)
1900     {
1901       /* Save the frame pointer and lr if the frame pointer is needed
1902 	 first.  Make the frame pointer point to the location of the
1903 	 old frame pointer on the stack.  */
1904       if (frame_pointer_needed)
1905 	{
1906 	  rtx mem_fp, mem_lr;
1907 
1908 	  if (fp_offset)
1909 	    {
1910 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1911 					       GEN_INT (-offset)));
1912 	      RTX_FRAME_RELATED_P (insn) = 1;
1913 	      aarch64_set_frame_expr (gen_rtx_SET
1914 				      (Pmode, stack_pointer_rtx,
1915 				       gen_rtx_MINUS (Pmode,
1916 						      stack_pointer_rtx,
1917 						      GEN_INT (offset))));
1918 	      mem_fp = gen_frame_mem (DImode,
1919 				      plus_constant (Pmode,
1920 						     stack_pointer_rtx,
1921 						     fp_offset));
1922 	      mem_lr = gen_frame_mem (DImode,
1923 				      plus_constant (Pmode,
1924 						     stack_pointer_rtx,
1925 						     fp_offset
1926 						     + UNITS_PER_WORD));
1927 	      insn = emit_insn (gen_store_pairdi (mem_fp,
1928 						  hard_frame_pointer_rtx,
1929 						  mem_lr,
1930 						  gen_rtx_REG (DImode,
1931 							       LR_REGNUM)));
1932 	    }
1933 	  else
1934 	    {
1935 	      insn = emit_insn (gen_storewb_pairdi_di
1936 				(stack_pointer_rtx, stack_pointer_rtx,
1937 				 hard_frame_pointer_rtx,
1938 				 gen_rtx_REG (DImode, LR_REGNUM),
1939 				 GEN_INT (-offset),
1940 				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1941 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1942 	    }
1943 
1944 	  /* The first part of a frame-related parallel insn is always
1945 	     assumed to be relevant to the frame calculations;
1946 	     subsequent parts are only frame-related if explicitly
1947 	     marked.  */
1948 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1949 	  RTX_FRAME_RELATED_P (insn) = 1;
1950 
1951 	  /* Set up frame pointer to point to the location of the
1952 	     previous frame pointer on the stack.  */
1953 	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1954 					   stack_pointer_rtx,
1955 					   GEN_INT (fp_offset)));
1956 	  aarch64_set_frame_expr (gen_rtx_SET
1957 				  (Pmode, hard_frame_pointer_rtx,
1958 				   gen_rtx_PLUS (Pmode,
1959 						 stack_pointer_rtx,
1960 						 GEN_INT (fp_offset))));
1961 	  RTX_FRAME_RELATED_P (insn) = 1;
1962 	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1963 					   hard_frame_pointer_rtx));
1964 	}
1965       else
1966 	{
1967 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1968 					   GEN_INT (-offset)));
1969 	  RTX_FRAME_RELATED_P (insn) = 1;
1970 	}
1971 
1972       aarch64_save_or_restore_callee_save_registers
1973 	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
1974     }
1975 
1976   /* when offset >= 512,
1977      sub sp, sp, #<outgoing_args_size> */
1978   if (frame_size > -1)
1979     {
1980       if (crtl->outgoing_args_size > 0)
1981 	{
1982 	  insn = emit_insn (gen_add2_insn
1983 			    (stack_pointer_rtx,
1984 			     GEN_INT (- crtl->outgoing_args_size)));
1985 	  RTX_FRAME_RELATED_P (insn) = 1;
1986 	}
1987     }
1988 }
1989 
1990 /* Generate the epilogue instructions for returning from a function.  */
1991 void
1992 aarch64_expand_epilogue (bool for_sibcall)
1993 {
1994   HOST_WIDE_INT original_frame_size, frame_size, offset;
1995   HOST_WIDE_INT fp_offset;
1996   rtx insn;
1997   rtx cfa_reg;
1998 
1999   aarch64_layout_frame ();
2000   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2001   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2002 		+ crtl->outgoing_args_size);
2003   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2004 					  STACK_BOUNDARY / BITS_PER_UNIT);
2005 
2006   fp_offset = (offset
2007 	       - original_frame_size
2008 	       - cfun->machine->frame.saved_regs_size);
2009 
2010   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2011 
2012   /* Store pairs and load pairs have a range of only -512 to 504.  */
2013   if (offset >= 512)
2014     {
2015       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2016       if (offset >= 512)
2017 	offset = cfun->machine->frame.saved_regs_size;
2018 
2019       frame_size -= (offset + crtl->outgoing_args_size);
2020       fp_offset = 0;
2021       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2022 	{
2023 	  insn = emit_insn (gen_add2_insn
2024 			    (stack_pointer_rtx,
2025 			     GEN_INT (crtl->outgoing_args_size)));
2026 	  RTX_FRAME_RELATED_P (insn) = 1;
2027 	}
2028     }
2029   else
2030     frame_size = -1;
2031 
2032   /* If there were outgoing arguments or we've done dynamic stack
2033      allocation, then restore the stack pointer from the frame
2034      pointer.  This is at most one insn and more efficient than using
2035      GCC's internal mechanism.  */
2036   if (frame_pointer_needed
2037       && (crtl->outgoing_args_size || cfun->calls_alloca))
2038     {
2039       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2040 				       hard_frame_pointer_rtx,
2041 				       GEN_INT (- fp_offset)));
2042       RTX_FRAME_RELATED_P (insn) = 1;
2043       /* As SP is set to (FP - fp_offset), according to the rules in
2044 	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2045 	 from the value of SP from now on.  */
2046       cfa_reg = stack_pointer_rtx;
2047     }
2048 
2049   aarch64_save_or_restore_callee_save_registers
2050     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2051 
2052   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2053   if (offset > 0)
2054     {
2055       if (frame_pointer_needed)
2056 	{
2057 	  rtx mem_fp, mem_lr;
2058 
2059 	  if (fp_offset)
2060 	    {
2061 	      mem_fp = gen_frame_mem (DImode,
2062 				      plus_constant (Pmode,
2063 						     stack_pointer_rtx,
2064 						     fp_offset));
2065 	      mem_lr = gen_frame_mem (DImode,
2066 				      plus_constant (Pmode,
2067 						     stack_pointer_rtx,
2068 						     fp_offset
2069 						     + UNITS_PER_WORD));
2070 	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2071 						 mem_fp,
2072 						 gen_rtx_REG (DImode,
2073 							      LR_REGNUM),
2074 						 mem_lr));
2075 	    }
2076 	  else
2077 	    {
2078 	      insn = emit_insn (gen_loadwb_pairdi_di
2079 				(stack_pointer_rtx,
2080 				 stack_pointer_rtx,
2081 				 hard_frame_pointer_rtx,
2082 				 gen_rtx_REG (DImode, LR_REGNUM),
2083 				 GEN_INT (offset),
2084 				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2085 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2086 	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
2087 			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
2088 					  plus_constant (Pmode, cfa_reg,
2089 							 offset))));
2090 	    }
2091 
2092 	  /* The first part of a frame-related parallel insn
2093 	     is always assumed to be relevant to the frame
2094 	     calculations; subsequent parts are only
2095 	     frame-related if explicitly marked.  */
2096 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2097 	  RTX_FRAME_RELATED_P (insn) = 1;
2098 	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2099 	  add_reg_note (insn, REG_CFA_RESTORE,
2100 			gen_rtx_REG (DImode, LR_REGNUM));
2101 
2102 	  if (fp_offset)
2103 	    {
2104 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2105 					       GEN_INT (offset)));
2106 	      RTX_FRAME_RELATED_P (insn) = 1;
2107 	    }
2108 	}
2109       else
2110 	{
2111 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2112 					   GEN_INT (offset)));
2113 	  RTX_FRAME_RELATED_P (insn) = 1;
2114 	}
2115     }
2116 
2117   /* Stack adjustment for exception handler.  */
2118   if (crtl->calls_eh_return)
2119     {
2120       /* We need to unwind the stack by the offset computed by
2121 	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2122 	 based on SP.  Ideally we would update the SP and define the
2123 	 CFA along the lines of:
2124 
2125 	 SP = SP + EH_RETURN_STACKADJ_RTX
2126 	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2127 
2128 	 However the dwarf emitter only understands a constant
2129 	 register offset.
2130 
2131 	 The solution chosen here is to use the otherwise unused IP0
2132 	 as a temporary register to hold the current SP value.  The
2133 	 CFA is described using IP0 then SP is modified.  */
2134 
2135       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2136 
2137       insn = emit_move_insn (ip0, stack_pointer_rtx);
2138       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2139       RTX_FRAME_RELATED_P (insn) = 1;
2140 
2141       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2142 
2143       /* Ensure the assignment to IP0 does not get optimized away.  */
2144       emit_use (ip0);
2145     }
2146 
2147   if (frame_size > -1)
2148     {
2149       if (frame_size >= 0x1000000)
2150 	{
2151 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2152 	  emit_move_insn (op0, GEN_INT (frame_size));
2153 	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2154 	  aarch64_set_frame_expr (gen_rtx_SET
2155 				  (Pmode, stack_pointer_rtx,
2156 				   gen_rtx_PLUS (Pmode,
2157 						 stack_pointer_rtx,
2158 						 GEN_INT (frame_size))));
2159 	}
2160       else if (frame_size > 0)
2161 	{
2162 	  if ((frame_size & 0xfff) != 0)
2163 	    {
2164 	      insn = emit_insn (gen_add2_insn
2165 				(stack_pointer_rtx,
2166 				 GEN_INT ((frame_size
2167 					   & (HOST_WIDE_INT) 0xfff))));
2168 	      RTX_FRAME_RELATED_P (insn) = 1;
2169 	    }
2170 	  if ((frame_size & 0xfff) != frame_size)
2171 	    {
2172 	      insn = emit_insn (gen_add2_insn
2173 				(stack_pointer_rtx,
2174 				 GEN_INT ((frame_size
2175 					   & ~ (HOST_WIDE_INT) 0xfff))));
2176 	      RTX_FRAME_RELATED_P (insn) = 1;
2177 	    }
2178 	}
2179 
2180       aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2181 					   gen_rtx_PLUS (Pmode,
2182 							 stack_pointer_rtx,
2183 							 GEN_INT (offset))));
2184     }
2185 
2186   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2187   if (!for_sibcall)
2188     emit_jump_insn (ret_rtx);
2189 }
2190 
2191 /* Return the place to copy the exception unwinding return address to.
2192    This will probably be a stack slot, but could (in theory) be the
2193    return register.  */
2194 rtx
2195 aarch64_final_eh_return_addr (void)
2196 {
2197   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2198   aarch64_layout_frame ();
2199   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2200   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2201 		+ crtl->outgoing_args_size);
2202   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2203 					  STACK_BOUNDARY / BITS_PER_UNIT);
2204   fp_offset = offset
2205     - original_frame_size
2206     - cfun->machine->frame.saved_regs_size;
2207 
2208   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2209     return gen_rtx_REG (DImode, LR_REGNUM);
2210 
2211   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2212      result in a store to save LR introduced by builtin_eh_return () being
2213      incorrectly deleted because the alias is not detected.
2214      So in the calculation of the address to copy the exception unwinding
2215      return address to, we note 2 cases.
2216      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2217      we return a SP-relative location since all the addresses are SP-relative
2218      in this case.  This prevents the store from being optimized away.
2219      If the fp_offset is not 0, then the addresses will be FP-relative and
2220      therefore we return a FP-relative location.  */
2221 
2222   if (frame_pointer_needed)
2223     {
2224       if (fp_offset)
2225         return gen_frame_mem (DImode,
2226 			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2227       else
2228         return gen_frame_mem (DImode,
2229 			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2230     }
2231 
2232   /* If FP is not needed, we calculate the location of LR, which would be
2233      at the top of the saved registers block.  */
2234 
2235   return gen_frame_mem (DImode,
2236 			plus_constant (Pmode,
2237 				       stack_pointer_rtx,
2238 				       fp_offset
2239 				       + cfun->machine->frame.saved_regs_size
2240 				       - 2 * UNITS_PER_WORD));
2241 }
2242 
2243 /* Output code to build up a constant in a register.  */
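/* As an illustrative, hand-worked example of the MOVZ/MOVK path below
   (register name xN is only a placeholder): building 0x123400005678
   starts from the low 16-bit chunk and then patches the one remaining
   non-zero chunk, giving roughly

	mov	xN, #0x5678
	movk	xN, #0x1234, lsl #32

   Constants that happen to be valid bitmask immediates are instead
   emitted as a single move.  */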
2244 static void
2245 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2246 {
2247   if (aarch64_bitmask_imm (val, DImode))
2248     emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2249   else
2250     {
2251       int i;
2252       int ncount = 0;
2253       int zcount = 0;
2254       HOST_WIDE_INT valp = val >> 16;
2255       HOST_WIDE_INT valm;
2256       HOST_WIDE_INT tval;
2257 
2258       for (i = 16; i < 64; i += 16)
2259 	{
2260 	  valm = (valp & 0xffff);
2261 
2262 	  if (valm != 0)
2263 	    ++ zcount;
2264 
2265 	  if (valm != 0xffff)
2266 	    ++ ncount;
2267 
2268 	  valp >>= 16;
2269 	}
2270 
2271       /* zcount contains the number of additional MOVK instructions
2272 	 required if the constant is built up with an initial MOVZ instruction,
2273 	 while ncount is the number of MOVK instructions required if starting
2274 	 with a MOVN instruction.  Choose the sequence that yields the fewest
2275 	 number of instructions, preferring MOVZ instructions when they are both
2276 	 the same.  */
2277       if (ncount < zcount)
2278 	{
2279 	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
2280 			  GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2281 	  tval = 0xffff;
2282 	}
2283       else
2284 	{
2285 	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
2286 			  GEN_INT (val & 0xffff));
2287 	  tval = 0;
2288 	}
2289 
2290       val >>= 16;
2291 
2292       for (i = 16; i < 64; i += 16)
2293 	{
2294 	  if ((val & 0xffff) != tval)
2295 	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2296 				       GEN_INT (i), GEN_INT (val & 0xffff)));
2297 	  val >>= 16;
2298 	}
2299     }
2300 }
2301 
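/* Add DELTA to register REGNUM, using SCRATCHREG as a temporary where
   needed.  Deltas smaller in magnitude than 2^24 are handled with at
   most two additions: the high part (if any) goes through SCRATCHREG
   shifted left by 12, and the remainder is a plain 12-bit immediate.
   Larger deltas are first materialised in SCRATCHREG by
   aarch64_build_constant.  */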
2302 static void
2303 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2304 {
2305   HOST_WIDE_INT mdelta = delta;
2306   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2307   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2308 
2309   if (mdelta < 0)
2310     mdelta = -mdelta;
2311 
2312   if (mdelta >= 4096 * 4096)
2313     {
2314       aarch64_build_constant (scratchreg, delta);
2315       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2316     }
2317   else if (mdelta > 0)
2318     {
2319       if (mdelta >= 4096)
2320 	{
2321 	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2322 	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2323 	  if (delta < 0)
2324 	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
2326 	  else
2327 	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
2328 				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
2329 	}
2330       if (mdelta % 4096 != 0)
2331 	{
2332 	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2333 	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
2334 				  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2335 	}
2336     }
2337 }
2338 
2339 /* Output code to add DELTA to the first argument, and then jump
2340    to FUNCTION.  Used for C++ multiple inheritance.  */
2341 static void
2342 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2343 			 HOST_WIDE_INT delta,
2344 			 HOST_WIDE_INT vcall_offset,
2345 			 tree function)
2346 {
2347   /* The this pointer is always in x0.  Note that this differs from
2348      Arm, where the this pointer may be bumped to r1 if r0 is required
2349      to return a pointer to an aggregate.  On AArch64 a result value
2350      pointer will be in x8.  */
2351   int this_regno = R0_REGNUM;
2352   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2353 
2354   reload_completed = 1;
2355   emit_note (NOTE_INSN_PROLOGUE_END);
2356 
2357   if (vcall_offset == 0)
2358     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2359   else
2360     {
2361       gcc_assert ((vcall_offset & 0x7) == 0);
2362 
2363       this_rtx = gen_rtx_REG (Pmode, this_regno);
2364       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2365       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2366 
2367       addr = this_rtx;
2368       if (delta != 0)
2369 	{
2370 	  if (delta >= -256 && delta < 256)
2371 	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2372 				       plus_constant (Pmode, this_rtx, delta));
2373 	  else
2374 	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2375 	}
2376 
2377       aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2378 
2379       if (vcall_offset >= -256 && vcall_offset < 32768)
2380 	  addr = plus_constant (Pmode, temp0, vcall_offset);
2381       else
2382 	{
2383 	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
2384 	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2385 	}
2386 
2387       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2388       emit_insn (gen_add2_insn (this_rtx, temp1));
2389     }
2390 
2391   /* Generate a tail call to the target function.  */
2392   if (!TREE_USED (function))
2393     {
2394       assemble_external (function);
2395       TREE_USED (function) = 1;
2396     }
2397   funexp = XEXP (DECL_RTL (function), 0);
2398   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2399   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2400   SIBLING_CALL_P (insn) = 1;
2401 
2402   insn = get_insns ();
2403   shorten_branches (insn);
2404   final_start_function (insn, file, 1);
2405   final (insn, file, 1);
2406   final_end_function ();
2407 
2408   /* Stop pretending to be a post-reload pass.  */
2409   reload_completed = 0;
2410 }
2411 
2412 static int
2413 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2414 {
2415   if (GET_CODE (*x) == SYMBOL_REF)
2416     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2417 
2418   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2419      TLS offsets, not real symbol references.  */
2420   if (GET_CODE (*x) == UNSPEC
2421       && XINT (*x, 1) == UNSPEC_TLS)
2422     return -1;
2423 
2424   return 0;
2425 }
2426 
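/* Return true if X contains a reference to a thread-local symbol
   (and the assembler supports TLS at all).  */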
2427 static bool
2428 aarch64_tls_referenced_p (rtx x)
2429 {
2430   if (!TARGET_HAVE_TLS)
2431     return false;
2432 
2433   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2434 }
2435 
2436 
2437 static int
2438 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2439 {
2440   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2441   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2442 
2443   if (*imm1 < *imm2)
2444     return -1;
2445   if (*imm1 > *imm2)
2446     return +1;
2447   return 0;
2448 }
2449 
2450 
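/* Populate aarch64_bitmasks with every value representable as a logical
   immediate: for each element size e in {2,4,8,16,32,64}, a run of s set
   bits (1 <= s < e) rotated right by r (0 <= r < e) and then replicated
   across the 64-bit word.  As a hand-worked example, e = 8, s = 3, r = 1
   gives the byte 0x83, replicated to 0x8383838383838383.  The table is
   sorted so that aarch64_bitmask_imm can use bsearch.  */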
2451 static void
2452 aarch64_build_bitmask_table (void)
2453 {
2454   unsigned HOST_WIDE_INT mask, imm;
2455   unsigned int log_e, e, s, r;
2456   unsigned int nimms = 0;
2457 
2458   for (log_e = 1; log_e <= 6; log_e++)
2459     {
2460       e = 1 << log_e;
2461       if (e == 64)
2462 	mask = ~(HOST_WIDE_INT) 0;
2463       else
2464 	mask = ((HOST_WIDE_INT) 1 << e) - 1;
2465       for (s = 1; s < e; s++)
2466 	{
2467 	  for (r = 0; r < e; r++)
2468 	    {
2469 	      /* set s consecutive bits to 1 (s < 64) */
2470 	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2471 	      /* rotate right by r */
2472 	      if (r != 0)
2473 		imm = ((imm >> r) | (imm << (e - r))) & mask;
2474 	      /* replicate the constant depending on SIMD size */
2475 	      switch (log_e) {
2476 	      case 1: imm |= (imm <<  2);
2477 	      case 2: imm |= (imm <<  4);
2478 	      case 3: imm |= (imm <<  8);
2479 	      case 4: imm |= (imm << 16);
2480 	      case 5: imm |= (imm << 32);
2481 	      case 6:
2482 		break;
2483 	      default:
2484 		gcc_unreachable ();
2485 	      }
2486 	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2487 	      aarch64_bitmasks[nimms++] = imm;
2488 	    }
2489 	}
2490     }
2491 
2492   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2493   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2494 	 aarch64_bitmasks_cmp);
2495 }
2496 
2497 
2498 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2499    a left shift of 0 or 12 bits.  */
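/* For example, 0xabc (shift of 0) and 0xabc000 (shift of 12) are both
   accepted, while 0x1abc is rejected because its set bits straddle the
   two 12-bit fields.  */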
2500 bool
2501 aarch64_uimm12_shift (HOST_WIDE_INT val)
2502 {
2503   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2504 	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2505 	  );
2506 }
2507 
2508 
2509 /* Return true if val is an immediate that can be loaded into a
2510    register by a MOVZ instruction.  */
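/* For instance, for DImode each of 0x1234, 0x12340000 and
   0x1234000000000000 fits a single MOVZ (with LSL #0, #16 and #48
   respectively), whereas 0x12345678 does not.  Callers separately test
   ~val to catch constants loadable with MOVN.  */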
2511 static bool
2512 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2513 {
2514   if (GET_MODE_SIZE (mode) > 4)
2515     {
2516       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2517 	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2518 	return 1;
2519     }
2520   else
2521     {
2522       /* Ignore sign extension.  */
2523       val &= (HOST_WIDE_INT) 0xffffffff;
2524     }
2525   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2526 	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2527 }
2528 
2529 
2530 /* Return true if val is a valid bitmask immediate.  */
2531 bool
2532 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2533 {
2534   if (GET_MODE_SIZE (mode) < 8)
2535     {
2536       /* Replicate bit pattern.  */
2537       val &= (HOST_WIDE_INT) 0xffffffff;
2538       val |= val << 32;
2539     }
2540   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2541 		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2542 }
2543 
2544 
2545 /* Return true if val is an immediate that can be loaded into a
2546    register in a single instruction.  */
2547 bool
2548 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2549 {
2550   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2551     return 1;
2552   return aarch64_bitmask_imm (val, mode);
2553 }
2554 
2555 static bool
2556 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2557 {
2558   rtx base, offset;
2559   if (GET_CODE (x) == HIGH)
2560     return true;
2561 
2562   split_const (x, &base, &offset);
2563   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2564     return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2565 
2566   return aarch64_tls_referenced_p (x);
2567 }
2568 
2569 /* Return true if register REGNO is a valid index register.
2570    STRICT_P is true if REG_OK_STRICT is in effect.  */
2571 
2572 bool
2573 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2574 {
2575   if (!HARD_REGISTER_NUM_P (regno))
2576     {
2577       if (!strict_p)
2578 	return true;
2579 
2580       if (!reg_renumber)
2581 	return false;
2582 
2583       regno = reg_renumber[regno];
2584     }
2585   return GP_REGNUM_P (regno);
2586 }
2587 
2588 /* Return true if register REGNO is a valid base register for mode MODE.
2589    STRICT_P is true if REG_OK_STRICT is in effect.  */
2590 
2591 bool
2592 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2593 {
2594   if (!HARD_REGISTER_NUM_P (regno))
2595     {
2596       if (!strict_p)
2597 	return true;
2598 
2599       if (!reg_renumber)
2600 	return false;
2601 
2602       regno = reg_renumber[regno];
2603     }
2604 
2605   /* The fake registers will be eliminated to either the stack or
2606      hard frame pointer, both of which are usually valid base registers.
2607      Reload deals with the cases where the eliminated form isn't valid.  */
2608   return (GP_REGNUM_P (regno)
2609 	  || regno == SP_REGNUM
2610 	  || regno == FRAME_POINTER_REGNUM
2611 	  || regno == ARG_POINTER_REGNUM);
2612 }
2613 
2614 /* Return true if X is a valid base register for mode MODE.
2615    STRICT_P is true if REG_OK_STRICT is in effect.  */
2616 
2617 static bool
2618 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2619 {
2620   if (!strict_p && GET_CODE (x) == SUBREG)
2621     x = SUBREG_REG (x);
2622 
2623   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2624 }
2625 
2626 /* Return true if address offset is a valid index.  If it is, fill in INFO
2627    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
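/* As an illustrative case: for an 8-byte MODE, an index of the form
   (ashift (reg) (const_int 3)) inside a PLUS is accepted with shift 3,
   since 1 << 3 matches the access size, and corresponds to the
   "[xN, xM, lsl #3]" addressing form.  */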
2628 
2629 static bool
2630 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2631 			enum machine_mode mode, bool strict_p)
2632 {
2633   enum aarch64_address_type type;
2634   rtx index;
2635   int shift;
2636 
2637   /* (reg:P) */
2638   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2639       && GET_MODE (x) == Pmode)
2640     {
2641       type = ADDRESS_REG_REG;
2642       index = x;
2643       shift = 0;
2644     }
2645   /* (sign_extend:DI (reg:SI)) */
2646   else if ((GET_CODE (x) == SIGN_EXTEND
2647 	    || GET_CODE (x) == ZERO_EXTEND)
2648 	   && GET_MODE (x) == DImode
2649 	   && GET_MODE (XEXP (x, 0)) == SImode)
2650     {
2651       type = (GET_CODE (x) == SIGN_EXTEND)
2652 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2653       index = XEXP (x, 0);
2654       shift = 0;
2655     }
2656   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2657   else if (GET_CODE (x) == MULT
2658 	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2659 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2660 	   && GET_MODE (XEXP (x, 0)) == DImode
2661 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2662 	   && CONST_INT_P (XEXP (x, 1)))
2663     {
2664       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2665 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2666       index = XEXP (XEXP (x, 0), 0);
2667       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2668     }
2669   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2670   else if (GET_CODE (x) == ASHIFT
2671 	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2672 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2673 	   && GET_MODE (XEXP (x, 0)) == DImode
2674 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2675 	   && CONST_INT_P (XEXP (x, 1)))
2676     {
2677       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2678 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2679       index = XEXP (XEXP (x, 0), 0);
2680       shift = INTVAL (XEXP (x, 1));
2681     }
2682   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2683   else if ((GET_CODE (x) == SIGN_EXTRACT
2684 	    || GET_CODE (x) == ZERO_EXTRACT)
2685 	   && GET_MODE (x) == DImode
2686 	   && GET_CODE (XEXP (x, 0)) == MULT
2687 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2688 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2689     {
2690       type = (GET_CODE (x) == SIGN_EXTRACT)
2691 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2692       index = XEXP (XEXP (x, 0), 0);
2693       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2694       if (INTVAL (XEXP (x, 1)) != 32 + shift
2695 	  || INTVAL (XEXP (x, 2)) != 0)
2696 	shift = -1;
2697     }
2698   /* (and:DI (mult:DI (reg:DI) (const_int scale))
2699      (const_int 0xffffffff<<shift)) */
2700   else if (GET_CODE (x) == AND
2701 	   && GET_MODE (x) == DImode
2702 	   && GET_CODE (XEXP (x, 0)) == MULT
2703 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2704 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2705 	   && CONST_INT_P (XEXP (x, 1)))
2706     {
2707       type = ADDRESS_REG_UXTW;
2708       index = XEXP (XEXP (x, 0), 0);
2709       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2710       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2711 	shift = -1;
2712     }
2713   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2714   else if ((GET_CODE (x) == SIGN_EXTRACT
2715 	    || GET_CODE (x) == ZERO_EXTRACT)
2716 	   && GET_MODE (x) == DImode
2717 	   && GET_CODE (XEXP (x, 0)) == ASHIFT
2718 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2719 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2720     {
2721       type = (GET_CODE (x) == SIGN_EXTRACT)
2722 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2723       index = XEXP (XEXP (x, 0), 0);
2724       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2725       if (INTVAL (XEXP (x, 1)) != 32 + shift
2726 	  || INTVAL (XEXP (x, 2)) != 0)
2727 	shift = -1;
2728     }
2729   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2730      (const_int 0xffffffff<<shift)) */
2731   else if (GET_CODE (x) == AND
2732 	   && GET_MODE (x) == DImode
2733 	   && GET_CODE (XEXP (x, 0)) == ASHIFT
2734 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2735 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2736 	   && CONST_INT_P (XEXP (x, 1)))
2737     {
2738       type = ADDRESS_REG_UXTW;
2739       index = XEXP (XEXP (x, 0), 0);
2740       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2741       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2742 	shift = -1;
2743     }
2744   /* (mult:P (reg:P) (const_int scale)) */
2745   else if (GET_CODE (x) == MULT
2746 	   && GET_MODE (x) == Pmode
2747 	   && GET_MODE (XEXP (x, 0)) == Pmode
2748 	   && CONST_INT_P (XEXP (x, 1)))
2749     {
2750       type = ADDRESS_REG_REG;
2751       index = XEXP (x, 0);
2752       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2753     }
2754   /* (ashift:P (reg:P) (const_int shift)) */
2755   else if (GET_CODE (x) == ASHIFT
2756 	   && GET_MODE (x) == Pmode
2757 	   && GET_MODE (XEXP (x, 0)) == Pmode
2758 	   && CONST_INT_P (XEXP (x, 1)))
2759     {
2760       type = ADDRESS_REG_REG;
2761       index = XEXP (x, 0);
2762       shift = INTVAL (XEXP (x, 1));
2763     }
2764   else
2765     return false;
2766 
2767   if (GET_CODE (index) == SUBREG)
2768     index = SUBREG_REG (index);
2769 
2770   if ((shift == 0 ||
2771        (shift > 0 && shift <= 3
2772 	&& (1 << shift) == GET_MODE_SIZE (mode)))
2773       && REG_P (index)
2774       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2775     {
2776       info->type = type;
2777       info->offset = index;
2778       info->shift = shift;
2779       return true;
2780     }
2781 
2782   return false;
2783 }
2784 
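/* Return true if OFFSET is within the signed 7-bit, size-scaled range
   used by load/store pair instructions for MODE; e.g. for DImode this
   is -512 to 504 in multiples of 8.  */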
2785 static inline bool
2786 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2787 {
2788   return (offset >= -64 * GET_MODE_SIZE (mode)
2789 	  && offset < 64 * GET_MODE_SIZE (mode)
2790 	  && offset % GET_MODE_SIZE (mode) == 0);
2791 }
2792 
2793 static inline bool
2794 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2795 			       HOST_WIDE_INT offset)
2796 {
2797   return offset >= -256 && offset < 256;
2798 }
2799 
2800 static inline bool
2801 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2802 {
2803   return (offset >= 0
2804 	  && offset < 4096 * GET_MODE_SIZE (mode)
2805 	  && offset % GET_MODE_SIZE (mode) == 0);
2806 }
2807 
2808 /* Return true if X is a valid address for machine mode MODE.  If it is,
2809    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
2810    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
2811 
2812 static bool
2813 aarch64_classify_address (struct aarch64_address_info *info,
2814 			  rtx x, enum machine_mode mode,
2815 			  RTX_CODE outer_code, bool strict_p)
2816 {
2817   enum rtx_code code = GET_CODE (x);
2818   rtx op0, op1;
2819   bool allow_reg_index_p =
2820     outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2821 
2822   /* Don't support anything other than POST_INC or REG addressing for
2823      AdvSIMD.  */
2824   if (aarch64_vector_mode_p (mode)
2825       && (code != POST_INC && code != REG))
2826     return false;
2827 
2828   switch (code)
2829     {
2830     case REG:
2831     case SUBREG:
2832       info->type = ADDRESS_REG_IMM;
2833       info->base = x;
2834       info->offset = const0_rtx;
2835       return aarch64_base_register_rtx_p (x, strict_p);
2836 
2837     case PLUS:
2838       op0 = XEXP (x, 0);
2839       op1 = XEXP (x, 1);
2840       if (GET_MODE_SIZE (mode) != 0
2841 	  && CONST_INT_P (op1)
2842 	  && aarch64_base_register_rtx_p (op0, strict_p))
2843 	{
2844 	  HOST_WIDE_INT offset = INTVAL (op1);
2845 
2846 	  info->type = ADDRESS_REG_IMM;
2847 	  info->base = op0;
2848 	  info->offset = op1;
2849 
2850 	  /* TImode and TFmode values are allowed in both pairs of X
2851 	     registers and individual Q registers.  The available
2852 	     address modes are:
2853 	     X,X: 7-bit signed scaled offset
2854 	     Q:   9-bit signed offset
2855 	     We conservatively require an offset representable in either mode.
2856 	   */
2857 	  if (mode == TImode || mode == TFmode)
2858 	    return (offset_7bit_signed_scaled_p (mode, offset)
2859 		    && offset_9bit_signed_unscaled_p (mode, offset));
2860 
2861 	  if (outer_code == PARALLEL)
2862 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2863 		    && offset_7bit_signed_scaled_p (mode, offset));
2864 	  else
2865 	    return (offset_9bit_signed_unscaled_p (mode, offset)
2866 		    || offset_12bit_unsigned_scaled_p (mode, offset));
2867 	}
2868 
2869       if (allow_reg_index_p)
2870 	{
2871 	  /* Look for base + (scaled/extended) index register.  */
2872 	  if (aarch64_base_register_rtx_p (op0, strict_p)
2873 	      && aarch64_classify_index (info, op1, mode, strict_p))
2874 	    {
2875 	      info->base = op0;
2876 	      return true;
2877 	    }
2878 	  if (aarch64_base_register_rtx_p (op1, strict_p)
2879 	      && aarch64_classify_index (info, op0, mode, strict_p))
2880 	    {
2881 	      info->base = op1;
2882 	      return true;
2883 	    }
2884 	}
2885 
2886       return false;
2887 
2888     case POST_INC:
2889     case POST_DEC:
2890     case PRE_INC:
2891     case PRE_DEC:
2892       info->type = ADDRESS_REG_WB;
2893       info->base = XEXP (x, 0);
2894       info->offset = NULL_RTX;
2895       return aarch64_base_register_rtx_p (info->base, strict_p);
2896 
2897     case POST_MODIFY:
2898     case PRE_MODIFY:
2899       info->type = ADDRESS_REG_WB;
2900       info->base = XEXP (x, 0);
2901       if (GET_CODE (XEXP (x, 1)) == PLUS
2902 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2903 	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2904 	  && aarch64_base_register_rtx_p (info->base, strict_p))
2905 	{
2906 	  HOST_WIDE_INT offset;
2907 	  info->offset = XEXP (XEXP (x, 1), 1);
2908 	  offset = INTVAL (info->offset);
2909 
2910 	  /* TImode and TFmode values are allowed in both pairs of X
2911 	     registers and individual Q registers.  The available
2912 	     address modes are:
2913 	     X,X: 7-bit signed scaled offset
2914 	     Q:   9-bit signed offset
2915 	     We conservatively require an offset representable in either mode.
2916 	   */
2917 	  if (mode == TImode || mode == TFmode)
2918 	    return (offset_7bit_signed_scaled_p (mode, offset)
2919 		    && offset_9bit_signed_unscaled_p (mode, offset));
2920 
2921 	  if (outer_code == PARALLEL)
2922 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2923 		    && offset_7bit_signed_scaled_p (mode, offset));
2924 	  else
2925 	    return offset_9bit_signed_unscaled_p (mode, offset);
2926 	}
2927       return false;
2928 
2929     case CONST:
2930     case SYMBOL_REF:
2931     case LABEL_REF:
2932       /* load literal: pc-relative constant pool entry.  Only supported
2933          for SI mode or larger.  */
2934       info->type = ADDRESS_SYMBOLIC;
2935       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2936 	{
2937 	  rtx sym, addend;
2938 
2939 	  split_const (x, &sym, &addend);
2940 	  return (GET_CODE (sym) == LABEL_REF
2941 		  || (GET_CODE (sym) == SYMBOL_REF
2942 		      && CONSTANT_POOL_ADDRESS_P (sym)));
2943 	}
2944       return false;
2945 
2946     case LO_SUM:
2947       info->type = ADDRESS_LO_SUM;
2948       info->base = XEXP (x, 0);
2949       info->offset = XEXP (x, 1);
2950       if (allow_reg_index_p
2951 	  && aarch64_base_register_rtx_p (info->base, strict_p))
2952 	{
2953 	  rtx sym, offs;
2954 	  split_const (info->offset, &sym, &offs);
2955 	  if (GET_CODE (sym) == SYMBOL_REF
2956 	      && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2957 		  == SYMBOL_SMALL_ABSOLUTE))
2958 	    {
2959 	      /* The symbol and offset must be aligned to the access size.  */
2960 	      unsigned int align;
2961 	      unsigned int ref_size;
2962 
2963 	      if (CONSTANT_POOL_ADDRESS_P (sym))
2964 		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2965 	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2966 		{
2967 		  tree exp = SYMBOL_REF_DECL (sym);
2968 		  align = TYPE_ALIGN (TREE_TYPE (exp));
2969 		  align = CONSTANT_ALIGNMENT (exp, align);
2970 		}
2971 	      else if (SYMBOL_REF_DECL (sym))
2972 		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2973 	      else
2974 		align = BITS_PER_UNIT;
2975 
2976 	      ref_size = GET_MODE_SIZE (mode);
2977 	      if (ref_size == 0)
2978 		ref_size = GET_MODE_SIZE (DImode);
2979 
2980 	      return ((INTVAL (offs) & (ref_size - 1)) == 0
2981 		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2982 	    }
2983 	}
2984       return false;
2985 
2986     default:
2987       return false;
2988     }
2989 }
2990 
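/* Return true if X is a bare symbolic address, i.e. a SYMBOL_REF or
   LABEL_REF possibly plus a constant offset.  */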
2991 bool
2992 aarch64_symbolic_address_p (rtx x)
2993 {
2994   rtx offset;
2995 
2996   split_const (x, &x, &offset);
2997   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2998 }
2999 
3000 /* Classify the base of symbolic expression X, given that X appears in
3001    context CONTEXT.  */
3002 static enum aarch64_symbol_type
3003 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3004 {
3005   rtx offset;
3006   split_const (x, &x, &offset);
3007   return aarch64_classify_symbol (x, context);
3008 }
3009 
3010 
3011 /* Return TRUE if X is a legitimate address for accessing memory in
3012    mode MODE.  */
3013 static bool
3014 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3015 {
3016   struct aarch64_address_info addr;
3017 
3018   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3019 }
3020 
3021 /* Return TRUE if X is a legitimate address for accessing memory in
3022    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3023    pair operation.  */
3024 bool
3025 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3026 			   RTX_CODE outer_code, bool strict_p)
3027 {
3028   struct aarch64_address_info addr;
3029 
3030   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3031 }
3032 
3033 /* Return TRUE if rtx X is immediate constant 0.0 */
3034 bool
3035 aarch64_float_const_zero_rtx_p (rtx x)
3036 {
3037   REAL_VALUE_TYPE r;
3038 
3039   if (GET_MODE (x) == VOIDmode)
3040     return false;
3041 
3042   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3043   if (REAL_VALUE_MINUS_ZERO (r))
3044     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3045   return REAL_VALUES_EQUAL (r, dconst0);
3046 }
3047 
3048 /* Return the fixed registers used for condition codes.  */
3049 
3050 static bool
3051 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3052 {
3053   *p1 = CC_REGNUM;
3054   *p2 = INVALID_REGNUM;
3055   return true;
3056 }
3057 
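/* Pick the condition-code mode for comparing X against Y with rtx code
   CODE.  The swapped modes (CC_SWPmode, CC_SESWPmode, CC_ZESWPmode)
   record that the operands will be reversed when the compare is output,
   which is why aarch64_get_condition_code maps, for example, GE to
   AARCH64_LE for them.  */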
3058 enum machine_mode
3059 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3060 {
3061   /* All floating point compares return CCFP if it is an equality
3062      comparison, and CCFPE otherwise.  */
3063   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3064     {
3065       switch (code)
3066 	{
3067 	case EQ:
3068 	case NE:
3069 	case UNORDERED:
3070 	case ORDERED:
3071 	case UNLT:
3072 	case UNLE:
3073 	case UNGT:
3074 	case UNGE:
3075 	case UNEQ:
3076 	case LTGT:
3077 	  return CCFPmode;
3078 
3079 	case LT:
3080 	case LE:
3081 	case GT:
3082 	case GE:
3083 	  return CCFPEmode;
3084 
3085 	default:
3086 	  gcc_unreachable ();
3087 	}
3088     }
3089 
3090   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3091       && y == const0_rtx
3092       && (code == EQ || code == NE || code == LT || code == GE)
3093       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
3094     return CC_NZmode;
3095 
3096   /* A compare with a shifted operand.  Because of canonicalization,
3097      the comparison will have to be swapped when we emit the assembly
3098      code.  */
3099   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3100       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3101       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3102 	  || GET_CODE (x) == LSHIFTRT
3103 	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3104     return CC_SWPmode;
3105 
3106   /* A compare of a mode narrower than SI mode against zero can be done
3107      by extending the value in the comparison.  */
3108   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3109       && y == const0_rtx)
3110     /* Only use sign-extension if we really need it.  */
3111     return ((code == GT || code == GE || code == LE || code == LT)
3112 	    ? CC_SESWPmode : CC_ZESWPmode);
3113 
3114   /* For everything else, return CCmode.  */
3115   return CCmode;
3116 }
3117 
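/* Return the AARCH64_* condition code corresponding to comparison X,
   taking the CC mode into account; for the swapped CC modes the
   returned condition is the one for the swapped operand order.  */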
3118 static unsigned
3119 aarch64_get_condition_code (rtx x)
3120 {
3121   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3122   enum rtx_code comp_code = GET_CODE (x);
3123 
3124   if (GET_MODE_CLASS (mode) != MODE_CC)
3125     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3126 
3127   switch (mode)
3128     {
3129     case CCFPmode:
3130     case CCFPEmode:
3131       switch (comp_code)
3132 	{
3133 	case GE: return AARCH64_GE;
3134 	case GT: return AARCH64_GT;
3135 	case LE: return AARCH64_LS;
3136 	case LT: return AARCH64_MI;
3137 	case NE: return AARCH64_NE;
3138 	case EQ: return AARCH64_EQ;
3139 	case ORDERED: return AARCH64_VC;
3140 	case UNORDERED: return AARCH64_VS;
3141 	case UNLT: return AARCH64_LT;
3142 	case UNLE: return AARCH64_LE;
3143 	case UNGT: return AARCH64_HI;
3144 	case UNGE: return AARCH64_PL;
3145 	default: gcc_unreachable ();
3146 	}
3147       break;
3148 
3149     case CCmode:
3150       switch (comp_code)
3151 	{
3152 	case NE: return AARCH64_NE;
3153 	case EQ: return AARCH64_EQ;
3154 	case GE: return AARCH64_GE;
3155 	case GT: return AARCH64_GT;
3156 	case LE: return AARCH64_LE;
3157 	case LT: return AARCH64_LT;
3158 	case GEU: return AARCH64_CS;
3159 	case GTU: return AARCH64_HI;
3160 	case LEU: return AARCH64_LS;
3161 	case LTU: return AARCH64_CC;
3162 	default: gcc_unreachable ();
3163 	}
3164       break;
3165 
3166     case CC_SWPmode:
3167     case CC_ZESWPmode:
3168     case CC_SESWPmode:
3169       switch (comp_code)
3170 	{
3171 	case NE: return AARCH64_NE;
3172 	case EQ: return AARCH64_EQ;
3173 	case GE: return AARCH64_LE;
3174 	case GT: return AARCH64_LT;
3175 	case LE: return AARCH64_GE;
3176 	case LT: return AARCH64_GT;
3177 	case GEU: return AARCH64_LS;
3178 	case GTU: return AARCH64_CC;
3179 	case LEU: return AARCH64_CS;
3180 	case LTU: return AARCH64_HI;
3181 	default: gcc_unreachable ();
3182 	}
3183       break;
3184 
3185     case CC_NZmode:
3186       switch (comp_code)
3187 	{
3188 	case NE: return AARCH64_NE;
3189 	case EQ: return AARCH64_EQ;
3190 	case GE: return AARCH64_PL;
3191 	case LT: return AARCH64_MI;
3192 	default: gcc_unreachable ();
3193 	}
3194       break;
3195 
3196     default:
3197       gcc_unreachable ();
3198       break;
3199     }
3200 }
3201 
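/* Count the number of set bits in VALUE; each iteration of the loop
   below clears the lowest set bit.  */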
3202 static unsigned
3203 bit_count (unsigned HOST_WIDE_INT value)
3204 {
3205   unsigned count = 0;
3206 
3207   while (value)
3208     {
3209       count++;
3210       value &= value - 1;
3211     }
3212 
3213   return count;
3214 }
3215 
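/* Print operand X to file F, applying the operand modifier CODE from
   the assembler template; for example "%w0" prints a general register
   operand using its 32-bit (w) name.  */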
3216 void
3217 aarch64_print_operand (FILE *f, rtx x, char code)
3218 {
3219   switch (code)
3220     {
3221     case 'e':
3222       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3223       {
3224 	int n;
3225 
3226 	if (GET_CODE (x) != CONST_INT
3227 	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3228 	  {
3229 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3230 	    return;
3231 	  }
3232 
3233 	switch (n)
3234 	  {
3235 	  case 3:
3236 	    fputc ('b', f);
3237 	    break;
3238 	  case 4:
3239 	    fputc ('h', f);
3240 	    break;
3241 	  case 5:
3242 	    fputc ('w', f);
3243 	    break;
3244 	  default:
3245 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3246 	    return;
3247 	  }
3248       }
3249       break;
3250 
3251     case 'p':
3252       {
3253 	int n;
3254 
3255 	/* Print N such that 2^N == X.  */
3256 	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3257 	  {
3258 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3259 	    return;
3260 	  }
3261 
3262 	asm_fprintf (f, "%d", n);
3263       }
3264       break;
3265 
3266     case 'P':
3267       /* Print the number of non-zero bits in X (a const_int).  */
3268       if (GET_CODE (x) != CONST_INT)
3269 	{
3270 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3271 	  return;
3272 	}
3273 
3274       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3275       break;
3276 
3277     case 'H':
3278       /* Print the higher numbered register of a pair (TImode) of regs.  */
3279       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3280 	{
3281 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3282 	  return;
3283 	}
3284 
3285       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3286       break;
3287 
3288     case 'Q':
3289       /* Print the least significant register of a pair (TImode) of regs.  */
3290       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3291 	{
3292 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3293 	  return;
3294 	}
3295       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3296       break;
3297 
3298     case 'R':
3299       /* Print the most significant register of a pair (TImode) of regs.  */
3300       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3301 	{
3302 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3303 	  return;
3304 	}
3305       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3306       break;
3307 
3308     case 'm':
3309       /* Print a condition (eq, ne, etc).  */
3310 
3311       /* CONST_TRUE_RTX means always -- that's the default.  */
3312       if (x == const_true_rtx)
3313 	return;
3314 
3315       if (!COMPARISON_P (x))
3316 	{
3317 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3318 	  return;
3319 	}
3320 
3321       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3322       break;
3323 
3324     case 'M':
3325       /* Print the inverse of a condition (eq <-> ne, etc).  */
3326 
3327       /* CONST_TRUE_RTX means never -- that's the default.  */
3328       if (x == const_true_rtx)
3329 	{
3330 	  fputs ("nv", f);
3331 	  return;
3332 	}
3333 
3334       if (!COMPARISON_P (x))
3335 	{
3336 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3337 	  return;
3338 	}
3339 
3340       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3341 				  (aarch64_get_condition_code (x))], f);
3342       break;
3343 
3344     case 'b':
3345     case 'h':
3346     case 's':
3347     case 'd':
3348     case 'q':
3349       /* Print a scalar FP/SIMD register name.  */
3350       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3351 	{
3352 	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3353 	  return;
3354 	}
3355       asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3356       break;
3357 
3358     case 'S':
3359     case 'T':
3360     case 'U':
3361     case 'V':
3362       /* Print the first FP/SIMD register name in a list.  */
3363       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3364 	{
3365 	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3366 	  return;
3367 	}
3368       asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3369 			       REGNO (x) - V0_REGNUM + (code - 'S'));
3370       break;
3371 
3372     case 'X':
3373       /* Print integer constant in hex.  */
3374       if (GET_CODE (x) != CONST_INT)
3375 	{
3376 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3377 	  return;
3378 	}
3379       asm_fprintf (f, "0x%wx", UINTVAL (x));
3380       break;
3381 
3382     case 'w':
3383     case 'x':
3384       /* Print a general register name or the zero register (32-bit or
3385          64-bit).  */
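      /* A zero constant prints as the zero register (wzr/xzr), a general
	 register prints as its 32-bit ("w") or 64-bit ("x") name, and the
	 stack pointer prints as wsp/sp.  */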
3386       if (x == const0_rtx
3387 	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3388 	{
3389 	  asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3390 	  break;
3391 	}
3392 
3393       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3394 	{
3395 	  asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3396 		       REGNO (x) - R0_REGNUM);
3397 	  break;
3398 	}
3399 
3400       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3401 	{
3402 	  asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3403 	  break;
3404 	}
3405 
3406       /* Fall through */
3407 
3408     case 0:
3409       /* Print a normal operand.  If it's a general register, we
3410 	 assume DImode.  */
3411       if (x == NULL)
3412 	{
3413 	  output_operand_lossage ("missing operand");
3414 	  return;
3415 	}
3416 
3417       switch (GET_CODE (x))
3418 	{
3419 	case REG:
3420 	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3421 	  break;
3422 
3423 	case MEM:
3424 	  aarch64_memory_reference_mode = GET_MODE (x);
3425 	  output_address (XEXP (x, 0));
3426 	  break;
3427 
3428 	case LABEL_REF:
3429 	case SYMBOL_REF:
3430 	  output_addr_const (asm_out_file, x);
3431 	  break;
3432 
3433 	case CONST_INT:
3434 	  asm_fprintf (f, "%wd", INTVAL (x));
3435 	  break;
3436 
3437 	case CONST_VECTOR:
3438 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3439 	    {
3440 	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
3441 							    HOST_WIDE_INT_MIN,
3442 							    HOST_WIDE_INT_MAX));
3443 	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3444 	    }
3445 	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3446 	    {
3447 	      fputc ('0', f);
3448 	    }
3449 	  else
3450 	    gcc_unreachable ();
3451 	  break;
3452 
3453 	case CONST_DOUBLE:
3454 	  /* CONST_DOUBLE can represent a double-width integer.
3455 	     In this case, the mode of x is VOIDmode.  */
3456 	  if (GET_MODE (x) == VOIDmode)
3457 	    ; /* Do Nothing.  */
3458 	  else if (aarch64_float_const_zero_rtx_p (x))
3459 	    {
3460 	      fputc ('0', f);
3461 	      break;
3462 	    }
3463 	  else if (aarch64_float_const_representable_p (x))
3464 	    {
3465 #define buf_size 20
3466 	      char float_buf[buf_size] = {'\0'};
3467 	      REAL_VALUE_TYPE r;
3468 	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3469 	      real_to_decimal_for_mode (float_buf, &r,
3470 					buf_size, buf_size,
3471 					1, GET_MODE (x));
3472 	      asm_fprintf (asm_out_file, "%s", float_buf);
3473 	      break;
3474 #undef buf_size
3475 	    }
3476 	  output_operand_lossage ("invalid constant");
3477 	  return;
3478 	default:
3479 	  output_operand_lossage ("invalid operand");
3480 	  return;
3481 	}
3482       break;
3483 
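    /* '%A' prints the relocation operator for the high part of a symbolic
       address (e.g. ":got:" or ":tlsdesc:"); '%L' below prints the matching
       low 12-bit operator.  */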
3484     case 'A':
3485       if (GET_CODE (x) == HIGH)
3486 	x = XEXP (x, 0);
3487 
3488       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3489 	{
3490 	case SYMBOL_SMALL_GOT:
3491 	  asm_fprintf (asm_out_file, ":got:");
3492 	  break;
3493 
3494 	case SYMBOL_SMALL_TLSGD:
3495 	  asm_fprintf (asm_out_file, ":tlsgd:");
3496 	  break;
3497 
3498 	case SYMBOL_SMALL_TLSDESC:
3499 	  asm_fprintf (asm_out_file, ":tlsdesc:");
3500 	  break;
3501 
3502 	case SYMBOL_SMALL_GOTTPREL:
3503 	  asm_fprintf (asm_out_file, ":gottprel:");
3504 	  break;
3505 
3506 	case SYMBOL_SMALL_TPREL:
3507 	  asm_fprintf (asm_out_file, ":tprel:");
3508 	  break;
3509 
3510 	default:
3511 	  break;
3512 	}
3513       output_addr_const (asm_out_file, x);
3514       break;
3515 
3516     case 'L':
3517       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3518 	{
3519 	case SYMBOL_SMALL_GOT:
3520 	  asm_fprintf (asm_out_file, ":lo12:");
3521 	  break;
3522 
3523 	case SYMBOL_SMALL_TLSGD:
3524 	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3525 	  break;
3526 
3527 	case SYMBOL_SMALL_TLSDESC:
3528 	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3529 	  break;
3530 
3531 	case SYMBOL_SMALL_GOTTPREL:
3532 	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
3533 	  break;
3534 
3535 	case SYMBOL_SMALL_TPREL:
3536 	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3537 	  break;
3538 
3539 	default:
3540 	  break;
3541 	}
3542       output_addr_const (asm_out_file, x);
3543       break;
3544 
3545     case 'G':
3546 
3547       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3548 	{
3549 	case SYMBOL_SMALL_TPREL:
3550 	  asm_fprintf (asm_out_file, ":tprel_hi12:");
3551 	  break;
3552 	default:
3553 	  break;
3554 	}
3555       output_addr_const (asm_out_file, x);
3556       break;
3557 
3558     default:
3559       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3560       return;
3561     }
3562 }
3563 
3564 void
3565 aarch64_print_operand_address (FILE *f, rtx x)
3566 {
3567   struct aarch64_address_info addr;
3568 
3569   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3570 			     MEM, true))
3571     switch (addr.type)
3572       {
3573       case ADDRESS_REG_IMM:
3574 	if (addr.offset == const0_rtx)
3575 	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3576 	else
3577 	  asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3578 		       INTVAL (addr.offset));
3579 	return;
3580 
3581       case ADDRESS_REG_REG:
3582 	if (addr.shift == 0)
3583 	  asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3584 		       reg_names [REGNO (addr.offset)]);
3585 	else
3586 	  asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3587 		       reg_names [REGNO (addr.offset)], addr.shift);
3588 	return;
3589 
3590       case ADDRESS_REG_UXTW:
3591 	if (addr.shift == 0)
3592 	  asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3593 		       REGNO (addr.offset) - R0_REGNUM);
3594 	else
3595 	  asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3596 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
3597 	return;
3598 
3599       case ADDRESS_REG_SXTW:
3600 	if (addr.shift == 0)
3601 	  asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3602 		       REGNO (addr.offset) - R0_REGNUM);
3603 	else
3604 	  asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3605 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
3606 	return;
3607 
3608       case ADDRESS_REG_WB:
3609 	switch (GET_CODE (x))
3610 	  {
3611 	  case PRE_INC:
3612 	    asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3613 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3614 	    return;
3615 	  case POST_INC:
3616 	    asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3617 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3618 	    return;
3619 	  case PRE_DEC:
3620 	    asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3621 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3622 	    return;
3623 	  case POST_DEC:
3624 	    asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3625 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3626 	    return;
3627 	  case PRE_MODIFY:
3628 	    asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3629 			 INTVAL (addr.offset));
3630 	    return;
3631 	  case POST_MODIFY:
3632 	    asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3633 			 INTVAL (addr.offset));
3634 	    return;
3635 	  default:
3636 	    break;
3637 	  }
3638 	break;
3639 
3640       case ADDRESS_LO_SUM:
3641 	asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3642 	output_addr_const (f, addr.offset);
3643 	asm_fprintf (f, "]");
3644 	return;
3645 
3646       case ADDRESS_SYMBOLIC:
3647 	break;
3648       }
3649 
3650   output_addr_const (f, x);
3651 }
3652 
3653 void
3654 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3655 			   int labelno ATTRIBUTE_UNUSED)
3656 {
3657   sorry ("function profiling");
3658 }
3659 
3660 bool
3661 aarch64_label_mentioned_p (rtx x)
3662 {
3663   const char *fmt;
3664   int i;
3665 
3666   if (GET_CODE (x) == LABEL_REF)
3667     return true;
3668 
3669   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3670      referencing instruction, but they are constant offsets, not
3671      symbols.  */
3672   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3673     return false;
3674 
3675   fmt = GET_RTX_FORMAT (GET_CODE (x));
3676   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3677     {
3678       if (fmt[i] == 'E')
3679 	{
3680 	  int j;
3681 
3682 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3683 	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3684 	      return 1;
3685 	}
3686       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3687 	return 1;
3688     }
3689 
3690   return 0;
3691 }
3692 
3693 /* Implement REGNO_REG_CLASS.  */
3694 
3695 enum reg_class
3696 aarch64_regno_regclass (unsigned regno)
3697 {
3698   if (GP_REGNUM_P (regno))
3699     return CORE_REGS;
3700 
3701   if (regno == SP_REGNUM)
3702     return STACK_REG;
3703 
3704   if (regno == FRAME_POINTER_REGNUM
3705       || regno == ARG_POINTER_REGNUM)
3706     return CORE_REGS;
3707 
3708   if (FP_REGNUM_P (regno))
3709     return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;
3710 
3711   return NO_REGS;
3712 }
3713 
3714 /* Try a machine-dependent way of reloading an illegitimate address
3715    operand.  If we find one, push the reload and return the new rtx.  */
3716 
3717 rtx
3718 aarch64_legitimize_reload_address (rtx *x_p,
3719 				   enum machine_mode mode,
3720 				   int opnum, int type,
3721 				   int ind_levels ATTRIBUTE_UNUSED)
3722 {
3723   rtx x = *x_p;
3724 
3725   /* Do not allow mem (plus (reg, const)) if vector mode.  */
3726   if (aarch64_vector_mode_p (mode)
3727       && GET_CODE (x) == PLUS
3728       && REG_P (XEXP (x, 0))
3729       && CONST_INT_P (XEXP (x, 1)))
3730     {
3731       rtx orig_rtx = x;
3732       x = copy_rtx (x);
3733       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3734 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3735 		   opnum, (enum reload_type) type);
3736       return x;
3737     }
3738 
3739   /* We must recognize output that we have already generated ourselves.  */
3740   if (GET_CODE (x) == PLUS
3741       && GET_CODE (XEXP (x, 0)) == PLUS
3742       && REG_P (XEXP (XEXP (x, 0), 0))
3743       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3744       && CONST_INT_P (XEXP (x, 1)))
3745     {
3746       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3747 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3748 		   opnum, (enum reload_type) type);
3749       return x;
3750     }
3751 
3752   /* We wish to handle large displacements off a base register by splitting
3753      the addend across an add and the mem insn.  This can cut the number of
3754      extra insns needed from 3 to 1.  It is only useful for load/store of a
3755      single register with 12 bit offset field.  */
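  /* For illustration: an addend of 0x13008 for a DImode access splits into
     a high part of 0x13000, which is reloaded into the base register, and
     a low part of 0x8, which stays as the immediate offset of the load or
     store.  */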
3756   if (GET_CODE (x) == PLUS
3757       && REG_P (XEXP (x, 0))
3758       && CONST_INT_P (XEXP (x, 1))
3759       && HARD_REGISTER_P (XEXP (x, 0))
3760       && mode != TImode
3761       && mode != TFmode
3762       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3763     {
3764       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3765       HOST_WIDE_INT low = val & 0xfff;
3766       HOST_WIDE_INT high = val - low;
3767       HOST_WIDE_INT offs;
3768       rtx cst;
3769 
3770       /* Punt on BLKmode (zero-size) offsets: we cannot ascertain BLKmode
3771 	 alignment, so leave such addresses to the generic reload code.  */
3772       if (GET_MODE_SIZE (mode) == 0)
3773 	return NULL_RTX;
3774 
3775       offs = low % GET_MODE_SIZE (mode);
3776 
3777       /* Align misaligned offset by adjusting high part to compensate.  */
3778       if (offs != 0)
3779 	{
3780 	  if (aarch64_uimm12_shift (high + offs))
3781 	    {
3782 	      /* Align down.  */
3783 	      low = low - offs;
3784 	      high = high + offs;
3785 	    }
3786 	  else
3787 	    {
3788 	      /* Align up.  */
3789 	      offs = GET_MODE_SIZE (mode) - offs;
3790 	      low = low + offs;
3791 	      high = high + (low & 0x1000) - offs;
3792 	      low &= 0xfff;
3793 	    }
3794 	}
3795 
3796       /* Check for overflow.  */
3797       if (high + low != val)
3798 	return NULL_RTX;
3799 
3800       cst = GEN_INT (high);
3801       if (!aarch64_uimm12_shift (high))
3802 	cst = force_const_mem (Pmode, cst);
3803 
3804       /* Reload high part into base reg, leaving the low part
3805 	 in the mem instruction.  */
3806       x = gen_rtx_PLUS (Pmode,
3807 			gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3808 			GEN_INT (low));
3809 
3810       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3811 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3812 		   opnum, (enum reload_type) type);
3813       return x;
3814     }
3815 
3816   return NULL_RTX;
3817 }
3818 
3819 
3820 static reg_class_t
3821 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3822 			  reg_class_t rclass,
3823 			  enum machine_mode mode,
3824 			  secondary_reload_info *sri)
3825 {
3826   /* Address expressions of the form PLUS (SP, large_offset) need two
3827      scratch registers, one for the constant, and one for holding a
3828      copy of SP, since SP cannot be used on the RHS of an add-reg
3829      instruction.  */
3830   if (mode == DImode
3831       && GET_CODE (x) == PLUS
3832       && XEXP (x, 0) == stack_pointer_rtx
3833       && CONST_INT_P (XEXP (x, 1))
3834       && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3835     {
3836       sri->icode = CODE_FOR_reload_sp_immediate;
3837       return NO_REGS;
3838     }
3839 
3840   /* Without the TARGET_SIMD instructions we cannot move a Q register
3841      to a Q register directly.  We need a scratch.  */
3842   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3843       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3844       && reg_class_subset_p (rclass, FP_REGS))
3845     {
3846       if (mode == TFmode)
3847         sri->icode = CODE_FOR_aarch64_reload_movtf;
3848       else if (mode == TImode)
3849         sri->icode = CODE_FOR_aarch64_reload_movti;
3850       return NO_REGS;
3851     }
3852 
3853   /* A TFmode or TImode memory access should be handled via an FP_REGS
3854      class register, because AArch64 has richer addressing modes for LDR/STR
3855      instructions than for LDP/STP instructions.  */
3856   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3857       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3858     return FP_REGS;
3859 
3860   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3861       return CORE_REGS;
3862 
3863   return NO_REGS;
3864 }
3865 
3866 static bool
3867 aarch64_can_eliminate (const int from, const int to)
3868 {
3869   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3870      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
3871 
3872   if (frame_pointer_needed)
3873     {
3874       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3875 	return true;
3876       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3877 	return false;
3878       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3879 	  && !cfun->calls_alloca)
3880 	return true;
3881       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3882 	return true;
3883       return false;
3884     }
3885   else
3886     {
3887       /* If we decided that we didn't need a leaf frame pointer but then used
3888 	 LR in the function, then we'll want a frame pointer after all, so
3889 	 prevent this elimination to ensure a frame pointer is used.
3890 
3891 	 NOTE: the original value of flag_omit_frame_pointer gets trashed
3892 	 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3893 	 of faked_omit_frame_pointer here (which is true when we always
3894 	 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3895 	 pointers when LR is clobbered).  */
3896       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3897 	  && df_regs_ever_live_p (LR_REGNUM)
3898 	  && faked_omit_frame_pointer)
3899 	return false;
3900     }
3901 
3902   return true;
3903 }
3904 
3905 HOST_WIDE_INT
3906 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3907 {
3908   HOST_WIDE_INT frame_size;
3909   HOST_WIDE_INT offset;
3910 
3911   aarch64_layout_frame ();
3912   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3913 		+ crtl->outgoing_args_size
3914 		+ cfun->machine->saved_varargs_size);
3915 
3916    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3917    offset = frame_size;
3918 
3919    if (to == HARD_FRAME_POINTER_REGNUM)
3920      {
3921        if (from == ARG_POINTER_REGNUM)
3922 	 return offset - crtl->outgoing_args_size;
3923 
3924        if (from == FRAME_POINTER_REGNUM)
3925 	 return cfun->machine->frame.saved_regs_size;
3926      }
3927 
3928    if (to == STACK_POINTER_REGNUM)
3929      {
3930        if (from == FRAME_POINTER_REGNUM)
3931          {
3932            HOST_WIDE_INT elim = crtl->outgoing_args_size
3933                               + cfun->machine->frame.saved_regs_size
3934                               - cfun->machine->frame.fp_lr_offset;
3935            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3936            return elim;
3937          }
3938      }
3939 
3940    return offset;
3941 }
3942 
3943 
3944 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
3945    previous frame.  */
3946 
3947 rtx
3948 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3949 {
3950   if (count != 0)
3951     return const0_rtx;
3952   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3953 }
3954 
3955 
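/* Output the trampoline template: two PC-relative loads that pick up the
   target function address (stored at offset 16) and the static chain value
   (stored at offset 24), followed by an indirect branch.  The 4-byte pad
   keeps the two 8-byte slots naturally aligned, giving the 32-byte size
   reported by aarch64_trampoline_size.  */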
3956 static void
3957 aarch64_asm_trampoline_template (FILE *f)
3958 {
3959   asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3960   asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3961   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3962   assemble_aligned_integer (4, const0_rtx);
3963   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3964   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3965 }
3966 
3967 unsigned
3968 aarch64_trampoline_size (void)
3969 {
3970   return 32;  /* 3 insns + padding + 2 dwords.  */
3971 }
3972 
3973 static void
3974 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3975 {
3976   rtx fnaddr, mem, a_tramp;
3977 
3978   /* Don't need to copy the trailing D-words, we fill those in below.  */
3979   emit_block_move (m_tramp, assemble_trampoline_template (),
3980 		   GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3981   mem = adjust_address (m_tramp, DImode, 16);
3982   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3983   emit_move_insn (mem, fnaddr);
3984 
3985   mem = adjust_address (m_tramp, DImode, 24);
3986   emit_move_insn (mem, chain_value);
3987 
3988   /* XXX We should really define a "clear_cache" pattern and use
3989      gen_clear_cache().  */
3990   a_tramp = XEXP (m_tramp, 0);
3991   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3992 		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3993 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3994 }
3995 
3996 static unsigned char
3997 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3998 {
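  /* For example, a V4SImode value counts as one register ((16 + 15) / 16)
     while a TImode value counts as two ((16 + 7) / 8).  */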
3999   switch (regclass)
4000     {
4001     case CORE_REGS:
4002     case POINTER_REGS:
4003     case GENERAL_REGS:
4004     case ALL_REGS:
4005     case FP_REGS:
4006     case FP_LO_REGS:
4007       return
4008 	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4009  				       (GET_MODE_SIZE (mode) + 7) / 8;
4010     case STACK_REG:
4011       return 1;
4012 
4013     case NO_REGS:
4014       return 0;
4015 
4016     default:
4017       break;
4018     }
4019   gcc_unreachable ();
4020 }
4021 
4022 static reg_class_t
4023 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4024 {
4025   return ((regclass == POINTER_REGS || regclass == STACK_REG)
4026 	  ? GENERAL_REGS : regclass);
4027 }
4028 
4029 void
4030 aarch64_asm_output_labelref (FILE* f, const char *name)
4031 {
4032   asm_fprintf (f, "%U%s", name);
4033 }
4034 
4035 static void
4036 aarch64_elf_asm_constructor (rtx symbol, int priority)
4037 {
4038   if (priority == DEFAULT_INIT_PRIORITY)
4039     default_ctor_section_asm_out_constructor (symbol, priority);
4040   else
4041     {
4042       section *s;
4043       char buf[18];
4044       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4045       s = get_section (buf, SECTION_WRITE, NULL);
4046       switch_to_section (s);
4047       assemble_align (POINTER_SIZE);
4048       fputs ("\t.dword\t", asm_out_file);
4049       output_addr_const (asm_out_file, symbol);
4050       fputc ('\n', asm_out_file);
4051     }
4052 }
4053 
4054 static void
4055 aarch64_elf_asm_destructor (rtx symbol, int priority)
4056 {
4057   if (priority == DEFAULT_INIT_PRIORITY)
4058     default_dtor_section_asm_out_destructor (symbol, priority);
4059   else
4060     {
4061       section *s;
4062       char buf[18];
4063       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4064       s = get_section (buf, SECTION_WRITE, NULL);
4065       switch_to_section (s);
4066       assemble_align (POINTER_SIZE);
4067       fputs ("\t.dword\t", asm_out_file);
4068       output_addr_const (asm_out_file, symbol);
4069       fputc ('\n', asm_out_file);
4070     }
4071 }
4072 
4073 const char*
4074 aarch64_output_casesi (rtx *operands)
4075 {
4076   char buf[100];
4077   char label[100];
4078   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4079   int index;
4080   static const char *const patterns[4][2] =
4081   {
4082     {
4083       "ldrb\t%w3, [%0,%w1,uxtw]",
4084       "add\t%3, %4, %w3, sxtb #2"
4085     },
4086     {
4087       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4088       "add\t%3, %4, %w3, sxth #2"
4089     },
4090     {
4091       "ldr\t%w3, [%0,%w1,uxtw #2]",
4092       "add\t%3, %4, %w3, sxtw #2"
4093     },
4094     /* We assume that DImode is only generated when not optimizing and
4095        that we don't really need 64-bit address offsets.  That would
4096        imply an object file with 8GB of code in a single function!  */
4097     {
4098       "ldr\t%w3, [%0,%w1,uxtw #2]",
4099       "add\t%3, %4, %w3, sxtw #2"
4100     }
4101   };
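
  /* Schematically, for a HImode dispatch table the emitted sequence is:

	ldrh	w3, [x0,w1,uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
     .Lrtx<N>:

     shown with operands 0..4 in x0, w1, x3 and x4 purely for
     illustration.  */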
4102 
4103   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4104 
4105   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4106 
4107   gcc_assert (index >= 0 && index <= 3);
4108 
4109   /* Need to implement table size reduction, by changing the code below.  */
4110   output_asm_insn (patterns[index][0], operands);
4111   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4112   snprintf (buf, sizeof (buf),
4113 	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
4114   output_asm_insn (buf, operands);
4115   output_asm_insn (patterns[index][1], operands);
4116   output_asm_insn ("br\t%3", operands);
4117   assemble_label (asm_out_file, label);
4118   return "";
4119 }
4120 
4121 
4122 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4123    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4124    operator.  */
4125 
4126 int
4127 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4128 {
4129   if (shift >= 0 && shift <= 3)
4130     {
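      /* For example, SHIFT == 1 with MASK == 0x1fe (0xff << 1) matches
	 the 8-bit case and returns 8 (a UXTB extend).  */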
4131       int size;
4132       for (size = 8; size <= 32; size *= 2)
4133 	{
4134 	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4135 	  if (mask == bits << shift)
4136 	    return size;
4137 	}
4138     }
4139   return 0;
4140 }
4141 
4142 static bool
4143 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4144 				   const_rtx x ATTRIBUTE_UNUSED)
4145 {
4146   /* We can't use blocks for constants when we're using a per-function
4147      constant pool.  */
4148   return false;
4149 }
4150 
4151 static section *
4152 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4153 			    rtx x ATTRIBUTE_UNUSED,
4154 			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4155 {
4156   /* Force all constant pool entries into the current function section.  */
4157   return function_section (current_function_decl);
4158 }
4159 
4160 
4161 /* Costs.  */
4162 
4163 /* Helper function for rtx cost calculation.  Strip a shift expression
4164    from X.  Returns the inner operand if successful, or the original
4165    expression on failure.  */
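/* For example, (ashift:DI (reg:DI x1) (const_int 3)) and the equivalent
   (mult:DI (reg:DI x1) (const_int 8)) both strip to (reg:DI x1).  */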
4166 static rtx
4167 aarch64_strip_shift (rtx x)
4168 {
4169   rtx op = x;
4170 
4171   if ((GET_CODE (op) == ASHIFT
4172        || GET_CODE (op) == ASHIFTRT
4173        || GET_CODE (op) == LSHIFTRT)
4174       && CONST_INT_P (XEXP (op, 1)))
4175     return XEXP (op, 0);
4176 
4177   if (GET_CODE (op) == MULT
4178       && CONST_INT_P (XEXP (op, 1))
4179       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4180     return XEXP (op, 0);
4181 
4182   return x;
4183 }
4184 
4185 /* Helper function for rtx cost calculation.  Strip a shift or extend
4186    expression from X.  Returns the inner operand if successful, or the
4187    original expression on failure.  We deal with a number of possible
4188    canonicalization variations here.  */
4189 static rtx
4190 aarch64_strip_shift_or_extend (rtx x)
4191 {
4192   rtx op = x;
4193 
4194   /* Zero and sign extraction of a widened value.  */
4195   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4196       && XEXP (op, 2) == const0_rtx
4197       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4198 					 XEXP (op, 1)))
4199     return XEXP (XEXP (op, 0), 0);
4200 
4201   /* It can also be represented (for zero-extend) as an AND with an
4202      immediate.  */
4203   if (GET_CODE (op) == AND
4204       && GET_CODE (XEXP (op, 0)) == MULT
4205       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4206       && CONST_INT_P (XEXP (op, 1))
4207       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4208 			   INTVAL (XEXP (op, 1))) != 0)
4209     return XEXP (XEXP (op, 0), 0);
4210 
4211   /* Now handle extended register, as this may also have an optional
4212      left shift by 1..4.  */
4213   if (GET_CODE (op) == ASHIFT
4214       && CONST_INT_P (XEXP (op, 1))
4215       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4216     op = XEXP (op, 0);
4217 
4218   if (GET_CODE (op) == ZERO_EXTEND
4219       || GET_CODE (op) == SIGN_EXTEND)
4220     op = XEXP (op, 0);
4221 
4222   if (op != x)
4223     return op;
4224 
4225   return aarch64_strip_shift (x);
4226 }
4227 
4228 /* Calculate the cost of calculating X, storing it in *COST.  Result
4229    is true if the total cost of the operation has now been calculated.  */
4230 static bool
4231 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4232 		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4233 {
4234   rtx op0, op1;
4235   const struct cpu_rtx_cost_table *extra_cost
4236     = aarch64_tune_params->insn_extra_cost;
4237 
4238   switch (code)
4239     {
4240     case SET:
4241       op0 = SET_DEST (x);
4242       op1 = SET_SRC (x);
4243 
4244       switch (GET_CODE (op0))
4245 	{
4246 	case MEM:
4247 	  if (speed)
4248 	    *cost += extra_cost->memory_store;
4249 
4250 	  if (op1 != const0_rtx)
4251 	    *cost += rtx_cost (op1, SET, 1, speed);
4252 	  return true;
4253 
4254 	case SUBREG:
4255 	  if (! REG_P (SUBREG_REG (op0)))
4256 	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4257 	  /* Fall through.  */
4258 	case REG:
4259 	  /* Cost is just the cost of the RHS of the set.  */
4260 	  *cost += rtx_cost (op1, SET, 1, true);
4261 	  return true;
4262 
4263 	case ZERO_EXTRACT:  /* Bit-field insertion.  */
4264 	case SIGN_EXTRACT:
4265 	  /* Strip any redundant widening of the RHS to meet the width of
4266 	     the target.  */
4267 	  if (GET_CODE (op1) == SUBREG)
4268 	    op1 = SUBREG_REG (op1);
4269 	  if ((GET_CODE (op1) == ZERO_EXTEND
4270 	       || GET_CODE (op1) == SIGN_EXTEND)
4271 	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
4272 	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4273 		  >= INTVAL (XEXP (op0, 1))))
4274 	    op1 = XEXP (op1, 0);
4275 	  *cost += rtx_cost (op1, SET, 1, speed);
4276 	  return true;
4277 
4278 	default:
4279 	  break;
4280 	}
4281       return false;
4282 
4283     case MEM:
4284       if (speed)
4285 	*cost += extra_cost->memory_load;
4286 
4287       return true;
4288 
4289     case NEG:
4290       op0 = CONST0_RTX (GET_MODE (x));
4291       op1 = XEXP (x, 0);
4292       goto cost_minus;
4293 
4294     case COMPARE:
4295       op0 = XEXP (x, 0);
4296       op1 = XEXP (x, 1);
4297 
4298       if (op1 == const0_rtx
4299 	  && GET_CODE (op0) == AND)
4300 	{
4301 	  x = op0;
4302 	  goto cost_logic;
4303 	}
4304 
4305       /* Comparisons can work if the order is swapped.
4306 	 Canonicalization puts the more complex operation first, but
4307 	 we want it in op1.  */
4308       if (! (REG_P (op0)
4309 	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4310 	{
4311 	  op0 = XEXP (x, 1);
4312 	  op1 = XEXP (x, 0);
4313 	}
4314       goto cost_minus;
4315 
4316     case MINUS:
4317       op0 = XEXP (x, 0);
4318       op1 = XEXP (x, 1);
4319 
4320     cost_minus:
4321       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4322 	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4323 	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4324 	{
4325 	  if (op0 != const0_rtx)
4326 	    *cost += rtx_cost (op0, MINUS, 0, speed);
4327 
4328 	  if (CONST_INT_P (op1))
4329 	    {
4330 	      if (!aarch64_uimm12_shift (INTVAL (op1)))
4331 		*cost += rtx_cost (op1, MINUS, 1, speed);
4332 	    }
4333 	  else
4334 	    {
4335 	      op1 = aarch64_strip_shift_or_extend (op1);
4336 	      *cost += rtx_cost (op1, MINUS, 1, speed);
4337 	    }
4338 	  return true;
4339 	}
4340 
4341       return false;
4342 
4343     case PLUS:
4344       op0 = XEXP (x, 0);
4345       op1 = XEXP (x, 1);
4346 
4347       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4348 	{
4349 	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4350 	    {
4351 	      *cost += rtx_cost (op0, PLUS, 0, speed);
4352 	    }
4353 	  else
4354 	    {
4355 	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4356 
4357 	      if (new_op0 == op0
4358 		  && GET_CODE (op0) == MULT)
4359 		{
4360 		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4361 		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4362 		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4363 			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4364 		    {
4365 		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4366 					  speed)
4367 				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4368 					    speed)
4369 				+ rtx_cost (op1, PLUS, 1, speed));
4370 		      if (speed)
4371 			*cost += extra_cost->int_multiply_extend_add;
4372 		      return true;
4373 		    }
4374 		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4375 			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4376 			    + rtx_cost (op1, PLUS, 1, speed));
4377 
4378 		  if (speed)
4379 		    *cost += extra_cost->int_multiply_add;
4380 		}
4381 
4382 	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4383 			+ rtx_cost (op1, PLUS, 1, speed));
4384 	    }
4385 	  return true;
4386 	}
4387 
4388       return false;
4389 
4390     case IOR:
4391     case XOR:
4392     case AND:
4393     cost_logic:
4394       op0 = XEXP (x, 0);
4395       op1 = XEXP (x, 1);
4396 
4397       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4398 	{
4399 	  if (CONST_INT_P (op1)
4400 	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4401 	    {
4402 	      *cost += rtx_cost (op0, AND, 0, speed);
4403 	    }
4404 	  else
4405 	    {
4406 	      if (GET_CODE (op0) == NOT)
4407 		op0 = XEXP (op0, 0);
4408 	      op0 = aarch64_strip_shift (op0);
4409 	      *cost += (rtx_cost (op0, AND, 0, speed)
4410 			+ rtx_cost (op1, AND, 1, speed));
4411 	    }
4412 	  return true;
4413 	}
4414       return false;
4415 
4416     case ZERO_EXTEND:
4417       if ((GET_MODE (x) == DImode
4418 	   && GET_MODE (XEXP (x, 0)) == SImode)
4419 	  || GET_CODE (XEXP (x, 0)) == MEM)
4420 	{
4421 	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4422 	  return true;
4423 	}
4424       return false;
4425 
4426     case SIGN_EXTEND:
4427       if (GET_CODE (XEXP (x, 0)) == MEM)
4428 	{
4429 	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4430 	  return true;
4431 	}
4432       return false;
4433 
4434     case ROTATE:
4435       if (!CONST_INT_P (XEXP (x, 1)))
4436 	*cost += COSTS_N_INSNS (2);
4437       /* Fall through.  */
4438     case ROTATERT:
4439     case LSHIFTRT:
4440     case ASHIFT:
4441     case ASHIFTRT:
4442 
4443       /* Shifting by a register often takes an extra cycle.  */
4444       if (speed && !CONST_INT_P (XEXP (x, 1)))
4445 	*cost += extra_cost->register_shift;
4446 
4447       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4448       return true;
4449 
4450     case HIGH:
4451       if (!CONSTANT_P (XEXP (x, 0)))
4452 	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4453       return true;
4454 
4455     case LO_SUM:
4456       if (!CONSTANT_P (XEXP (x, 1)))
4457 	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4458       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4459       return true;
4460 
4461     case ZERO_EXTRACT:
4462     case SIGN_EXTRACT:
4463       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4464       return true;
4465 
4466     case MULT:
4467       op0 = XEXP (x, 0);
4468       op1 = XEXP (x, 1);
4469 
4470       *cost = COSTS_N_INSNS (1);
4471       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4472 	{
4473 	  if (CONST_INT_P (op1)
4474 	      && exact_log2 (INTVAL (op1)) > 0)
4475 	    {
4476 	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
4477 	      return true;
4478 	    }
4479 
4480 	  if ((GET_CODE (op0) == ZERO_EXTEND
4481 	       && GET_CODE (op1) == ZERO_EXTEND)
4482 	      || (GET_CODE (op0) == SIGN_EXTEND
4483 		  && GET_CODE (op1) == SIGN_EXTEND))
4484 	    {
4485 	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4486 			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4487 	      if (speed)
4488 		*cost += extra_cost->int_multiply_extend;
4489 	      return true;
4490 	    }
4491 
4492 	  if (speed)
4493 	    *cost += extra_cost->int_multiply;
4494 	}
4495       else if (speed)
4496 	{
4497 	  if (GET_MODE (x) == DFmode)
4498 	    *cost += extra_cost->double_multiply;
4499 	  else if (GET_MODE (x) == SFmode)
4500 	    *cost += extra_cost->float_multiply;
4501 	}
4502 
4503       return false;  /* All arguments need to be in registers.  */
4504 
4505     case MOD:
4506     case UMOD:
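      /* There is no integer remainder instruction; a modulo is synthesised
	 as a divide followed by a multiply-subtract, hence the combined
	 cost below.  */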
4507       *cost = COSTS_N_INSNS (2);
4508       if (speed)
4509 	{
4510 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4511 	    *cost += (extra_cost->int_multiply_add
4512 		      + extra_cost->int_divide);
4513 	  else if (GET_MODE (x) == DFmode)
4514 	    *cost += (extra_cost->double_multiply
4515 		      + extra_cost->double_divide);
4516 	  else if (GET_MODE (x) == SFmode)
4517 	    *cost += (extra_cost->float_multiply
4518 		      + extra_cost->float_divide);
4519 	}
4520       return false;  /* All arguments need to be in registers.  */
4521 
4522     case DIV:
4523     case UDIV:
4524       *cost = COSTS_N_INSNS (1);
4525       if (speed)
4526 	{
4527 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4528 	    *cost += extra_cost->int_divide;
4529 	  else if (GET_MODE (x) == DFmode)
4530 	    *cost += extra_cost->double_divide;
4531 	  else if (GET_MODE (x) == SFmode)
4532 	    *cost += extra_cost->float_divide;
4533 	}
4534       return false;  /* All arguments need to be in registers.  */
4535 
4536     default:
4537       break;
4538     }
4539   return false;
4540 }
4541 
4542 static int
4543 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4544 		  enum machine_mode mode ATTRIBUTE_UNUSED,
4545 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4546 {
4547   enum rtx_code c  = GET_CODE (x);
4548   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4549 
4550   if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4551     return addr_cost->pre_modify;
4552 
4553   if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4554     return addr_cost->post_modify;
4555 
4556   if (c == PLUS)
4557     {
4558       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4559 	return addr_cost->imm_offset;
4560       else if (GET_CODE (XEXP (x, 0)) == MULT
4561 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4562 	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4563 	return addr_cost->register_extend;
4564 
4565       return addr_cost->register_offset;
4566     }
4567   else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4568     return addr_cost->imm_offset;
4569 
4570   return 0;
4571 }
4572 
4573 static int
4574 aarch64_register_move_cost (enum machine_mode mode,
4575 			    reg_class_t from_i, reg_class_t to_i)
4576 {
4577   enum reg_class from = (enum reg_class) from_i;
4578   enum reg_class to = (enum reg_class) to_i;
4579   const struct cpu_regmove_cost *regmove_cost
4580     = aarch64_tune_params->regmove_cost;
4581 
4582   if (from == GENERAL_REGS && to == GENERAL_REGS)
4583     return regmove_cost->GP2GP;
4584   else if (from == GENERAL_REGS)
4585     return regmove_cost->GP2FP;
4586   else if (to == GENERAL_REGS)
4587     return regmove_cost->FP2GP;
4588 
4589   /* When AdvSIMD instructions are disabled it is not possible to move
4590      a 128-bit value directly between Q registers.  This is handled in
4591      secondary reload.  A general register is used as a scratch to move
4592      the upper DI value and the lower DI value is moved directly,
4593      hence the cost is the sum of three moves. */
4594   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4595     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4596 
4597   return regmove_cost->FP2FP;
4598 }
4599 
4600 static int
4601 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4602 			  reg_class_t rclass ATTRIBUTE_UNUSED,
4603 			  bool in ATTRIBUTE_UNUSED)
4604 {
4605   return aarch64_tune_params->memmov_cost;
4606 }
4607 
4608 static void initialize_aarch64_code_model (void);
4609 
4610 /* Parse the architecture extension string.  */
4611 
4612 static void
4613 aarch64_parse_extension (char *str)
4614 {
4615   /* The extension string is parsed left to right.  */
4616   const struct aarch64_option_extension *opt = NULL;
4617 
4618   /* Flag to say whether we are adding or removing an extension.  */
4619   int adding_ext = -1;
4620 
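  /* For example, "+fp+nosimd" enables the "fp" extension on the first
     iteration and disables "simd" on the second.  */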
4621   while (str != NULL && *str != 0)
4622     {
4623       char *ext;
4624       size_t len;
4625 
4626       str++;
4627       ext = strchr (str, '+');
4628 
4629       if (ext != NULL)
4630 	len = ext - str;
4631       else
4632 	len = strlen (str);
4633 
4634       if (len >= 2 && strncmp (str, "no", 2) == 0)
4635 	{
4636 	  adding_ext = 0;
4637 	  len -= 2;
4638 	  str += 2;
4639 	}
4640       else if (len > 0)
4641 	adding_ext = 1;
4642 
4643       if (len == 0)
4644 	{
4645 	  error ("missing feature modifier after %qs", "+no");
4646 	  return;
4647 	}
4648 
4649       /* Scan over the extensions table trying to find an exact match.  */
4650       for (opt = all_extensions; opt->name != NULL; opt++)
4651 	{
4652 	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4653 	    {
4654 	      /* Add or remove the extension.  */
4655 	      if (adding_ext)
4656 		aarch64_isa_flags |= opt->flags_on;
4657 	      else
4658 		aarch64_isa_flags &= ~(opt->flags_off);
4659 	      break;
4660 	    }
4661 	}
4662 
4663       if (opt->name == NULL)
4664 	{
4665 	  /* Extension not found in list.  */
4666 	  error ("unknown feature modifier %qs", str);
4667 	  return;
4668 	}
4669 
4670       str = ext;
4671     };
4672 
4673   return;
4674 }
4675 
4676 /* Parse the ARCH string.  */
4677 
4678 static void
4679 aarch64_parse_arch (void)
4680 {
4681   char *ext;
4682   const struct processor *arch;
4683   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4684   size_t len;
4685 
4686   strcpy (str, aarch64_arch_string);
4687 
4688   ext = strchr (str, '+');
4689 
4690   if (ext != NULL)
4691     len = ext - str;
4692   else
4693     len = strlen (str);
4694 
4695   if (len == 0)
4696     {
4697       error ("missing arch name in -march=%qs", str);
4698       return;
4699     }
4700 
4701   /* Loop through the list of supported ARCHs to find a match.  */
4702   for (arch = all_architectures; arch->name != NULL; arch++)
4703     {
4704       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4705 	{
4706 	  selected_arch = arch;
4707 	  aarch64_isa_flags = selected_arch->flags;
4708 	  selected_cpu = &all_cores[selected_arch->core];
4709 
4710 	  if (ext != NULL)
4711 	    {
4712 	      /* ARCH string contains at least one extension.  */
4713 	      aarch64_parse_extension (ext);
4714 	    }
4715 
4716 	  return;
4717 	}
4718     }
4719 
4720   /* ARCH name not found in list.  */
4721   error ("unknown value %qs for -march", str);
4722   return;
4723 }
4724 
4725 /* Parse the CPU string.  */
4726 
4727 static void
4728 aarch64_parse_cpu (void)
4729 {
4730   char *ext;
4731   const struct processor *cpu;
4732   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4733   size_t len;
4734 
4735   strcpy (str, aarch64_cpu_string);
4736 
4737   ext = strchr (str, '+');
4738 
4739   if (ext != NULL)
4740     len = ext - str;
4741   else
4742     len = strlen (str);
4743 
4744   if (len == 0)
4745     {
4746       error ("missing cpu name in -mcpu=%qs", str);
4747       return;
4748     }
4749 
4750   /* Loop through the list of supported CPUs to find a match.  */
4751   for (cpu = all_cores; cpu->name != NULL; cpu++)
4752     {
4753       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4754 	{
4755 	  selected_cpu = cpu;
4756 	  aarch64_isa_flags = selected_cpu->flags;
4757 
4758 	  if (ext != NULL)
4759 	    {
4760 	      /* CPU string contains at least one extension.  */
4761 	      aarch64_parse_extension (ext);
4762 	    }
4763 
4764 	  return;
4765 	}
4766     }
4767 
4768   /* CPU name not found in list.  */
4769   error ("unknown value %qs for -mcpu", str);
4770   return;
4771 }
4772 
4773 /* Parse the TUNE string.  */
4774 
4775 static void
4776 aarch64_parse_tune (void)
4777 {
4778   const struct processor *cpu;
4779   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4780   strcpy (str, aarch64_tune_string);
4781 
4782   /* Loop through the list of supported CPUs to find a match.  */
4783   for (cpu = all_cores; cpu->name != NULL; cpu++)
4784     {
4785       if (strcmp (cpu->name, str) == 0)
4786 	{
4787 	  selected_tune = cpu;
4788 	  return;
4789 	}
4790     }
4791 
4792   /* CPU name not found in list.  */
4793   error ("unknown value %qs for -mtune", str);
4794   return;
4795 }
4796 
4797 
4798 /* Implement TARGET_OPTION_OVERRIDE.  */
4799 
4800 static void
4801 aarch64_override_options (void)
4802 {
4803   /* -march wins over -mcpu: when -march is given, -mcpu is ignored and the
4804      architecture's default core is selected; otherwise -march remains
4805      undefined.  -mtune can be used with either -march or -mcpu.  */
4806 
4807   if (aarch64_arch_string)
4808     {
4809       aarch64_parse_arch ();
4810       aarch64_cpu_string = NULL;
4811     }
4812 
4813   if (aarch64_cpu_string)
4814     {
4815       aarch64_parse_cpu ();
4816       selected_arch = NULL;
4817     }
4818 
4819   if (aarch64_tune_string)
4820     {
4821       aarch64_parse_tune ();
4822     }
4823 
4824   initialize_aarch64_code_model ();
4825 
4826   aarch64_build_bitmask_table ();
4827 
4828   /* This target defaults to strict volatile bitfields.  */
4829   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4830     flag_strict_volatile_bitfields = 1;
4831 
4832   /* If the user did not specify a processor, choose the default
4833      one for them.  This will be the CPU set during configuration using
4834      --with-cpu, otherwise it is "generic".  */
4835   if (!selected_cpu)
4836     {
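      /* TARGET_CPU_DEFAULT packs the core index in its low 6 bits and the
	 default ISA flags in the remaining bits.  */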
4837       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4838       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4839     }
4840 
4841   gcc_assert (selected_cpu);
4842 
4843   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
4844   if (!selected_tune)
4845     selected_tune = &all_cores[selected_cpu->core];
4846 
4847   aarch64_tune_flags = selected_tune->flags;
4848   aarch64_tune = selected_tune->core;
4849   aarch64_tune_params = selected_tune->tune;
4850 
4851   if (aarch64_fix_a53_err835769 == 2)
4852     {
4853 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
4854       aarch64_fix_a53_err835769 = 1;
4855 #else
4856       aarch64_fix_a53_err835769 = 0;
4857 #endif
4858     }
4859 
4860   aarch64_override_options_after_change ();
4861 }
4862 
4863 /* Implement targetm.override_options_after_change.  */
4864 
4865 static void
4866 aarch64_override_options_after_change (void)
4867 {
4868   faked_omit_frame_pointer = false;
4869 
4870   /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4871      that aarch64_frame_pointer_required will be called.  We need to remember
4872      whether flag_omit_frame_pointer was turned on normally or just faked.  */
4873 
4874   if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4875     {
4876       flag_omit_frame_pointer = true;
4877       faked_omit_frame_pointer = true;
4878     }
4879 }
4880 
4881 static struct machine_function *
4882 aarch64_init_machine_status (void)
4883 {
4884   struct machine_function *machine;
4885   machine = ggc_alloc_cleared_machine_function ();
4886   return machine;
4887 }
4888 
4889 void
4890 aarch64_init_expanders (void)
4891 {
4892   init_machine_status = aarch64_init_machine_status;
4893 }
4894 
4895 /* A checking mechanism for the implementation of the various code models.  */
4896 static void
4897 initialize_aarch64_code_model (void)
4898 {
4899    if (flag_pic)
4900      {
4901        switch (aarch64_cmodel_var)
4902 	 {
4903 	 case AARCH64_CMODEL_TINY:
4904 	   aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4905 	   break;
4906 	 case AARCH64_CMODEL_SMALL:
4907 	   aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4908 	   break;
4909 	 case AARCH64_CMODEL_LARGE:
4910 	   sorry ("code model %qs with -f%s", "large",
4911 		  flag_pic > 1 ? "PIC" : "pic");
4912 	 default:
4913 	   gcc_unreachable ();
4914 	 }
4915      }
4916    else
4917      aarch64_cmodel = aarch64_cmodel_var;
4918 }
4919 
4920 /* Return true if SYMBOL_REF X binds locally.  */
4921 
4922 static bool
4923 aarch64_symbol_binds_local_p (const_rtx x)
4924 {
4925   return (SYMBOL_REF_DECL (x)
4926 	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4927 	  : SYMBOL_REF_LOCAL_P (x));
4928 }
4929 
4930 /* Return true if SYMBOL_REF X is thread local */
4931 static bool
4932 aarch64_tls_symbol_p (rtx x)
4933 {
4934   if (! TARGET_HAVE_TLS)
4935     return false;
4936 
4937   if (GET_CODE (x) != SYMBOL_REF)
4938     return false;
4939 
4940   return SYMBOL_REF_TLS_MODEL (x) != 0;
4941 }
4942 
4943 /* Classify a TLS symbol into one of the TLS kinds.  */
4944 enum aarch64_symbol_type
4945 aarch64_classify_tls_symbol (rtx x)
4946 {
4947   enum tls_model tls_kind = tls_symbolic_operand_type (x);
4948 
4949   switch (tls_kind)
4950     {
4951     case TLS_MODEL_GLOBAL_DYNAMIC:
4952     case TLS_MODEL_LOCAL_DYNAMIC:
4953       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4954 
4955     case TLS_MODEL_INITIAL_EXEC:
4956       return SYMBOL_SMALL_GOTTPREL;
4957 
4958     case TLS_MODEL_LOCAL_EXEC:
4959       return SYMBOL_SMALL_TPREL;
4960 
4961     case TLS_MODEL_EMULATED:
4962     case TLS_MODEL_NONE:
4963       return SYMBOL_FORCE_TO_MEM;
4964 
4965     default:
4966       gcc_unreachable ();
4967     }
4968 }
4969 
4970 /* Return the method that should be used to access SYMBOL_REF or
4971    LABEL_REF X in context CONTEXT.  */
4972 enum aarch64_symbol_type
4973 aarch64_classify_symbol (rtx x,
4974 			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4975 {
4976   if (GET_CODE (x) == LABEL_REF)
4977     {
4978       switch (aarch64_cmodel)
4979 	{
4980 	case AARCH64_CMODEL_LARGE:
4981 	  return SYMBOL_FORCE_TO_MEM;
4982 
4983 	case AARCH64_CMODEL_TINY_PIC:
4984 	case AARCH64_CMODEL_TINY:
4985 	case AARCH64_CMODEL_SMALL_PIC:
4986 	case AARCH64_CMODEL_SMALL:
4987 	  return SYMBOL_SMALL_ABSOLUTE;
4988 
4989 	default:
4990 	  gcc_unreachable ();
4991 	}
4992     }
4993 
4994   gcc_assert (GET_CODE (x) == SYMBOL_REF);
4995 
4996   switch (aarch64_cmodel)
4997     {
4998     case AARCH64_CMODEL_LARGE:
4999       return SYMBOL_FORCE_TO_MEM;
5000 
5001     case AARCH64_CMODEL_TINY:
5002     case AARCH64_CMODEL_SMALL:
5003 
5004       /* This is needed to get DFmode and TImode constants loaded from
5005          the constant pool.  We do not yet handle TImode constant loads
5006          properly, so such values must be placed in the constant pool
5007          for now.  */
5008       if (CONSTANT_POOL_ADDRESS_P (x))
5009 	return SYMBOL_FORCE_TO_MEM;
5010 
5011       if (aarch64_tls_symbol_p (x))
5012 	return aarch64_classify_tls_symbol (x);
5013 
5014       if (SYMBOL_REF_WEAK (x))
5015 	return SYMBOL_FORCE_TO_MEM;
5016 
5017       return SYMBOL_SMALL_ABSOLUTE;
5018 
5019     case AARCH64_CMODEL_TINY_PIC:
5020     case AARCH64_CMODEL_SMALL_PIC:
5021 
5022       if (CONSTANT_POOL_ADDRESS_P (x))
5023 	return SYMBOL_FORCE_TO_MEM;
5024 
5025       if (aarch64_tls_symbol_p (x))
5026 	return aarch64_classify_tls_symbol (x);
5027 
5028       if (!aarch64_symbol_binds_local_p (x))
5029 	return SYMBOL_SMALL_GOT;
5030 
5031       return SYMBOL_SMALL_ABSOLUTE;
5032 
5033     default:
5034       gcc_unreachable ();
5035     }
5036   /* By default push everything into the constant pool.  */
5037   return SYMBOL_FORCE_TO_MEM;
5038 }
5039 
5040 /* Return true if X is a symbolic constant that can be used in context
5041    CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
5042 
5043 bool
5044 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5045 			     enum aarch64_symbol_type *symbol_type)
5046 {
5047   rtx offset;
5048   split_const (x, &x, &offset);
5049   if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5050     *symbol_type = aarch64_classify_symbol (x, context);
5051   else
5052     return false;
5053 
5054   /* No checking of offset at this point.  */
5055   return true;
5056 }
5057 
5058 bool
5059 aarch64_constant_address_p (rtx x)
5060 {
5061   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5062 }
5063 
5064 bool
5065 aarch64_legitimate_pic_operand_p (rtx x)
5066 {
5067   if (GET_CODE (x) == SYMBOL_REF
5068       || (GET_CODE (x) == CONST
5069 	  && GET_CODE (XEXP (x, 0)) == PLUS
5070 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5071      return false;
5072 
5073   return true;
5074 }
5075 
5076 /* Return true if X holds either a quarter-precision or
5077    floating-point +0.0 constant.  */
5078 static bool
5079 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5080 {
5081   if (!CONST_DOUBLE_P (x))
5082     return false;
5083 
5084   /* TODO: We could handle moving 0.0 to a TFmode register,
5085      but first we would like to refactor the movtf_aarch64
5086      to be more amicable to split moves properly and
5087      correctly gate on TARGET_SIMD.  For now - reject all
5088      constants which are not to SFmode or DFmode registers.  */
5089   if (!(mode == SFmode || mode == DFmode))
5090     return false;
5091 
5092   if (aarch64_float_const_zero_rtx_p (x))
5093     return true;
5094   return aarch64_float_const_representable_p (x);
5095 }
5096 
5097 static bool
5098 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5099 {
5100   /* Do not allow vector struct mode constants.  We could support
5101      0 and -1 easily, but they need support in aarch64-simd.md.  */
5102   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5103     return false;
5104 
5105   /* This could probably go away because
5106      we now decompose CONST_INTs according to expand_mov_immediate.  */
5107   if ((GET_CODE (x) == CONST_VECTOR
5108        && aarch64_simd_valid_immediate (x, mode, false,
5109 					NULL, NULL, NULL, NULL, NULL) != -1)
5110       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5111 	return !targetm.cannot_force_const_mem (mode, x);
5112 
5113   if (GET_CODE (x) == HIGH
5114       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5115     return true;
5116 
5117   return aarch64_constant_address_p (x);
5118 }
5119 
5120 rtx
5121 aarch64_load_tp (rtx target)
5122 {
5123   if (!target
5124       || GET_MODE (target) != Pmode
5125       || !register_operand (target, Pmode))
5126     target = gen_reg_rtx (Pmode);
5127 
5128   /* Can return in any reg.  */
5129   emit_insn (gen_aarch64_load_tp_hard (target));
5130   return target;
5131 }
5132 
5133 /* On AAPCS systems, this is the "struct __va_list".  */
5134 static GTY(()) tree va_list_type;
5135 
5136 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5137    Return the type to use as __builtin_va_list.
5138 
5139    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5140 
5141    struct __va_list
5142    {
5143      void *__stack;
5144      void *__gr_top;
5145      void *__vr_top;
5146      int   __gr_offs;
5147      int   __vr_offs;
5148    };  */
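/* Not part of the AAPCS64 text above, but as a summary of how the fields
   are used below: __stack points at the next anonymous argument on the
   stack, __gr_top and __vr_top point just past the general-register and
   vector-register save areas, and __gr_offs/__vr_offs are negative byte
   offsets from those tops that advance towards zero as registers are
   consumed; a non-negative offset means the remaining anonymous arguments
   live on the stack.  */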
5149 
5150 static tree
5151 aarch64_build_builtin_va_list (void)
5152 {
5153   tree va_list_name;
5154   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5155 
5156   /* Create the type.  */
5157   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5158   /* Give it the required name.  */
5159   va_list_name = build_decl (BUILTINS_LOCATION,
5160 			     TYPE_DECL,
5161 			     get_identifier ("__va_list"),
5162 			     va_list_type);
5163   DECL_ARTIFICIAL (va_list_name) = 1;
5164   TYPE_NAME (va_list_type) = va_list_name;
5165   TYPE_STUB_DECL (va_list_type) = va_list_name;
5166 
5167   /* Create the fields.  */
5168   f_stack = build_decl (BUILTINS_LOCATION,
5169 			FIELD_DECL, get_identifier ("__stack"),
5170 			ptr_type_node);
5171   f_grtop = build_decl (BUILTINS_LOCATION,
5172 			FIELD_DECL, get_identifier ("__gr_top"),
5173 			ptr_type_node);
5174   f_vrtop = build_decl (BUILTINS_LOCATION,
5175 			FIELD_DECL, get_identifier ("__vr_top"),
5176 			ptr_type_node);
5177   f_groff = build_decl (BUILTINS_LOCATION,
5178 			FIELD_DECL, get_identifier ("__gr_offs"),
5179 			integer_type_node);
5180   f_vroff = build_decl (BUILTINS_LOCATION,
5181 			FIELD_DECL, get_identifier ("__vr_offs"),
5182 			integer_type_node);
5183 
5184   DECL_ARTIFICIAL (f_stack) = 1;
5185   DECL_ARTIFICIAL (f_grtop) = 1;
5186   DECL_ARTIFICIAL (f_vrtop) = 1;
5187   DECL_ARTIFICIAL (f_groff) = 1;
5188   DECL_ARTIFICIAL (f_vroff) = 1;
5189 
5190   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5191   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5192   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5193   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5194   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5195 
5196   TYPE_FIELDS (va_list_type) = f_stack;
5197   DECL_CHAIN (f_stack) = f_grtop;
5198   DECL_CHAIN (f_grtop) = f_vrtop;
5199   DECL_CHAIN (f_vrtop) = f_groff;
5200   DECL_CHAIN (f_groff) = f_vroff;
5201 
5202   /* Compute its layout.  */
5203   layout_type (va_list_type);
5204 
5205   return va_list_type;
5206 }
5207 
5208 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
5209 static void
5210 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5211 {
5212   const CUMULATIVE_ARGS *cum;
5213   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5214   tree stack, grtop, vrtop, groff, vroff;
5215   tree t;
5216   int gr_save_area_size;
5217   int vr_save_area_size;
5218   int vr_offset;
5219 
5220   cum = &crtl->args.info;
5221   gr_save_area_size
5222     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5223   vr_save_area_size
5224     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5225 
5226   if (TARGET_GENERAL_REGS_ONLY)
5227     {
5228       if (cum->aapcs_nvrn > 0)
5229 	sorry ("%qs and floating point or vector arguments",
5230 	       "-mgeneral-regs-only");
5231       vr_save_area_size = 0;
5232     }
5233 
5234   f_stack = TYPE_FIELDS (va_list_type_node);
5235   f_grtop = DECL_CHAIN (f_stack);
5236   f_vrtop = DECL_CHAIN (f_grtop);
5237   f_groff = DECL_CHAIN (f_vrtop);
5238   f_vroff = DECL_CHAIN (f_groff);
5239 
5240   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5241 		  NULL_TREE);
5242   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5243 		  NULL_TREE);
5244   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5245 		  NULL_TREE);
5246   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5247 		  NULL_TREE);
5248   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5249 		  NULL_TREE);
5250 
5251   /* Emit code to initialize STACK, which points to the next varargs stack
5252      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
5253      by named arguments.  STACK is 8-byte aligned.  */
5254   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5255   if (cum->aapcs_stack_size > 0)
5256     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5257   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5258   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5259 
5260   /* Emit code to initialize GRTOP, the top of the GR save area.
5261      virtual_incoming_args_rtx should have been 16 byte aligned.  */
5262   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5263   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5264   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5265 
5266   /* Emit code to initialize VRTOP, the top of the VR save area.
5267      This address is gr_save_area_bytes below GRTOP, rounded
5268      down to the next 16-byte boundary.  */
5269   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5270   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5271 			     STACK_BOUNDARY / BITS_PER_UNIT);
5272 
5273   if (vr_offset)
5274     t = fold_build_pointer_plus_hwi (t, -vr_offset);
5275   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5276   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5277 
5278   /* Emit code to initialize GROFF, the offset from GRTOP of the
5279      next GPR argument.  */
5280   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5281 	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5282   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5283 
5284   /* Likewise emit code to initialize VROFF, the offset from VRTOP
5285      of the next VR argument.  */
5286   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5287 	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5288   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5289 }
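/* Worked example (illustrative only, assuming the usual eight X argument
   registers, eight V argument registers, 8-byte words and 16-byte vector
   registers): for a callee such as

     void f (int n, ...);

   only x0 is named, so gr_save_area_size is 7 * 8 = 56 bytes and
   vr_save_area_size is 8 * 16 = 128 bytes.  The code above then sets
   __gr_top to virtual_incoming_args_rtx, __vr_top 64 bytes below it
   (56 rounded up to a 16-byte boundary), __gr_offs to -56 and
   __vr_offs to -128.  */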
5290 
5291 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
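/* The expansion below is easiest to follow as C-like pseudocode; this is
   only a sketch (16-byte re-alignment, big-endian adjustments and the HFA
   field-by-field copy are omitted), shown for the general-register case,
   with the vector-register case using __vr_top/__vr_offs instead:

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;
     ap.__gr_offs = off + rsize;
     if (ap.__gr_offs > 0)
       goto on_stack;
     addr = ap.__gr_top + off;
     goto done;
   on_stack:
     addr = ap.__stack;
     ap.__stack = (addr + size + 7) & -8;
   done:
     result = *(type *) addr;  */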
5292 
5293 static tree
5294 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5295 			      gimple_seq *post_p ATTRIBUTE_UNUSED)
5296 {
5297   tree addr;
5298   bool indirect_p;
5299   bool is_ha;		/* is HFA or HVA.  */
5300   bool dw_align;	/* double-word align.  */
5301   enum machine_mode ag_mode = VOIDmode;
5302   int nregs;
5303   enum machine_mode mode;
5304 
5305   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5306   tree stack, f_top, f_off, off, arg, roundup, on_stack;
5307   HOST_WIDE_INT size, rsize, adjust, align;
5308   tree t, u, cond1, cond2;
5309 
5310   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5311   if (indirect_p)
5312     type = build_pointer_type (type);
5313 
5314   mode = TYPE_MODE (type);
5315 
5316   f_stack = TYPE_FIELDS (va_list_type_node);
5317   f_grtop = DECL_CHAIN (f_stack);
5318   f_vrtop = DECL_CHAIN (f_grtop);
5319   f_groff = DECL_CHAIN (f_vrtop);
5320   f_vroff = DECL_CHAIN (f_groff);
5321 
5322   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5323 		  f_stack, NULL_TREE);
5324   size = int_size_in_bytes (type);
5325   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5326 
5327   dw_align = false;
5328   adjust = 0;
5329   if (aarch64_vfp_is_call_or_return_candidate (mode,
5330 					       type,
5331 					       &ag_mode,
5332 					       &nregs,
5333 					       &is_ha))
5334     {
5335       /* TYPE passed in fp/simd registers.  */
5336       if (TARGET_GENERAL_REGS_ONLY)
5337 	sorry ("%qs and floating point or vector arguments",
5338 	       "-mgeneral-regs-only");
5339 
5340       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5341 		      unshare_expr (valist), f_vrtop, NULL_TREE);
5342       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5343 		      unshare_expr (valist), f_vroff, NULL_TREE);
5344 
5345       rsize = nregs * UNITS_PER_VREG;
5346 
5347       if (is_ha)
5348 	{
5349 	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5350 	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5351 	}
5352       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5353 	       && size < UNITS_PER_VREG)
5354 	{
5355 	  adjust = UNITS_PER_VREG - size;
5356 	}
5357     }
5358   else
5359     {
5360       /* TYPE passed in general registers.  */
5361       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5362 		      unshare_expr (valist), f_grtop, NULL_TREE);
5363       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5364 		      unshare_expr (valist), f_groff, NULL_TREE);
5365       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5366       nregs = rsize / UNITS_PER_WORD;
5367 
5368       if (align > 8)
5369 	dw_align = true;
5370 
5371       if (BLOCK_REG_PADDING (mode, type, 1) == downward
5372 	  && size < UNITS_PER_WORD)
5373 	{
5374 	  adjust = UNITS_PER_WORD - size;
5375 	}
5376     }
5377 
5378   /* Get a local temporary for the field value.  */
5379   off = get_initialized_tmp_var (f_off, pre_p, NULL);
5380 
5381   /* Emit code to branch if off >= 0.  */
5382   t = build2 (GE_EXPR, boolean_type_node, off,
5383 	      build_int_cst (TREE_TYPE (off), 0));
5384   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5385 
5386   if (dw_align)
5387     {
5388       /* Emit: offs = (offs + 15) & -16.  */
5389       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5390 		  build_int_cst (TREE_TYPE (off), 15));
5391       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5392 		  build_int_cst (TREE_TYPE (off), -16));
5393       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5394     }
5395   else
5396     roundup = NULL;
5397 
5398   /* Update ap.__[g|v]r_offs  */
5399   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5400 	      build_int_cst (TREE_TYPE (off), rsize));
5401   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5402 
5403   /* String up.  */
5404   if (roundup)
5405     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5406 
5407   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
5408   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5409 	      build_int_cst (TREE_TYPE (f_off), 0));
5410   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5411 
5412   /* String up: make sure the assignment happens before the use.  */
5413   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5414   COND_EXPR_ELSE (cond1) = t;
5415 
5416   /* Prepare the trees handling the argument that is passed on the stack;
5417      the top-level node will be stored in ON_STACK.  */
5418   arg = get_initialized_tmp_var (stack, pre_p, NULL);
5419   if (align > 8)
5420     {
5421       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
5422       t = fold_convert (intDI_type_node, arg);
5423       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5424 		  build_int_cst (TREE_TYPE (t), 15));
5425       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5426 		  build_int_cst (TREE_TYPE (t), -16));
5427       t = fold_convert (TREE_TYPE (arg), t);
5428       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5429     }
5430   else
5431     roundup = NULL;
5432   /* Advance ap.__stack  */
5433   t = fold_convert (intDI_type_node, arg);
5434   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5435 	      build_int_cst (TREE_TYPE (t), size + 7));
5436   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5437 	      build_int_cst (TREE_TYPE (t), -8));
5438   t = fold_convert (TREE_TYPE (arg), t);
5439   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5440   /* String up roundup and advance.  */
5441   if (roundup)
5442     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5443   /* String up with ARG.  */
5444   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5445   /* Big-endianness related address adjustment.  */
5446   if (BLOCK_REG_PADDING (mode, type, 1) == downward
5447       && size < UNITS_PER_WORD)
5448   {
5449     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5450 		size_int (UNITS_PER_WORD - size));
5451     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5452   }
5453 
5454   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5455   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5456 
5457   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
5458   t = off;
5459   if (adjust)
5460     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5461 		build_int_cst (TREE_TYPE (off), adjust));
5462 
5463   t = fold_convert (sizetype, t);
5464   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5465 
5466   if (is_ha)
5467     {
5468       /* type ha; // treat as "struct {ftype field[n];}"
5469          ... [computing offs]
5470          for (i = 0; i <nregs; ++i, offs += 16)
5471 	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5472 	 return ha;  */
5473       int i;
5474       tree tmp_ha, field_t, field_ptr_t;
5475 
5476       /* Declare a local variable.  */
5477       tmp_ha = create_tmp_var_raw (type, "ha");
5478       gimple_add_tmp_var (tmp_ha);
5479 
5480       /* Establish the base type.  */
5481       switch (ag_mode)
5482 	{
5483 	case SFmode:
5484 	  field_t = float_type_node;
5485 	  field_ptr_t = float_ptr_type_node;
5486 	  break;
5487 	case DFmode:
5488 	  field_t = double_type_node;
5489 	  field_ptr_t = double_ptr_type_node;
5490 	  break;
5491 	case TFmode:
5492 	  field_t = long_double_type_node;
5493 	  field_ptr_t = long_double_ptr_type_node;
5494 	  break;
5495 /* The half precision and quad precision are not fully supported yet.  Enable
5496    the following code after the support is complete.  Need to find the correct
5497    type node for __fp16 *.  */
5498 #if 0
5499 	case HFmode:
5500 	  field_t = float_type_node;
5501 	  field_ptr_t = float_ptr_type_node;
5502 	  break;
5503 #endif
5504 	case V2SImode:
5505 	case V4SImode:
5506 	    {
5507 	      tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5508 	      field_t = build_vector_type_for_mode (innertype, ag_mode);
5509 	      field_ptr_t = build_pointer_type (field_t);
5510 	    }
5511 	  break;
5512 	default:
5513 	  gcc_assert (0);
5514 	}
5515 
5516       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
5517       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5518       addr = t;
5519       t = fold_convert (field_ptr_t, addr);
5520       t = build2 (MODIFY_EXPR, field_t,
5521 		  build1 (INDIRECT_REF, field_t, tmp_ha),
5522 		  build1 (INDIRECT_REF, field_t, t));
5523 
5524       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
5525       for (i = 1; i < nregs; ++i)
5526 	{
5527 	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5528 	  u = fold_convert (field_ptr_t, addr);
5529 	  u = build2 (MODIFY_EXPR, field_t,
5530 		      build2 (MEM_REF, field_t, tmp_ha,
5531 			      build_int_cst (field_ptr_t,
5532 					     (i *
5533 					      int_size_in_bytes (field_t)))),
5534 		      build1 (INDIRECT_REF, field_t, u));
5535 	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5536 	}
5537 
5538       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5539       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5540     }
5541 
5542   COND_EXPR_ELSE (cond2) = t;
5543   addr = fold_convert (build_pointer_type (type), cond1);
5544   addr = build_va_arg_indirect_ref (addr);
5545 
5546   if (indirect_p)
5547     addr = build_va_arg_indirect_ref (addr);
5548 
5549   return addr;
5550 }
5551 
5552 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
5553 
5554 static void
5555 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5556 				tree type, int *pretend_size ATTRIBUTE_UNUSED,
5557 				int no_rtl)
5558 {
5559   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5560   CUMULATIVE_ARGS local_cum;
5561   int gr_saved, vr_saved;
5562 
5563   /* The caller has advanced CUM up to, but not beyond, the last named
5564      argument.  Advance a local copy of CUM past the last "real" named
5565      argument, to find out how many registers are left over.  */
5566   local_cum = *cum;
5567   aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5568 
5569   /* Find out how many registers we need to save.  */
5570   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5571   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5572 
5573   if (TARGET_GENERAL_REGS_ONLY)
5574     {
5575       if (local_cum.aapcs_nvrn > 0)
5576 	sorry ("%qs and floating point or vector arguments",
5577 	       "-mgeneral-regs-only");
5578       vr_saved = 0;
5579     }
5580 
5581   if (!no_rtl)
5582     {
5583       if (gr_saved > 0)
5584 	{
5585 	  rtx ptr, mem;
5586 
5587 	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
5588 	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5589 			       - gr_saved * UNITS_PER_WORD);
5590 	  mem = gen_frame_mem (BLKmode, ptr);
5591 	  set_mem_alias_set (mem, get_varargs_alias_set ());
5592 
5593 	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5594 			       mem, gr_saved);
5595 	}
5596       if (vr_saved > 0)
5597 	{
5598 	  /* We can't use move_block_from_reg, because it will use
5599 	     the wrong mode, storing D regs only.  */
5600 	  enum machine_mode mode = TImode;
5601 	  int off, i;
5602 
5603 	  /* Set OFF to the offset from virtual_incoming_args_rtx of
5604 	     the first vector register.  The VR save area lies below
5605 	     the GR one, and is aligned to 16 bytes.  */
5606 	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5607 				   STACK_BOUNDARY / BITS_PER_UNIT);
5608 	  off -= vr_saved * UNITS_PER_VREG;
5609 
5610 	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5611 	    {
5612 	      rtx ptr, mem;
5613 
5614 	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5615 	      mem = gen_frame_mem (mode, ptr);
5616 	      set_mem_alias_set (mem, get_varargs_alias_set ());
5617 	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5618 	      off += UNITS_PER_VREG;
5619 	    }
5620 	}
5621     }
5622 
5623   /* We don't save the size into *PRETEND_SIZE because we want to avoid
5624      any complication of having crtl->args.pretend_args_size changed.  */
5625   cfun->machine->saved_varargs_size
5626     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5627 		      STACK_BOUNDARY / BITS_PER_UNIT)
5628        + vr_saved * UNITS_PER_VREG);
5629 }
5630 
5631 static void
5632 aarch64_conditional_register_usage (void)
5633 {
5634   int i;
5635   if (!TARGET_FLOAT)
5636     {
5637       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5638 	{
5639 	  fixed_regs[i] = 1;
5640 	  call_used_regs[i] = 1;
5641 	}
5642     }
5643 }
5644 
5645 /* Walk down the type tree of TYPE counting consecutive base elements.
5646    If *MODEP is VOIDmode, then set it to the first valid floating point
5647    type.  If a non-floating point type is found, or if a floating point
5648    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5649    otherwise return the count in the sub-tree.  */
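/* For example (illustrative only): struct { float x, y; } yields 2 with
   *MODEP set to SFmode, double _Complex yields 2 with DFmode, and
   struct { float f; double d; } yields -1 because the two element modes
   do not match.  */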
5650 static int
5651 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5652 {
5653   enum machine_mode mode;
5654   HOST_WIDE_INT size;
5655 
5656   switch (TREE_CODE (type))
5657     {
5658     case REAL_TYPE:
5659       mode = TYPE_MODE (type);
5660       if (mode != DFmode && mode != SFmode && mode != TFmode)
5661 	return -1;
5662 
5663       if (*modep == VOIDmode)
5664 	*modep = mode;
5665 
5666       if (*modep == mode)
5667 	return 1;
5668 
5669       break;
5670 
5671     case COMPLEX_TYPE:
5672       mode = TYPE_MODE (TREE_TYPE (type));
5673       if (mode != DFmode && mode != SFmode && mode != TFmode)
5674 	return -1;
5675 
5676       if (*modep == VOIDmode)
5677 	*modep = mode;
5678 
5679       if (*modep == mode)
5680 	return 2;
5681 
5682       break;
5683 
5684     case VECTOR_TYPE:
5685       /* Use V2SImode and V4SImode as representatives of all 64-bit
5686 	 and 128-bit vector types.  */
5687       size = int_size_in_bytes (type);
5688       switch (size)
5689 	{
5690 	case 8:
5691 	  mode = V2SImode;
5692 	  break;
5693 	case 16:
5694 	  mode = V4SImode;
5695 	  break;
5696 	default:
5697 	  return -1;
5698 	}
5699 
5700       if (*modep == VOIDmode)
5701 	*modep = mode;
5702 
5703       /* Vector modes are considered to be opaque: two vectors are
5704 	 equivalent for the purposes of being homogeneous aggregates
5705 	 if they are the same size.  */
5706       if (*modep == mode)
5707 	return 1;
5708 
5709       break;
5710 
5711     case ARRAY_TYPE:
5712       {
5713 	int count;
5714 	tree index = TYPE_DOMAIN (type);
5715 
5716 	/* Can't handle incomplete types.  */
5717 	if (!COMPLETE_TYPE_P (type))
5718 	  return -1;
5719 
5720 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5721 	if (count == -1
5722 	    || !index
5723 	    || !TYPE_MAX_VALUE (index)
5724 	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
5725 	    || !TYPE_MIN_VALUE (index)
5726 	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
5727 	    || count < 0)
5728 	  return -1;
5729 
5730 	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5731 		      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5732 
5733 	/* There must be no padding.  */
5734 	if (!host_integerp (TYPE_SIZE (type), 1)
5735 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5736 		!= count * GET_MODE_BITSIZE (*modep)))
5737 	  return -1;
5738 
5739 	return count;
5740       }
5741 
5742     case RECORD_TYPE:
5743       {
5744 	int count = 0;
5745 	int sub_count;
5746 	tree field;
5747 
5748 	/* Can't handle incomplete types.  */
5749 	if (!COMPLETE_TYPE_P (type))
5750 	  return -1;
5751 
5752 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5753 	  {
5754 	    if (TREE_CODE (field) != FIELD_DECL)
5755 	      continue;
5756 
5757 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5758 	    if (sub_count < 0)
5759 	      return -1;
5760 	    count += sub_count;
5761 	  }
5762 
5763 	/* There must be no padding.  */
5764 	if (!host_integerp (TYPE_SIZE (type), 1)
5765 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5766 		!= count * GET_MODE_BITSIZE (*modep)))
5767 	  return -1;
5768 
5769 	return count;
5770       }
5771 
5772     case UNION_TYPE:
5773     case QUAL_UNION_TYPE:
5774       {
5775 	/* These aren't very interesting except in a degenerate case.  */
5776 	int count = 0;
5777 	int sub_count;
5778 	tree field;
5779 
5780 	/* Can't handle incomplete types.  */
5781 	if (!COMPLETE_TYPE_P (type))
5782 	  return -1;
5783 
5784 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5785 	  {
5786 	    if (TREE_CODE (field) != FIELD_DECL)
5787 	      continue;
5788 
5789 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5790 	    if (sub_count < 0)
5791 	      return -1;
5792 	    count = count > sub_count ? count : sub_count;
5793 	  }
5794 
5795 	/* There must be no padding.  */
5796 	if (!host_integerp (TYPE_SIZE (type), 1)
5797 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5798 		!= count * GET_MODE_BITSIZE (*modep)))
5799 	  return -1;
5800 
5801 	return count;
5802       }
5803 
5804     default:
5805       break;
5806     }
5807 
5808   return -1;
5809 }
5810 
5811 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5812    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
5813    array types.  The C99 floating-point complex types are also considered
5814    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
5815    types, which are GCC extensions and out of the scope of AAPCS64, are
5816    treated as composite types here as well.
5817 
5818    Note that MODE itself is not sufficient in determining whether a type
5819    is such a composite type or not.  This is because
5820    stor-layout.c:compute_record_mode may have already changed the MODE
5821    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
5822    structure with only one field may have its MODE set to the mode of the
5823    field.  Also an integer mode whose size matches the size of the
5824    RECORD_TYPE type may be used to substitute the original mode
5825    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
5826    solely relied on.  */
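/* As an illustration: struct { double d; } may be given DFmode by
   compute_record_mode, yet it is still a composite type under the AAPCS64,
   which is why the function below tests TYPE and does not rely on MODE
   alone.  */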
5827 
5828 static bool
5829 aarch64_composite_type_p (const_tree type,
5830 			  enum machine_mode mode)
5831 {
5832   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5833     return true;
5834 
5835   if (mode == BLKmode
5836       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5837       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5838     return true;
5839 
5840   return false;
5841 }
5842 
5843 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5844    type as described in AAPCS64 \S 4.1.2.
5845 
5846    See the comment above aarch64_composite_type_p for the notes on MODE.  */
5847 
5848 static bool
5849 aarch64_short_vector_p (const_tree type,
5850 			enum machine_mode mode)
5851 {
5852   HOST_WIDE_INT size = -1;
5853 
5854   if (type && TREE_CODE (type) == VECTOR_TYPE)
5855     size = int_size_in_bytes (type);
5856   else if (!aarch64_composite_type_p (type, mode)
5857 	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5858 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5859     size = GET_MODE_SIZE (mode);
5860 
5861   return (size == 8 || size == 16) ? true : false;
5862 }
5863 
5864 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5865    shall be passed or returned in simd/fp register(s) (provided these
5866    parameter passing registers are available).
5867 
5868    Upon successful return, *COUNT returns the number of needed registers,
5869    *BASE_MODE returns the mode of the individual register and, when IS_HA
5870    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5871    floating-point aggregate or a homogeneous short-vector aggregate.  */
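/* For instance (illustrative only): struct { double a, b; } is an HFA and
   gives *COUNT = 2 with *BASE_MODE = DFmode; float _Complex gives
   *COUNT = 2 with *BASE_MODE = SFmode; a lone float gives *COUNT = 1 with
   *IS_HA false.  */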
5872 
5873 static bool
5874 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5875 					 const_tree type,
5876 					 enum machine_mode *base_mode,
5877 					 int *count,
5878 					 bool *is_ha)
5879 {
5880   enum machine_mode new_mode = VOIDmode;
5881   bool composite_p = aarch64_composite_type_p (type, mode);
5882 
5883   if (is_ha != NULL) *is_ha = false;
5884 
5885   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5886       || aarch64_short_vector_p (type, mode))
5887     {
5888       *count = 1;
5889       new_mode = mode;
5890     }
5891   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5892     {
5893       if (is_ha != NULL) *is_ha = true;
5894       *count = 2;
5895       new_mode = GET_MODE_INNER (mode);
5896     }
5897   else if (type && composite_p)
5898     {
5899       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5900 
5901       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5902 	{
5903 	  if (is_ha != NULL) *is_ha = true;
5904 	  *count = ag_count;
5905 	}
5906       else
5907 	return false;
5908     }
5909   else
5910     return false;
5911 
5912   *base_mode = new_mode;
5913   return true;
5914 }
5915 
5916 /* Implement TARGET_STRUCT_VALUE_RTX.  */
5917 
5918 static rtx
5919 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5920 			  int incoming ATTRIBUTE_UNUSED)
5921 {
5922   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5923 }
5924 
5925 /* Implements target hook vector_mode_supported_p.  */
5926 static bool
5927 aarch64_vector_mode_supported_p (enum machine_mode mode)
5928 {
5929   if (TARGET_SIMD
5930       && (mode == V4SImode  || mode == V8HImode
5931 	  || mode == V16QImode || mode == V2DImode
5932 	  || mode == V2SImode  || mode == V4HImode
5933 	  || mode == V8QImode || mode == V2SFmode
5934 	  || mode == V4SFmode || mode == V2DFmode))
5935     return true;
5936 
5937   return false;
5938 }
5939 
5940 /* Return quad mode as the preferred SIMD mode.  */
5941 static enum machine_mode
5942 aarch64_preferred_simd_mode (enum machine_mode mode)
5943 {
5944   if (TARGET_SIMD)
5945     switch (mode)
5946       {
5947       case DFmode:
5948         return V2DFmode;
5949       case SFmode:
5950         return V4SFmode;
5951       case SImode:
5952         return V4SImode;
5953       case HImode:
5954         return V8HImode;
5955       case QImode:
5956         return V16QImode;
5957       case DImode:
5958         return V2DImode;
5959 
5960 
5961       default:;
5962       }
5963   return word_mode;
5964 }
5965 
5966 /* Return the bitmask of possible vector sizes for the vectorizer
5967    to iterate over.  */
5968 static unsigned int
5969 aarch64_autovectorize_vector_sizes (void)
5970 {
5971   return (16 | 8);
5972 }
5973 
5974 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5975    vector types in order to conform to the AAPCS64 (see "Procedure
5976    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
5977    qualify for emission with the mangled names defined in that document,
5978    a vector type must not only be of the correct mode but also be
5979    composed of AdvSIMD vector element types (e.g.
5980    _builtin_aarch64_simd_qi); these types are registered by
5981    aarch64_init_simd_builtins ().  In other words, vector types defined
5982    in other ways e.g. via vector_size attribute will get default
5983    mangled names.  */
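/* For example (illustrative, relying on the usual Itanium C++ ABI rules):
   a C++ function void f (int32x4_t) picks up the "11__Int32x4_t" entry
   below, so the function itself mangles as _Z1f11__Int32x4_t.  */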
5984 typedef struct
5985 {
5986   enum machine_mode mode;
5987   const char *element_type_name;
5988   const char *mangled_name;
5989 } aarch64_simd_mangle_map_entry;
5990 
5991 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5992   /* 64-bit containerized types.  */
5993   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
5994   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
5995   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
5996   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
5997   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
5998   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
5999   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
6000   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
6001   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6002   /* 128-bit containerized types.  */
6003   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
6004   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
6005   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
6006   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
6007   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
6008   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
6009   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
6010   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
6011   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
6012   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
6013   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
6014   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6015   { VOIDmode, NULL, NULL }
6016 };
6017 
6018 /* Implement TARGET_MANGLE_TYPE.  */
6019 
6020 static const char *
6021 aarch64_mangle_type (const_tree type)
6022 {
6023   /* The AArch64 ABI documents say that "__va_list" has to be
6024      mangled as if it is in the "std" namespace.  */
6025   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6026     return "St9__va_list";
6027 
6028   /* Check the mode of the vector type, and the name of the vector
6029      element type, against the table.  */
6030   if (TREE_CODE (type) == VECTOR_TYPE)
6031     {
6032       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6033 
6034       while (pos->mode != VOIDmode)
6035 	{
6036 	  tree elt_type = TREE_TYPE (type);
6037 
6038 	  if (pos->mode == TYPE_MODE (type)
6039 	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6040 	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6041 			  pos->element_type_name))
6042 	    return pos->mangled_name;
6043 
6044 	  pos++;
6045 	}
6046     }
6047 
6048   /* Use the default mangling.  */
6049   return NULL;
6050 }
6051 
6052 
6053 /* Return true iff X is a MEM rtx.  */
6054 
6055 static int
6056 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6057 {
6058   return MEM_P (*x);
6059 }
6060 
6061 
6062 /*  Return true if mem_insn contains a MEM RTX somewhere in it.  */
6063 
6064 static bool
6065 has_memory_op (rtx mem_insn)
6066 {
6067    rtx pattern = PATTERN (mem_insn);
6068    return for_each_rtx (&pattern, is_mem_p, NULL);
6069 }
6070 
6071 
6072 /* Find the first rtx before insn that will generate an assembly
6073    instruction.  */
6074 
6075 static rtx
6076 aarch64_prev_real_insn (rtx insn)
6077 {
6078   if (!insn)
6079     return NULL;
6080 
6081   do
6082     {
6083       insn = prev_real_insn (insn);
6084     }
6085   while (insn && recog_memoized (insn) < 0);
6086 
6087   return insn;
6088 }
6089 
6090 /*  Return true iff t1 is the v8type of a multiply-accumulate instruction.  */
6091 
6092 static bool
6093 is_madd_op (enum attr_v8type t1)
6094 {
6095   return t1 == V8TYPE_MADD
6096          || t1 == V8TYPE_MADDL;
6097 }
6098 
6099 
6100 /* Check if there is a register dependency between a load and the insn
6101    for which we hold recog_data.  */
6102 
6103 static bool
6104 dep_between_memop_and_curr (rtx memop)
6105 {
6106   rtx load_reg;
6107   int opno;
6108 
6109   gcc_assert (GET_CODE (memop) == SET);
6110 
6111   if (!REG_P (SET_DEST (memop)))
6112     return false;
6113 
6114   load_reg = SET_DEST (memop);
6115   for (opno = 1; opno < recog_data.n_operands; opno++)
6116     {
6117       rtx operand = recog_data.operand[opno];
6118       if (REG_P (operand)
6119           && reg_overlap_mentioned_p (load_reg, operand))
6120         return true;
6121 
6122     }
6123   return false;
6124 }
6125 
6126 
6127 
6128 /* When working around the Cortex-A53 erratum 835769,
6129    given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
6130    instruction and has a preceding memory instruction such that a NOP
6131    should be inserted between them.  */
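/* Sketched in assembly (illustrative only), the workaround turns

     ldr   x10, [sp, 16]
     madd  x0, x1, x2, x3

   into

     ldr   x10, [sp, 16]
     nop
     madd  x0, x1, x2, x3

   whenever the multiply-accumulate is 64-bit and does not use the value
   just loaded.  */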
6132 
6133 bool
6134 aarch64_madd_needs_nop (rtx insn)
6135 {
6136   enum attr_v8type attr_type;
6137   rtx prev;
6138   rtx body;
6139 
6140   if (!aarch64_fix_a53_err835769)
6141     return false;
6142 
6143   if (recog_memoized (insn) < 0)
6144     return false;
6145 
6146   attr_type = get_attr_v8type (insn);
6147   if (!is_madd_op (attr_type))
6148     return false;
6149 
6150   prev = aarch64_prev_real_insn (insn);
6151   /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
6152      Restore recog state to INSN to avoid state corruption.  */
6153   extract_constrain_insn_cached (insn);
6154 
6155   if (!prev || !has_memory_op (prev))
6156     return false;
6157 
6158   body = single_set (prev);
6159 
6160   /* If the previous insn is a memory op and there is no dependency between
6161      it and the madd, emit a nop between them.  If we know it's a memop but
6162      body is NULL, return true to be safe.  */
6163   if (GET_MODE (recog_data.operand[0]) == DImode
6164       && (!body || !dep_between_memop_and_curr (body)))
6165     return true;
6166 
6167   return false;
6168 
6169 }
6170 
6171 /* Implement FINAL_PRESCAN_INSN.  */
6172 
6173 void
6174 aarch64_final_prescan_insn (rtx insn)
6175 {
6176   if (aarch64_madd_needs_nop (insn))
6177     fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
6178 }
6179 
6180 
6181 /* Return the equivalent letter for size.  */
6182 static unsigned char
6183 sizetochar (int size)
6184 {
6185   switch (size)
6186     {
6187     case 64: return 'd';
6188     case 32: return 's';
6189     case 16: return 'h';
6190     case 8 : return 'b';
6191     default: gcc_unreachable ();
6192     }
6193 }
6194 
6195 /* Return true iff x is a uniform vector of floating-point
6196    constants, and the constant can be represented in
6197    quarter-precision form.  Note, as aarch64_float_const_representable
6198    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
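/* Illustrative example: a V4SFmode vector with 0.5 in every lane is
   accepted, since 0.5 fits the 8-bit floating-point immediate encoding,
   whereas a vector of 0.1 is rejected because 0.1 has no exact
   quarter-precision representation.  */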
6199 static bool
6200 aarch64_vect_float_const_representable_p (rtx x)
6201 {
6202   int i = 0;
6203   REAL_VALUE_TYPE r0, ri;
6204   rtx x0, xi;
6205 
6206   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6207     return false;
6208 
6209   x0 = CONST_VECTOR_ELT (x, 0);
6210   if (!CONST_DOUBLE_P (x0))
6211     return false;
6212 
6213   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6214 
6215   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6216     {
6217       xi = CONST_VECTOR_ELT (x, i);
6218       if (!CONST_DOUBLE_P (xi))
6219 	return false;
6220 
6221       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6222       if (!REAL_VALUES_EQUAL (r0, ri))
6223 	return false;
6224     }
6225 
6226   return aarch64_float_const_representable_p (x0);
6227 }
6228 
6229 /* TODO: This function returns values similar to those
6230    returned by neon_valid_immediate in gcc/config/arm/arm.c
6231    but the API here is different enough that these magic numbers
6232    are not used.  It should be sufficient to return true or false.  */
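/* Worked example (illustrative): for a V4SImode vector of four 1s the
   byte splat below is 01 00 00 00 repeated, which matches the first CHECK
   (immtype 0, element size 32, no shift), and the returned *MODCONST is 1,
   suitable for emitting "movi v0.4s, #1".  */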
6233 static int
6234 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6235 			      rtx *modconst, int *elementwidth,
6236 			      unsigned char *elementchar,
6237 			      int *mvn, int *shift)
6238 {
6239 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
6240   matches = 1;						\
6241   for (i = 0; i < idx; i += (STRIDE))			\
6242     if (!(TEST))					\
6243       matches = 0;					\
6244   if (matches)						\
6245     {							\
6246       immtype = (CLASS);				\
6247       elsize = (ELSIZE);				\
6248       elchar = sizetochar (elsize);			\
6249       eshift = (SHIFT);					\
6250       emvn = (NEG);					\
6251       break;						\
6252     }
6253 
6254   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6255   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6256   unsigned char bytes[16];
6257   unsigned char elchar = 0;
6258   int immtype = -1, matches;
6259   unsigned int invmask = inverse ? 0xff : 0;
6260   int eshift, emvn;
6261 
6262   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6263     {
6264       bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6265       int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6266 
6267       if (!(simd_imm_zero
6268 	    || aarch64_vect_float_const_representable_p (op)))
6269 	return -1;
6270 
6271       if (modconst)
6272         *modconst = CONST_VECTOR_ELT (op, 0);
6273 
6274       if (elementwidth)
6275         *elementwidth = elem_width;
6276 
6277       if (elementchar)
6278         *elementchar = sizetochar (elem_width);
6279 
6280       if (shift)
6281         *shift = 0;
6282 
6283       if (simd_imm_zero)
6284         return 19;
6285       else
6286         return 18;
6287     }
6288 
6289   /* Splat vector constant out into a byte vector.  */
6290   for (i = 0; i < n_elts; i++)
6291     {
6292       rtx el = CONST_VECTOR_ELT (op, i);
6293       unsigned HOST_WIDE_INT elpart;
6294       unsigned int part, parts;
6295 
6296       if (GET_CODE (el) == CONST_INT)
6297         {
6298           elpart = INTVAL (el);
6299           parts = 1;
6300         }
6301       else if (GET_CODE (el) == CONST_DOUBLE)
6302         {
6303           elpart = CONST_DOUBLE_LOW (el);
6304           parts = 2;
6305         }
6306       else
6307         gcc_unreachable ();
6308 
6309       for (part = 0; part < parts; part++)
6310         {
6311           unsigned int byte;
6312           for (byte = 0; byte < innersize; byte++)
6313             {
6314               bytes[idx++] = (elpart & 0xff) ^ invmask;
6315               elpart >>= BITS_PER_UNIT;
6316             }
6317           if (GET_CODE (el) == CONST_DOUBLE)
6318             elpart = CONST_DOUBLE_HIGH (el);
6319         }
6320     }
6321 
6322   /* Sanity check.  */
6323   gcc_assert (idx == GET_MODE_SIZE (mode));
6324 
6325   do
6326     {
6327       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6328 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6329 
6330       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6331 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6332 
6333       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6334 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6335 
6336       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6337 	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6338 
6339       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6340 
6341       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6342 
6343       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6344 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6345 
6346       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6347 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6348 
6349       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6350 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6351 
6352       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6353 	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6354 
6355       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6356 
6357       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6358 
6359       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6360 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6361 
6362       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6363 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6364 
6365       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6366 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6367 
6368       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6369 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6370 
6371       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6372 
6373       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6374 	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6375     }
6376   while (0);
6377 
6378   /* TODO: Currently the assembler cannot handle types 12 to 15.
6379      And there is no way to specify cmode through the compiler.
6380      Disable them till there is support in the assembler.  */
6381   if (immtype == -1
6382       || (immtype >= 12 && immtype <= 15)
6383       || immtype == 18)
6384     return -1;
6385 
6386 
6387   if (elementwidth)
6388     *elementwidth = elsize;
6389 
6390   if (elementchar)
6391     *elementchar = elchar;
6392 
6393   if (mvn)
6394     *mvn = emvn;
6395 
6396   if (shift)
6397     *shift = eshift;
6398 
6399   if (modconst)
6400     {
6401       unsigned HOST_WIDE_INT imm = 0;
6402 
6403       /* Un-invert bytes of recognized vector, if necessary.  */
6404       if (invmask != 0)
6405         for (i = 0; i < idx; i++)
6406           bytes[i] ^= invmask;
6407 
6408       if (immtype == 17)
6409         {
6410           /* FIXME: Broken on 32-bit H_W_I hosts.  */
6411           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6412 
6413           for (i = 0; i < 8; i++)
6414             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6415 	      << (i * BITS_PER_UNIT);
6416 
6417           *modconst = GEN_INT (imm);
6418         }
6419       else
6420         {
6421           unsigned HOST_WIDE_INT imm = 0;
6422 
6423           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6424             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6425 
6426 	  /* Construct 'abcdefgh' because the assembler cannot handle
6427 	     generic constants.  */
6428 	  gcc_assert (shift != NULL && mvn != NULL);
6429 	  if (*mvn)
6430 	    imm = ~imm;
6431 	  imm = (imm >> *shift) & 0xff;
6432           *modconst = GEN_INT (imm);
6433         }
6434     }
6435 
6436   return immtype;
6437 #undef CHECK
6438 }
6439 
6440 /* Return TRUE if rtx OP is legal for use as either an AdvSIMD MOVI instruction
6441    (or, implicitly, MVNI) immediate.  Write back width per element
6442    to *ELEMENTWIDTH, and a modified constant (whatever should be output
6443    for a MOVI instruction) in *MODCONST.  */
6444 int
6445 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6446 				       rtx *modconst, int *elementwidth,
6447 				       unsigned char *elementchar,
6448 				       int *mvn, int *shift)
6449 {
6450   rtx tmpconst;
6451   int tmpwidth;
6452   unsigned char tmpwidthc;
6453   int tmpmvn = 0, tmpshift = 0;
6454   int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6455 					     &tmpwidth, &tmpwidthc,
6456 					     &tmpmvn, &tmpshift);
6457 
6458   if (retval == -1)
6459     return 0;
6460 
6461   if (modconst)
6462     *modconst = tmpconst;
6463 
6464   if (elementwidth)
6465     *elementwidth = tmpwidth;
6466 
6467   if (elementchar)
6468     *elementchar = tmpwidthc;
6469 
6470   if (mvn)
6471     *mvn = tmpmvn;
6472 
6473   if (shift)
6474     *shift = tmpshift;
6475 
6476   return 1;
6477 }
6478 
6479 static bool
6480 aarch64_const_vec_all_same_int_p (rtx x,
6481 				  HOST_WIDE_INT minval,
6482 				  HOST_WIDE_INT maxval)
6483 {
6484   HOST_WIDE_INT firstval;
6485   int count, i;
6486 
6487   if (GET_CODE (x) != CONST_VECTOR
6488       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6489     return false;
6490 
6491   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6492   if (firstval < minval || firstval > maxval)
6493     return false;
6494 
6495   count = CONST_VECTOR_NUNITS (x);
6496   for (i = 1; i < count; i++)
6497     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6498       return false;
6499 
6500   return true;
6501 }
6502 
6503 /* Check if immediate shift constants are within range.  */
6504 bool
6505 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6506 {
6507   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6508   if (left)
6509     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6510   else
6511     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6512 }
6513 
6514 /* Return true if X is a uniform vector where all elements
6515    are either the floating-point constant 0.0 or the
6516    integer constant 0.  */
6517 bool
6518 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6519 {
6520   return x == CONST0_RTX (mode);
6521 }
6522 
6523 bool
6524 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6525 {
6526   HOST_WIDE_INT imm = INTVAL (x);
6527   int i;
6528 
6529   for (i = 0; i < 8; i++)
6530     {
6531       unsigned int byte = imm & 0xff;
6532       if (byte != 0xff && byte != 0)
6533        return false;
6534       imm >>= 8;
6535     }
6536 
6537   return true;
6538 }
6539 
6540 /* Return a const_int vector of VAL.  */
6541 rtx
6542 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6543 {
6544   int nunits = GET_MODE_NUNITS (mode);
6545   rtvec v = rtvec_alloc (nunits);
6546   int i;
6547 
6548   for (i=0; i < nunits; i++)
6549     RTVEC_ELT (v, i) = GEN_INT (val);
6550 
6551   return gen_rtx_CONST_VECTOR (mode, v);
6552 }
6553 
6554 /* Construct and return a PARALLEL RTX vector.  */
6555 rtx
6556 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6557 {
6558   int nunits = GET_MODE_NUNITS (mode);
6559   rtvec v = rtvec_alloc (nunits / 2);
6560   int base = high ? nunits / 2 : 0;
6561   rtx t1;
6562   int i;
6563 
6564   for (i=0; i < nunits / 2; i++)
6565     RTVEC_ELT (v, i) = GEN_INT (base + i);
6566 
6567   t1 = gen_rtx_PARALLEL (mode, v);
6568   return t1;
6569 }
6570 
6571 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
6572    HIGH (exclusive).  */
6573 void
6574 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6575 {
6576   HOST_WIDE_INT lane;
6577   gcc_assert (GET_CODE (operand) == CONST_INT);
6578   lane = INTVAL (operand);
6579 
6580   if (lane < low || lane >= high)
6581     error ("lane out of range");
6582 }
6583 
6584 void
6585 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6586 {
6587   gcc_assert (GET_CODE (operand) == CONST_INT);
6588   HOST_WIDE_INT lane = INTVAL (operand);
6589 
6590   if (lane < low || lane >= high)
6591     error ("constant out of range");
6592 }
6593 
6594 /* Emit code to reinterpret one AdvSIMD type as another,
6595    without altering bits.  */
6596 void
6597 aarch64_simd_reinterpret (rtx dest, rtx src)
6598 {
6599   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6600 }
6601 
6602 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6603    registers).  */
6604 void
6605 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6606 			    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6607                             rtx op1)
6608 {
6609   rtx mem = gen_rtx_MEM (mode, destaddr);
6610   rtx tmp1 = gen_reg_rtx (mode);
6611   rtx tmp2 = gen_reg_rtx (mode);
6612 
6613   emit_insn (intfn (tmp1, op1, tmp2));
6614 
6615   emit_move_insn (mem, tmp1);
6616   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6617   emit_move_insn (mem, tmp2);
6618 }
6619 
6620 /* Return TRUE if OP is a valid vector addressing mode.  */
6621 bool
6622 aarch64_simd_mem_operand_p (rtx op)
6623 {
6624   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6625 			|| GET_CODE (XEXP (op, 0)) == REG);
6626 }
6627 
6628 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6629    not to early-clobber SRC registers in the process.
6630 
6631    We assume that the operands described by SRC and DEST represent a
6632    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
6633    number of components into which the copy has been decomposed.  */
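/* For example (illustrative): copying the register pair {q1, q2} into
   {q2, q3} must emit the q3 = q2 move before the q2 = q1 move, otherwise
   the first move would clobber a source that is still needed; the else
   branch below reverses the order for exactly that case.  */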
6634 void
6635 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6636 				rtx *src, unsigned int count)
6637 {
6638   unsigned int i;
6639 
6640   if (!reg_overlap_mentioned_p (operands[0], operands[1])
6641       || REGNO (operands[0]) < REGNO (operands[1]))
6642     {
6643       for (i = 0; i < count; i++)
6644 	{
6645 	  operands[2 * i] = dest[i];
6646 	  operands[2 * i + 1] = src[i];
6647 	}
6648     }
6649   else
6650     {
6651       for (i = 0; i < count; i++)
6652 	{
6653 	  operands[2 * i] = dest[count - i - 1];
6654 	  operands[2 * i + 1] = src[count - i - 1];
6655 	}
6656     }
6657 }
6658 
6659 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6660    one of VSTRUCT modes: OI, CI or XI.  */
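/* Each Q-register to Q-register move is one 4-byte instruction, giving 8,
   12 and 16 bytes for OImode, CImode and XImode respectively; the
   remaining (memory) alternatives are presumed to expand to a single
   LD1/ST1, hence the final return of 4.  */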
6661 int
6662 aarch64_simd_attr_length_move (rtx insn)
6663 {
6664   enum machine_mode mode;
6665 
6666   extract_insn_cached (insn);
6667 
6668   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6669     {
6670       mode = GET_MODE (recog_data.operand[0]);
6671       switch (mode)
6672 	{
6673 	case OImode:
6674 	  return 8;
6675 	case CImode:
6676 	  return 12;
6677 	case XImode:
6678 	  return 16;
6679 	default:
6680 	  gcc_unreachable ();
6681 	}
6682     }
6683   return 4;
6684 }
6685 
6686 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
6687    alignment of a vector to 128 bits.  */
6688 static HOST_WIDE_INT
6689 aarch64_simd_vector_alignment (const_tree type)
6690 {
6691   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6692   return MIN (align, 128);
6693 }
6694 
6695 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
6696 static bool
6697 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6698 {
6699   if (is_packed)
6700     return false;
6701 
6702   /* We guarantee alignment for vectors up to 128-bits.  */
6703   if (tree_int_cst_compare (TYPE_SIZE (type),
6704 			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6705     return false;
6706 
6707   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
6708   return true;
6709 }
6710 
6711 /* If VALS is a vector constant that can be loaded into a register
6712    using DUP, generate instructions to do so and return an RTX to
6713    assign to the register.  Otherwise return NULL_RTX.  */
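/* Illustrative example: a V4SImode constant with 0x12345678 in every lane
   is not a valid MOVI immediate, but it can be synthesized by moving the
   value into a general register and emitting "dup v0.4s, w0"; the
   VEC_DUPLICATE returned here expands to exactly such a DUP.  */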
6714 static rtx
6715 aarch64_simd_dup_constant (rtx vals)
6716 {
6717   enum machine_mode mode = GET_MODE (vals);
6718   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6719   int n_elts = GET_MODE_NUNITS (mode);
6720   bool all_same = true;
6721   rtx x;
6722   int i;
6723 
6724   if (GET_CODE (vals) != CONST_VECTOR)
6725     return NULL_RTX;
6726 
6727   for (i = 1; i < n_elts; ++i)
6728     {
6729       x = CONST_VECTOR_ELT (vals, i);
6730       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6731 	all_same = false;
6732     }
6733 
6734   if (!all_same)
6735     return NULL_RTX;
6736 
6737   /* We can load this constant by using DUP and a constant in a
6738      single ARM register.  This will be cheaper than a vector
6739      load.  */
6740   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6741   return gen_rtx_VEC_DUPLICATE (mode, x);
6742 }
6743 
6744 
6745 /* Generate code to load VALS, which is a PARALLEL containing only
6746    constants (for vec_init) or CONST_VECTOR, efficiently into a
6747    register.  Returns an RTX to copy into the register, or NULL_RTX
6748    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
6749 static rtx
6750 aarch64_simd_make_constant (rtx vals)
6751 {
6752   enum machine_mode mode = GET_MODE (vals);
6753   rtx const_dup;
6754   rtx const_vec = NULL_RTX;
6755   int n_elts = GET_MODE_NUNITS (mode);
6756   int n_const = 0;
6757   int i;
6758 
6759   if (GET_CODE (vals) == CONST_VECTOR)
6760     const_vec = vals;
6761   else if (GET_CODE (vals) == PARALLEL)
6762     {
6763       /* A CONST_VECTOR must contain only CONST_INTs and
6764 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6765 	 Only store valid constants in a CONST_VECTOR.  */
6766       for (i = 0; i < n_elts; ++i)
6767 	{
6768 	  rtx x = XVECEXP (vals, 0, i);
6769 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6770 	    n_const++;
6771 	}
6772       if (n_const == n_elts)
6773 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6774     }
6775   else
6776     gcc_unreachable ();
6777 
6778   if (const_vec != NULL_RTX
6779       && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6780 						NULL, NULL, NULL))
6781     /* Load using MOVI/MVNI.  */
6782     return const_vec;
6783   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6784     /* Loaded using DUP.  */
6785     return const_dup;
6786   else if (const_vec != NULL_RTX)
6787     /* Load from constant pool. We cannot take advantage of single-cycle
6788        LD1 because we need a PC-relative addressing mode.  */
6789     return const_vec;
6790   else
6791     /* A PARALLEL containing something not valid inside CONST_VECTOR.
6792        We cannot construct an initializer.  */
6793     return NULL_RTX;
6794 }
6795 
6796 void
6797 aarch64_expand_vector_init (rtx target, rtx vals)
6798 {
6799   enum machine_mode mode = GET_MODE (target);
6800   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6801   int n_elts = GET_MODE_NUNITS (mode);
6802   int n_var = 0, one_var = -1;
6803   bool all_same = true;
6804   rtx x, mem;
6805   int i;
6806 
6807   x = XVECEXP (vals, 0, 0);
6808   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6809     n_var = 1, one_var = 0;
6810 
6811   for (i = 1; i < n_elts; ++i)
6812     {
6813       x = XVECEXP (vals, 0, i);
6814       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6815 	++n_var, one_var = i;
6816 
6817       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6818 	all_same = false;
6819     }
6820 
6821   if (n_var == 0)
6822     {
6823       rtx constant = aarch64_simd_make_constant (vals);
6824       if (constant != NULL_RTX)
6825 	{
6826 	  emit_move_insn (target, constant);
6827 	  return;
6828 	}
6829     }
6830 
6831   /* Splat a single non-constant element if we can.  */
6832   if (all_same)
6833     {
6834       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6835       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6836       return;
6837     }
6838 
6839   /* One field is non-constant.  Load constant then overwrite varying
6840      field.  This is more efficient than using the stack.  */
6841   if (n_var == 1)
6842     {
6843       rtx copy = copy_rtx (vals);
6844       rtx index = GEN_INT (one_var);
6845       enum insn_code icode;
6846 
6847       /* Load constant part of vector, substitute neighboring value for
6848 	 varying element.  */
6849       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6850       aarch64_expand_vector_init (target, copy);
6851 
6852       /* Insert variable.  */
6853       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6854       icode = optab_handler (vec_set_optab, mode);
6855       gcc_assert (icode != CODE_FOR_nothing);
6856       emit_insn (GEN_FCN (icode) (target, x, index));
6857       return;
6858     }
6859 
6860   /* Construct the vector in memory one field at a time
6861      and load the whole vector.  */
6862   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6863   for (i = 0; i < n_elts; i++)
6864     emit_move_insn (adjust_address_nv (mem, inner_mode,
6865 				    i * GET_MODE_SIZE (inner_mode)),
6866 		    XVECEXP (vals, 0, i));
6867   emit_move_insn (target, mem);
6868 
6869 }
6870 
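/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Return the mask of bits that
   matter in a shift count: GET_MODE_BITSIZE (mode) - 1 for ordinary scalar
   modes, and 0 (no truncation guarantee) for vector and vector-structure
   modes.  */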
6871 static unsigned HOST_WIDE_INT
6872 aarch64_shift_truncation_mask (enum machine_mode mode)
6873 {
6874   return
6875     (aarch64_vector_mode_supported_p (mode)
6876      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6877 }
6878 
6879 #ifndef TLS_SECTION_ASM_FLAG
6880 #define TLS_SECTION_ASM_FLAG 'T'
6881 #endif
6882 
6883 void
6884 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6885 			       tree decl ATTRIBUTE_UNUSED)
6886 {
6887   char flagchars[10], *f = flagchars;
6888 
6889   /* If we have already declared this section, we can use an
6890      abbreviated form to switch back to it -- unless this section is
6891      part of a COMDAT group, in which case GAS requires the full
6892      declaration every time.  */
6893   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6894       && (flags & SECTION_DECLARED))
6895     {
6896       fprintf (asm_out_file, "\t.section\t%s\n", name);
6897       return;
6898     }
6899 
6900   if (!(flags & SECTION_DEBUG))
6901     *f++ = 'a';
6902   if (flags & SECTION_WRITE)
6903     *f++ = 'w';
6904   if (flags & SECTION_CODE)
6905     *f++ = 'x';
6906   if (flags & SECTION_SMALL)
6907     *f++ = 's';
6908   if (flags & SECTION_MERGE)
6909     *f++ = 'M';
6910   if (flags & SECTION_STRINGS)
6911     *f++ = 'S';
6912   if (flags & SECTION_TLS)
6913     *f++ = TLS_SECTION_ASM_FLAG;
6914   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6915     *f++ = 'G';
6916   *f = '\0';
6917 
6918   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6919 
6920   if (!(flags & SECTION_NOTYPE))
6921     {
6922       const char *type;
6923       const char *format;
6924 
6925       if (flags & SECTION_BSS)
6926 	type = "nobits";
6927       else
6928 	type = "progbits";
6929 
6930 #ifdef TYPE_OPERAND_FMT
6931       format = "," TYPE_OPERAND_FMT;
6932 #else
6933       format = ",@%s";
6934 #endif
6935 
6936       fprintf (asm_out_file, format, type);
6937 
6938       if (flags & SECTION_ENTSIZE)
6939 	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6940       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6941 	{
6942 	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
6943 	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6944 	  else
6945 	    fprintf (asm_out_file, ",%s,comdat",
6946 		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6947 	}
6948     }
6949 
6950   putc ('\n', asm_out_file);
6951 }
6952 
6953 /* Select a format to encode pointers in exception handling data.  */
6954 int
6955 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6956 {
6957    int type;
6958    switch (aarch64_cmodel)
6959      {
6960      case AARCH64_CMODEL_TINY:
6961      case AARCH64_CMODEL_TINY_PIC:
6962      case AARCH64_CMODEL_SMALL:
6963      case AARCH64_CMODEL_SMALL_PIC:
6964        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
6965 	  for everything.  */
6966        type = DW_EH_PE_sdata4;
6967        break;
6968      default:
6969        /* No assumptions here.  8-byte relocs required.  */
6970        type = DW_EH_PE_sdata8;
6971        break;
6972      }
6973    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6974 }
6975 
6976 /* Emit load exclusive.  */
6977 
6978 static void
6979 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6980 			     rtx mem, rtx model_rtx)
6981 {
6982   rtx (*gen) (rtx, rtx, rtx);
6983 
6984   switch (mode)
6985     {
6986     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6987     case HImode: gen = gen_aarch64_load_exclusivehi; break;
6988     case SImode: gen = gen_aarch64_load_exclusivesi; break;
6989     case DImode: gen = gen_aarch64_load_exclusivedi; break;
6990     default:
6991       gcc_unreachable ();
6992     }
6993 
6994   emit_insn (gen (rval, mem, model_rtx));
6995 }
6996 
6997 /* Emit store exclusive.  */
6998 
6999 static void
7000 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7001 			      rtx rval, rtx mem, rtx model_rtx)
7002 {
7003   rtx (*gen) (rtx, rtx, rtx, rtx);
7004 
7005   switch (mode)
7006     {
7007     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7008     case HImode: gen = gen_aarch64_store_exclusivehi; break;
7009     case SImode: gen = gen_aarch64_store_exclusivesi; break;
7010     case DImode: gen = gen_aarch64_store_exclusivedi; break;
7011     default:
7012       gcc_unreachable ();
7013     }
7014 
7015   emit_insn (gen (bval, rval, mem, model_rtx));
7016 }
7017 
7018 /* Mark the previous jump instruction as unlikely.  */
7019 
7020 static void
7021 aarch64_emit_unlikely_jump (rtx insn)
7022 {
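  /* REG_BR_PROB_BASE / 100 - 1 gives the branch a taken probability of
     just under 1%.  */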
7023   rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7024 
7025   insn = emit_jump_insn (insn);
7026   add_reg_note (insn, REG_BR_PROB, very_unlikely);
7027 }
7028 
7029 /* Expand a compare and swap pattern.  */
7030 
7031 void
7032 aarch64_expand_compare_and_swap (rtx operands[])
7033 {
7034   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7035   enum machine_mode mode, cmp_mode;
7036   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7037 
7038   bval = operands[0];
7039   rval = operands[1];
7040   mem = operands[2];
7041   oldval = operands[3];
7042   newval = operands[4];
7043   is_weak = operands[5];
7044   mod_s = operands[6];
7045   mod_f = operands[7];
7046   mode = GET_MODE (mem);
7047   cmp_mode = mode;
7048 
7049   /* Normally the succ memory model must be stronger than fail, but in the
7050      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7051      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
7052 
7053   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7054       && INTVAL (mod_s) == MEMMODEL_RELEASE)
7055     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7056 
7057   switch (mode)
7058     {
7059     case QImode:
7060     case HImode:
7061       /* For short modes, we're going to perform the comparison in SImode,
7062 	 so do the zero-extension now.  */
7063       cmp_mode = SImode;
7064       rval = gen_reg_rtx (SImode);
7065       oldval = convert_modes (SImode, mode, oldval, true);
7066       /* Fall through.  */
7067 
7068     case SImode:
7069     case DImode:
7070       /* Force the value into a register if needed.  */
7071       if (!aarch64_plus_operand (oldval, mode))
7072 	oldval = force_reg (cmp_mode, oldval);
7073       break;
7074 
7075     default:
7076       gcc_unreachable ();
7077     }
7078 
7079   switch (mode)
7080     {
7081     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7082     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7083     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7084     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7085     default:
7086       gcc_unreachable ();
7087     }
7088 
7089   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7090 
7091   if (mode == QImode || mode == HImode)
7092     emit_move_insn (operands[1], gen_lowpart (mode, rval));
7093 
7094   x = gen_rtx_REG (CCmode, CC_REGNUM);
7095   x = gen_rtx_EQ (SImode, x, const0_rtx);
7096   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7097 }
7098 
7099 /* Split a compare and swap pattern.  */
7100 
7101 void
7102 aarch64_split_compare_and_swap (rtx operands[])
7103 {
7104   rtx rval, mem, oldval, newval, scratch;
7105   enum machine_mode mode;
7106   bool is_weak;
7107   rtx label1, label2, x, cond;
7108 
7109   rval = operands[0];
7110   mem = operands[1];
7111   oldval = operands[2];
7112   newval = operands[3];
7113   is_weak = (operands[4] != const0_rtx);
7114   scratch = operands[7];
7115   mode = GET_MODE (mem);
7116 
7117   label1 = NULL_RTX;
7118   if (!is_weak)
7119     {
7120       label1 = gen_label_rtx ();
7121       emit_label (label1);
7122     }
7123   label2 = gen_label_rtx ();
7124 
7125   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7126 
7127   cond = aarch64_gen_compare_reg (NE, rval, oldval);
7128   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7129   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7130 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7131   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7132 
7133   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7134 
7135   if (!is_weak)
7136     {
7137       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7138       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7139 				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7140       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7141     }
7142   else
7143     {
7144       cond = gen_rtx_REG (CCmode, CC_REGNUM);
7145       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7146       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7147     }
7148 
7149   emit_label (label2);
7150 }
7151 
7152 /* Split an atomic operation.  */
7153 
7154 void
7155 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7156 		     rtx value, rtx model_rtx, rtx cond)
7157 {
7158   enum machine_mode mode = GET_MODE (mem);
7159   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7160   rtx label, x;
7161 
7162   label = gen_label_rtx ();
7163   emit_label (label);
7164 
7165   if (new_out)
7166     new_out = gen_lowpart (wmode, new_out);
7167   if (old_out)
7168     old_out = gen_lowpart (wmode, old_out);
7169   else
7170     old_out = new_out;
7171   value = simplify_gen_subreg (wmode, value, mode, 0);
7172 
7173   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7174 
7175   switch (code)
7176     {
7177     case SET:
7178       new_out = value;
7179       break;
7180 
7181     case NOT:
7182       x = gen_rtx_AND (wmode, old_out, value);
7183       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7184       x = gen_rtx_NOT (wmode, new_out);
7185       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7186       break;
7187 
7188     case MINUS:
7189       if (CONST_INT_P (value))
7190 	{
7191 	  value = GEN_INT (-INTVAL (value));
7192 	  code = PLUS;
7193 	}
7194       /* Fall through.  */
7195 
7196     default:
7197       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7198       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7199       break;
7200     }
7201 
7202   aarch64_emit_store_exclusive (mode, cond, mem,
7203 				gen_lowpart (mode, new_out), model_rtx);
7204 
7205   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7206   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7207 			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7208   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7209 }
7210 
7211 static void
7212 aarch64_print_extension (void)
7213 {
7214   const struct aarch64_option_extension *opt = NULL;
7215 
7216   for (opt = all_extensions; opt->name != NULL; opt++)
7217     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7218       asm_fprintf (asm_out_file, "+%s", opt->name);
7219 
7220   asm_fprintf (asm_out_file, "\n");
7221 }
7222 
7223 static void
7224 aarch64_start_file (void)
7225 {
7226   if (selected_arch)
7227     {
7228       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7229       aarch64_print_extension ();
7230     }
7231   else if (selected_cpu)
7232     {
7233       asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7234       aarch64_print_extension ();
7235     }
7236   default_file_start ();
7237 }
7238 
7239 /* Target hook for c_mode_for_suffix.  */
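/* A constant written with a 'q' suffix (e.g. 1.0q) is given TFmode, the
   128-bit floating-point type; returning VOIDmode leaves any other suffix
   to the front end's default handling.  */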
7240 static enum machine_mode
7241 aarch64_c_mode_for_suffix (char suffix)
7242 {
7243   if (suffix == 'q')
7244     return TFmode;
7245 
7246   return VOIDmode;
7247 }
7248 
7249 /* We can only represent floating point constants which will fit in
7250    "quarter-precision" values.  These values are characterised by
7251    a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7252    by:
7253 
7254    (-1)^s * (n/16) * 2^r
7255 
7256    Where:
7257      's' is the sign bit.
7258      'n' is an integer in the range 16 <= n <= 31.
7259      'r' is an integer in the range -3 <= r <= 4.  */
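/* For example, 1.0 is (16/16) * 2^0 (n = 16, r = 0); the smallest positive
   representable value is (16/16) * 2^-3 = 0.125 and the largest is
   (31/16) * 2^4 = 31.0.  */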
7260 
7261 /* Return true iff X can be represented by a quarter-precision
7262    floating point immediate operand.  Note, we cannot represent 0.0.  */
7263 bool
7264 aarch64_float_const_representable_p (rtx x)
7265 {
7266   /* This represents our current view of how many bits
7267      make up the mantissa.  */
7268   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7269   int exponent;
7270   unsigned HOST_WIDE_INT mantissa, mask;
7271   HOST_WIDE_INT m1, m2;
7272   REAL_VALUE_TYPE r, m;
7273 
7274   if (!CONST_DOUBLE_P (x))
7275     return false;
7276 
7277   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7278 
7279   /* We cannot represent infinities, NaNs or +/-zero.  We won't
7280      know if we have +zero until we analyse the mantissa, but we
7281      can reject the other invalid values.  */
7282   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7283       || REAL_VALUE_MINUS_ZERO (r))
7284     return false;
7285 
7286   /* Extract exponent.  */
7287   r = real_value_abs (&r);
7288   exponent = REAL_EXP (&r);
7289 
7290   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7291      highest (sign) bit, with a fixed binary point at bit point_pos.
7292      m1 holds the low part of the mantissa, m2 the high part.
7293      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7294      bits for the mantissa, this can fail (low bits will be lost).  */
7295   real_ldexp (&m, &r, point_pos - exponent);
7296   REAL_VALUE_TO_INT (&m1, &m2, m);
7297 
7298   /* If the low part of the mantissa has bits set we cannot represent
7299      the value.  */
7300   if (m1 != 0)
7301     return false;
7302   /* We have rejected the lower HOST_WIDE_INT, so update our
7303      understanding of how many bits lie in the mantissa and
7304      look only at the high HOST_WIDE_INT.  */
7305   mantissa = m2;
7306   point_pos -= HOST_BITS_PER_WIDE_INT;
7307 
7308   /* We can only represent values with a mantissa of the form 1.xxxx.  */
7309   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7310   if ((mantissa & mask) != 0)
7311     return false;
7312 
7313   /* Having filtered unrepresentable values, we may now remove all
7314      but the highest 5 bits.  */
7315   mantissa >>= point_pos - 5;
7316 
7317   /* We cannot represent the value 0.0, so reject it.  This is handled
7318      elsewhere.  */
7319   if (mantissa == 0)
7320     return false;
7321 
7322   /* Then, as bit 4 is always set, we can mask it off, leaving
7323      the mantissa in the range [0, 15].  */
7324   mantissa &= ~(1 << 4);
7325   gcc_assert (mantissa <= 15);
7326 
7327   /* GCC internally does not use IEEE754-like encoding (where normalized
7328      significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
7329      Our mantissa values are shifted 4 places to the left relative to
7330      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7331      by 5 places to correct for GCC's representation.  */
7332   exponent = 5 - exponent;
7333 
7334   return (exponent >= 0 && exponent <= 7);
7335 }
7336 
7337 char*
7338 aarch64_output_simd_mov_immediate (rtx *const_vector,
7339 				   enum machine_mode mode,
7340 				   unsigned width)
7341 {
7342   int is_valid;
7343   unsigned char widthc;
7344   int lane_width_bits;
7345   static char templ[40];
7346   int shift = 0, mvn = 0;
7347   const char *mnemonic;
7348   unsigned int lane_count = 0;
7349 
7350   is_valid =
7351     aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7352 					   const_vector, &lane_width_bits,
7353 					   &widthc, &mvn, &shift);
7354   gcc_assert (is_valid);
7355 
7356   mode = GET_MODE_INNER (mode);
7357   if (mode == SFmode || mode == DFmode)
7358     {
7359       bool zero_p =
7360 	aarch64_float_const_zero_rtx_p (*const_vector);
7361       gcc_assert (shift == 0);
7362       mnemonic = zero_p ? "movi" : "fmov";
7363     }
7364   else
7365     mnemonic = mvn ? "mvni" : "movi";
7366 
7367   gcc_assert (lane_width_bits != 0);
7368   lane_count = width / lane_width_bits;
7369 
7370   if (lane_count == 1)
7371     snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7372   else if (shift)
7373     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7374 	      mnemonic, lane_count, widthc, shift);
7375   else
7376     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7377 	      mnemonic, lane_count, widthc);
7378   return templ;
7379 }
7380 
7381 /* Split operands into moves from op[1] + op[2] into op[0].  */
7382 
7383 void
7384 aarch64_split_combinev16qi (rtx operands[3])
7385 {
7386   unsigned int dest = REGNO (operands[0]);
7387   unsigned int src1 = REGNO (operands[1]);
7388   unsigned int src2 = REGNO (operands[2]);
7389   enum machine_mode halfmode = GET_MODE (operands[1]);
7390   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7391   rtx destlo, desthi;
7392 
7393   gcc_assert (halfmode == V16QImode);
7394 
7395   if (src1 == dest && src2 == dest + halfregs)
7396     {
7397       /* No-op move.  Can't split to nothing; emit something.  */
7398       emit_note (NOTE_INSN_DELETED);
7399       return;
7400     }
7401 
7402   /* Preserve register attributes for variable tracking.  */
7403   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7404   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7405 			       GET_MODE_SIZE (halfmode));
7406 
7407   /* Special case of reversed high/low parts.  */
7408   if (reg_overlap_mentioned_p (operands[2], destlo)
7409       && reg_overlap_mentioned_p (operands[1], desthi))
7410     {
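      /* Swap the two source registers in place using the classic
	 three-XOR trick, so no scratch register is needed.  */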
7411       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7412       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7413       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7414     }
7415   else if (!reg_overlap_mentioned_p (operands[2], destlo))
7416     {
7417       /* Try to avoid unnecessary moves if part of the result
7418 	 is in the right place already.  */
7419       if (src1 != dest)
7420 	emit_move_insn (destlo, operands[1]);
7421       if (src2 != dest + halfregs)
7422 	emit_move_insn (desthi, operands[2]);
7423     }
7424   else
7425     {
7426       if (src2 != dest + halfregs)
7427 	emit_move_insn (desthi, operands[2]);
7428       if (src1 != dest)
7429 	emit_move_insn (destlo, operands[1]);
7430     }
7431 }
7432 
7433 /* vec_perm support.  */
7434 
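/* The longest permutation we handle: 16 byte-sized lanes in a 128-bit
   vector.  */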
7435 #define MAX_VECT_LEN 16
7436 
7437 struct expand_vec_perm_d
7438 {
7439   rtx target, op0, op1;
7440   unsigned char perm[MAX_VECT_LEN];
7441   enum machine_mode vmode;
7442   unsigned char nelt;
7443   bool one_vector_p;
7444   bool testing_p;
7445 };
7446 
7447 /* Generate a variable permutation.  */
7448 
7449 static void
7450 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7451 {
7452   enum machine_mode vmode = GET_MODE (target);
7453   bool one_vector_p = rtx_equal_p (op0, op1);
7454 
7455   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7456   gcc_checking_assert (GET_MODE (op0) == vmode);
7457   gcc_checking_assert (GET_MODE (op1) == vmode);
7458   gcc_checking_assert (GET_MODE (sel) == vmode);
7459   gcc_checking_assert (TARGET_SIMD);
7460 
7461   if (one_vector_p)
7462     {
7463       if (vmode == V8QImode)
7464 	{
7465 	  /* Expand the argument to a V16QI mode by duplicating it.  */
7466 	  rtx pair = gen_reg_rtx (V16QImode);
7467 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7468 	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7469 	}
7470       else
7471 	{
7472 	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7473 	}
7474     }
7475   else
7476     {
7477       rtx pair;
7478 
7479       if (vmode == V8QImode)
7480 	{
7481 	  pair = gen_reg_rtx (V16QImode);
7482 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7483 	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7484 	}
7485       else
7486 	{
7487 	  pair = gen_reg_rtx (OImode);
7488 	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7489 	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7490 	}
7491     }
7492 }
7493 
7494 void
7495 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7496 {
7497   enum machine_mode vmode = GET_MODE (target);
7498   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7499   bool one_vector_p = rtx_equal_p (op0, op1);
7500   rtx rmask[MAX_VECT_LEN], mask;
7501 
7502   gcc_checking_assert (!BYTES_BIG_ENDIAN);
7503 
7504   /* The TBL instruction does not use a modulo index, so we must take care
7505      of that ourselves.  */
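  /* A TBL index beyond the end of the table zeroes the result lane rather
     than wrapping, so AND the selector with nelt - 1 (or 2 * nelt - 1 for a
     two-vector table) to get the wrap-around that vec_perm requires.  */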
7506   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7507   for (i = 0; i < nelt; ++i)
7508     rmask[i] = mask;
7509   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7510   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7511 
7512   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7513 }
7514 
7515 /* Recognize patterns suitable for the TRN instructions.  */
7516 static bool
7517 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7518 {
7519   unsigned int i, odd, mask, nelt = d->nelt;
7520   rtx out, in0, in1, x;
7521   rtx (*gen) (rtx, rtx, rtx);
7522   enum machine_mode vmode = d->vmode;
7523 
7524   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7525     return false;
7526 
7527   /* Note that these are little-endian tests.
7528      We correct for big-endian later.  */
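  /* For example, with V4SI operands a TRN1 permutation selects elements
     { 0, 4, 2, 6 } and a TRN2 permutation selects { 1, 5, 3, 7 }.  */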
7529   if (d->perm[0] == 0)
7530     odd = 0;
7531   else if (d->perm[0] == 1)
7532     odd = 1;
7533   else
7534     return false;
7535   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7536 
7537   for (i = 0; i < nelt; i += 2)
7538     {
7539       if (d->perm[i] != i + odd)
7540 	return false;
7541       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7542 	return false;
7543     }
7544 
7545   /* Success!  */
7546   if (d->testing_p)
7547     return true;
7548 
7549   in0 = d->op0;
7550   in1 = d->op1;
7551   if (BYTES_BIG_ENDIAN)
7552     {
7553       x = in0, in0 = in1, in1 = x;
7554       odd = !odd;
7555     }
7556   out = d->target;
7557 
7558   if (odd)
7559     {
7560       switch (vmode)
7561 	{
7562 	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7563 	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7564 	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7565 	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7566 	case V4SImode: gen = gen_aarch64_trn2v4si; break;
7567 	case V2SImode: gen = gen_aarch64_trn2v2si; break;
7568 	case V2DImode: gen = gen_aarch64_trn2v2di; break;
7569 	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7570 	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7571 	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7572 	default:
7573 	  return false;
7574 	}
7575     }
7576   else
7577     {
7578       switch (vmode)
7579 	{
7580 	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7581 	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7582 	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7583 	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7584 	case V4SImode: gen = gen_aarch64_trn1v4si; break;
7585 	case V2SImode: gen = gen_aarch64_trn1v2si; break;
7586 	case V2DImode: gen = gen_aarch64_trn1v2di; break;
7587 	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7588 	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7589 	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7590 	default:
7591 	  return false;
7592 	}
7593     }
7594 
7595   emit_insn (gen (out, in0, in1));
7596   return true;
7597 }
7598 
7599 /* Recognize patterns suitable for the UZP instructions.  */
7600 static bool
7601 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7602 {
7603   unsigned int i, odd, mask, nelt = d->nelt;
7604   rtx out, in0, in1, x;
7605   rtx (*gen) (rtx, rtx, rtx);
7606   enum machine_mode vmode = d->vmode;
7607 
7608   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7609     return false;
7610 
7611   /* Note that these are little-endian tests.
7612      We correct for big-endian later.  */
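  /* For example, with V4SI operands a UZP1 permutation selects the even
     elements { 0, 2, 4, 6 } and a UZP2 permutation the odd elements
     { 1, 3, 5, 7 }.  */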
7613   if (d->perm[0] == 0)
7614     odd = 0;
7615   else if (d->perm[0] == 1)
7616     odd = 1;
7617   else
7618     return false;
7619   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7620 
7621   for (i = 0; i < nelt; i++)
7622     {
7623       unsigned elt = (i * 2 + odd) & mask;
7624       if (d->perm[i] != elt)
7625 	return false;
7626     }
7627 
7628   /* Success!  */
7629   if (d->testing_p)
7630     return true;
7631 
7632   in0 = d->op0;
7633   in1 = d->op1;
7634   if (BYTES_BIG_ENDIAN)
7635     {
7636       x = in0, in0 = in1, in1 = x;
7637       odd = !odd;
7638     }
7639   out = d->target;
7640 
7641   if (odd)
7642     {
7643       switch (vmode)
7644 	{
7645 	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7646 	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7647 	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7648 	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7649 	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7650 	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7651 	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7652 	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7653 	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7654 	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7655 	default:
7656 	  return false;
7657 	}
7658     }
7659   else
7660     {
7661       switch (vmode)
7662 	{
7663 	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7664 	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7665 	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7666 	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7667 	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7668 	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7669 	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7670 	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7671 	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7672 	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7673 	default:
7674 	  return false;
7675 	}
7676     }
7677 
7678   emit_insn (gen (out, in0, in1));
7679   return true;
7680 }
7681 
7682 /* Recognize patterns suitable for the ZIP instructions.  */
7683 static bool
7684 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7685 {
7686   unsigned int i, high, mask, nelt = d->nelt;
7687   rtx out, in0, in1, x;
7688   rtx (*gen) (rtx, rtx, rtx);
7689   enum machine_mode vmode = d->vmode;
7690 
7691   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7692     return false;
7693 
7694   /* Note that these are little-endian tests.
7695      We correct for big-endian later.  */
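  /* For example, with V4SI operands a ZIP1 permutation interleaves the low
     halves as { 0, 4, 1, 5 } and a ZIP2 permutation the high halves as
     { 2, 6, 3, 7 }.  */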
7696   high = nelt / 2;
7697   if (d->perm[0] == high)
7698     /* Do Nothing.  */
7699     ;
7700   else if (d->perm[0] == 0)
7701     high = 0;
7702   else
7703     return false;
7704   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7705 
7706   for (i = 0; i < nelt / 2; i++)
7707     {
7708       unsigned elt = (i + high) & mask;
7709       if (d->perm[i * 2] != elt)
7710 	return false;
7711       elt = (elt + nelt) & mask;
7712       if (d->perm[i * 2 + 1] != elt)
7713 	return false;
7714     }
7715 
7716   /* Success!  */
7717   if (d->testing_p)
7718     return true;
7719 
7720   in0 = d->op0;
7721   in1 = d->op1;
7722   if (BYTES_BIG_ENDIAN)
7723     {
7724       x = in0, in0 = in1, in1 = x;
7725       high = !high;
7726     }
7727   out = d->target;
7728 
7729   if (high)
7730     {
7731       switch (vmode)
7732 	{
7733 	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7734 	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7735 	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7736 	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7737 	case V4SImode: gen = gen_aarch64_zip2v4si; break;
7738 	case V2SImode: gen = gen_aarch64_zip2v2si; break;
7739 	case V2DImode: gen = gen_aarch64_zip2v2di; break;
7740 	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7741 	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7742 	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7743 	default:
7744 	  return false;
7745 	}
7746     }
7747   else
7748     {
7749       switch (vmode)
7750 	{
7751 	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7752 	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7753 	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7754 	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7755 	case V4SImode: gen = gen_aarch64_zip1v4si; break;
7756 	case V2SImode: gen = gen_aarch64_zip1v2si; break;
7757 	case V2DImode: gen = gen_aarch64_zip1v2di; break;
7758 	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7759 	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7760 	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7761 	default:
7762 	  return false;
7763 	}
7764     }
7765 
7766   emit_insn (gen (out, in0, in1));
7767   return true;
7768 }
7769 
7770 static bool
7771 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7772 {
7773   rtx rperm[MAX_VECT_LEN], sel;
7774   enum machine_mode vmode = d->vmode;
7775   unsigned int i, nelt = d->nelt;
7776 
7777   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
7778      numbering of elements for big-endian, we must reverse the order.  */
7779   if (BYTES_BIG_ENDIAN)
7780     return false;
7781 
7782   if (d->testing_p)
7783     return true;
7784 
7785   /* Generic code will try constant permutation twice.  Once with the
7786      original mode and again with the elements lowered to QImode.
7787      So wait and don't do the selector expansion ourselves.  */
7788   if (vmode != V8QImode && vmode != V16QImode)
7789     return false;
7790 
7791   for (i = 0; i < nelt; ++i)
7792     rperm[i] = GEN_INT (d->perm[i]);
7793   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7794   sel = force_reg (vmode, sel);
7795 
7796   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7797   return true;
7798 }
7799 
7800 static bool
7801 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7802 {
7803   /* The pattern matching functions above are written to look for a small
7804      number to begin the sequence (0, 1, N/2).  If we begin with an index
7805      from the second operand, we can swap the operands.  */
7806   if (d->perm[0] >= d->nelt)
7807     {
7808       unsigned i, nelt = d->nelt;
7809       rtx x;
7810 
7811       for (i = 0; i < nelt; ++i)
7812 	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7813 
7814       x = d->op0;
7815       d->op0 = d->op1;
7816       d->op1 = x;
7817     }
7818 
7819   if (TARGET_SIMD)
7820     {
7821       if (aarch64_evpc_zip (d))
7822 	return true;
7823       else if (aarch64_evpc_uzp (d))
7824 	return true;
7825       else if (aarch64_evpc_trn (d))
7826 	return true;
7827       return aarch64_evpc_tbl (d);
7828     }
7829   return false;
7830 }
7831 
7832 /* Expand a vec_perm_const pattern.  */
7833 
7834 bool
7835 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7836 {
7837   struct expand_vec_perm_d d;
7838   int i, nelt, which;
7839 
7840   d.target = target;
7841   d.op0 = op0;
7842   d.op1 = op1;
7843 
7844   d.vmode = GET_MODE (target);
7845   gcc_assert (VECTOR_MODE_P (d.vmode));
7846   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7847   d.testing_p = false;
7848 
7849   for (i = which = 0; i < nelt; ++i)
7850     {
7851       rtx e = XVECEXP (sel, 0, i);
7852       int ei = INTVAL (e) & (2 * nelt - 1);
7853       which |= (ei < nelt ? 1 : 2);
7854       d.perm[i] = ei;
7855     }
7856 
7857   switch (which)
7858     {
7859     default:
7860       gcc_unreachable ();
7861 
7862     case 3:
7863       d.one_vector_p = false;
7864       if (!rtx_equal_p (op0, op1))
7865 	break;
7866 
7867       /* The elements of PERM do not suggest that only the first operand
7868 	 is used, but both operands are identical.  Allow easier matching
7869 	 of the permutation by folding the permutation into the single
7870 	 input vector.  */
7871       /* Fall Through.  */
7872     case 2:
7873       for (i = 0; i < nelt; ++i)
7874 	d.perm[i] &= nelt - 1;
7875       d.op0 = op1;
7876       d.one_vector_p = true;
7877       break;
7878 
7879     case 1:
7880       d.op1 = op0;
7881       d.one_vector_p = true;
7882       break;
7883     }
7884 
7885   return aarch64_expand_vec_perm_const_1 (&d);
7886 }
7887 
7888 static bool
7889 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7890 				     const unsigned char *sel)
7891 {
7892   struct expand_vec_perm_d d;
7893   unsigned int i, nelt, which;
7894   bool ret;
7895 
7896   d.vmode = vmode;
7897   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7898   d.testing_p = true;
7899   memcpy (d.perm, sel, nelt);
7900 
7901   /* Calculate whether all elements are in one vector.  */
7902   for (i = which = 0; i < nelt; ++i)
7903     {
7904       unsigned char e = d.perm[i];
7905       gcc_assert (e < 2 * nelt);
7906       which |= (e < nelt ? 1 : 2);
7907     }
7908 
7909   /* If all elements are from the second vector, reindex as if from the
7910      first vector.  */
7911   if (which == 2)
7912     for (i = 0; i < nelt; ++i)
7913       d.perm[i] -= nelt;
7914 
7915   /* Check whether the mask can be applied to a single vector.  */
7916   d.one_vector_p = (which != 3);
7917 
7918   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7919   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7920   if (!d.one_vector_p)
7921     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7922 
7923   start_sequence ();
7924   ret = aarch64_expand_vec_perm_const_1 (&d);
7925   end_sequence ();
7926 
7927   return ret;
7928 }
7929 
7930 #undef TARGET_ADDRESS_COST
7931 #define TARGET_ADDRESS_COST aarch64_address_cost
7932 
7933 /* This hook determines whether unnamed bitfields affect the alignment
7934    of the containing structure.  The hook returns true if the structure
7935    should inherit the alignment requirements of an unnamed bitfield's
7936    type.  */
7937 #undef TARGET_ALIGN_ANON_BITFIELD
7938 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7939 
7940 #undef TARGET_ASM_ALIGNED_DI_OP
7941 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7942 
7943 #undef TARGET_ASM_ALIGNED_HI_OP
7944 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7945 
7946 #undef TARGET_ASM_ALIGNED_SI_OP
7947 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7948 
7949 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7950 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7951   hook_bool_const_tree_hwi_hwi_const_tree_true
7952 
7953 #undef TARGET_ASM_FILE_START
7954 #define TARGET_ASM_FILE_START aarch64_start_file
7955 
7956 #undef TARGET_ASM_OUTPUT_MI_THUNK
7957 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7958 
7959 #undef TARGET_ASM_SELECT_RTX_SECTION
7960 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7961 
7962 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7963 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7964 
7965 #undef TARGET_BUILD_BUILTIN_VA_LIST
7966 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7967 
7968 #undef TARGET_CALLEE_COPIES
7969 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7970 
7971 #undef TARGET_CAN_ELIMINATE
7972 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7973 
7974 #undef TARGET_CANNOT_FORCE_CONST_MEM
7975 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7976 
7977 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7978 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7979 
7980 /* Only the least significant bit is used for initialization guard
7981    variables.  */
7982 #undef TARGET_CXX_GUARD_MASK_BIT
7983 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7984 
7985 #undef TARGET_C_MODE_FOR_SUFFIX
7986 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7987 
7988 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7989 #undef  TARGET_DEFAULT_TARGET_FLAGS
7990 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7991 #endif
7992 
7993 #undef TARGET_CLASS_MAX_NREGS
7994 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7995 
7996 #undef TARGET_BUILTIN_DECL
7997 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7998 
7999 #undef  TARGET_EXPAND_BUILTIN
8000 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8001 
8002 #undef TARGET_EXPAND_BUILTIN_VA_START
8003 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8004 
8005 #undef TARGET_FUNCTION_ARG
8006 #define TARGET_FUNCTION_ARG aarch64_function_arg
8007 
8008 #undef TARGET_FUNCTION_ARG_ADVANCE
8009 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8010 
8011 #undef TARGET_FUNCTION_ARG_BOUNDARY
8012 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8013 
8014 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8015 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8016 
8017 #undef TARGET_FUNCTION_VALUE
8018 #define TARGET_FUNCTION_VALUE aarch64_function_value
8019 
8020 #undef TARGET_FUNCTION_VALUE_REGNO_P
8021 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8022 
8023 #undef TARGET_FRAME_POINTER_REQUIRED
8024 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8025 
8026 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8027 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8028 
8029 #undef  TARGET_INIT_BUILTINS
8030 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
8031 
8032 #undef TARGET_LEGITIMATE_ADDRESS_P
8033 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8034 
8035 #undef TARGET_LEGITIMATE_CONSTANT_P
8036 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8037 
8038 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8039 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8040 
8041 #undef TARGET_MANGLE_TYPE
8042 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8043 
8044 #undef TARGET_MEMORY_MOVE_COST
8045 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8046 
8047 #undef TARGET_MUST_PASS_IN_STACK
8048 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8049 
8050 /* This target hook should return true if accesses to volatile bitfields
8051    should use the narrowest mode possible.  It should return false if these
8052    accesses should use the bitfield container type.  */
8053 #undef TARGET_NARROW_VOLATILE_BITFIELD
8054 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8055 
8056 #undef  TARGET_OPTION_OVERRIDE
8057 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8058 
8059 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8060 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8061   aarch64_override_options_after_change
8062 
8063 #undef TARGET_PASS_BY_REFERENCE
8064 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8065 
8066 #undef TARGET_PREFERRED_RELOAD_CLASS
8067 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8068 
8069 #undef TARGET_SECONDARY_RELOAD
8070 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8071 
8072 #undef TARGET_SHIFT_TRUNCATION_MASK
8073 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8074 
8075 #undef TARGET_SETUP_INCOMING_VARARGS
8076 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8077 
8078 #undef TARGET_STRUCT_VALUE_RTX
8079 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
8080 
8081 #undef TARGET_REGISTER_MOVE_COST
8082 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8083 
8084 #undef TARGET_RETURN_IN_MEMORY
8085 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8086 
8087 #undef TARGET_RETURN_IN_MSB
8088 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8089 
8090 #undef TARGET_RTX_COSTS
8091 #define TARGET_RTX_COSTS aarch64_rtx_costs
8092 
8093 #undef TARGET_TRAMPOLINE_INIT
8094 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8095 
8096 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8097 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8098 
8099 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8100 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8101 
8102 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8103 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8104 
8105 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8106 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8107 
8108 #undef TARGET_VECTORIZE_BUILTINS
8109 #define TARGET_VECTORIZE_BUILTINS
8110 
8111 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8112 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8113   aarch64_builtin_vectorized_function
8114 
8115 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8116 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8117   aarch64_autovectorize_vector_sizes
8118 
8119 /* Section anchor support.  */
8120 
8121 #undef TARGET_MIN_ANCHOR_OFFSET
8122 #define TARGET_MIN_ANCHOR_OFFSET -256
8123 
8124 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8125    byte offset; we can do much more for larger data types, but have no way
8126    to determine the size of the access.  We assume accesses are aligned.  */
8127 #undef TARGET_MAX_ANCHOR_OFFSET
8128 #define TARGET_MAX_ANCHOR_OFFSET 4095
8129 
8130 #undef TARGET_VECTOR_ALIGNMENT
8131 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8132 
8133 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8134 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8135   aarch64_simd_vector_alignment_reachable
8136 
8137 /* vec_perm support.  */
8138 
8139 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8140 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8141   aarch64_vectorize_vec_perm_const_ok
8142 
8143 
8144 #undef TARGET_FIXED_CONDITION_CODE_REGS
8145 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8146 
8147 struct gcc_target targetm = TARGET_INITIALIZER;
8148 
8149 #include "gt-aarch64.h"
8150