1 /* Machine description for AArch64 architecture.
2    Copyright (C) 2009-2013 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 
49 /* Classifies an address.
50 
51    ADDRESS_REG_IMM
52        A simple base register plus immediate offset.
53 
54    ADDRESS_REG_WB
55        A base register indexed by immediate offset with writeback.
56 
57    ADDRESS_REG_REG
58        A base register indexed by (optionally scaled) register.
59 
60    ADDRESS_REG_UXTW
61        A base register indexed by (optionally scaled) zero-extended register.
62 
63    ADDRESS_REG_SXTW
64        A base register indexed by (optionally scaled) sign-extended register.
65 
66    ADDRESS_LO_SUM
67        A LO_SUM rtx with a base register and "LO12" symbol relocation.
68 
69    ADDRESS_SYMBOLIC
70        A constant symbolic address, in pc-relative literal pool.  */
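/* For illustration only (the precise set of accepted forms is decided
   by the address classification code), these categories roughly
   correspond to the following assembly addressing forms:

     ADDRESS_REG_IMM     [Xn, #imm]
     ADDRESS_REG_WB      [Xn, #imm]! or [Xn], #imm
     ADDRESS_REG_REG     [Xn, Xm{, lsl #amount}]
     ADDRESS_REG_UXTW    [Xn, Wm, uxtw {#amount}]
     ADDRESS_REG_SXTW    [Xn, Wm, sxtw {#amount}]
     ADDRESS_LO_SUM      [Xn, #:lo12:symbol]
     ADDRESS_SYMBOLIC    ldr Xt, <literal-pool label>  */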
71 
72 enum aarch64_address_type {
73   ADDRESS_REG_IMM,
74   ADDRESS_REG_WB,
75   ADDRESS_REG_REG,
76   ADDRESS_REG_UXTW,
77   ADDRESS_REG_SXTW,
78   ADDRESS_LO_SUM,
79   ADDRESS_SYMBOLIC
80 };
81 
82 struct aarch64_address_info {
83   enum aarch64_address_type type;
84   rtx base;
85   rtx offset;
86   int shift;
87   enum aarch64_symbol_type symbol_type;
88 };
89 
90 /* The current code model.  */
91 enum aarch64_code_model aarch64_cmodel;
92 
93 #ifdef HAVE_AS_TLS
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
96 #endif
97 
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 						     const_tree,
101 						     enum machine_mode *, int *,
102 						     bool *);
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 					 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 					      HOST_WIDE_INT, HOST_WIDE_INT);
112 
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 						 const unsigned char *sel);
115 
116 /* The processor for which instructions should be scheduled.  */
117 enum aarch64_processor aarch64_tune = generic;
118 
119 /* The current tuning set.  */
120 const struct tune_params *aarch64_tune_params;
121 
122 /* Mask to specify which instructions we are allowed to generate.  */
123 unsigned long aarch64_isa_flags = 0;
124 
125 /* Mask to specify which instruction scheduling options should be used.  */
126 unsigned long aarch64_tune_flags = 0;
127 
128 /* Tuning parameters.  */
129 
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132 #else
133 #define NAMED_PARAM(NAME, VAL) (VAL)
134 #endif
135 
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137 __extension__
138 #endif
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
140 {
141   NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142   NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143   NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144   NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145   NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146   NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147   NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148   NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149   NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150   NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151   NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152   NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
153 };
154 
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156 __extension__
157 #endif
158 static const struct cpu_addrcost_table generic_addrcost_table =
159 {
160   NAMED_PARAM (pre_modify, 0),
161   NAMED_PARAM (post_modify, 0),
162   NAMED_PARAM (register_offset, 0),
163   NAMED_PARAM (register_extend, 0),
164   NAMED_PARAM (imm_offset, 0)
165 };
166 
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_regmove_cost generic_regmove_cost =
171 {
172   NAMED_PARAM (GP2GP, 1),
173   NAMED_PARAM (GP2FP, 2),
174   NAMED_PARAM (FP2GP, 2),
175   /* We currently do not provide direct support for TFmode Q->Q move.
176      Therefore we need to raise the cost above 2 in order to have
177      reload handle the situation.  */
178   NAMED_PARAM (FP2FP, 4)
179 };
180 
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct tune_params generic_tunings =
185 {
186   &generic_rtx_cost_table,
187   &generic_addrcost_table,
188   &generic_regmove_cost,
189   NAMED_PARAM (memmov_cost, 4)
190 };
191 
192 /* A processor implementing AArch64.  */
193 struct processor
194 {
195   const char *const name;
196   enum aarch64_processor core;
197   const char *arch;
198   const unsigned long flags;
199   const struct tune_params *const tune;
200 };
201 
202 /* Processor cores implementing AArch64.  */
203 static const struct processor all_cores[] =
204 {
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206   {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
208 #undef AARCH64_CORE
209   {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210   {NULL, aarch64_none, NULL, 0, NULL}
211 };
212 
213 /* Architectures implementing AArch64.  */
214 static const struct processor all_architectures[] =
215 {
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217   {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
219 #undef AARCH64_ARCH
220   {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221   {NULL, aarch64_none, NULL, 0, NULL}
222 };
223 
224 /* Target specification.  These are populated as command-line arguments
225    are processed, or NULL if not specified.  */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
229 
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
231 
232 /* An ISA extension in the co-processor and main instruction set space.  */
233 struct aarch64_option_extension
234 {
235   const char *const name;
236   const unsigned long flags_on;
237   const unsigned long flags_off;
238 };
239 
240 /* ISA extensions in AArch64.  */
241 static const struct aarch64_option_extension all_extensions[] =
242 {
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244   {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
247   {NULL, 0, 0}
248 };
249 
250 /* Used to track the size of an address when generating a pre/post
251    increment address.  */
252 static enum machine_mode aarch64_memory_reference_mode;
253 
254 /* Used to force GTY into this file.  */
255 static GTY(()) int gty_dummy;
256 
257 /* A table of valid AArch64 "bitmask immediate" values for
258    logical instructions.  */
259 
260 #define AARCH64_NUM_BITMASKS  5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
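/* For example, 0x00ff00ff00ff00ff (a replicated 16-bit pattern) and
   0x0000fffc00000000 (a contiguous run of set bits) are valid bitmask
   immediates, whereas an arbitrary value such as 0x12345678 is not.  */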
262 
263 /* Did we set flag_omit_frame_pointer just so
264    aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
266 
267 typedef enum aarch64_cond_code
268 {
269   AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270   AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271   AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
272 }
273 aarch64_cc;
274 
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
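/* The encoding above pairs each condition with its inverse, so flipping
   the low bit inverts it, e.g. AARCH64_EQ (0) <-> AARCH64_NE (1) and
   AARCH64_GE (10) <-> AARCH64_LT (11).  */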
276 
277 /* The condition codes of the processor, and the inverse function.  */
278 static const char * const aarch64_condition_codes[] =
279 {
280   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
282 };
283 
284 /* Provide a mapping from gcc register numbers to dwarf register numbers.  */
285 unsigned
286 aarch64_dbx_register_number (unsigned regno)
287 {
288    if (GP_REGNUM_P (regno))
289      return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290    else if (regno == SP_REGNUM)
291      return AARCH64_DWARF_SP;
292    else if (FP_REGNUM_P (regno))
293      return AARCH64_DWARF_V0 + regno - V0_REGNUM;
294 
295    /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296       equivalent DWARF register.  */
297    return DWARF_FRAME_REGISTERS;
298 }
299 
300 /* Return TRUE if MODE is any of the large INT modes.  */
301 static bool
302 aarch64_vect_struct_mode_p (enum machine_mode mode)
303 {
304   return mode == OImode || mode == CImode || mode == XImode;
305 }
306 
307 /* Return TRUE if MODE is any of the vector modes.  */
308 static bool
309 aarch64_vector_mode_p (enum machine_mode mode)
310 {
311   return aarch64_vector_mode_supported_p (mode)
312 	 || aarch64_vect_struct_mode_p (mode);
313 }
314 
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
316 static bool
317 aarch64_array_mode_supported_p (enum machine_mode mode,
318 				unsigned HOST_WIDE_INT nelems)
319 {
320   if (TARGET_SIMD
321       && AARCH64_VALID_SIMD_QREG_MODE (mode)
322       && (nelems >= 2 && nelems <= 4))
323     return true;
324 
325   return false;
326 }
327 
328 /* Implement HARD_REGNO_NREGS.  */
329 
330 int
331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
332 {
333   switch (aarch64_regno_regclass (regno))
334     {
335     case FP_REGS:
336     case FP_LO_REGS:
337       return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338     default:
339       return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
340     }
341   gcc_unreachable ();
342 }
343 
344 /* Implement HARD_REGNO_MODE_OK.  */
345 
346 int
347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
348 {
349   if (GET_MODE_CLASS (mode) == MODE_CC)
350     return regno == CC_REGNUM;
351 
352   if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353       || regno == ARG_POINTER_REGNUM)
354     return mode == Pmode;
355 
356   if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357     return 1;
358 
359   if (FP_REGNUM_P (regno))
360     {
361       if (aarch64_vect_struct_mode_p (mode))
362 	return
363 	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364       else
365 	return 1;
366     }
367 
368   return 0;
369 }
370 
371 /* Return true if calls to DECL should be treated as
372    long-calls (i.e. called via a register).  */
373 static bool
374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
375 {
376   return false;
377 }
378 
379 /* Return true if calls to symbol-ref SYM should be treated as
380    long-calls (i.e. called via a register).  */
381 bool
382 aarch64_is_long_call_p (rtx sym)
383 {
384   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
385 }
386 
387 /* Return true if the offsets to a zero/sign-extract operation
388    represent an expression that matches an extend operation.  The
389    operands represent the parameters from
390 
391    (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
392 bool
393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 				rtx extract_imm)
395 {
396   HOST_WIDE_INT mult_val, extract_val;
397 
398   if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399     return false;
400 
401   mult_val = INTVAL (mult_imm);
402   extract_val = INTVAL (extract_imm);
403 
404   if (extract_val > 8
405       && extract_val < GET_MODE_BITSIZE (mode)
406       && exact_log2 (extract_val & ~7) > 0
407       && (extract_val & 7) <= 4
408       && mult_val == (1 << (extract_val & 7)))
409     return true;
410 
411   return false;
412 }
413 
414 /* Emit an insn that's a simple single-set.  Both the operands must be
415    known to be valid.  */
416 inline static rtx
417 emit_set_insn (rtx x, rtx y)
418 {
419   return emit_insn (gen_rtx_SET (VOIDmode, x, y));
420 }
421 
422 /* X and Y are two things to compare using CODE.  Emit the compare insn and
423    return the rtx for register 0 in the proper mode.  */
424 rtx
425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
426 {
427   enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
429 
430   emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431   return cc_reg;
432 }
433 
434 /* Build the SYMBOL_REF for __tls_get_addr.  */
435 
436 static GTY(()) rtx tls_get_addr_libfunc;
437 
438 rtx
439 aarch64_tls_get_addr (void)
440 {
441   if (!tls_get_addr_libfunc)
442     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443   return tls_get_addr_libfunc;
444 }
445 
446 /* Return the TLS model to use for ADDR.  */
447 
448 static enum tls_model
449 tls_symbolic_operand_type (rtx addr)
450 {
451   enum tls_model tls_kind = TLS_MODEL_NONE;
452   rtx sym, addend;
453 
454   if (GET_CODE (addr) == CONST)
455     {
456       split_const (addr, &sym, &addend);
457       if (GET_CODE (sym) == SYMBOL_REF)
458 	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
459     }
460   else if (GET_CODE (addr) == SYMBOL_REF)
461     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
462 
463   return tls_kind;
464 }
465 
466 /* We allow LO_SUMs in our legitimate addresses so that combine
467    can take care of combining addresses where necessary, but for
468    code generation purposes we generate the address as shown
469    below:
470    RTL                               Absolute
471    tmp = hi (symbol_ref);            adrp  x1, foo
472    dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo12:foo
473                                      nop
474 
475    PIC                               TLS
476    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
477    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
478                                      bl   __tls_get_addr
479                                      nop
480 
481    Load TLS symbol, depending on TLS mechanism and TLS access model.
482 
483    Global Dynamic - Traditional TLS:
484    adrp tmp, :tlsgd:imm
485    add  dest, tmp, #:tlsgd_lo12:imm
486    bl   __tls_get_addr
487 
488    Global Dynamic - TLS Descriptors:
489    adrp dest, :tlsdesc:imm
490    ldr  tmp, [dest, #:tlsdesc_lo12:imm]
491    add  dest, dest, #:tlsdesc_lo12:imm
492    blr  tmp
493    mrs  tp, tpidr_el0
494    add  dest, dest, tp
495 
496    Initial Exec:
497    mrs  tp, tpidr_el0
498    adrp tmp, :gottprel:imm
499    ldr  dest, [tmp, #:gottprel_lo12:imm]
500    add  dest, dest, tp
501 
502    Local Exec:
503    mrs  tp, tpidr_el0
504    add  t0, tp, #:tprel_hi12:imm
505    add  t0, #:tprel_lo12_nc:imm
506 */
507 
508 static void
509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 				   enum aarch64_symbol_type type)
511 {
512   switch (type)
513     {
514     case SYMBOL_SMALL_ABSOLUTE:
515       {
516 	rtx tmp_reg = dest;
517 	if (can_create_pseudo_p ())
518 	  {
519 	    tmp_reg =  gen_reg_rtx (Pmode);
520 	  }
521 
522 	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 	emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 	return;
525       }
526 
527     case SYMBOL_SMALL_GOT:
528       {
529 	rtx tmp_reg = dest;
530 	if (can_create_pseudo_p ())
531 	  {
532 	    tmp_reg =  gen_reg_rtx (Pmode);
533 	  }
534 	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 	return;
537       }
538 
539     case SYMBOL_SMALL_TLSGD:
540       {
541 	rtx insns;
542 	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
543 
544 	start_sequence ();
545 	emit_call_insn (gen_tlsgd_small (result, imm));
546 	insns = get_insns ();
547 	end_sequence ();
548 
549 	RTL_CONST_CALL_P (insns) = 1;
550 	emit_libcall_block (insns, dest, result, imm);
551 	return;
552       }
553 
554     case SYMBOL_SMALL_TLSDESC:
555       {
556 	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 	rtx tp;
558 
559 	emit_insn (gen_tlsdesc_small (imm));
560 	tp = aarch64_load_tp (NULL);
561 	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 	return;
564       }
565 
566     case SYMBOL_SMALL_GOTTPREL:
567       {
568 	rtx tmp_reg = gen_reg_rtx (Pmode);
569 	rtx tp = aarch64_load_tp (NULL);
570 	emit_insn (gen_tlsie_small (tmp_reg, imm));
571 	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 	return;
574       }
575 
576     case SYMBOL_SMALL_TPREL:
577       {
578 	rtx tp = aarch64_load_tp (NULL);
579 	emit_insn (gen_tlsle_small (dest, tp, imm));
580 	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 	return;
582       }
583 
584     default:
585       gcc_unreachable ();
586     }
587 }
588 
589 /* Emit a move from SRC to DEST.  Assume that the move expanders can
590    handle all moves if !can_create_pseudo_p ().  The distinction is
591    important because, unlike emit_move_insn, the move expanders know
592    how to force Pmode objects into the constant pool even when the
593    constant pool address is not itself legitimate.  */
594 static rtx
595 aarch64_emit_move (rtx dest, rtx src)
596 {
597   return (can_create_pseudo_p ()
598 	  ? emit_move_insn (dest, src)
599 	  : emit_move_insn_1 (dest, src));
600 }
601 
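/* Split a 128-bit (TImode) move from SRC to DST into two word-sized
   moves, using the TImode low/high part patterns when one operand is a
   SIMD/FP register, and ordering the halves so that an overlapping
   register pair is not clobbered prematurely.  */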
602 void
603 aarch64_split_128bit_move (rtx dst, rtx src)
604 {
605   rtx low_dst;
606 
607   gcc_assert (GET_MODE (dst) == TImode);
608 
609   if (REG_P (dst) && REG_P (src))
610     {
611       int src_regno = REGNO (src);
612       int dst_regno = REGNO (dst);
613 
614       gcc_assert (GET_MODE (src) == TImode);
615 
616       /* Handle r -> w, w -> r.  */
617       if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
618 	{
619 	  emit_insn (gen_aarch64_movtilow_di (dst,
620 					      gen_lowpart (word_mode, src)));
621 	  emit_insn (gen_aarch64_movtihigh_di (dst,
622 					       gen_highpart (word_mode, src)));
623 	  return;
624 	}
625       else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
626 	{
627 	  emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 					      src));
629 	  emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 					       src));
631 	  return;
632 	}
633       /* Fall through to r -> r cases.  */
634     }
635 
636   low_dst = gen_lowpart (word_mode, dst);
637   if (REG_P (low_dst)
638       && reg_overlap_mentioned_p (low_dst, src))
639     {
640       aarch64_emit_move (gen_highpart (word_mode, dst),
641 			 gen_highpart_mode (word_mode, TImode, src));
642       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
643     }
644   else
645     {
646       aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647       aarch64_emit_move (gen_highpart (word_mode, dst),
648 			 gen_highpart_mode (word_mode, TImode, src));
649     }
650 }
651 
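/* Return true if a 128-bit move from SRC to DST must be split into two
   word moves; a direct FP-to-FP register move does not need a split.  */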
652 bool
653 aarch64_split_128bit_move_p (rtx dst, rtx src)
654 {
655   return (! REG_P (src)
656 	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
657 }
658 
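/* Force VALUE into a register: a fresh pseudo when we may create one,
   otherwise the existing temporary X.  */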
659 static rtx
660 aarch64_force_temporary (rtx x, rtx value)
661 {
662   if (can_create_pseudo_p ())
663     return force_reg (Pmode, value);
664   else
665     {
666       x = aarch64_emit_move (x, value);
667       return x;
668     }
669 }
670 
671 
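/* Return an rtx representing REG plus OFFSET in MODE, loading the
   offset into the temporary TEMP first if it does not fit a plus
   immediate.  */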
672 static rtx
673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
674 {
675   if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
676     {
677       rtx high;
678       /* Load the full offset into a register.  This
679          might be improvable in the future.  */
680       high = GEN_INT (offset);
681       offset = 0;
682       high = aarch64_force_temporary (temp, high);
683       reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
684     }
685   return plus_constant (mode, reg, offset);
686 }
687 
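/* Expand a move of the constant IMM (an integer, symbol, label or
   CONST) into the integer register DEST, splitting it into a short
   sequence of moves, bit-field inserts and arithmetic/logical
   operations as needed.  */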
688 void
689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
690 {
691   enum machine_mode mode = GET_MODE (dest);
692   unsigned HOST_WIDE_INT mask;
693   int i;
694   bool first;
695   unsigned HOST_WIDE_INT val;
696   bool subtargets;
697   rtx subtarget;
698   int one_match, zero_match;
699 
700   gcc_assert (mode == SImode || mode == DImode);
701 
702   /* Check on what type of symbol it is.  */
703   if (GET_CODE (imm) == SYMBOL_REF
704       || GET_CODE (imm) == LABEL_REF
705       || GET_CODE (imm) == CONST)
706     {
707       rtx mem, base, offset;
708       enum aarch64_symbol_type sty;
709 
710       /* If we have (const (plus symbol offset)), separate out the offset
711 	 before we start classifying the symbol.  */
712       split_const (imm, &base, &offset);
713 
714       sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715       switch (sty)
716 	{
717 	case SYMBOL_FORCE_TO_MEM:
718 	  if (offset != const0_rtx
719 	      && targetm.cannot_force_const_mem (mode, imm))
720 	    {
721 	      gcc_assert(can_create_pseudo_p ());
722 	      base = aarch64_force_temporary (dest, base);
723 	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 	      aarch64_emit_move (dest, base);
725 	      return;
726 	    }
727 	  mem = force_const_mem (mode, imm);
728 	  gcc_assert (mem);
729 	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 	  return;
731 
732         case SYMBOL_SMALL_TLSGD:
733         case SYMBOL_SMALL_TLSDESC:
734         case SYMBOL_SMALL_GOTTPREL:
735 	case SYMBOL_SMALL_GOT:
736 	  if (offset != const0_rtx)
737 	    {
738 	      gcc_assert(can_create_pseudo_p ());
739 	      base = aarch64_force_temporary (dest, base);
740 	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 	      aarch64_emit_move (dest, base);
742 	      return;
743 	    }
744 	  /* FALLTHRU */
745 
746         case SYMBOL_SMALL_TPREL:
747 	case SYMBOL_SMALL_ABSOLUTE:
748 	  aarch64_load_symref_appropriately (dest, imm, sty);
749 	  return;
750 
751 	default:
752 	  gcc_unreachable ();
753 	}
754     }
755 
756   if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
757     {
758       emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759       return;
760     }
761 
762   if (!CONST_INT_P (imm))
763     {
764       if (GET_CODE (imm) == HIGH)
765 	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766       else
767         {
768 	  rtx mem = force_const_mem (mode, imm);
769 	  gcc_assert (mem);
770 	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
771 	}
772 
773       return;
774     }
775 
776   if (mode == SImode)
777     {
778       /* We know we can't do this in 1 insn, and we must be able to do it
779 	 in two; so don't mess around looking for sequences that don't buy
780 	 us anything.  */
781       emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782       emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784       return;
785     }
786 
787   /* Remaining cases are all for DImode.  */
788 
789   val = INTVAL (imm);
790   subtargets = optimize && can_create_pseudo_p ();
791 
792   one_match = 0;
793   zero_match = 0;
794   mask = 0xffff;
795 
796   for (i = 0; i < 64; i += 16, mask <<= 16)
797     {
798       if ((val & mask) == 0)
799 	zero_match++;
800       else if ((val & mask) == mask)
801 	one_match++;
802     }
803 
804   if (one_match == 2)
805     {
806       mask = 0xffff;
807       for (i = 0; i < 64; i += 16, mask <<= 16)
808 	{
809 	  if ((val & mask) != mask)
810 	    {
811 	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 					 GEN_INT ((val >> i) & 0xffff)));
814 	      return;
815 	    }
816 	}
817       gcc_unreachable ();
818     }
819 
820   if (zero_match == 2)
821     goto simple_sequence;
822 
823   mask = 0x0ffff0000UL;
824   for (i = 16; i < 64; i += 16, mask <<= 16)
825     {
826       HOST_WIDE_INT comp = mask & ~(mask - 1);
827 
828       if (aarch64_uimm12_shift (val - (val & mask)))
829 	{
830 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
831 
832 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 	  emit_insn (gen_adddi3 (dest, subtarget,
834 				 GEN_INT (val - (val & mask))));
835 	  return;
836 	}
837       else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
838 	{
839 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
840 
841 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 				  GEN_INT ((val + comp) & mask)));
843 	  emit_insn (gen_adddi3 (dest, subtarget,
844 				 GEN_INT (val - ((val + comp) & mask))));
845 	  return;
846 	}
847       else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
848 	{
849 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
850 
851 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 				  GEN_INT ((val - comp) | ~mask)));
853 	  emit_insn (gen_adddi3 (dest, subtarget,
854 				 GEN_INT (val - ((val - comp) | ~mask))));
855 	  return;
856 	}
857       else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
858 	{
859 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
860 
861 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 				  GEN_INT (val | ~mask)));
863 	  emit_insn (gen_adddi3 (dest, subtarget,
864 				 GEN_INT (val - (val | ~mask))));
865 	  return;
866 	}
867     }
868 
869   /* See if we can do it by arithmetically combining two
870      immediates.  */
871   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
872     {
873       int j;
874       mask = 0xffff;
875 
876       if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
878 	{
879 	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 				  GEN_INT (aarch64_bitmasks[i])));
882 	  emit_insn (gen_adddi3 (dest, subtarget,
883 				 GEN_INT (val - aarch64_bitmasks[i])));
884 	  return;
885 	}
886 
887       for (j = 0; j < 64; j += 16, mask <<= 16)
888 	{
889 	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
890 	    {
891 	      emit_insn (gen_rtx_SET (VOIDmode, dest,
892 				      GEN_INT (aarch64_bitmasks[i])));
893 	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 					 GEN_INT ((val >> j) & 0xffff)));
895 	      return;
896 	    }
897 	}
898     }
899 
900   /* See if we can do it by logically combining two immediates.  */
901   for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
902     {
903       if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
904 	{
905 	  int j;
906 
907 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
909 	      {
910 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 					GEN_INT (aarch64_bitmasks[i])));
913 		emit_insn (gen_iordi3 (dest, subtarget,
914 				       GEN_INT (aarch64_bitmasks[j])));
915 		return;
916 	      }
917 	}
918       else if ((val & aarch64_bitmasks[i]) == val)
919 	{
920 	  int j;
921 
922 	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
924 	      {
925 
926 		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 					GEN_INT (aarch64_bitmasks[j])));
929 		emit_insn (gen_anddi3 (dest, subtarget,
930 				       GEN_INT (aarch64_bitmasks[i])));
931 		return;
932 	      }
933 	}
934     }
935 
936  simple_sequence:
937   first = true;
938   mask = 0xffff;
939   for (i = 0; i < 64; i += 16, mask <<= 16)
940     {
941       if ((val & mask) != 0)
942 	{
943 	  if (first)
944 	    {
945 	      emit_insn (gen_rtx_SET (VOIDmode, dest,
946 				      GEN_INT (val & mask)));
947 	      first = false;
948 	    }
949 	  else
950 	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 				       GEN_INT ((val >> i) & 0xffff)));
952 	}
953     }
954 }
955 
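/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  Return true if a call to
   DECL may be replaced by a sibling (tail) call.  */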
956 static bool
957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
958 {
959   /* Indirect calls are not currently supported.  */
960   if (decl == NULL)
961     return false;
962 
963   /* Cannot tail-call to long-calls, since these are outside of the
964      range of a branch instruction (we could handle this if we added
965      support for indirect tail-calls).  */
966   if (aarch64_decl_is_long_call_p (decl))
967     return false;
968 
969   return true;
970 }
971 
972 /* Implement TARGET_PASS_BY_REFERENCE.  */
973 
974 static bool
975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 			   enum machine_mode mode,
977 			   const_tree type,
978 			   bool named ATTRIBUTE_UNUSED)
979 {
980   HOST_WIDE_INT size;
981   enum machine_mode dummymode;
982   int nregs;
983 
984   /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
985   size = (mode == BLKmode && type)
986     ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
987 
988   if (type)
989     {
990       /* Arrays are always passed by reference.  */
991       if (TREE_CODE (type) == ARRAY_TYPE)
992 	return true;
993       /* Other aggregates based on their size.  */
994       if (AGGREGATE_TYPE_P (type))
995 	size = int_size_in_bytes (type);
996     }
997 
998   /* Variable sized arguments are always passed by reference.  */
999   if (size < 0)
1000     return true;
1001 
1002   /* Can this be a candidate to be passed in fp/simd register(s)?  */
1003   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 					       &dummymode, &nregs,
1005 					       NULL))
1006     return false;
1007 
1008   /* Arguments which are variable sized or larger than 2 registers are
1009      passed by reference unless they are a homogeneous floating-point
1010      aggregate.  */
1011   return size > 2 * UNITS_PER_WORD;
1012 }
1013 
1014 /* Return TRUE if VALTYPE is padded to its least significant bits.  */
1015 static bool
1016 aarch64_return_in_msb (const_tree valtype)
1017 {
1018   enum machine_mode dummy_mode;
1019   int dummy_int;
1020 
1021   /* Never happens in little-endian mode.  */
1022   if (!BYTES_BIG_ENDIAN)
1023     return false;
1024 
1025   /* Only composite types smaller than or equal to 16 bytes can
1026      be potentially returned in registers.  */
1027   if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028       || int_size_in_bytes (valtype) <= 0
1029       || int_size_in_bytes (valtype) > 16)
1030     return false;
1031 
1032   /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033      or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034      is always passed/returned in the least significant bits of fp/simd
1035      register(s).  */
1036   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 					       &dummy_mode, &dummy_int, NULL))
1038     return false;
1039 
1040   return true;
1041 }
1042 
1043 /* Implement TARGET_FUNCTION_VALUE.
1044    Define how to find the value returned by a function.  */
1045 
1046 static rtx
1047 aarch64_function_value (const_tree type, const_tree func,
1048 			bool outgoing ATTRIBUTE_UNUSED)
1049 {
1050   enum machine_mode mode;
1051   int unsignedp;
1052   int count;
1053   enum machine_mode ag_mode;
1054 
1055   mode = TYPE_MODE (type);
1056   if (INTEGRAL_TYPE_P (type))
1057     mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058 
1059   if (aarch64_return_in_msb (type))
1060     {
1061       HOST_WIDE_INT size = int_size_in_bytes (type);
1062 
1063       if (size % UNITS_PER_WORD != 0)
1064 	{
1065 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067 	}
1068     }
1069 
1070   if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 					       &ag_mode, &count, NULL))
1072     {
1073       if (!aarch64_composite_type_p (type, mode))
1074 	{
1075 	  gcc_assert (count == 1 && mode == ag_mode);
1076 	  return gen_rtx_REG (mode, V0_REGNUM);
1077 	}
1078       else
1079 	{
1080 	  int i;
1081 	  rtx par;
1082 
1083 	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 	  for (i = 0; i < count; i++)
1085 	    {
1086 	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 	      XVECEXP (par, 0, i) = tmp;
1090 	    }
1091 	  return par;
1092 	}
1093     }
1094   else
1095     return gen_rtx_REG (mode, R0_REGNUM);
1096 }
1097 
1098 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
1099    Return true if REGNO is the number of a hard register in which the
1100    value of a called function may come back.  */
1101 
1102 static bool
1103 aarch64_function_value_regno_p (const unsigned int regno)
1104 {
1105   /* Maximum of 16 bytes can be returned in the general registers.  Examples
1106      of 16-byte return values are: 128-bit integers and 16-byte small
1107      structures (excluding homogeneous floating-point aggregates).  */
1108   if (regno == R0_REGNUM || regno == R1_REGNUM)
1109     return true;
1110 
1111   /* Up to four fp/simd registers can return a function value, e.g. a
1112      homogeneous floating-point aggregate having four members.  */
1113   if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114     return !TARGET_GENERAL_REGS_ONLY;
1115 
1116   return false;
1117 }
1118 
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1120 
1121    If the type T of the result of a function is such that
1122      void func (T arg)
1123    would require that arg be passed as a value in a register (or set of
1124    registers) according to the parameter passing rules, then the result
1125    is returned in the same registers as would be used for such an
1126    argument.  */
1127 
1128 static bool
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130 {
1131   HOST_WIDE_INT size;
1132   enum machine_mode ag_mode;
1133   int count;
1134 
1135   if (!AGGREGATE_TYPE_P (type)
1136       && TREE_CODE (type) != COMPLEX_TYPE
1137       && TREE_CODE (type) != VECTOR_TYPE)
1138     /* Simple scalar types always returned in registers.  */
1139     return false;
1140 
1141   if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 					       type,
1143 					       &ag_mode,
1144 					       &count,
1145 					       NULL))
1146     return false;
1147 
1148   /* Types larger than 2 registers returned in memory.  */
1149   size = int_size_in_bytes (type);
1150   return (size < 0 || size > 2 * UNITS_PER_WORD);
1151 }
1152 
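/* Return true if an argument of mode MODE and type TYPE may be passed
   in SIMD/FP registers, setting *NREGS to the number of registers
   needed and recording the element mode in the cumulative argument
   state PCUM_V.  */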
1153 static bool
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 			       const_tree type, int *nregs)
1156 {
1157   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158   return aarch64_vfp_is_call_or_return_candidate (mode,
1159 						  type,
1160 						  &pcum->aapcs_vfp_rmode,
1161 						  nregs,
1162 						  NULL);
1163 }
1164 
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166    bits.  The idea is to suppress any stronger alignment requested by
1167    the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168    This is a helper function for local use only.  */
1169 
1170 static unsigned int
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172 {
1173   unsigned int alignment;
1174 
1175   if (type)
1176     {
1177       if (!integer_zerop (TYPE_SIZE (type)))
1178 	{
1179 	  if (TYPE_MODE (type) == mode)
1180 	    alignment = TYPE_ALIGN (type);
1181 	  else
1182 	    alignment = GET_MODE_ALIGNMENT (mode);
1183 	}
1184       else
1185 	alignment = 0;
1186     }
1187   else
1188     alignment = GET_MODE_ALIGNMENT (mode);
1189 
1190   return alignment;
1191 }
1192 
1193 /* Layout a function argument according to the AAPCS64 rules.  The rule
1194    numbers refer to the rule numbers in the AAPCS64.  */
1195 
1196 static void
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 		    const_tree type,
1199 		    bool named ATTRIBUTE_UNUSED)
1200 {
1201   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202   int ncrn, nvrn, nregs;
1203   bool allocate_ncrn, allocate_nvrn;
1204 
1205   /* We need to do this once per argument.  */
1206   if (pcum->aapcs_arg_processed)
1207     return;
1208 
1209   pcum->aapcs_arg_processed = true;
1210 
1211   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212   allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213 						 mode,
1214 						 type,
1215 						 &nregs);
1216 
1217   /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1218      The following code thus handles passing by SIMD/FP registers first.  */
1219 
1220   nvrn = pcum->aapcs_nvrn;
1221 
1222   /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1223      and homogeneous short-vector aggregates (HVA).  */
1224   if (allocate_nvrn)
1225     {
1226       if (nvrn + nregs <= NUM_FP_ARG_REGS)
1227 	{
1228 	  pcum->aapcs_nextnvrn = nvrn + nregs;
1229 	  if (!aarch64_composite_type_p (type, mode))
1230 	    {
1231 	      gcc_assert (nregs == 1);
1232 	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1233 	    }
1234 	  else
1235 	    {
1236 	      rtx par;
1237 	      int i;
1238 	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 	      for (i = 0; i < nregs; i++)
1240 		{
1241 		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 					 V0_REGNUM + nvrn + i);
1243 		  tmp = gen_rtx_EXPR_LIST
1244 		    (VOIDmode, tmp,
1245 		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 		  XVECEXP (par, 0, i) = tmp;
1247 		}
1248 	      pcum->aapcs_reg = par;
1249 	    }
1250 	  return;
1251 	}
1252       else
1253 	{
1254 	  /* C.3 NSRN is set to 8.  */
1255 	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256 	  goto on_stack;
1257 	}
1258     }
1259 
1260   ncrn = pcum->aapcs_ncrn;
1261   nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1263 
1264 
1265   /* C6 - C9, though the sign and zero extension semantics are
1266      handled elsewhere.  This is the case where the argument fits
1267      entirely in general registers.  */
1268   if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1269     {
1270       unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1271 
1272       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1273 
1274       /* C.8 if the argument has an alignment of 16 then the NGRN is
1275          rounded up to the next even number.  */
1276       if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1277 	{
1278 	  ++ncrn;
1279 	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1280 	}
1281       /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282          A reg is still generated for it, but the caller should be smart
1283 	 enough not to use it.  */
1284       if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1285 	{
1286 	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1287 	}
1288       else
1289 	{
1290 	  rtx par;
1291 	  int i;
1292 
1293 	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 	  for (i = 0; i < nregs; i++)
1295 	    {
1296 	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 				       GEN_INT (i * UNITS_PER_WORD));
1299 	      XVECEXP (par, 0, i) = tmp;
1300 	    }
1301 	  pcum->aapcs_reg = par;
1302 	}
1303 
1304       pcum->aapcs_nextncrn = ncrn + nregs;
1305       return;
1306     }
1307 
1308   /* C.11  */
1309   pcum->aapcs_nextncrn = NUM_ARG_REGS;
1310 
1311   /* The argument is passed on stack; record the needed number of words for
1312      this argument (we can re-use NREGS) and align the total size if
1313      necessary.  */
1314 on_stack:
1315   pcum->aapcs_stack_words = nregs;
1316   if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317     pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 					       16 / UNITS_PER_WORD) + 1;
1319   return;
1320 }
1321 
1322 /* Implement TARGET_FUNCTION_ARG.  */
1323 
1324 static rtx
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 		      const_tree type, bool named)
1327 {
1328   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1330 
1331   if (mode == VOIDmode)
1332     return NULL_RTX;
1333 
1334   aarch64_layout_arg (pcum_v, mode, type, named);
1335   return pcum->aapcs_reg;
1336 }
1337 
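/* Initialize the cumulative argument state PCUM before scanning the
   arguments of a function or call; all AAPCS64 register and stack
   counters start at zero.  */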
1338 void
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 			   const_tree fntype ATTRIBUTE_UNUSED,
1341 			   rtx libname ATTRIBUTE_UNUSED,
1342 			   const_tree fndecl ATTRIBUTE_UNUSED,
1343 			   unsigned n_named ATTRIBUTE_UNUSED)
1344 {
1345   pcum->aapcs_ncrn = 0;
1346   pcum->aapcs_nvrn = 0;
1347   pcum->aapcs_nextncrn = 0;
1348   pcum->aapcs_nextnvrn = 0;
1349   pcum->pcs_variant = ARM_PCS_AAPCS64;
1350   pcum->aapcs_reg = NULL_RTX;
1351   pcum->aapcs_arg_processed = false;
1352   pcum->aapcs_stack_words = 0;
1353   pcum->aapcs_stack_size = 0;
1354 
1355   return;
1356 }
1357 
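/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Advance PCUM past an
   argument of the given MODE and TYPE.  */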
1358 static void
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 			      enum machine_mode mode,
1361 			      const_tree type,
1362 			      bool named)
1363 {
1364   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365   if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1366     {
1367       aarch64_layout_arg (pcum_v, mode, type, named);
1368       gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 		  != (pcum->aapcs_stack_words != 0));
1370       pcum->aapcs_arg_processed = false;
1371       pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372       pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373       pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374       pcum->aapcs_stack_words = 0;
1375       pcum->aapcs_reg = NULL_RTX;
1376     }
1377 }
1378 
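/* Return true if REGNO may be used for passing function arguments,
   i.e. it is one of the first NUM_ARG_REGS general registers or the
   first NUM_FP_ARG_REGS SIMD/FP registers.  */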
1379 bool
1380 aarch64_function_arg_regno_p (unsigned regno)
1381 {
1382   return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1384 }
1385 
1386 /* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
1387    PARM_BOUNDARY bits of alignment, but will be given anything up
1388    to STACK_BOUNDARY bits if the type requires it.  This makes sure
1389    that both before and after the layout of each argument, the Next
1390    Stacked Argument Address (NSAA) will have a minimum alignment of
1391    8 bytes.  */
1392 
1393 static unsigned int
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1395 {
1396   unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1397 
1398   if (alignment < PARM_BOUNDARY)
1399     alignment = PARM_BOUNDARY;
1400   if (alignment > STACK_BOUNDARY)
1401     alignment = STACK_BOUNDARY;
1402   return alignment;
1403 }
1404 
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1406 
1407    Return true if an argument passed on the stack should be padded upwards,
1408    i.e. if the least-significant byte of the stack slot has useful data.
1409 
1410    Small aggregate types are placed in the lowest memory address.
1411 
1412    The related parameter passing rules are B.4, C.3, C.5 and C.14.  */
1413 
1414 bool
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1416 {
1417   /* On little-endian targets, the least significant byte of every stack
1418      argument is passed at the lowest byte address of the stack slot.  */
1419   if (!BYTES_BIG_ENDIAN)
1420     return true;
1421 
1422   /* Otherwise, integral types and floating point types are padded downward:
1423      the least significant byte of a stack argument is passed at the highest
1424      byte address of the stack slot.  */
1425   if (type
1426       ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427       : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428     return false;
1429 
1430   /* Everything else padded upward, i.e. data in first byte of stack slot.  */
1431   return true;
1432 }
1433 
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1435 
1436    It specifies padding for the last (possibly the only) element
1437    of a block move between registers and memory.  Assuming the
1438    block is in memory, padding upward means that the last element
1439    is padded after its most significant byte, while with downward
1440    padding the last element is padded at its least significant
1441    byte side.
1442 
1443    Small aggregates and small complex types are always padded
1444    upwards.
1445 
1446    We don't need to worry about homogeneous floating-point or
1447    short-vector aggregates; their move is not affected by the
1448    padding direction determined here.  Regardless of endianness,
1449    each element of such an aggregate is put in the least
1450    significant bits of a fp/simd register.
1451 
1452    Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453    register has useful data, and return the opposite if the most
1454    significant byte does.  */
1455 
1456 bool
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 		     bool first ATTRIBUTE_UNUSED)
1459 {
1460 
1461   /* Small composite types are always padded upward.  */
1462   if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1463     {
1464       HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 			    : GET_MODE_SIZE (mode));
1466       if (size < 2 * UNITS_PER_WORD)
1467 	return true;
1468     }
1469 
1470   /* Otherwise, use the default padding.  */
1471   return !BYTES_BIG_ENDIAN;
1472 }
1473 
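/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */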
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
1476 {
1477   return SImode;
1478 }
1479 
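/* Implement TARGET_FRAME_POINTER_REQUIRED.  Return true if the current
   function must establish a frame pointer.  */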
1480 static bool
1481 aarch64_frame_pointer_required (void)
1482 {
1483   /* If the function contains dynamic stack allocations, we need to
1484      use the frame pointer to access the static parts of the frame.  */
1485   if (cfun->calls_alloca)
1486     return true;
1487 
1488   /* We may have turned flag_omit_frame_pointer on in order to have this
1489      function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490      and we'll check it here.
1491      If we really did set flag_omit_frame_pointer normally, then we return false
1492      (no frame pointer required) in all cases.  */
1493 
1494   if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495     return false;
1496   else if (flag_omit_leaf_frame_pointer)
1497     return !crtl->is_leaf;
1498   return true;
1499 }
1500 
1501 /* Mark the registers that need to be saved by the callee and calculate
1502    the size of the callee-saved registers area and frame record (both FP
1503    and LR may be omitted).  */
1504 static void
1505 aarch64_layout_frame (void)
1506 {
1507   HOST_WIDE_INT offset = 0;
1508   int regno;
1509 
1510   if (reload_completed && cfun->machine->frame.laid_out)
1511     return;
1512 
1513   cfun->machine->frame.fp_lr_offset = 0;
1514 
1515   /* First mark all the registers that really need to be saved...  */
1516   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517     cfun->machine->frame.reg_offset[regno] = -1;
1518 
1519   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520     cfun->machine->frame.reg_offset[regno] = -1;
1521 
1522   /* ... that includes the eh data registers (if needed)...  */
1523   if (crtl->calls_eh_return)
1524     for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525       cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1526 
1527   /* ... and any callee saved register that dataflow says is live.  */
1528   for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529     if (df_regs_ever_live_p (regno)
1530 	&& !call_used_regs[regno])
1531       cfun->machine->frame.reg_offset[regno] = 0;
1532 
1533   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534     if (df_regs_ever_live_p (regno)
1535 	&& !call_used_regs[regno])
1536       cfun->machine->frame.reg_offset[regno] = 0;
1537 
1538   if (frame_pointer_needed)
1539     {
1540       cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542       cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1543     }
1544 
1545   /* Now assign stack slots for them.  */
1546   for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547     if (cfun->machine->frame.reg_offset[regno] != -1)
1548       {
1549 	cfun->machine->frame.reg_offset[regno] = offset;
1550 	offset += UNITS_PER_WORD;
1551       }
1552 
1553   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554     if (cfun->machine->frame.reg_offset[regno] != -1)
1555       {
1556 	cfun->machine->frame.reg_offset[regno] = offset;
1557 	offset += UNITS_PER_WORD;
1558       }
1559 
1560   if (frame_pointer_needed)
1561     {
1562       cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563       offset += UNITS_PER_WORD;
1564       cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1565     }
1566 
1567   if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1568     {
1569       cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570       offset += UNITS_PER_WORD;
1571       cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1572     }
1573 
1574   cfun->machine->frame.padding0 =
1575     (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576   offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1577 
1578   cfun->machine->frame.saved_regs_size = offset;
1579   cfun->machine->frame.laid_out = true;
1580 }
1581 
1582 /* Make the last instruction frame-related and note that it performs
1583    the operation described by FRAME_PATTERN.  */
1584 
1585 static void
1586 aarch64_set_frame_expr (rtx frame_pattern)
1587 {
1588   rtx insn;
1589 
1590   insn = get_last_insn ();
1591   RTX_FRAME_RELATED_P (insn) = 1;
1592   RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594 				      frame_pattern,
1595 				      REG_NOTES (insn));
1596 }
1597 
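/* Return true if aarch64_layout_frame marked REGNO as needing to be
   saved on entry to the current function.  */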
1598 static bool
1599 aarch64_register_saved_on_entry (int regno)
1600 {
1601   return cfun->machine->frame.reg_offset[regno] != -1;
1602 }
1603 
1604 
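/* Save (when RESTORE is false) or restore (when RESTORE is true) the
   callee-saved FP/SIMD registers, starting at START_OFFSET from
   BASE_RTX and advancing by INCREMENT per slot; adjacent saved
   registers are handled with store-pair/load-pair instructions.  */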
1605 static void
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 			      bool restore, rtx base_rtx)
1608 
1609 {
1610   unsigned regno;
1611   unsigned regno2;
1612   rtx insn;
1613   rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1614 
1615 
1616   for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1617     {
1618       if (aarch64_register_saved_on_entry (regno))
1619 	{
1620 	  rtx mem;
1621 	  mem = gen_mem_ref (DFmode,
1622 			     plus_constant (Pmode,
1623 					    base_rtx,
1624 					    start_offset));
1625 
1626 	  for (regno2 = regno + 1;
1627 	       regno2 <= V31_REGNUM
1628 		 && !aarch64_register_saved_on_entry (regno2);
1629 	       regno2++)
1630 	    {
1631 	      /* Empty loop.  */
1632 	    }
1633 	  if (regno2 <= V31_REGNUM &&
1634 	      aarch64_register_saved_on_entry (regno2))
1635 	    {
1636 	      rtx mem2;
1637 	      /* Next highest register to be saved.  */
1638 	      mem2 = gen_mem_ref (DFmode,
1639 				  plus_constant
1640 				  (Pmode,
1641 				   base_rtx,
1642 				   start_offset + increment));
1643 	      if (restore == false)
1644 		{
1645 		  insn = emit_insn
1646 		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 					mem2, gen_rtx_REG (DFmode, regno2)));
1648 
1649 		}
1650 	      else
1651 		{
1652 		  insn = emit_insn
1653 		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 				       gen_rtx_REG (DFmode, regno2), mem2));
1655 
1656 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1658 		}
1659 
1660 		  /* The first part of a frame-related parallel insn
1661 		     is always assumed to be relevant to the frame
1662 		     calculations; subsequent parts are only
1663 		     frame-related if explicitly marked.  */
1664 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665 					    1)) = 1;
1666 	      regno = regno2;
1667 	      start_offset += increment * 2;
1668 	    }
1669 	  else
1670 	    {
1671 	      if (restore == false)
1672 		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673 	      else
1674 		{
1675 		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1677 		}
1678 	      start_offset += increment;
1679 	    }
1680 	  RTX_FRAME_RELATED_P (insn) = 1;
1681 	}
1682     }
1683 
1684 }
1685 
1686 
1687 /* Offset from the stack pointer at which the saves and
1688    restores have to happen.  */
1689 static void
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691 					    bool restore)
1692 {
1693   rtx insn;
1694   rtx base_rtx = stack_pointer_rtx;
1695   HOST_WIDE_INT start_offset = offset;
1696   HOST_WIDE_INT increment = UNITS_PER_WORD;
1697   rtx (*gen_mem_ref) (enum machine_mode, rtx) = frame_pointer_needed ? gen_frame_mem : gen_rtx_MEM;
1698   unsigned limit = frame_pointer_needed ? R28_REGNUM : R30_REGNUM;
1699   unsigned regno;
1700   unsigned regno2;
1701 
1702   for (regno = R0_REGNUM; regno <= limit; regno++)
1703     {
1704       if (aarch64_register_saved_on_entry (regno))
1705 	{
1706 	  rtx mem;
1707 	  mem = gen_mem_ref (Pmode,
1708 			     plus_constant (Pmode,
1709 					    base_rtx,
1710 					    start_offset));
1711 
1712 	  for (regno2 = regno + 1;
1713 	       regno2 <= limit
1714 		 && !aarch64_register_saved_on_entry (regno2);
1715 	       regno2++)
1716 	    {
1717 	      /* Empty loop.  */
1718 	    }
1719 	  if (regno2 <= limit
1720 	      && aarch64_register_saved_on_entry (regno2))
1721 	    {
1722 	      rtx mem2;
1723 	      /* Next highest register to be saved.  */
1724 	      mem2 = gen_mem_ref (Pmode,
1725 				  plus_constant
1726 				  (Pmode,
1727 				   base_rtx,
1728 				   start_offset + increment));
1729 	      if (restore == false)
1730 		{
1731 		  insn = emit_insn
1732 		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 					mem2, gen_rtx_REG (DImode, regno2)));
1734 
1735 		}
1736 	      else
1737 		{
1738 		  insn = emit_insn
1739 		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 				     gen_rtx_REG (DImode, regno2), mem2));
1741 
1742 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1744 		}
1745 
1746 		  /* The first part of a frame-related parallel insn
1747 		     is always assumed to be relevant to the frame
1748 		     calculations; subsequent parts are only
1749 		     frame-related if explicitly marked.  */
1750 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751 					    1)) = 1;
1752 	      regno = regno2;
1753 	      start_offset += increment * 2;
1754 	    }
1755 	  else
1756 	    {
1757 	      if (restore == false)
1758 		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759 	      else
1760 		{
1761 		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1763 		}
1764 	      start_offset += increment;
1765 	    }
1766 	  RTX_FRAME_RELATED_P (insn) = 1;
1767 	}
1768     }
1769 
1770   aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1771 
1772 }
1773 
1774 /* AArch64 stack frames generated by this compiler look like:
1775 
1776 	+-------------------------------+
1777 	|                               |
1778 	|  incoming stack arguments     |
1779 	|                               |
1780 	+-------------------------------+ <-- arg_pointer_rtx
1781 	|                               |
1782 	|  callee-allocated save area   |
1783 	|  for register varargs         |
1784 	|                               |
1785 	+-------------------------------+
1786 	|                               |
1787 	|  local variables              |
1788 	|                               |
1789 	+-------------------------------+ <-- frame_pointer_rtx
1790 	|                               |
1791 	|  callee-saved registers       |
1792 	|                               |
1793 	+-------------------------------+
1794 	|  LR'                          |
1795 	+-------------------------------+
1796 	|  FP'                          |
1797       P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 	|  dynamic allocation           |
1799 	+-------------------------------+
1800 	|                               |
1801 	|  outgoing stack arguments     |
1802 	|                               |
1803 	+-------------------------------+ <-- stack_pointer_rtx
1804 
1805    Dynamic stack allocations such as alloca insert data at point P.
1806    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807    hard_frame_pointer_rtx unchanged.  */
1808 
1809 /* Generate the prologue instructions for entry into a function.
1810    Establish the stack frame by decreasing the stack pointer with a
1811    properly calculated size and, if necessary, create a frame record
1812    filled with the values of LR and previous frame pointer.  The
1813    current FP is also set up if it is in use.  */
1814 
1815 void
1816 aarch64_expand_prologue (void)
1817 {
1818   /* sub sp, sp, #<frame_size>
1819      stp {fp, lr}, [sp, #<frame_size> - 16]
1820      add fp, sp, #<frame_size> - hardfp_offset
1821      stp {cs_reg}, [fp, #-16] etc.
1822 
1823      sub sp, sp, <final_adjustment_if_any>
1824   */
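  /* As an illustrative example (not a guaranteed instruction sequence):
     a function that has no locals and no outgoing arguments and only
     needs to save FP and LR gets offset == 16 and fp_offset == 0, so the
     code below typically emits roughly

	stp	x29, x30, [sp, #-16]!
	add	x29, sp, #0

     before any remaining callee-saved registers are stored.  */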
1825   HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
1826   HOST_WIDE_INT frame_size, offset;
1827   HOST_WIDE_INT fp_offset;		/* FP offset from SP */
1828   rtx insn;
1829 
1830   aarch64_layout_frame ();
1831   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832   gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 		+ crtl->outgoing_args_size);
1836   offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 					  STACK_BOUNDARY / BITS_PER_UNIT);
1838 
1839   if (flag_stack_usage_info)
1840     current_function_static_stack_size = frame_size;
1841 
1842   fp_offset = (offset
1843 	       - original_frame_size
1844 	       - cfun->machine->frame.saved_regs_size);
1845 
1846   /* Store pairs and load pairs have a range of only -512 to 504.  */
1847   if (offset >= 512)
1848     {
1849       /* When the frame has a large size, an initial decrease is done on
1850 	 the stack pointer to jump over the callee-allocated save area for
1851 	 register varargs, the local variable area and/or the callee-saved
1852 	 register area.  This will allow the pre-index write-back
1853 	 store pair instructions to be used for setting up the stack frame
1854 	 efficiently.  */
1855       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856       if (offset >= 512)
1857 	offset = cfun->machine->frame.saved_regs_size;
1858 
1859       frame_size -= (offset + crtl->outgoing_args_size);
1860       fp_offset = 0;
1861 
1862       if (frame_size >= 0x1000000)
1863 	{
1864 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 	  emit_move_insn (op0, GEN_INT (-frame_size));
1866 	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 	  aarch64_set_frame_expr (gen_rtx_SET
1868 				  (Pmode, stack_pointer_rtx,
1869 				   gen_rtx_PLUS (Pmode,
1870 						 stack_pointer_rtx,
1871 						 GEN_INT (-frame_size))));
1872 	}
1873       else if (frame_size > 0)
1874 	{
1875 	  if ((frame_size & 0xfff) != frame_size)
1876 	    {
1877 	      insn = emit_insn (gen_add2_insn
1878 				(stack_pointer_rtx,
1879 				 GEN_INT (-(frame_size
1880 					    & ~(HOST_WIDE_INT)0xfff))));
1881 	      RTX_FRAME_RELATED_P (insn) = 1;
1882 	    }
1883 	  if ((frame_size & 0xfff) != 0)
1884 	    {
1885 	      insn = emit_insn (gen_add2_insn
1886 				(stack_pointer_rtx,
1887 				 GEN_INT (-(frame_size
1888 					    & (HOST_WIDE_INT)0xfff))));
1889 	      RTX_FRAME_RELATED_P (insn) = 1;
1890 	    }
1891 	}
1892     }
1893   else
1894     frame_size = -1;
1895 
1896   if (offset > 0)
1897     {
1898       /* Save the frame pointer and lr if the frame pointer is needed
1899 	 first.  Make the frame pointer point to the location of the
1900 	 old frame pointer on the stack.  */
1901       if (frame_pointer_needed)
1902 	{
1903 	  rtx mem_fp, mem_lr;
1904 
1905 	  if (fp_offset)
1906 	    {
1907 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 					       GEN_INT (-offset)));
1909 	      RTX_FRAME_RELATED_P (insn) = 1;
1910 	      aarch64_set_frame_expr (gen_rtx_SET
1911 				      (Pmode, stack_pointer_rtx,
1912 				       gen_rtx_MINUS (Pmode,
1913 						      stack_pointer_rtx,
1914 						      GEN_INT (offset))));
1915 	      mem_fp = gen_frame_mem (DImode,
1916 				      plus_constant (Pmode,
1917 						     stack_pointer_rtx,
1918 						     fp_offset));
1919 	      mem_lr = gen_frame_mem (DImode,
1920 				      plus_constant (Pmode,
1921 						     stack_pointer_rtx,
1922 						     fp_offset
1923 						     + UNITS_PER_WORD));
1924 	      insn = emit_insn (gen_store_pairdi (mem_fp,
1925 						  hard_frame_pointer_rtx,
1926 						  mem_lr,
1927 						  gen_rtx_REG (DImode,
1928 							       LR_REGNUM)));
1929 	    }
1930 	  else
1931 	    {
1932 	      insn = emit_insn (gen_storewb_pairdi_di
1933 				(stack_pointer_rtx, stack_pointer_rtx,
1934 				 hard_frame_pointer_rtx,
1935 				 gen_rtx_REG (DImode, LR_REGNUM),
1936 				 GEN_INT (-offset),
1937 				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939 	    }
1940 
1941 	  /* The first part of a frame-related parallel insn is always
1942 	     assumed to be relevant to the frame calculations;
1943 	     subsequent parts are only frame-related if explicitly
1944 	     marked.  */
1945 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 	  RTX_FRAME_RELATED_P (insn) = 1;
1947 
1948 	  /* Set up frame pointer to point to the location of the
1949 	     previous frame pointer on the stack.  */
1950 	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 					   stack_pointer_rtx,
1952 					   GEN_INT (fp_offset)));
1953 	  aarch64_set_frame_expr (gen_rtx_SET
1954 				  (Pmode, hard_frame_pointer_rtx,
1955 				   gen_rtx_PLUS (Pmode,
1956 						 stack_pointer_rtx,
1957 						 GEN_INT (fp_offset))));
1958 	  RTX_FRAME_RELATED_P (insn) = 1;
1959 	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 					   hard_frame_pointer_rtx));
1961 	}
1962       else
1963 	{
1964 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 					   GEN_INT (-offset)));
1966 	  RTX_FRAME_RELATED_P (insn) = 1;
1967 	}
1968 
1969       aarch64_save_or_restore_callee_save_registers
1970 	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971     }
1972 
1973   /* when offset >= 512,
1974      sub sp, sp, #<outgoing_args_size> */
1975   if (frame_size > -1)
1976     {
1977       if (crtl->outgoing_args_size > 0)
1978 	{
1979 	  insn = emit_insn (gen_add2_insn
1980 			    (stack_pointer_rtx,
1981 			     GEN_INT (- crtl->outgoing_args_size)));
1982 	  RTX_FRAME_RELATED_P (insn) = 1;
1983 	}
1984     }
1985 }
1986 
1987 /* Generate the epilogue instructions for returning from a function.  */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1990 {
1991   HOST_WIDE_INT original_frame_size, frame_size, offset;
1992   HOST_WIDE_INT fp_offset;
1993   rtx insn;
1994   rtx cfa_reg;
1995 
1996   aarch64_layout_frame ();
1997   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 		+ crtl->outgoing_args_size);
2000   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 					  STACK_BOUNDARY / BITS_PER_UNIT);
2002 
2003   fp_offset = (offset
2004 	       - original_frame_size
2005 	       - cfun->machine->frame.saved_regs_size);
2006 
2007   cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008 
2009   /* Store pairs and load pairs have a range of only -512 to 504.  */
2010   if (offset >= 512)
2011     {
2012       offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013       if (offset >= 512)
2014 	offset = cfun->machine->frame.saved_regs_size;
2015 
2016       frame_size -= (offset + crtl->outgoing_args_size);
2017       fp_offset = 0;
2018       if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019 	{
2020 	  insn = emit_insn (gen_add2_insn
2021 			    (stack_pointer_rtx,
2022 			     GEN_INT (crtl->outgoing_args_size)));
2023 	  RTX_FRAME_RELATED_P (insn) = 1;
2024 	}
2025     }
2026   else
2027     frame_size = -1;
2028 
2029   /* If there were outgoing arguments or we've done dynamic stack
2030      allocation, then restore the stack pointer from the frame
2031      pointer.  This is at most one insn and more efficient than using
2032      GCC's internal mechanism.  */
2033   if (frame_pointer_needed
2034       && (crtl->outgoing_args_size || cfun->calls_alloca))
2035     {
2036       insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 				       hard_frame_pointer_rtx,
2038 				       GEN_INT (- fp_offset)));
2039       RTX_FRAME_RELATED_P (insn) = 1;
2040       /* As SP is set to (FP - fp_offset), according to the rules in
2041 	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 	 from the value of SP from now on.  */
2043       cfa_reg = stack_pointer_rtx;
2044     }
2045 
2046   aarch64_save_or_restore_callee_save_registers
2047     (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048 
2049   /* Restore the frame pointer and lr if the frame pointer is needed.  */
2050   if (offset > 0)
2051     {
2052       if (frame_pointer_needed)
2053 	{
2054 	  rtx mem_fp, mem_lr;
2055 
2056 	  if (fp_offset)
2057 	    {
2058 	      mem_fp = gen_frame_mem (DImode,
2059 				      plus_constant (Pmode,
2060 						     stack_pointer_rtx,
2061 						     fp_offset));
2062 	      mem_lr = gen_frame_mem (DImode,
2063 				      plus_constant (Pmode,
2064 						     stack_pointer_rtx,
2065 						     fp_offset
2066 						     + UNITS_PER_WORD));
2067 	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 						 mem_fp,
2069 						 gen_rtx_REG (DImode,
2070 							      LR_REGNUM),
2071 						 mem_lr));
2072 	    }
2073 	  else
2074 	    {
2075 	      insn = emit_insn (gen_loadwb_pairdi_di
2076 				(stack_pointer_rtx,
2077 				 stack_pointer_rtx,
2078 				 hard_frame_pointer_rtx,
2079 				 gen_rtx_REG (DImode, LR_REGNUM),
2080 				 GEN_INT (offset),
2081 				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 					  plus_constant (Pmode, cfa_reg,
2086 							 offset))));
2087 	    }
2088 
2089 	  /* The first part of a frame-related parallel insn
2090 	     is always assumed to be relevant to the frame
2091 	     calculations; subsequent parts are only
2092 	     frame-related if explicitly marked.  */
2093 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 	  RTX_FRAME_RELATED_P (insn) = 1;
2095 	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 	  add_reg_note (insn, REG_CFA_RESTORE,
2097 			gen_rtx_REG (DImode, LR_REGNUM));
2098 
2099 	  if (fp_offset)
2100 	    {
2101 	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 					       GEN_INT (offset)));
2103 	      RTX_FRAME_RELATED_P (insn) = 1;
2104 	    }
2105 	}
2106       else
2107 	{
2108 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 					   GEN_INT (offset)));
2110 	  RTX_FRAME_RELATED_P (insn) = 1;
2111 	}
2112     }
2113 
2114   /* Stack adjustment for exception handler.  */
2115   if (crtl->calls_eh_return)
2116     {
2117       /* We need to unwind the stack by the offset computed by
2118 	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
2119 	 based on SP.  Ideally we would update the SP and define the
2120 	 CFA along the lines of:
2121 
2122 	 SP = SP + EH_RETURN_STACKADJ_RTX
2123 	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124 
2125 	 However the dwarf emitter only understands a constant
2126 	 register offset.
2127 
2128 	 The solution chosen here is to use the otherwise unused IP0
2129 	 as a temporary register to hold the current SP value.  The
2130 	 CFA is described using IP0 then SP is modified.  */
2131 
2132       rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133 
2134       insn = emit_move_insn (ip0, stack_pointer_rtx);
2135       add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136       RTX_FRAME_RELATED_P (insn) = 1;
2137 
2138       emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139 
2140       /* Ensure the assignment to IP0 does not get optimized away.  */
2141       emit_use (ip0);
2142     }
2143 
2144   if (frame_size > -1)
2145     {
2146       if (frame_size >= 0x1000000)
2147 	{
2148 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 	  emit_move_insn (op0, GEN_INT (frame_size));
2150 	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 	  aarch64_set_frame_expr (gen_rtx_SET
2152 				  (Pmode, stack_pointer_rtx,
2153 				   gen_rtx_PLUS (Pmode,
2154 						 stack_pointer_rtx,
2155 						 GEN_INT (frame_size))));
2156 	}
2157       else if (frame_size > 0)
2158 	{
2159 	  if ((frame_size & 0xfff) != 0)
2160 	    {
2161 	      insn = emit_insn (gen_add2_insn
2162 				(stack_pointer_rtx,
2163 				 GEN_INT ((frame_size
2164 					   & (HOST_WIDE_INT) 0xfff))));
2165 	      RTX_FRAME_RELATED_P (insn) = 1;
2166 	    }
2167 	  if ((frame_size & 0xfff) != frame_size)
2168 	    {
2169 	      insn = emit_insn (gen_add2_insn
2170 				(stack_pointer_rtx,
2171 				 GEN_INT ((frame_size
2172 					   & ~ (HOST_WIDE_INT) 0xfff))));
2173 	      RTX_FRAME_RELATED_P (insn) = 1;
2174 	    }
2175 	}
2176 
2177       aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 					   gen_rtx_PLUS (Pmode,
2179 							 stack_pointer_rtx,
2180 							 GEN_INT (offset))));
2181     }
2182 
2183   emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184   if (!for_sibcall)
2185     emit_jump_insn (ret_rtx);
2186 }
2187 
2188 /* Return the place to copy the exception unwinding return address to.
2189    This will probably be a stack slot, but could (in theory) be the
2190    return register.  */
2191 rtx
2192 aarch64_final_eh_return_addr (void)
2193 {
2194   HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195   aarch64_layout_frame ();
2196   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197   frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 		+ crtl->outgoing_args_size);
2199   offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 					  STACK_BOUNDARY / BITS_PER_UNIT);
2201   fp_offset = offset
2202     - original_frame_size
2203     - cfun->machine->frame.saved_regs_size;
2204 
2205   if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206     return gen_rtx_REG (DImode, LR_REGNUM);
2207 
2208   /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
2209      result in a store to save LR introduced by builtin_eh_return () being
2210      incorrectly deleted because the alias is not detected.
2211      So in the calculation of the address to copy the exception unwinding
2212      return address to, we note 2 cases.
2213      If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214      we return a SP-relative location since all the addresses are SP-relative
2215      in this case.  This prevents the store from being optimized away.
2216      If the fp_offset is not 0, then the addresses will be FP-relative and
2217      therefore we return a FP-relative location.  */
2218 
2219   if (frame_pointer_needed)
2220     {
2221       if (fp_offset)
2222         return gen_frame_mem (DImode,
2223 			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224       else
2225         return gen_frame_mem (DImode,
2226 			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227     }
2228 
2229   /* If FP is not needed, we calculate the location of LR, which would be
2230      at the top of the saved registers block.  */
2231 
2232   return gen_frame_mem (DImode,
2233 			plus_constant (Pmode,
2234 				       stack_pointer_rtx,
2235 				       fp_offset
2236 				       + cfun->machine->frame.saved_regs_size
2237 				       - 2 * UNITS_PER_WORD));
2238 }
2239 
2240 /* Output code to build up a constant in a register.  */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2243 {
2244   if (aarch64_bitmask_imm (val, DImode))
2245     emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246   else
2247     {
2248       int i;
2249       int ncount = 0;
2250       int zcount = 0;
2251       HOST_WIDE_INT valp = val >> 16;
2252       HOST_WIDE_INT valm;
2253       HOST_WIDE_INT tval;
2254 
2255       for (i = 16; i < 64; i += 16)
2256 	{
2257 	  valm = (valp & 0xffff);
2258 
2259 	  if (valm != 0)
2260 	    ++ zcount;
2261 
2262 	  if (valm != 0xffff)
2263 	    ++ ncount;
2264 
2265 	  valp >>= 16;
2266 	}
2267 
2268       /* zcount contains the number of additional MOVK instructions
2269 	 required if the constant is built up with an initial MOVZ instruction,
2270 	 while ncount is the number of MOVK instructions required if starting
2271 	 with a MOVN instruction.  Choose the sequence that yields the fewest
2272 	 instructions, preferring the MOVZ sequence when the two counts are
2273 	 equal.  */
2274       if (ncount < zcount)
2275 	{
2276 	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 			  GEN_INT ((~val) & 0xffff));
2278 	  tval = 0xffff;
2279 	}
2280       else
2281 	{
2282 	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 			  GEN_INT (val & 0xffff));
2284 	  tval = 0;
2285 	}
2286 
2287       val >>= 16;
2288 
2289       for (i = 16; i < 64; i += 16)
2290 	{
2291 	  if ((val & 0xffff) != tval)
2292 	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 				       GEN_INT (i), GEN_INT (val & 0xffff)));
2294 	  val >>= 16;
2295 	}
2296     }
2297 }
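/* As an illustrative example of the MOVZ path above: for
   val == 0x1234000000005678 only one of the three upper 16-bit chunks is
   non-zero, so a move of 0x5678 is emitted followed by a single insertion
   of 0x1234 at bit position 48, which typically assembles to roughly

	movz	xN, #0x5678
	movk	xN, #0x1234, lsl #48

   where xN stands for the register selected by REGNUM.  */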
2298 
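/* Add DELTA to the register numbered REGNUM.  SCRATCHREG names a scratch
   register that may be clobbered when DELTA cannot be added using simple
   immediate forms.  */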
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2301 {
2302   HOST_WIDE_INT mdelta = delta;
2303   rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304   rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2305 
2306   if (mdelta < 0)
2307     mdelta = -mdelta;
2308 
2309   if (mdelta >= 4096 * 4096)
2310     {
2311       aarch64_build_constant (scratchreg, delta);
2312       emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2313     }
2314   else if (mdelta > 0)
2315     {
2316       if (mdelta >= 4096)
2317 	{
2318 	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 	  if (delta < 0)
2321 	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 	  else
2324 	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326 	}
2327       if (mdelta % 4096 != 0)
2328 	{
2329 	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 				  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332 	}
2333     }
2334 }
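/* For example (illustrative): with DELTA == 5000 the function above moves
   1 (5000 / 4096) into the scratch register, adds it shifted left by 12
   bits, and then adds the remaining 904 (5000 % 4096), i.e. roughly

	mov	xS, #1
	add	xD, xD, xS, lsl #12
	add	xD, xD, #904

   where xD and xS stand for the REGNUM and SCRATCHREG registers.  */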
2335 
2336 /* Output code to add DELTA to the first argument, and then jump
2337    to FUNCTION.  Used for C++ multiple inheritance.  */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 			 HOST_WIDE_INT delta,
2341 			 HOST_WIDE_INT vcall_offset,
2342 			 tree function)
2343 {
2344   /* The this pointer is always in x0.  Note that this differs from
2345      Arm where the this pointer may be bumped to r1 if r0 is required
2346      to return a pointer to an aggregate.  On AArch64 a result value
2347      pointer will be in x8.  */
2348   int this_regno = R0_REGNUM;
2349   rtx this_rtx, temp0, temp1, addr, insn, funexp;
2350 
2351   reload_completed = 1;
2352   emit_note (NOTE_INSN_PROLOGUE_END);
2353 
2354   if (vcall_offset == 0)
2355     aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356   else
2357     {
2358       gcc_assert ((vcall_offset & 0x7) == 0);
2359 
2360       this_rtx = gen_rtx_REG (Pmode, this_regno);
2361       temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362       temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2363 
2364       addr = this_rtx;
2365       if (delta != 0)
2366 	{
2367 	  if (delta >= -256 && delta < 256)
2368 	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 				       plus_constant (Pmode, this_rtx, delta));
2370 	  else
2371 	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2372 	}
2373 
2374       aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375 
2376       if (vcall_offset >= -256 && vcall_offset < 32768)
2377 	  addr = plus_constant (Pmode, temp0, vcall_offset);
2378       else
2379 	{
2380 	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381 	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2382 	}
2383 
2384       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385       emit_insn (gen_add2_insn (this_rtx, temp1));
2386     }
2387 
2388   /* Generate a tail call to the target function.  */
2389   if (!TREE_USED (function))
2390     {
2391       assemble_external (function);
2392       TREE_USED (function) = 1;
2393     }
2394   funexp = XEXP (DECL_RTL (function), 0);
2395   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397   SIBLING_CALL_P (insn) = 1;
2398 
2399   insn = get_insns ();
2400   shorten_branches (insn);
2401   final_start_function (insn, file, 1);
2402   final (insn, file, 1);
2403   final_end_function ();
2404 
2405   /* Stop pretending to be a post-reload pass.  */
2406   reload_completed = 0;
2407 }
2408 
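/* Subroutine of aarch64_tls_referenced_p, called via for_each_rtx.
   Return nonzero if *X is a symbol reference with a TLS model; return -1
   for UNSPEC_TLS so that its operands are not scanned.  */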
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411 {
2412   if (GET_CODE (*x) == SYMBOL_REF)
2413     return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414 
2415   /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416      TLS offsets, not real symbol references.  */
2417   if (GET_CODE (*x) == UNSPEC
2418       && XINT (*x, 1) == UNSPEC_TLS)
2419     return -1;
2420 
2421   return 0;
2422 }
2423 
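/* Return true if X contains a reference to a thread-local symbol.  */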
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2426 {
2427   if (!TARGET_HAVE_TLS)
2428     return false;
2429 
2430   return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431 }
2432 
2433 
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436 {
2437   const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438   const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439 
2440   if (*imm1 < *imm2)
2441     return -1;
2442   if (*imm1 > *imm2)
2443     return +1;
2444   return 0;
2445 }
2446 
2447 
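/* Build the sorted table of every value representable as an AArch64
   bitmask immediate: for each element size E in {2, 4, 8, 16, 32, 64},
   take S consecutive set bits (0 < S < E), rotate them right by R
   (0 <= R < E) and replicate the element across 64 bits.  The table is
   searched by aarch64_bitmask_imm below.  */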
2448 static void
2449 aarch64_build_bitmask_table (void)
2450 {
2451   unsigned HOST_WIDE_INT mask, imm;
2452   unsigned int log_e, e, s, r;
2453   unsigned int nimms = 0;
2454 
2455   for (log_e = 1; log_e <= 6; log_e++)
2456     {
2457       e = 1 << log_e;
2458       if (e == 64)
2459 	mask = ~(HOST_WIDE_INT) 0;
2460       else
2461 	mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462       for (s = 1; s < e; s++)
2463 	{
2464 	  for (r = 0; r < e; r++)
2465 	    {
2466 	      /* set s consecutive bits to 1 (s < 64) */
2467 	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 	      /* rotate right by r */
2469 	      if (r != 0)
2470 		imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 	      /* replicate the constant depending on SIMD size */
2472 	      switch (log_e) {
2473 	      case 1: imm |= (imm <<  2);
2474 	      case 2: imm |= (imm <<  4);
2475 	      case 3: imm |= (imm <<  8);
2476 	      case 4: imm |= (imm << 16);
2477 	      case 5: imm |= (imm << 32);
2478 	      case 6:
2479 		break;
2480 	      default:
2481 		gcc_unreachable ();
2482 	      }
2483 	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 	      aarch64_bitmasks[nimms++] = imm;
2485 	    }
2486 	}
2487     }
2488 
2489   gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490   qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 	 aarch64_bitmasks_cmp);
2492 }
2493 
2494 
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496    a left shift of 0 or 12 bits.  */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2499 {
2500   return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502 	  );
2503 }
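/* For example, 0xabc and 0xabc000 satisfy aarch64_uimm12_shift, while
   0xabc00 does not, because its set bits straddle the two shift
   positions.  */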
2504 
2505 
2506 /* Return true if val is an immediate that can be loaded into a
2507    register by a MOVZ instruction.  */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510 {
2511   if (GET_MODE_SIZE (mode) > 4)
2512     {
2513       if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 	return 1;
2516     }
2517   else
2518     {
2519       /* Ignore sign extension.  */
2520       val &= (HOST_WIDE_INT) 0xffffffff;
2521     }
2522   return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524 }
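/* For example, 0x12340000 can be loaded with a single MOVZ (one 16-bit
   chunk shifted left by 16), whereas 0x12345678 occupies more than one
   16-bit chunk and so fails aarch64_movw_imm.  */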
2525 
2526 
2527 /* Return true if val is a valid bitmask immediate.  */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530 {
2531   if (GET_MODE_SIZE (mode) < 8)
2532     {
2533       /* Replicate bit pattern.  */
2534       val &= (HOST_WIDE_INT) 0xffffffff;
2535       val |= val << 32;
2536     }
2537   return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539 }
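/* For example, 0x00ff00ff00ff00ff (eight set bits replicated in every
   16-bit element) and 0x7fffffffffffffff (63 consecutive set bits) are
   valid bitmask immediates, while 0x1234 is not.  */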
2540 
2541 
2542 /* Return true if val is an immediate that can be loaded into a
2543    register in a single instruction.  */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546 {
2547   if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548     return 1;
2549   return aarch64_bitmask_imm (val, mode);
2550 }
2551 
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554 {
2555   rtx base, offset;
2556   if (GET_CODE (x) == HIGH)
2557     return true;
2558 
2559   split_const (x, &base, &offset);
2560   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561     return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562 
2563   return aarch64_tls_referenced_p (x);
2564 }
2565 
2566 /* Return true if register REGNO is a valid index register.
2567    STRICT_P is true if REG_OK_STRICT is in effect.  */
2568 
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571 {
2572   if (!HARD_REGISTER_NUM_P (regno))
2573     {
2574       if (!strict_p)
2575 	return true;
2576 
2577       if (!reg_renumber)
2578 	return false;
2579 
2580       regno = reg_renumber[regno];
2581     }
2582   return GP_REGNUM_P (regno);
2583 }
2584 
2585 /* Return true if register REGNO is a valid base register for mode MODE.
2586    STRICT_P is true if REG_OK_STRICT is in effect.  */
2587 
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590 {
2591   if (!HARD_REGISTER_NUM_P (regno))
2592     {
2593       if (!strict_p)
2594 	return true;
2595 
2596       if (!reg_renumber)
2597 	return false;
2598 
2599       regno = reg_renumber[regno];
2600     }
2601 
2602   /* The fake registers will be eliminated to either the stack or
2603      hard frame pointer, both of which are usually valid base registers.
2604      Reload deals with the cases where the eliminated form isn't valid.  */
2605   return (GP_REGNUM_P (regno)
2606 	  || regno == SP_REGNUM
2607 	  || regno == FRAME_POINTER_REGNUM
2608 	  || regno == ARG_POINTER_REGNUM);
2609 }
2610 
2611 /* Return true if X is a valid base register for mode MODE.
2612    STRICT_P is true if REG_OK_STRICT is in effect.  */
2613 
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616 {
2617   if (!strict_p && GET_CODE (x) == SUBREG)
2618     x = SUBREG_REG (x);
2619 
2620   return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621 }
2622 
2623 /* Return true if address offset is a valid index.  If it is, fill in INFO
2624    appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */
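/* For example (illustrative), when classifying the index of a 4-byte
   (SImode) access, (mult:DI (sign_extend:DI (reg:SI)) (const_int 4)) is
   accepted as ADDRESS_REG_SXTW with a shift of 2, corresponding to the
   [Xn, Wm, SXTW #2] addressing form.  */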
2625 
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 			enum machine_mode mode, bool strict_p)
2629 {
2630   enum aarch64_address_type type;
2631   rtx index;
2632   int shift;
2633 
2634   /* (reg:P) */
2635   if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636       && GET_MODE (x) == Pmode)
2637     {
2638       type = ADDRESS_REG_REG;
2639       index = x;
2640       shift = 0;
2641     }
2642   /* (sign_extend:DI (reg:SI)) */
2643   else if ((GET_CODE (x) == SIGN_EXTEND
2644 	    || GET_CODE (x) == ZERO_EXTEND)
2645 	   && GET_MODE (x) == DImode
2646 	   && GET_MODE (XEXP (x, 0)) == SImode)
2647     {
2648       type = (GET_CODE (x) == SIGN_EXTEND)
2649 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650       index = XEXP (x, 0);
2651       shift = 0;
2652     }
2653   /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654   else if (GET_CODE (x) == MULT
2655 	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 	   && GET_MODE (XEXP (x, 0)) == DImode
2658 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 	   && CONST_INT_P (XEXP (x, 1)))
2660     {
2661       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663       index = XEXP (XEXP (x, 0), 0);
2664       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665     }
2666   /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667   else if (GET_CODE (x) == ASHIFT
2668 	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 	   && GET_MODE (XEXP (x, 0)) == DImode
2671 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 	   && CONST_INT_P (XEXP (x, 1)))
2673     {
2674       type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676       index = XEXP (XEXP (x, 0), 0);
2677       shift = INTVAL (XEXP (x, 1));
2678     }
2679   /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680   else if ((GET_CODE (x) == SIGN_EXTRACT
2681 	    || GET_CODE (x) == ZERO_EXTRACT)
2682 	   && GET_MODE (x) == DImode
2683 	   && GET_CODE (XEXP (x, 0)) == MULT
2684 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686     {
2687       type = (GET_CODE (x) == SIGN_EXTRACT)
2688 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689       index = XEXP (XEXP (x, 0), 0);
2690       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691       if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 	  || INTVAL (XEXP (x, 2)) != 0)
2693 	shift = -1;
2694     }
2695   /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696      (const_int 0xffffffff<<shift)) */
2697   else if (GET_CODE (x) == AND
2698 	   && GET_MODE (x) == DImode
2699 	   && GET_CODE (XEXP (x, 0)) == MULT
2700 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 	   && CONST_INT_P (XEXP (x, 1)))
2703     {
2704       type = ADDRESS_REG_UXTW;
2705       index = XEXP (XEXP (x, 0), 0);
2706       shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 	shift = -1;
2709     }
2710   /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711   else if ((GET_CODE (x) == SIGN_EXTRACT
2712 	    || GET_CODE (x) == ZERO_EXTRACT)
2713 	   && GET_MODE (x) == DImode
2714 	   && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717     {
2718       type = (GET_CODE (x) == SIGN_EXTRACT)
2719 	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720       index = XEXP (XEXP (x, 0), 0);
2721       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722       if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 	  || INTVAL (XEXP (x, 2)) != 0)
2724 	shift = -1;
2725     }
2726   /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727      (const_int 0xffffffff<<shift)) */
2728   else if (GET_CODE (x) == AND
2729 	   && GET_MODE (x) == DImode
2730 	   && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 	   && CONST_INT_P (XEXP (x, 1)))
2734     {
2735       type = ADDRESS_REG_UXTW;
2736       index = XEXP (XEXP (x, 0), 0);
2737       shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738       if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 	shift = -1;
2740     }
2741   /* (mult:P (reg:P) (const_int scale)) */
2742   else if (GET_CODE (x) == MULT
2743 	   && GET_MODE (x) == Pmode
2744 	   && GET_MODE (XEXP (x, 0)) == Pmode
2745 	   && CONST_INT_P (XEXP (x, 1)))
2746     {
2747       type = ADDRESS_REG_REG;
2748       index = XEXP (x, 0);
2749       shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750     }
2751   /* (ashift:P (reg:P) (const_int shift)) */
2752   else if (GET_CODE (x) == ASHIFT
2753 	   && GET_MODE (x) == Pmode
2754 	   && GET_MODE (XEXP (x, 0)) == Pmode
2755 	   && CONST_INT_P (XEXP (x, 1)))
2756     {
2757       type = ADDRESS_REG_REG;
2758       index = XEXP (x, 0);
2759       shift = INTVAL (XEXP (x, 1));
2760     }
2761   else
2762     return false;
2763 
2764   if (GET_CODE (index) == SUBREG)
2765     index = SUBREG_REG (index);
2766 
2767   if ((shift == 0 ||
2768        (shift > 0 && shift <= 3
2769 	&& (1 << shift) == GET_MODE_SIZE (mode)))
2770       && REG_P (index)
2771       && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772     {
2773       info->type = type;
2774       info->offset = index;
2775       info->shift = shift;
2776       return true;
2777     }
2778 
2779   return false;
2780 }
2781 
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784 {
2785   return (offset >= -64 * GET_MODE_SIZE (mode)
2786 	  && offset < 64 * GET_MODE_SIZE (mode)
2787 	  && offset % GET_MODE_SIZE (mode) == 0);
2788 }
2789 
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 			       HOST_WIDE_INT offset)
2793 {
2794   return offset >= -256 && offset < 256;
2795 }
2796 
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799 {
2800   return (offset >= 0
2801 	  && offset < 4096 * GET_MODE_SIZE (mode)
2802 	  && offset % GET_MODE_SIZE (mode) == 0);
2803 }
2804 
2805 /* Return true if X is a valid address for machine mode MODE.  If it is,
2806    fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
2807    effect.  OUTER_CODE is PARALLEL for a load/store pair.  */
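/* For example (illustrative), (plus:DI (reg:DI sp) (const_int 16)) used
   for a DImode access classifies as ADDRESS_REG_IMM, since 16 is a valid
   9-bit signed unscaled offset (and also a valid scaled 12-bit one).  */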
2808 
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811 			  rtx x, enum machine_mode mode,
2812 			  RTX_CODE outer_code, bool strict_p)
2813 {
2814   enum rtx_code code = GET_CODE (x);
2815   rtx op0, op1;
2816   bool allow_reg_index_p =
2817     outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
2818 
2819   /* Don't support anything other than POST_INC or REG addressing for
2820      AdvSIMD.  */
2821   if (aarch64_vector_mode_p (mode)
2822       && (code != POST_INC && code != REG))
2823     return false;
2824 
2825   switch (code)
2826     {
2827     case REG:
2828     case SUBREG:
2829       info->type = ADDRESS_REG_IMM;
2830       info->base = x;
2831       info->offset = const0_rtx;
2832       return aarch64_base_register_rtx_p (x, strict_p);
2833 
2834     case PLUS:
2835       op0 = XEXP (x, 0);
2836       op1 = XEXP (x, 1);
2837       if (GET_MODE_SIZE (mode) != 0
2838 	  && CONST_INT_P (op1)
2839 	  && aarch64_base_register_rtx_p (op0, strict_p))
2840 	{
2841 	  HOST_WIDE_INT offset = INTVAL (op1);
2842 
2843 	  info->type = ADDRESS_REG_IMM;
2844 	  info->base = op0;
2845 	  info->offset = op1;
2846 
2847 	  /* TImode and TFmode values are allowed in both pairs of X
2848 	     registers and individual Q registers.  The available
2849 	     address modes are:
2850 	     X,X: 7-bit signed scaled offset
2851 	     Q:   9-bit signed offset
2852 	     We conservatively require an offset representable in either mode.
2853 	   */
2854 	  if (mode == TImode || mode == TFmode)
2855 	    return (offset_7bit_signed_scaled_p (mode, offset)
2856 		    && offset_9bit_signed_unscaled_p (mode, offset));
2857 
2858 	  if (outer_code == PARALLEL)
2859 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 		    && offset_7bit_signed_scaled_p (mode, offset));
2861 	  else
2862 	    return (offset_9bit_signed_unscaled_p (mode, offset)
2863 		    || offset_12bit_unsigned_scaled_p (mode, offset));
2864 	}
2865 
2866       if (allow_reg_index_p)
2867 	{
2868 	  /* Look for base + (scaled/extended) index register.  */
2869 	  if (aarch64_base_register_rtx_p (op0, strict_p)
2870 	      && aarch64_classify_index (info, op1, mode, strict_p))
2871 	    {
2872 	      info->base = op0;
2873 	      return true;
2874 	    }
2875 	  if (aarch64_base_register_rtx_p (op1, strict_p)
2876 	      && aarch64_classify_index (info, op0, mode, strict_p))
2877 	    {
2878 	      info->base = op1;
2879 	      return true;
2880 	    }
2881 	}
2882 
2883       return false;
2884 
2885     case POST_INC:
2886     case POST_DEC:
2887     case PRE_INC:
2888     case PRE_DEC:
2889       info->type = ADDRESS_REG_WB;
2890       info->base = XEXP (x, 0);
2891       info->offset = NULL_RTX;
2892       return aarch64_base_register_rtx_p (info->base, strict_p);
2893 
2894     case POST_MODIFY:
2895     case PRE_MODIFY:
2896       info->type = ADDRESS_REG_WB;
2897       info->base = XEXP (x, 0);
2898       if (GET_CODE (XEXP (x, 1)) == PLUS
2899 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 	  && aarch64_base_register_rtx_p (info->base, strict_p))
2902 	{
2903 	  HOST_WIDE_INT offset;
2904 	  info->offset = XEXP (XEXP (x, 1), 1);
2905 	  offset = INTVAL (info->offset);
2906 
2907 	  /* TImode and TFmode values are allowed in both pairs of X
2908 	     registers and individual Q registers.  The available
2909 	     address modes are:
2910 	     X,X: 7-bit signed scaled offset
2911 	     Q:   9-bit signed offset
2912 	     We conservatively require an offset representable in either mode.
2913 	   */
2914 	  if (mode == TImode || mode == TFmode)
2915 	    return (offset_7bit_signed_scaled_p (mode, offset)
2916 		    && offset_9bit_signed_unscaled_p (mode, offset));
2917 
2918 	  if (outer_code == PARALLEL)
2919 	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 		    && offset_7bit_signed_scaled_p (mode, offset));
2921 	  else
2922 	    return offset_9bit_signed_unscaled_p (mode, offset);
2923 	}
2924       return false;
2925 
2926     case CONST:
2927     case SYMBOL_REF:
2928     case LABEL_REF:
2929       /* load literal: pc-relative constant pool entry.  Only supported
2930          for SI mode or larger.  */
2931       info->type = ADDRESS_SYMBOLIC;
2932       if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2933 	{
2934 	  rtx sym, addend;
2935 
2936 	  split_const (x, &sym, &addend);
2937 	  return (GET_CODE (sym) == LABEL_REF
2938 		  || (GET_CODE (sym) == SYMBOL_REF
2939 		      && CONSTANT_POOL_ADDRESS_P (sym)));
2940 	}
2941       return false;
2942 
2943     case LO_SUM:
2944       info->type = ADDRESS_LO_SUM;
2945       info->base = XEXP (x, 0);
2946       info->offset = XEXP (x, 1);
2947       if (allow_reg_index_p
2948 	  && aarch64_base_register_rtx_p (info->base, strict_p))
2949 	{
2950 	  rtx sym, offs;
2951 	  split_const (info->offset, &sym, &offs);
2952 	  if (GET_CODE (sym) == SYMBOL_REF
2953 	      && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2954 		  == SYMBOL_SMALL_ABSOLUTE))
2955 	    {
2956 	      /* The symbol and offset must be aligned to the access size.  */
2957 	      unsigned int align;
2958 	      unsigned int ref_size;
2959 
2960 	      if (CONSTANT_POOL_ADDRESS_P (sym))
2961 		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2962 	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2963 		{
2964 		  tree exp = SYMBOL_REF_DECL (sym);
2965 		  align = TYPE_ALIGN (TREE_TYPE (exp));
2966 		  align = CONSTANT_ALIGNMENT (exp, align);
2967 		}
2968 	      else if (SYMBOL_REF_DECL (sym))
2969 		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 	      else
2971 		align = BITS_PER_UNIT;
2972 
2973 	      ref_size = GET_MODE_SIZE (mode);
2974 	      if (ref_size == 0)
2975 		ref_size = GET_MODE_SIZE (DImode);
2976 
2977 	      return ((INTVAL (offs) & (ref_size - 1)) == 0
2978 		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2979 	    }
2980 	}
2981       return false;
2982 
2983     default:
2984       return false;
2985     }
2986 }
2987 
2988 bool
2989 aarch64_symbolic_address_p (rtx x)
2990 {
2991   rtx offset;
2992 
2993   split_const (x, &x, &offset);
2994   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2995 }
2996 
2997 /* Classify the base of symbolic expression X, given that X appears in
2998    context CONTEXT.  */
2999 static enum aarch64_symbol_type
3000 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3001 {
3002   rtx offset;
3003   split_const (x, &x, &offset);
3004   return aarch64_classify_symbol (x, context);
3005 }
3006 
3007 
3008 /* Return TRUE if X is a legitimate address for accessing memory in
3009    mode MODE.  */
3010 static bool
3011 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3012 {
3013   struct aarch64_address_info addr;
3014 
3015   return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3016 }
3017 
3018 /* Return TRUE if X is a legitimate address for accessing memory in
3019    mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
3020    pair operation.  */
3021 bool
3022 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3023 			   RTX_CODE outer_code, bool strict_p)
3024 {
3025   struct aarch64_address_info addr;
3026 
3027   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3028 }
3029 
3030 /* Return TRUE if rtx X is immediate constant 0.0 */
3031 bool
3032 aarch64_float_const_zero_rtx_p (rtx x)
3033 {
3034   REAL_VALUE_TYPE r;
3035 
3036   if (GET_MODE (x) == VOIDmode)
3037     return false;
3038 
3039   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3040   if (REAL_VALUE_MINUS_ZERO (r))
3041     return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3042   return REAL_VALUES_EQUAL (r, dconst0);
3043 }
3044 
3045 /* Return the fixed registers used for condition codes.  */
3046 
3047 static bool
3048 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3049 {
3050   *p1 = CC_REGNUM;
3051   *p2 = INVALID_REGNUM;
3052   return true;
3053 }
3054 
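/* Return the CC mode that should be used when comparing X with Y using
   relational operator CODE.  */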
3055 enum machine_mode
3056 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3057 {
3058   /* All floating point compares return CCFP if it is an equality
3059      comparison, and CCFPE otherwise.  */
3060   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3061     {
3062       switch (code)
3063 	{
3064 	case EQ:
3065 	case NE:
3066 	case UNORDERED:
3067 	case ORDERED:
3068 	case UNLT:
3069 	case UNLE:
3070 	case UNGT:
3071 	case UNGE:
3072 	case UNEQ:
3073 	case LTGT:
3074 	  return CCFPmode;
3075 
3076 	case LT:
3077 	case LE:
3078 	case GT:
3079 	case GE:
3080 	  return CCFPEmode;
3081 
3082 	default:
3083 	  gcc_unreachable ();
3084 	}
3085     }
3086 
3087   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088       && y == const0_rtx
3089       && (code == EQ || code == NE || code == LT || code == GE)
3090       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
3091     return CC_NZmode;
3092 
3093   /* A compare with a shifted operand.  Because of canonicalization,
3094      the comparison will have to be swapped when we emit the assembly
3095      code.  */
3096   if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3097       && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3098       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3099 	  || GET_CODE (x) == LSHIFTRT
3100 	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3101     return CC_SWPmode;
3102 
3103   /* A compare of a mode narrower than SI mode against zero can be done
3104      by extending the value in the comparison.  */
3105   if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3106       && y == const0_rtx)
3107     /* Only use sign-extension if we really need it.  */
3108     return ((code == GT || code == GE || code == LE || code == LT)
3109 	    ? CC_SESWPmode : CC_ZESWPmode);
3110 
3111   /* For everything else, return CCmode.  */
3112   return CCmode;
3113 }
3114 
3115 static unsigned
3116 aarch64_get_condition_code (rtx x)
3117 {
3118   enum machine_mode mode = GET_MODE (XEXP (x, 0));
3119   enum rtx_code comp_code = GET_CODE (x);
3120 
3121   if (GET_MODE_CLASS (mode) != MODE_CC)
3122     mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3123 
3124   switch (mode)
3125     {
3126     case CCFPmode:
3127     case CCFPEmode:
3128       switch (comp_code)
3129 	{
3130 	case GE: return AARCH64_GE;
3131 	case GT: return AARCH64_GT;
3132 	case LE: return AARCH64_LS;
3133 	case LT: return AARCH64_MI;
3134 	case NE: return AARCH64_NE;
3135 	case EQ: return AARCH64_EQ;
3136 	case ORDERED: return AARCH64_VC;
3137 	case UNORDERED: return AARCH64_VS;
3138 	case UNLT: return AARCH64_LT;
3139 	case UNLE: return AARCH64_LE;
3140 	case UNGT: return AARCH64_HI;
3141 	case UNGE: return AARCH64_PL;
3142 	default: gcc_unreachable ();
3143 	}
3144       break;
3145 
3146     case CCmode:
3147       switch (comp_code)
3148 	{
3149 	case NE: return AARCH64_NE;
3150 	case EQ: return AARCH64_EQ;
3151 	case GE: return AARCH64_GE;
3152 	case GT: return AARCH64_GT;
3153 	case LE: return AARCH64_LE;
3154 	case LT: return AARCH64_LT;
3155 	case GEU: return AARCH64_CS;
3156 	case GTU: return AARCH64_HI;
3157 	case LEU: return AARCH64_LS;
3158 	case LTU: return AARCH64_CC;
3159 	default: gcc_unreachable ();
3160 	}
3161       break;
3162 
3163     case CC_SWPmode:
3164     case CC_ZESWPmode:
3165     case CC_SESWPmode:
3166       switch (comp_code)
3167 	{
3168 	case NE: return AARCH64_NE;
3169 	case EQ: return AARCH64_EQ;
3170 	case GE: return AARCH64_LE;
3171 	case GT: return AARCH64_LT;
3172 	case LE: return AARCH64_GE;
3173 	case LT: return AARCH64_GT;
3174 	case GEU: return AARCH64_LS;
3175 	case GTU: return AARCH64_CC;
3176 	case LEU: return AARCH64_CS;
3177 	case LTU: return AARCH64_HI;
3178 	default: gcc_unreachable ();
3179 	}
3180       break;
3181 
3182     case CC_NZmode:
3183       switch (comp_code)
3184 	{
3185 	case NE: return AARCH64_NE;
3186 	case EQ: return AARCH64_EQ;
3187 	case GE: return AARCH64_PL;
3188 	case LT: return AARCH64_MI;
3189 	default: gcc_unreachable ();
3190 	}
3191       break;
3192 
3193     default:
3194       gcc_unreachable ();
3195       break;
3196     }
3197 }
3198 
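/* Return the number of set bits in VALUE; each iteration below clears
   the least significant set bit.  */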
3199 static unsigned
3200 bit_count (unsigned HOST_WIDE_INT value)
3201 {
3202   unsigned count = 0;
3203 
3204   while (value)
3205     {
3206       count++;
3207       value &= value - 1;
3208     }
3209 
3210   return count;
3211 }
3212 
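/* Print operand X to file F, applying the operand modifier CODE (zero
   for a plain operand); the supported modifiers are handled by the cases
   below.  */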
3213 void
3214 aarch64_print_operand (FILE *f, rtx x, char code)
3215 {
3216   switch (code)
3217     {
3218     case 'e':
3219       /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
3220       {
3221 	int n;
3222 
3223 	if (GET_CODE (x) != CONST_INT
3224 	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3225 	  {
3226 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3227 	    return;
3228 	  }
3229 
3230 	switch (n)
3231 	  {
3232 	  case 3:
3233 	    fputc ('b', f);
3234 	    break;
3235 	  case 4:
3236 	    fputc ('h', f);
3237 	    break;
3238 	  case 5:
3239 	    fputc ('w', f);
3240 	    break;
3241 	  default:
3242 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3243 	    return;
3244 	  }
3245       }
3246       break;
3247 
3248     case 'p':
3249       {
3250 	int n;
3251 
3252 	/* Print N such that 2^N == X.  */
3253 	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3254 	  {
3255 	    output_operand_lossage ("invalid operand for '%%%c'", code);
3256 	    return;
3257 	  }
3258 
3259 	asm_fprintf (f, "%d", n);
3260       }
3261       break;
3262 
3263     case 'P':
3264       /* Print the number of non-zero bits in X (a const_int).  */
3265       if (GET_CODE (x) != CONST_INT)
3266 	{
3267 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3268 	  return;
3269 	}
3270 
3271       asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3272       break;
3273 
3274     case 'H':
3275       /* Print the higher numbered register of a pair (TImode) of regs.  */
3276       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3277 	{
3278 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3279 	  return;
3280 	}
3281 
3282       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3283       break;
3284 
3285     case 'Q':
3286       /* Print the least significant register of a pair (TImode) of regs.  */
3287       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3288 	{
3289 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3290 	  return;
3291 	}
3292       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3293       break;
3294 
3295     case 'R':
3296       /* Print the most significant register of a pair (TImode) of regs.  */
3297       if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3298 	{
3299 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3300 	  return;
3301 	}
3302       asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3303       break;
3304 
3305     case 'm':
3306       /* Print a condition (eq, ne, etc).  */
3307 
3308       /* CONST_TRUE_RTX means always -- that's the default.  */
3309       if (x == const_true_rtx)
3310 	return;
3311 
3312       if (!COMPARISON_P (x))
3313 	{
3314 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3315 	  return;
3316 	}
3317 
3318       fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3319       break;
3320 
3321     case 'M':
3322       /* Print the inverse of a condition (eq <-> ne, etc).  */
3323 
3324       /* CONST_TRUE_RTX means never -- that's the default.  */
3325       if (x == const_true_rtx)
3326 	{
3327 	  fputs ("nv", f);
3328 	  return;
3329 	}
3330 
3331       if (!COMPARISON_P (x))
3332 	{
3333 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3334 	  return;
3335 	}
3336 
3337       fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3338 				  (aarch64_get_condition_code (x))], f);
3339       break;
3340 
3341     case 'b':
3342     case 'h':
3343     case 's':
3344     case 'd':
3345     case 'q':
3346       /* Print a scalar FP/SIMD register name.  */
3347       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3348 	{
3349 	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3350 	  return;
3351 	}
3352       asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3353       break;
3354 
3355     case 'S':
3356     case 'T':
3357     case 'U':
3358     case 'V':
3359       /* Print the first FP/SIMD register name in a list.  */
3360       if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3361 	{
3362 	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3363 	  return;
3364 	}
3365       asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3366 			       REGNO (x) - V0_REGNUM + (code - 'S'));
3367       break;
3368 
3369     case 'X':
3370       /* Print integer constant in hex.  */
3371       if (GET_CODE (x) != CONST_INT)
3372 	{
3373 	  output_operand_lossage ("invalid operand for '%%%c'", code);
3374 	  return;
3375 	}
3376       asm_fprintf (f, "0x%wx", UINTVAL (x));
3377       break;
3378 
3379     case 'w':
3380     case 'x':
3381       /* Print a general register name or the zero register (32-bit or
3382          64-bit).  */
3383       if (x == const0_rtx
3384 	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3385 	{
3386 	  asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3387 	  break;
3388 	}
3389 
3390       if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3391 	{
3392 	  asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3393 		       REGNO (x) - R0_REGNUM);
3394 	  break;
3395 	}
3396 
3397       if (REG_P (x) && REGNO (x) == SP_REGNUM)
3398 	{
3399 	  asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3400 	  break;
3401 	}
3402 
3403       /* Fall through */
3404 
3405     case 0:
3406       /* Print a normal operand, if it's a general register, then we
3407 	 assume DImode.  */
3408       if (x == NULL)
3409 	{
3410 	  output_operand_lossage ("missing operand");
3411 	  return;
3412 	}
3413 
3414       switch (GET_CODE (x))
3415 	{
3416 	case REG:
3417 	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3418 	  break;
3419 
3420 	case MEM:
3421 	  aarch64_memory_reference_mode = GET_MODE (x);
3422 	  output_address (XEXP (x, 0));
3423 	  break;
3424 
3425 	case LABEL_REF:
3426 	case SYMBOL_REF:
3427 	  output_addr_const (asm_out_file, x);
3428 	  break;
3429 
3430 	case CONST_INT:
3431 	  asm_fprintf (f, "%wd", INTVAL (x));
3432 	  break;
3433 
3434 	case CONST_VECTOR:
3435 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3436 	    {
3437 	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
3438 							    HOST_WIDE_INT_MIN,
3439 							    HOST_WIDE_INT_MAX));
3440 	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3441 	    }
3442 	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3443 	    {
3444 	      fputc ('0', f);
3445 	    }
3446 	  else
3447 	    gcc_unreachable ();
3448 	  break;
3449 
3450 	case CONST_DOUBLE:
3451 	  /* CONST_DOUBLE can represent a double-width integer.
3452 	     In this case, the mode of x is VOIDmode.  */
3453 	  if (GET_MODE (x) == VOIDmode)
3454 	    ; /* Do Nothing.  */
3455 	  else if (aarch64_float_const_zero_rtx_p (x))
3456 	    {
3457 	      fputc ('0', f);
3458 	      break;
3459 	    }
3460 	  else if (aarch64_float_const_representable_p (x))
3461 	    {
3462 #define buf_size 20
3463 	      char float_buf[buf_size] = {'\0'};
3464 	      REAL_VALUE_TYPE r;
3465 	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3466 	      real_to_decimal_for_mode (float_buf, &r,
3467 					buf_size, buf_size,
3468 					1, GET_MODE (x));
3469 	      asm_fprintf (asm_out_file, "%s", float_buf);
3470 	      break;
3471 #undef buf_size
3472 	    }
3473 	  output_operand_lossage ("invalid constant");
3474 	  return;
3475 	default:
3476 	  output_operand_lossage ("invalid operand");
3477 	  return;
3478 	}
3479       break;
3480 
3481     case 'A':
3482       if (GET_CODE (x) == HIGH)
3483 	x = XEXP (x, 0);
3484 
3485       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3486 	{
3487 	case SYMBOL_SMALL_GOT:
3488 	  asm_fprintf (asm_out_file, ":got:");
3489 	  break;
3490 
3491 	case SYMBOL_SMALL_TLSGD:
3492 	  asm_fprintf (asm_out_file, ":tlsgd:");
3493 	  break;
3494 
3495 	case SYMBOL_SMALL_TLSDESC:
3496 	  asm_fprintf (asm_out_file, ":tlsdesc:");
3497 	  break;
3498 
3499 	case SYMBOL_SMALL_GOTTPREL:
3500 	  asm_fprintf (asm_out_file, ":gottprel:");
3501 	  break;
3502 
3503 	case SYMBOL_SMALL_TPREL:
3504 	  asm_fprintf (asm_out_file, ":tprel:");
3505 	  break;
3506 
3507 	default:
3508 	  break;
3509 	}
3510       output_addr_const (asm_out_file, x);
3511       break;
3512 
3513     case 'L':
3514       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3515 	{
3516 	case SYMBOL_SMALL_GOT:
3517 	  asm_fprintf (asm_out_file, ":lo12:");
3518 	  break;
3519 
3520 	case SYMBOL_SMALL_TLSGD:
3521 	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3522 	  break;
3523 
3524 	case SYMBOL_SMALL_TLSDESC:
3525 	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3526 	  break;
3527 
3528 	case SYMBOL_SMALL_GOTTPREL:
3529 	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
3530 	  break;
3531 
3532 	case SYMBOL_SMALL_TPREL:
3533 	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3534 	  break;
3535 
3536 	default:
3537 	  break;
3538 	}
3539       output_addr_const (asm_out_file, x);
3540       break;
3541 
3542     case 'G':
3543 
3544       switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3545 	{
3546 	case SYMBOL_SMALL_TPREL:
3547 	  asm_fprintf (asm_out_file, ":tprel_hi12:");
3548 	  break;
3549 	default:
3550 	  break;
3551 	}
3552       output_addr_const (asm_out_file, x);
3553       break;
3554 
3555     default:
3556       output_operand_lossage ("invalid operand prefix '%%%c'", code);
3557       return;
3558     }
3559 }
3560 
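/* Print to F the address X of a memory reference, using the mode
   recorded in aarch64_memory_reference_mode to select the addressing
   form.  */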
3561 void
3562 aarch64_print_operand_address (FILE *f, rtx x)
3563 {
3564   struct aarch64_address_info addr;
3565 
3566   if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3567 			     MEM, true))
3568     switch (addr.type)
3569       {
3570       case ADDRESS_REG_IMM:
3571 	if (addr.offset == const0_rtx)
3572 	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3573 	else
3574 	  asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3575 		       INTVAL (addr.offset));
3576 	return;
3577 
3578       case ADDRESS_REG_REG:
3579 	if (addr.shift == 0)
3580 	  asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3581 		       reg_names [REGNO (addr.offset)]);
3582 	else
3583 	  asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3584 		       reg_names [REGNO (addr.offset)], addr.shift);
3585 	return;
3586 
3587       case ADDRESS_REG_UXTW:
3588 	if (addr.shift == 0)
3589 	  asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3590 		       REGNO (addr.offset) - R0_REGNUM);
3591 	else
3592 	  asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3593 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
3594 	return;
3595 
3596       case ADDRESS_REG_SXTW:
3597 	if (addr.shift == 0)
3598 	  asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3599 		       REGNO (addr.offset) - R0_REGNUM);
3600 	else
3601 	  asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3602 		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
3603 	return;
3604 
3605       case ADDRESS_REG_WB:
3606 	switch (GET_CODE (x))
3607 	  {
3608 	  case PRE_INC:
3609 	    asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3610 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3611 	    return;
3612 	  case POST_INC:
3613 	    asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3614 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3615 	    return;
3616 	  case PRE_DEC:
3617 	    asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3618 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3619 	    return;
3620 	  case POST_DEC:
3621 	    asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3622 			 GET_MODE_SIZE (aarch64_memory_reference_mode));
3623 	    return;
3624 	  case PRE_MODIFY:
3625 	    asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3626 			 INTVAL (addr.offset));
3627 	    return;
3628 	  case POST_MODIFY:
3629 	    asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3630 			 INTVAL (addr.offset));
3631 	    return;
3632 	  default:
3633 	    break;
3634 	  }
3635 	break;
3636 
3637       case ADDRESS_LO_SUM:
3638 	asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3639 	output_addr_const (f, addr.offset);
3640 	asm_fprintf (f, "]");
3641 	return;
3642 
3643       case ADDRESS_SYMBOLIC:
3644 	break;
3645       }
3646 
3647   output_addr_const (f, x);
3648 }
3649 
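/* Function profiling is not yet implemented; report it via sorry ()
   rather than emitting profiling code.  */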
3650 void
3651 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3652 			   int labelno ATTRIBUTE_UNUSED)
3653 {
3654   sorry ("function profiling");
3655 }
3656 
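/* Return true if X (or any sub-rtx of X) references a label.  */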
3657 bool
3658 aarch64_label_mentioned_p (rtx x)
3659 {
3660   const char *fmt;
3661   int i;
3662 
3663   if (GET_CODE (x) == LABEL_REF)
3664     return true;
3665 
3666   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3667      referencing instruction, but they are constant offsets, not
3668      symbols.  */
3669   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3670     return false;
3671 
3672   fmt = GET_RTX_FORMAT (GET_CODE (x));
3673   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3674     {
3675       if (fmt[i] == 'E')
3676 	{
3677 	  int j;
3678 
3679 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3680 	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3681 	      return 1;
3682 	}
3683       else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3684 	return 1;
3685     }
3686 
3687   return 0;
3688 }
3689 
3690 /* Implement REGNO_REG_CLASS.  */
3691 
3692 enum reg_class
3693 aarch64_regno_regclass (unsigned regno)
3694 {
3695   if (GP_REGNUM_P (regno))
3696     return CORE_REGS;
3697 
3698   if (regno == SP_REGNUM)
3699     return STACK_REG;
3700 
3701   if (regno == FRAME_POINTER_REGNUM
3702       || regno == ARG_POINTER_REGNUM)
3703     return CORE_REGS;
3704 
3705   if (FP_REGNUM_P (regno))
3706     return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3707 
3708   return NO_REGS;
3709 }
3710 
3711 /* Try a machine-dependent way of reloading an illegitimate address
3712    operand.  If we find one, push the reload and return the new rtx.  */
3713 
3714 rtx
3715 aarch64_legitimize_reload_address (rtx *x_p,
3716 				   enum machine_mode mode,
3717 				   int opnum, int type,
3718 				   int ind_levels ATTRIBUTE_UNUSED)
3719 {
3720   rtx x = *x_p;
3721 
3722   /* Do not allow mem (plus (reg, const)) if vector mode.  */
3723   if (aarch64_vector_mode_p (mode)
3724       && GET_CODE (x) == PLUS
3725       && REG_P (XEXP (x, 0))
3726       && CONST_INT_P (XEXP (x, 1)))
3727     {
3728       rtx orig_rtx = x;
3729       x = copy_rtx (x);
3730       push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3731 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3732 		   opnum, (enum reload_type) type);
3733       return x;
3734     }
3735 
3736   /* We must recognize output that we have already generated ourselves.  */
3737   if (GET_CODE (x) == PLUS
3738       && GET_CODE (XEXP (x, 0)) == PLUS
3739       && REG_P (XEXP (XEXP (x, 0), 0))
3740       && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3741       && CONST_INT_P (XEXP (x, 1)))
3742     {
3743       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3744 		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3745 		   opnum, (enum reload_type) type);
3746       return x;
3747     }
3748 
3749   /* We wish to handle large displacements off a base register by splitting
3750      the addend across an add and the mem insn.  This can cut the number of
3751      extra insns needed from 3 to 1.  It is only useful for load/store of a
3752      single register with 12 bit offset field.  */
3753   if (GET_CODE (x) == PLUS
3754       && REG_P (XEXP (x, 0))
3755       && CONST_INT_P (XEXP (x, 1))
3756       && HARD_REGISTER_P (XEXP (x, 0))
3757       && mode != TImode
3758       && mode != TFmode
3759       && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3760     {
3761       HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3762       HOST_WIDE_INT low = val & 0xfff;
3763       HOST_WIDE_INT high = val - low;
3764       HOST_WIDE_INT offs;
3765       rtx cst;
3766 
3767       /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
3768 	 BLKmode alignment.  */
3769       if (GET_MODE_SIZE (mode) == 0)
3770 	return NULL_RTX;
3771 
3772       offs = low % GET_MODE_SIZE (mode);
3773 
3774       /* Align misaligned offset by adjusting high part to compensate.  */
3775       if (offs != 0)
3776 	{
3777 	  if (aarch64_uimm12_shift (high + offs))
3778 	    {
3779 	      /* Align down.  */
3780 	      low = low - offs;
3781 	      high = high + offs;
3782 	    }
3783 	  else
3784 	    {
3785 	      /* Align up.  */
3786 	      offs = GET_MODE_SIZE (mode) - offs;
3787 	      low = low + offs;
3788 	      high = high + (low & 0x1000) - offs;
3789 	      low &= 0xfff;
3790 	    }
3791 	}
3792 
3793       /* Check for overflow.  */
3794       if (high + low != val)
3795 	return NULL_RTX;
3796 
3797       cst = GEN_INT (high);
3798       if (!aarch64_uimm12_shift (high))
3799 	cst = force_const_mem (Pmode, cst);
3800 
3801       /* Reload high part into base reg, leaving the low part
3802 	 in the mem instruction.  */
3803       x = gen_rtx_PLUS (Pmode,
3804 			gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3805 			GEN_INT (low));
3806 
3807       push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3808 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3809 		   opnum, (enum reload_type) type);
3810       return x;
3811     }
3812 
3813   return NULL_RTX;
3814 }
3815 
3816 
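/* Implement TARGET_SECONDARY_RELOAD.  Return the class of scratch
   registers needed (or set SRI->icode to a reload pattern) to move a
   value X of mode MODE into or out of a register of class RCLASS.  */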
3817 static reg_class_t
3818 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3819 			  reg_class_t rclass,
3820 			  enum machine_mode mode,
3821 			  secondary_reload_info *sri)
3822 {
3823   /* Address expressions of the form PLUS (SP, large_offset) need two
3824      scratch registers, one for the constant, and one for holding a
3825      copy of SP, since SP cannot be used on the RHS of an add-reg
3826      instruction.  */
3827   if (mode == DImode
3828       && GET_CODE (x) == PLUS
3829       && XEXP (x, 0) == stack_pointer_rtx
3830       && CONST_INT_P (XEXP (x, 1))
3831       && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3832     {
3833       sri->icode = CODE_FOR_reload_sp_immediate;
3834       return NO_REGS;
3835     }
3836 
3837   /* Without the TARGET_SIMD instructions we cannot move a Q register
3838      to a Q register directly.  We need a scratch.  */
3839   if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3840       && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3841       && reg_class_subset_p (rclass, FP_REGS))
3842     {
3843       if (mode == TFmode)
3844         sri->icode = CODE_FOR_aarch64_reload_movtf;
3845       else if (mode == TImode)
3846         sri->icode = CODE_FOR_aarch64_reload_movti;
3847       return NO_REGS;
3848     }
3849 
3850   /* A TFmode or TImode memory access should be handled via an FP_REGS
3851      because AArch64 has richer addressing modes for LDR/STR instructions
3852      than LDP/STP instructions.  */
3853   if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3854       && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3855     return FP_REGS;
3856 
3857   if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
3858     return CORE_REGS;
3859 
3860   return NO_REGS;
3861 }
3862 
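/* Implement TARGET_CAN_ELIMINATE.  Return true if eliminating register
   FROM in favour of register TO is permitted in the current function.  */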
3863 static bool
3864 aarch64_can_eliminate (const int from, const int to)
3865 {
3866   /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3867      HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
3868 
3869   if (frame_pointer_needed)
3870     {
3871       if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3872 	return true;
3873       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3874 	return false;
3875       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3876 	  && !cfun->calls_alloca)
3877 	return true;
3878       if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3879 	return true;
3880       return false;
3881     }
3882   else
3883     {
3884       /* If we decided that we didn't need a leaf frame pointer but then used
3885 	 LR in the function, then we'll want a frame pointer after all, so
3886 	 prevent this elimination to ensure a frame pointer is used.
3887 
3888 	 NOTE: the original value of flag_omit_frame_pointer gets trashed
3889 	 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3890 	 of faked_omit_frame_pointer here (which is true when we always
3891 	 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3892 	 pointers when LR is clobbered).  */
3893       if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3894 	  && df_regs_ever_live_p (LR_REGNUM)
3895 	  && faked_omit_frame_pointer)
3896 	return false;
3897     }
3898 
3899   return true;
3900 }
3901 
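/* Implement INITIAL_ELIMINATION_OFFSET.  Return the initial difference
   between the registers FROM and TO once the frame has been laid out.  */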
3902 HOST_WIDE_INT
3903 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3904 {
3905   HOST_WIDE_INT frame_size;
3906   HOST_WIDE_INT offset;
3907 
3908   aarch64_layout_frame ();
3909   frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3910 		+ crtl->outgoing_args_size
3911 		+ cfun->machine->saved_varargs_size);
3912 
3913    frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3914    offset = frame_size;
3915 
3916    if (to == HARD_FRAME_POINTER_REGNUM)
3917      {
3918        if (from == ARG_POINTER_REGNUM)
3919 	 return offset - crtl->outgoing_args_size;
3920 
3921        if (from == FRAME_POINTER_REGNUM)
3922 	 return cfun->machine->frame.saved_regs_size;
3923      }
3924 
3925    if (to == STACK_POINTER_REGNUM)
3926      {
3927        if (from == FRAME_POINTER_REGNUM)
3928          {
3929            HOST_WIDE_INT elim = crtl->outgoing_args_size
3930                               + cfun->machine->frame.saved_regs_size
3931                               - cfun->machine->frame.fp_lr_offset;
3932            elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3933            return elim;
3934          }
3935      }
3936 
3937    return offset;
3938 }
3939 
3940 
3941 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
3942    previous frame.  */
3943 
3944 rtx
3945 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3946 {
3947   if (count != 0)
3948     return const0_rtx;
3949   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3950 }
3951 
3952 
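/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  Output code that loads the
   target address and static chain from the two data words that follow,
   then branches to the target.  */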
3953 static void
3954 aarch64_asm_trampoline_template (FILE *f)
3955 {
3956   asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3957   asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3958   asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3959   assemble_aligned_integer (4, const0_rtx);
3960   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3961   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3962 }
3963 
3964 unsigned
3965 aarch64_trampoline_size (void)
3966 {
3967   return 32;  /* 3 insns + padding + 2 dwords.  */
3968 }
3969 
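/* Implement TARGET_TRAMPOLINE_INIT.  Copy the trampoline template into
   M_TRAMP, store the address of FNDECL and the static chain value
   CHAIN_VALUE in the trailing data words, and flush the icache.  */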
3970 static void
3971 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3972 {
3973   rtx fnaddr, mem, a_tramp;
3974 
3975   /* Don't need to copy the trailing D-words, we fill those in below.  */
3976   emit_block_move (m_tramp, assemble_trampoline_template (),
3977 		   GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3978   mem = adjust_address (m_tramp, DImode, 16);
3979   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3980   emit_move_insn (mem, fnaddr);
3981 
3982   mem = adjust_address (m_tramp, DImode, 24);
3983   emit_move_insn (mem, chain_value);
3984 
3985   /* XXX We should really define a "clear_cache" pattern and use
3986      gen_clear_cache().  */
3987   a_tramp = XEXP (m_tramp, 0);
3988   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3989 		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3990 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3991 }
3992 
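/* Implement TARGET_CLASS_MAX_NREGS.  Return the maximum number of
   consecutive registers of class REGCLASS needed to hold a value of
   mode MODE.  */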
3993 static unsigned char
3994 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3995 {
3996   switch (regclass)
3997     {
3998     case CORE_REGS:
3999     case POINTER_REGS:
4000     case GENERAL_REGS:
4001     case ALL_REGS:
4002     case FP_REGS:
4003     case FP_LO_REGS:
4004       return
4005 	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4006  				       (GET_MODE_SIZE (mode) + 7) / 8;
4007     case STACK_REG:
4008       return 1;
4009 
4010     case NO_REGS:
4011       return 0;
4012 
4013     default:
4014       break;
4015     }
4016   gcc_unreachable ();
4017 }
4018 
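/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Reloads targeting
   POINTER_REGS or STACK_REG are redirected to GENERAL_REGS.  */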
4019 static reg_class_t
4020 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4021 {
4022   return ((regclass == POINTER_REGS || regclass == STACK_REG)
4023 	  ? GENERAL_REGS : regclass);
4024 }
4025 
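/* Output to F a reference to the label NAME, applying the user label
   prefix.  */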
4026 void
4027 aarch64_asm_output_labelref (FILE* f, const char *name)
4028 {
4029   asm_fprintf (f, "%U%s", name);
4030 }
4031 
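/* Emit a static constructor entry for SYMBOL.  The default priority
   uses the generic handling; other priorities go into a dedicated
   .init_array.NNNNN section.  */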
4032 static void
4033 aarch64_elf_asm_constructor (rtx symbol, int priority)
4034 {
4035   if (priority == DEFAULT_INIT_PRIORITY)
4036     default_ctor_section_asm_out_constructor (symbol, priority);
4037   else
4038     {
4039       section *s;
4040       char buf[18];
4041       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4042       s = get_section (buf, SECTION_WRITE, NULL);
4043       switch_to_section (s);
4044       assemble_align (POINTER_SIZE);
4045       fputs ("\t.dword\t", asm_out_file);
4046       output_addr_const (asm_out_file, symbol);
4047       fputc ('\n', asm_out_file);
4048     }
4049 }
4050 
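/* Emit a static destructor entry for SYMBOL.  The default priority
   uses the generic handling; other priorities go into a dedicated
   .fini_array.NNNNN section.  */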
4051 static void
4052 aarch64_elf_asm_destructor (rtx symbol, int priority)
4053 {
4054   if (priority == DEFAULT_INIT_PRIORITY)
4055     default_dtor_section_asm_out_destructor (symbol, priority);
4056   else
4057     {
4058       section *s;
4059       char buf[18];
4060       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4061       s = get_section (buf, SECTION_WRITE, NULL);
4062       switch_to_section (s);
4063       assemble_align (POINTER_SIZE);
4064       fputs ("\t.dword\t", asm_out_file);
4065       output_addr_const (asm_out_file, symbol);
4066       fputc ('\n', asm_out_file);
4067     }
4068 }
4069 
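/* Output the assembly for a casesi dispatch sequence: load the table
   entry (byte, half-word or word, depending on the table's element
   size), scale it, add it to the address of the table and branch.  */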
4070 const char*
4071 aarch64_output_casesi (rtx *operands)
4072 {
4073   char buf[100];
4074   char label[100];
4075   rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4076   int index;
4077   static const char *const patterns[4][2] =
4078   {
4079     {
4080       "ldrb\t%w3, [%0,%w1,uxtw]",
4081       "add\t%3, %4, %w3, sxtb #2"
4082     },
4083     {
4084       "ldrh\t%w3, [%0,%w1,uxtw #1]",
4085       "add\t%3, %4, %w3, sxth #2"
4086     },
4087     {
4088       "ldr\t%w3, [%0,%w1,uxtw #2]",
4089       "add\t%3, %4, %w3, sxtw #2"
4090     },
4091     /* We assume that DImode is only generated when not optimizing and
4092        that we don't really need 64-bit address offsets.  That would
4093        imply an object file with 8GB of code in a single function!  */
4094     {
4095       "ldr\t%w3, [%0,%w1,uxtw #2]",
4096       "add\t%3, %4, %w3, sxtw #2"
4097     }
4098   };
4099 
4100   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4101 
4102   index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4103 
4104   gcc_assert (index >= 0 && index <= 3);
4105 
4106   /* Need to implement table size reduction, by changing the code below.  */
4107   output_asm_insn (patterns[index][0], operands);
4108   ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4109   snprintf (buf, sizeof (buf),
4110 	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
4111   output_asm_insn (buf, operands);
4112   output_asm_insn (patterns[index][1], operands);
4113   output_asm_insn ("br\t%3", operands);
4114   assemble_label (asm_out_file, label);
4115   return "";
4116 }
4117 
4118 
4119 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4120    masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4121    operator.  */
4122 
4123 int
4124 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4125 {
4126   if (shift >= 0 && shift <= 3)
4127     {
4128       int size;
4129       for (size = 8; size <= 32; size *= 2)
4130 	{
4131 	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4132 	  if (mask == bits << shift)
4133 	    return size;
4134 	}
4135     }
4136   return 0;
4137 }
4138 
4139 static bool
4140 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4141 				   const_rtx x ATTRIBUTE_UNUSED)
4142 {
4143   /* We can't use blocks for constants when we're using a per-function
4144      constant pool.  */
4145   return false;
4146 }
4147 
4148 static section *
4149 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4150 			    rtx x ATTRIBUTE_UNUSED,
4151 			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4152 {
4153   /* Force all constant pool entries into the current function section.  */
4154   return function_section (current_function_decl);
4155 }
4156 
4157 
4158 /* Costs.  */
4159 
4160 /* Helper function for rtx cost calculation.  Strip a shift expression
4161    from X.  Returns the inner operand if successful, or the original
4162    expression on failure.  */
4163 static rtx
4164 aarch64_strip_shift (rtx x)
4165 {
4166   rtx op = x;
4167 
4168   if ((GET_CODE (op) == ASHIFT
4169        || GET_CODE (op) == ASHIFTRT
4170        || GET_CODE (op) == LSHIFTRT)
4171       && CONST_INT_P (XEXP (op, 1)))
4172     return XEXP (op, 0);
4173 
4174   if (GET_CODE (op) == MULT
4175       && CONST_INT_P (XEXP (op, 1))
4176       && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4177     return XEXP (op, 0);
4178 
4179   return x;
4180 }
4181 
4182 /* Helper function for rtx cost calculation.  Strip a shift or extend
4183    expression from X.  Returns the inner operand if successful, or the
4184    original expression on failure.  We deal with a number of possible
4185    canonicalization variations here.  */
4186 static rtx
4187 aarch64_strip_shift_or_extend (rtx x)
4188 {
4189   rtx op = x;
4190 
4191   /* Zero and sign extraction of a widened value.  */
4192   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4193       && XEXP (op, 2) == const0_rtx
4194       && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4195 					 XEXP (op, 1)))
4196     return XEXP (XEXP (op, 0), 0);
4197 
4198   /* It can also be represented (for zero-extend) as an AND with an
4199      immediate.  */
4200   if (GET_CODE (op) == AND
4201       && GET_CODE (XEXP (op, 0)) == MULT
4202       && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4203       && CONST_INT_P (XEXP (op, 1))
4204       && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4205 			   INTVAL (XEXP (op, 1))) != 0)
4206     return XEXP (XEXP (op, 0), 0);
4207 
4208   /* Now handle extended register, as this may also have an optional
4209      left shift by 1..4.  */
4210   if (GET_CODE (op) == ASHIFT
4211       && CONST_INT_P (XEXP (op, 1))
4212       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4213     op = XEXP (op, 0);
4214 
4215   if (GET_CODE (op) == ZERO_EXTEND
4216       || GET_CODE (op) == SIGN_EXTEND)
4217     op = XEXP (op, 0);
4218 
4219   if (op != x)
4220     return op;
4221 
4222   return aarch64_strip_shift (x);
4223 }
4224 
4225 /* Calculate the cost of calculating X, storing it in *COST.  Result
4226    is true if the total cost of the operation has now been calculated.  */
4227 static bool
4228 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4229 		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4230 {
4231   rtx op0, op1;
4232   const struct cpu_rtx_cost_table *extra_cost
4233     = aarch64_tune_params->insn_extra_cost;
4234 
4235   switch (code)
4236     {
4237     case SET:
4238       op0 = SET_DEST (x);
4239       op1 = SET_SRC (x);
4240 
4241       switch (GET_CODE (op0))
4242 	{
4243 	case MEM:
4244 	  if (speed)
4245 	    *cost += extra_cost->memory_store;
4246 
4247 	  if (op1 != const0_rtx)
4248 	    *cost += rtx_cost (op1, SET, 1, speed);
4249 	  return true;
4250 
4251 	case SUBREG:
4252 	  if (! REG_P (SUBREG_REG (op0)))
4253 	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4254 	  /* Fall through.  */
4255 	case REG:
4256 	  /* Cost is just the cost of the RHS of the set.  */
4257 	  *cost += rtx_cost (op1, SET, 1, true);
4258 	  return true;
4259 
4260 	case ZERO_EXTRACT:  /* Bit-field insertion.  */
4261 	case SIGN_EXTRACT:
4262 	  /* Strip any redundant widening of the RHS to meet the width of
4263 	     the target.  */
4264 	  if (GET_CODE (op1) == SUBREG)
4265 	    op1 = SUBREG_REG (op1);
4266 	  if ((GET_CODE (op1) == ZERO_EXTEND
4267 	       || GET_CODE (op1) == SIGN_EXTEND)
4268 	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
4269 	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4270 		  >= INTVAL (XEXP (op0, 1))))
4271 	    op1 = XEXP (op1, 0);
4272 	  *cost += rtx_cost (op1, SET, 1, speed);
4273 	  return true;
4274 
4275 	default:
4276 	  break;
4277 	}
4278       return false;
4279 
4280     case MEM:
4281       if (speed)
4282 	*cost += extra_cost->memory_load;
4283 
4284       return true;
4285 
4286     case NEG:
4287       op0 = CONST0_RTX (GET_MODE (x));
4288       op1 = XEXP (x, 0);
4289       goto cost_minus;
4290 
4291     case COMPARE:
4292       op0 = XEXP (x, 0);
4293       op1 = XEXP (x, 1);
4294 
4295       if (op1 == const0_rtx
4296 	  && GET_CODE (op0) == AND)
4297 	{
4298 	  x = op0;
4299 	  goto cost_logic;
4300 	}
4301 
4302       /* Comparisons can work if the order is swapped.
4303 	 Canonicalization puts the more complex operation first, but
4304 	 we want it in op1.  */
4305       if (! (REG_P (op0)
4306 	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4307 	{
4308 	  op0 = XEXP (x, 1);
4309 	  op1 = XEXP (x, 0);
4310 	}
4311       goto cost_minus;
4312 
4313     case MINUS:
4314       op0 = XEXP (x, 0);
4315       op1 = XEXP (x, 1);
4316 
4317     cost_minus:
4318       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4319 	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4320 	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4321 	{
4322 	  if (op0 != const0_rtx)
4323 	    *cost += rtx_cost (op0, MINUS, 0, speed);
4324 
4325 	  if (CONST_INT_P (op1))
4326 	    {
4327 	      if (!aarch64_uimm12_shift (INTVAL (op1)))
4328 		*cost += rtx_cost (op1, MINUS, 1, speed);
4329 	    }
4330 	  else
4331 	    {
4332 	      op1 = aarch64_strip_shift_or_extend (op1);
4333 	      *cost += rtx_cost (op1, MINUS, 1, speed);
4334 	    }
4335 	  return true;
4336 	}
4337 
4338       return false;
4339 
4340     case PLUS:
4341       op0 = XEXP (x, 0);
4342       op1 = XEXP (x, 1);
4343 
4344       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4345 	{
4346 	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4347 	    {
4348 	      *cost += rtx_cost (op0, PLUS, 0, speed);
4349 	    }
4350 	  else
4351 	    {
4352 	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4353 
4354 	      if (new_op0 == op0
4355 		  && GET_CODE (op0) == MULT)
4356 		{
4357 		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4358 		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4359 		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4360 			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4361 		    {
4362 		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4363 					  speed)
4364 				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4365 					    speed)
4366 				+ rtx_cost (op1, PLUS, 1, speed));
4367 		      if (speed)
4368 			*cost += extra_cost->int_multiply_extend_add;
4369 		      return true;
4370 		    }
4371 		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4372 			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4373 			    + rtx_cost (op1, PLUS, 1, speed));
4374 
4375 		  if (speed)
4376 		    *cost += extra_cost->int_multiply_add;
		  return true;
4377 		}
4378 
4379 	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4380 			+ rtx_cost (op1, PLUS, 1, speed));
4381 	    }
4382 	  return true;
4383 	}
4384 
4385       return false;
4386 
4387     case IOR:
4388     case XOR:
4389     case AND:
4390     cost_logic:
4391       op0 = XEXP (x, 0);
4392       op1 = XEXP (x, 1);
4393 
4394       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4395 	{
4396 	  if (CONST_INT_P (op1)
4397 	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4398 	    {
4399 	      *cost += rtx_cost (op0, AND, 0, speed);
4400 	    }
4401 	  else
4402 	    {
4403 	      if (GET_CODE (op0) == NOT)
4404 		op0 = XEXP (op0, 0);
4405 	      op0 = aarch64_strip_shift (op0);
4406 	      *cost += (rtx_cost (op0, AND, 0, speed)
4407 			+ rtx_cost (op1, AND, 1, speed));
4408 	    }
4409 	  return true;
4410 	}
4411       return false;
4412 
4413     case ZERO_EXTEND:
4414       if ((GET_MODE (x) == DImode
4415 	   && GET_MODE (XEXP (x, 0)) == SImode)
4416 	  || GET_CODE (XEXP (x, 0)) == MEM)
4417 	{
4418 	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4419 	  return true;
4420 	}
4421       return false;
4422 
4423     case SIGN_EXTEND:
4424       if (GET_CODE (XEXP (x, 0)) == MEM)
4425 	{
4426 	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4427 	  return true;
4428 	}
4429       return false;
4430 
4431     case ROTATE:
4432       if (!CONST_INT_P (XEXP (x, 1)))
4433 	*cost += COSTS_N_INSNS (2);
4434       /* Fall through.  */
4435     case ROTATERT:
4436     case LSHIFTRT:
4437     case ASHIFT:
4438     case ASHIFTRT:
4439 
4440       /* Shifting by a register often takes an extra cycle.  */
4441       if (speed && !CONST_INT_P (XEXP (x, 1)))
4442 	*cost += extra_cost->register_shift;
4443 
4444       *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4445       return true;
4446 
4447     case HIGH:
4448       if (!CONSTANT_P (XEXP (x, 0)))
4449 	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4450       return true;
4451 
4452     case LO_SUM:
4453       if (!CONSTANT_P (XEXP (x, 1)))
4454 	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4455       *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4456       return true;
4457 
4458     case ZERO_EXTRACT:
4459     case SIGN_EXTRACT:
4460       *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4461       return true;
4462 
4463     case MULT:
4464       op0 = XEXP (x, 0);
4465       op1 = XEXP (x, 1);
4466 
4467       *cost = COSTS_N_INSNS (1);
4468       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4469 	{
4470 	  if (CONST_INT_P (op1)
4471 	      && exact_log2 (INTVAL (op1)) > 0)
4472 	    {
4473 	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
4474 	      return true;
4475 	    }
4476 
4477 	  if ((GET_CODE (op0) == ZERO_EXTEND
4478 	       && GET_CODE (op1) == ZERO_EXTEND)
4479 	      || (GET_CODE (op0) == SIGN_EXTEND
4480 		  && GET_CODE (op1) == SIGN_EXTEND))
4481 	    {
4482 	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4483 			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4484 	      if (speed)
4485 		*cost += extra_cost->int_multiply_extend;
4486 	      return true;
4487 	    }
4488 
4489 	  if (speed)
4490 	    *cost += extra_cost->int_multiply;
4491 	}
4492       else if (speed)
4493 	{
4494 	  if (GET_MODE (x) == DFmode)
4495 	    *cost += extra_cost->double_multiply;
4496 	  else if (GET_MODE (x) == SFmode)
4497 	    *cost += extra_cost->float_multiply;
4498 	}
4499 
4500       return false;  /* All arguments need to be in registers.  */
4501 
4502     case MOD:
4503     case UMOD:
4504       *cost = COSTS_N_INSNS (2);
4505       if (speed)
4506 	{
4507 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4508 	    *cost += (extra_cost->int_multiply_add
4509 		      + extra_cost->int_divide);
4510 	  else if (GET_MODE (x) == DFmode)
4511 	    *cost += (extra_cost->double_multiply
4512 		      + extra_cost->double_divide);
4513 	  else if (GET_MODE (x) == SFmode)
4514 	    *cost += (extra_cost->float_multiply
4515 		      + extra_cost->float_divide);
4516 	}
4517       return false;  /* All arguments need to be in registers.  */
4518 
4519     case DIV:
4520     case UDIV:
4521       *cost = COSTS_N_INSNS (1);
4522       if (speed)
4523 	{
4524 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4525 	    *cost += extra_cost->int_divide;
4526 	  else if (GET_MODE (x) == DFmode)
4527 	    *cost += extra_cost->double_divide;
4528 	  else if (GET_MODE (x) == SFmode)
4529 	    *cost += extra_cost->float_divide;
4530 	}
4531       return false;  /* All arguments need to be in registers.  */
4532 
4533     default:
4534       break;
4535     }
4536   return false;
4537 }
4538 
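/* Implement TARGET_ADDRESS_COST.  Return the cost of address X, taken
   from the tuning-specific address cost table.  */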
4539 static int
4540 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4541 		  enum machine_mode mode ATTRIBUTE_UNUSED,
4542 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4543 {
4544   enum rtx_code c  = GET_CODE (x);
4545   const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4546 
4547   if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4548     return addr_cost->pre_modify;
4549 
4550   if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4551     return addr_cost->post_modify;
4552 
4553   if (c == PLUS)
4554     {
4555       if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4556 	return addr_cost->imm_offset;
4557       else if (GET_CODE (XEXP (x, 0)) == MULT
4558 	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4559 	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4560 	return addr_cost->register_extend;
4561 
4562       return addr_cost->register_offset;
4563     }
4564   else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4565     return addr_cost->imm_offset;
4566 
4567   return 0;
4568 }
4569 
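/* Implement TARGET_REGISTER_MOVE_COST.  Return the cost of moving a
   value between register classes FROM and TO.  */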
4570 static int
4571 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4572 			    reg_class_t from, reg_class_t to)
4573 {
4574   const struct cpu_regmove_cost *regmove_cost
4575     = aarch64_tune_params->regmove_cost;
4576 
4577   if (from == GENERAL_REGS && to == GENERAL_REGS)
4578     return regmove_cost->GP2GP;
4579   else if (from == GENERAL_REGS)
4580     return regmove_cost->GP2FP;
4581   else if (to == GENERAL_REGS)
4582     return regmove_cost->FP2GP;
4583 
4584   /* When AdvSIMD instructions are disabled it is not possible to move
4585      a 128-bit value directly between Q registers.  This is handled in
4586      secondary reload.  A general register is used as a scratch to move
4587      the upper DI value and the lower DI value is moved directly,
4588      hence the cost is the sum of three moves. */
4589 
4590   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4591     return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4592 
4593   return regmove_cost->FP2FP;
4594 }
4595 
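/* Implement TARGET_MEMORY_MOVE_COST.  */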
4596 static int
4597 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4598 			  reg_class_t rclass ATTRIBUTE_UNUSED,
4599 			  bool in ATTRIBUTE_UNUSED)
4600 {
4601   return aarch64_tune_params->memmov_cost;
4602 }
4603 
4604 static void initialize_aarch64_code_model (void);
4605 
4606 /* Parse the architecture extension string.  */
4607 
4608 static void
4609 aarch64_parse_extension (char *str)
4610 {
4611   /* The extension string is parsed left to right.  */
4612   const struct aarch64_option_extension *opt = NULL;
4613 
4614   /* Flag to say whether we are adding or removing an extension.  */
4615   int adding_ext = -1;
4616 
4617   while (str != NULL && *str != 0)
4618     {
4619       char *ext;
4620       size_t len;
4621 
4622       str++;
4623       ext = strchr (str, '+');
4624 
4625       if (ext != NULL)
4626 	len = ext - str;
4627       else
4628 	len = strlen (str);
4629 
4630       if (len >= 2 && strncmp (str, "no", 2) == 0)
4631 	{
4632 	  adding_ext = 0;
4633 	  len -= 2;
4634 	  str += 2;
4635 	}
4636       else if (len > 0)
4637 	adding_ext = 1;
4638 
4639       if (len == 0)
4640 	{
4641 	  error ("missing feature modifier after %qs", "+no");
4642 	  return;
4643 	}
4644 
4645       /* Scan over the extensions table trying to find an exact match.  */
4646       for (opt = all_extensions; opt->name != NULL; opt++)
4647 	{
4648 	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4649 	    {
4650 	      /* Add or remove the extension.  */
4651 	      if (adding_ext)
4652 		aarch64_isa_flags |= opt->flags_on;
4653 	      else
4654 		aarch64_isa_flags &= ~(opt->flags_off);
4655 	      break;
4656 	    }
4657 	}
4658 
4659       if (opt->name == NULL)
4660 	{
4661 	  /* Extension not found in list.  */
4662 	  error ("unknown feature modifier %qs", str);
4663 	  return;
4664 	}
4665 
4666       str = ext;
4667     };
4668 
4669   return;
4670 }
4671 
4672 /* Parse the ARCH string.  */
4673 
4674 static void
4675 aarch64_parse_arch (void)
4676 {
4677   char *ext;
4678   const struct processor *arch;
4679   char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4680   size_t len;
4681 
4682   strcpy (str, aarch64_arch_string);
4683 
4684   ext = strchr (str, '+');
4685 
4686   if (ext != NULL)
4687     len = ext - str;
4688   else
4689     len = strlen (str);
4690 
4691   if (len == 0)
4692     {
4693       error ("missing arch name in -march=%qs", str);
4694       return;
4695     }
4696 
4697   /* Loop through the list of supported ARCHs to find a match.  */
4698   for (arch = all_architectures; arch->name != NULL; arch++)
4699     {
4700       if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4701 	{
4702 	  selected_arch = arch;
4703 	  aarch64_isa_flags = selected_arch->flags;
4704 	  selected_cpu = &all_cores[selected_arch->core];
4705 
4706 	  if (ext != NULL)
4707 	    {
4708 	      /* ARCH string contains at least one extension.  */
4709 	      aarch64_parse_extension (ext);
4710 	    }
4711 
4712 	  return;
4713 	}
4714     }
4715 
4716   /* ARCH name not found in list.  */
4717   error ("unknown value %qs for -march", str);
4718   return;
4719 }
4720 
4721 /* Parse the CPU string.  */
4722 
4723 static void
4724 aarch64_parse_cpu (void)
4725 {
4726   char *ext;
4727   const struct processor *cpu;
4728   char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4729   size_t len;
4730 
4731   strcpy (str, aarch64_cpu_string);
4732 
4733   ext = strchr (str, '+');
4734 
4735   if (ext != NULL)
4736     len = ext - str;
4737   else
4738     len = strlen (str);
4739 
4740   if (len == 0)
4741     {
4742       error ("missing cpu name in -mcpu=%qs", str);
4743       return;
4744     }
4745 
4746   /* Loop through the list of supported CPUs to find a match.  */
4747   for (cpu = all_cores; cpu->name != NULL; cpu++)
4748     {
4749       if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4750 	{
4751 	  selected_cpu = cpu;
4752 	  aarch64_isa_flags = selected_cpu->flags;
4753 
4754 	  if (ext != NULL)
4755 	    {
4756 	      /* CPU string contains at least one extension.  */
4757 	      aarch64_parse_extension (ext);
4758 	    }
4759 
4760 	  return;
4761 	}
4762     }
4763 
4764   /* CPU name not found in list.  */
4765   error ("unknown value %qs for -mcpu", str);
4766   return;
4767 }
4768 
4769 /* Parse the TUNE string.  */
4770 
4771 static void
4772 aarch64_parse_tune (void)
4773 {
4774   const struct processor *cpu;
4775   char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4776   strcpy (str, aarch64_tune_string);
4777 
4778   /* Loop through the list of supported CPUs to find a match.  */
4779   for (cpu = all_cores; cpu->name != NULL; cpu++)
4780     {
4781       if (strcmp (cpu->name, str) == 0)
4782 	{
4783 	  selected_tune = cpu;
4784 	  return;
4785 	}
4786     }
4787 
4788   /* CPU name not found in list.  */
4789   error ("unknown value %qs for -mtune", str);
4790   return;
4791 }
4792 
4793 
4794 /* Implement TARGET_OPTION_OVERRIDE.  */
4795 
4796 static void
4797 aarch64_override_options (void)
4798 {
4799   /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4800      otherwise march remains undefined.  mtune can be used with either march or
4801      mcpu.  */
4802 
4803   if (aarch64_arch_string)
4804     {
4805       aarch64_parse_arch ();
4806       aarch64_cpu_string = NULL;
4807     }
4808 
4809   if (aarch64_cpu_string)
4810     {
4811       aarch64_parse_cpu ();
4812       selected_arch = NULL;
4813     }
4814 
4815   if (aarch64_tune_string)
4816     {
4817       aarch64_parse_tune ();
4818     }
4819 
4820   initialize_aarch64_code_model ();
4821 
4822   aarch64_build_bitmask_table ();
4823 
4824   /* This target defaults to strict volatile bitfields.  */
4825   if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4826     flag_strict_volatile_bitfields = 1;
4827 
4828   /* If the user did not specify a processor, choose the default
4829      one for them.  This will be the CPU set during configuration using
4830      --with-cpu, otherwise it is "generic".  */
4831   if (!selected_cpu)
4832     {
4833       selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4834       aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4835     }
4836 
4837   gcc_assert (selected_cpu);
4838 
4839   /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
4840   if (!selected_tune)
4841     selected_tune = &all_cores[selected_cpu->core];
4842 
4843   aarch64_tune_flags = selected_tune->flags;
4844   aarch64_tune = selected_tune->core;
4845   aarch64_tune_params = selected_tune->tune;
4846 
4847   aarch64_override_options_after_change ();
4848 }
4849 
4850 /* Implement targetm.override_options_after_change.  */
4851 
4852 static void
4853 aarch64_override_options_after_change (void)
4854 {
4855   faked_omit_frame_pointer = false;
4856 
4857   /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4858      that aarch64_frame_pointer_required will be called.  We need to remember
4859      whether flag_omit_frame_pointer was turned on normally or just faked.  */
4860 
4861   if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4862     {
4863       flag_omit_frame_pointer = true;
4864       faked_omit_frame_pointer = true;
4865     }
4866 }
4867 
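/* Allocate a fresh, zeroed machine_function structure for the current
   function.  */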
4868 static struct machine_function *
4869 aarch64_init_machine_status (void)
4870 {
4871   struct machine_function *machine;
4872   machine = ggc_alloc_cleared_machine_function ();
4873   return machine;
4874 }
4875 
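/* Install aarch64_init_machine_status as the per-function machine
   status allocator.  */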
4876 void
4877 aarch64_init_expanders (void)
4878 {
4879   init_machine_status = aarch64_init_machine_status;
4880 }
4881 
4882 /* A checking mechanism for the implementation of the various code models.  */
4883 static void
4884 initialize_aarch64_code_model (void)
4885 {
4886    if (flag_pic)
4887      {
4888        switch (aarch64_cmodel_var)
4889 	 {
4890 	 case AARCH64_CMODEL_TINY:
4891 	   aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4892 	   break;
4893 	 case AARCH64_CMODEL_SMALL:
4894 	   aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4895 	   break;
4896 	 case AARCH64_CMODEL_LARGE:
4897 	   sorry ("code model %qs with -f%s", "large",
4898 		  flag_pic > 1 ? "PIC" : "pic");
4899 	 default:
4900 	   gcc_unreachable ();
4901 	 }
4902      }
4903    else
4904      aarch64_cmodel = aarch64_cmodel_var;
4905 }
4906 
4907 /* Return true if SYMBOL_REF X binds locally.  */
4908 
4909 static bool
4910 aarch64_symbol_binds_local_p (const_rtx x)
4911 {
4912   return (SYMBOL_REF_DECL (x)
4913 	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4914 	  : SYMBOL_REF_LOCAL_P (x));
4915 }
4916 
4917 /* Return true if SYMBOL_REF X is thread local */
4918 static bool
4919 aarch64_tls_symbol_p (rtx x)
4920 {
4921   if (! TARGET_HAVE_TLS)
4922     return false;
4923 
4924   if (GET_CODE (x) != SYMBOL_REF)
4925     return false;
4926 
4927   return SYMBOL_REF_TLS_MODEL (x) != 0;
4928 }
4929 
4930 /* Classify a TLS symbol into one of the TLS kinds.  */
4931 enum aarch64_symbol_type
4932 aarch64_classify_tls_symbol (rtx x)
4933 {
4934   enum tls_model tls_kind = tls_symbolic_operand_type (x);
4935 
4936   switch (tls_kind)
4937     {
4938     case TLS_MODEL_GLOBAL_DYNAMIC:
4939     case TLS_MODEL_LOCAL_DYNAMIC:
4940       return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4941 
4942     case TLS_MODEL_INITIAL_EXEC:
4943       return SYMBOL_SMALL_GOTTPREL;
4944 
4945     case TLS_MODEL_LOCAL_EXEC:
4946       return SYMBOL_SMALL_TPREL;
4947 
4948     case TLS_MODEL_EMULATED:
4949     case TLS_MODEL_NONE:
4950       return SYMBOL_FORCE_TO_MEM;
4951 
4952     default:
4953       gcc_unreachable ();
4954     }
4955 }
4956 
4957 /* Return the method that should be used to access SYMBOL_REF or
4958    LABEL_REF X in context CONTEXT.  */
4959 enum aarch64_symbol_type
4960 aarch64_classify_symbol (rtx x,
4961 			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4962 {
4963   if (GET_CODE (x) == LABEL_REF)
4964     {
4965       switch (aarch64_cmodel)
4966 	{
4967 	case AARCH64_CMODEL_LARGE:
4968 	  return SYMBOL_FORCE_TO_MEM;
4969 
4970 	case AARCH64_CMODEL_TINY_PIC:
4971 	case AARCH64_CMODEL_TINY:
4972 	case AARCH64_CMODEL_SMALL_PIC:
4973 	case AARCH64_CMODEL_SMALL:
4974 	  return SYMBOL_SMALL_ABSOLUTE;
4975 
4976 	default:
4977 	  gcc_unreachable ();
4978 	}
4979     }
4980 
4981   gcc_assert (GET_CODE (x) == SYMBOL_REF);
4982 
4983   switch (aarch64_cmodel)
4984     {
4985     case AARCH64_CMODEL_LARGE:
4986       return SYMBOL_FORCE_TO_MEM;
4987 
4988     case AARCH64_CMODEL_TINY:
4989     case AARCH64_CMODEL_SMALL:
4990 
4991       /* This is needed to get DFmode and TImode constants loaded from
4992          the constant pool.  TImode values in particular must be dumped
4993          into the constant pool: we don't handle TImode constant loads
4994          properly yet and hence need to go via memory.  */
4995       if (CONSTANT_POOL_ADDRESS_P (x))
4996 	return SYMBOL_FORCE_TO_MEM;
4997 
4998       if (aarch64_tls_symbol_p (x))
4999 	return aarch64_classify_tls_symbol (x);
5000 
5001       if (SYMBOL_REF_WEAK (x))
5002 	return SYMBOL_FORCE_TO_MEM;
5003 
5004       return SYMBOL_SMALL_ABSOLUTE;
5005 
5006     case AARCH64_CMODEL_TINY_PIC:
5007     case AARCH64_CMODEL_SMALL_PIC:
5008 
5009       if (CONSTANT_POOL_ADDRESS_P (x))
5010 	return SYMBOL_FORCE_TO_MEM;
5011 
5012       if (aarch64_tls_symbol_p (x))
5013 	return aarch64_classify_tls_symbol (x);
5014 
5015       if (!aarch64_symbol_binds_local_p (x))
5016 	return SYMBOL_SMALL_GOT;
5017 
5018       return SYMBOL_SMALL_ABSOLUTE;
5019 
5020     default:
5021       gcc_unreachable ();
5022     }
5023   /* By default push everything into the constant pool.  */
5024   return SYMBOL_FORCE_TO_MEM;
5025 }
5026 
5027 /* Return true if X is a symbolic constant that can be used in context
5028    CONTEXT.  If it is, store the type of the symbol in *SYMBOL_TYPE.  */
5029 
5030 bool
5031 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5032 			     enum aarch64_symbol_type *symbol_type)
5033 {
5034   rtx offset;
5035   split_const (x, &x, &offset);
5036   if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5037     *symbol_type = aarch64_classify_symbol (x, context);
5038   else
5039     return false;
5040 
5041   /* No checking of offset at this point.  */
5042   return true;
5043 }
5044 
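/* Return true if X is a constant that is also a valid memory address.  */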
5045 bool
5046 aarch64_constant_address_p (rtx x)
5047 {
5048   return (CONSTANT_P (x) && memory_address_p (DImode, x));
5049 }
5050 
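/* Return true if X is a legitimate operand when generating PIC code.
   Bare symbolic references, and symbol-plus-offset constants, must be
   legitimized first and are therefore rejected here.  */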
5051 bool
5052 aarch64_legitimate_pic_operand_p (rtx x)
5053 {
5054   if (GET_CODE (x) == SYMBOL_REF
5055       || (GET_CODE (x) == CONST
5056 	  && GET_CODE (XEXP (x, 0)) == PLUS
5057 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5058      return false;
5059 
5060   return true;
5061 }
5062 
5063 /* Return true if X holds either a quarter-precision or
5064    floating-point +0.0 constant.  */
5065 static bool
5066 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5067 {
5068   if (!CONST_DOUBLE_P (x))
5069     return false;
5070 
5071   /* TODO: We could handle moving 0.0 to a TFmode register,
5072      but first we would like to refactor the movtf_aarch64
5073      to be more amicable to split moves properly and
5074      correctly gate on TARGET_SIMD.  For now - reject all
5075      constants which are not to SFmode or DFmode registers.  */
5076   if (!(mode == SFmode || mode == DFmode))
5077     return false;
5078 
5079   if (aarch64_float_const_zero_rtx_p (x))
5080     return true;
5081   return aarch64_float_const_representable_p (x);
5082 }
5083 
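/* Implement TARGET_LEGITIMATE_CONSTANT_P.  Return true if X is a
   constant that can be used directly as an operand of mode MODE.  */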
5084 static bool
5085 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5086 {
5087   /* Do not allow vector struct mode constants.  We could support
5088      0 and -1 easily, but they need support in aarch64-simd.md.  */
5089   if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5090     return false;
5091 
5092   /* This could probably go away because
5093      we now decompose CONST_INTs according to expand_mov_immediate.  */
5094   if ((GET_CODE (x) == CONST_VECTOR
5095        && aarch64_simd_valid_immediate (x, mode, false,
5096 					NULL, NULL, NULL, NULL, NULL) != -1)
5097       || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5098 	return !targetm.cannot_force_const_mem (mode, x);
5099 
5100   if (GET_CODE (x) == HIGH
5101       && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5102     return true;
5103 
5104   return aarch64_constant_address_p (x);
5105 }
5106 
5107 rtx
5108 aarch64_load_tp (rtx target)
5109 {
5110   if (!target
5111       || GET_MODE (target) != Pmode
5112       || !register_operand (target, Pmode))
5113     target = gen_reg_rtx (Pmode);
5114 
5115   /* Can return in any reg.  */
5116   emit_insn (gen_aarch64_load_tp_hard (target));
5117   return target;
5118 }
5119 
5120 /* On AAPCS systems, this is the "struct __va_list".  */
5121 static GTY(()) tree va_list_type;
5122 
5123 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5124    Return the type to use as __builtin_va_list.
5125 
5126    AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5127 
5128    struct __va_list
5129    {
5130      void *__stack;
5131      void *__gr_top;
5132      void *__vr_top;
5133      int   __gr_offs;
5134      int   __vr_offs;
5135    };  */
5136 
5137 static tree
5138 aarch64_build_builtin_va_list (void)
5139 {
5140   tree va_list_name;
5141   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5142 
5143   /* Create the type.  */
5144   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5145   /* Give it the required name.  */
5146   va_list_name = build_decl (BUILTINS_LOCATION,
5147 			     TYPE_DECL,
5148 			     get_identifier ("__va_list"),
5149 			     va_list_type);
5150   DECL_ARTIFICIAL (va_list_name) = 1;
5151   TYPE_NAME (va_list_type) = va_list_name;
5152   TYPE_STUB_DECL (va_list_type) = va_list_name;
5153 
5154   /* Create the fields.  */
5155   f_stack = build_decl (BUILTINS_LOCATION,
5156 			FIELD_DECL, get_identifier ("__stack"),
5157 			ptr_type_node);
5158   f_grtop = build_decl (BUILTINS_LOCATION,
5159 			FIELD_DECL, get_identifier ("__gr_top"),
5160 			ptr_type_node);
5161   f_vrtop = build_decl (BUILTINS_LOCATION,
5162 			FIELD_DECL, get_identifier ("__vr_top"),
5163 			ptr_type_node);
5164   f_groff = build_decl (BUILTINS_LOCATION,
5165 			FIELD_DECL, get_identifier ("__gr_offs"),
5166 			integer_type_node);
5167   f_vroff = build_decl (BUILTINS_LOCATION,
5168 			FIELD_DECL, get_identifier ("__vr_offs"),
5169 			integer_type_node);
5170 
5171   DECL_ARTIFICIAL (f_stack) = 1;
5172   DECL_ARTIFICIAL (f_grtop) = 1;
5173   DECL_ARTIFICIAL (f_vrtop) = 1;
5174   DECL_ARTIFICIAL (f_groff) = 1;
5175   DECL_ARTIFICIAL (f_vroff) = 1;
5176 
5177   DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5178   DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5179   DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5180   DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5181   DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5182 
5183   TYPE_FIELDS (va_list_type) = f_stack;
5184   DECL_CHAIN (f_stack) = f_grtop;
5185   DECL_CHAIN (f_grtop) = f_vrtop;
5186   DECL_CHAIN (f_vrtop) = f_groff;
5187   DECL_CHAIN (f_groff) = f_vroff;
5188 
5189   /* Compute its layout.  */
5190   layout_type (va_list_type);
5191 
5192   return va_list_type;
5193 }
5194 
5195 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
5196 static void
5197 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5198 {
5199   const CUMULATIVE_ARGS *cum;
5200   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5201   tree stack, grtop, vrtop, groff, vroff;
5202   tree t;
5203   int gr_save_area_size;
5204   int vr_save_area_size;
5205   int vr_offset;
5206 
5207   cum = &crtl->args.info;
5208   gr_save_area_size
5209     = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5210   vr_save_area_size
5211     = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5212 
5213   if (TARGET_GENERAL_REGS_ONLY)
5214     {
5215       if (cum->aapcs_nvrn > 0)
5216 	sorry ("%qs and floating point or vector arguments",
5217 	       "-mgeneral-regs-only");
5218       vr_save_area_size = 0;
5219     }
5220 
5221   f_stack = TYPE_FIELDS (va_list_type_node);
5222   f_grtop = DECL_CHAIN (f_stack);
5223   f_vrtop = DECL_CHAIN (f_grtop);
5224   f_groff = DECL_CHAIN (f_vrtop);
5225   f_vroff = DECL_CHAIN (f_groff);
5226 
5227   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5228 		  NULL_TREE);
5229   grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5230 		  NULL_TREE);
5231   vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5232 		  NULL_TREE);
5233   groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5234 		  NULL_TREE);
5235   vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5236 		  NULL_TREE);
5237 
5238   /* Emit code to initialize STACK, which points to the next varargs stack
5239      argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
5240      by named arguments.  STACK is 8-byte aligned.  */
5241   t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5242   if (cum->aapcs_stack_size > 0)
5243     t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5244   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5245   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246 
5247   /* Emit code to initialize GRTOP, the top of the GR save area.
5248      virtual_incoming_args_rtx should have been 16 byte aligned.  */
5249   t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5250   t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5251   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252 
5253   /* Emit code to initialize VRTOP, the top of the VR save area.
5254      This address is gr_save_area_bytes below GRTOP, rounded
5255      down to the next 16-byte boundary.  */
5256   t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5257   vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5258 			     STACK_BOUNDARY / BITS_PER_UNIT);
5259 
5260   if (vr_offset)
5261     t = fold_build_pointer_plus_hwi (t, -vr_offset);
5262   t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5263   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5264 
5265   /* Emit code to initialize GROFF, the offset from GRTOP of the
5266      next GPR argument.  */
5267   t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5268 	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5269   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5270 
5271   /* Likewise emit code to initialize VROFF, the offset from VRTOP
5272      of the next VR argument.  */
5273   t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5274 	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5275   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5276 }
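
/* For illustration only (a sketch, not code the compiler emits verbatim):
   for a varargs callee the expansion above behaves roughly like the
   following C, where GR_SAVE and VR_SAVE stand for gr_save_area_size and
   vr_save_area_size:

     ap.__stack   = <address of the first stack-passed vararg>;
     ap.__gr_top  = <incoming argument pointer>;
     ap.__vr_top  = ap.__gr_top - AARCH64_ROUND_UP (GR_SAVE, 16);
     ap.__gr_offs = -GR_SAVE;
     ap.__vr_offs = -VR_SAVE;  */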
5277 
5278 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
5279 
5280 static tree
5281 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5282 			      gimple_seq *post_p ATTRIBUTE_UNUSED)
5283 {
5284   tree addr;
5285   bool indirect_p;
5286   bool is_ha;		/* is HFA or HVA.  */
5287   bool dw_align;	/* double-word align.  */
5288   enum machine_mode ag_mode = VOIDmode;
5289   int nregs;
5290   enum machine_mode mode;
5291 
5292   tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5293   tree stack, f_top, f_off, off, arg, roundup, on_stack;
5294   HOST_WIDE_INT size, rsize, adjust, align;
5295   tree t, u, cond1, cond2;
5296 
5297   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5298   if (indirect_p)
5299     type = build_pointer_type (type);
5300 
5301   mode = TYPE_MODE (type);
5302 
5303   f_stack = TYPE_FIELDS (va_list_type_node);
5304   f_grtop = DECL_CHAIN (f_stack);
5305   f_vrtop = DECL_CHAIN (f_grtop);
5306   f_groff = DECL_CHAIN (f_vrtop);
5307   f_vroff = DECL_CHAIN (f_groff);
5308 
5309   stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5310 		  f_stack, NULL_TREE);
5311   size = int_size_in_bytes (type);
5312   align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5313 
5314   dw_align = false;
5315   adjust = 0;
5316   if (aarch64_vfp_is_call_or_return_candidate (mode,
5317 					       type,
5318 					       &ag_mode,
5319 					       &nregs,
5320 					       &is_ha))
5321     {
5322       /* TYPE passed in fp/simd registers.  */
5323       if (TARGET_GENERAL_REGS_ONLY)
5324 	sorry ("%qs and floating point or vector arguments",
5325 	       "-mgeneral-regs-only");
5326 
5327       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5328 		      unshare_expr (valist), f_vrtop, NULL_TREE);
5329       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5330 		      unshare_expr (valist), f_vroff, NULL_TREE);
5331 
5332       rsize = nregs * UNITS_PER_VREG;
5333 
5334       if (is_ha)
5335 	{
5336 	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5337 	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5338 	}
5339       else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5340 	       && size < UNITS_PER_VREG)
5341 	{
5342 	  adjust = UNITS_PER_VREG - size;
5343 	}
5344     }
5345   else
5346     {
5347       /* TYPE passed in general registers.  */
5348       f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5349 		      unshare_expr (valist), f_grtop, NULL_TREE);
5350       f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5351 		      unshare_expr (valist), f_groff, NULL_TREE);
5352       rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5353       nregs = rsize / UNITS_PER_WORD;
5354 
5355       if (align > 8)
5356 	dw_align = true;
5357 
5358       if (BLOCK_REG_PADDING (mode, type, 1) == downward
5359 	  && size < UNITS_PER_WORD)
5360 	{
5361 	  adjust = UNITS_PER_WORD - size;
5362 	}
5363     }
5364 
5365   /* Get a local temporary for the field value.  */
5366   off = get_initialized_tmp_var (f_off, pre_p, NULL);
5367 
5368   /* Emit code to branch if off >= 0.  */
5369   t = build2 (GE_EXPR, boolean_type_node, off,
5370 	      build_int_cst (TREE_TYPE (off), 0));
5371   cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5372 
5373   if (dw_align)
5374     {
5375       /* Emit: offs = (offs + 15) & -16.  */
5376       t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5377 		  build_int_cst (TREE_TYPE (off), 15));
5378       t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5379 		  build_int_cst (TREE_TYPE (off), -16));
5380       roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5381     }
5382   else
5383     roundup = NULL;
5384 
5385   /* Update ap.__[g|v]r_offs  */
5386   t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5387 	      build_int_cst (TREE_TYPE (off), rsize));
5388   t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5389 
5390   /* String up.  */
5391   if (roundup)
5392     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5393 
5394   /* [cond2] if (ap.__[g|v]r_offs > 0)  */
5395   u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5396 	      build_int_cst (TREE_TYPE (f_off), 0));
5397   cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5398 
5399   /* String up: make sure the assignment happens before the use.  */
5400   t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5401   COND_EXPR_ELSE (cond1) = t;
5402 
5403   /* Prepare the trees handling the argument that is passed on the stack;
5404      the top-level node will be stored in ON_STACK.  */
5405   arg = get_initialized_tmp_var (stack, pre_p, NULL);
5406   if (align > 8)
5407     {
5408       /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
5409       t = fold_convert (intDI_type_node, arg);
5410       t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5411 		  build_int_cst (TREE_TYPE (t), 15));
5412       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5413 		  build_int_cst (TREE_TYPE (t), -16));
5414       t = fold_convert (TREE_TYPE (arg), t);
5415       roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5416     }
5417   else
5418     roundup = NULL;
5419   /* Advance ap.__stack  */
5420   t = fold_convert (intDI_type_node, arg);
5421   t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5422 	      build_int_cst (TREE_TYPE (t), size + 7));
5423   t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5424 	      build_int_cst (TREE_TYPE (t), -8));
5425   t = fold_convert (TREE_TYPE (arg), t);
5426   t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5427   /* String up roundup and advance.  */
5428   if (roundup)
5429     t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5430   /* String up with arg */
5431   on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5432   /* Big-endianness related address adjustment.  */
5433   if (BLOCK_REG_PADDING (mode, type, 1) == downward
5434       && size < UNITS_PER_WORD)
5435   {
5436     t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5437 		size_int (UNITS_PER_WORD - size));
5438     on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5439   }
5440 
5441   COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5442   COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5443 
5444   /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
5445   t = off;
5446   if (adjust)
5447     t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5448 		build_int_cst (TREE_TYPE (off), adjust));
5449 
5450   t = fold_convert (sizetype, t);
5451   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5452 
5453   if (is_ha)
5454     {
5455       /* type ha; // treat as "struct {ftype field[n];}"
5456          ... [computing offs]
5457          for (i = 0; i < nregs; ++i, offs += 16)
5458 	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5459 	 return ha;  */
5460       int i;
5461       tree tmp_ha, field_t, field_ptr_t;
5462 
5463       /* Declare a local variable.  */
5464       tmp_ha = create_tmp_var_raw (type, "ha");
5465       gimple_add_tmp_var (tmp_ha);
5466 
5467       /* Establish the base type.  */
5468       switch (ag_mode)
5469 	{
5470 	case SFmode:
5471 	  field_t = float_type_node;
5472 	  field_ptr_t = float_ptr_type_node;
5473 	  break;
5474 	case DFmode:
5475 	  field_t = double_type_node;
5476 	  field_ptr_t = double_ptr_type_node;
5477 	  break;
5478 	case TFmode:
5479 	  field_t = long_double_type_node;
5480 	  field_ptr_t = long_double_ptr_type_node;
5481 	  break;
5482 /* The half precision and quad precision are not fully supported yet.  Enable
5483    the following code after the support is complete.  Need to find the correct
5484    type node for __fp16 *.  */
5485 #if 0
5486 	case HFmode:
5487 	  field_t = float_type_node;
5488 	  field_ptr_t = float_ptr_type_node;
5489 	  break;
5490 #endif
5491 	case V2SImode:
5492 	case V4SImode:
5493 	    {
5494 	      tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5495 	      field_t = build_vector_type_for_mode (innertype, ag_mode);
5496 	      field_ptr_t = build_pointer_type (field_t);
5497 	    }
5498 	  break;
5499 	default:
5500 	  gcc_assert (0);
5501 	}
5502 
5503       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5504       tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5505       addr = t;
5506       t = fold_convert (field_ptr_t, addr);
5507       t = build2 (MODIFY_EXPR, field_t,
5508 		  build1 (INDIRECT_REF, field_t, tmp_ha),
5509 		  build1 (INDIRECT_REF, field_t, t));
5510 
5511       /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
5512       for (i = 1; i < nregs; ++i)
5513 	{
5514 	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5515 	  u = fold_convert (field_ptr_t, addr);
5516 	  u = build2 (MODIFY_EXPR, field_t,
5517 		      build2 (MEM_REF, field_t, tmp_ha,
5518 			      build_int_cst (field_ptr_t,
5519 					     (i *
5520 					      int_size_in_bytes (field_t)))),
5521 		      build1 (INDIRECT_REF, field_t, u));
5522 	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5523 	}
5524 
5525       u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5526       t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5527     }
5528 
5529   COND_EXPR_ELSE (cond2) = t;
5530   addr = fold_convert (build_pointer_type (type), cond1);
5531   addr = build_va_arg_indirect_ref (addr);
5532 
5533   if (indirect_p)
5534     addr = build_va_arg_indirect_ref (addr);
5535 
5536   return addr;
5537 }
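
/* Illustrative sketch only (assumed pseudo-C, not emitted verbatim): for a
   TYPE passed in general registers the expression tree built above
   corresponds roughly to

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;
     ap.__gr_offs = off + rsize;
     if (ap.__gr_offs > 0)
       goto on_stack;
     addr = ap.__gr_top + off;		/* register save area  */
     goto done;
   on_stack:
     addr = <ap.__stack, aligned and advanced by the argument size>;
   done:

   with __vr_top/__vr_offs taking the place of __gr_top/__gr_offs when the
   argument is a floating-point or short-vector candidate.  */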
5538 
5539 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
5540 
5541 static void
5542 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5543 				tree type, int *pretend_size ATTRIBUTE_UNUSED,
5544 				int no_rtl)
5545 {
5546   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5547   CUMULATIVE_ARGS local_cum;
5548   int gr_saved, vr_saved;
5549 
5550   /* The caller has advanced CUM up to, but not beyond, the last named
5551      argument.  Advance a local copy of CUM past the last "real" named
5552      argument, to find out how many registers are left over.  */
5553   local_cum = *cum;
5554   aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5555 
5556   /* Find out how many registers we need to save.  */
5557   gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5558   vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5559 
5560   if (TARGET_GENERAL_REGS_ONLY)
5561     {
5562       if (local_cum.aapcs_nvrn > 0)
5563 	sorry ("%qs and floating point or vector arguments",
5564 	       "-mgeneral-regs-only");
5565       vr_saved = 0;
5566     }
5567 
5568   if (!no_rtl)
5569     {
5570       if (gr_saved > 0)
5571 	{
5572 	  rtx ptr, mem;
5573 
5574 	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
5575 	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5576 			       - gr_saved * UNITS_PER_WORD);
5577 	  mem = gen_frame_mem (BLKmode, ptr);
5578 	  set_mem_alias_set (mem, get_varargs_alias_set ());
5579 
5580 	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5581 			       mem, gr_saved);
5582 	}
5583       if (vr_saved > 0)
5584 	{
5585 	  /* We can't use move_block_from_reg, because it will use
5586 	     the wrong mode, storing D regs only.  */
5587 	  enum machine_mode mode = TImode;
5588 	  int off, i;
5589 
5590 	  /* Set OFF to the offset from virtual_incoming_args_rtx of
5591 	     the first vector register.  The VR save area lies below
5592 	     the GR one, and is aligned to 16 bytes.  */
5593 	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5594 				   STACK_BOUNDARY / BITS_PER_UNIT);
5595 	  off -= vr_saved * UNITS_PER_VREG;
5596 
5597 	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5598 	    {
5599 	      rtx ptr, mem;
5600 
5601 	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5602 	      mem = gen_frame_mem (mode, ptr);
5603 	      set_mem_alias_set (mem, get_varargs_alias_set ());
5604 	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5605 	      off += UNITS_PER_VREG;
5606 	    }
5607 	}
5608     }
5609 
5610   /* We don't save the size into *PRETEND_SIZE because we want to avoid
5611      any complication of having crtl->args.pretend_args_size changed.  */
5612   cfun->machine->saved_varargs_size
5613     = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5614 		      STACK_BOUNDARY / BITS_PER_UNIT)
5615        + vr_saved * UNITS_PER_VREG);
5616 }
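
/* Resulting layout of the register save areas, for illustration
   (higher addresses towards the top, VIA = virtual_incoming_args_rtx):

	  incoming stack arguments
     VIA: -----------------------------------  <- ap.__gr_top
	  GP save area, gr_saved * 8 bytes
	  -----------------------------------  <- ap.__vr_top (16-byte aligned)
	  VR save area, vr_saved * 16 bytes
	  -----------------------------------

   The corresponding va_list fields are initialized by
   aarch64_expand_builtin_va_start above.  */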
5617 
5618 static void
5619 aarch64_conditional_register_usage (void)
5620 {
5621   int i;
5622   if (!TARGET_FLOAT)
5623     {
5624       for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5625 	{
5626 	  fixed_regs[i] = 1;
5627 	  call_used_regs[i] = 1;
5628 	}
5629     }
5630 }
5631 
5632 /* Walk down the type tree of TYPE counting consecutive base elements.
5633    If *MODEP is VOIDmode, then set it to the first valid floating point
5634    type.  If a non-floating point type is found, or if a floating point
5635    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5636    otherwise return the count in the sub-tree.  */
5637 static int
5638 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5639 {
5640   enum machine_mode mode;
5641   HOST_WIDE_INT size;
5642 
5643   switch (TREE_CODE (type))
5644     {
5645     case REAL_TYPE:
5646       mode = TYPE_MODE (type);
5647       if (mode != DFmode && mode != SFmode && mode != TFmode)
5648 	return -1;
5649 
5650       if (*modep == VOIDmode)
5651 	*modep = mode;
5652 
5653       if (*modep == mode)
5654 	return 1;
5655 
5656       break;
5657 
5658     case COMPLEX_TYPE:
5659       mode = TYPE_MODE (TREE_TYPE (type));
5660       if (mode != DFmode && mode != SFmode && mode != TFmode)
5661 	return -1;
5662 
5663       if (*modep == VOIDmode)
5664 	*modep = mode;
5665 
5666       if (*modep == mode)
5667 	return 2;
5668 
5669       break;
5670 
5671     case VECTOR_TYPE:
5672       /* Use V2SImode and V4SImode as representatives of all 64-bit
5673 	 and 128-bit vector types.  */
5674       size = int_size_in_bytes (type);
5675       switch (size)
5676 	{
5677 	case 8:
5678 	  mode = V2SImode;
5679 	  break;
5680 	case 16:
5681 	  mode = V4SImode;
5682 	  break;
5683 	default:
5684 	  return -1;
5685 	}
5686 
5687       if (*modep == VOIDmode)
5688 	*modep = mode;
5689 
5690       /* Vector modes are considered to be opaque: two vectors are
5691 	 equivalent for the purposes of being homogeneous aggregates
5692 	 if they are the same size.  */
5693       if (*modep == mode)
5694 	return 1;
5695 
5696       break;
5697 
5698     case ARRAY_TYPE:
5699       {
5700 	int count;
5701 	tree index = TYPE_DOMAIN (type);
5702 
5703 	/* Can't handle incomplete types.  */
5704 	if (!COMPLETE_TYPE_P (type))
5705 	  return -1;
5706 
5707 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5708 	if (count == -1
5709 	    || !index
5710 	    || !TYPE_MAX_VALUE (index)
5711 	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
5712 	    || !TYPE_MIN_VALUE (index)
5713 	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
5714 	    || count < 0)
5715 	  return -1;
5716 
5717 	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5718 		      - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5719 
5720 	/* There must be no padding.  */
5721 	if (!host_integerp (TYPE_SIZE (type), 1)
5722 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5723 		!= count * GET_MODE_BITSIZE (*modep)))
5724 	  return -1;
5725 
5726 	return count;
5727       }
5728 
5729     case RECORD_TYPE:
5730       {
5731 	int count = 0;
5732 	int sub_count;
5733 	tree field;
5734 
5735 	/* Can't handle incomplete types.  */
5736 	if (!COMPLETE_TYPE_P (type))
5737 	  return -1;
5738 
5739 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5740 	  {
5741 	    if (TREE_CODE (field) != FIELD_DECL)
5742 	      continue;
5743 
5744 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5745 	    if (sub_count < 0)
5746 	      return -1;
5747 	    count += sub_count;
5748 	  }
5749 
5750 	/* There must be no padding.  */
5751 	if (!host_integerp (TYPE_SIZE (type), 1)
5752 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5753 		!= count * GET_MODE_BITSIZE (*modep)))
5754 	  return -1;
5755 
5756 	return count;
5757       }
5758 
5759     case UNION_TYPE:
5760     case QUAL_UNION_TYPE:
5761       {
5762 	/* These aren't very interesting except in a degenerate case.  */
5763 	int count = 0;
5764 	int sub_count;
5765 	tree field;
5766 
5767 	/* Can't handle incomplete types.  */
5768 	if (!COMPLETE_TYPE_P (type))
5769 	  return -1;
5770 
5771 	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5772 	  {
5773 	    if (TREE_CODE (field) != FIELD_DECL)
5774 	      continue;
5775 
5776 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5777 	    if (sub_count < 0)
5778 	      return -1;
5779 	    count = count > sub_count ? count : sub_count;
5780 	  }
5781 
5782 	/* There must be no padding.  */
5783 	if (!host_integerp (TYPE_SIZE (type), 1)
5784 	    || (tree_low_cst (TYPE_SIZE (type), 1)
5785 		!= count * GET_MODE_BITSIZE (*modep)))
5786 	  return -1;
5787 
5788 	return count;
5789       }
5790 
5791     default:
5792       break;
5793     }
5794 
5795   return -1;
5796 }
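
/* Illustrative examples of the classification above:

     struct { double x, y; }		-> 2, *MODEP == DFmode
     struct { float v[3]; }		-> 3, *MODEP == SFmode
     _Complex double			-> 2, *MODEP == DFmode
     struct { float f; double d; }	-> -1 (mismatched base types)  */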
5797 
5798 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5799    type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
5800    array types.  The C99 floating-point complex types are also considered
5801    as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
5802    types, which are GCC extensions and out of the scope of AAPCS64, are
5803    treated as composite types here as well.
5804 
5805    Note that MODE itself is not sufficient in determining whether a type
5806    is such a composite type or not.  This is because
5807    stor-layout.c:compute_record_mode may have already changed the MODE
5808    (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
5809    structure with only one field may have its MODE set to the mode of the
5810    field.  Also an integer mode whose size matches the size of the
5811    RECORD_TYPE type may be used to substitute the original mode
5812    (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
5813    solely relied on.  */
5814 
5815 static bool
5816 aarch64_composite_type_p (const_tree type,
5817 			  enum machine_mode mode)
5818 {
5819   if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5820     return true;
5821 
5822   if (mode == BLKmode
5823       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5824       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5825     return true;
5826 
5827   return false;
5828 }
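
/* For example, struct { float f; } may be given SFmode by stor-layout.c,
   yet it is still a composite type; the TYPE check above, rather than the
   MODE check, is what classifies it.  */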
5829 
5830 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5831    type as described in AAPCS64 \S 4.1.2.
5832 
5833    See the comment above aarch64_composite_type_p for the notes on MODE.  */
5834 
5835 static bool
5836 aarch64_short_vector_p (const_tree type,
5837 			enum machine_mode mode)
5838 {
5839   HOST_WIDE_INT size = -1;
5840 
5841   if (type && TREE_CODE (type) == VECTOR_TYPE)
5842     size = int_size_in_bytes (type);
5843   else if (!aarch64_composite_type_p (type, mode)
5844 	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5845 	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5846     size = GET_MODE_SIZE (mode);
5847 
5848   return (size == 8 || size == 16);
5849 }
5850 
5851 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5852    shall be passed or returned in simd/fp register(s) (providing these
5853    parameter passing registers are available).
5854 
5855    Upon successful return, *COUNT returns the number of needed registers,
5856    *BASE_MODE returns the mode of the individual register and, when IS_HA
5857    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5858    floating-point aggregate or a homogeneous short-vector aggregate.  */
5859 
5860 static bool
5861 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5862 					 const_tree type,
5863 					 enum machine_mode *base_mode,
5864 					 int *count,
5865 					 bool *is_ha)
5866 {
5867   enum machine_mode new_mode = VOIDmode;
5868   bool composite_p = aarch64_composite_type_p (type, mode);
5869 
5870   if (is_ha != NULL) *is_ha = false;
5871 
5872   if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5873       || aarch64_short_vector_p (type, mode))
5874     {
5875       *count = 1;
5876       new_mode = mode;
5877     }
5878   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5879     {
5880       if (is_ha != NULL) *is_ha = true;
5881       *count = 2;
5882       new_mode = GET_MODE_INNER (mode);
5883     }
5884   else if (type && composite_p)
5885     {
5886       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5887 
5888       if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5889 	{
5890 	  if (is_ha != NULL) *is_ha = true;
5891 	  *count = ag_count;
5892 	}
5893       else
5894 	return false;
5895     }
5896   else
5897     return false;
5898 
5899   *base_mode = new_mode;
5900   return true;
5901 }
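
/* A few illustrative cases for the classification above:

     double			-> true, *BASE_MODE = DFmode, *COUNT = 1
     64/128-bit vector type	-> true, *BASE_MODE = the vector mode, *COUNT = 1
     _Complex float		-> true, *BASE_MODE = SFmode, *COUNT = 2, *IS_HA
     struct { double d[4]; }	-> true, *BASE_MODE = DFmode, *COUNT = 4, *IS_HA
     struct { double d[5]; }	-> false (exceeds HA_MAX_NUM_FLDS)  */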
5902 
5903 /* Implement TARGET_STRUCT_VALUE_RTX.  */
5904 
5905 static rtx
5906 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5907 			  int incoming ATTRIBUTE_UNUSED)
5908 {
5909   return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5910 }
5911 
5912 /* Implement target hook TARGET_VECTOR_MODE_SUPPORTED_P.  */
5913 static bool
5914 aarch64_vector_mode_supported_p (enum machine_mode mode)
5915 {
5916   if (TARGET_SIMD
5917       && (mode == V4SImode  || mode == V8HImode
5918 	  || mode == V16QImode || mode == V2DImode
5919 	  || mode == V2SImode  || mode == V4HImode
5920 	  || mode == V8QImode || mode == V2SFmode
5921 	  || mode == V4SFmode || mode == V2DFmode))
5922     return true;
5923 
5924   return false;
5925 }
5926 
5927 /* Return quad mode as the preferred SIMD mode.  */
5928 static enum machine_mode
5929 aarch64_preferred_simd_mode (enum machine_mode mode)
5930 {
5931   if (TARGET_SIMD)
5932     switch (mode)
5933       {
5934       case DFmode:
5935         return V2DFmode;
5936       case SFmode:
5937         return V4SFmode;
5938       case SImode:
5939         return V4SImode;
5940       case HImode:
5941         return V8HImode;
5942       case QImode:
5943         return V16QImode;
5944       case DImode:
5945         return V2DImode;
5947 
5948       default:;
5949       }
5950   return word_mode;
5951 }
5952 
5953 /* Return the bitmask of possible vector sizes for the vectorizer
5954    to iterate over.  */
5955 static unsigned int
5956 aarch64_autovectorize_vector_sizes (void)
5957 {
5958   return (16 | 8);
5959 }
5960 
5961 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5962    vector types in order to conform to the AAPCS64 (see "Procedure
5963    Call Standard for the ARM 64-bit Architecture", Appendix A).  To
5964    qualify for emission with the mangled names defined in that document,
5965    a vector type must not only be of the correct mode but also be
5966    composed of AdvSIMD vector element types (e.g.
5967    __builtin_aarch64_simd_qi); these types are registered by
5968    aarch64_init_simd_builtins ().  In other words, vector types defined
5969    in other ways e.g. via vector_size attribute will get default
5970    mangled names.  */
5971 typedef struct
5972 {
5973   enum machine_mode mode;
5974   const char *element_type_name;
5975   const char *mangled_name;
5976 } aarch64_simd_mangle_map_entry;
5977 
5978 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5979   /* 64-bit containerized types.  */
5980   { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
5981   { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
5982   { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
5983   { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
5984   { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
5985   { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
5986   { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
5987   { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
5988   { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5989   /* 128-bit containerized types.  */
5990   { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
5991   { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
5992   { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
5993   { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
5994   { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
5995   { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
5996   { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
5997   { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
5998   { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
5999   { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
6000   { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
6001   { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6002   { VOIDmode, NULL, NULL }
6003 };
6004 
6005 /* Implement TARGET_MANGLE_TYPE.  */
6006 
6007 static const char *
6008 aarch64_mangle_type (const_tree type)
6009 {
6010   /* The AArch64 ABI documents say that "__va_list" has to be
6011      mangled as if it is in the "std" namespace.  */
6012   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6013     return "St9__va_list";
6014 
6015   /* Check the mode of the vector type, and the name of the vector
6016      element type, against the table.  */
6017   if (TREE_CODE (type) == VECTOR_TYPE)
6018     {
6019       aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6020 
6021       while (pos->mode != VOIDmode)
6022 	{
6023 	  tree elt_type = TREE_TYPE (type);
6024 
6025 	  if (pos->mode == TYPE_MODE (type)
6026 	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6027 	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6028 			  pos->element_type_name))
6029 	    return pos->mangled_name;
6030 
6031 	  pos++;
6032 	}
6033     }
6034 
6035   /* Use the default mangling.  */
6036   return NULL;
6037 }
6038 
6039 /* Return the equivalent letter for size.  */
6040 static unsigned char
6041 sizetochar (int size)
6042 {
6043   switch (size)
6044     {
6045     case 64: return 'd';
6046     case 32: return 's';
6047     case 16: return 'h';
6048     case 8 : return 'b';
6049     default: gcc_unreachable ();
6050     }
6051 }
6052 
6053 /* Return true iff X is a uniform vector of floating-point
6054    constants, and the constant can be represented in
6055    quarter-precision form.  Note that, as aarch64_float_const_representable_p
6056    rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
6057 static bool
6058 aarch64_vect_float_const_representable_p (rtx x)
6059 {
6060   int i = 0;
6061   REAL_VALUE_TYPE r0, ri;
6062   rtx x0, xi;
6063 
6064   if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6065     return false;
6066 
6067   x0 = CONST_VECTOR_ELT (x, 0);
6068   if (!CONST_DOUBLE_P (x0))
6069     return false;
6070 
6071   REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6072 
6073   for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6074     {
6075       xi = CONST_VECTOR_ELT (x, i);
6076       if (!CONST_DOUBLE_P (xi))
6077 	return false;
6078 
6079       REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6080       if (!REAL_VALUES_EQUAL (r0, ri))
6081 	return false;
6082     }
6083 
6084   return aarch64_float_const_representable_p (x0);
6085 }
6086 
6087 /* TODO: This function returns values similar to those
6088    returned by neon_valid_immediate in gcc/config/arm/arm.c
6089    but the API here is different enough that these magic numbers
6090    are not used.  It should be sufficient to return true or false.  */
6091 static int
6092 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6093 			      rtx *modconst, int *elementwidth,
6094 			      unsigned char *elementchar,
6095 			      int *mvn, int *shift)
6096 {
6097 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
6098   matches = 1;						\
6099   for (i = 0; i < idx; i += (STRIDE))			\
6100     if (!(TEST))					\
6101       matches = 0;					\
6102   if (matches)						\
6103     {							\
6104       immtype = (CLASS);				\
6105       elsize = (ELSIZE);				\
6106       elchar = sizetochar (elsize);			\
6107       eshift = (SHIFT);					\
6108       emvn = (NEG);					\
6109       break;						\
6110     }
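
  /* For illustration: a V4SImode vector whose elements all equal
     0x0000ab00 splats to the little-endian byte pattern
     { 00 ab 00 00, ... }, so the second CHECK below (ELSIZE 32, SHIFT 8)
     matches and the value can be emitted as something like
     "movi v0.4s, #0xab, lsl #8".  */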
6111 
6112   unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6113   unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6114   unsigned char bytes[16];
6115   unsigned char elchar = 0;
6116   int immtype = -1, matches;
6117   unsigned int invmask = inverse ? 0xff : 0;
6118   int eshift, emvn;
6119 
6120   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6121     {
6122       bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6123       int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6124 
6125       if (!(simd_imm_zero
6126 	    || aarch64_vect_float_const_representable_p (op)))
6127 	return -1;
6128 
6129 	if (modconst)
6130 	  *modconst = CONST_VECTOR_ELT (op, 0);
6131 
6132 	if (elementwidth)
6133 	  *elementwidth = elem_width;
6134 
6135 	if (elementchar)
6136 	  *elementchar = sizetochar (elem_width);
6137 
6138 	if (shift)
6139 	  *shift = 0;
6140 
6141 	if (simd_imm_zero)
6142 	  return 19;
6143 	else
6144 	  return 18;
6145     }
6146 
6147   /* Splat vector constant out into a byte vector.  */
6148   for (i = 0; i < n_elts; i++)
6149     {
6150       rtx el = CONST_VECTOR_ELT (op, i);
6151       unsigned HOST_WIDE_INT elpart;
6152       unsigned int part, parts;
6153 
6154       if (GET_CODE (el) == CONST_INT)
6155         {
6156           elpart = INTVAL (el);
6157           parts = 1;
6158         }
6159       else if (GET_CODE (el) == CONST_DOUBLE)
6160         {
6161           elpart = CONST_DOUBLE_LOW (el);
6162           parts = 2;
6163         }
6164       else
6165         gcc_unreachable ();
6166 
6167       for (part = 0; part < parts; part++)
6168         {
6169           unsigned int byte;
6170           for (byte = 0; byte < innersize; byte++)
6171             {
6172               bytes[idx++] = (elpart & 0xff) ^ invmask;
6173               elpart >>= BITS_PER_UNIT;
6174             }
6175           if (GET_CODE (el) == CONST_DOUBLE)
6176             elpart = CONST_DOUBLE_HIGH (el);
6177         }
6178     }
6179 
6180   /* Sanity check.  */
6181   gcc_assert (idx == GET_MODE_SIZE (mode));
6182 
6183   do
6184     {
6185       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6186 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6187 
6188       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6189 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6190 
6191       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6192 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6193 
6194       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6195 	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6196 
6197       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6198 
6199       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6200 
6201       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6202 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6203 
6204       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6205 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6206 
6207       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6208 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6209 
6210       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6211 	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6212 
6213       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6214 
6215       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6216 
6217       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6218 	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6219 
6220       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6221 	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6222 
6223       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6224 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6225 
6226       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6227 	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6228 
6229       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6230 
6231       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6232 	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6233     }
6234   while (0);
6235 
6236   /* TODO: Currently the assembler cannot handle types 12 to 15.
6237      And there is no way to specify cmode through the compiler.
6238      Disable them till there is support in the assembler.  */
6239   if (immtype == -1
6240       || (immtype >= 12 && immtype <= 15)
6241       || immtype == 18)
6242     return -1;
6243 
6244 
6245   if (elementwidth)
6246     *elementwidth = elsize;
6247 
6248   if (elementchar)
6249     *elementchar = elchar;
6250 
6251   if (mvn)
6252     *mvn = emvn;
6253 
6254   if (shift)
6255     *shift = eshift;
6256 
6257   if (modconst)
6258     {
6259       unsigned HOST_WIDE_INT imm = 0;
6260 
6261       /* Un-invert bytes of recognized vector, if necessary.  */
6262       if (invmask != 0)
6263         for (i = 0; i < idx; i++)
6264           bytes[i] ^= invmask;
6265 
6266       if (immtype == 17)
6267         {
6268           /* FIXME: Broken on 32-bit H_W_I hosts.  */
6269           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6270 
6271           for (i = 0; i < 8; i++)
6272             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6273 	      << (i * BITS_PER_UNIT);
6274 
6275           *modconst = GEN_INT (imm);
6276         }
6277       else
6278         {
6279           unsigned HOST_WIDE_INT imm = 0;
6280 
6281           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6282             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6283 
6284 	  /* Construct 'abcdefgh' because the assembler cannot handle
6285 	     generic constants.  */
6286 	  gcc_assert (shift != NULL && mvn != NULL);
6287 	  if (*mvn)
6288 	    imm = ~imm;
6289 	  imm = (imm >> *shift) & 0xff;
6290           *modconst = GEN_INT (imm);
6291         }
6292     }
6293 
6294   return immtype;
6295 #undef CHECK
6296 }
6297 
6298 /* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6299    (or, implicitly, MVNI) immediate.  Write back width per element
6300    to *ELEMENTWIDTH, and a modified constant (whatever should be output
6301    for a MOVI instruction) in *MODCONST.  */
6302 int
6303 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6304 				       rtx *modconst, int *elementwidth,
6305 				       unsigned char *elementchar,
6306 				       int *mvn, int *shift)
6307 {
6308   rtx tmpconst;
6309   int tmpwidth;
6310   unsigned char tmpwidthc;
6311   int tmpmvn = 0, tmpshift = 0;
6312   int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6313 					     &tmpwidth, &tmpwidthc,
6314 					     &tmpmvn, &tmpshift);
6315 
6316   if (retval == -1)
6317     return 0;
6318 
6319   if (modconst)
6320     *modconst = tmpconst;
6321 
6322   if (elementwidth)
6323     *elementwidth = tmpwidth;
6324 
6325   if (elementchar)
6326     *elementchar = tmpwidthc;
6327 
6328   if (mvn)
6329     *mvn = tmpmvn;
6330 
6331   if (shift)
6332     *shift = tmpshift;
6333 
6334   return 1;
6335 }
6336 
6337 static bool
6338 aarch64_const_vec_all_same_int_p (rtx x,
6339 				  HOST_WIDE_INT minval,
6340 				  HOST_WIDE_INT maxval)
6341 {
6342   HOST_WIDE_INT firstval;
6343   int count, i;
6344 
6345   if (GET_CODE (x) != CONST_VECTOR
6346       || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6347     return false;
6348 
6349   firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6350   if (firstval < minval || firstval > maxval)
6351     return false;
6352 
6353   count = CONST_VECTOR_NUNITS (x);
6354   for (i = 1; i < count; i++)
6355     if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6356       return false;
6357 
6358   return true;
6359 }
6360 
6361 /* Check if immediate shift constants are within range.  */
6362 bool
6363 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6364 {
6365   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6366   if (left)
6367     return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6368   else
6369     return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6370 }
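
/* For example, for MODE == V8HImode (16-bit elements) the accepted
   immediates are 0..15 for left shifts and 1..16 for right shifts,
   matching the ranges of the AdvSIMD SHL and SSHR/USHR encodings.  */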
6371 
6372 /* Return true if X is a uniform vector where all elements
6373    are either the floating-point constant 0.0 or the
6374    integer constant 0.  */
6375 bool
6376 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6377 {
6378   return x == CONST0_RTX (mode);
6379 }
6380 
6381 bool
6382 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6383 {
6384   HOST_WIDE_INT imm = INTVAL (x);
6385   int i;
6386 
6387   for (i = 0; i < 8; i++)
6388     {
6389       unsigned int byte = imm & 0xff;
6390       if (byte != 0xff && byte != 0)
6391        return false;
6392       imm >>= 8;
6393     }
6394 
6395   return true;
6396 }
6397 
6398 /* Return a const_int vector of VAL.  */
6399 rtx
6400 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6401 {
6402   int nunits = GET_MODE_NUNITS (mode);
6403   rtvec v = rtvec_alloc (nunits);
6404   int i;
6405 
6406   for (i = 0; i < nunits; i++)
6407     RTVEC_ELT (v, i) = GEN_INT (val);
6408 
6409   return gen_rtx_CONST_VECTOR (mode, v);
6410 }
6411 
6412 /* Construct and return a PARALLEL RTX vector.  */
6413 rtx
6414 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6415 {
6416   int nunits = GET_MODE_NUNITS (mode);
6417   rtvec v = rtvec_alloc (nunits / 2);
6418   int base = high ? nunits / 2 : 0;
6419   rtx t1;
6420   int i;
6421 
6422   for (i = 0; i < nunits / 2; i++)
6423     RTVEC_ELT (v, i) = GEN_INT (base + i);
6424 
6425   t1 = gen_rtx_PARALLEL (mode, v);
6426   return t1;
6427 }
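
/* For instance, with MODE == V4SImode this returns
   (parallel [(const_int 0) (const_int 1)]) when HIGH is false and
   (parallel [(const_int 2) (const_int 3)]) when HIGH is true.  */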
6428 
6429 /* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
6430    HIGH (exclusive).  */
6431 void
6432 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6433 {
6434   HOST_WIDE_INT lane;
6435   gcc_assert (GET_CODE (operand) == CONST_INT);
6436   lane = INTVAL (operand);
6437 
6438   if (lane < low || lane >= high)
6439     error ("lane out of range");
6440 }
6441 
6442 void
6443 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6444 {
6445   gcc_assert (GET_CODE (operand) == CONST_INT);
6446   HOST_WIDE_INT lane = INTVAL (operand);
6447 
6448   if (lane < low || lane >= high)
6449     error ("constant out of range");
6450 }
6451 
6452 /* Emit code to reinterpret one AdvSIMD type as another,
6453    without altering bits.  */
6454 void
6455 aarch64_simd_reinterpret (rtx dest, rtx src)
6456 {
6457   emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6458 }
6459 
6460 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6461    registers).  */
6462 void
6463 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6464 			    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6465                             rtx op1)
6466 {
6467   rtx mem = gen_rtx_MEM (mode, destaddr);
6468   rtx tmp1 = gen_reg_rtx (mode);
6469   rtx tmp2 = gen_reg_rtx (mode);
6470 
6471   emit_insn (intfn (tmp1, op1, tmp2));
6472 
6473   emit_move_insn (mem, tmp1);
6474   mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6475   emit_move_insn (mem, tmp2);
6476 }
6477 
6478 /* Return TRUE if OP is a valid vector addressing mode.  */
6479 bool
6480 aarch64_simd_mem_operand_p (rtx op)
6481 {
6482   return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6483 			|| GET_CODE (XEXP (op, 0)) == REG);
6484 }
6485 
6486 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6487    not to early-clobber SRC registers in the process.
6488 
6489    We assume that the operands described by SRC and DEST represent a
6490    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
6491    number of components into which the copy has been decomposed.  */
6492 void
6493 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6494 				rtx *src, unsigned int count)
6495 {
6496   unsigned int i;
6497 
6498   if (!reg_overlap_mentioned_p (operands[0], operands[1])
6499       || REGNO (operands[0]) < REGNO (operands[1]))
6500     {
6501       for (i = 0; i < count; i++)
6502 	{
6503 	  operands[2 * i] = dest[i];
6504 	  operands[2 * i + 1] = src[i];
6505 	}
6506     }
6507   else
6508     {
6509       for (i = 0; i < count; i++)
6510 	{
6511 	  operands[2 * i] = dest[count - i - 1];
6512 	  operands[2 * i + 1] = src[count - i - 1];
6513 	}
6514     }
6515 }
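
/* As an illustration, when a multi-register copy overlaps and the
   destination group starts at a higher register number than the source
   (say V2-V5 copied from V1-V4), the loop above reverses the component
   order so that no source register is overwritten before it has been
   read.  */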
6516 
6517 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6518    one of VSTRUCT modes: OI, CI or XI.  */
6519 int
6520 aarch64_simd_attr_length_move (rtx insn)
6521 {
6522   enum machine_mode mode;
6523 
6524   extract_insn_cached (insn);
6525 
6526   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6527     {
6528       mode = GET_MODE (recog_data.operand[0]);
6529       switch (mode)
6530 	{
6531 	case OImode:
6532 	  return 8;
6533 	case CImode:
6534 	  return 12;
6535 	case XImode:
6536 	  return 16;
6537 	default:
6538 	  gcc_unreachable ();
6539 	}
6540     }
6541   return 4;
6542 }
6543 
6544 /* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
6545    alignment of a vector to 128 bits.  */
6546 static HOST_WIDE_INT
6547 aarch64_simd_vector_alignment (const_tree type)
6548 {
6549   HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6550   return MIN (align, 128);
6551 }
6552 
6553 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
6554 static bool
6555 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6556 {
6557   if (is_packed)
6558     return false;
6559 
6560   /* We guarantee alignment for vectors up to 128-bits.  */
6561   if (tree_int_cst_compare (TYPE_SIZE (type),
6562 			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6563     return false;
6564 
6565   /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
6566   return true;
6567 }
6568 
6569 /* If VALS is a vector constant that can be loaded into a register
6570    using DUP, generate instructions to do so and return an RTX to
6571    assign to the register.  Otherwise return NULL_RTX.  */
6572 static rtx
6573 aarch64_simd_dup_constant (rtx vals)
6574 {
6575   enum machine_mode mode = GET_MODE (vals);
6576   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6577   int n_elts = GET_MODE_NUNITS (mode);
6578   bool all_same = true;
6579   rtx x;
6580   int i;
6581 
6582   if (GET_CODE (vals) != CONST_VECTOR)
6583     return NULL_RTX;
6584 
6585   for (i = 1; i < n_elts; ++i)
6586     {
6587       x = CONST_VECTOR_ELT (vals, i);
6588       if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6589 	all_same = false;
6590     }
6591 
6592   if (!all_same)
6593     return NULL_RTX;
6594 
6595   /* We can load this constant by using DUP and a constant in a
6596      single ARM register.  This will be cheaper than a vector
6597      load.  */
6598   x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6599   return gen_rtx_VEC_DUPLICATE (mode, x);
6600 }
6601 
6602 
6603 /* Generate code to load VALS, which is a PARALLEL containing only
6604    constants (for vec_init) or CONST_VECTOR, efficiently into a
6605    register.  Returns an RTX to copy into the register, or NULL_RTX
6606    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
6607 static rtx
6608 aarch64_simd_make_constant (rtx vals)
6609 {
6610   enum machine_mode mode = GET_MODE (vals);
6611   rtx const_dup;
6612   rtx const_vec = NULL_RTX;
6613   int n_elts = GET_MODE_NUNITS (mode);
6614   int n_const = 0;
6615   int i;
6616 
6617   if (GET_CODE (vals) == CONST_VECTOR)
6618     const_vec = vals;
6619   else if (GET_CODE (vals) == PARALLEL)
6620     {
6621       /* A CONST_VECTOR must contain only CONST_INTs and
6622 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6623 	 Only store valid constants in a CONST_VECTOR.  */
6624       for (i = 0; i < n_elts; ++i)
6625 	{
6626 	  rtx x = XVECEXP (vals, 0, i);
6627 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6628 	    n_const++;
6629 	}
6630       if (n_const == n_elts)
6631 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6632     }
6633   else
6634     gcc_unreachable ();
6635 
6636   if (const_vec != NULL_RTX
6637       && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6638 						NULL, NULL, NULL))
6639     /* Load using MOVI/MVNI.  */
6640     return const_vec;
6641   else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6642     /* Loaded using DUP.  */
6643     return const_dup;
6644   else if (const_vec != NULL_RTX)
6645     /* Load from constant pool.  We cannot take advantage of single-cycle
6646        LD1 because we need a PC-relative addressing mode.  */
6647     return const_vec;
6648   else
6649     /* A PARALLEL containing something not valid inside CONST_VECTOR.
6650        We cannot construct an initializer.  */
6651     return NULL_RTX;
6652 }
6653 
6654 void
6655 aarch64_expand_vector_init (rtx target, rtx vals)
6656 {
6657   enum machine_mode mode = GET_MODE (target);
6658   enum machine_mode inner_mode = GET_MODE_INNER (mode);
6659   int n_elts = GET_MODE_NUNITS (mode);
6660   int n_var = 0, one_var = -1;
6661   bool all_same = true;
6662   rtx x, mem;
6663   int i;
6664 
6665   x = XVECEXP (vals, 0, 0);
6666   if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6667     n_var = 1, one_var = 0;
6668 
6669   for (i = 1; i < n_elts; ++i)
6670     {
6671       x = XVECEXP (vals, 0, i);
6672       if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6673 	++n_var, one_var = i;
6674 
6675       if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6676 	all_same = false;
6677     }
6678 
6679   if (n_var == 0)
6680     {
6681       rtx constant = aarch64_simd_make_constant (vals);
6682       if (constant != NULL_RTX)
6683 	{
6684 	  emit_move_insn (target, constant);
6685 	  return;
6686 	}
6687     }
6688 
6689   /* Splat a single non-constant element if we can.  */
6690   if (all_same)
6691     {
6692       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6693       aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6694       return;
6695     }
6696 
6697   /* One field is non-constant.  Load constant then overwrite varying
6698      field.  This is more efficient than using the stack.  */
6699   if (n_var == 1)
6700     {
6701       rtx copy = copy_rtx (vals);
6702       rtx index = GEN_INT (one_var);
6703       enum insn_code icode;
6704 
6705       /* Load constant part of vector, substitute neighboring value for
6706 	 varying element.  */
6707       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6708       aarch64_expand_vector_init (target, copy);
6709 
6710       /* Insert variable.  */
6711       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6712       icode = optab_handler (vec_set_optab, mode);
6713       gcc_assert (icode != CODE_FOR_nothing);
6714       emit_insn (GEN_FCN (icode) (target, x, index));
6715       return;
6716     }
6717 
6718   /* Construct the vector in memory one field at a time
6719      and load the whole vector.  */
6720   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6721   for (i = 0; i < n_elts; i++)
6722     emit_move_insn (adjust_address_nv (mem, inner_mode,
6723 				    i * GET_MODE_SIZE (inner_mode)),
6724 		    XVECEXP (vals, 0, i));
6725   emit_move_insn (target, mem);
6727 }
6728 
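/* Implement TARGET_SHIFT_TRUNCATION_MASK: the mask the hardware applies to
   a variable shift amount, or zero if shift amounts are not truncated.
   Scalar shifts truncate the amount to the operand width; Advanced SIMD
   shifts do not, so vector (and vector structure) modes report zero.  */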
6729 static unsigned HOST_WIDE_INT
6730 aarch64_shift_truncation_mask (enum machine_mode mode)
6731 {
6732   return
6733     (aarch64_vector_mode_supported_p (mode)
6734      || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6735 }
6736 
6737 #ifndef TLS_SECTION_ASM_FLAG
6738 #define TLS_SECTION_ASM_FLAG 'T'
6739 #endif
6740 
6741 void
6742 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6743 			       tree decl ATTRIBUTE_UNUSED)
6744 {
6745   char flagchars[10], *f = flagchars;
6746 
6747   /* If we have already declared this section, we can use an
6748      abbreviated form to switch back to it -- unless this section is
6749      part of a COMDAT group, in which case GAS requires the full
6750      declaration every time.  */
6751   if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6752       && (flags & SECTION_DECLARED))
6753     {
6754       fprintf (asm_out_file, "\t.section\t%s\n", name);
6755       return;
6756     }
6757 
6758   if (!(flags & SECTION_DEBUG))
6759     *f++ = 'a';
6760   if (flags & SECTION_WRITE)
6761     *f++ = 'w';
6762   if (flags & SECTION_CODE)
6763     *f++ = 'x';
6764   if (flags & SECTION_SMALL)
6765     *f++ = 's';
6766   if (flags & SECTION_MERGE)
6767     *f++ = 'M';
6768   if (flags & SECTION_STRINGS)
6769     *f++ = 'S';
6770   if (flags & SECTION_TLS)
6771     *f++ = TLS_SECTION_ASM_FLAG;
6772   if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6773     *f++ = 'G';
6774   *f = '\0';
6775 
6776   fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6777 
6778   if (!(flags & SECTION_NOTYPE))
6779     {
6780       const char *type;
6781       const char *format;
6782 
6783       if (flags & SECTION_BSS)
6784 	type = "nobits";
6785       else
6786 	type = "progbits";
6787 
6788 #ifdef TYPE_OPERAND_FMT
6789       format = "," TYPE_OPERAND_FMT;
6790 #else
6791       format = ",@%s";
6792 #endif
6793 
6794       fprintf (asm_out_file, format, type);
6795 
6796       if (flags & SECTION_ENTSIZE)
6797 	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6798       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6799 	{
6800 	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
6801 	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6802 	  else
6803 	    fprintf (asm_out_file, ",%s,comdat",
6804 		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6805 	}
6806     }
6807 
6808   putc ('\n', asm_out_file);
6809 }
6810 
6811 /* Select a format to encode pointers in exception handling data.  */
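/* For the tiny and small code models this yields
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x10 | 0x0b = 0x1b), with
   DW_EH_PE_indirect also set for global references; other models fall
   back to 8-byte pc-relative data.  */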
6812 int
6813 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6814 {
6815    int type;
6816    switch (aarch64_cmodel)
6817      {
6818      case AARCH64_CMODEL_TINY:
6819      case AARCH64_CMODEL_TINY_PIC:
6820      case AARCH64_CMODEL_SMALL:
6821      case AARCH64_CMODEL_SMALL_PIC:
6822        /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
6823 	  for everything.  */
6824        type = DW_EH_PE_sdata4;
6825        break;
6826      default:
6827        /* No assumptions here.  8-byte relocs required.  */
6828        type = DW_EH_PE_sdata8;
6829        break;
6830      }
6831    return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6832 }
6833 
6834 /* Emit load exclusive.  */
6835 
6836 static void
6837 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6838 			     rtx mem, rtx model_rtx)
6839 {
6840   rtx (*gen) (rtx, rtx, rtx);
6841 
6842   switch (mode)
6843     {
6844     case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6845     case HImode: gen = gen_aarch64_load_exclusivehi; break;
6846     case SImode: gen = gen_aarch64_load_exclusivesi; break;
6847     case DImode: gen = gen_aarch64_load_exclusivedi; break;
6848     default:
6849       gcc_unreachable ();
6850     }
6851 
6852   emit_insn (gen (rval, mem, model_rtx));
6853 }
6854 
6855 /* Emit store exclusive.  */
6856 
6857 static void
6858 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6859 			      rtx rval, rtx mem, rtx model_rtx)
6860 {
6861   rtx (*gen) (rtx, rtx, rtx, rtx);
6862 
6863   switch (mode)
6864     {
6865     case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6866     case HImode: gen = gen_aarch64_store_exclusivehi; break;
6867     case SImode: gen = gen_aarch64_store_exclusivesi; break;
6868     case DImode: gen = gen_aarch64_store_exclusivedi; break;
6869     default:
6870       gcc_unreachable ();
6871     }
6872 
6873   emit_insn (gen (bval, rval, mem, model_rtx));
6874 }
6875 
6876 /* Mark the previous jump instruction as unlikely.  */
6877 
6878 static void
6879 aarch64_emit_unlikely_jump (rtx insn)
6880 {
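  /* REG_BR_PROB_BASE / 100 - 1 corresponds to a branch-taken probability
     of just under 1%.  */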
6881   rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6882 
6883   insn = emit_jump_insn (insn);
6884   add_reg_note (insn, REG_BR_PROB, very_unlikely);
6885 }
6886 
6887 /* Expand a compare and swap pattern.  */
6888 
6889 void
6890 aarch64_expand_compare_and_swap (rtx operands[])
6891 {
6892   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6893   enum machine_mode mode, cmp_mode;
6894   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6895 
6896   bval = operands[0];
6897   rval = operands[1];
6898   mem = operands[2];
6899   oldval = operands[3];
6900   newval = operands[4];
6901   is_weak = operands[5];
6902   mod_s = operands[6];
6903   mod_f = operands[7];
6904   mode = GET_MODE (mem);
6905   cmp_mode = mode;
6906 
6907   /* Normally the succ memory model must be stronger than fail, but in the
6908      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6909      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
6910 
6911   if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6912       && INTVAL (mod_s) == MEMMODEL_RELEASE)
6913     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6914 
6915   switch (mode)
6916     {
6917     case QImode:
6918     case HImode:
6919       /* For short modes, we're going to perform the comparison in SImode,
6920 	 so do the zero-extension now.  */
6921       cmp_mode = SImode;
6922       rval = gen_reg_rtx (SImode);
6923       oldval = convert_modes (SImode, mode, oldval, true);
6924       /* Fall through.  */
6925 
6926     case SImode:
6927     case DImode:
6928       /* Force the value into a register if needed.  */
6929       if (!aarch64_plus_operand (oldval, mode))
6930 	oldval = force_reg (cmp_mode, oldval);
6931       break;
6932 
6933     default:
6934       gcc_unreachable ();
6935     }
6936 
6937   switch (mode)
6938     {
6939     case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6940     case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6941     case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6942     case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6943     default:
6944       gcc_unreachable ();
6945     }
6946 
6947   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6948 
6949   if (mode == QImode || mode == HImode)
6950     emit_move_insn (operands[1], gen_lowpart (mode, rval));
6951 
6952   x = gen_rtx_REG (CCmode, CC_REGNUM);
6953   x = gen_rtx_EQ (SImode, x, const0_rtx);
6954   emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6955 }
6956 
6957 /* Split a compare and swap pattern.  */
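/* For the strong variant the resulting sequence is roughly the following
   (illustrative only; register numbers are arbitrary, and acquire/release
   forms of the exclusives may be used depending on the memory model):

	.L1:	ldxr	w0, [x1]
		cmp	w0, w2
		b.ne	.L2
		stxr	w3, w4, [x1]
		cbnz	w3, .L1
	.L2:
*/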
6958 
6959 void
6960 aarch64_split_compare_and_swap (rtx operands[])
6961 {
6962   rtx rval, mem, oldval, newval, scratch;
6963   enum machine_mode mode;
6964   bool is_weak;
6965   rtx label1, label2, x, cond;
6966 
6967   rval = operands[0];
6968   mem = operands[1];
6969   oldval = operands[2];
6970   newval = operands[3];
6971   is_weak = (operands[4] != const0_rtx);
6972   scratch = operands[7];
6973   mode = GET_MODE (mem);
6974 
6975   label1 = NULL_RTX;
6976   if (!is_weak)
6977     {
6978       label1 = gen_label_rtx ();
6979       emit_label (label1);
6980     }
6981   label2 = gen_label_rtx ();
6982 
6983   aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6984 
6985   cond = aarch64_gen_compare_reg (NE, rval, oldval);
6986   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6987   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6988 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6989   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6990 
6991   aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6992 
6993   if (!is_weak)
6994     {
6995       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6996       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6997 				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6998       aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6999     }
7000   else
7001     {
7002       cond = gen_rtx_REG (CCmode, CC_REGNUM);
7003       x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7004       emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7005     }
7006 
7007   emit_label (label2);
7008 }
7009 
7010 /* Split an atomic operation.  */
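/* For example, a 32-bit atomic fetch-and-add expands to roughly the
   following loop (illustrative only):

	.L1:	ldxr	w0, [x2]
		add	w1, w0, w3
		stxr	w4, w1, [x2]
		cbnz	w4, .L1
*/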
7011 
7012 void
7013 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7014 		     rtx value, rtx model_rtx, rtx cond)
7015 {
7016   enum machine_mode mode = GET_MODE (mem);
7017   enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7018   rtx label, x;
7019 
7020   label = gen_label_rtx ();
7021   emit_label (label);
7022 
7023   if (new_out)
7024     new_out = gen_lowpart (wmode, new_out);
7025   if (old_out)
7026     old_out = gen_lowpart (wmode, old_out);
7027   else
7028     old_out = new_out;
7029   value = simplify_gen_subreg (wmode, value, mode, 0);
7030 
7031   aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7032 
7033   switch (code)
7034     {
7035     case SET:
7036       new_out = value;
7037       break;
7038 
7039     case NOT:
7040       x = gen_rtx_AND (wmode, old_out, value);
7041       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7042       x = gen_rtx_NOT (wmode, new_out);
7043       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7044       break;
7045 
7046     case MINUS:
7047       if (CONST_INT_P (value))
7048 	{
7049 	  value = GEN_INT (-INTVAL (value));
7050 	  code = PLUS;
7051 	}
7052       /* Fall through.  */
7053 
7054     default:
7055       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7056       emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7057       break;
7058     }
7059 
7060   aarch64_emit_store_exclusive (mode, cond, mem,
7061 				gen_lowpart (mode, new_out), model_rtx);
7062 
7063   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7064   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7065 			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7066   aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7067 }
7068 
7069 static void
7070 aarch64_print_extension (void)
7071 {
7072   const struct aarch64_option_extension *opt = NULL;
7073 
7074   for (opt = all_extensions; opt->name != NULL; opt++)
7075     if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7076       asm_fprintf (asm_out_file, "+%s", opt->name);
7077 
7078   asm_fprintf (asm_out_file, "\n");
7079 }
7080 
7081 static void
7082 aarch64_start_file (void)
7083 {
7084   if (selected_arch)
7085     {
7086       asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7087       aarch64_print_extension ();
7088     }
7089   else if (selected_cpu)
7090     {
7091       asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7092       aarch64_print_extension ();
7093     }
7094   default_file_start ();
7095 }
7096 
7097 /* Target hook for c_mode_for_suffix.  */
7098 static enum machine_mode
7099 aarch64_c_mode_for_suffix (char suffix)
7100 {
7101   if (suffix == 'q')
7102     return TFmode;
7103 
7104   return VOIDmode;
7105 }
7106 
7107 /* We can only represent floating point constants which will fit in
7108    "quarter-precision" values.  These values are characterised by
7109    a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7110    by:
7111 
7112    (-1)^s * (n/16) * 2^r
7113 
7114    Where:
7115      's' is the sign bit.
7116      'n' is an integer in the range 16 <= n <= 31.
7117      'r' is an integer in the range -3 <= r <= 4.  */
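
/* For example, 0.25 = (16/16) * 2^-2 and 1.75 = (28/16) * 2^0 are
   representable, while 0.1 is not; the representable magnitudes run from
   0.125 (16/16 * 2^-3) up to 31.0 (31/16 * 2^4).  */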
7118 
7119 /* Return true iff X can be represented by a quarter-precision
7120    floating point immediate operand.  Note, we cannot represent 0.0.  */
7121 bool
7122 aarch64_float_const_representable_p (rtx x)
7123 {
7124   /* This represents our current view of how many bits
7125      make up the mantissa.  */
7126   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7127   int exponent;
7128   unsigned HOST_WIDE_INT mantissa, mask;
7129   HOST_WIDE_INT m1, m2;
7130   REAL_VALUE_TYPE r, m;
7131 
7132   if (!CONST_DOUBLE_P (x))
7133     return false;
7134 
7135   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7136 
7137   /* We cannot represent infinities, NaNs or +/-zero.  We won't
7138      know if we have +zero until we analyse the mantissa, but we
7139      can reject the other invalid values.  */
7140   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7141       || REAL_VALUE_MINUS_ZERO (r))
7142     return false;
7143 
7144   /* Extract exponent.  */
7145   r = real_value_abs (&r);
7146   exponent = REAL_EXP (&r);
7147 
7148   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7149      highest (sign) bit, with a fixed binary point at bit point_pos.
7150      m1 holds the low part of the mantissa, m2 the high part.
7151      WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7152      bits for the mantissa, this can fail (low bits will be lost).  */
7153   real_ldexp (&m, &r, point_pos - exponent);
7154   REAL_VALUE_TO_INT (&m1, &m2, m);
7155 
7156   /* If the low part of the mantissa has bits set we cannot represent
7157      the value.  */
7158   if (m1 != 0)
7159     return false;
7160   /* We have rejected the lower HOST_WIDE_INT, so update our
7161      understanding of how many bits lie in the mantissa and
7162      look only at the high HOST_WIDE_INT.  */
7163   mantissa = m2;
7164   point_pos -= HOST_BITS_PER_WIDE_INT;
7165 
7166   /* We can only represent values with a mantissa of the form 1.xxxx.  */
7167   mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7168   if ((mantissa & mask) != 0)
7169     return false;
7170 
7171   /* Having filtered unrepresentable values, we may now remove all
7172      but the highest 5 bits.  */
7173   mantissa >>= point_pos - 5;
7174 
7175   /* We cannot represent the value 0.0, so reject it.  This is handled
7176      elsewhere.  */
7177   if (mantissa == 0)
7178     return false;
7179 
7180   /* Then, as bit 4 is always set, we can mask it off, leaving
7181      the mantissa in the range [0, 15].  */
7182   mantissa &= ~(1 << 4);
7183   gcc_assert (mantissa <= 15);
7184 
7185   /* GCC internally does not use IEEE754-like encoding (where normalized
7186      significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
7187      Our mantissa values are shifted 4 places to the left relative to
7188      normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7189      by 5 places to correct for GCC's representation.  */
7190   exponent = 5 - exponent;
7191 
7192   return (exponent >= 0 && exponent <= 7);
7193 }
7194 
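/* Return an assembler template for moving the immediate CONST_VECTOR of
   MODE into a SIMD register of WIDTH bits.  For example, a V4SI constant
   whose lanes hold an 8-bit value shifted left by 8 produces something
   like "movi\t%0.4s, %1, lsl 8".  */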
7195 char*
7196 aarch64_output_simd_mov_immediate (rtx *const_vector,
7197 				   enum machine_mode mode,
7198 				   unsigned width)
7199 {
7200   int is_valid;
7201   unsigned char widthc;
7202   int lane_width_bits;
7203   static char templ[40];
7204   int shift = 0, mvn = 0;
7205   const char *mnemonic;
7206   unsigned int lane_count = 0;
7207 
7208   is_valid =
7209     aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7210 					   const_vector, &lane_width_bits,
7211 					   &widthc, &mvn, &shift);
7212   gcc_assert (is_valid);
7213 
7214   mode = GET_MODE_INNER (mode);
7215   if (mode == SFmode || mode == DFmode)
7216     {
7217       bool zero_p =
7218 	aarch64_float_const_zero_rtx_p (*const_vector);
7219       gcc_assert (shift == 0);
7220       mnemonic = zero_p ? "movi" : "fmov";
7221     }
7222   else
7223     mnemonic = mvn ? "mvni" : "movi";
7224 
7225   gcc_assert (lane_width_bits != 0);
7226   lane_count = width / lane_width_bits;
7227 
7228   if (lane_count == 1)
7229     snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7230   else if (shift)
7231     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7232 	      mnemonic, lane_count, widthc, shift);
7233   else
7234     snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7235 	      mnemonic, lane_count, widthc);
7236   return templ;
7237 }
7238 
7239 /* Split operands into moves from op[1] + op[2] into op[0].  */
7240 
7241 void
7242 aarch64_split_combinev16qi (rtx operands[3])
7243 {
7244   unsigned int dest = REGNO (operands[0]);
7245   unsigned int src1 = REGNO (operands[1]);
7246   unsigned int src2 = REGNO (operands[2]);
7247   enum machine_mode halfmode = GET_MODE (operands[1]);
7248   unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7249   rtx destlo, desthi;
7250 
7251   gcc_assert (halfmode == V16QImode);
7252 
7253   if (src1 == dest && src2 == dest + halfregs)
7254     {
7255       /* No-op move.  Can't split to nothing; emit something.  */
7256       emit_note (NOTE_INSN_DELETED);
7257       return;
7258     }
7259 
7260   /* Preserve register attributes for variable tracking.  */
7261   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7262   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7263 			       GET_MODE_SIZE (halfmode));
7264 
7265   /* Special case of reversed high/low parts.  */
7266   if (reg_overlap_mentioned_p (operands[2], destlo)
7267       && reg_overlap_mentioned_p (operands[1], desthi))
7268     {
7269       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7270       emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7271       emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7272     }
7273   else if (!reg_overlap_mentioned_p (operands[2], destlo))
7274     {
7275       /* Try to avoid unnecessary moves if part of the result
7276 	 is in the right place already.  */
7277       if (src1 != dest)
7278 	emit_move_insn (destlo, operands[1]);
7279       if (src2 != dest + halfregs)
7280 	emit_move_insn (desthi, operands[2]);
7281     }
7282   else
7283     {
7284       if (src2 != dest + halfregs)
7285 	emit_move_insn (desthi, operands[2]);
7286       if (src1 != dest)
7287 	emit_move_insn (destlo, operands[1]);
7288     }
7289 }
7290 
7291 /* vec_perm support.  */
7292 
7293 #define MAX_VECT_LEN 16
7294 
7295 struct expand_vec_perm_d
7296 {
7297   rtx target, op0, op1;
7298   unsigned char perm[MAX_VECT_LEN];
7299   enum machine_mode vmode;
7300   unsigned char nelt;
7301   bool one_vector_p;
7302   bool testing_p;
7303 };
7304 
7305 /* Generate a variable permutation.  */
7306 
7307 static void
7308 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7309 {
7310   enum machine_mode vmode = GET_MODE (target);
7311   bool one_vector_p = rtx_equal_p (op0, op1);
7312 
7313   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7314   gcc_checking_assert (GET_MODE (op0) == vmode);
7315   gcc_checking_assert (GET_MODE (op1) == vmode);
7316   gcc_checking_assert (GET_MODE (sel) == vmode);
7317   gcc_checking_assert (TARGET_SIMD);
7318 
7319   if (one_vector_p)
7320     {
7321       if (vmode == V8QImode)
7322 	{
7323 	  /* Expand the argument to a V16QI mode by duplicating it.  */
7324 	  rtx pair = gen_reg_rtx (V16QImode);
7325 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7326 	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7327 	}
7328       else
7329 	{
7330 	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7331 	}
7332     }
7333   else
7334     {
7335       rtx pair;
7336 
7337       if (vmode == V8QImode)
7338 	{
7339 	  pair = gen_reg_rtx (V16QImode);
7340 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7341 	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7342 	}
7343       else
7344 	{
7345 	  pair = gen_reg_rtx (OImode);
7346 	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7347 	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7348 	}
7349     }
7350 }
7351 
7352 void
7353 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7354 {
7355   enum machine_mode vmode = GET_MODE (target);
7356   unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7357   bool one_vector_p = rtx_equal_p (op0, op1);
7358   rtx rmask[MAX_VECT_LEN], mask;
7359 
7360   gcc_checking_assert (!BYTES_BIG_ENDIAN);
7361 
7362   /* The TBL instruction does not use a modulo index, so we must take care
7363      of that ourselves.  */
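  /* (TBL writes zero for an out-of-range index byte, whereas vec_perm
     semantics take the index modulo the number of input elements, so mask
     the selector down to the valid range first.)  */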
7364   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7365   for (i = 0; i < nelt; ++i)
7366     rmask[i] = mask;
7367   mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7368   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7369 
7370   aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7371 }
7372 
7373 /* Recognize patterns suitable for the TRN instructions.  */
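/* For V4SImode, for example, TRN1 selects elements {0, 4, 2, 6} of the
   concatenated inputs and TRN2 selects {1, 5, 3, 7} (little-endian
   element numbering).  */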
7374 static bool
7375 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7376 {
7377   unsigned int i, odd, mask, nelt = d->nelt;
7378   rtx out, in0, in1, x;
7379   rtx (*gen) (rtx, rtx, rtx);
7380   enum machine_mode vmode = d->vmode;
7381 
7382   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7383     return false;
7384 
7385   /* Note that these are little-endian tests.
7386      We correct for big-endian later.  */
7387   if (d->perm[0] == 0)
7388     odd = 0;
7389   else if (d->perm[0] == 1)
7390     odd = 1;
7391   else
7392     return false;
7393   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7394 
7395   for (i = 0; i < nelt; i += 2)
7396     {
7397       if (d->perm[i] != i + odd)
7398 	return false;
7399       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7400 	return false;
7401     }
7402 
7403   /* Success!  */
7404   if (d->testing_p)
7405     return true;
7406 
7407   in0 = d->op0;
7408   in1 = d->op1;
7409   if (BYTES_BIG_ENDIAN)
7410     {
7411       x = in0, in0 = in1, in1 = x;
7412       odd = !odd;
7413     }
7414   out = d->target;
7415 
7416   if (odd)
7417     {
7418       switch (vmode)
7419 	{
7420 	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7421 	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7422 	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7423 	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7424 	case V4SImode: gen = gen_aarch64_trn2v4si; break;
7425 	case V2SImode: gen = gen_aarch64_trn2v2si; break;
7426 	case V2DImode: gen = gen_aarch64_trn2v2di; break;
7427 	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7428 	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7429 	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7430 	default:
7431 	  return false;
7432 	}
7433     }
7434   else
7435     {
7436       switch (vmode)
7437 	{
7438 	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7439 	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7440 	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7441 	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7442 	case V4SImode: gen = gen_aarch64_trn1v4si; break;
7443 	case V2SImode: gen = gen_aarch64_trn1v2si; break;
7444 	case V2DImode: gen = gen_aarch64_trn1v2di; break;
7445 	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7446 	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7447 	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7448 	default:
7449 	  return false;
7450 	}
7451     }
7452 
7453   emit_insn (gen (out, in0, in1));
7454   return true;
7455 }
7456 
7457 /* Recognize patterns suitable for the UZP instructions.  */
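/* For V4SImode, for example, UZP1 selects elements {0, 2, 4, 6} of the
   concatenated inputs and UZP2 selects {1, 3, 5, 7} (little-endian
   element numbering).  */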
7458 static bool
7459 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7460 {
7461   unsigned int i, odd, mask, nelt = d->nelt;
7462   rtx out, in0, in1, x;
7463   rtx (*gen) (rtx, rtx, rtx);
7464   enum machine_mode vmode = d->vmode;
7465 
7466   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7467     return false;
7468 
7469   /* Note that these are little-endian tests.
7470      We correct for big-endian later.  */
7471   if (d->perm[0] == 0)
7472     odd = 0;
7473   else if (d->perm[0] == 1)
7474     odd = 1;
7475   else
7476     return false;
7477   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7478 
7479   for (i = 0; i < nelt; i++)
7480     {
7481       unsigned elt = (i * 2 + odd) & mask;
7482       if (d->perm[i] != elt)
7483 	return false;
7484     }
7485 
7486   /* Success!  */
7487   if (d->testing_p)
7488     return true;
7489 
7490   in0 = d->op0;
7491   in1 = d->op1;
7492   if (BYTES_BIG_ENDIAN)
7493     {
7494       x = in0, in0 = in1, in1 = x;
7495       odd = !odd;
7496     }
7497   out = d->target;
7498 
7499   if (odd)
7500     {
7501       switch (vmode)
7502 	{
7503 	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7504 	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7505 	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7506 	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7507 	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7508 	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7509 	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7510 	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7511 	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7512 	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7513 	default:
7514 	  return false;
7515 	}
7516     }
7517   else
7518     {
7519       switch (vmode)
7520 	{
7521 	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7522 	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7523 	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7524 	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7525 	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7526 	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7527 	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7528 	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7529 	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7530 	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7531 	default:
7532 	  return false;
7533 	}
7534     }
7535 
7536   emit_insn (gen (out, in0, in1));
7537   return true;
7538 }
7539 
7540 /* Recognize patterns suitable for the ZIP instructions.  */
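/* For V4SImode, for example, ZIP1 selects elements {0, 4, 1, 5} of the
   concatenated inputs and ZIP2 selects {2, 6, 3, 7} (little-endian
   element numbering).  */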
7541 static bool
7542 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7543 {
7544   unsigned int i, high, mask, nelt = d->nelt;
7545   rtx out, in0, in1, x;
7546   rtx (*gen) (rtx, rtx, rtx);
7547   enum machine_mode vmode = d->vmode;
7548 
7549   if (GET_MODE_UNIT_SIZE (vmode) > 8)
7550     return false;
7551 
7552   /* Note that these are little-endian tests.
7553      We correct for big-endian later.  */
7554   high = nelt / 2;
7555   if (d->perm[0] == high)
7556     /* Do Nothing.  */
7557     ;
7558   else if (d->perm[0] == 0)
7559     high = 0;
7560   else
7561     return false;
7562   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7563 
7564   for (i = 0; i < nelt / 2; i++)
7565     {
7566       unsigned elt = (i + high) & mask;
7567       if (d->perm[i * 2] != elt)
7568 	return false;
7569       elt = (elt + nelt) & mask;
7570       if (d->perm[i * 2 + 1] != elt)
7571 	return false;
7572     }
7573 
7574   /* Success!  */
7575   if (d->testing_p)
7576     return true;
7577 
7578   in0 = d->op0;
7579   in1 = d->op1;
7580   if (BYTES_BIG_ENDIAN)
7581     {
7582       x = in0, in0 = in1, in1 = x;
7583       high = !high;
7584     }
7585   out = d->target;
7586 
7587   if (high)
7588     {
7589       switch (vmode)
7590 	{
7591 	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7592 	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7593 	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7594 	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7595 	case V4SImode: gen = gen_aarch64_zip2v4si; break;
7596 	case V2SImode: gen = gen_aarch64_zip2v2si; break;
7597 	case V2DImode: gen = gen_aarch64_zip2v2di; break;
7598 	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7599 	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7600 	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7601 	default:
7602 	  return false;
7603 	}
7604     }
7605   else
7606     {
7607       switch (vmode)
7608 	{
7609 	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7610 	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7611 	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7612 	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7613 	case V4SImode: gen = gen_aarch64_zip1v4si; break;
7614 	case V2SImode: gen = gen_aarch64_zip1v2si; break;
7615 	case V2DImode: gen = gen_aarch64_zip1v2di; break;
7616 	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7617 	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7618 	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7619 	default:
7620 	  return false;
7621 	}
7622     }
7623 
7624   emit_insn (gen (out, in0, in1));
7625   return true;
7626 }
7627 
7628 static bool
7629 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7630 {
7631   rtx rperm[MAX_VECT_LEN], sel;
7632   enum machine_mode vmode = d->vmode;
7633   unsigned int i, nelt = d->nelt;
7634 
7635   /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
7636      numbering of elements for big-endian, we must reverse the order.  */
7637   if (BYTES_BIG_ENDIAN)
7638     return false;
7639 
7640   if (d->testing_p)
7641     return true;
7642 
7643   /* Generic code will try constant permutation twice.  Once with the
7644      original mode and again with the elements lowered to QImode.
7645      So wait and don't do the selector expansion ourselves.  */
7646   if (vmode != V8QImode && vmode != V16QImode)
7647     return false;
7648 
7649   for (i = 0; i < nelt; ++i)
7650     rperm[i] = GEN_INT (d->perm[i]);
7651   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7652   sel = force_reg (vmode, sel);
7653 
7654   aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7655   return true;
7656 }
7657 
7658 static bool
7659 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7660 {
7661   /* The pattern matching functions above are written to look for a small
7662      number to begin the sequence (0, 1, N/2).  If we begin with an index
7663      from the second operand, we can swap the operands.  */
7664   if (d->perm[0] >= d->nelt)
7665     {
7666       unsigned i, nelt = d->nelt;
7667       rtx x;
7668 
7669       for (i = 0; i < nelt; ++i)
7670 	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7671 
7672       x = d->op0;
7673       d->op0 = d->op1;
7674       d->op1 = x;
7675     }
7676 
7677   if (TARGET_SIMD)
7678     {
7679       if (aarch64_evpc_zip (d))
7680 	return true;
7681       else if (aarch64_evpc_uzp (d))
7682 	return true;
7683       else if (aarch64_evpc_trn (d))
7684 	return true;
7685       return aarch64_evpc_tbl (d);
7686     }
7687   return false;
7688 }
7689 
7690 /* Expand a vec_perm_const pattern.  */
7691 
7692 bool
7693 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7694 {
7695   struct expand_vec_perm_d d;
7696   int i, nelt, which;
7697 
7698   d.target = target;
7699   d.op0 = op0;
7700   d.op1 = op1;
7701 
7702   d.vmode = GET_MODE (target);
7703   gcc_assert (VECTOR_MODE_P (d.vmode));
7704   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7705   d.testing_p = false;
7706 
7707   for (i = which = 0; i < nelt; ++i)
7708     {
7709       rtx e = XVECEXP (sel, 0, i);
7710       int ei = INTVAL (e) & (2 * nelt - 1);
7711       which |= (ei < nelt ? 1 : 2);
7712       d.perm[i] = ei;
7713     }
7714 
7715   switch (which)
7716     {
7717     default:
7718       gcc_unreachable ();
7719 
7720     case 3:
7721       d.one_vector_p = false;
7722       if (!rtx_equal_p (op0, op1))
7723 	break;
7724 
7725       /* The elements of PERM do not suggest that only the first operand
7726 	 is used, but both operands are identical.  Allow easier matching
7727 	 of the permutation by folding the permutation into the single
7728 	 input vector.  */
7729       /* Fall Through.  */
7730     case 2:
7731       for (i = 0; i < nelt; ++i)
7732 	d.perm[i] &= nelt - 1;
7733       d.op0 = op1;
7734       d.one_vector_p = true;
7735       break;
7736 
7737     case 1:
7738       d.op1 = op0;
7739       d.one_vector_p = true;
7740       break;
7741     }
7742 
7743   return aarch64_expand_vec_perm_const_1 (&d);
7744 }
7745 
7746 static bool
7747 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7748 				     const unsigned char *sel)
7749 {
7750   struct expand_vec_perm_d d;
7751   unsigned int i, nelt, which;
7752   bool ret;
7753 
7754   d.vmode = vmode;
7755   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7756   d.testing_p = true;
7757   memcpy (d.perm, sel, nelt);
7758 
7759   /* Calculate whether all elements are in one vector.  */
7760   for (i = which = 0; i < nelt; ++i)
7761     {
7762       unsigned char e = d.perm[i];
7763       gcc_assert (e < 2 * nelt);
7764       which |= (e < nelt ? 1 : 2);
7765     }
7766 
7767   /* If all elements are from the second vector, reindex as if from the
7768      first vector.  */
7769   if (which == 2)
7770     for (i = 0; i < nelt; ++i)
7771       d.perm[i] -= nelt;
7772 
7773   /* Check whether the mask can be applied to a single vector.  */
7774   d.one_vector_p = (which != 3);
7775 
7776   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7777   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7778   if (!d.one_vector_p)
7779     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7780 
7781   start_sequence ();
7782   ret = aarch64_expand_vec_perm_const_1 (&d);
7783   end_sequence ();
7784 
7785   return ret;
7786 }
7787 
7788 #undef TARGET_ADDRESS_COST
7789 #define TARGET_ADDRESS_COST aarch64_address_cost
7790 
7791 /* This hook determines whether unnamed bitfields affect the alignment
7792    of the containing structure.  The hook returns true if the structure
7793    should inherit the alignment requirements of an unnamed bitfield's
7794    type.  */
7795 #undef TARGET_ALIGN_ANON_BITFIELD
7796 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7797 
7798 #undef TARGET_ASM_ALIGNED_DI_OP
7799 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7800 
7801 #undef TARGET_ASM_ALIGNED_HI_OP
7802 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7803 
7804 #undef TARGET_ASM_ALIGNED_SI_OP
7805 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7806 
7807 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7808 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7809   hook_bool_const_tree_hwi_hwi_const_tree_true
7810 
7811 #undef TARGET_ASM_FILE_START
7812 #define TARGET_ASM_FILE_START aarch64_start_file
7813 
7814 #undef TARGET_ASM_OUTPUT_MI_THUNK
7815 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7816 
7817 #undef TARGET_ASM_SELECT_RTX_SECTION
7818 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7819 
7820 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7821 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7822 
7823 #undef TARGET_BUILD_BUILTIN_VA_LIST
7824 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7825 
7826 #undef TARGET_CALLEE_COPIES
7827 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7828 
7829 #undef TARGET_CAN_ELIMINATE
7830 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7831 
7832 #undef TARGET_CANNOT_FORCE_CONST_MEM
7833 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7834 
7835 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7836 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7837 
7838 /* Only the least significant bit is used for initialization guard
7839    variables.  */
7840 #undef TARGET_CXX_GUARD_MASK_BIT
7841 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7842 
7843 #undef TARGET_C_MODE_FOR_SUFFIX
7844 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7845 
7846 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7847 #undef  TARGET_DEFAULT_TARGET_FLAGS
7848 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7849 #endif
7850 
7851 #undef TARGET_CLASS_MAX_NREGS
7852 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7853 
7854 #undef TARGET_BUILTIN_DECL
7855 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7856 
7857 #undef  TARGET_EXPAND_BUILTIN
7858 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7859 
7860 #undef TARGET_EXPAND_BUILTIN_VA_START
7861 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7862 
7863 #undef TARGET_FUNCTION_ARG
7864 #define TARGET_FUNCTION_ARG aarch64_function_arg
7865 
7866 #undef TARGET_FUNCTION_ARG_ADVANCE
7867 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7868 
7869 #undef TARGET_FUNCTION_ARG_BOUNDARY
7870 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7871 
7872 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7873 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7874 
7875 #undef TARGET_FUNCTION_VALUE
7876 #define TARGET_FUNCTION_VALUE aarch64_function_value
7877 
7878 #undef TARGET_FUNCTION_VALUE_REGNO_P
7879 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7880 
7881 #undef TARGET_FRAME_POINTER_REQUIRED
7882 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7883 
7884 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7885 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7886 
7887 #undef  TARGET_INIT_BUILTINS
7888 #define TARGET_INIT_BUILTINS  aarch64_init_builtins
7889 
7890 #undef TARGET_LEGITIMATE_ADDRESS_P
7891 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7892 
7893 #undef TARGET_LEGITIMATE_CONSTANT_P
7894 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7895 
7896 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7897 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7898 
7899 #undef TARGET_MANGLE_TYPE
7900 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7901 
7902 #undef TARGET_MEMORY_MOVE_COST
7903 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7904 
7905 #undef TARGET_MUST_PASS_IN_STACK
7906 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7907 
7908 /* This target hook should return true if accesses to volatile bitfields
7909    should use the narrowest mode possible.  It should return false if these
7910    accesses should use the bitfield container type.  */
7911 #undef TARGET_NARROW_VOLATILE_BITFIELD
7912 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7913 
7914 #undef  TARGET_OPTION_OVERRIDE
7915 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7916 
7917 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7918 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7919   aarch64_override_options_after_change
7920 
7921 #undef TARGET_PASS_BY_REFERENCE
7922 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7923 
7924 #undef TARGET_PREFERRED_RELOAD_CLASS
7925 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7926 
7927 #undef TARGET_SECONDARY_RELOAD
7928 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7929 
7930 #undef TARGET_SHIFT_TRUNCATION_MASK
7931 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7932 
7933 #undef TARGET_SETUP_INCOMING_VARARGS
7934 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7935 
7936 #undef TARGET_STRUCT_VALUE_RTX
7937 #define TARGET_STRUCT_VALUE_RTX   aarch64_struct_value_rtx
7938 
7939 #undef TARGET_REGISTER_MOVE_COST
7940 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7941 
7942 #undef TARGET_RETURN_IN_MEMORY
7943 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7944 
7945 #undef TARGET_RETURN_IN_MSB
7946 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7947 
7948 #undef TARGET_RTX_COSTS
7949 #define TARGET_RTX_COSTS aarch64_rtx_costs
7950 
7951 #undef TARGET_TRAMPOLINE_INIT
7952 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7953 
7954 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7955 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7956 
7957 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7958 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7959 
7960 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7961 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7962 
7963 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7964 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7965 
7966 #undef TARGET_VECTORIZE_BUILTINS
7967 #define TARGET_VECTORIZE_BUILTINS
7968 
7969 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7970 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7971   aarch64_builtin_vectorized_function
7972 
7973 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7974 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7975   aarch64_autovectorize_vector_sizes
7976 
7977 /* Section anchor support.  */
7978 
7979 #undef TARGET_MIN_ANCHOR_OFFSET
7980 #define TARGET_MIN_ANCHOR_OFFSET -256
7981 
7982 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7983    byte offset; we can do much more for larger data types, but have no way
7984    to determine the size of the access.  We assume accesses are aligned.  */
7985 #undef TARGET_MAX_ANCHOR_OFFSET
7986 #define TARGET_MAX_ANCHOR_OFFSET 4095
7987 
7988 #undef TARGET_VECTOR_ALIGNMENT
7989 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7990 
7991 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7992 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7993   aarch64_simd_vector_alignment_reachable
7994 
7995 /* vec_perm support.  */
7996 
7997 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7998 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7999   aarch64_vectorize_vec_perm_const_ok
8000 
8001 
8002 #undef TARGET_FIXED_CONDITION_CODE_REGS
8003 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8004 
8005 struct gcc_target targetm = TARGET_INITIALIZER;
8006 
8007 #include "gt-aarch64.h"
8008