/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "gimple.h"
#include "optabs.h"
#include "dwarf2.h"

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in a pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
                                                     const_tree,
                                                     enum machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
                                         int *, unsigned char *, int *, int *);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
                                              HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                                 const unsigned char *sel);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  NAMED_PARAM (memmov_cost, 4)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

     RTL                               Absolute
     tmp = hi (symbol_ref);            adrp  x1, foo
     dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo12:foo
                                       nop

     PIC                               TLS
     adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
     ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                       bl   __tls_get_addr
                                       nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
     adrp tmp, :tlsgd:imm
     add  dest, tmp, #:tlsgd_lo12:imm
     bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
     adrp dest, :tlsdesc:imm
     ldr  tmp, [dest, #:tlsdesc_lo12:imm]
     add  dest, dest, #:tlsdesc_lo12:imm
     blr  tmp
     mrs  tp, tpidr_el0
     add  dest, dest, tp

   Initial Exec:
     mrs  tp, tpidr_el0
     adrp tmp, :gottprel:imm
     ldr  dest, [tmp, #:gottprel_lo12:imm]
     add  dest, dest, tp

   Local Exec:
     mrs  tp, tpidr_el0
     add  t0, tp, #:tprel_hi12:imm
     add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        rtx tmp_reg = dest;
        if (can_create_pseudo_p ())
          {
            tmp_reg = gen_reg_rtx (Pmode);
          }

        emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_SMALL_GOT:
      {
        rtx tmp_reg = dest;
        if (can_create_pseudo_p ())
          {
            tmp_reg = gen_reg_rtx (Pmode);
          }
        emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
        emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
        rtx tp;

        emit_insn (gen_tlsdesc_small (imm));
        tp = aarch64_load_tp (NULL);
        emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
        rtx tmp_reg = gen_reg_rtx (Pmode);
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsie_small (tmp_reg, imm));
        emit_insn (gen_rtx_SET (Pmode, dest,
                                gen_rtx_PLUS (Pmode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TPREL:
      {
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsle_small (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

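/* Split a 128-bit (TImode) move from SRC into DST into two 64-bit move
   operations, handling moves between the general and FP/SIMD register
   files as well as partial overlap of DST and SRC.  */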
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  gcc_assert (GET_MODE (dst) == TImode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      gcc_assert (GET_MODE (src) == TImode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          emit_insn (gen_aarch64_movtilow_di (dst,
                                              gen_lowpart (word_mode, src)));
          emit_insn (gen_aarch64_movtihigh_di (dst,
                                               gen_highpart (word_mode, src)));
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
                                              src));
          emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
                                               src));
          return;
        }
      /* Fall through to r -> r cases.  */
    }

  low_dst = gen_lowpart (word_mode, dst);
  if (REG_P (low_dst)
      && reg_overlap_mentioned_p (low_dst, src))
    {
      aarch64_emit_move (gen_highpart (word_mode, dst),
                         gen_highpart_mode (word_mode, TImode, src));
      aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
    }
  else
    {
      aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
      aarch64_emit_move (gen_highpart (word_mode, dst),
                         gen_highpart_mode (word_mode, TImode, src));
    }
}

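/* Return TRUE if a 128-bit move from SRC to DST needs to be split into
   two 64-bit halves; FP-to-FP register moves can be done in a single
   instruction and need no split.  */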
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

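/* Force VALUE into a register and return that register; use X as the
   destination when new pseudos cannot be created (e.g. after reload
   has started).  */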
static rtx
aarch64_force_temporary (rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (Pmode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}

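/* Return an rtx representing REG + OFFSET in MODE; when OFFSET is not a
   valid 'add' immediate, first load it into a register, using TEMP as
   the scratch register if pseudos are unavailable.  */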
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
                    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (temp, high);
      reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
        {
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
          return;

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT:
          if (offset != const0_rtx)
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          /* FALLTHRU */

        case SYMBOL_SMALL_TPREL:
        case SYMBOL_SMALL_ABSOLUTE:
          aarch64_load_symref_appropriately (dest, imm, sty);
          return;

        default:
          gcc_unreachable ();
        }
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
        {
          rtx mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
        }

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
                              GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
        zero_match++;
      else if ((val & mask) == mask)
        one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
              emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                         GEN_INT ((val >> i) & 0xffff)));
              return;
            }
        }
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val + comp) & mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val + comp) & mask))));
          return;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT ((val - comp) | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - ((val - comp) | ~mask))));
          return;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (val | ~mask)));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - (val | ~mask))));
          return;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
          emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                  GEN_INT (aarch64_bitmasks[i])));
          emit_insn (gen_adddi3 (dest, subtarget,
                                 GEN_INT (val - aarch64_bitmasks[i])));
          return;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                         GEN_INT ((val >> j) & 0xffff)));
              return;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[i])));
                emit_insn (gen_iordi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[j])));
                return;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {
                subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                        GEN_INT (aarch64_bitmasks[j])));
                emit_insn (gen_anddi3 (dest, subtarget,
                                       GEN_INT (aarch64_bitmasks[i])));
                return;
              }
        }
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              emit_insn (gen_rtx_SET (VOIDmode, dest,
                                      GEN_INT (val & mask)));
              first = false;
            }
          else
            emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                       GEN_INT ((val >> i) & 0xffff)));
        }
    }
}

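/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */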
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           const_tree type,
                           bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays are always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
        return true;
      /* Other aggregates are passed based on their size.  */
      if (AGGREGATE_TYPE_P (type))
        size = int_size_in_bytes (type);
    }

  /* Variable-sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &dummymode, &nregs,
                                               NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
      else
        {
          int i;
          rtx par;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
            {
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;
            }
          return par;
        }
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
                                               type,
                                               &ag_mode,
                                               &count,
                                               NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

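/* Return true if an argument of MODE and TYPE is a candidate for being
   passed in SIMD/FP registers, recording the per-register mode in
   PCUM->aapcs_vfp_rmode and the number of registers needed in *NREGS.  */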
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v,
                               enum machine_mode mode,
                               const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
                                                  type,
                                                  &pcum->aapcs_vfp_rmode,
                                                  nregs,
                                                  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
        {
          if (TYPE_MODE (type) == mode)
            alignment = TYPE_ALIGN (type);
          else
            alignment = GET_MODE_ALIGNMENT (mode);
        }
      else
        alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                    const_tree type,
                    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
                                                 mode,
                                                 type,
                                                 &nregs);

  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite
     reliable.  The following code thus handles passing by SIMD/FP
     registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
        {
          pcum->aapcs_nextnvrn = nvrn + nregs;
          if (!aarch64_composite_type_p (type, mode))
            {
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
            }
          else
            {
              rtx par;
              int i;
              par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
              for (i = 0; i < nregs; i++)
                {
                  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
                                         V0_REGNUM + nvrn + i);
                  tmp = gen_rtx_EXPR_LIST
                    (VOIDmode, tmp,
                     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
                  XVECEXP (par, 0, i) = tmp;
                }
              pcum->aapcs_reg = par;
            }
          return;
        }
      else
        {
          /* C.3 NSRN is set to 8.  */
          pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
          goto on_stack;
        }
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
           + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
         rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
        {
          ++ncrn;
          gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
        }
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
         A reg is still generated for it, but the caller should be smart
         enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
        {
          pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
        }
      else
        {
          rtx par;
          int i;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
          for (i = 0; i < nregs; i++)
            {
              rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * UNITS_PER_WORD));
              XVECEXP (par, 0, i) = tmp;
            }
          pcum->aapcs_reg = par;
        }

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11 */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
                                               16 / UNITS_PER_WORD) + 1;
  return;
}

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
                      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

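/* Initialize the argument-layout state in PCUM for a call whose
   arguments will be processed by aarch64_layout_arg; all register and
   stack counters start at zero.  */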
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
                              const_tree fntype ATTRIBUTE_UNUSED,
                              rtx libname ATTRIBUTE_UNUSED,
                              const_tree fndecl ATTRIBUTE_UNUSED,
                              unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

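/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */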
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
                              enum machine_mode mode,
                              const_tree type,
                              bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
                  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

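/* Implement FUNCTION_ARG_REGNO_P.  Return true if REGNO is the number
   of a hard register in which function arguments may be passed.  */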
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
          || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral types and floating point types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else is padded upward, i.e. data in the first byte of the
     stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (and possibly the only) element of a
   block move between registers and memory.  Assuming the block is in
   memory, padding upward means that the last element is padded after its
   most significant byte, while in downward padding the last element is
   padded at its least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
                        bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
                            : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
        return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

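/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */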
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

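/* Implement TARGET_FRAME_POINTER_REQUIRED.  */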
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer'
     flag and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return
     false (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
        cfun->machine->frame.reg_offset[regno] = offset;
        offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}

/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
                                      frame_pattern,
                                      REG_NOTES (insn));
}

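/* Return true if REGNO is marked as saved on entry to the current
   function by aarch64_layout_frame.  */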
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}

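/* Save or restore (according to RESTORE) the callee-saved FP/SIMD
   registers, as DFmode values, starting at offset START_OFFSET from
   BASE_RTX and stepping by INCREMENT; adjacent saved registers are
   combined into load-pair/store-pair instructions where possible.  */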
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
                              bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (DFmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= V31_REGNUM
               && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= V31_REGNUM
              && aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (DFmode,
                                  plus_constant
                                  (Pmode,
                                   base_rtx,
                                   start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    (gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
                                       mem2, gen_rtx_REG (DFmode, regno2)));
                }
              else
                {
                  insn = emit_insn
                    (gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
                                      gen_rtx_REG (DFmode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DFmode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DFmode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}

/* OFFSET is the offset from the stack pointer at which the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
                                               bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed) ? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
        {
          rtx mem;
          mem = gen_mem_ref (Pmode,
                             plus_constant (Pmode,
                                            base_rtx,
                                            start_offset));

          for (regno2 = regno + 1;
               regno2 <= limit
               && !aarch64_register_saved_on_entry (regno2);
               regno2++)
            {
              /* Empty loop.  */
            }
          if (regno2 <= limit
              && aarch64_register_saved_on_entry (regno2))
            {
              rtx mem2;
              /* Next highest register to be saved.  */
              mem2 = gen_mem_ref (Pmode,
                                  plus_constant
                                  (Pmode,
                                   base_rtx,
                                   start_offset + increment));
              if (restore == false)
                {
                  insn = emit_insn
                    (gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
                                       mem2, gen_rtx_REG (DImode, regno2)));
                }
              else
                {
                  insn = emit_insn
                    (gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
                                      gen_rtx_REG (DImode, regno2), mem2));

                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno2));
                }

              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
              RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
              regno = regno2;
              start_offset += increment * 2;
            }
          else
            {
              if (restore == false)
                insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
              else
                {
                  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
                  add_reg_note (insn, REG_CFA_RESTORE,
                                gen_rtx_REG (DImode, regno));
                }
              start_offset += increment;
            }
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}

/* AArch64 stack frames generated by this compiler look like:

        +-------------------------------+
        |                               |
        |  incoming stack arguments     |
        |                               |
        +-------------------------------+ <-- arg_pointer_rtx
        |                               |
        |  callee-allocated save area   |
        |  for register varargs         |
        |                               |
        +-------------------------------+
        |                               |
        |  local variables              |
        |                               |
        +-------------------------------+ <-- frame_pointer_rtx
        |                               |
        |  callee-saved registers       |
        |                               |
        +-------------------------------+
        |  LR'                          |
        +-------------------------------+
        |  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
        |  dynamic allocation           |
        +-------------------------------+
        |                               |
        |  outgoing stack arguments     |
        |                               |
        +-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

1815 void
aarch64_expand_prologue(void)1816 aarch64_expand_prologue (void)
1817 {
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1822
1823 sub sp, sp, <final_adjustment_if_any>
1824 */
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1828 rtx insn;
1829
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832   gcc_assert (!cfun->machine->saved_varargs_size
1833 	      || cfun->stdarg);
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1838
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1841
1842 fp_offset = (offset
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
1845
1846   /* Store pairs and load pairs have an offset range of only -512 to 504.  */
1847 if (offset >= 512)
1848 {
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1854 efficiently. */
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856 if (offset >= 512)
1857 offset = cfun->machine->frame.saved_regs_size;
1858
1859 frame_size -= (offset + crtl->outgoing_args_size);
1860 fp_offset = 0;
1861
1862 if (frame_size >= 0x1000000)
1863 {
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1870 stack_pointer_rtx,
1871 GEN_INT (-frame_size))));
1872 }
1873 else if (frame_size > 0)
1874 {
1875 if ((frame_size & 0xfff) != frame_size)
1876 {
1877 insn = emit_insn (gen_add2_insn
1878 (stack_pointer_rtx,
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1882 }
1883 if ((frame_size & 0xfff) != 0)
1884 {
1885 insn = emit_insn (gen_add2_insn
1886 (stack_pointer_rtx,
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 }
1891 }
1892 }
1893 else
1894 frame_size = -1;
1895
1896 if (offset > 0)
1897 {
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1902 {
1903 rtx mem_fp, mem_lr;
1904
1905 if (fp_offset)
1906 {
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1913 stack_pointer_rtx,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1917 stack_pointer_rtx,
1918 fp_offset));
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1921 stack_pointer_rtx,
1922 fp_offset
1923 + UNITS_PER_WORD));
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1926 mem_lr,
1927 gen_rtx_REG (DImode,
1928 LR_REGNUM)));
1929 }
1930 else
1931 {
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1936 GEN_INT (-offset),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939 }
1940
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
1943 	     subsequent parts are only frame-related if explicitly
1944 marked. */
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1947
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 stack_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1956 stack_pointer_rtx,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1961 }
1962 else
1963 {
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1967 }
1968
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971 }
1972
1973   /* When offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1976 {
1977 if (crtl->outgoing_args_size > 0)
1978 {
1979 insn = emit_insn (gen_add2_insn
1980 (stack_pointer_rtx,
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1983 }
1984 }
1985 }
1986
1987 /* Generate the epilogue instructions for returning from a function. */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1990 {
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1993 rtx insn;
1994 rtx cfa_reg;
1995
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2002
2003 fp_offset = (offset
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2006
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008
2009   /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2010 if (offset >= 512)
2011 {
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013 if (offset >= 512)
2014 offset = cfun->machine->frame.saved_regs_size;
2015
2016 frame_size -= (offset + crtl->outgoing_args_size);
2017 fp_offset = 0;
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019 {
2020 insn = emit_insn (gen_add2_insn
2021 (stack_pointer_rtx,
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2024 }
2025 }
2026 else
2027 frame_size = -1;
2028
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2035 {
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
2044 }
2045
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2050 if (offset > 0)
2051 {
2052 if (frame_pointer_needed)
2053 {
2054 rtx mem_fp, mem_lr;
2055
2056 if (fp_offset)
2057 {
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2060 stack_pointer_rtx,
2061 fp_offset));
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2064 stack_pointer_rtx,
2065 fp_offset
2066 + UNITS_PER_WORD));
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 mem_fp,
2069 gen_rtx_REG (DImode,
2070 LR_REGNUM),
2071 mem_lr));
2072 }
2073 else
2074 {
2075 insn = emit_insn (gen_loadwb_pairdi_di
2076 (stack_pointer_rtx,
2077 stack_pointer_rtx,
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2080 GEN_INT (offset),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode, cfa_reg,
2086 offset))));
2087 }
2088
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091 	     calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2098
2099 if (fp_offset)
2100 {
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 }
2105 }
2106 else
2107 {
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 GEN_INT (offset)));
2110 RTX_FRAME_RELATED_P (insn) = 1;
2111 }
2112 }
2113
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2116 {
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2121
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124
2125 However the dwarf emitter only understands a constant
2126 register offset.
2127
2128 	 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value. The
2130 CFA is described using IP0 then SP is modified. */
2131
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2137
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2141 emit_use (ip0);
2142 }
2143
2144 if (frame_size > -1)
2145 {
2146 if (frame_size >= 0x1000000)
2147 {
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2154 stack_pointer_rtx,
2155 GEN_INT (frame_size))));
2156 }
2157 else if (frame_size > 0)
2158 {
2159 if ((frame_size & 0xfff) != 0)
2160 {
2161 insn = emit_insn (gen_add2_insn
2162 (stack_pointer_rtx,
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2166 }
2167 if ((frame_size & 0xfff) != frame_size)
2168 {
2169 insn = emit_insn (gen_add2_insn
2170 (stack_pointer_rtx,
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 }
2175 }
2176
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2179 stack_pointer_rtx,
2180 GEN_INT (offset))));
2181 }
2182
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184 if (!for_sibcall)
2185 emit_jump_insn (ret_rtx);
2186 }
2187
2188 /* Return the place to copy the exception unwinding return address to.
2189    This will probably be a stack slot, but could (in theory) be the
2190    return register.  */
2191 rtx
2192 aarch64_final_eh_return_addr (void)
2193 {
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2201 fp_offset = offset
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2204
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2207
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212      return address to, we distinguish two cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2218
2219 if (frame_pointer_needed)
2220 {
2221 if (fp_offset)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224 else
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227 }
2228
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2231
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2234 stack_pointer_rtx,
2235 fp_offset
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2238 }
2239
2240 /* Output code to build up a constant in a register. */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2243 {
2244 if (aarch64_bitmask_imm (val, DImode))
2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246 else
2247 {
2248 int i;
2249 int ncount = 0;
2250 int zcount = 0;
2251 HOST_WIDE_INT valp = val >> 16;
2252 HOST_WIDE_INT valm;
2253 HOST_WIDE_INT tval;
2254
2255 for (i = 16; i < 64; i += 16)
2256 {
2257 valm = (valp & 0xffff);
2258
2259 if (valm != 0)
2260 ++ zcount;
2261
2262 if (valm != 0xffff)
2263 ++ ncount;
2264
2265 valp >>= 16;
2266 }
2267
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271 	 while ncount is the number of MOVK instructions required if starting
2272 	 with a MOVN instruction.  Choose the sequence that needs the fewer
2273 	 instructions, preferring MOVZ instructions when the two counts are
	 the same.  */
2274 if (ncount < zcount)
2275 {
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
2278 tval = 0xffff;
2279 }
2280 else
2281 {
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
2284 tval = 0;
2285 }
2286
2287 val >>= 16;
2288
2289 for (i = 16; i < 64; i += 16)
2290 {
2291 if ((val & 0xffff) != tval)
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
2294 val >>= 16;
2295 }
2296 }
2297 }
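
/* Worked example (illustrative, xN stands for the chosen register): for
   VAL == 0x0000cafe00000000 the loop above finds zcount == 1 (one non-zero
   high chunk) and ncount == 3, so the MOVZ-based sequence wins and only two
   instructions are emitted, roughly:

	mov	xN, #0x0
	movk	xN, #0xcafe, lsl #32

   A MOVN-based sequence would instead have needed four instructions.  */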
2298
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2301 {
2302 HOST_WIDE_INT mdelta = delta;
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2305
2306 if (mdelta < 0)
2307 mdelta = -mdelta;
2308
2309 if (mdelta >= 4096 * 4096)
2310 {
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2313 }
2314 else if (mdelta > 0)
2315 {
2316 if (mdelta >= 4096)
2317 {
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 if (delta < 0)
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 else
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326 }
2327 if (mdelta % 4096 != 0)
2328 {
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332 }
2333 }
2334 }
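
/* Illustrative example (xREG and xSCRATCH are placeholders for the
   caller-chosen registers): for DELTA == 5000 the function above emits
   roughly

	mov	xSCRATCH, #1
	add	xREG, xREG, xSCRATCH, lsl #12
	add	xREG, xREG, #904

   since 5000 == 1 * 4096 + 904.  Deltas below 4096 need only the final
   add, and deltas of 4096 * 4096 or more go through
   aarch64_build_constant instead.  */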
2335
2336 /* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2342 tree function)
2343 {
2344 /* The this pointer is always in x0. Note that this differs from
2345      Arm where the this pointer may be bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2350
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
2353
2354 if (vcall_offset == 0)
2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356 else
2357 {
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2359
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2363
2364 addr = this_rtx;
2365 if (delta != 0)
2366 {
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2370 else
2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2372 }
2373
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375
2376 if (vcall_offset >= -256 && vcall_offset < 32768)
2377 addr = plus_constant (Pmode, temp0, vcall_offset);
2378 else
2379 {
2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2382 }
2383
2384       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
2386 }
2387
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2390 {
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2393 }
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2398
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
2403 final_end_function ();
2404
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
2407 }
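
/* Illustrative example: a non-virtual thunk with DELTA == 8 and
   VCALL_OFFSET == 0 reduces to

	add	x0, x0, #8
	b	<function>

   i.e. bump the this pointer in x0 and tail-call the target.  */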
2408
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411 {
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
2419 return -1;
2420
2421 return 0;
2422 }
2423
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2426 {
2427 if (!TARGET_HAVE_TLS)
2428 return false;
2429
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431 }
2432
2433
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436 {
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439
2440 if (*imm1 < *imm2)
2441 return -1;
2442 if (*imm1 > *imm2)
2443 return +1;
2444 return 0;
2445 }
2446
2447
2448 static void
2449 aarch64_build_bitmask_table (void)
2450 {
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2454
2455 for (log_e = 1; log_e <= 6; log_e++)
2456 {
2457 e = 1 << log_e;
2458 if (e == 64)
2459 mask = ~(HOST_WIDE_INT) 0;
2460 else
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2463 {
2464 for (r = 0; r < e; r++)
2465 {
2466 	  /* Set S consecutive bits to 1 (S < 64).  */
2467 	  imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 	  /* Rotate right by R.  */
2469 	  if (r != 0)
2470 	    imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 	  /* Replicate the constant depending on SIMD size; each case
	     deliberately falls through, doubling the pattern until it
	     fills all 64 bits.  */
2472 	  switch (log_e) {
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2478 case 6:
2479 break;
2480 default:
2481 gcc_unreachable ();
2482 }
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2485 }
2486 }
2487 }
2488
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
2492 }
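
/* Illustrative example: for e == 4, s == 1, r == 1 the pattern 0b0001 is
   rotated to 0b1000 and then replicated through the fall-through switch to
   0x8888888888888888, which becomes one entry in aarch64_bitmasks.  The
   nested loops generate e * (e - 1) immediates per element size, which the
   assert above checks sums to AARCH64_NUM_BITMASKS.  */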
2493
2494
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2499 {
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502 );
2503 }
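
/* E.g. 0xfff and 0xfff000 are both accepted (shift 0 and shift 12
   respectively), while 0x1001 is rejected because it has bits set in
   both halves.  */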
2504
2505
2506 /* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510 {
2511 if (GET_MODE_SIZE (mode) > 4)
2512 {
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 return 1;
2516 }
2517 else
2518 {
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2521 }
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524 }
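
/* E.g. for DImode, 0xffff0000 is accepted (a MOVZ with a 16-bit shift)
   but 0x12345678 is not, since it spans two 16-bit chunks.  */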
2525
2526
2527 /* Return true if val is a valid bitmask immediate. */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530 {
2531 if (GET_MODE_SIZE (mode) < 8)
2532 {
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2535 val |= val << 32;
2536 }
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539 }
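
/* E.g. 0x00ff00ff00ff00ff (a run of 8 ones replicated at element size 16)
   is found in the table, while 0x0000000000001234 is not a rotated run of
   ones and is rejected.  */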
2540
2541
2542 /* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546 {
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548 return 1;
2549 return aarch64_bitmask_imm (val, mode);
2550 }
2551
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554 {
2555 rtx base, offset;
2556 if (GET_CODE (x) == HIGH)
2557 return true;
2558
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562
2563 return aarch64_tls_referenced_p (x);
2564 }
2565
2566 /* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
2568
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571 {
2572 if (!HARD_REGISTER_NUM_P (regno))
2573 {
2574 if (!strict_p)
2575 return true;
2576
2577 if (!reg_renumber)
2578 return false;
2579
2580 regno = reg_renumber[regno];
2581 }
2582 return GP_REGNUM_P (regno);
2583 }
2584
2585 /* Return true if register REGNO is a valid base register for mode MODE.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2587
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590 {
2591 if (!HARD_REGISTER_NUM_P (regno))
2592 {
2593 if (!strict_p)
2594 return true;
2595
2596 if (!reg_renumber)
2597 return false;
2598
2599 regno = reg_renumber[regno];
2600 }
2601
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2609 }
2610
2611 /* Return true if X is a valid base register for mode MODE.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2613
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616 {
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2618 x = SUBREG_REG (x);
2619
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621 }
2622
2623 /* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2625
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2629 {
2630 enum aarch64_address_type type;
2631 rtx index;
2632 int shift;
2633
2634 /* (reg:P) */
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2637 {
2638 type = ADDRESS_REG_REG;
2639 index = x;
2640 shift = 0;
2641 }
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2647 {
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2651 shift = 0;
2652 }
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2660 {
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665 }
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2673 {
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2678 }
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686 {
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2693 shift = -1;
2694 }
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2703 {
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 shift = -1;
2709 }
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717 {
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2724 shift = -1;
2725 }
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2734 {
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 shift = -1;
2740 }
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2746 {
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750 }
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2756 {
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2760 }
2761 else
2762 return false;
2763
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2766
2767 if ((shift == 0 ||
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2770 && REG_P (index)
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772 {
2773 info->type = type;
2774 info->offset = index;
2775 info->shift = shift;
2776 return true;
2777 }
2778
2779 return false;
2780 }
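
/* Illustrative example: for an 8-byte access, an index such as
   (ashift (sign_extend:DI (reg:SI w1)) (const_int 3)) classifies as
   ADDRESS_REG_SXTW with shift 3, and with a base of x0 is later printed
   by aarch64_print_operand_address as "[x0,w1,sxtw 3]".  */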
2781
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784 {
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2788 }
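
/* For DImode this accepts multiples of 8 from -512 to 504 inclusive,
   i.e. the LDP/STP immediate range noted earlier in this file.  */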
2789
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2793 {
2794 return offset >= -256 && offset < 256;
2795 }
2796
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799 {
2800 return (offset >= 0
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
2803 }
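
/* For DImode this accepts multiples of 8 from 0 to 32760 inclusive,
   i.e. the unsigned scaled offsets of a single LDR/STR.  */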
2804
2805 /* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2808
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2813 {
2814 enum rtx_code code = GET_CODE (x);
2815 rtx op0, op1;
2816 bool allow_reg_index_p =
2817     outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
2818
2819 /* Don't support anything other than POST_INC or REG addressing for
2820 AdvSIMD. */
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2823 return false;
2824
2825 switch (code)
2826 {
2827 case REG:
2828 case SUBREG:
2829 info->type = ADDRESS_REG_IMM;
2830 info->base = x;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2833
2834 case PLUS:
2835 op0 = XEXP (x, 0);
2836 op1 = XEXP (x, 1);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2840 {
2841 HOST_WIDE_INT offset = INTVAL (op1);
2842
2843 info->type = ADDRESS_REG_IMM;
2844 info->base = op0;
2845 info->offset = op1;
2846
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2849 address modes are:
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852 We conservatively require an offset representable in either mode.
2853 */
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2857
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2861 else
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2864 }
2865
2866 if (allow_reg_index_p)
2867 {
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2871 {
2872 info->base = op0;
2873 return true;
2874 }
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2877 {
2878 info->base = op1;
2879 return true;
2880 }
2881 }
2882
2883 return false;
2884
2885 case POST_INC:
2886 case POST_DEC:
2887 case PRE_INC:
2888 case PRE_DEC:
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2893
2894 case POST_MODIFY:
2895 case PRE_MODIFY:
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2902 {
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2906
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2909 address modes are:
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912 We conservatively require an offset representable in either mode.
2913 */
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2917
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2921 else
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2923 }
2924 return false;
2925
2926 case CONST:
2927 case SYMBOL_REF:
2928 case LABEL_REF:
2929       /* Load literal: pc-relative constant pool entry.  Only supported
2930 for SI mode or larger. */
2931 info->type = ADDRESS_SYMBOLIC;
2932 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2933 {
2934 rtx sym, addend;
2935
2936 split_const (x, &sym, &addend);
2937 return (GET_CODE (sym) == LABEL_REF
2938 || (GET_CODE (sym) == SYMBOL_REF
2939 && CONSTANT_POOL_ADDRESS_P (sym)));
2940 }
2941 return false;
2942
2943 case LO_SUM:
2944 info->type = ADDRESS_LO_SUM;
2945 info->base = XEXP (x, 0);
2946 info->offset = XEXP (x, 1);
2947 if (allow_reg_index_p
2948 && aarch64_base_register_rtx_p (info->base, strict_p))
2949 {
2950 rtx sym, offs;
2951 split_const (info->offset, &sym, &offs);
2952 if (GET_CODE (sym) == SYMBOL_REF
2953 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2954 == SYMBOL_SMALL_ABSOLUTE))
2955 {
2956 /* The symbol and offset must be aligned to the access size. */
2957 unsigned int align;
2958 unsigned int ref_size;
2959
2960 if (CONSTANT_POOL_ADDRESS_P (sym))
2961 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2962 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2963 {
2964 tree exp = SYMBOL_REF_DECL (sym);
2965 align = TYPE_ALIGN (TREE_TYPE (exp));
2966 align = CONSTANT_ALIGNMENT (exp, align);
2967 }
2968 else if (SYMBOL_REF_DECL (sym))
2969 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 else
2971 align = BITS_PER_UNIT;
2972
2973 ref_size = GET_MODE_SIZE (mode);
2974 if (ref_size == 0)
2975 ref_size = GET_MODE_SIZE (DImode);
2976
2977 return ((INTVAL (offs) & (ref_size - 1)) == 0
2978 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2979 }
2980 }
2981 return false;
2982
2983 default:
2984 return false;
2985 }
2986 }
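
/* Illustrative example: for a DImode load-pair (OUTER_CODE == PARALLEL),
   an address like (plus (reg x0) (const_int 16)) is accepted because 16
   is 8-aligned and within the 7-bit signed scaled range, whereas
   (plus (reg x0) (const_int 520)) is rejected.  */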
2987
2988 bool
2989 aarch64_symbolic_address_p (rtx x)
2990 {
2991 rtx offset;
2992
2993 split_const (x, &x, &offset);
2994 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2995 }
2996
2997 /* Classify the base of symbolic expression X, given that X appears in
2998 context CONTEXT. */
2999 static enum aarch64_symbol_type
3000 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3001 {
3002 rtx offset;
3003 split_const (x, &x, &offset);
3004 return aarch64_classify_symbol (x, context);
3005 }
3006
3007
3008 /* Return TRUE if X is a legitimate address for accessing memory in
3009 mode MODE. */
3010 static bool
3011 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3012 {
3013 struct aarch64_address_info addr;
3014
3015 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3016 }
3017
3018 /* Return TRUE if X is a legitimate address for accessing memory in
3019 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3020 pair operation. */
3021 bool
3022 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3023 RTX_CODE outer_code, bool strict_p)
3024 {
3025 struct aarch64_address_info addr;
3026
3027 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3028 }
3029
3030 /* Return TRUE if rtx X is immediate constant 0.0.  */
3031 bool
3032 aarch64_float_const_zero_rtx_p (rtx x)
3033 {
3034 REAL_VALUE_TYPE r;
3035
3036 if (GET_MODE (x) == VOIDmode)
3037 return false;
3038
3039 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3040 if (REAL_VALUE_MINUS_ZERO (r))
3041 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3042 return REAL_VALUES_EQUAL (r, dconst0);
3043 }
3044
3045 /* Return the fixed registers used for condition codes. */
3046
3047 static bool
3048 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3049 {
3050 *p1 = CC_REGNUM;
3051 *p2 = INVALID_REGNUM;
3052 return true;
3053 }
3054
3055 enum machine_mode
3056 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3057 {
3058 /* All floating point compares return CCFP if it is an equality
3059 comparison, and CCFPE otherwise. */
3060 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3061 {
3062 switch (code)
3063 {
3064 case EQ:
3065 case NE:
3066 case UNORDERED:
3067 case ORDERED:
3068 case UNLT:
3069 case UNLE:
3070 case UNGT:
3071 case UNGE:
3072 case UNEQ:
3073 case LTGT:
3074 return CCFPmode;
3075
3076 case LT:
3077 case LE:
3078 case GT:
3079 case GE:
3080 return CCFPEmode;
3081
3082 default:
3083 gcc_unreachable ();
3084 }
3085 }
3086
3087 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088 && y == const0_rtx
3089 && (code == EQ || code == NE || code == LT || code == GE)
3090 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
3091 return CC_NZmode;
3092
3093 /* A compare with a shifted operand. Because of canonicalization,
3094 the comparison will have to be swapped when we emit the assembly
3095 code. */
3096 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3097 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3098 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3099 || GET_CODE (x) == LSHIFTRT
3100 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3101 return CC_SWPmode;
3102
3103 /* A compare of a mode narrower than SI mode against zero can be done
3104 by extending the value in the comparison. */
3105 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3106 && y == const0_rtx)
3107 /* Only use sign-extension if we really need it. */
3108 return ((code == GT || code == GE || code == LE || code == LT)
3109 ? CC_SESWPmode : CC_ZESWPmode);
3110
3111 /* For everything else, return CCmode. */
3112 return CCmode;
3113 }
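
/* Illustrative example: (compare (plus x y) (const_int 0)) with code LT
   selects CC_NZmode, since only the N and Z flags of the flag-setting add
   are needed; a float equality compare such as (eq x y) selects CCFPmode
   instead.  */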
3114
3115 static unsigned
3116 aarch64_get_condition_code (rtx x)
3117 {
3118 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3119 enum rtx_code comp_code = GET_CODE (x);
3120
3121 if (GET_MODE_CLASS (mode) != MODE_CC)
3122 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3123
3124 switch (mode)
3125 {
3126 case CCFPmode:
3127 case CCFPEmode:
3128 switch (comp_code)
3129 {
3130 case GE: return AARCH64_GE;
3131 case GT: return AARCH64_GT;
3132 case LE: return AARCH64_LS;
3133 case LT: return AARCH64_MI;
3134 case NE: return AARCH64_NE;
3135 case EQ: return AARCH64_EQ;
3136 case ORDERED: return AARCH64_VC;
3137 case UNORDERED: return AARCH64_VS;
3138 case UNLT: return AARCH64_LT;
3139 case UNLE: return AARCH64_LE;
3140 case UNGT: return AARCH64_HI;
3141 case UNGE: return AARCH64_PL;
3142 default: gcc_unreachable ();
3143 }
3144 break;
3145
3146 case CCmode:
3147 switch (comp_code)
3148 {
3149 case NE: return AARCH64_NE;
3150 case EQ: return AARCH64_EQ;
3151 case GE: return AARCH64_GE;
3152 case GT: return AARCH64_GT;
3153 case LE: return AARCH64_LE;
3154 case LT: return AARCH64_LT;
3155 case GEU: return AARCH64_CS;
3156 case GTU: return AARCH64_HI;
3157 case LEU: return AARCH64_LS;
3158 case LTU: return AARCH64_CC;
3159 default: gcc_unreachable ();
3160 }
3161 break;
3162
3163 case CC_SWPmode:
3164 case CC_ZESWPmode:
3165 case CC_SESWPmode:
3166 switch (comp_code)
3167 {
3168 case NE: return AARCH64_NE;
3169 case EQ: return AARCH64_EQ;
3170 case GE: return AARCH64_LE;
3171 case GT: return AARCH64_LT;
3172 case LE: return AARCH64_GE;
3173 case LT: return AARCH64_GT;
3174 case GEU: return AARCH64_LS;
3175 case GTU: return AARCH64_CC;
3176 case LEU: return AARCH64_CS;
3177 case LTU: return AARCH64_HI;
3178 default: gcc_unreachable ();
3179 }
3180 break;
3181
3182 case CC_NZmode:
3183 switch (comp_code)
3184 {
3185 case NE: return AARCH64_NE;
3186 case EQ: return AARCH64_EQ;
3187 case GE: return AARCH64_PL;
3188 case LT: return AARCH64_MI;
3189 default: gcc_unreachable ();
3190 }
3191 break;
3192
3193 default:
3194 gcc_unreachable ();
3195 break;
3196 }
3197 }
3198
3199 static unsigned
3200 bit_count (unsigned HOST_WIDE_INT value)
3201 {
3202 unsigned count = 0;
3203
3204 while (value)
3205 {
3206 count++;
3207 value &= value - 1;
3208 }
3209
3210 return count;
3211 }
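
/* The loop above is Kernighan's popcount: each "value &= value - 1"
   clears the lowest set bit, so e.g. 0b101100 takes three iterations.  */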
3212
3213 void
3214 aarch64_print_operand (FILE *f, rtx x, char code)
3215 {
3216 switch (code)
3217 {
3218 case 'e':
3219 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3220 {
3221 int n;
3222
3223 if (GET_CODE (x) != CONST_INT
3224 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3225 {
3226 output_operand_lossage ("invalid operand for '%%%c'", code);
3227 return;
3228 }
3229
3230 switch (n)
3231 {
3232 case 3:
3233 fputc ('b', f);
3234 break;
3235 case 4:
3236 fputc ('h', f);
3237 break;
3238 case 5:
3239 fputc ('w', f);
3240 break;
3241 default:
3242 output_operand_lossage ("invalid operand for '%%%c'", code);
3243 return;
3244 }
3245 }
3246 break;
3247
3248 case 'p':
3249 {
3250 int n;
3251
3252 /* Print N such that 2^N == X. */
3253 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3254 {
3255 output_operand_lossage ("invalid operand for '%%%c'", code);
3256 return;
3257 }
3258
3259 asm_fprintf (f, "%d", n);
3260 }
3261 break;
3262
3263 case 'P':
3264 /* Print the number of non-zero bits in X (a const_int). */
3265 if (GET_CODE (x) != CONST_INT)
3266 {
3267 output_operand_lossage ("invalid operand for '%%%c'", code);
3268 return;
3269 }
3270
3271 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3272 break;
3273
3274 case 'H':
3275 /* Print the higher numbered register of a pair (TImode) of regs. */
3276 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3277 {
3278 output_operand_lossage ("invalid operand for '%%%c'", code);
3279 return;
3280 }
3281
3282 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3283 break;
3284
3285 case 'Q':
3286 /* Print the least significant register of a pair (TImode) of regs. */
3287 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3288 {
3289 output_operand_lossage ("invalid operand for '%%%c'", code);
3290 return;
3291 }
3292 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3293 break;
3294
3295 case 'R':
3296 /* Print the most significant register of a pair (TImode) of regs. */
3297 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3298 {
3299 output_operand_lossage ("invalid operand for '%%%c'", code);
3300 return;
3301 }
3302 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3303 break;
3304
3305 case 'm':
3306 /* Print a condition (eq, ne, etc). */
3307
3308 /* CONST_TRUE_RTX means always -- that's the default. */
3309 if (x == const_true_rtx)
3310 return;
3311
3312 if (!COMPARISON_P (x))
3313 {
3314 output_operand_lossage ("invalid operand for '%%%c'", code);
3315 return;
3316 }
3317
3318 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3319 break;
3320
3321 case 'M':
3322 /* Print the inverse of a condition (eq <-> ne, etc). */
3323
3324 /* CONST_TRUE_RTX means never -- that's the default. */
3325 if (x == const_true_rtx)
3326 {
3327 fputs ("nv", f);
3328 return;
3329 }
3330
3331 if (!COMPARISON_P (x))
3332 {
3333 output_operand_lossage ("invalid operand for '%%%c'", code);
3334 return;
3335 }
3336
3337 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3338 (aarch64_get_condition_code (x))], f);
3339 break;
3340
3341 case 'b':
3342 case 'h':
3343 case 's':
3344 case 'd':
3345 case 'q':
3346 /* Print a scalar FP/SIMD register name. */
3347 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3348 {
3349 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3350 return;
3351 }
3352 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3353 break;
3354
3355 case 'S':
3356 case 'T':
3357 case 'U':
3358 case 'V':
3359 /* Print the first FP/SIMD register name in a list. */
3360 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3361 {
3362 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3363 return;
3364 }
3365 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3366 REGNO (x) - V0_REGNUM + (code - 'S'));
3367 break;
3368
3369 case 'X':
3370 /* Print integer constant in hex. */
3371 if (GET_CODE (x) != CONST_INT)
3372 {
3373 output_operand_lossage ("invalid operand for '%%%c'", code);
3374 return;
3375 }
3376       asm_fprintf (f, "0x%wx", UINTVAL (x));
3377 break;
3378
3379 case 'w':
3380 case 'x':
3381 /* Print a general register name or the zero register (32-bit or
3382 64-bit). */
3383 if (x == const0_rtx
3384 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3385 {
3386 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3387 break;
3388 }
3389
3390 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3391 {
3392 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3393 REGNO (x) - R0_REGNUM);
3394 break;
3395 }
3396
3397 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3398 {
3399 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3400 break;
3401 }
3402
3403 /* Fall through */
3404
3405 case 0:
3406 /* Print a normal operand, if it's a general register, then we
3407 assume DImode. */
3408 if (x == NULL)
3409 {
3410 output_operand_lossage ("missing operand");
3411 return;
3412 }
3413
3414 switch (GET_CODE (x))
3415 {
3416 case REG:
3417 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3418 break;
3419
3420 case MEM:
3421 aarch64_memory_reference_mode = GET_MODE (x);
3422 output_address (XEXP (x, 0));
3423 break;
3424
3425 case LABEL_REF:
3426 case SYMBOL_REF:
3427 output_addr_const (asm_out_file, x);
3428 break;
3429
3430 case CONST_INT:
3431 asm_fprintf (f, "%wd", INTVAL (x));
3432 break;
3433
3434 case CONST_VECTOR:
3435 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3436 {
3437 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3438 HOST_WIDE_INT_MIN,
3439 HOST_WIDE_INT_MAX));
3440 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3441 }
3442 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3443 {
3444 fputc ('0', f);
3445 }
3446 else
3447 gcc_unreachable ();
3448 break;
3449
3450 case CONST_DOUBLE:
3451 /* CONST_DOUBLE can represent a double-width integer.
3452 In this case, the mode of x is VOIDmode. */
3453 if (GET_MODE (x) == VOIDmode)
3454 ; /* Do Nothing. */
3455 else if (aarch64_float_const_zero_rtx_p (x))
3456 {
3457 fputc ('0', f);
3458 break;
3459 }
3460 else if (aarch64_float_const_representable_p (x))
3461 {
3462 #define buf_size 20
3463 char float_buf[buf_size] = {'\0'};
3464 REAL_VALUE_TYPE r;
3465 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3466 real_to_decimal_for_mode (float_buf, &r,
3467 buf_size, buf_size,
3468 1, GET_MODE (x));
3469 asm_fprintf (asm_out_file, "%s", float_buf);
3470 break;
3471 #undef buf_size
3472 }
3473 output_operand_lossage ("invalid constant");
3474 return;
3475 default:
3476 output_operand_lossage ("invalid operand");
3477 return;
3478 }
3479 break;
3480
3481 case 'A':
3482 if (GET_CODE (x) == HIGH)
3483 x = XEXP (x, 0);
3484
3485 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3486 {
3487 case SYMBOL_SMALL_GOT:
3488 asm_fprintf (asm_out_file, ":got:");
3489 break;
3490
3491 case SYMBOL_SMALL_TLSGD:
3492 asm_fprintf (asm_out_file, ":tlsgd:");
3493 break;
3494
3495 case SYMBOL_SMALL_TLSDESC:
3496 asm_fprintf (asm_out_file, ":tlsdesc:");
3497 break;
3498
3499 case SYMBOL_SMALL_GOTTPREL:
3500 asm_fprintf (asm_out_file, ":gottprel:");
3501 break;
3502
3503 case SYMBOL_SMALL_TPREL:
3504 asm_fprintf (asm_out_file, ":tprel:");
3505 break;
3506
3507 default:
3508 break;
3509 }
3510 output_addr_const (asm_out_file, x);
3511 break;
3512
3513 case 'L':
3514 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3515 {
3516 case SYMBOL_SMALL_GOT:
3517 asm_fprintf (asm_out_file, ":lo12:");
3518 break;
3519
3520 case SYMBOL_SMALL_TLSGD:
3521 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3522 break;
3523
3524 case SYMBOL_SMALL_TLSDESC:
3525 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3526 break;
3527
3528 case SYMBOL_SMALL_GOTTPREL:
3529 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3530 break;
3531
3532 case SYMBOL_SMALL_TPREL:
3533 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3534 break;
3535
3536 default:
3537 break;
3538 }
3539 output_addr_const (asm_out_file, x);
3540 break;
3541
3542 case 'G':
3543
3544 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3545 {
3546 case SYMBOL_SMALL_TPREL:
3547 asm_fprintf (asm_out_file, ":tprel_hi12:");
3548 break;
3549 default:
3550 break;
3551 }
3552 output_addr_const (asm_out_file, x);
3553 break;
3554
3555 default:
3556 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3557 return;
3558 }
3559 }
3560
3561 void
3562 aarch64_print_operand_address (FILE *f, rtx x)
3563 {
3564 struct aarch64_address_info addr;
3565
3566 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3567 MEM, true))
3568 switch (addr.type)
3569 {
3570 case ADDRESS_REG_IMM:
3571 if (addr.offset == const0_rtx)
3572 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3573 else
3574 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3575 INTVAL (addr.offset));
3576 return;
3577
3578 case ADDRESS_REG_REG:
3579 if (addr.shift == 0)
3580 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3581 reg_names [REGNO (addr.offset)]);
3582 else
3583 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3584 reg_names [REGNO (addr.offset)], addr.shift);
3585 return;
3586
3587 case ADDRESS_REG_UXTW:
3588 if (addr.shift == 0)
3589 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3590 REGNO (addr.offset) - R0_REGNUM);
3591 else
3592 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3593 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3594 return;
3595
3596 case ADDRESS_REG_SXTW:
3597 if (addr.shift == 0)
3598 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3599 REGNO (addr.offset) - R0_REGNUM);
3600 else
3601 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3602 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3603 return;
3604
3605 case ADDRESS_REG_WB:
3606 switch (GET_CODE (x))
3607 {
3608 case PRE_INC:
3609 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3610 GET_MODE_SIZE (aarch64_memory_reference_mode));
3611 return;
3612 case POST_INC:
3613 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3614 GET_MODE_SIZE (aarch64_memory_reference_mode));
3615 return;
3616 case PRE_DEC:
3617 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3618 GET_MODE_SIZE (aarch64_memory_reference_mode));
3619 return;
3620 case POST_DEC:
3621 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3622 GET_MODE_SIZE (aarch64_memory_reference_mode));
3623 return;
3624 case PRE_MODIFY:
3625 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3626 INTVAL (addr.offset));
3627 return;
3628 case POST_MODIFY:
3629 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3630 INTVAL (addr.offset));
3631 return;
3632 default:
3633 break;
3634 }
3635 break;
3636
3637 case ADDRESS_LO_SUM:
3638 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3639 output_addr_const (f, addr.offset);
3640 asm_fprintf (f, "]");
3641 return;
3642
3643 case ADDRESS_SYMBOLIC:
3644 break;
3645 }
3646
3647 output_addr_const (f, x);
3648 }
3649
3650 void
3651 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3652 int labelno ATTRIBUTE_UNUSED)
3653 {
3654 sorry ("function profiling");
3655 }
3656
3657 bool
3658 aarch64_label_mentioned_p (rtx x)
3659 {
3660 const char *fmt;
3661 int i;
3662
3663 if (GET_CODE (x) == LABEL_REF)
3664 return true;
3665
3666 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3667 referencing instruction, but they are constant offsets, not
3668 symbols. */
3669 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3670 return false;
3671
3672 fmt = GET_RTX_FORMAT (GET_CODE (x));
3673 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3674 {
3675 if (fmt[i] == 'E')
3676 {
3677 int j;
3678
3679 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3680 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3681 return 1;
3682 }
3683 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3684 return 1;
3685 }
3686
3687 return 0;
3688 }
3689
3690 /* Implement REGNO_REG_CLASS. */
3691
3692 enum reg_class
3693 aarch64_regno_regclass (unsigned regno)
3694 {
3695 if (GP_REGNUM_P (regno))
3696 return CORE_REGS;
3697
3698 if (regno == SP_REGNUM)
3699 return STACK_REG;
3700
3701 if (regno == FRAME_POINTER_REGNUM
3702 || regno == ARG_POINTER_REGNUM)
3703 return CORE_REGS;
3704
3705 if (FP_REGNUM_P (regno))
3706 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3707
3708 return NO_REGS;
3709 }
3710
3711 /* Try a machine-dependent way of reloading an illegitimate address
3712 operand. If we find one, push the reload and return the new rtx. */
3713
3714 rtx
3715 aarch64_legitimize_reload_address (rtx *x_p,
3716 enum machine_mode mode,
3717 int opnum, int type,
3718 int ind_levels ATTRIBUTE_UNUSED)
3719 {
3720 rtx x = *x_p;
3721
3722 /* Do not allow mem (plus (reg, const)) if vector mode. */
3723 if (aarch64_vector_mode_p (mode)
3724 && GET_CODE (x) == PLUS
3725 && REG_P (XEXP (x, 0))
3726 && CONST_INT_P (XEXP (x, 1)))
3727 {
3728 rtx orig_rtx = x;
3729 x = copy_rtx (x);
3730 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3731 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3732 opnum, (enum reload_type) type);
3733 return x;
3734 }
3735
3736 /* We must recognize output that we have already generated ourselves. */
3737 if (GET_CODE (x) == PLUS
3738 && GET_CODE (XEXP (x, 0)) == PLUS
3739 && REG_P (XEXP (XEXP (x, 0), 0))
3740 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3741 && CONST_INT_P (XEXP (x, 1)))
3742 {
3743 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3744 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3745 opnum, (enum reload_type) type);
3746 return x;
3747 }
3748
3749 /* We wish to handle large displacements off a base register by splitting
3750 the addend across an add and the mem insn. This can cut the number of
3751 extra insns needed from 3 to 1. It is only useful for load/store of a
3752 single register with a 12-bit offset field. */
3753 if (GET_CODE (x) == PLUS
3754 && REG_P (XEXP (x, 0))
3755 && CONST_INT_P (XEXP (x, 1))
3756 && HARD_REGISTER_P (XEXP (x, 0))
3757 && mode != TImode
3758 && mode != TFmode
3759 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3760 {
3761 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3762 HOST_WIDE_INT low = val & 0xfff;
3763 HOST_WIDE_INT high = val - low;
3764 HOST_WIDE_INT offs;
3765 rtx cst;
3766
3767 /* Punt on BLKmode (zero-size) offsets: we cannot ascertain BLKmode
3768 alignment, so leave those accesses to the generic reload code. */
3769 if (GET_MODE_SIZE (mode) == 0)
3770 return NULL_RTX;
3771
3772 offs = low % GET_MODE_SIZE (mode);
3773
3774 /* Align misaligned offset by adjusting high part to compensate. */
3775 if (offs != 0)
3776 {
3777 if (aarch64_uimm12_shift (high + offs))
3778 {
3779 /* Align down. */
3780 low = low - offs;
3781 high = high + offs;
3782 }
3783 else
3784 {
3785 /* Align up. */
3786 offs = GET_MODE_SIZE (mode) - offs;
3787 low = low + offs;
3788 high = high + (low & 0x1000) - offs;
3789 low &= 0xfff;
3790 }
3791 }
3792
3793 /* Check for overflow. */
3794 if (high + low != val)
3795 return NULL_RTX;
3796
3797 cst = GEN_INT (high);
3798 if (!aarch64_uimm12_shift (high))
3799 cst = force_const_mem (Pmode, cst);
3800
3801 /* Reload high part into base reg, leaving the low part
3802 in the mem instruction. */
3803 x = gen_rtx_PLUS (Pmode,
3804 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3805 GEN_INT (low));
3806
3807 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3808 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3809 opnum, (enum reload_type) type);
3810 return x;
3811 }
3812
3813 return NULL_RTX;
3814 }
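/* As a worked example of the splitting above: a minimal, stand-alone
   sketch (not part of the build; the displacement and mode size are
   made up for illustration). It mirrors the low/high arithmetic,
   including the re-alignment of LOW to the access size, and checks
   that the two parts still sum to the original displacement. */
#if 0
#include <stdio.h>

int
main (void)
{
  long val = 0x3457;   /* hypothetical large displacement */
  long size = 4;       /* hypothetical SImode access size in bytes */
  long low = val & 0xfff;
  long high = val - low;
  long offs = low % size;

  if (offs != 0)
    {
      /* "Align down" branch: move the misalignment into HIGH. The
         real code first checks that HIGH + OFFS is still a valid
         shifted 12-bit immediate, and otherwise aligns up. */
      low -= offs;
      high += offs;
    }

  /* LOW is now a multiple of SIZE, and HIGH + LOW == VAL. */
  printf ("high=%#lx low=%#lx ok=%d\n", high, low, high + low == val);
  return 0;
}
#endif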
3815
3816
3817 static reg_class_t
3818 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3819 reg_class_t rclass,
3820 enum machine_mode mode,
3821 secondary_reload_info *sri)
3822 {
3823 /* Address expressions of the form PLUS (SP, large_offset) need two
3824 scratch registers, one for the constant, and one for holding a
3825 copy of SP, since SP cannot be used on the RHS of an add-reg
3826 instruction. */
3827 if (mode == DImode
3828 && GET_CODE (x) == PLUS
3829 && XEXP (x, 0) == stack_pointer_rtx
3830 && CONST_INT_P (XEXP (x, 1))
3831 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3832 {
3833 sri->icode = CODE_FOR_reload_sp_immediate;
3834 return NO_REGS;
3835 }
3836
3837 /* Without the TARGET_SIMD instructions we cannot move a Q register
3838 to a Q register directly. We need a scratch. */
3839 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3840 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3841 && reg_class_subset_p (rclass, FP_REGS))
3842 {
3843 if (mode == TFmode)
3844 sri->icode = CODE_FOR_aarch64_reload_movtf;
3845 else if (mode == TImode)
3846 sri->icode = CODE_FOR_aarch64_reload_movti;
3847 return NO_REGS;
3848 }
3849
3850 /* A TFmode or TImode memory access should be handled via FP_REGS,
3851 because AArch64 has richer addressing modes for LDR/STR instructions
3852 than for LDP/STP instructions. */
3853 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3854 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3855 return FP_REGS;
3856
3857 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3858 return CORE_REGS;
3859
3860 return NO_REGS;
3861 }
3862
3863 static bool
3864 aarch64_can_eliminate (const int from, const int to)
3865 {
3866 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3867 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3868
3869 if (frame_pointer_needed)
3870 {
3871 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3872 return true;
3873 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3874 return false;
3875 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3876 && !cfun->calls_alloca)
3877 return true;
3878 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3879 return true;
3880 return false;
3881 }
3882 else
3883 {
3884 /* If we decided that we didn't need a leaf frame pointer but then used
3885 LR in the function, then we'll want a frame pointer after all, so
3886 prevent this elimination to ensure a frame pointer is used.
3887
3888 NOTE: the original value of flag_omit_frame_pointer gets trashed
3889 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3890 of faked_omit_frame_pointer here (which is true when we always
3891 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3892 pointers when LR is clobbered). */
3893 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3894 && df_regs_ever_live_p (LR_REGNUM)
3895 && faked_omit_frame_pointer)
3896 return false;
3897 }
3898
3899 return true;
3900 }
3901
3902 HOST_WIDE_INT
3903 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3904 {
3905 HOST_WIDE_INT frame_size;
3906 HOST_WIDE_INT offset;
3907
3908 aarch64_layout_frame ();
3909 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3910 + crtl->outgoing_args_size
3911 + cfun->machine->saved_varargs_size);
3912
3913 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3914 offset = frame_size;
3915
3916 if (to == HARD_FRAME_POINTER_REGNUM)
3917 {
3918 if (from == ARG_POINTER_REGNUM)
3919 return offset - crtl->outgoing_args_size;
3920
3921 if (from == FRAME_POINTER_REGNUM)
3922 return cfun->machine->frame.saved_regs_size;
3923 }
3924
3925 if (to == STACK_POINTER_REGNUM)
3926 {
3927 if (from == FRAME_POINTER_REGNUM)
3928 {
3929 HOST_WIDE_INT elim = crtl->outgoing_args_size
3930 + cfun->machine->frame.saved_regs_size
3931 - cfun->machine->frame.fp_lr_offset;
3932 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3933 return elim;
3934 }
3935 }
3936
3937 return offset;
3938 }
3939
3940
3941 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3942 previous frame. */
3943
3944 rtx
3945 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3946 {
3947 if (count != 0)
3948 return const0_rtx;
3949 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3950 }
3951
3952
3953 static void
3954 aarch64_asm_trampoline_template (FILE *f)
3955 {
3956 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3957 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3958 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3959 assemble_aligned_integer (4, const0_rtx);
3960 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3961 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3962 }
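/* For reference, the 32-byte trampoline emitted above lays out as
   follows (a sketch; byte offsets on the left, and assuming IP1 is
   x17 and the static chain register is x18, as defined in aarch64.h):

      0:  ldr  x17, .+16    ; load the target function address
      4:  ldr  x18, .+20    ; load the static chain value
      8:  br   x17
     12:  .word  0          ; padding to an 8-byte boundary
     16:  .dword <fnaddr>   ; filled in by aarch64_trampoline_init
     24:  .dword <chain>    ; likewise  */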
3963
3964 unsigned
3965 aarch64_trampoline_size (void)
3966 {
3967 return 32; /* 3 insns + padding + 2 dwords. */
3968 }
3969
3970 static void
3971 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3972 {
3973 rtx fnaddr, mem, a_tramp;
3974
3975 /* We don't need to copy the trailing D-words; we fill those in below. */
3976 emit_block_move (m_tramp, assemble_trampoline_template (),
3977 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3978 mem = adjust_address (m_tramp, DImode, 16);
3979 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3980 emit_move_insn (mem, fnaddr);
3981
3982 mem = adjust_address (m_tramp, DImode, 24);
3983 emit_move_insn (mem, chain_value);
3984
3985 /* XXX We should really define a "clear_cache" pattern and use
3986 gen_clear_cache(). */
3987 a_tramp = XEXP (m_tramp, 0);
3988 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3989 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3990 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3991 }
3992
3993 static unsigned char
3994 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3995 {
3996 switch (regclass)
3997 {
3998 case CORE_REGS:
3999 case POINTER_REGS:
4000 case GENERAL_REGS:
4001 case ALL_REGS:
4002 case FP_REGS:
4003 case FP_LO_REGS:
4004 return
4005 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4006 (GET_MODE_SIZE (mode) + 7) / 8;
4007 case STACK_REG:
4008 return 1;
4009
4010 case NO_REGS:
4011 return 0;
4012
4013 default:
4014 break;
4015 }
4016 gcc_unreachable ();
4017 }
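/* For example, with 8-byte general registers and 16-byte vector
   registers this computes:

     DImode   in CORE_REGS -> (8 + 7) / 8    = 1 register
     TImode   in CORE_REGS -> (16 + 7) / 8   = 2 registers
     V4SImode in FP_REGS   -> (16 + 15) / 16 = 1 Q register.  */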
4018
4019 static reg_class_t
4020 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4021 {
4022 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4023 ? GENERAL_REGS : regclass);
4024 }
4025
4026 void
4027 aarch64_asm_output_labelref (FILE* f, const char *name)
4028 {
4029 asm_fprintf (f, "%U%s", name);
4030 }
4031
4032 static void
4033 aarch64_elf_asm_constructor (rtx symbol, int priority)
4034 {
4035 if (priority == DEFAULT_INIT_PRIORITY)
4036 default_ctor_section_asm_out_constructor (symbol, priority);
4037 else
4038 {
4039 section *s;
4040 char buf[18];
4041 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4042 s = get_section (buf, SECTION_WRITE, NULL);
4043 switch_to_section (s);
4044 assemble_align (POINTER_SIZE);
4045 fputs ("\t.dword\t", asm_out_file);
4046 output_addr_const (asm_out_file, symbol);
4047 fputc ('\n', asm_out_file);
4048 }
4049 }
4050
4051 static void
4052 aarch64_elf_asm_destructor (rtx symbol, int priority)
4053 {
4054 if (priority == DEFAULT_INIT_PRIORITY)
4055 default_dtor_section_asm_out_destructor (symbol, priority);
4056 else
4057 {
4058 section *s;
4059 char buf[18];
4060 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4061 s = get_section (buf, SECTION_WRITE, NULL);
4062 switch_to_section (s);
4063 assemble_align (POINTER_SIZE);
4064 fputs ("\t.dword\t", asm_out_file);
4065 output_addr_const (asm_out_file, symbol);
4066 fputc ('\n', asm_out_file);
4067 }
4068 }
4069
4070 const char*
4071 aarch64_output_casesi (rtx *operands)
4072 {
4073 char buf[100];
4074 char label[100];
4075 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4076 int index;
4077 static const char *const patterns[4][2] =
4078 {
4079 {
4080 "ldrb\t%w3, [%0,%w1,uxtw]",
4081 "add\t%3, %4, %w3, sxtb #2"
4082 },
4083 {
4084 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4085 "add\t%3, %4, %w3, sxth #2"
4086 },
4087 {
4088 "ldr\t%w3, [%0,%w1,uxtw #2]",
4089 "add\t%3, %4, %w3, sxtw #2"
4090 },
4091 /* We assume that DImode is only generated when not optimizing and
4092 that we don't really need 64-bit address offsets. That would
4093 imply an object file with 8GB of code in a single function! */
4094 {
4095 "ldr\t%w3, [%0,%w1,uxtw #2]",
4096 "add\t%3, %4, %w3, sxtw #2"
4097 }
4098 };
4099
4100 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4101
4102 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4103
4104 gcc_assert (index >= 0 && index <= 3);
4105
4106 /* Need to implement table size reduction, by changing the code below. */
4107 output_asm_insn (patterns[index][0], operands);
4108 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4109 snprintf (buf, sizeof (buf),
4110 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4111 output_asm_insn (buf, operands);
4112 output_asm_insn (patterns[index][1], operands);
4113 output_asm_insn ("br\t%3", operands);
4114 assemble_label (asm_out_file, label);
4115 return "";
4116 }
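/* Schematically, for a HImode dispatch table the sequence emitted
   above is (illustrative register choices; %0..%4 are the operands):

     ldrh  w3, [x0, w1, uxtw #1]  ; load the 16-bit table entry
     adr   x4, .Lrtx<N>           ; base the offsets are relative to
     add   x3, x4, w3, sxth #2    ; scale the entry by 4 and add
     br    x3
   .Lrtx<N>:                      ; the ADDR_DIFF_VEC data follows  */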
4117
4118
4119 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4120 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4121 operator. */
4122
4123 int
4124 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4125 {
4126 if (shift >= 0 && shift <= 3)
4127 {
4128 int size;
4129 for (size = 8; size <= 32; size *= 2)
4130 {
4131 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4132 if (mask == bits << shift)
4133 return size;
4134 }
4135 }
4136 return 0;
4137 }
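/* Example: for (and (mult (reg) (const_int 2)) (const_int 0x1fe)) the
   caller passes SHIFT = 1 and MASK = 0x1fe. Since 0x1fe == 0xff << 1,
   the function returns 8, i.e. the operand can be expressed as a UXTB
   extend with a left shift of one.  */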
4138
4139 static bool
4140 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4141 const_rtx x ATTRIBUTE_UNUSED)
4142 {
4143 /* We can't use blocks for constants when we're using a per-function
4144 constant pool. */
4145 return false;
4146 }
4147
4148 static section *
4149 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4150 rtx x ATTRIBUTE_UNUSED,
4151 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4152 {
4153 /* Force all constant pool entries into the current function section. */
4154 return function_section (current_function_decl);
4155 }
4156
4157
4158 /* Costs. */
4159
4160 /* Helper function for rtx cost calculation. Strip a shift expression
4161 from X. Returns the inner operand if successful, or the original
4162 expression on failure. */
4163 static rtx
4164 aarch64_strip_shift (rtx x)
4165 {
4166 rtx op = x;
4167
4168 if ((GET_CODE (op) == ASHIFT
4169 || GET_CODE (op) == ASHIFTRT
4170 || GET_CODE (op) == LSHIFTRT)
4171 && CONST_INT_P (XEXP (op, 1)))
4172 return XEXP (op, 0);
4173
4174 if (GET_CODE (op) == MULT
4175 && CONST_INT_P (XEXP (op, 1))
4176 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4177 return XEXP (op, 0);
4178
4179 return x;
4180 }
4181
4182 /* Helper function for rtx cost calculation. Strip a shift or extend
4183 expression from X. Returns the inner operand if successful, or the
4184 original expression on failure. We deal with a number of possible
4185 canonicalization variations here. */
4186 static rtx
4187 aarch64_strip_shift_or_extend (rtx x)
4188 {
4189 rtx op = x;
4190
4191 /* Zero and sign extraction of a widened value. */
4192 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4193 && XEXP (op, 2) == const0_rtx
4194 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4195 XEXP (op, 1)))
4196 return XEXP (XEXP (op, 0), 0);
4197
4198 /* It can also be represented (for zero-extend) as an AND with an
4199 immediate. */
4200 if (GET_CODE (op) == AND
4201 && GET_CODE (XEXP (op, 0)) == MULT
4202 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4203 && CONST_INT_P (XEXP (op, 1))
4204 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4205 INTVAL (XEXP (op, 1))) != 0)
4206 return XEXP (XEXP (op, 0), 0);
4207
4208 /* Now handle extended register, as this may also have an optional
4209 left shift by 1..4. */
4210 if (GET_CODE (op) == ASHIFT
4211 && CONST_INT_P (XEXP (op, 1))
4212 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4213 op = XEXP (op, 0);
4214
4215 if (GET_CODE (op) == ZERO_EXTEND
4216 || GET_CODE (op) == SIGN_EXTEND)
4217 op = XEXP (op, 0);
4218
4219 if (op != x)
4220 return op;
4221
4222 return aarch64_strip_shift (x);
4223 }
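/* Examples of forms the two helpers above accept (canonical RTL
   sketches; REG stands for any register operand):

     (ashift (reg) (const_int 3))   -> (reg)
     (mult (reg) (const_int 8))     -> (reg)   ; power-of-two scale
     (zero_extend:DI (reg:SI))      -> (reg:SI)
     (and (mult (reg) (const_int 4)) (const_int 0x3fc))
                                    -> (reg)   ; uxtb with lsl #2

   Anything else comes back unchanged.  */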
4224
4225 /* Calculate the cost of calculating X, storing it in *COST. Result
4226 is true if the total cost of the operation has now been calculated. */
4227 static bool
4228 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4229 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4230 {
4231 rtx op0, op1;
4232 const struct cpu_rtx_cost_table *extra_cost
4233 = aarch64_tune_params->insn_extra_cost;
4234
4235 switch (code)
4236 {
4237 case SET:
4238 op0 = SET_DEST (x);
4239 op1 = SET_SRC (x);
4240
4241 switch (GET_CODE (op0))
4242 {
4243 case MEM:
4244 if (speed)
4245 *cost += extra_cost->memory_store;
4246
4247 if (op1 != const0_rtx)
4248 *cost += rtx_cost (op1, SET, 1, speed);
4249 return true;
4250
4251 case SUBREG:
4252 if (! REG_P (SUBREG_REG (op0)))
4253 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4254 /* Fall through. */
4255 case REG:
4256 /* Cost is just the cost of the RHS of the set. */
4257 *cost += rtx_cost (op1, SET, 1, speed);
4258 return true;
4259
4260 case ZERO_EXTRACT: /* Bit-field insertion. */
4261 case SIGN_EXTRACT:
4262 /* Strip any redundant widening of the RHS to meet the width of
4263 the target. */
4264 if (GET_CODE (op1) == SUBREG)
4265 op1 = SUBREG_REG (op1);
4266 if ((GET_CODE (op1) == ZERO_EXTEND
4267 || GET_CODE (op1) == SIGN_EXTEND)
4268 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4269 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4270 >= INTVAL (XEXP (op0, 1))))
4271 op1 = XEXP (op1, 0);
4272 *cost += rtx_cost (op1, SET, 1, speed);
4273 return true;
4274
4275 default:
4276 break;
4277 }
4278 return false;
4279
4280 case MEM:
4281 if (speed)
4282 *cost += extra_cost->memory_load;
4283
4284 return true;
4285
4286 case NEG:
4287 op0 = CONST0_RTX (GET_MODE (x));
4288 op1 = XEXP (x, 0);
4289 goto cost_minus;
4290
4291 case COMPARE:
4292 op0 = XEXP (x, 0);
4293 op1 = XEXP (x, 1);
4294
4295 if (op1 == const0_rtx
4296 && GET_CODE (op0) == AND)
4297 {
4298 x = op0;
4299 goto cost_logic;
4300 }
4301
4302 /* Comparisons can work if the order is swapped.
4303 Canonicalization puts the more complex operation first, but
4304 we want it in op1. */
4305 if (! (REG_P (op0)
4306 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4307 {
4308 op0 = XEXP (x, 1);
4309 op1 = XEXP (x, 0);
4310 }
4311 goto cost_minus;
4312
4313 case MINUS:
4314 op0 = XEXP (x, 0);
4315 op1 = XEXP (x, 1);
4316
4317 cost_minus:
4318 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4319 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4320 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4321 {
4322 if (op0 != const0_rtx)
4323 *cost += rtx_cost (op0, MINUS, 0, speed);
4324
4325 if (CONST_INT_P (op1))
4326 {
4327 if (!aarch64_uimm12_shift (INTVAL (op1)))
4328 *cost += rtx_cost (op1, MINUS, 1, speed);
4329 }
4330 else
4331 {
4332 op1 = aarch64_strip_shift_or_extend (op1);
4333 *cost += rtx_cost (op1, MINUS, 1, speed);
4334 }
4335 return true;
4336 }
4337
4338 return false;
4339
4340 case PLUS:
4341 op0 = XEXP (x, 0);
4342 op1 = XEXP (x, 1);
4343
4344 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4345 {
4346 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4347 {
4348 *cost += rtx_cost (op0, PLUS, 0, speed);
4349 }
4350 else
4351 {
4352 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4353
4354 if (new_op0 == op0
4355 && GET_CODE (op0) == MULT)
4356 {
4357 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4358 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4359 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4360 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4361 {
4362 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4363 speed)
4364 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4365 speed)
4366 + rtx_cost (op1, PLUS, 1, speed));
4367 if (speed)
4368 *cost += extra_cost->int_multiply_extend_add;
4369 return true;
4370 }
4371 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4372 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4373 + rtx_cost (op1, PLUS, 1, speed));
4374
4375 if (speed)
4376 *cost += extra_cost->int_multiply_add;
return true;
4377 }
4378
4379 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4380 + rtx_cost (op1, PLUS, 1, speed));
4381 }
4382 return true;
4383 }
4384
4385 return false;
4386
4387 case IOR:
4388 case XOR:
4389 case AND:
4390 cost_logic:
4391 op0 = XEXP (x, 0);
4392 op1 = XEXP (x, 1);
4393
4394 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4395 {
4396 if (CONST_INT_P (op1)
4397 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4398 {
4399 *cost += rtx_cost (op0, AND, 0, speed);
4400 }
4401 else
4402 {
4403 if (GET_CODE (op0) == NOT)
4404 op0 = XEXP (op0, 0);
4405 op0 = aarch64_strip_shift (op0);
4406 *cost += (rtx_cost (op0, AND, 0, speed)
4407 + rtx_cost (op1, AND, 1, speed));
4408 }
4409 return true;
4410 }
4411 return false;
4412
4413 case ZERO_EXTEND:
4414 if ((GET_MODE (x) == DImode
4415 && GET_MODE (XEXP (x, 0)) == SImode)
4416 || GET_CODE (XEXP (x, 0)) == MEM)
4417 {
4418 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4419 return true;
4420 }
4421 return false;
4422
4423 case SIGN_EXTEND:
4424 if (GET_CODE (XEXP (x, 0)) == MEM)
4425 {
4426 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4427 return true;
4428 }
4429 return false;
4430
4431 case ROTATE:
4432 if (!CONST_INT_P (XEXP (x, 1)))
4433 *cost += COSTS_N_INSNS (2);
4434 /* Fall through. */
4435 case ROTATERT:
4436 case LSHIFTRT:
4437 case ASHIFT:
4438 case ASHIFTRT:
4439
4440 /* Shifting by a register often takes an extra cycle. */
4441 if (speed && !CONST_INT_P (XEXP (x, 1)))
4442 *cost += extra_cost->register_shift;
4443
4444 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4445 return true;
4446
4447 case HIGH:
4448 if (!CONSTANT_P (XEXP (x, 0)))
4449 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4450 return true;
4451
4452 case LO_SUM:
4453 if (!CONSTANT_P (XEXP (x, 1)))
4454 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4455 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4456 return true;
4457
4458 case ZERO_EXTRACT:
4459 case SIGN_EXTRACT:
4460 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4461 return true;
4462
4463 case MULT:
4464 op0 = XEXP (x, 0);
4465 op1 = XEXP (x, 1);
4466
4467 *cost = COSTS_N_INSNS (1);
4468 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4469 {
4470 if (CONST_INT_P (op1)
4471 && exact_log2 (INTVAL (op1)) > 0)
4472 {
4473 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4474 return true;
4475 }
4476
4477 if ((GET_CODE (op0) == ZERO_EXTEND
4478 && GET_CODE (op1) == ZERO_EXTEND)
4479 || (GET_CODE (op0) == SIGN_EXTEND
4480 && GET_CODE (op1) == SIGN_EXTEND))
4481 {
4482 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4483 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4484 if (speed)
4485 *cost += extra_cost->int_multiply_extend;
4486 return true;
4487 }
4488
4489 if (speed)
4490 *cost += extra_cost->int_multiply;
4491 }
4492 else if (speed)
4493 {
4494 if (GET_MODE (x) == DFmode)
4495 *cost += extra_cost->double_multiply;
4496 else if (GET_MODE (x) == SFmode)
4497 *cost += extra_cost->float_multiply;
4498 }
4499
4500 return false; /* All arguments need to be in registers. */
4501
4502 case MOD:
4503 case UMOD:
4504 *cost = COSTS_N_INSNS (2);
4505 if (speed)
4506 {
4507 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4508 *cost += (extra_cost->int_multiply_add
4509 + extra_cost->int_divide);
4510 else if (GET_MODE (x) == DFmode)
4511 *cost += (extra_cost->double_multiply
4512 + extra_cost->double_divide);
4513 else if (GET_MODE (x) == SFmode)
4514 *cost += (extra_cost->float_multiply
4515 + extra_cost->float_divide);
4516 }
4517 return false; /* All arguments need to be in registers. */
4518
4519 case DIV:
4520 case UDIV:
4521 *cost = COSTS_N_INSNS (1);
4522 if (speed)
4523 {
4524 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4525 *cost += extra_cost->int_divide;
4526 else if (GET_MODE (x) == DFmode)
4527 *cost += extra_cost->double_divide;
4528 else if (GET_MODE (x) == SFmode)
4529 *cost += extra_cost->float_divide;
4530 }
4531 return false; /* All arguments need to be in registers. */
4532
4533 default:
4534 break;
4535 }
4536 return false;
4537 }
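/* For instance, (set (reg) (plus (mult (reg) (reg)) (reg))) is costed
   as a single multiply-accumulate: the PLUS case recognizes the MULT
   operand, costs the three source operands individually, and adds
   extra_cost->int_multiply_add when optimizing for speed, matching a
   MADD instruction rather than separate MUL and ADD.  */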
4538
4539 static int
4540 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4541 enum machine_mode mode ATTRIBUTE_UNUSED,
4542 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4543 {
4544 enum rtx_code c = GET_CODE (x);
4545 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4546
4547 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4548 return addr_cost->pre_modify;
4549
4550 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4551 return addr_cost->post_modify;
4552
4553 if (c == PLUS)
4554 {
4555 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4556 return addr_cost->imm_offset;
4557 else if (GET_CODE (XEXP (x, 0)) == MULT
4558 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4559 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4560 return addr_cost->register_extend;
4561
4562 return addr_cost->register_offset;
4563 }
4564 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4565 return addr_cost->imm_offset;
4566
4567 return 0;
4568 }
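/* Schematically, the mapping from addressing form to cost field is:

     [x0, #16]                          -> addr_cost->imm_offset
     [x0, x1]                           -> addr_cost->register_offset
     [x0, x1, lsl #3] / [x0, w1, sxtw]  -> addr_cost->register_extend
     [x0, #16]! / [x0], #16             -> pre_modify / post_modify  */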
4569
4570 static int
4571 aarch64_register_move_cost (enum machine_mode mode,
4572 reg_class_t from, reg_class_t to)
4573 {
4574 const struct cpu_regmove_cost *regmove_cost
4575 = aarch64_tune_params->regmove_cost;
4576
4577 if (from == GENERAL_REGS && to == GENERAL_REGS)
4578 return regmove_cost->GP2GP;
4579 else if (from == GENERAL_REGS)
4580 return regmove_cost->GP2FP;
4581 else if (to == GENERAL_REGS)
4582 return regmove_cost->FP2GP;
4583
4584 /* When AdvSIMD instructions are disabled it is not possible to move
4585 a 128-bit value directly between Q registers. This is handled in
4586 secondary reload. A general register is used as a scratch to move
4587 the upper DI value and the lower DI value is moved directly,
4588 hence the cost is the sum of three moves. */
4589
4590 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4591 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4592
4593 return regmove_cost->FP2FP;
4594 }
4595
4596 static int
4597 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4598 reg_class_t rclass ATTRIBUTE_UNUSED,
4599 bool in ATTRIBUTE_UNUSED)
4600 {
4601 return aarch64_tune_params->memmov_cost;
4602 }
4603
4604 static void initialize_aarch64_code_model (void);
4605
4606 /* Parse the architecture extension string. */
4607
4608 static void
4609 aarch64_parse_extension (char *str)
4610 {
4611 /* The extension string is parsed left to right. */
4612 const struct aarch64_option_extension *opt = NULL;
4613
4614 /* Flag to say whether we are adding or removing an extension. */
4615 int adding_ext = -1;
4616
4617 while (str != NULL && *str != 0)
4618 {
4619 char *ext;
4620 size_t len;
4621
4622 str++;
4623 ext = strchr (str, '+');
4624
4625 if (ext != NULL)
4626 len = ext - str;
4627 else
4628 len = strlen (str);
4629
4630 if (len >= 2 && strncmp (str, "no", 2) == 0)
4631 {
4632 adding_ext = 0;
4633 len -= 2;
4634 str += 2;
4635 }
4636 else if (len > 0)
4637 adding_ext = 1;
4638
4639 if (len == 0)
4640 {
4641 error ("missing feature modifier after %qs", "+no");
4642 return;
4643 }
4644
4645 /* Scan over the extensions table trying to find an exact match. */
4646 for (opt = all_extensions; opt->name != NULL; opt++)
4647 {
4648 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4649 {
4650 /* Add or remove the extension. */
4651 if (adding_ext)
4652 aarch64_isa_flags |= opt->flags_on;
4653 else
4654 aarch64_isa_flags &= ~(opt->flags_off);
4655 break;
4656 }
4657 }
4658
4659 if (opt->name == NULL)
4660 {
4661 /* Extension not found in list. */
4662 error ("unknown feature modifier %qs", str);
4663 return;
4664 }
4665
4666 str = ext;
4667 }
4668
4669 return;
4670 }
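/* A stand-alone sketch of the walk above (not part of the build, and
   using a made-up option string): "+fp+nosimd" is visited as the
   addition of "fp" followed by the removal of "simd". */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "+fp+nosimd";   /* hypothetical -march/-mcpu suffix */
  char *str = buf;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;
      int adding = 1;

      str++;                   /* step over the leading '+' */
      ext = strchr (str, '+');
      len = ext ? (size_t) (ext - str) : strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding = 0;
          str += 2;
          len -= 2;
        }

      printf ("%s %.*s\n", adding ? "enable" : "disable", (int) len, str);
      str = ext;
    }
  return 0;
}
#endif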
4671
4672 /* Parse the ARCH string. */
4673
4674 static void
4675 aarch64_parse_arch (void)
4676 {
4677 char *ext;
4678 const struct processor *arch;
4679 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4680 size_t len;
4681
4682 strcpy (str, aarch64_arch_string);
4683
4684 ext = strchr (str, '+');
4685
4686 if (ext != NULL)
4687 len = ext - str;
4688 else
4689 len = strlen (str);
4690
4691 if (len == 0)
4692 {
4693 error ("missing arch name in -march=%qs", str);
4694 return;
4695 }
4696
4697 /* Loop through the list of supported ARCHs to find a match. */
4698 for (arch = all_architectures; arch->name != NULL; arch++)
4699 {
4700 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4701 {
4702 selected_arch = arch;
4703 aarch64_isa_flags = selected_arch->flags;
4704 selected_cpu = &all_cores[selected_arch->core];
4705
4706 if (ext != NULL)
4707 {
4708 /* ARCH string contains at least one extension. */
4709 aarch64_parse_extension (ext);
4710 }
4711
4712 return;
4713 }
4714 }
4715
4716 /* ARCH name not found in list. */
4717 error ("unknown value %qs for -march", str);
4718 return;
4719 }
4720
4721 /* Parse the CPU string. */
4722
4723 static void
4724 aarch64_parse_cpu (void)
4725 {
4726 char *ext;
4727 const struct processor *cpu;
4728 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4729 size_t len;
4730
4731 strcpy (str, aarch64_cpu_string);
4732
4733 ext = strchr (str, '+');
4734
4735 if (ext != NULL)
4736 len = ext - str;
4737 else
4738 len = strlen (str);
4739
4740 if (len == 0)
4741 {
4742 error ("missing cpu name in -mcpu=%qs", str);
4743 return;
4744 }
4745
4746 /* Loop through the list of supported CPUs to find a match. */
4747 for (cpu = all_cores; cpu->name != NULL; cpu++)
4748 {
4749 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4750 {
4751 selected_cpu = cpu;
4752 aarch64_isa_flags = selected_cpu->flags;
4753
4754 if (ext != NULL)
4755 {
4756 /* CPU string contains at least one extension. */
4757 aarch64_parse_extension (ext);
4758 }
4759
4760 return;
4761 }
4762 }
4763
4764 /* CPU name not found in list. */
4765 error ("unknown value %qs for -mcpu", str);
4766 return;
4767 }
4768
4769 /* Parse the TUNE string. */
4770
4771 static void
4772 aarch64_parse_tune (void)
4773 {
4774 const struct processor *cpu;
4775 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4776 strcpy (str, aarch64_tune_string);
4777
4778 /* Loop through the list of supported CPUs to find a match. */
4779 for (cpu = all_cores; cpu->name != NULL; cpu++)
4780 {
4781 if (strcmp (cpu->name, str) == 0)
4782 {
4783 selected_tune = cpu;
4784 return;
4785 }
4786 }
4787
4788 /* CPU name not found in list. */
4789 error ("unknown value %qs for -mtune", str);
4790 return;
4791 }
4792
4793
4794 /* Implement TARGET_OPTION_OVERRIDE. */
4795
4796 static void
4797 aarch64_override_options (void)
4798 {
4799 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4800 otherwise march remains undefined. mtune can be used with either march or
4801 mcpu. */
4802
4803 if (aarch64_arch_string)
4804 {
4805 aarch64_parse_arch ();
4806 aarch64_cpu_string = NULL;
4807 }
4808
4809 if (aarch64_cpu_string)
4810 {
4811 aarch64_parse_cpu ();
4812 selected_arch = NULL;
4813 }
4814
4815 if (aarch64_tune_string)
4816 {
4817 aarch64_parse_tune ();
4818 }
4819
4820 initialize_aarch64_code_model ();
4821
4822 aarch64_build_bitmask_table ();
4823
4824 /* This target defaults to strict volatile bitfields. */
4825 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4826 flag_strict_volatile_bitfields = 1;
4827
4828 /* If the user did not specify a processor, choose the default
4829 one for them. This will be the CPU set during configuration using
4830 --with-cpu, otherwise it is "generic". */
4831 if (!selected_cpu)
4832 {
4833 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4834 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4835 }
4836
4837 gcc_assert (selected_cpu);
4838
4839 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4840 if (!selected_tune)
4841 selected_tune = &all_cores[selected_cpu->core];
4842
4843 aarch64_tune_flags = selected_tune->flags;
4844 aarch64_tune = selected_tune->core;
4845 aarch64_tune_params = selected_tune->tune;
4846
4847 aarch64_override_options_after_change ();
4848 }
4849
4850 /* Implement targetm.override_options_after_change. */
4851
4852 static void
4853 aarch64_override_options_after_change (void)
4854 {
4855 faked_omit_frame_pointer = false;
4856
4857 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4858 that aarch64_frame_pointer_required will be called. We need to remember
4859 whether flag_omit_frame_pointer was turned on normally or just faked. */
4860
4861 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4862 {
4863 flag_omit_frame_pointer = true;
4864 faked_omit_frame_pointer = true;
4865 }
4866 }
4867
4868 static struct machine_function *
4869 aarch64_init_machine_status (void)
4870 {
4871 struct machine_function *machine;
4872 machine = ggc_alloc_cleared_machine_function ();
4873 return machine;
4874 }
4875
4876 void
4877 aarch64_init_expanders (void)
4878 {
4879 init_machine_status = aarch64_init_machine_status;
4880 }
4881
4882 /* Resolve the requested code model against -fpic/-fPIC and record the result in aarch64_cmodel. */
4883 static void
4884 initialize_aarch64_code_model (void)
4885 {
4886 if (flag_pic)
4887 {
4888 switch (aarch64_cmodel_var)
4889 {
4890 case AARCH64_CMODEL_TINY:
4891 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4892 break;
4893 case AARCH64_CMODEL_SMALL:
4894 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4895 break;
4896 case AARCH64_CMODEL_LARGE:
4897 sorry ("code model %qs with -f%s", "large",
4898 flag_pic > 1 ? "PIC" : "pic");
4899 default:
4900 gcc_unreachable ();
4901 }
4902 }
4903 else
4904 aarch64_cmodel = aarch64_cmodel_var;
4905 }
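/* In tabular form, the mapping performed above is:

     -mcmodel=   without -fpic/-fPIC     with -fpic/-fPIC
     tiny        AARCH64_CMODEL_TINY     AARCH64_CMODEL_TINY_PIC
     small       AARCH64_CMODEL_SMALL    AARCH64_CMODEL_SMALL_PIC
     large       AARCH64_CMODEL_LARGE    rejected via sorry ()  */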
4906
4907 /* Return true if SYMBOL_REF X binds locally. */
4908
4909 static bool
4910 aarch64_symbol_binds_local_p (const_rtx x)
4911 {
4912 return (SYMBOL_REF_DECL (x)
4913 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4914 : SYMBOL_REF_LOCAL_P (x));
4915 }
4916
4917 /* Return true if SYMBOL_REF X is thread local. */
4918 static bool
4919 aarch64_tls_symbol_p (rtx x)
4920 {
4921 if (! TARGET_HAVE_TLS)
4922 return false;
4923
4924 if (GET_CODE (x) != SYMBOL_REF)
4925 return false;
4926
4927 return SYMBOL_REF_TLS_MODEL (x) != 0;
4928 }
4929
4930 /* Classify a TLS symbol into one of the TLS kinds. */
4931 enum aarch64_symbol_type
4932 aarch64_classify_tls_symbol (rtx x)
4933 {
4934 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4935
4936 switch (tls_kind)
4937 {
4938 case TLS_MODEL_GLOBAL_DYNAMIC:
4939 case TLS_MODEL_LOCAL_DYNAMIC:
4940 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4941
4942 case TLS_MODEL_INITIAL_EXEC:
4943 return SYMBOL_SMALL_GOTTPREL;
4944
4945 case TLS_MODEL_LOCAL_EXEC:
4946 return SYMBOL_SMALL_TPREL;
4947
4948 case TLS_MODEL_EMULATED:
4949 case TLS_MODEL_NONE:
4950 return SYMBOL_FORCE_TO_MEM;
4951
4952 default:
4953 gcc_unreachable ();
4954 }
4955 }
4956
4957 /* Return the method that should be used to access SYMBOL_REF or
4958 LABEL_REF X in context CONTEXT. */
4959 enum aarch64_symbol_type
4960 aarch64_classify_symbol (rtx x,
4961 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4962 {
4963 if (GET_CODE (x) == LABEL_REF)
4964 {
4965 switch (aarch64_cmodel)
4966 {
4967 case AARCH64_CMODEL_LARGE:
4968 return SYMBOL_FORCE_TO_MEM;
4969
4970 case AARCH64_CMODEL_TINY_PIC:
4971 case AARCH64_CMODEL_TINY:
4972 case AARCH64_CMODEL_SMALL_PIC:
4973 case AARCH64_CMODEL_SMALL:
4974 return SYMBOL_SMALL_ABSOLUTE;
4975
4976 default:
4977 gcc_unreachable ();
4978 }
4979 }
4980
4981 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4982
4983 switch (aarch64_cmodel)
4984 {
4985 case AARCH64_CMODEL_LARGE:
4986 return SYMBOL_FORCE_TO_MEM;
4987
4988 case AARCH64_CMODEL_TINY:
4989 case AARCH64_CMODEL_SMALL:
4990
4991 /* This is needed to get DFmode and TImode constants to be loaded
4992 from the constant pool. We don't handle TImode constant loads
4993 properly yet and hence need to use the constant pool for those
4994 values. */
4995 if (CONSTANT_POOL_ADDRESS_P (x))
4996 return SYMBOL_FORCE_TO_MEM;
4997
4998 if (aarch64_tls_symbol_p (x))
4999 return aarch64_classify_tls_symbol (x);
5000
5001 if (SYMBOL_REF_WEAK (x))
5002 return SYMBOL_FORCE_TO_MEM;
5003
5004 return SYMBOL_SMALL_ABSOLUTE;
5005
5006 case AARCH64_CMODEL_TINY_PIC:
5007 case AARCH64_CMODEL_SMALL_PIC:
5008
5009 if (CONSTANT_POOL_ADDRESS_P (x))
5010 return SYMBOL_FORCE_TO_MEM;
5011
5012 if (aarch64_tls_symbol_p (x))
5013 return aarch64_classify_tls_symbol (x);
5014
5015 if (!aarch64_symbol_binds_local_p (x))
5016 return SYMBOL_SMALL_GOT;
5017
5018 return SYMBOL_SMALL_ABSOLUTE;
5019
5020 default:
5021 gcc_unreachable ();
5022 }
5023 /* By default push everything into the constant pool. */
5024 return SYMBOL_FORCE_TO_MEM;
5025 }
5026
5027 /* Return true if X is a symbolic constant that can be used in context
5028 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5029
5030 bool
5031 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5032 enum aarch64_symbol_type *symbol_type)
5033 {
5034 rtx offset;
5035 split_const (x, &x, &offset);
5036 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5037 *symbol_type = aarch64_classify_symbol (x, context);
5038 else
5039 return false;
5040
5041 /* No checking of offset at this point. */
5042 return true;
5043 }
5044
5045 bool
5046 aarch64_constant_address_p (rtx x)
5047 {
5048 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5049 }
5050
5051 bool
5052 aarch64_legitimate_pic_operand_p (rtx x)
5053 {
5054 if (GET_CODE (x) == SYMBOL_REF
5055 || (GET_CODE (x) == CONST
5056 && GET_CODE (XEXP (x, 0)) == PLUS
5057 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5058 return false;
5059
5060 return true;
5061 }
5062
5063 /* Return true if X holds either a quarter-precision or
5064 floating-point +0.0 constant. */
5065 static bool
5066 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5067 {
5068 if (!CONST_DOUBLE_P (x))
5069 return false;
5070
5071 /* TODO: We could handle moving 0.0 to a TFmode register,
5072 but first we would like to refactor the movtf_aarch64
5073 to be more amenable to splitting moves properly and
5074 correctly gating on TARGET_SIMD. For now, reject all
5075 constants which are not for SFmode or DFmode registers. */
5076 if (!(mode == SFmode || mode == DFmode))
5077 return false;
5078
5079 if (aarch64_float_const_zero_rtx_p (x))
5080 return true;
5081 return aarch64_float_const_representable_p (x);
5082 }
5083
5084 static bool
5085 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5086 {
5087 /* Do not allow vector struct mode constants. We could support
5088 0 and -1 easily, but they need support in aarch64-simd.md. */
5089 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5090 return false;
5091
5092 /* This could probably go away because
5093 we now decompose CONST_INTs according to expand_mov_immediate. */
5094 if ((GET_CODE (x) == CONST_VECTOR
5095 && aarch64_simd_valid_immediate (x, mode, false,
5096 NULL, NULL, NULL, NULL, NULL) != -1)
5097 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5098 return !targetm.cannot_force_const_mem (mode, x);
5099
5100 if (GET_CODE (x) == HIGH
5101 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5102 return true;
5103
5104 return aarch64_constant_address_p (x);
5105 }
5106
5107 rtx
5108 aarch64_load_tp (rtx target)
5109 {
5110 if (!target
5111 || GET_MODE (target) != Pmode
5112 || !register_operand (target, Pmode))
5113 target = gen_reg_rtx (Pmode);
5114
5115 /* Can return in any reg. */
5116 emit_insn (gen_aarch64_load_tp_hard (target));
5117 return target;
5118 }
5119
5120 /* On AAPCS systems, this is the "struct __va_list". */
5121 static GTY(()) tree va_list_type;
5122
5123 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5124 Return the type to use as __builtin_va_list.
5125
5126 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5127
5128 struct __va_list
5129 {
5130 void *__stack;
5131 void *__gr_top;
5132 void *__vr_top;
5133 int __gr_offs;
5134 int __vr_offs;
5135 }; */
5136
5137 static tree
5138 aarch64_build_builtin_va_list (void)
5139 {
5140 tree va_list_name;
5141 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5142
5143 /* Create the type. */
5144 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5145 /* Give it the required name. */
5146 va_list_name = build_decl (BUILTINS_LOCATION,
5147 TYPE_DECL,
5148 get_identifier ("__va_list"),
5149 va_list_type);
5150 DECL_ARTIFICIAL (va_list_name) = 1;
5151 TYPE_NAME (va_list_type) = va_list_name;
5152 TYPE_STUB_DECL (va_list_type) = va_list_name;
5153
5154 /* Create the fields. */
5155 f_stack = build_decl (BUILTINS_LOCATION,
5156 FIELD_DECL, get_identifier ("__stack"),
5157 ptr_type_node);
5158 f_grtop = build_decl (BUILTINS_LOCATION,
5159 FIELD_DECL, get_identifier ("__gr_top"),
5160 ptr_type_node);
5161 f_vrtop = build_decl (BUILTINS_LOCATION,
5162 FIELD_DECL, get_identifier ("__vr_top"),
5163 ptr_type_node);
5164 f_groff = build_decl (BUILTINS_LOCATION,
5165 FIELD_DECL, get_identifier ("__gr_offs"),
5166 integer_type_node);
5167 f_vroff = build_decl (BUILTINS_LOCATION,
5168 FIELD_DECL, get_identifier ("__vr_offs"),
5169 integer_type_node);
5170
5171 DECL_ARTIFICIAL (f_stack) = 1;
5172 DECL_ARTIFICIAL (f_grtop) = 1;
5173 DECL_ARTIFICIAL (f_vrtop) = 1;
5174 DECL_ARTIFICIAL (f_groff) = 1;
5175 DECL_ARTIFICIAL (f_vroff) = 1;
5176
5177 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5178 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5179 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5180 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5181 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5182
5183 TYPE_FIELDS (va_list_type) = f_stack;
5184 DECL_CHAIN (f_stack) = f_grtop;
5185 DECL_CHAIN (f_grtop) = f_vrtop;
5186 DECL_CHAIN (f_vrtop) = f_groff;
5187 DECL_CHAIN (f_groff) = f_vroff;
5188
5189 /* Compute its layout. */
5190 layout_type (va_list_type);
5191
5192 return va_list_type;
5193 }
5194
5195 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5196 static void
5197 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5198 {
5199 const CUMULATIVE_ARGS *cum;
5200 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5201 tree stack, grtop, vrtop, groff, vroff;
5202 tree t;
5203 int gr_save_area_size;
5204 int vr_save_area_size;
5205 int vr_offset;
5206
5207 cum = &crtl->args.info;
5208 gr_save_area_size
5209 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5210 vr_save_area_size
5211 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5212
5213 if (TARGET_GENERAL_REGS_ONLY)
5214 {
5215 if (cum->aapcs_nvrn > 0)
5216 sorry ("%qs and floating point or vector arguments",
5217 "-mgeneral-regs-only");
5218 vr_save_area_size = 0;
5219 }
5220
5221 f_stack = TYPE_FIELDS (va_list_type_node);
5222 f_grtop = DECL_CHAIN (f_stack);
5223 f_vrtop = DECL_CHAIN (f_grtop);
5224 f_groff = DECL_CHAIN (f_vrtop);
5225 f_vroff = DECL_CHAIN (f_groff);
5226
5227 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5228 NULL_TREE);
5229 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5230 NULL_TREE);
5231 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5232 NULL_TREE);
5233 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5234 NULL_TREE);
5235 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5236 NULL_TREE);
5237
5238 /* Emit code to initialize STACK, which points to the next varargs stack
5239 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5240 by named arguments. STACK is 8-byte aligned. */
5241 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5242 if (cum->aapcs_stack_size > 0)
5243 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5244 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246
5247 /* Emit code to initialize GRTOP, the top of the GR save area.
5248 virtual_incoming_args_rtx should have been 16 byte aligned. */
5249 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5250 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252
5253 /* Emit code to initialize VRTOP, the top of the VR save area.
5254 This address is gr_save_area_bytes below GRTOP, rounded
5255 down to the next 16-byte boundary. */
5256 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5257 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5258 STACK_BOUNDARY / BITS_PER_UNIT);
5259
5260 if (vr_offset)
5261 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5262 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5264
5265 /* Emit code to initialize GROFF, the offset from GRTOP of the
5266 next GPR argument. */
5267 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5268 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5270
5271 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5272 of the next VR argument. */
5273 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5274 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5276 }
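/* As an illustration, for "int f (int a, int b, ...)" the two named
   arguments consume x0 and x1, so (with 8 GPR and 8 FP argument
   registers, 8-byte words and 16-byte vector slots) the code above
   initializes the va_list roughly as:

     __stack   = incoming argument pointer  (no named stack words)
     __gr_top  = incoming argument pointer
     __vr_top  = __gr_top - 48              (6 * 8 bytes, 16-aligned)
     __gr_offs = -48                        (6 unused GPRs * 8)
     __vr_offs = -128                       (8 unused VRs * 16)  */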
5277
5278 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5279
5280 static tree
5281 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5282 gimple_seq *post_p ATTRIBUTE_UNUSED)
5283 {
5284 tree addr;
5285 bool indirect_p;
5286 bool is_ha; /* is HFA or HVA. */
5287 bool dw_align; /* double-word align. */
5288 enum machine_mode ag_mode = VOIDmode;
5289 int nregs;
5290 enum machine_mode mode;
5291
5292 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5293 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5294 HOST_WIDE_INT size, rsize, adjust, align;
5295 tree t, u, cond1, cond2;
5296
5297 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5298 if (indirect_p)
5299 type = build_pointer_type (type);
5300
5301 mode = TYPE_MODE (type);
5302
5303 f_stack = TYPE_FIELDS (va_list_type_node);
5304 f_grtop = DECL_CHAIN (f_stack);
5305 f_vrtop = DECL_CHAIN (f_grtop);
5306 f_groff = DECL_CHAIN (f_vrtop);
5307 f_vroff = DECL_CHAIN (f_groff);
5308
5309 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5310 f_stack, NULL_TREE);
5311 size = int_size_in_bytes (type);
5312 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5313
5314 dw_align = false;
5315 adjust = 0;
5316 if (aarch64_vfp_is_call_or_return_candidate (mode,
5317 type,
5318 &ag_mode,
5319 &nregs,
5320 &is_ha))
5321 {
5322 /* TYPE passed in fp/simd registers. */
5323 if (TARGET_GENERAL_REGS_ONLY)
5324 sorry ("%qs and floating point or vector arguments",
5325 "-mgeneral-regs-only");
5326
5327 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5328 unshare_expr (valist), f_vrtop, NULL_TREE);
5329 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5330 unshare_expr (valist), f_vroff, NULL_TREE);
5331
5332 rsize = nregs * UNITS_PER_VREG;
5333
5334 if (is_ha)
5335 {
5336 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5337 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5338 }
5339 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5340 && size < UNITS_PER_VREG)
5341 {
5342 adjust = UNITS_PER_VREG - size;
5343 }
5344 }
5345 else
5346 {
5347 /* TYPE passed in general registers. */
5348 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5349 unshare_expr (valist), f_grtop, NULL_TREE);
5350 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5351 unshare_expr (valist), f_groff, NULL_TREE);
5352 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5353 nregs = rsize / UNITS_PER_WORD;
5354
5355 if (align > 8)
5356 dw_align = true;
5357
5358 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5359 && size < UNITS_PER_WORD)
5360 {
5361 adjust = UNITS_PER_WORD - size;
5362 }
5363 }
5364
5365 /* Get a local temporary for the field value. */
5366 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5367
5368 /* Emit code to branch if off >= 0. */
5369 t = build2 (GE_EXPR, boolean_type_node, off,
5370 build_int_cst (TREE_TYPE (off), 0));
5371 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5372
5373 if (dw_align)
5374 {
5375 /* Emit: offs = (offs + 15) & -16. */
5376 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5377 build_int_cst (TREE_TYPE (off), 15));
5378 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5379 build_int_cst (TREE_TYPE (off), -16));
5380 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5381 }
5382 else
5383 roundup = NULL;
5384
5385 /* Update ap.__[g|v]r_offs */
5386 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5387 build_int_cst (TREE_TYPE (off), rsize));
5388 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5389
5390 /* String up. */
5391 if (roundup)
5392 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5393
5394 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5395 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5396 build_int_cst (TREE_TYPE (f_off), 0));
5397 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5398
5399 /* String up: make sure the assignment happens before the use. */
5400 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5401 COND_EXPR_ELSE (cond1) = t;
5402
5403 /* Prepare the trees handling the argument that is passed on the stack;
5404 the top level node will store in ON_STACK. */
5405 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5406 if (align > 8)
5407 {
5408 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5409 t = fold_convert (intDI_type_node, arg);
5410 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5411 build_int_cst (TREE_TYPE (t), 15));
5412 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5413 build_int_cst (TREE_TYPE (t), -16));
5414 t = fold_convert (TREE_TYPE (arg), t);
5415 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5416 }
5417 else
5418 roundup = NULL;
5419 /* Advance ap.__stack */
5420 t = fold_convert (intDI_type_node, arg);
5421 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5422 build_int_cst (TREE_TYPE (t), size + 7));
5423 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5424 build_int_cst (TREE_TYPE (t), -8));
5425 t = fold_convert (TREE_TYPE (arg), t);
5426 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5427 /* String up roundup and advance. */
5428 if (roundup)
5429 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5430 /* String up with arg */
5431 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5432 /* Big-endianness related address adjustment. */
5433 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5434 && size < UNITS_PER_WORD)
5435 {
5436 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5437 size_int (UNITS_PER_WORD - size));
5438 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5439 }
5440
5441 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5442 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5443
5444 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5445 t = off;
5446 if (adjust)
5447 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5448 build_int_cst (TREE_TYPE (off), adjust));
5449
5450 t = fold_convert (sizetype, t);
5451 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5452
5453 if (is_ha)
5454 {
5455 /* type ha; // treat as "struct {ftype field[n];}"
5456 ... [computing offs]
5457 for (i = 0; i <nregs; ++i, offs += 16)
5458 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5459 return ha; */
5460 int i;
5461 tree tmp_ha, field_t, field_ptr_t;
5462
5463 /* Declare a local variable. */
5464 tmp_ha = create_tmp_var_raw (type, "ha");
5465 gimple_add_tmp_var (tmp_ha);
5466
5467 /* Establish the base type. */
5468 switch (ag_mode)
5469 {
5470 case SFmode:
5471 field_t = float_type_node;
5472 field_ptr_t = float_ptr_type_node;
5473 break;
5474 case DFmode:
5475 field_t = double_type_node;
5476 field_ptr_t = double_ptr_type_node;
5477 break;
5478 case TFmode:
5479 field_t = long_double_type_node;
5480 field_ptr_t = long_double_ptr_type_node;
5481 break;
5482 /* The half precision and quad precision are not fully supported yet. Enable
5483 the following code after the support is complete. Need to find the correct
5484 type node for __fp16 *. */
5485 #if 0
5486 case HFmode:
5487 field_t = float_type_node;
5488 field_ptr_t = float_ptr_type_node;
5489 break;
5490 #endif
5491 case V2SImode:
5492 case V4SImode:
5493 {
5494 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5495 field_t = build_vector_type_for_mode (innertype, ag_mode);
5496 field_ptr_t = build_pointer_type (field_t);
5497 }
5498 break;
5499 default:
5500 gcc_assert (0);
5501 }
5502
5503 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5504 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5505 addr = t;
5506 t = fold_convert (field_ptr_t, addr);
5507 t = build2 (MODIFY_EXPR, field_t,
5508 build1 (INDIRECT_REF, field_t, tmp_ha),
5509 build1 (INDIRECT_REF, field_t, t));
5510
5511 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5512 for (i = 1; i < nregs; ++i)
5513 {
5514 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5515 u = fold_convert (field_ptr_t, addr);
5516 u = build2 (MODIFY_EXPR, field_t,
5517 build2 (MEM_REF, field_t, tmp_ha,
5518 build_int_cst (field_ptr_t,
5519 (i *
5520 int_size_in_bytes (field_t)))),
5521 build1 (INDIRECT_REF, field_t, u));
5522 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5523 }
5524
5525 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5526 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5527 }
5528
5529 COND_EXPR_ELSE (cond2) = t;
5530 addr = fold_convert (build_pointer_type (type), cond1);
5531 addr = build_va_arg_indirect_ref (addr);
5532
5533 if (indirect_p)
5534 addr = build_va_arg_indirect_ref (addr);
5535
5536 return addr;
5537 }
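/* The tree built above corresponds roughly to the following C for a
   small little-endian integer argument (a sketch only; the real
   expansion also handles alignment, big-endian padding and the
   HFA/HVA copy loop). */
#if 0
struct __va_list
{
  void *__stack;
  void *__gr_top;
  void *__vr_top;
  int __gr_offs;
  int __vr_offs;
};

static int
va_arg_int (struct __va_list *ap)
{
  int *p;

  if (ap->__gr_offs >= 0)
    {
      /* Register save area already exhausted: read from the stack and
         advance __stack by one 8-byte slot. */
      p = (int *) ap->__stack;
      ap->__stack = (char *) ap->__stack + 8;
    }
  else
    {
      /* Claim one slot in the GR save area; if the updated offset
         crosses zero, the argument was passed on the stack instead. */
      int offs = ap->__gr_offs;
      ap->__gr_offs += 8;
      if (ap->__gr_offs > 0)
        {
          p = (int *) ap->__stack;
          ap->__stack = (char *) ap->__stack + 8;
        }
      else
        p = (int *) ((char *) ap->__gr_top + offs);
    }
  return *p;
}
#endif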
5538
5539 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5540
5541 static void
5542 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5543 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5544 int no_rtl)
5545 {
5546 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5547 CUMULATIVE_ARGS local_cum;
5548 int gr_saved, vr_saved;
5549
5550 /* The caller has advanced CUM up to, but not beyond, the last named
5551 argument. Advance a local copy of CUM past the last "real" named
5552 argument, to find out how many registers are left over. */
5553 local_cum = *cum;
5554 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5555
5556 /* Find out how many registers we need to save. */
5557 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5558 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5559
5560 if (TARGET_GENERAL_REGS_ONLY)
5561 {
5562 if (local_cum.aapcs_nvrn > 0)
5563 sorry ("%qs and floating point or vector arguments",
5564 "-mgeneral-regs-only");
5565 vr_saved = 0;
5566 }
5567
5568 if (!no_rtl)
5569 {
5570 if (gr_saved > 0)
5571 {
5572 rtx ptr, mem;
5573
5574 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5575 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5576 - gr_saved * UNITS_PER_WORD);
5577 mem = gen_frame_mem (BLKmode, ptr);
5578 set_mem_alias_set (mem, get_varargs_alias_set ());
5579
5580 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5581 mem, gr_saved);
5582 }
5583 if (vr_saved > 0)
5584 {
5585 /* We can't use move_block_from_reg, because it will use
5586 the wrong mode, storing D regs only. */
5587 enum machine_mode mode = TImode;
5588 int off, i;
5589
5590 /* Set OFF to the offset from virtual_incoming_args_rtx of
5591 the first vector register. The VR save area lies below
5592 the GR one, and is aligned to 16 bytes. */
5593 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5594 STACK_BOUNDARY / BITS_PER_UNIT);
5595 off -= vr_saved * UNITS_PER_VREG;
5596
5597 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5598 {
5599 rtx ptr, mem;
5600
5601 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5602 mem = gen_frame_mem (mode, ptr);
5603 set_mem_alias_set (mem, get_varargs_alias_set ());
5604 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5605 off += UNITS_PER_VREG;
5606 }
5607 }
5608 }
5609
5610 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5611 any complication of having crtl->args.pretend_args_size changed. */
5612 cfun->machine->saved_varargs_size
5613 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5614 STACK_BOUNDARY / BITS_PER_UNIT)
5615 + vr_saved * UNITS_PER_VREG);
5616 }
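/* Illustrative worked example (editorial sketch, not in the original
   source): for void f (int n, ...), the single named argument consumes
   X0, so gr_saved = 8 - 1 = 7 and vr_saved = 8 - 0 = 8.  With
   UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16 the size recorded above
   is AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 = 64 + 128 = 192 bytes.  */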
5617
5618 static void
5619 aarch64_conditional_register_usage (void)
5620 {
5621 int i;
5622 if (!TARGET_FLOAT)
5623 {
5624 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5625 {
5626 fixed_regs[i] = 1;
5627 call_used_regs[i] = 1;
5628 }
5629 }
5630 }
5631
5632 /* Walk down the type tree of TYPE counting consecutive base elements.
5633 If *MODEP is VOIDmode, then set it to the first valid floating point
5634 type. If a non-floating point type is found, or if a floating point
5635 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5636 otherwise return the count in the sub-tree. */
5637 static int
5638 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5639 {
5640 enum machine_mode mode;
5641 HOST_WIDE_INT size;
5642
5643 switch (TREE_CODE (type))
5644 {
5645 case REAL_TYPE:
5646 mode = TYPE_MODE (type);
5647 if (mode != DFmode && mode != SFmode && mode != TFmode)
5648 return -1;
5649
5650 if (*modep == VOIDmode)
5651 *modep = mode;
5652
5653 if (*modep == mode)
5654 return 1;
5655
5656 break;
5657
5658 case COMPLEX_TYPE:
5659 mode = TYPE_MODE (TREE_TYPE (type));
5660 if (mode != DFmode && mode != SFmode && mode != TFmode)
5661 return -1;
5662
5663 if (*modep == VOIDmode)
5664 *modep = mode;
5665
5666 if (*modep == mode)
5667 return 2;
5668
5669 break;
5670
5671 case VECTOR_TYPE:
5672 /* Use V2SImode and V4SImode as representatives of all 64-bit
5673 and 128-bit vector types. */
5674 size = int_size_in_bytes (type);
5675 switch (size)
5676 {
5677 case 8:
5678 mode = V2SImode;
5679 break;
5680 case 16:
5681 mode = V4SImode;
5682 break;
5683 default:
5684 return -1;
5685 }
5686
5687 if (*modep == VOIDmode)
5688 *modep = mode;
5689
5690 /* Vector modes are considered to be opaque: two vectors are
5691 equivalent for the purposes of being homogeneous aggregates
5692 if they are the same size. */
5693 if (*modep == mode)
5694 return 1;
5695
5696 break;
5697
5698 case ARRAY_TYPE:
5699 {
5700 int count;
5701 tree index = TYPE_DOMAIN (type);
5702
5703 /* Can't handle incomplete types. */
5704 if (!COMPLETE_TYPE_P (type))
5705 return -1;
5706
5707 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5708 if (count == -1
5709 || !index
5710 || !TYPE_MAX_VALUE (index)
5711 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5712 || !TYPE_MIN_VALUE (index)
5713 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5714 || count < 0)
5715 return -1;
5716
5717 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5718 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5719
5720 /* There must be no padding. */
5721 if (!host_integerp (TYPE_SIZE (type), 1)
5722 || (tree_low_cst (TYPE_SIZE (type), 1)
5723 != count * GET_MODE_BITSIZE (*modep)))
5724 return -1;
5725
5726 return count;
5727 }
5728
5729 case RECORD_TYPE:
5730 {
5731 int count = 0;
5732 int sub_count;
5733 tree field;
5734
5735 /* Can't handle incomplete types. */
5736 if (!COMPLETE_TYPE_P (type))
5737 return -1;
5738
5739 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5740 {
5741 if (TREE_CODE (field) != FIELD_DECL)
5742 continue;
5743
5744 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5745 if (sub_count < 0)
5746 return -1;
5747 count += sub_count;
5748 }
5749
5750 /* There must be no padding. */
5751 if (!host_integerp (TYPE_SIZE (type), 1)
5752 || (tree_low_cst (TYPE_SIZE (type), 1)
5753 != count * GET_MODE_BITSIZE (*modep)))
5754 return -1;
5755
5756 return count;
5757 }
5758
5759 case UNION_TYPE:
5760 case QUAL_UNION_TYPE:
5761 {
5762 /* These aren't very interesting except in a degenerate case. */
5763 int count = 0;
5764 int sub_count;
5765 tree field;
5766
5767 /* Can't handle incomplete types. */
5768 if (!COMPLETE_TYPE_P (type))
5769 return -1;
5770
5771 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5772 {
5773 if (TREE_CODE (field) != FIELD_DECL)
5774 continue;
5775
5776 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5777 if (sub_count < 0)
5778 return -1;
5779 count = count > sub_count ? count : sub_count;
5780 }
5781
5782 /* There must be no padding. */
5783 if (!host_integerp (TYPE_SIZE (type), 1)
5784 || (tree_low_cst (TYPE_SIZE (type), 1)
5785 != count * GET_MODE_BITSIZE (*modep)))
5786 return -1;
5787
5788 return count;
5789 }
5790
5791 default:
5792 break;
5793 }
5794
5795 return -1;
5796 }
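/* Illustrative examples (editorial sketch, not in the original source)
   of how the walk above classifies types:

     struct { float x, y, z; }       returns 3, *modep == SFmode
     struct { double d[2]; }         returns 2, *modep == DFmode
     _Complex double                 returns 2, *modep == DFmode
     struct { float f; double d; }   returns -1 (mixed base modes)
     struct { float f; int i; }      returns -1 (non-FP member)  */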
5797
5798 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5799 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5800 array types. The C99 floating-point complex types are also considered
5801 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5802 types, which are GCC extensions and out of the scope of AAPCS64, are
5803 treated as composite types here as well.
5804
5805 Note that MODE itself is not sufficient in determining whether a type
5806 is such a composite type or not. This is because
5807 stor-layout.c:compute_record_mode may have already changed the MODE
5808 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5809 structure with only one field may have its MODE set to the mode of the
5810 field. Also an integer mode whose size matches the size of the
5811 RECORD_TYPE type may be used to substitute the original mode
5812 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5813 solely relied on. */
5814
5815 static bool
5816 aarch64_composite_type_p (const_tree type,
5817 enum machine_mode mode)
5818 {
5819 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5820 return true;
5821
5822 if (mode == BLKmode
5823 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5824 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5825 return true;
5826
5827 return false;
5828 }
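/* Illustrative examples (editorial sketch, not in the original
   source): struct { int i; } is composite even though stor-layout may
   have given it SImode; _Complex float is composite via COMPLEX_TYPE
   and MODE_COMPLEX_FLOAT; a plain double (REAL_TYPE, DFmode) is
   not.  */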
5829
5830 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5831 type as described in AAPCS64 \S 4.1.2.
5832
5833 See the comment above aarch64_composite_type_p for the notes on MODE. */
5834
5835 static bool
5836 aarch64_short_vector_p (const_tree type,
5837 enum machine_mode mode)
5838 {
5839 HOST_WIDE_INT size = -1;
5840
5841 if (type && TREE_CODE (type) == VECTOR_TYPE)
5842 size = int_size_in_bytes (type);
5843 else if (!aarch64_composite_type_p (type, mode)
5844 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5845 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5846 size = GET_MODE_SIZE (mode);
5847
5848 return (size == 8 || size == 16);
5849 }
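/* Illustrative examples (editorial sketch, not in the original
   source): the AdvSIMD types int32x2_t (8 bytes) and float32x4_t
   (16 bytes) are short vectors; a 32-byte generic vector is not, and
   neither is a structure wrapping a vector, which is composite.  */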
5850
5851 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5852 shall be passed or returned in simd/fp register(s) (providing these
5853 parameter passing registers are available).
5854
5855 Upon successful return, *COUNT returns the number of needed registers,
5856 *BASE_MODE returns the mode of the individual register and when IS_HA
5857 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5858 floating-point aggregate or a homogeneous short-vector aggregate. */
5859
5860 static bool
5861 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5862 const_tree type,
5863 enum machine_mode *base_mode,
5864 int *count,
5865 bool *is_ha)
5866 {
5867 enum machine_mode new_mode = VOIDmode;
5868 bool composite_p = aarch64_composite_type_p (type, mode);
5869
5870 if (is_ha != NULL) *is_ha = false;
5871
5872 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5873 || aarch64_short_vector_p (type, mode))
5874 {
5875 *count = 1;
5876 new_mode = mode;
5877 }
5878 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5879 {
5880 if (is_ha != NULL) *is_ha = true;
5881 *count = 2;
5882 new_mode = GET_MODE_INNER (mode);
5883 }
5884 else if (type && composite_p)
5885 {
5886 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5887
5888 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5889 {
5890 if (is_ha != NULL) *is_ha = true;
5891 *count = ag_count;
5892 }
5893 else
5894 return false;
5895 }
5896 else
5897 return false;
5898
5899 *base_mode = new_mode;
5900 return true;
5901 }
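/* Illustrative example (editorial sketch, not in the original source):
   for _Complex float the function above sets *count = 2, *base_mode =
   SFmode and *is_ha = true, so such a value can use S0 and S1 (one
   register per member); for a plain double it sets *count = 1 and
   *base_mode = DFmode.  */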
5902
5903 /* Implement TARGET_STRUCT_VALUE_RTX. */
5904
5905 static rtx
5906 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5907 int incoming ATTRIBUTE_UNUSED)
5908 {
5909 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5910 }
5911
5912 /* Implement target hook TARGET_VECTOR_MODE_SUPPORTED_P. */
5913 static bool
5914 aarch64_vector_mode_supported_p (enum machine_mode mode)
5915 {
5916 if (TARGET_SIMD
5917 && (mode == V4SImode || mode == V8HImode
5918 || mode == V16QImode || mode == V2DImode
5919 || mode == V2SImode || mode == V4HImode
5920 || mode == V8QImode || mode == V2SFmode
5921 || mode == V4SFmode || mode == V2DFmode))
5922 return true;
5923
5924 return false;
5925 }
5926
5927 /* Return quad mode as the preferred SIMD mode. */
5928 static enum machine_mode
5929 aarch64_preferred_simd_mode (enum machine_mode mode)
5930 {
5931 if (TARGET_SIMD)
5932 switch (mode)
5933 {
5934 case DFmode:
5935 return V2DFmode;
5936 case SFmode:
5937 return V4SFmode;
5938 case SImode:
5939 return V4SImode;
5940 case HImode:
5941 return V8HImode;
5942 case QImode:
5943 return V16QImode;
5944 case DImode:
5945 return V2DImode;
5946 break;
5947
5948 default:;
5949 }
5950 return word_mode;
5951 }
5952
5953 /* Return the bitmask of possible vector sizes for the vectorizer
5954 to iterate over. */
5955 static unsigned int
5956 aarch64_autovectorize_vector_sizes (void)
5957 {
5958 return (16 | 8);
5959 }
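/* Illustrative note (editorial sketch, not in the original source):
   16 | 8 == 24 is a bitmask of candidate vector sizes in bytes, so the
   vectorizer first tries 128-bit (Q-register) modes and may fall back
   to 64-bit (D-register) modes.  */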
5960
5961 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5962 vector types in order to conform to the AAPCS64 (see "Procedure
5963 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5964 qualify for emission with the mangled names defined in that document,
5965 a vector type must not only be of the correct mode but also be
5966 composed of AdvSIMD vector element types (e.g.
5967 _builtin_aarch64_simd_qi); these types are registered by
5968 aarch64_init_simd_builtins (). In other words, vector types defined
5969 in other ways e.g. via vector_size attribute will get default
5970 mangled names. */
5971 typedef struct
5972 {
5973 enum machine_mode mode;
5974 const char *element_type_name;
5975 const char *mangled_name;
5976 } aarch64_simd_mangle_map_entry;
5977
5978 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5979 /* 64-bit containerized types. */
5980 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5981 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5982 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5983 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5984 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5985 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5986 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5987 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5988 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5989 /* 128-bit containerized types. */
5990 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5991 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5992 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5993 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5994 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5995 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5996 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5997 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5998 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5999 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6000 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6001 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6002 { VOIDmode, NULL, NULL }
6003 };
6004
6005 /* Implement TARGET_MANGLE_TYPE. */
6006
6007 static const char *
6008 aarch64_mangle_type (const_tree type)
6009 {
6010 /* The AArch64 ABI documents say that "__va_list" has to be
6011 mangled as if it is in the "std" namespace. */
6012 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6013 return "St9__va_list";
6014
6015 /* Check the mode of the vector type, and the name of the vector
6016 element type, against the table. */
6017 if (TREE_CODE (type) == VECTOR_TYPE)
6018 {
6019 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6020
6021 while (pos->mode != VOIDmode)
6022 {
6023 tree elt_type = TREE_TYPE (type);
6024
6025 if (pos->mode == TYPE_MODE (type)
6026 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6027 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6028 pos->element_type_name))
6029 return pos->mangled_name;
6030
6031 pos++;
6032 }
6033 }
6034
6035 /* Use the default mangling. */
6036 return NULL;
6037 }
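/* Illustrative example (editorial sketch, not in the original source):
   with the table above, a C++ function void f (int32x4_t) mangles as
   _Z1f11__Int32x4_t, whereas a type defined with the vector_size
   attribute falls through to the default GNU vector mangling.  */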
6038
6039 /* Return the equivalent letter for size. */
6040 static unsigned char
6041 sizetochar (int size)
6042 {
6043 switch (size)
6044 {
6045 case 64: return 'd';
6046 case 32: return 's';
6047 case 16: return 'h';
6048 case 8: return 'b';
6049 default: gcc_unreachable ();
6050 }
6051 }
6052
6053 /* Return true iff x is a uniform vector of floating-point
6054 constants, and the constant can be represented in
6055 quarter-precision form. Note, as aarch64_float_const_representable_p
6056 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6057 static bool
6058 aarch64_vect_float_const_representable_p (rtx x)
6059 {
6060 int i = 0;
6061 REAL_VALUE_TYPE r0, ri;
6062 rtx x0, xi;
6063
6064 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6065 return false;
6066
6067 x0 = CONST_VECTOR_ELT (x, 0);
6068 if (!CONST_DOUBLE_P (x0))
6069 return false;
6070
6071 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6072
6073 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6074 {
6075 xi = CONST_VECTOR_ELT (x, i);
6076 if (!CONST_DOUBLE_P (xi))
6077 return false;
6078
6079 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6080 if (!REAL_VALUES_EQUAL (r0, ri))
6081 return false;
6082 }
6083
6084 return aarch64_float_const_representable_p (x0);
6085 }
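/* Illustrative example (editorial sketch, not in the original source):
   the V2DF constant { 2.0, 2.0 } passes this test (uniform, and 2.0 is
   quarter-precision representable); { 2.0, 4.0 } fails the uniformity
   check, and { 0.0, 0.0 } is rejected by the final call.  */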
6086
6087 /* TODO: This function returns values similar to those
6088 returned by neon_valid_immediate in gcc/config/arm/arm.c
6089 but the API here is different enough that these magic numbers
6090 are not used. It should be sufficient to return true or false. */
6091 static int
6092 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6093 rtx *modconst, int *elementwidth,
6094 unsigned char *elementchar,
6095 int *mvn, int *shift)
6096 {
6097 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6098 matches = 1; \
6099 for (i = 0; i < idx; i += (STRIDE)) \
6100 if (!(TEST)) \
6101 matches = 0; \
6102 if (matches) \
6103 { \
6104 immtype = (CLASS); \
6105 elsize = (ELSIZE); \
6106 elchar = sizetochar (elsize); \
6107 eshift = (SHIFT); \
6108 emvn = (NEG); \
6109 break; \
6110 }
6111
6112 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6113 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6114 unsigned char bytes[16];
6115 unsigned char elchar = 0;
6116 int immtype = -1, matches;
6117 unsigned int invmask = inverse ? 0xff : 0;
6118 int eshift, emvn;
6119
6120 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6121 {
6122 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6123 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6124
6125 if (!(simd_imm_zero
6126 || aarch64_vect_float_const_representable_p (op)))
6127 return -1;
6128
6129 if (modconst)
6130 *modconst = CONST_VECTOR_ELT (op, 0);
6131
6132 if (elementwidth)
6133 *elementwidth = elem_width;
6134
6135 if (elementchar)
6136 *elementchar = sizetochar (elem_width);
6137
6138 if (shift)
6139 *shift = 0;
6140
6141 if (simd_imm_zero)
6142 return 19;
6143 else
6144 return 18;
6145 }
6146
6147 /* Splat vector constant out into a byte vector. */
6148 for (i = 0; i < n_elts; i++)
6149 {
6150 rtx el = CONST_VECTOR_ELT (op, i);
6151 unsigned HOST_WIDE_INT elpart;
6152 unsigned int part, parts;
6153
6154 if (GET_CODE (el) == CONST_INT)
6155 {
6156 elpart = INTVAL (el);
6157 parts = 1;
6158 }
6159 else if (GET_CODE (el) == CONST_DOUBLE)
6160 {
6161 elpart = CONST_DOUBLE_LOW (el);
6162 parts = 2;
6163 }
6164 else
6165 gcc_unreachable ();
6166
6167 for (part = 0; part < parts; part++)
6168 {
6169 unsigned int byte;
6170 for (byte = 0; byte < innersize; byte++)
6171 {
6172 bytes[idx++] = (elpart & 0xff) ^ invmask;
6173 elpart >>= BITS_PER_UNIT;
6174 }
6175 if (GET_CODE (el) == CONST_DOUBLE)
6176 elpart = CONST_DOUBLE_HIGH (el);
6177 }
6178 }
6179
6180 /* Sanity check. */
6181 gcc_assert (idx == GET_MODE_SIZE (mode));
6182
6183 do
6184 {
6185 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6186 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6187
6188 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6189 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6190
6191 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6192 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6193
6194 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6195 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6196
6197 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6198
6199 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6200
6201 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6202 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6203
6204 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6205 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6206
6207 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6208 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6209
6210 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6211 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6212
6213 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6214
6215 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6216
6217 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6218 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6219
6220 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6221 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6222
6223 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6224 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6225
6226 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6227 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6228
6229 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6230
6231 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6232 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6233 }
6234 while (0);
6235
6236 /* TODO: Currently the assembler cannot handle types 12 to 15.
6237 And there is no way to specify cmode through the compiler.
6238 Disable them until there is support in the assembler. */
6239 if (immtype == -1
6240 || (immtype >= 12 && immtype <= 15)
6241 || immtype == 18)
6242 return -1;
6243
6244
6245 if (elementwidth)
6246 *elementwidth = elsize;
6247
6248 if (elementchar)
6249 *elementchar = elchar;
6250
6251 if (mvn)
6252 *mvn = emvn;
6253
6254 if (shift)
6255 *shift = eshift;
6256
6257 if (modconst)
6258 {
6259 unsigned HOST_WIDE_INT imm = 0;
6260
6261 /* Un-invert bytes of recognized vector, if necessary. */
6262 if (invmask != 0)
6263 for (i = 0; i < idx; i++)
6264 bytes[i] ^= invmask;
6265
6266 if (immtype == 17)
6267 {
6268 /* FIXME: Broken on 32-bit H_W_I hosts. */
6269 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6270
6271 for (i = 0; i < 8; i++)
6272 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6273 << (i * BITS_PER_UNIT);
6274
6275 *modconst = GEN_INT (imm);
6276 }
6277 else
6278 {
6279 unsigned HOST_WIDE_INT imm = 0;
6280
6281 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6282 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6283
6284 /* Construct 'abcdefgh' because the assembler cannot handle
6285 generic constants. */
6286 gcc_assert (shift != NULL && mvn != NULL);
6287 if (*mvn)
6288 imm = ~imm;
6289 imm = (imm >> *shift) & 0xff;
6290 *modconst = GEN_INT (imm);
6291 }
6292 }
6293
6294 return immtype;
6295 #undef CHECK
6296 }
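/* Illustrative worked example (editorial sketch, not in the original
   source): for the V4SI constant with every element 0x00AB0000, the
   little-endian byte splat is 00 00 ab 00 repeated, which matches the
   CHECK for immtype 2 (ELSIZE 32, SHIFT 16).  The function then
   returns 2 with *modconst = 0xab, *shift = 16 and *mvn = 0, which the
   output routine below renders as a MOVI of 0xab with "lsl 16".  */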
6297
6298 /* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6299 (or, implicitly, MVNI) immediate. Write back the width per element
6300 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6301 for a MOVI instruction) in *MODCONST. */
6302 int
6303 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6304 rtx *modconst, int *elementwidth,
6305 unsigned char *elementchar,
6306 int *mvn, int *shift)
6307 {
6308 rtx tmpconst;
6309 int tmpwidth;
6310 unsigned char tmpwidthc;
6311 int tmpmvn = 0, tmpshift = 0;
6312 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6313 &tmpwidth, &tmpwidthc,
6314 &tmpmvn, &tmpshift);
6315
6316 if (retval == -1)
6317 return 0;
6318
6319 if (modconst)
6320 *modconst = tmpconst;
6321
6322 if (elementwidth)
6323 *elementwidth = tmpwidth;
6324
6325 if (elementchar)
6326 *elementchar = tmpwidthc;
6327
6328 if (mvn)
6329 *mvn = tmpmvn;
6330
6331 if (shift)
6332 *shift = tmpshift;
6333
6334 return 1;
6335 }
6336
6337 static bool
6338 aarch64_const_vec_all_same_int_p (rtx x,
6339 HOST_WIDE_INT minval,
6340 HOST_WIDE_INT maxval)
6341 {
6342 HOST_WIDE_INT firstval;
6343 int count, i;
6344
6345 if (GET_CODE (x) != CONST_VECTOR
6346 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6347 return false;
6348
6349 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6350 if (firstval < minval || firstval > maxval)
6351 return false;
6352
6353 count = CONST_VECTOR_NUNITS (x);
6354 for (i = 1; i < count; i++)
6355 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6356 return false;
6357
6358 return true;
6359 }
6360
6361 /* Check if immediate shift constants are within range. */
6362 bool
6363 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6364 {
6365 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6366 if (left)
6367 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6368 else
6369 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6370 }
6371
6372 /* Return true if X is a uniform vector where all elements
6373 are either the floating-point constant 0.0 or the
6374 integer constant 0. */
6375 bool
6376 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6377 {
6378 return x == CONST0_RTX (mode);
6379 }
6380
6381 bool
6382 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6383 {
6384 HOST_WIDE_INT imm = INTVAL (x);
6385 int i;
6386
6387 for (i = 0; i < 8; i++)
6388 {
6389 unsigned int byte = imm & 0xff;
6390 if (byte != 0xff && byte != 0)
6391 return false;
6392 imm >>= 8;
6393 }
6394
6395 return true;
6396 }
6397
6398 /* Return a const_int vector of VAL. */
6399 rtx
6400 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6401 {
6402 int nunits = GET_MODE_NUNITS (mode);
6403 rtvec v = rtvec_alloc (nunits);
6404 int i;
6405
6406 for (i = 0; i < nunits; i++)
6407 RTVEC_ELT (v, i) = GEN_INT (val);
6408
6409 return gen_rtx_CONST_VECTOR (mode, v);
6410 }
6411
6412 /* Construct and return a PARALLEL RTX vector. */
6413 rtx
6414 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6415 {
6416 int nunits = GET_MODE_NUNITS (mode);
6417 rtvec v = rtvec_alloc (nunits / 2);
6418 int base = high ? nunits / 2 : 0;
6419 rtx t1;
6420 int i;
6421
6422 for (i = 0; i < nunits / 2; i++)
6423 RTVEC_ELT (v, i) = GEN_INT (base + i);
6424
6425 t1 = gen_rtx_PARALLEL (mode, v);
6426 return t1;
6427 }
6428
6429 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6430 HIGH (exclusive). */
6431 void
6432 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6433 {
6434 HOST_WIDE_INT lane;
6435 gcc_assert (GET_CODE (operand) == CONST_INT);
6436 lane = INTVAL (operand);
6437
6438 if (lane < low || lane >= high)
6439 error ("lane out of range");
6440 }
6441
6442 void
6443 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6444 {
6445 gcc_assert (GET_CODE (operand) == CONST_INT);
6446 HOST_WIDE_INT lane = INTVAL (operand);
6447
6448 if (lane < low || lane >= high)
6449 error ("constant out of range");
6450 }
6451
6452 /* Emit code to reinterpret one AdvSIMD type as another,
6453 without altering bits. */
6454 void
6455 aarch64_simd_reinterpret (rtx dest, rtx src)
6456 {
6457 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6458 }
6459
6460 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6461 registers). */
6462 void
6463 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6464 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6465 rtx op1)
6466 {
6467 rtx mem = gen_rtx_MEM (mode, destaddr);
6468 rtx tmp1 = gen_reg_rtx (mode);
6469 rtx tmp2 = gen_reg_rtx (mode);
6470
6471 emit_insn (intfn (tmp1, op1, tmp2));
6472
6473 emit_move_insn (mem, tmp1);
6474 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6475 emit_move_insn (mem, tmp2);
6476 }
6477
6478 /* Return TRUE if OP is a valid vector addressing mode. */
6479 bool
6480 aarch64_simd_mem_operand_p (rtx op)
6481 {
6482 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6483 || GET_CODE (XEXP (op, 0)) == REG);
6484 }
6485
6486 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6487 not to early-clobber SRC registers in the process.
6488
6489 We assume that the operands described by SRC and DEST represent a
6490 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6491 number of components into which the copy has been decomposed. */
6492 void
6493 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6494 rtx *src, unsigned int count)
6495 {
6496 unsigned int i;
6497
6498 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6499 || REGNO (operands[0]) < REGNO (operands[1]))
6500 {
6501 for (i = 0; i < count; i++)
6502 {
6503 operands[2 * i] = dest[i];
6504 operands[2 * i + 1] = src[i];
6505 }
6506 }
6507 else
6508 {
6509 for (i = 0; i < count; i++)
6510 {
6511 operands[2 * i] = dest[count - i - 1];
6512 operands[2 * i + 1] = src[count - i - 1];
6513 }
6514 }
6515 }
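/* Illustrative example (editorial sketch, not in the original source):
   splitting a copy of { V0, V1 } into { V1, V2 } overlaps with
   REGNO (dest) > REGNO (src), so the loop above orders the component
   moves in reverse -- V2 <- V1 before V1 <- V0 -- so that V1 is read
   before it is overwritten.  */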
6516
6517 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6518 one of VSTRUCT modes: OI, CI or XI. */
6519 int
6520 aarch64_simd_attr_length_move (rtx insn)
6521 {
6522 enum machine_mode mode;
6523
6524 extract_insn_cached (insn);
6525
6526 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6527 {
6528 mode = GET_MODE (recog_data.operand[0]);
6529 switch (mode)
6530 {
6531 case OImode:
6532 return 8;
6533 case CImode:
6534 return 12;
6535 case XImode:
6536 return 16;
6537 default:
6538 gcc_unreachable ();
6539 }
6540 }
6541 return 4;
6542 }
6543
6544 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6545 alignment of a vector to 128 bits. */
6546 static HOST_WIDE_INT
6547 aarch64_simd_vector_alignment (const_tree type)
6548 {
6549 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6550 return MIN (align, 128);
6551 }
6552
6553 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6554 static bool
6555 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6556 {
6557 if (is_packed)
6558 return false;
6559
6560 /* We guarantee alignment for vectors up to 128-bits. */
6561 if (tree_int_cst_compare (TYPE_SIZE (type),
6562 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6563 return false;
6564
6565 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6566 return true;
6567 }
6568
6569 /* If VALS is a vector constant that can be loaded into a register
6570 using DUP, generate instructions to do so and return an RTX to
6571 assign to the register. Otherwise return NULL_RTX. */
6572 static rtx
6573 aarch64_simd_dup_constant (rtx vals)
6574 {
6575 enum machine_mode mode = GET_MODE (vals);
6576 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6577 int n_elts = GET_MODE_NUNITS (mode);
6578 bool all_same = true;
6579 rtx x;
6580 int i;
6581
6582 if (GET_CODE (vals) != CONST_VECTOR)
6583 return NULL_RTX;
6584
6585 for (i = 1; i < n_elts; ++i)
6586 {
6587 x = CONST_VECTOR_ELT (vals, i);
6588 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6589 all_same = false;
6590 }
6591
6592 if (!all_same)
6593 return NULL_RTX;
6594
6595 /* We can load this constant by using DUP and a constant in a
6596 single ARM register. This will be cheaper than a vector
6597 load. */
6598 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6599 return gen_rtx_VEC_DUPLICATE (mode, x);
6600 }
6601
6602
6603 /* Generate code to load VALS, which is a PARALLEL containing only
6604 constants (for vec_init) or CONST_VECTOR, efficiently into a
6605 register. Returns an RTX to copy into the register, or NULL_RTX
6606 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6607 static rtx
6608 aarch64_simd_make_constant (rtx vals)
6609 {
6610 enum machine_mode mode = GET_MODE (vals);
6611 rtx const_dup;
6612 rtx const_vec = NULL_RTX;
6613 int n_elts = GET_MODE_NUNITS (mode);
6614 int n_const = 0;
6615 int i;
6616
6617 if (GET_CODE (vals) == CONST_VECTOR)
6618 const_vec = vals;
6619 else if (GET_CODE (vals) == PARALLEL)
6620 {
6621 /* A CONST_VECTOR must contain only CONST_INTs and
6622 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6623 Only store valid constants in a CONST_VECTOR. */
6624 for (i = 0; i < n_elts; ++i)
6625 {
6626 rtx x = XVECEXP (vals, 0, i);
6627 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6628 n_const++;
6629 }
6630 if (n_const == n_elts)
6631 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6632 }
6633 else
6634 gcc_unreachable ();
6635
6636 if (const_vec != NULL_RTX
6637 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6638 NULL, NULL, NULL))
6639 /* Load using MOVI/MVNI. */
6640 return const_vec;
6641 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6642 /* Loaded using DUP. */
6643 return const_dup;
6644 else if (const_vec != NULL_RTX)
6645 /* Load from constant pool. We cannot take advantage of single-cycle
6646 LD1 because we need a PC-relative addressing mode. */
6647 return const_vec;
6648 else
6649 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6650 We cannot construct an initializer. */
6651 return NULL_RTX;
6652 }
6653
6654 void
6655 aarch64_expand_vector_init (rtx target, rtx vals)
6656 {
6657 enum machine_mode mode = GET_MODE (target);
6658 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6659 int n_elts = GET_MODE_NUNITS (mode);
6660 int n_var = 0, one_var = -1;
6661 bool all_same = true;
6662 rtx x, mem;
6663 int i;
6664
6665 x = XVECEXP (vals, 0, 0);
6666 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6667 n_var = 1, one_var = 0;
6668
6669 for (i = 1; i < n_elts; ++i)
6670 {
6671 x = XVECEXP (vals, 0, i);
6672 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6673 ++n_var, one_var = i;
6674
6675 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6676 all_same = false;
6677 }
6678
6679 if (n_var == 0)
6680 {
6681 rtx constant = aarch64_simd_make_constant (vals);
6682 if (constant != NULL_RTX)
6683 {
6684 emit_move_insn (target, constant);
6685 return;
6686 }
6687 }
6688
6689 /* Splat a single non-constant element if we can. */
6690 if (all_same)
6691 {
6692 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6693 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6694 return;
6695 }
6696
6697 /* One field is non-constant. Load constant then overwrite varying
6698 field. This is more efficient than using the stack. */
6699 if (n_var == 1)
6700 {
6701 rtx copy = copy_rtx (vals);
6702 rtx index = GEN_INT (one_var);
6703 enum insn_code icode;
6704
6705 /* Load constant part of vector, substitute neighboring value for
6706 varying element. */
6707 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6708 aarch64_expand_vector_init (target, copy);
6709
6710 /* Insert variable. */
6711 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6712 icode = optab_handler (vec_set_optab, mode);
6713 gcc_assert (icode != CODE_FOR_nothing);
6714 emit_insn (GEN_FCN (icode) (target, x, index));
6715 return;
6716 }
6717
6718 /* Construct the vector in memory one field at a time
6719 and load the whole vector. */
6720 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6721 for (i = 0; i < n_elts; i++)
6722 emit_move_insn (adjust_address_nv (mem, inner_mode,
6723 i * GET_MODE_SIZE (inner_mode)),
6724 XVECEXP (vals, 0, i));
6725 emit_move_insn (target, mem);
6726
6727 }
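/* Illustrative example (editorial sketch, not in the original source):
   initializing a V4SI vector with { x, 1, 2, 3 } takes the n_var == 1
   path above: it first materializes the constant { 1, 1, 2, 3 }
   (element 0 borrowed from its neighbour) and then overwrites lane 0
   with x via the vec_set pattern, avoiding a trip through the
   stack.  */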
6728
6729 static unsigned HOST_WIDE_INT
6730 aarch64_shift_truncation_mask (enum machine_mode mode)
6731 {
6732 return
6733 (aarch64_vector_mode_supported_p (mode)
6734 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6735 }
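/* Illustrative note (editorial sketch, not in the original source):
   the mask above is 31 for SImode and 63 for DImode, matching the
   hardware's truncation of scalar shift amounts, but 0 for vector
   modes, whose shift counts are not simply truncated.  */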
6736
6737 #ifndef TLS_SECTION_ASM_FLAG
6738 #define TLS_SECTION_ASM_FLAG 'T'
6739 #endif
6740
6741 void
6742 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6743 tree decl ATTRIBUTE_UNUSED)
6744 {
6745 char flagchars[10], *f = flagchars;
6746
6747 /* If we have already declared this section, we can use an
6748 abbreviated form to switch back to it -- unless this section is
6749 part of a COMDAT group, in which case GAS requires the full
6750 declaration every time. */
6751 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6752 && (flags & SECTION_DECLARED))
6753 {
6754 fprintf (asm_out_file, "\t.section\t%s\n", name);
6755 return;
6756 }
6757
6758 if (!(flags & SECTION_DEBUG))
6759 *f++ = 'a';
6760 if (flags & SECTION_WRITE)
6761 *f++ = 'w';
6762 if (flags & SECTION_CODE)
6763 *f++ = 'x';
6764 if (flags & SECTION_SMALL)
6765 *f++ = 's';
6766 if (flags & SECTION_MERGE)
6767 *f++ = 'M';
6768 if (flags & SECTION_STRINGS)
6769 *f++ = 'S';
6770 if (flags & SECTION_TLS)
6771 *f++ = TLS_SECTION_ASM_FLAG;
6772 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6773 *f++ = 'G';
6774 *f = '\0';
6775
6776 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6777
6778 if (!(flags & SECTION_NOTYPE))
6779 {
6780 const char *type;
6781 const char *format;
6782
6783 if (flags & SECTION_BSS)
6784 type = "nobits";
6785 else
6786 type = "progbits";
6787
6788 #ifdef TYPE_OPERAND_FMT
6789 format = "," TYPE_OPERAND_FMT;
6790 #else
6791 format = ",@%s";
6792 #endif
6793
6794 fprintf (asm_out_file, format, type);
6795
6796 if (flags & SECTION_ENTSIZE)
6797 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6798 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6799 {
6800 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6801 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6802 else
6803 fprintf (asm_out_file, ",%s,comdat",
6804 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6805 }
6806 }
6807
6808 putc ('\n', asm_out_file);
6809 }
6810
6811 /* Select a format to encode pointers in exception handling data. */
6812 int
6813 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6814 {
6815 int type;
6816 switch (aarch64_cmodel)
6817 {
6818 case AARCH64_CMODEL_TINY:
6819 case AARCH64_CMODEL_TINY_PIC:
6820 case AARCH64_CMODEL_SMALL:
6821 case AARCH64_CMODEL_SMALL_PIC:
6822 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6823 for everything. */
6824 type = DW_EH_PE_sdata4;
6825 break;
6826 default:
6827 /* No assumptions here. 8-byte relocs required. */
6828 type = DW_EH_PE_sdata8;
6829 break;
6830 }
6831 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6832 }
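/* Illustrative note (editorial sketch, not in the original source):
   under the small code model a local symbol is therefore encoded as
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, a 4-byte PC-relative value, while
   a global symbol additionally goes through an indirect slot.  */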
6833
6834 /* Emit load exclusive. */
6835
6836 static void
6837 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6838 rtx mem, rtx model_rtx)
6839 {
6840 rtx (*gen) (rtx, rtx, rtx);
6841
6842 switch (mode)
6843 {
6844 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6845 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6846 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6847 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6848 default:
6849 gcc_unreachable ();
6850 }
6851
6852 emit_insn (gen (rval, mem, model_rtx));
6853 }
6854
6855 /* Emit store exclusive. */
6856
6857 static void
6858 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6859 rtx rval, rtx mem, rtx model_rtx)
6860 {
6861 rtx (*gen) (rtx, rtx, rtx, rtx);
6862
6863 switch (mode)
6864 {
6865 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6866 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6867 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6868 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6869 default:
6870 gcc_unreachable ();
6871 }
6872
6873 emit_insn (gen (bval, rval, mem, model_rtx));
6874 }
6875
6876 /* Mark the previous jump instruction as unlikely. */
6877
6878 static void
6879 aarch64_emit_unlikely_jump (rtx insn)
6880 {
6881 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6882
6883 insn = emit_jump_insn (insn);
6884 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6885 }
6886
6887 /* Expand a compare and swap pattern. */
6888
6889 void
6890 aarch64_expand_compare_and_swap (rtx operands[])
6891 {
6892 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6893 enum machine_mode mode, cmp_mode;
6894 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6895
6896 bval = operands[0];
6897 rval = operands[1];
6898 mem = operands[2];
6899 oldval = operands[3];
6900 newval = operands[4];
6901 is_weak = operands[5];
6902 mod_s = operands[6];
6903 mod_f = operands[7];
6904 mode = GET_MODE (mem);
6905 cmp_mode = mode;
6906
6907 /* Normally the succ memory model must be stronger than fail, but in the
6908 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6909 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6910
6911 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6912 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6913 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6914
6915 switch (mode)
6916 {
6917 case QImode:
6918 case HImode:
6919 /* For short modes, we're going to perform the comparison in SImode,
6920 so do the zero-extension now. */
6921 cmp_mode = SImode;
6922 rval = gen_reg_rtx (SImode);
6923 oldval = convert_modes (SImode, mode, oldval, true);
6924 /* Fall through. */
6925
6926 case SImode:
6927 case DImode:
6928 /* Force the value into a register if needed. */
6929 if (!aarch64_plus_operand (oldval, mode))
6930 oldval = force_reg (cmp_mode, oldval);
6931 break;
6932
6933 default:
6934 gcc_unreachable ();
6935 }
6936
6937 switch (mode)
6938 {
6939 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6940 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6941 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6942 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6943 default:
6944 gcc_unreachable ();
6945 }
6946
6947 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6948
6949 if (mode == QImode || mode == HImode)
6950 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6951
6952 x = gen_rtx_REG (CCmode, CC_REGNUM);
6953 x = gen_rtx_EQ (SImode, x, const0_rtx);
6954 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6955 }
6956
6957 /* Split a compare and swap pattern. */
6958
6959 void
6960 aarch64_split_compare_and_swap (rtx operands[])
6961 {
6962 rtx rval, mem, oldval, newval, scratch;
6963 enum machine_mode mode;
6964 bool is_weak;
6965 rtx label1, label2, x, cond;
6966
6967 rval = operands[0];
6968 mem = operands[1];
6969 oldval = operands[2];
6970 newval = operands[3];
6971 is_weak = (operands[4] != const0_rtx);
6972 scratch = operands[7];
6973 mode = GET_MODE (mem);
6974
6975 label1 = NULL_RTX;
6976 if (!is_weak)
6977 {
6978 label1 = gen_label_rtx ();
6979 emit_label (label1);
6980 }
6981 label2 = gen_label_rtx ();
6982
6983 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6984
6985 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6986 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6987 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6988 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6989 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6990
6991 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6992
6993 if (!is_weak)
6994 {
6995 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6996 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6997 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6998 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6999 }
7000 else
7001 {
7002 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7003 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7004 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7005 }
7006
7007 emit_label (label2);
7008 }
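/* Illustrative sketch (editorial, not in the original source; register
   names are hypothetical) of the loop a strong SImode compare-and-swap
   splits into:

     .L1: ldxr  w0, [x1]        // load-exclusive the current value
          cmp   w0, w2          // compare with the expected value
          bne   .L2             // mismatch: fail
          stxr  w3, w4, [x1]    // try to store the new value
          cbnz  w3, .L1         // exclusivity lost: retry
     .L2:                                                            */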
7009
7010 /* Split an atomic operation. */
7011
7012 void
7013 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7014 rtx value, rtx model_rtx, rtx cond)
7015 {
7016 enum machine_mode mode = GET_MODE (mem);
7017 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7018 rtx label, x;
7019
7020 label = gen_label_rtx ();
7021 emit_label (label);
7022
7023 if (new_out)
7024 new_out = gen_lowpart (wmode, new_out);
7025 if (old_out)
7026 old_out = gen_lowpart (wmode, old_out);
7027 else
7028 old_out = new_out;
7029 value = simplify_gen_subreg (wmode, value, mode, 0);
7030
7031 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7032
7033 switch (code)
7034 {
7035 case SET:
7036 new_out = value;
7037 break;
7038
7039 case NOT:
7040 x = gen_rtx_AND (wmode, old_out, value);
7041 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7042 x = gen_rtx_NOT (wmode, new_out);
7043 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7044 break;
7045
7046 case MINUS:
7047 if (CONST_INT_P (value))
7048 {
7049 value = GEN_INT (-INTVAL (value));
7050 code = PLUS;
7051 }
7052 /* Fall through. */
7053
7054 default:
7055 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7056 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7057 break;
7058 }
7059
7060 aarch64_emit_store_exclusive (mode, cond, mem,
7061 gen_lowpart (mode, new_out), model_rtx);
7062
7063 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7064 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7065 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7066 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7067 }
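/* Illustrative sketch (editorial, not in the original source; register
   names are hypothetical): an SImode atomic add splits into

     .L1: ldxr  w0, [x2]        // old value
          add   w1, w0, w3      // compute the new value
          stxr  w4, w1, [x2]    // attempt the store
          cbnz  w4, .L1         // retry if the exclusive store failed

   with the memory model enforced by the load/store-exclusive patterns
   themselves.  */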
7068
7069 static void
7070 aarch64_print_extension (void)
7071 {
7072 const struct aarch64_option_extension *opt = NULL;
7073
7074 for (opt = all_extensions; opt->name != NULL; opt++)
7075 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7076 asm_fprintf (asm_out_file, "+%s", opt->name);
7077
7078 asm_fprintf (asm_out_file, "\n");
7079 }
7080
7081 static void
7082 aarch64_start_file (void)
7083 {
7084 if (selected_arch)
7085 {
7086 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7087 aarch64_print_extension ();
7088 }
7089 else if (selected_cpu)
7090 {
7091 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7092 aarch64_print_extension ();
7093 }
7094 default_file_start ();
7095 }
7096
7097 /* Target hook for c_mode_for_suffix. */
7098 static enum machine_mode
7099 aarch64_c_mode_for_suffix (char suffix)
7100 {
7101 if (suffix == 'q')
7102 return TFmode;
7103
7104 return VOIDmode;
7105 }
7106
7107 /* We can only represent floating point constants which will fit in
7108 "quarter-precision" values. These values are characterised by
7109 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7110 by:
7111
7112 (-1)^s * (n/16) * 2^r
7113
7114 Where:
7115 's' is the sign bit.
7116 'n' is an integer in the range 16 <= n <= 31.
7117 'r' is an integer in the range -3 <= r <= 4. */
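/* Illustrative worked examples (editorial sketch, not in the original
   source): 0.5 = (16/16) * 2^-1 and 3.875 = (31/16) * 2^1 are
   representable; 1/3 is not (no n in [16, 31] and r in [-3, 4]
   reproduce it), and 0.0 is excluded because n >= 16 forces a
   non-zero value.  */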
7118
7119 /* Return true iff X can be represented by a quarter-precision
7120 floating point immediate operand. Note, we cannot represent 0.0. */
7121 bool
7122 aarch64_float_const_representable_p (rtx x)
7123 {
7124 /* This represents our current view of how many bits
7125 make up the mantissa. */
7126 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7127 int exponent;
7128 unsigned HOST_WIDE_INT mantissa, mask;
7129 HOST_WIDE_INT m1, m2;
7130 REAL_VALUE_TYPE r, m;
7131
7132 if (!CONST_DOUBLE_P (x))
7133 return false;
7134
7135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7136
7137 /* We cannot represent infinities, NaNs or +/-zero. We won't
7138 know if we have +zero until we analyse the mantissa, but we
7139 can reject the other invalid values. */
7140 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7141 || REAL_VALUE_MINUS_ZERO (r))
7142 return false;
7143
7144 /* Extract exponent. */
7145 r = real_value_abs (&r);
7146 exponent = REAL_EXP (&r);
7147
7148 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7149 highest (sign) bit, with a fixed binary point at bit point_pos.
7150 m1 holds the low part of the mantissa, m2 the high part.
7151 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7152 bits for the mantissa, this can fail (low bits will be lost). */
7153 real_ldexp (&m, &r, point_pos - exponent);
7154 REAL_VALUE_TO_INT (&m1, &m2, m);
7155
7156 /* If the low part of the mantissa has bits set we cannot represent
7157 the value. */
7158 if (m1 != 0)
7159 return false;
7160 /* We have rejected the lower HOST_WIDE_INT, so update our
7161 understanding of how many bits lie in the mantissa and
7162 look only at the high HOST_WIDE_INT. */
7163 mantissa = m2;
7164 point_pos -= HOST_BITS_PER_WIDE_INT;
7165
7166 /* We can only represent values with a mantissa of the form 1.xxxx. */
7167 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7168 if ((mantissa & mask) != 0)
7169 return false;
7170
7171 /* Having filtered unrepresentable values, we may now remove all
7172 but the highest 5 bits. */
7173 mantissa >>= point_pos - 5;
7174
7175 /* We cannot represent the value 0.0, so reject it. This is handled
7176 elsewhere. */
7177 if (mantissa == 0)
7178 return false;
7179
7180 /* Then, as bit 4 is always set, we can mask it off, leaving
7181 the mantissa in the range [0, 15]. */
7182 mantissa &= ~(1 << 4);
7183 gcc_assert (mantissa <= 15);
7184
7185 /* GCC internally does not use IEEE754-like encoding (where normalized
7186 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7187 Our mantissa values are shifted 4 places to the left relative to
7188 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7189 by 5 places to correct for GCC's representation. */
7190 exponent = 5 - exponent;
7191
7192 return (exponent >= 0 && exponent <= 7);
7193 }
7194
7195 char*
7196 aarch64_output_simd_mov_immediate (rtx *const_vector,
7197 enum machine_mode mode,
7198 unsigned width)
7199 {
7200 int is_valid;
7201 unsigned char widthc;
7202 int lane_width_bits;
7203 static char templ[40];
7204 int shift = 0, mvn = 0;
7205 const char *mnemonic;
7206 unsigned int lane_count = 0;
7207
7208 is_valid =
7209 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7210 const_vector, &lane_width_bits,
7211 &widthc, &mvn, &shift);
7212 gcc_assert (is_valid);
7213
7214 mode = GET_MODE_INNER (mode);
7215 if (mode == SFmode || mode == DFmode)
7216 {
7217 bool zero_p =
7218 aarch64_float_const_zero_rtx_p (*const_vector);
7219 gcc_assert (shift == 0);
7220 mnemonic = zero_p ? "movi" : "fmov";
7221 }
7222 else
7223 mnemonic = mvn ? "mvni" : "movi";
7224
7225 gcc_assert (lane_width_bits != 0);
7226 lane_count = width / lane_width_bits;
7227
7228 if (lane_count == 1)
7229 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7230 else if (shift)
7231 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7232 mnemonic, lane_count, widthc, shift);
7233 else
7234 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7235 mnemonic, lane_count, widthc);
7236 return templ;
7237 }
7238
7239 /* Split operands into moves from op[1] + op[2] into op[0]. */
7240
7241 void
7242 aarch64_split_combinev16qi (rtx operands[3])
7243 {
7244 unsigned int dest = REGNO (operands[0]);
7245 unsigned int src1 = REGNO (operands[1]);
7246 unsigned int src2 = REGNO (operands[2]);
7247 enum machine_mode halfmode = GET_MODE (operands[1]);
7248 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7249 rtx destlo, desthi;
7250
7251 gcc_assert (halfmode == V16QImode);
7252
7253 if (src1 == dest && src2 == dest + halfregs)
7254 {
7255 /* No-op move. Can't split to nothing; emit something. */
7256 emit_note (NOTE_INSN_DELETED);
7257 return;
7258 }
7259
7260 /* Preserve register attributes for variable tracking. */
7261 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7262 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7263 GET_MODE_SIZE (halfmode));
7264
7265 /* Special case of reversed high/low parts. */
7266 if (reg_overlap_mentioned_p (operands[2], destlo)
7267 && reg_overlap_mentioned_p (operands[1], desthi))
7268 {
7269 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7270 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7271 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7272 }
7273 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7274 {
7275 /* Try to avoid unnecessary moves if part of the result
7276 is in the right place already. */
7277 if (src1 != dest)
7278 emit_move_insn (destlo, operands[1]);
7279 if (src2 != dest + halfregs)
7280 emit_move_insn (desthi, operands[2]);
7281 }
7282 else
7283 {
7284 if (src2 != dest + halfregs)
7285 emit_move_insn (desthi, operands[2]);
7286 if (src1 != dest)
7287 emit_move_insn (destlo, operands[1]);
7288 }
7289 }
7290
7291 /* vec_perm support. */
7292
7293 #define MAX_VECT_LEN 16
7294
7295 struct expand_vec_perm_d
7296 {
7297 rtx target, op0, op1;
7298 unsigned char perm[MAX_VECT_LEN];
7299 enum machine_mode vmode;
7300 unsigned char nelt;
7301 bool one_vector_p;
7302 bool testing_p;
7303 };
7304
7305 /* Generate a variable permutation. */
7306
7307 static void
7308 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7309 {
7310 enum machine_mode vmode = GET_MODE (target);
7311 bool one_vector_p = rtx_equal_p (op0, op1);
7312
7313 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7314 gcc_checking_assert (GET_MODE (op0) == vmode);
7315 gcc_checking_assert (GET_MODE (op1) == vmode);
7316 gcc_checking_assert (GET_MODE (sel) == vmode);
7317 gcc_checking_assert (TARGET_SIMD);
7318
7319 if (one_vector_p)
7320 {
7321 if (vmode == V8QImode)
7322 {
7323 /* Expand the argument to a V16QI mode by duplicating it. */
7324 rtx pair = gen_reg_rtx (V16QImode);
7325 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7326 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7327 }
7328 else
7329 {
7330 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7331 }
7332 }
7333 else
7334 {
7335 rtx pair;
7336
7337 if (vmode == V8QImode)
7338 {
7339 pair = gen_reg_rtx (V16QImode);
7340 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7341 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7342 }
7343 else
7344 {
7345 pair = gen_reg_rtx (OImode);
7346 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7347 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7348 }
7349 }
7350 }
7351
7352 void
7353 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7354 {
7355 enum machine_mode vmode = GET_MODE (target);
7356 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7357 bool one_vector_p = rtx_equal_p (op0, op1);
7358 rtx rmask[MAX_VECT_LEN], mask;
7359
7360 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7361
7362 /* The TBL instruction does not use a modulo index, so we must take care
7363 of that ourselves. */
7364 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7365 for (i = 0; i < nelt; ++i)
7366 rmask[i] = mask;
7367 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7368 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7369
7370 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7371 }
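
/* A worked example of the masking above (illustrative only): permuting
   two distinct V8QImode vectors gives nelt == 8, so mask == 15 and a
   selector byte of 19 is reduced to 19 & 15 == 3.  vec_perm semantics
   require indices to wrap modulo the number of input elements, while
   TBL instead writes zero for any out-of-range index, so the AND must
   be applied before the TBL is emitted.  */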

/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
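
/* Illustrative check of the TRN pattern above: for V4SImode (nelt == 4),
   trn1 produces the permutation { 0, 4, 2, 6 } and trn2 produces
   { 1, 5, 3, 7 }; the loop verifies exactly this shape, which transposes
   element pairs across the two input vectors.  */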

/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
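
/* Illustrative check of the UZP pattern above: for V4SImode (nelt == 4),
   uzp1 selects the even-numbered elements { 0, 2, 4, 6 } and uzp2 the
   odd-numbered elements { 1, 3, 5, 7 } of the concatenated inputs.  */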

/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
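
/* Illustrative check of the ZIP pattern above: for V4SImode (nelt == 4),
   zip1 interleaves the low halves of the inputs as { 0, 4, 1, 5 } and
   zip2 interleaves the high halves as { 2, 6, 3, 7 }.  */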

static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
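
/* For example (illustrative only): a V4SImode permutation { 1, 0, 3, 2 }
   is rejected here on the first attempt, but when the middle end retries
   with the elements lowered to QImode it becomes the V16QImode byte
   permutation { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 },
   which is then expanded as a single TBL.  */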

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}
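
/* A worked example of the operand swap (illustrative only): with
   nelt == 4, the permutation { 4, 0, 5, 1 } begins with an index from
   the second operand, so each index is rewritten as (index + 4) & 7,
   giving { 0, 4, 1, 5 } with op0 and op1 exchanged; aarch64_evpc_zip
   then recognizes this as zip1.  */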

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
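
/* The WHICH bitmask above records where the selector draws its elements
   from: 1 means only op0, 2 means only op1, 3 means both.  For example
   (illustrative only), the V4SImode selector { 5, 7, 4, 6 } yields
   which == 2, so the indices are folded to { 1, 3, 0, 2 } and the
   permutation is matched against op1 alone.  */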

static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
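
/* Note on the dry run above: with testing_p set, the aarch64_evpc_*
   routines return before emitting any instructions; the scratch raw
   registers and the discarded start_sequence/end_sequence pair are
   insurance so that any RTL created while matching is thrown away
   rather than emitted into the instruction stream.  */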

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
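
/* Rationale, for illustration: the unsigned 12-bit immediate of LDRB/STRB
   covers byte offsets 0..4095.  Wider accesses scale the same immediate
   by the access size (e.g. up to 16380 for a 32-bit LDR), but since the
   access size is unknown when the anchor is placed, only the byte-access
   range is safe to assume.  */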

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok


#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"