1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48
49 /* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
   ADDRESS_SYMBOLIC
      A constant symbolic address, in a pc-relative literal pool.  */
71
72 enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80 };
81
82 struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88 };
89
90 /* The current code model. */
91 enum aarch64_code_model aarch64_cmodel;
92
93 #ifdef HAVE_AS_TLS
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
96 #endif
97
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
112
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
115
116 /* The processor for which instructions should be scheduled. */
117 enum aarch64_processor aarch64_tune = generic;
118
119 /* The current tuning set. */
120 const struct tune_params *aarch64_tune_params;
121
122 /* Mask to specify which instructions we are allowed to generate. */
123 unsigned long aarch64_isa_flags = 0;
124
125 /* Mask to specify which instruction scheduling options should be used. */
126 unsigned long aarch64_tune_flags = 0;
127
128 /* Tuning parameters. */
129
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132 #else
133 #define NAMED_PARAM(NAME, VAL) (VAL)
134 #endif
135
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137 __extension__
138 #endif
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
140 {
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
153 };
154
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156 __extension__
157 #endif
158 static const struct cpu_addrcost_table generic_addrcost_table =
159 {
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
165 };
166
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_regmove_cost generic_regmove_cost =
171 {
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
179 };
180
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct tune_params generic_tunings =
185 {
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
190 };
191
192 /* A processor implementing AArch64. */
193 struct processor
194 {
195 const char *const name;
196 enum aarch64_processor core;
197 const char *arch;
198 const unsigned long flags;
199 const struct tune_params *const tune;
200 };
201
202 /* Processor cores implementing AArch64. */
203 static const struct processor all_cores[] =
204 {
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
208 #undef AARCH64_CORE
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
211 };
212
213 /* Architectures implementing AArch64. */
214 static const struct processor all_architectures[] =
215 {
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
219 #undef AARCH64_ARCH
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
222 };
223
/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
229
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
231
232 /* An ISA extension in the co-processor and main instruction set space. */
233 struct aarch64_option_extension
234 {
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
238 };
239
240 /* ISA extensions in AArch64. */
241 static const struct aarch64_option_extension all_extensions[] =
242 {
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
247 {NULL, 0, 0}
248 };
249
250 /* Used to track the size of an address when generating a pre/post
251 increment address. */
252 static enum machine_mode aarch64_memory_reference_mode;
253
254 /* Used to force GTY into this file. */
255 static GTY(()) int gty_dummy;
256
257 /* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
259
260 #define AARCH64_NUM_BITMASKS 5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
262
263 /* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
266
267 typedef enum aarch64_cond_code
268 {
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
272 }
273 aarch64_cc;
274
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
276
277 /* The condition codes of the processor, and the inverse function. */
278 static const char * const aarch64_condition_codes[] =
279 {
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
282 };
283
284 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
285 unsigned
aarch64_dbx_register_number (unsigned regno)
287 {
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
294
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
298 }
299
300 /* Return TRUE if MODE is any of the large INT modes. */
301 static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
303 {
304 return mode == OImode || mode == CImode || mode == XImode;
305 }
306
307 /* Return TRUE if MODE is any of the vector modes. */
308 static bool
aarch64_vector_mode_p (enum machine_mode mode)
310 {
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
313 }
314
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
316 static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
319 {
320 if (TARGET_SIMD
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
323 return true;
324
325 return false;
326 }
327
328 /* Implement HARD_REGNO_NREGS. */
329
330 int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
332 {
333 switch (aarch64_regno_regclass (regno))
334 {
335 case FP_REGS:
336 case FP_LO_REGS:
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338 default:
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
340 }
341 gcc_unreachable ();
342 }
343
344 /* Implement HARD_REGNO_MODE_OK. */
345
346 int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
348 {
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
351
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
355
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357 return 1;
358
359 if (FP_REGNUM_P (regno))
360 {
361 if (aarch64_vect_struct_mode_p (mode))
362 return
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364 else
365 return 1;
366 }
367
368 return 0;
369 }
370
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
373 static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
375 {
376 return false;
377 }
378
/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
381 bool
aarch64_is_long_call_p (rtx sym)
383 {
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
385 }
386
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
392 bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
395 {
396 HOST_WIDE_INT mult_val, extract_val;
397
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399 return false;
400
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
403
404 if (extract_val > 8
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
409 return true;
410
411 return false;
412 }
413
414 /* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
416 inline static rtx
emit_set_insn (rtx x, rtx y)
418 {
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
420 }
421
422 /* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
424 rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
426 {
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
429
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431 return cc_reg;
432 }
433
434 /* Build the SYMBOL_REF for __tls_get_addr. */
435
436 static GTY(()) rtx tls_get_addr_libfunc;
437
438 rtx
aarch64_tls_get_addr (void)
440 {
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
444 }
445
446 /* Return the TLS model to use for ADDR. */
447
448 static enum tls_model
tls_symbolic_operand_type (rtx addr)
450 {
451 enum tls_model tls_kind = TLS_MODEL_NONE;
452 rtx sym, addend;
453
454 if (GET_CODE (addr) == CONST)
455 {
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
459 }
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
462
463 return tls_kind;
464 }
465
/* We allow LO_SUM in our legitimate addresses so that combine can
   take care of combining addresses where necessary, but for
   generation purposes we generate the address as:
470 RTL Absolute
471 tmp = hi (symbol_ref); adrp x1, foo
472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
473 nop
474
475 PIC TLS
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
477 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
478 bl __tls_get_addr
479 nop
480
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
482
483 Global Dynamic - Traditional TLS:
484 adrp tmp, :tlsgd:imm
485 add dest, tmp, #:tlsgd_lo12:imm
486 bl __tls_get_addr
487
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
492 blr tmp
493 mrs tp, tpidr_el0
494 add dest, dest, tp
495
496 Initial Exec:
497 mrs tp, tpidr_el0
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
500 add dest, dest, tp
501
502 Local Exec:
503 mrs tp, tpidr_el0
504 add t0, tp, #:tprel_hi12:imm
505 add t0, #:tprel_lo12_nc:imm
506 */
507
508 static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
511 {
512 switch (type)
513 {
514 case SYMBOL_SMALL_ABSOLUTE:
515 {
516 rtx tmp_reg = dest;
517 if (can_create_pseudo_p ())
518 {
519 tmp_reg = gen_reg_rtx (Pmode);
520 }
521
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 return;
525 }
526
527 case SYMBOL_SMALL_GOT:
528 {
529 rtx tmp_reg = dest;
530 if (can_create_pseudo_p ())
531 {
532 tmp_reg = gen_reg_rtx (Pmode);
533 }
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 return;
537 }
538
539 case SYMBOL_SMALL_TLSGD:
540 {
541 rtx insns;
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
543
544 start_sequence ();
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
547 end_sequence ();
548
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
551 return;
552 }
553
554 case SYMBOL_SMALL_TLSDESC:
555 {
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 rtx tp;
558
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 return;
564 }
565
566 case SYMBOL_SMALL_GOTTPREL:
567 {
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 return;
574 }
575
576 case SYMBOL_SMALL_TPREL:
577 {
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
582 }
583
584 default:
585 gcc_unreachable ();
586 }
587 }
588
589 /* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
594 static rtx
aarch64_emit_move (rtx dest, rtx src)
596 {
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
600 }
601
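/* Split a 128-bit (TImode) move from SRC to DST into two 64-bit moves.
   Transfers between general and FP/SIMD registers use the
   movtilow/movtihigh patterns; other cases are split into low-part and
   high-part moves, ordered so that overlapping registers are handled
   correctly.  */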
602 void
aarch64_split_128bit_move (rtx dst, rtx src)
604 {
605 rtx low_dst;
606
607 gcc_assert (GET_MODE (dst) == TImode);
608
609 if (REG_P (dst) && REG_P (src))
610 {
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
613
614 gcc_assert (GET_MODE (src) == TImode);
615
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
618 {
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
623 return;
624 }
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
626 {
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 src));
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 src));
631 return;
632 }
633 /* Fall through to r -> r cases. */
634 }
635
636 low_dst = gen_lowpart (word_mode, dst);
637 if (REG_P (low_dst)
638 && reg_overlap_mentioned_p (low_dst, src))
639 {
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
643 }
644 else
645 {
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
649 }
650 }
651
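/* Return true if a 128-bit move from SRC to DST needs to be split into
   a pair of 64-bit moves, i.e. unless it is a straight copy between
   FP/SIMD registers.  */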
652 bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
654 {
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
657 }
658
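/* Put VALUE into a register, using X as the destination when new
   pseudo registers cannot be created.  */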
659 static rtx
aarch64_force_temporary (rtx x, rtx value)
661 {
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
664 else
665 {
666 x = aarch64_emit_move (x, value);
667 return x;
668 }
669 }
670
671
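/* Return an rtx representing REG + OFFSET in MODE.  If OFFSET is not a
   valid "add" immediate, load it into a register first, using TEMP as
   the destination when new pseudo registers cannot be created.  */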
672 static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
674 {
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
676 {
677 rtx high;
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
681 offset = 0;
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
684 }
685 return plus_constant (mode, reg, offset);
686 }
687
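/* Expand a move of constant IMM into register DEST.  Symbolic constants
   are loaded according to their classification; integer constants are
   synthesized with a move plus insertions of 16-bit chunks, an
   add/subtract of a 12-bit immediate, or a combination of bitmask
   immediates, falling back to a simple chunk-by-chunk sequence.  */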
688 void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
690 {
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
693 int i;
694 bool first;
695 unsigned HOST_WIDE_INT val;
696 bool subtargets;
697 rtx subtarget;
698 int one_match, zero_match;
699
700 gcc_assert (mode == SImode || mode == DImode);
701
702 /* Check on what type of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
706 {
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
709
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
713
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715 switch (sty)
716 {
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
720 {
721 gcc_assert(can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
725 return;
726 }
727 mem = force_const_mem (mode, imm);
728 gcc_assert (mem);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 return;
731
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
737 {
738 gcc_assert(can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
742 return;
743 }
744 /* FALLTHRU */
745
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
749 return;
750
751 default:
752 gcc_unreachable ();
753 }
754 }
755
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
757 {
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759 return;
760 }
761
762 if (!CONST_INT_P (imm))
763 {
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766 else
767 {
768 rtx mem = force_const_mem (mode, imm);
769 gcc_assert (mem);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
771 }
772
773 return;
774 }
775
776 if (mode == SImode)
777 {
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
780 us anything. */
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784 return;
785 }
786
787 /* Remaining cases are all for DImode. */
788
789 val = INTVAL (imm);
790 subtargets = optimize && can_create_pseudo_p ();
791
792 one_match = 0;
793 zero_match = 0;
794 mask = 0xffff;
795
796 for (i = 0; i < 64; i += 16, mask <<= 16)
797 {
798 if ((val & mask) == 0)
799 zero_match++;
800 else if ((val & mask) == mask)
801 one_match++;
802 }
803
804 if (one_match == 2)
805 {
806 mask = 0xffff;
807 for (i = 0; i < 64; i += 16, mask <<= 16)
808 {
809 if ((val & mask) != mask)
810 {
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
814 return;
815 }
816 }
817 gcc_unreachable ();
818 }
819
820 if (zero_match == 2)
821 goto simple_sequence;
822
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
825 {
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
827
828 if (aarch64_uimm12_shift (val - (val & mask)))
829 {
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
831
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
835 return;
836 }
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
838 {
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
840
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
845 return;
846 }
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
848 {
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
850
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
855 return;
856 }
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
858 {
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
860
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
865 return;
866 }
867 }
868
869 /* See if we can do it by arithmetically combining two
870 immediates. */
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
872 {
873 int j;
874 mask = 0xffff;
875
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
878 {
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
884 return;
885 }
886
887 for (j = 0; j < 64; j += 16, mask <<= 16)
888 {
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
890 {
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
895 return;
896 }
897 }
898 }
899
900 /* See if we can do it by logically combining two immediates. */
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
902 {
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
904 {
905 int j;
906
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
909 {
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
915 return;
916 }
917 }
918 else if ((val & aarch64_bitmasks[i]) == val)
919 {
920 int j;
921
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
924 {
925
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
931 return;
932 }
933 }
934 }
935
936 simple_sequence:
937 first = true;
938 mask = 0xffff;
939 for (i = 0; i < 64; i += 16, mask <<= 16)
940 {
941 if ((val & mask) != 0)
942 {
943 if (first)
944 {
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
947 first = false;
948 }
949 else
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
952 }
953 }
954 }
955
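/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  Return true if a call to
   DECL may be compiled as a sibling (tail) call.  */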
956 static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
958 {
959 /* Indirect calls are not currently supported. */
960 if (decl == NULL)
961 return false;
962
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
966 if (aarch64_decl_is_long_call_p (decl))
967 return false;
968
969 return true;
970 }
971
972 /* Implement TARGET_PASS_BY_REFERENCE. */
973
974 static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
979 {
980 HOST_WIDE_INT size;
981 enum machine_mode dummymode;
982 int nregs;
983
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
987
988 if (type)
989 {
990 /* Arrays always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
992 return true;
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
996 }
997
  /* Variable sized arguments are always passed by reference.  */
999 if (size < 0)
1000 return true;
1001
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 &dummymode, &nregs,
1005 NULL))
1006 return false;
1007
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
1011 return size > 2 * UNITS_PER_WORD;
1012 }
1013
1014 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1015 static bool
aarch64_return_in_msb (const_tree valtype)
1017 {
1018 enum machine_mode dummy_mode;
1019 int dummy_int;
1020
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1023 return false;
1024
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1030 return false;
1031
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1035 register(s). */
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1038 return false;
1039
1040 return true;
1041 }
1042
1043 /* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1045
1046 static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
1049 {
1050 enum machine_mode mode;
1051 int unsignedp;
1052 int count;
1053 enum machine_mode ag_mode;
1054
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058
1059 if (aarch64_return_in_msb (type))
1060 {
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1062
1063 if (size % UNITS_PER_WORD != 0)
1064 {
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067 }
1068 }
1069
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1072 {
1073 if (!aarch64_composite_type_p (type, mode))
1074 {
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1077 }
1078 else
1079 {
1080 int i;
1081 rtx par;
1082
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1085 {
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1090 }
1091 return par;
1092 }
1093 }
1094 else
1095 return gen_rtx_REG (mode, R0_REGNUM);
1096 }
1097
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
1100 of called function may come back. */
1101
1102 static bool
aarch64_function_value_regno_p (const unsigned int regno)
1104 {
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1109 return true;
1110
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1115
1116 return false;
1117 }
1118
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1120
1121 If the type T of the result of a function is such that
1122 void func (T arg)
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1126 argument. */
1127
1128 static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130 {
1131 HOST_WIDE_INT size;
1132 enum machine_mode ag_mode;
1133 int count;
1134
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
1138 /* Simple scalar types always returned in registers. */
1139 return false;
1140
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 type,
1143 &ag_mode,
1144 &count,
1145 NULL))
1146 return false;
1147
1148 /* Types larger than 2 registers returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
1151 }
1152
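/* Return true if an argument of MODE and TYPE is a candidate for
   passing in SIMD/FP registers, recording the per-element mode in the
   cumulative-args structure and the number of registers needed in
   *NREGS.  */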
1153 static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v,
			       enum machine_mode mode,
			       const_tree type, int *nregs)
1156 {
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1159 type,
1160 &pcum->aapcs_vfp_rmode,
1161 nregs,
1162 NULL);
1163 }
1164
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1169
1170 static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172 {
1173 unsigned int alignment;
1174
1175 if (type)
1176 {
1177 if (!integer_zerop (TYPE_SIZE (type)))
1178 {
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1181 else
1182 alignment = GET_MODE_ALIGNMENT (mode);
1183 }
1184 else
1185 alignment = 0;
1186 }
1187 else
1188 alignment = GET_MODE_ALIGNMENT (mode);
1189
1190 return alignment;
1191 }
1192
1193 /* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
1195
1196 static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
1200 {
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1204 HOST_WIDE_INT size;
1205
1206 /* We need to do this once per argument. */
1207 if (pcum->aapcs_arg_processed)
1208 return;
1209
1210 pcum->aapcs_arg_processed = true;
1211
  /* Size in bytes, rounded up to the nearest multiple of 8 bytes.  */
1213 size
1214 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1215 UNITS_PER_WORD);
1216
1217 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1218 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1219 mode,
1220 type,
1221 &nregs);
1222
  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite
     reliable.  The following code thus handles passing by SIMD/FP
     registers first.  */
1225
1226 nvrn = pcum->aapcs_nvrn;
1227
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1230 if (allocate_nvrn)
1231 {
1232 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1233 {
1234 pcum->aapcs_nextnvrn = nvrn + nregs;
1235 if (!aarch64_composite_type_p (type, mode))
1236 {
1237 gcc_assert (nregs == 1);
1238 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1239 }
1240 else
1241 {
1242 rtx par;
1243 int i;
1244 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1245 for (i = 0; i < nregs; i++)
1246 {
1247 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1248 V0_REGNUM + nvrn + i);
1249 tmp = gen_rtx_EXPR_LIST
1250 (VOIDmode, tmp,
1251 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1252 XVECEXP (par, 0, i) = tmp;
1253 }
1254 pcum->aapcs_reg = par;
1255 }
1256 return;
1257 }
1258 else
1259 {
1260 /* C.3 NSRN is set to 8. */
1261 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1262 goto on_stack;
1263 }
1264 }
1265
1266 ncrn = pcum->aapcs_ncrn;
1267 nregs = size / UNITS_PER_WORD;
1268
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1272 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1273 {
1274 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1275
1276 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1277
1278 /* C.8 if the argument has an alignment of 16 then the NGRN is
1279 rounded up to the next even number. */
1280 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1281 {
1282 ++ncrn;
1283 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1284 }
1285 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1286 A reg is still generated for it, but the caller should be smart
1287 enough not to use it. */
1288 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1289 {
1290 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1291 }
1292 else
1293 {
1294 rtx par;
1295 int i;
1296
1297 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1298 for (i = 0; i < nregs; i++)
1299 {
1300 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1301 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1302 GEN_INT (i * UNITS_PER_WORD));
1303 XVECEXP (par, 0, i) = tmp;
1304 }
1305 pcum->aapcs_reg = par;
1306 }
1307
1308 pcum->aapcs_nextncrn = ncrn + nregs;
1309 return;
1310 }
1311
1312 /* C.11 */
1313 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1314
1315 /* The argument is passed on stack; record the needed number of words for
1316 this argument and align the total size if necessary. */
1317 on_stack:
1318 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1319 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1320 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1321 16 / UNITS_PER_WORD);
1322 return;
1323 }
1324
1325 /* Implement TARGET_FUNCTION_ARG. */
1326
1327 static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
1330 {
1331 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1332 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1333
1334 if (mode == VOIDmode)
1335 return NULL_RTX;
1336
1337 aarch64_layout_arg (pcum_v, mode, type, named);
1338 return pcum->aapcs_reg;
1339 }
1340
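/* Reset the per-call argument-layout state in *PCUM ready for laying
   out the arguments of a new call.  */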
1341 void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
1347 {
1348 pcum->aapcs_ncrn = 0;
1349 pcum->aapcs_nvrn = 0;
1350 pcum->aapcs_nextncrn = 0;
1351 pcum->aapcs_nextnvrn = 0;
1352 pcum->pcs_variant = ARM_PCS_AAPCS64;
1353 pcum->aapcs_reg = NULL_RTX;
1354 pcum->aapcs_arg_processed = false;
1355 pcum->aapcs_stack_words = 0;
1356 pcum->aapcs_stack_size = 0;
1357
1358 return;
1359 }
1360
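/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Update *PCUM to advance past
   the argument that has just been laid out.  */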
1361 static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
1366 {
1367 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1368 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1369 {
1370 aarch64_layout_arg (pcum_v, mode, type, named);
1371 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1372 != (pcum->aapcs_stack_words != 0));
1373 pcum->aapcs_arg_processed = false;
1374 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1375 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1376 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1377 pcum->aapcs_stack_words = 0;
1378 pcum->aapcs_reg = NULL_RTX;
1379 }
1380 }
1381
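/* Return true if REGNO is a register in which function arguments may
   be passed, i.e. one of the first NUM_ARG_REGS general registers or
   the first NUM_FP_ARG_REGS SIMD/FP registers.  */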
1382 bool
aarch64_function_arg_regno_p (unsigned regno)
1384 {
1385 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1386 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1387 }
1388
1389 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1390 PARM_BOUNDARY bits of alignment, but will be given anything up
1391 to STACK_BOUNDARY bits if the type requires it. This makes sure
1392 that both before and after the layout of each argument, the Next
1393 Stacked Argument Address (NSAA) will have a minimum alignment of
1394 8 bytes. */
1395
1396 static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1398 {
1399 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1400
1401 if (alignment < PARM_BOUNDARY)
1402 alignment = PARM_BOUNDARY;
1403 if (alignment > STACK_BOUNDARY)
1404 alignment = STACK_BOUNDARY;
1405 return alignment;
1406 }
1407
1408 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1409
1410 Return true if an argument passed on the stack should be padded upwards,
1411 i.e. if the least-significant byte of the stack slot has useful data.
1412
1413 Small aggregate types are placed in the lowest memory address.
1414
1415 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1416
1417 bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1419 {
1420 /* On little-endian targets, the least significant byte of every stack
1421 argument is passed at the lowest byte address of the stack slot. */
1422 if (!BYTES_BIG_ENDIAN)
1423 return true;
1424
1425 /* Otherwise, integral types and floating point types are padded downward:
1426 the least significant byte of a stack argument is passed at the highest
1427 byte address of the stack slot. */
1428 if (type
1429 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1430 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1431 return false;
1432
1433 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1434 return true;
1435 }
1436
1437 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1438
   It specifies padding for the last (possibly the only)
   element of a block move between registers and memory.  Assuming
   the block is in memory, padding upward means that the last
   element is padded after its most significant byte, while with
   downward padding the last element is padded on its least
   significant byte side.
1445
1446 Small aggregates and small complex types are always padded
1447 upwards.
1448
1449 We don't need to worry about homogeneous floating-point or
1450 short-vector aggregates; their move is not affected by the
1451 padding direction determined here. Regardless of endianness,
1452 each element of such an aggregate is put in the least
1453 significant bits of a fp/simd register.
1454
1455 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1456 register has useful data, and return the opposite if the most
1457 significant byte does. */
1458
1459 bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
1462 {
1463
1464 /* Small composite types are always padded upward. */
1465 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1466 {
1467 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1468 : GET_MODE_SIZE (mode));
1469 if (size < 2 * UNITS_PER_WORD)
1470 return true;
1471 }
1472
1473 /* Otherwise, use the default padding. */
1474 return !BYTES_BIG_ENDIAN;
1475 }
1476
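/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */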
1477 static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
1479 {
1480 return SImode;
1481 }
1482
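/* Implement TARGET_FRAME_POINTER_REQUIRED.  Return true if the current
   function must establish a frame pointer.  */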
1483 static bool
aarch64_frame_pointer_required (void)
1485 {
1486 /* If the function contains dynamic stack allocations, we need to
1487 use the frame pointer to access the static parts of the frame. */
1488 if (cfun->calls_alloca)
1489 return true;
1490
1491 /* We may have turned flag_omit_frame_pointer on in order to have this
1492 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1493 and we'll check it here.
1494 If we really did set flag_omit_frame_pointer normally, then we return false
1495 (no frame pointer required) in all cases. */
1496
1497 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1498 return false;
1499 else if (flag_omit_leaf_frame_pointer)
1500 return !crtl->is_leaf;
1501 return true;
1502 }
1503
1504 /* Mark the registers that need to be saved by the callee and calculate
1505 the size of the callee-saved registers area and frame record (both FP
1506 and LR may be omitted). */
1507 static void
aarch64_layout_frame (void)
1509 {
1510 HOST_WIDE_INT offset = 0;
1511 int regno;
1512
1513 if (reload_completed && cfun->machine->frame.laid_out)
1514 return;
1515
1516 cfun->machine->frame.fp_lr_offset = 0;
1517
1518 /* First mark all the registers that really need to be saved... */
1519 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1521
1522 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1523 cfun->machine->frame.reg_offset[regno] = -1;
1524
1525 /* ... that includes the eh data registers (if needed)... */
1526 if (crtl->calls_eh_return)
1527 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1528 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1529
1530 /* ... and any callee saved register that dataflow says is live. */
1531 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1532 if (df_regs_ever_live_p (regno)
1533 && !call_used_regs[regno])
1534 cfun->machine->frame.reg_offset[regno] = 0;
1535
1536 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1537 if (df_regs_ever_live_p (regno)
1538 && !call_used_regs[regno])
1539 cfun->machine->frame.reg_offset[regno] = 0;
1540
1541 if (frame_pointer_needed)
1542 {
1543 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1544 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1545 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1546 }
1547
1548 /* Now assign stack slots for them. */
1549 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1550 if (cfun->machine->frame.reg_offset[regno] != -1)
1551 {
1552 cfun->machine->frame.reg_offset[regno] = offset;
1553 offset += UNITS_PER_WORD;
1554 }
1555
1556 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1557 if (cfun->machine->frame.reg_offset[regno] != -1)
1558 {
1559 cfun->machine->frame.reg_offset[regno] = offset;
1560 offset += UNITS_PER_WORD;
1561 }
1562
1563 if (frame_pointer_needed)
1564 {
1565 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1566 offset += UNITS_PER_WORD;
1567 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1568 }
1569
1570 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1571 {
1572 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1573 offset += UNITS_PER_WORD;
1574 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1575 }
1576
1577 cfun->machine->frame.padding0 =
1578 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1579 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1580
1581 cfun->machine->frame.saved_regs_size = offset;
1582 cfun->machine->frame.laid_out = true;
1583 }
1584
1585 /* Make the last instruction frame-related and note that it performs
1586 the operation described by FRAME_PATTERN. */
1587
1588 static void
aarch64_set_frame_expr (rtx frame_pattern)
1590 {
1591 rtx insn;
1592
1593 insn = get_last_insn ();
1594 RTX_FRAME_RELATED_P (insn) = 1;
1595 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1596 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1597 frame_pattern,
1598 REG_NOTES (insn));
1599 }
1600
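/* Return true if REGNO has been allocated a slot in the callee-save
   area of the current frame.  */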
1601 static bool
aarch64_register_saved_on_entry (int regno)
1603 {
1604 return cfun->machine->frame.reg_offset[regno] != -1;
1605 }
1606
1607
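/* Save (RESTORE false) or restore (RESTORE true) the callee-saved
   FP/SIMD registers that have frame slots, starting at BASE_RTX +
   START_OFFSET and stepping by INCREMENT, using store-pair/load-pair
   instructions whenever two consecutive saved registers allow it.  */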
1608 static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
1611
1612 {
1613 unsigned regno;
1614 unsigned regno2;
1615 rtx insn;
1616 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1617
1618
1619 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1620 {
1621 if (aarch64_register_saved_on_entry (regno))
1622 {
1623 rtx mem;
1624 mem = gen_mem_ref (DFmode,
1625 plus_constant (Pmode,
1626 base_rtx,
1627 start_offset));
1628
1629 for (regno2 = regno + 1;
1630 regno2 <= V31_REGNUM
1631 && !aarch64_register_saved_on_entry (regno2);
1632 regno2++)
1633 {
1634 /* Empty loop. */
1635 }
1636 if (regno2 <= V31_REGNUM &&
1637 aarch64_register_saved_on_entry (regno2))
1638 {
1639 rtx mem2;
1640 /* Next highest register to be saved. */
1641 mem2 = gen_mem_ref (DFmode,
1642 plus_constant
1643 (Pmode,
1644 base_rtx,
1645 start_offset + increment));
1646 if (restore == false)
1647 {
1648 insn = emit_insn
1649 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1650 mem2, gen_rtx_REG (DFmode, regno2)));
1651
1652 }
1653 else
1654 {
1655 insn = emit_insn
1656 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1657 gen_rtx_REG (DFmode, regno2), mem2));
1658
1659 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1660 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1661 }
1662
	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
1667 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1668 1)) = 1;
1669 regno = regno2;
1670 start_offset += increment * 2;
1671 }
1672 else
1673 {
1674 if (restore == false)
1675 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1676 else
1677 {
1678 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1679 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1680 }
1681 start_offset += increment;
1682 }
1683 RTX_FRAME_RELATED_P (insn) = 1;
1684 }
1685 }
1686
1687 }
1688
1689
/* Offset from the stack pointer at which the saves and
   restores have to happen.  */
1692 static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
1695 {
1696 rtx insn;
1697 rtx base_rtx = stack_pointer_rtx;
1698 HOST_WIDE_INT start_offset = offset;
1699 HOST_WIDE_INT increment = UNITS_PER_WORD;
1700 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1701 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1702 unsigned regno;
1703 unsigned regno2;
1704
1705 for (regno = R0_REGNUM; regno <= limit; regno++)
1706 {
1707 if (aarch64_register_saved_on_entry (regno))
1708 {
1709 rtx mem;
1710 mem = gen_mem_ref (Pmode,
1711 plus_constant (Pmode,
1712 base_rtx,
1713 start_offset));
1714
1715 for (regno2 = regno + 1;
1716 regno2 <= limit
1717 && !aarch64_register_saved_on_entry (regno2);
1718 regno2++)
1719 {
1720 /* Empty loop. */
1721 }
1722 if (regno2 <= limit &&
1723 aarch64_register_saved_on_entry (regno2))
1724 {
1725 rtx mem2;
1726 /* Next highest register to be saved. */
1727 mem2 = gen_mem_ref (Pmode,
1728 plus_constant
1729 (Pmode,
1730 base_rtx,
1731 start_offset + increment));
1732 if (restore == false)
1733 {
1734 insn = emit_insn
1735 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1736 mem2, gen_rtx_REG (DImode, regno2)));
1737
1738 }
1739 else
1740 {
1741 insn = emit_insn
1742 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1743 gen_rtx_REG (DImode, regno2), mem2));
1744
1745 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1746 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1747 }
1748
	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
1753 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1754 1)) = 1;
1755 regno = regno2;
1756 start_offset += increment * 2;
1757 }
1758 else
1759 {
1760 if (restore == false)
1761 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1762 else
1763 {
1764 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1765 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1766 }
1767 start_offset += increment;
1768 }
1769 RTX_FRAME_RELATED_P (insn) = 1;
1770 }
1771 }
1772
1773 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1774
1775 }
1776
1777 /* AArch64 stack frames generated by this compiler look like:
1778
1779 +-------------------------------+
1780 | |
1781 | incoming stack arguments |
1782 | |
1783 +-------------------------------+ <-- arg_pointer_rtx
1784 | |
1785 | callee-allocated save area |
1786 | for register varargs |
1787 | |
1788 +-------------------------------+
1789 | |
1790 | local variables |
1791 | |
1792 +-------------------------------+ <-- frame_pointer_rtx
1793 | |
1794 | callee-saved registers |
1795 | |
1796 +-------------------------------+
1797 | LR' |
1798 +-------------------------------+
1799 | FP' |
1800 P +-------------------------------+ <-- hard_frame_pointer_rtx
1801 | dynamic allocation |
1802 +-------------------------------+
1803 | |
1804 | outgoing stack arguments |
1805 | |
1806 +-------------------------------+ <-- stack_pointer_rtx
1807
1808 Dynamic stack allocations such as alloca insert data at point P.
1809 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1810 hard_frame_pointer_rtx unchanged. */
1811
1812 /* Generate the prologue instructions for entry into a function.
1813 Establish the stack frame by decreasing the stack pointer with a
1814 properly calculated size and, if necessary, create a frame record
1815 filled with the values of LR and previous frame pointer. The
   current FP is also set up if it is in use.  */
1817
1818 void
aarch64_expand_prologue (void)
1820 {
1821 /* sub sp, sp, #<frame_size>
1822 stp {fp, lr}, [sp, #<frame_size> - 16]
1823 add fp, sp, #<frame_size> - hardfp_offset
1824 stp {cs_reg}, [fp, #-16] etc.
1825
1826 sub sp, sp, <final_adjustment_if_any>
1827 */
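  /* As a rough sketch of the small-frame cases handled below: with a frame
     pointer and fp_offset == 0 the FP/LR save uses the write-back form,

	stp	x29, x30, [sp, #-<offset>]!
	add	x29, sp, #0

     whereas with a non-zero fp_offset the stack pointer is dropped first and
     FP/LR are then stored at that offset from the new SP. */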
1828 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1829 HOST_WIDE_INT frame_size, offset;
1830 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1831 rtx insn;
1832
1833 aarch64_layout_frame ();
1834 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1835 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1836 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1837 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1838 + crtl->outgoing_args_size);
1839 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1840 STACK_BOUNDARY / BITS_PER_UNIT);
1841
1842 if (flag_stack_usage_info)
1843 current_function_static_stack_size = frame_size;
1844
1845 fp_offset = (offset
1846 - original_frame_size
1847 - cfun->machine->frame.saved_regs_size);
1848
1849   /* Store pairs and load pairs have a range of only -512 to 504. */
1850 if (offset >= 512)
1851 {
1852 /* When the frame has a large size, an initial decrease is done on
1853 the stack pointer to jump over the callee-allocated save area for
1854 register varargs, the local variable area and/or the callee-saved
1855 register area. This will allow the pre-index write-back
1856 store pair instructions to be used for setting up the stack frame
1857 efficiently. */
1858 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1859 if (offset >= 512)
1860 offset = cfun->machine->frame.saved_regs_size;
1861
1862 frame_size -= (offset + crtl->outgoing_args_size);
1863 fp_offset = 0;
1864
1865 if (frame_size >= 0x1000000)
1866 {
1867 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1868 emit_move_insn (op0, GEN_INT (-frame_size));
1869 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1870 aarch64_set_frame_expr (gen_rtx_SET
1871 (Pmode, stack_pointer_rtx,
1872 gen_rtx_PLUS (Pmode,
1873 stack_pointer_rtx,
1874 GEN_INT (-frame_size))));
1875 }
1876 else if (frame_size > 0)
1877 {
1878 if ((frame_size & 0xfff) != frame_size)
1879 {
1880 insn = emit_insn (gen_add2_insn
1881 (stack_pointer_rtx,
1882 GEN_INT (-(frame_size
1883 & ~(HOST_WIDE_INT)0xfff))));
1884 RTX_FRAME_RELATED_P (insn) = 1;
1885 }
1886 if ((frame_size & 0xfff) != 0)
1887 {
1888 insn = emit_insn (gen_add2_insn
1889 (stack_pointer_rtx,
1890 GEN_INT (-(frame_size
1891 & (HOST_WIDE_INT)0xfff))));
1892 RTX_FRAME_RELATED_P (insn) = 1;
1893 }
1894 }
1895 }
1896 else
1897 frame_size = -1;
1898
1899 if (offset > 0)
1900 {
1901 /* Save the frame pointer and lr if the frame pointer is needed
1902 first. Make the frame pointer point to the location of the
1903 old frame pointer on the stack. */
1904 if (frame_pointer_needed)
1905 {
1906 rtx mem_fp, mem_lr;
1907
1908 if (fp_offset)
1909 {
1910 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1911 GEN_INT (-offset)));
1912 RTX_FRAME_RELATED_P (insn) = 1;
1913 aarch64_set_frame_expr (gen_rtx_SET
1914 (Pmode, stack_pointer_rtx,
1915 gen_rtx_MINUS (Pmode,
1916 stack_pointer_rtx,
1917 GEN_INT (offset))));
1918 mem_fp = gen_frame_mem (DImode,
1919 plus_constant (Pmode,
1920 stack_pointer_rtx,
1921 fp_offset));
1922 mem_lr = gen_frame_mem (DImode,
1923 plus_constant (Pmode,
1924 stack_pointer_rtx,
1925 fp_offset
1926 + UNITS_PER_WORD));
1927 insn = emit_insn (gen_store_pairdi (mem_fp,
1928 hard_frame_pointer_rtx,
1929 mem_lr,
1930 gen_rtx_REG (DImode,
1931 LR_REGNUM)));
1932 }
1933 else
1934 {
1935 insn = emit_insn (gen_storewb_pairdi_di
1936 (stack_pointer_rtx, stack_pointer_rtx,
1937 hard_frame_pointer_rtx,
1938 gen_rtx_REG (DImode, LR_REGNUM),
1939 GEN_INT (-offset),
1940 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1941 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1942 }
1943
1944 /* The first part of a frame-related parallel insn is always
1945 assumed to be relevant to the frame calculations;
1946 	     subsequent parts are only frame-related if explicitly
1947 marked. */
1948 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1949 RTX_FRAME_RELATED_P (insn) = 1;
1950
1951 /* Set up frame pointer to point to the location of the
1952 previous frame pointer on the stack. */
1953 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1954 stack_pointer_rtx,
1955 GEN_INT (fp_offset)));
1956 aarch64_set_frame_expr (gen_rtx_SET
1957 (Pmode, hard_frame_pointer_rtx,
1958 gen_rtx_PLUS (Pmode,
1959 stack_pointer_rtx,
1960 GEN_INT (fp_offset))));
1961 RTX_FRAME_RELATED_P (insn) = 1;
1962 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1963 hard_frame_pointer_rtx));
1964 }
1965 else
1966 {
1967 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1968 GEN_INT (-offset)));
1969 RTX_FRAME_RELATED_P (insn) = 1;
1970 }
1971
1972 aarch64_save_or_restore_callee_save_registers
1973 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1974 }
1975
1976   /* When offset >= 512,
1977 sub sp, sp, #<outgoing_args_size> */
1978 if (frame_size > -1)
1979 {
1980 if (crtl->outgoing_args_size > 0)
1981 {
1982 insn = emit_insn (gen_add2_insn
1983 (stack_pointer_rtx,
1984 GEN_INT (- crtl->outgoing_args_size)));
1985 RTX_FRAME_RELATED_P (insn) = 1;
1986 }
1987 }
1988 }
1989
1990 /* Generate the epilogue instructions for returning from a function. */
1991 void
1992 aarch64_expand_epilogue (bool for_sibcall)
1993 {
1994 HOST_WIDE_INT original_frame_size, frame_size, offset;
1995 HOST_WIDE_INT fp_offset;
1996 rtx insn;
1997 rtx cfa_reg;
1998
1999 aarch64_layout_frame ();
2000 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2001 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2002 + crtl->outgoing_args_size);
2003 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2004 STACK_BOUNDARY / BITS_PER_UNIT);
2005
2006 fp_offset = (offset
2007 - original_frame_size
2008 - cfun->machine->frame.saved_regs_size);
2009
2010 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2011
2012   /* Store pairs and load pairs have a range of only -512 to 504. */
2013 if (offset >= 512)
2014 {
2015 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2016 if (offset >= 512)
2017 offset = cfun->machine->frame.saved_regs_size;
2018
2019 frame_size -= (offset + crtl->outgoing_args_size);
2020 fp_offset = 0;
2021 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2022 {
2023 insn = emit_insn (gen_add2_insn
2024 (stack_pointer_rtx,
2025 GEN_INT (crtl->outgoing_args_size)));
2026 RTX_FRAME_RELATED_P (insn) = 1;
2027 }
2028 }
2029 else
2030 frame_size = -1;
2031
2032 /* If there were outgoing arguments or we've done dynamic stack
2033 allocation, then restore the stack pointer from the frame
2034 pointer. This is at most one insn and more efficient than using
2035 GCC's internal mechanism. */
2036 if (frame_pointer_needed
2037 && (crtl->outgoing_args_size || cfun->calls_alloca))
2038 {
2039 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2040 hard_frame_pointer_rtx,
2041 GEN_INT (- fp_offset)));
2042 RTX_FRAME_RELATED_P (insn) = 1;
2043 /* As SP is set to (FP - fp_offset), according to the rules in
2044 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2045 from the value of SP from now on. */
2046 cfa_reg = stack_pointer_rtx;
2047 }
2048
2049 aarch64_save_or_restore_callee_save_registers
2050 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2051
2052 /* Restore the frame pointer and lr if the frame pointer is needed. */
2053 if (offset > 0)
2054 {
2055 if (frame_pointer_needed)
2056 {
2057 rtx mem_fp, mem_lr;
2058
2059 if (fp_offset)
2060 {
2061 mem_fp = gen_frame_mem (DImode,
2062 plus_constant (Pmode,
2063 stack_pointer_rtx,
2064 fp_offset));
2065 mem_lr = gen_frame_mem (DImode,
2066 plus_constant (Pmode,
2067 stack_pointer_rtx,
2068 fp_offset
2069 + UNITS_PER_WORD));
2070 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2071 mem_fp,
2072 gen_rtx_REG (DImode,
2073 LR_REGNUM),
2074 mem_lr));
2075 }
2076 else
2077 {
2078 insn = emit_insn (gen_loadwb_pairdi_di
2079 (stack_pointer_rtx,
2080 stack_pointer_rtx,
2081 hard_frame_pointer_rtx,
2082 gen_rtx_REG (DImode, LR_REGNUM),
2083 GEN_INT (offset),
2084 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2085 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2086 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2087 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2088 plus_constant (Pmode, cfa_reg,
2089 offset))));
2090 }
2091
2092 /* The first part of a frame-related parallel insn
2093 is always assumed to be relevant to the frame
2094 	     calculations; subsequent parts are only
2095 frame-related if explicitly marked. */
2096 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2097 RTX_FRAME_RELATED_P (insn) = 1;
2098 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2099 add_reg_note (insn, REG_CFA_RESTORE,
2100 gen_rtx_REG (DImode, LR_REGNUM));
2101
2102 if (fp_offset)
2103 {
2104 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2105 GEN_INT (offset)));
2106 RTX_FRAME_RELATED_P (insn) = 1;
2107 }
2108 }
2109 else
2110 {
2111 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2112 GEN_INT (offset)));
2113 RTX_FRAME_RELATED_P (insn) = 1;
2114 }
2115 }
2116
2117 /* Stack adjustment for exception handler. */
2118 if (crtl->calls_eh_return)
2119 {
2120 /* We need to unwind the stack by the offset computed by
2121 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2122 based on SP. Ideally we would update the SP and define the
2123 CFA along the lines of:
2124
2125 SP = SP + EH_RETURN_STACKADJ_RTX
2126 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2127
2128 However the dwarf emitter only understands a constant
2129 register offset.
2130
2131 	 The solution chosen here is to use the otherwise unused IP0
2132 as a temporary register to hold the current SP value. The
2133 CFA is described using IP0 then SP is modified. */
2134
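      /* Schematically (a sketch; IP0 is x16):

	   mov	x16, sp
	   add	sp, sp, <EH_RETURN_STACKADJ_RTX>

	 with the REG_CFA_DEF_CFA note attached to the first move. */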
2135 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2136
2137 insn = emit_move_insn (ip0, stack_pointer_rtx);
2138 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2139 RTX_FRAME_RELATED_P (insn) = 1;
2140
2141 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2142
2143 /* Ensure the assignment to IP0 does not get optimized away. */
2144 emit_use (ip0);
2145 }
2146
2147 if (frame_size > -1)
2148 {
2149 if (frame_size >= 0x1000000)
2150 {
2151 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2152 emit_move_insn (op0, GEN_INT (frame_size));
2153 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2154 aarch64_set_frame_expr (gen_rtx_SET
2155 (Pmode, stack_pointer_rtx,
2156 gen_rtx_PLUS (Pmode,
2157 stack_pointer_rtx,
2158 GEN_INT (frame_size))));
2159 }
2160 else if (frame_size > 0)
2161 {
2162 if ((frame_size & 0xfff) != 0)
2163 {
2164 insn = emit_insn (gen_add2_insn
2165 (stack_pointer_rtx,
2166 GEN_INT ((frame_size
2167 & (HOST_WIDE_INT) 0xfff))));
2168 RTX_FRAME_RELATED_P (insn) = 1;
2169 }
2170 if ((frame_size & 0xfff) != frame_size)
2171 {
2172 insn = emit_insn (gen_add2_insn
2173 (stack_pointer_rtx,
2174 GEN_INT ((frame_size
2175 & ~ (HOST_WIDE_INT) 0xfff))));
2176 RTX_FRAME_RELATED_P (insn) = 1;
2177 }
2178 }
2179
2180 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2181 gen_rtx_PLUS (Pmode,
2182 stack_pointer_rtx,
2183 GEN_INT (offset))));
2184 }
2185
2186 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2187 if (!for_sibcall)
2188 emit_jump_insn (ret_rtx);
2189 }
2190
2191 /* Return the place to copy the exception unwinding return address to.
2192    This will probably be a stack slot, but could (in theory) be the
2193    return register. */
2194 rtx
2195 aarch64_final_eh_return_addr (void)
2196 {
2197 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2198 aarch64_layout_frame ();
2199 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2200 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2201 + crtl->outgoing_args_size);
2202 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2203 STACK_BOUNDARY / BITS_PER_UNIT);
2204 fp_offset = offset
2205 - original_frame_size
2206 - cfun->machine->frame.saved_regs_size;
2207
2208 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2209 return gen_rtx_REG (DImode, LR_REGNUM);
2210
2211 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2212 result in a store to save LR introduced by builtin_eh_return () being
2213 incorrectly deleted because the alias is not detected.
2214 So in the calculation of the address to copy the exception unwinding
2215 return address to, we note 2 cases.
2216 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2217 we return a SP-relative location since all the addresses are SP-relative
2218 in this case. This prevents the store from being optimized away.
2219 If the fp_offset is not 0, then the addresses will be FP-relative and
2220 therefore we return a FP-relative location. */
2221
2222 if (frame_pointer_needed)
2223 {
2224 if (fp_offset)
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2227 else
2228 return gen_frame_mem (DImode,
2229 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2230 }
2231
2232 /* If FP is not needed, we calculate the location of LR, which would be
2233 at the top of the saved registers block. */
2234
2235 return gen_frame_mem (DImode,
2236 plus_constant (Pmode,
2237 stack_pointer_rtx,
2238 fp_offset
2239 + cfun->machine->frame.saved_regs_size
2240 - 2 * UNITS_PER_WORD));
2241 }
2242
2243 /* Output code to build up a constant in a register. */
2244 static void
2245 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2246 {
2247 if (aarch64_bitmask_imm (val, DImode))
2248 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2249 else
2250 {
2251 int i;
2252 int ncount = 0;
2253 int zcount = 0;
2254 HOST_WIDE_INT valp = val >> 16;
2255 HOST_WIDE_INT valm;
2256 HOST_WIDE_INT tval;
2257
2258 for (i = 16; i < 64; i += 16)
2259 {
2260 valm = (valp & 0xffff);
2261
2262 if (valm != 0)
2263 ++ zcount;
2264
2265 if (valm != 0xffff)
2266 ++ ncount;
2267
2268 valp >>= 16;
2269 }
2270
2271 /* zcount contains the number of additional MOVK instructions
2272 required if the constant is built up with an initial MOVZ instruction,
2273 while ncount is the number of MOVK instructions required if starting
2274 	 with a MOVN instruction. Choose the sequence that needs fewer
2275 	 instructions, preferring an initial MOVZ when the two counts are
2276 	 equal. */
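      /* Worked example: for val = 0xffffffff12345678 the three upper 16-bit
	 chunks are 0x1234, 0xffff and 0xffff, so zcount == 3 but ncount == 1;
	 starting from the MOVN-style value (val | ~0xffff) then needs a single
	 MOVK for the 0x1234 chunk, i.e. two instructions instead of four. */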
2277 if (ncount < zcount)
2278 {
2279 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2280 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2281 tval = 0xffff;
2282 }
2283 else
2284 {
2285 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2286 GEN_INT (val & 0xffff));
2287 tval = 0;
2288 }
2289
2290 val >>= 16;
2291
2292 for (i = 16; i < 64; i += 16)
2293 {
2294 if ((val & 0xffff) != tval)
2295 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2296 GEN_INT (i), GEN_INT (val & 0xffff)));
2297 val >>= 16;
2298 }
2299 }
2300 }
2301
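/* Add DELTA to the register numbered REGNUM.  SCRATCHREG is a spare
   register that may be clobbered; it is used to hold the part of the
   adjustment applied with a 12-bit left shift, or the whole constant when
   DELTA is too large for the shifted-immediate forms. */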
2302 static void
2303 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2304 {
2305 HOST_WIDE_INT mdelta = delta;
2306 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2307 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2308
2309 if (mdelta < 0)
2310 mdelta = -mdelta;
2311
2312 if (mdelta >= 4096 * 4096)
2313 {
2314 aarch64_build_constant (scratchreg, delta);
2315 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2316 }
2317 else if (mdelta > 0)
2318 {
2319 if (mdelta >= 4096)
2320 {
2321 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2322 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2323 if (delta < 0)
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2326 else
2327 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2328 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2329 }
2330 if (mdelta % 4096 != 0)
2331 {
2332 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2333 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2334 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2335 }
2336 }
2337 }
2338
2339 /* Output code to add DELTA to the first argument, and then jump
2340 to FUNCTION. Used for C++ multiple inheritance. */
2341 static void
2342 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2343 HOST_WIDE_INT delta,
2344 HOST_WIDE_INT vcall_offset,
2345 tree function)
2346 {
2347 /* The this pointer is always in x0. Note that this differs from
2348      Arm where the this pointer may be bumped to r1 if r0 is required
2349 to return a pointer to an aggregate. On AArch64 a result value
2350 pointer will be in x8. */
2351 int this_regno = R0_REGNUM;
2352 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2353
2354 reload_completed = 1;
2355 emit_note (NOTE_INSN_PROLOGUE_END);
2356
2357 if (vcall_offset == 0)
2358 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2359 else
2360 {
2361 gcc_assert ((vcall_offset & 0x7) == 0);
2362
2363 this_rtx = gen_rtx_REG (Pmode, this_regno);
2364 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2365 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2366
2367 addr = this_rtx;
2368 if (delta != 0)
2369 {
2370 if (delta >= -256 && delta < 256)
2371 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2372 plus_constant (Pmode, this_rtx, delta));
2373 else
2374 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2375 }
2376
2377 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2378
2379 if (vcall_offset >= -256 && vcall_offset < 32768)
2380 addr = plus_constant (Pmode, temp0, vcall_offset);
2381 else
2382 {
2383 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2384 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2385 }
2386
2387       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2388 emit_insn (gen_add2_insn (this_rtx, temp1));
2389 }
2390
2391 /* Generate a tail call to the target function. */
2392 if (!TREE_USED (function))
2393 {
2394 assemble_external (function);
2395 TREE_USED (function) = 1;
2396 }
2397 funexp = XEXP (DECL_RTL (function), 0);
2398 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2399 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2400 SIBLING_CALL_P (insn) = 1;
2401
2402 insn = get_insns ();
2403 shorten_branches (insn);
2404 final_start_function (insn, file, 1);
2405 final (insn, file, 1);
2406 final_end_function ();
2407
2408 /* Stop pretending to be a post-reload pass. */
2409 reload_completed = 0;
2410 }
2411
2412 static int
2413 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2414 {
2415 if (GET_CODE (*x) == SYMBOL_REF)
2416 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2417
2418 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2419 TLS offsets, not real symbol references. */
2420 if (GET_CODE (*x) == UNSPEC
2421 && XINT (*x, 1) == UNSPEC_TLS)
2422 return -1;
2423
2424 return 0;
2425 }
2426
2427 static bool
2428 aarch64_tls_referenced_p (rtx x)
2429 {
2430 if (!TARGET_HAVE_TLS)
2431 return false;
2432
2433 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2434 }
2435
2436
2437 static int
2438 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2439 {
2440 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2441 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2442
2443 if (*imm1 < *imm2)
2444 return -1;
2445 if (*imm1 > *imm2)
2446 return +1;
2447 return 0;
2448 }
2449
2450
2451 static void
2452 aarch64_build_bitmask_table (void)
2453 {
2454 unsigned HOST_WIDE_INT mask, imm;
2455 unsigned int log_e, e, s, r;
2456 unsigned int nimms = 0;
2457
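  /* For instance, log_e == 3 (e == 8), s == 3, r == 1 produces the byte
     0x83 (three set bits rotated right by one), which the switch below
     replicates to 0x8383838383838383. */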
2458 for (log_e = 1; log_e <= 6; log_e++)
2459 {
2460 e = 1 << log_e;
2461 if (e == 64)
2462 mask = ~(HOST_WIDE_INT) 0;
2463 else
2464 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2465 for (s = 1; s < e; s++)
2466 {
2467 for (r = 0; r < e; r++)
2468 {
2469 /* set s consecutive bits to 1 (s < 64) */
2470 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2471 /* rotate right by r */
2472 if (r != 0)
2473 imm = ((imm >> r) | (imm << (e - r))) & mask;
2474 /* replicate the constant depending on SIMD size */
2475 switch (log_e) {
2476 case 1: imm |= (imm << 2);
2477 case 2: imm |= (imm << 4);
2478 case 3: imm |= (imm << 8);
2479 case 4: imm |= (imm << 16);
2480 case 5: imm |= (imm << 32);
2481 case 6:
2482 break;
2483 default:
2484 gcc_unreachable ();
2485 }
2486 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2487 aarch64_bitmasks[nimms++] = imm;
2488 }
2489 }
2490 }
2491
2492 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2493 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2494 aarch64_bitmasks_cmp);
2495 }
2496
2497
2498 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2499 a left shift of 0 or 12 bits. */
2500 bool
2501 aarch64_uimm12_shift (HOST_WIDE_INT val)
2502 {
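  /* For example, 0xabc and 0xabc000 are accepted, while 0x1abc is not. */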
2503 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2504 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2505 );
2506 }
2507
2508
2509 /* Return true if val is an immediate that can be loaded into a
2510 register by a MOVZ instruction. */
2511 static bool
2512 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2513 {
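  /* For example, 0x12340000 (0x1234 << 16) is such an immediate, whereas
     0x123400 is not. */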
2514 if (GET_MODE_SIZE (mode) > 4)
2515 {
2516 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2517 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2518 return 1;
2519 }
2520 else
2521 {
2522 /* Ignore sign extension. */
2523 val &= (HOST_WIDE_INT) 0xffffffff;
2524 }
2525 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2526 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2527 }
2528
2529
2530 /* Return true if val is a valid bitmask immediate. */
2531 bool
2532 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2533 {
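  /* For example, 0x0000ffff0000ffff (a 16-bit pattern replicated across
     the register) is a valid bitmask immediate, while 0x12345 is not. */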
2534 if (GET_MODE_SIZE (mode) < 8)
2535 {
2536 /* Replicate bit pattern. */
2537 val &= (HOST_WIDE_INT) 0xffffffff;
2538 val |= val << 32;
2539 }
2540 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2541 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2542 }
2543
2544
2545 /* Return true if val is an immediate that can be loaded into a
2546 register in a single instruction. */
2547 bool
2548 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2549 {
2550 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2551 return 1;
2552 return aarch64_bitmask_imm (val, mode);
2553 }
2554
2555 static bool
2556 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2557 {
2558 rtx base, offset;
2559 if (GET_CODE (x) == HIGH)
2560 return true;
2561
2562 split_const (x, &base, &offset);
2563 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2564 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2565
2566 return aarch64_tls_referenced_p (x);
2567 }
2568
2569 /* Return true if register REGNO is a valid index register.
2570 STRICT_P is true if REG_OK_STRICT is in effect. */
2571
2572 bool
2573 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2574 {
2575 if (!HARD_REGISTER_NUM_P (regno))
2576 {
2577 if (!strict_p)
2578 return true;
2579
2580 if (!reg_renumber)
2581 return false;
2582
2583 regno = reg_renumber[regno];
2584 }
2585 return GP_REGNUM_P (regno);
2586 }
2587
2588 /* Return true if register REGNO is a valid base register.
2589 STRICT_P is true if REG_OK_STRICT is in effect. */
2590
2591 bool
2592 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2593 {
2594 if (!HARD_REGISTER_NUM_P (regno))
2595 {
2596 if (!strict_p)
2597 return true;
2598
2599 if (!reg_renumber)
2600 return false;
2601
2602 regno = reg_renumber[regno];
2603 }
2604
2605 /* The fake registers will be eliminated to either the stack or
2606 hard frame pointer, both of which are usually valid base registers.
2607 Reload deals with the cases where the eliminated form isn't valid. */
2608 return (GP_REGNUM_P (regno)
2609 || regno == SP_REGNUM
2610 || regno == FRAME_POINTER_REGNUM
2611 || regno == ARG_POINTER_REGNUM);
2612 }
2613
2614 /* Return true if X is a valid base register.
2615 STRICT_P is true if REG_OK_STRICT is in effect. */
2616
2617 static bool
2618 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2619 {
2620 if (!strict_p && GET_CODE (x) == SUBREG)
2621 x = SUBREG_REG (x);
2622
2623 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2624 }
2625
2626 /* Return true if address offset is a valid index. If it is, fill in INFO
2627 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
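/* For example, for a 4-byte access the index forms recognized below include
   a plain X register, (mult:DI (reg:DI) (const_int 4)) and sign/zero-extended
   SImode variants such as (mult:DI (sign_extend:DI (reg:SI)) (const_int 4)),
   corresponding to [base, Xm, lsl 2] and [base, Wm, sxtw 2] addresses. */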
2628
2629 static bool
2630 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2631 enum machine_mode mode, bool strict_p)
2632 {
2633 enum aarch64_address_type type;
2634 rtx index;
2635 int shift;
2636
2637 /* (reg:P) */
2638 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2639 && GET_MODE (x) == Pmode)
2640 {
2641 type = ADDRESS_REG_REG;
2642 index = x;
2643 shift = 0;
2644 }
2645 /* (sign_extend:DI (reg:SI)) */
2646 else if ((GET_CODE (x) == SIGN_EXTEND
2647 || GET_CODE (x) == ZERO_EXTEND)
2648 && GET_MODE (x) == DImode
2649 && GET_MODE (XEXP (x, 0)) == SImode)
2650 {
2651 type = (GET_CODE (x) == SIGN_EXTEND)
2652 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2653 index = XEXP (x, 0);
2654 shift = 0;
2655 }
2656 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2657 else if (GET_CODE (x) == MULT
2658 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2659 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2660 && GET_MODE (XEXP (x, 0)) == DImode
2661 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2662 && CONST_INT_P (XEXP (x, 1)))
2663 {
2664 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2665 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2666 index = XEXP (XEXP (x, 0), 0);
2667 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2668 }
2669 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2670 else if (GET_CODE (x) == ASHIFT
2671 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2672 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2673 && GET_MODE (XEXP (x, 0)) == DImode
2674 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2675 && CONST_INT_P (XEXP (x, 1)))
2676 {
2677 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2678 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2679 index = XEXP (XEXP (x, 0), 0);
2680 shift = INTVAL (XEXP (x, 1));
2681 }
2682 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2683 else if ((GET_CODE (x) == SIGN_EXTRACT
2684 || GET_CODE (x) == ZERO_EXTRACT)
2685 && GET_MODE (x) == DImode
2686 && GET_CODE (XEXP (x, 0)) == MULT
2687 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2688 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2689 {
2690 type = (GET_CODE (x) == SIGN_EXTRACT)
2691 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2692 index = XEXP (XEXP (x, 0), 0);
2693 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2694 if (INTVAL (XEXP (x, 1)) != 32 + shift
2695 || INTVAL (XEXP (x, 2)) != 0)
2696 shift = -1;
2697 }
2698 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2699 (const_int 0xffffffff<<shift)) */
2700 else if (GET_CODE (x) == AND
2701 && GET_MODE (x) == DImode
2702 && GET_CODE (XEXP (x, 0)) == MULT
2703 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2704 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2705 && CONST_INT_P (XEXP (x, 1)))
2706 {
2707 type = ADDRESS_REG_UXTW;
2708 index = XEXP (XEXP (x, 0), 0);
2709 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2710 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2711 shift = -1;
2712 }
2713 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2714 else if ((GET_CODE (x) == SIGN_EXTRACT
2715 || GET_CODE (x) == ZERO_EXTRACT)
2716 && GET_MODE (x) == DImode
2717 && GET_CODE (XEXP (x, 0)) == ASHIFT
2718 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2719 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2720 {
2721 type = (GET_CODE (x) == SIGN_EXTRACT)
2722 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2723 index = XEXP (XEXP (x, 0), 0);
2724 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2725 if (INTVAL (XEXP (x, 1)) != 32 + shift
2726 || INTVAL (XEXP (x, 2)) != 0)
2727 shift = -1;
2728 }
2729 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2730 (const_int 0xffffffff<<shift)) */
2731 else if (GET_CODE (x) == AND
2732 && GET_MODE (x) == DImode
2733 && GET_CODE (XEXP (x, 0)) == ASHIFT
2734 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2735 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2736 && CONST_INT_P (XEXP (x, 1)))
2737 {
2738 type = ADDRESS_REG_UXTW;
2739 index = XEXP (XEXP (x, 0), 0);
2740 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2741 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2742 shift = -1;
2743 }
2744 /* (mult:P (reg:P) (const_int scale)) */
2745 else if (GET_CODE (x) == MULT
2746 && GET_MODE (x) == Pmode
2747 && GET_MODE (XEXP (x, 0)) == Pmode
2748 && CONST_INT_P (XEXP (x, 1)))
2749 {
2750 type = ADDRESS_REG_REG;
2751 index = XEXP (x, 0);
2752 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2753 }
2754 /* (ashift:P (reg:P) (const_int shift)) */
2755 else if (GET_CODE (x) == ASHIFT
2756 && GET_MODE (x) == Pmode
2757 && GET_MODE (XEXP (x, 0)) == Pmode
2758 && CONST_INT_P (XEXP (x, 1)))
2759 {
2760 type = ADDRESS_REG_REG;
2761 index = XEXP (x, 0);
2762 shift = INTVAL (XEXP (x, 1));
2763 }
2764 else
2765 return false;
2766
2767 if (GET_CODE (index) == SUBREG)
2768 index = SUBREG_REG (index);
2769
2770   if ((shift == 0
2771        || (shift > 0 && shift <= 3
2772 && (1 << shift) == GET_MODE_SIZE (mode)))
2773 && REG_P (index)
2774 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2775 {
2776 info->type = type;
2777 info->offset = index;
2778 info->shift = shift;
2779 return true;
2780 }
2781
2782 return false;
2783 }
2784
2785 static inline bool
2786 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2787 {
2788 return (offset >= -64 * GET_MODE_SIZE (mode)
2789 && offset < 64 * GET_MODE_SIZE (mode)
2790 && offset % GET_MODE_SIZE (mode) == 0);
2791 }
2792
2793 static inline bool
2794 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2795 HOST_WIDE_INT offset)
2796 {
2797 return offset >= -256 && offset < 256;
2798 }
2799
2800 static inline bool
2801 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2802 {
2803 return (offset >= 0
2804 && offset < 4096 * GET_MODE_SIZE (mode)
2805 && offset % GET_MODE_SIZE (mode) == 0);
2806 }
2807
2808 /* Return true if X is a valid address for machine mode MODE. If it is,
2809 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2810 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2811
2812 static bool
2813 aarch64_classify_address (struct aarch64_address_info *info,
2814 rtx x, enum machine_mode mode,
2815 RTX_CODE outer_code, bool strict_p)
2816 {
2817 enum rtx_code code = GET_CODE (x);
2818 rtx op0, op1;
2819 bool allow_reg_index_p =
2820     outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
2821
2822 /* Don't support anything other than POST_INC or REG addressing for
2823 AdvSIMD. */
2824 if (aarch64_vector_mode_p (mode)
2825 && (code != POST_INC && code != REG))
2826 return false;
2827
2828 switch (code)
2829 {
2830 case REG:
2831 case SUBREG:
2832 info->type = ADDRESS_REG_IMM;
2833 info->base = x;
2834 info->offset = const0_rtx;
2835 return aarch64_base_register_rtx_p (x, strict_p);
2836
2837 case PLUS:
2838 op0 = XEXP (x, 0);
2839 op1 = XEXP (x, 1);
2840 if (GET_MODE_SIZE (mode) != 0
2841 && CONST_INT_P (op1)
2842 && aarch64_base_register_rtx_p (op0, strict_p))
2843 {
2844 HOST_WIDE_INT offset = INTVAL (op1);
2845
2846 info->type = ADDRESS_REG_IMM;
2847 info->base = op0;
2848 info->offset = op1;
2849
2850 /* TImode and TFmode values are allowed in both pairs of X
2851 registers and individual Q registers. The available
2852 address modes are:
2853 X,X: 7-bit signed scaled offset
2854 Q: 9-bit signed offset
2855 We conservatively require an offset representable in either mode.
2856 */
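	  /* In practice the two checks below limit TImode and TFmode offsets
	     to multiples of 16 in the range [-256, 240]. */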
2857 if (mode == TImode || mode == TFmode)
2858 return (offset_7bit_signed_scaled_p (mode, offset)
2859 && offset_9bit_signed_unscaled_p (mode, offset));
2860
2861 if (outer_code == PARALLEL)
2862 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2863 && offset_7bit_signed_scaled_p (mode, offset));
2864 else
2865 return (offset_9bit_signed_unscaled_p (mode, offset)
2866 || offset_12bit_unsigned_scaled_p (mode, offset));
2867 }
2868
2869 if (allow_reg_index_p)
2870 {
2871 /* Look for base + (scaled/extended) index register. */
2872 if (aarch64_base_register_rtx_p (op0, strict_p)
2873 && aarch64_classify_index (info, op1, mode, strict_p))
2874 {
2875 info->base = op0;
2876 return true;
2877 }
2878 if (aarch64_base_register_rtx_p (op1, strict_p)
2879 && aarch64_classify_index (info, op0, mode, strict_p))
2880 {
2881 info->base = op1;
2882 return true;
2883 }
2884 }
2885
2886 return false;
2887
2888 case POST_INC:
2889 case POST_DEC:
2890 case PRE_INC:
2891 case PRE_DEC:
2892 info->type = ADDRESS_REG_WB;
2893 info->base = XEXP (x, 0);
2894 info->offset = NULL_RTX;
2895 return aarch64_base_register_rtx_p (info->base, strict_p);
2896
2897 case POST_MODIFY:
2898 case PRE_MODIFY:
2899 info->type = ADDRESS_REG_WB;
2900 info->base = XEXP (x, 0);
2901 if (GET_CODE (XEXP (x, 1)) == PLUS
2902 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2903 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2904 && aarch64_base_register_rtx_p (info->base, strict_p))
2905 {
2906 HOST_WIDE_INT offset;
2907 info->offset = XEXP (XEXP (x, 1), 1);
2908 offset = INTVAL (info->offset);
2909
2910 /* TImode and TFmode values are allowed in both pairs of X
2911 registers and individual Q registers. The available
2912 address modes are:
2913 X,X: 7-bit signed scaled offset
2914 Q: 9-bit signed offset
2915 We conservatively require an offset representable in either mode.
2916 */
2917 if (mode == TImode || mode == TFmode)
2918 return (offset_7bit_signed_scaled_p (mode, offset)
2919 && offset_9bit_signed_unscaled_p (mode, offset));
2920
2921 if (outer_code == PARALLEL)
2922 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2923 && offset_7bit_signed_scaled_p (mode, offset));
2924 else
2925 return offset_9bit_signed_unscaled_p (mode, offset);
2926 }
2927 return false;
2928
2929 case CONST:
2930 case SYMBOL_REF:
2931 case LABEL_REF:
2932 /* load literal: pc-relative constant pool entry. Only supported
2933 for SI mode or larger. */
2934 info->type = ADDRESS_SYMBOLIC;
2935 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2936 {
2937 rtx sym, addend;
2938
2939 split_const (x, &sym, &addend);
2940 return (GET_CODE (sym) == LABEL_REF
2941 || (GET_CODE (sym) == SYMBOL_REF
2942 && CONSTANT_POOL_ADDRESS_P (sym)));
2943 }
2944 return false;
2945
2946 case LO_SUM:
2947 info->type = ADDRESS_LO_SUM;
2948 info->base = XEXP (x, 0);
2949 info->offset = XEXP (x, 1);
2950 if (allow_reg_index_p
2951 && aarch64_base_register_rtx_p (info->base, strict_p))
2952 {
2953 rtx sym, offs;
2954 split_const (info->offset, &sym, &offs);
2955 if (GET_CODE (sym) == SYMBOL_REF
2956 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2957 == SYMBOL_SMALL_ABSOLUTE))
2958 {
2959 /* The symbol and offset must be aligned to the access size. */
2960 unsigned int align;
2961 unsigned int ref_size;
2962
2963 if (CONSTANT_POOL_ADDRESS_P (sym))
2964 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2965 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2966 {
2967 tree exp = SYMBOL_REF_DECL (sym);
2968 align = TYPE_ALIGN (TREE_TYPE (exp));
2969 align = CONSTANT_ALIGNMENT (exp, align);
2970 }
2971 else if (SYMBOL_REF_DECL (sym))
2972 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2973 else
2974 align = BITS_PER_UNIT;
2975
2976 ref_size = GET_MODE_SIZE (mode);
2977 if (ref_size == 0)
2978 ref_size = GET_MODE_SIZE (DImode);
2979
2980 return ((INTVAL (offs) & (ref_size - 1)) == 0
2981 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2982 }
2983 }
2984 return false;
2985
2986 default:
2987 return false;
2988 }
2989 }
2990
2991 bool
2992 aarch64_symbolic_address_p (rtx x)
2993 {
2994 rtx offset;
2995
2996 split_const (x, &x, &offset);
2997 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2998 }
2999
3000 /* Classify the base of symbolic expression X, given that X appears in
3001 context CONTEXT. */
3002 static enum aarch64_symbol_type
3003 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3004 {
3005 rtx offset;
3006 split_const (x, &x, &offset);
3007 return aarch64_classify_symbol (x, context);
3008 }
3009
3010
3011 /* Return TRUE if X is a legitimate address for accessing memory in
3012 mode MODE. */
3013 static bool
3014 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3015 {
3016 struct aarch64_address_info addr;
3017
3018 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3019 }
3020
3021 /* Return TRUE if X is a legitimate address for accessing memory in
3022 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3023 pair operation. */
3024 bool
3025 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3026 RTX_CODE outer_code, bool strict_p)
3027 {
3028 struct aarch64_address_info addr;
3029
3030 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3031 }
3032
3033 /* Return TRUE if rtx X is the immediate constant 0.0. */
3034 bool
3035 aarch64_float_const_zero_rtx_p (rtx x)
3036 {
3037 REAL_VALUE_TYPE r;
3038
3039 if (GET_MODE (x) == VOIDmode)
3040 return false;
3041
3042 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3043 if (REAL_VALUE_MINUS_ZERO (r))
3044 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3045 return REAL_VALUES_EQUAL (r, dconst0);
3046 }
3047
3048 /* Return the fixed registers used for condition codes. */
3049
3050 static bool
3051 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3052 {
3053 *p1 = CC_REGNUM;
3054 *p2 = INVALID_REGNUM;
3055 return true;
3056 }
3057
3058 enum machine_mode
3059 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3060 {
3061   /* All floating point compares return CCFP for equality and
3062      unordered-aware comparisons, and CCFPE for LT, LE, GT and GE. */
3063 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3064 {
3065 switch (code)
3066 {
3067 case EQ:
3068 case NE:
3069 case UNORDERED:
3070 case ORDERED:
3071 case UNLT:
3072 case UNLE:
3073 case UNGT:
3074 case UNGE:
3075 case UNEQ:
3076 case LTGT:
3077 return CCFPmode;
3078
3079 case LT:
3080 case LE:
3081 case GT:
3082 case GE:
3083 return CCFPEmode;
3084
3085 default:
3086 gcc_unreachable ();
3087 }
3088 }
3089
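  /* Equality and sign comparisons of an addition, subtraction or logical AND
     against zero only need the N and Z flags, so CC_NZmode is used; this
     allows the compare to be combined with a flag-setting ADDS/SUBS/ANDS. */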
3090 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3091 && y == const0_rtx
3092 && (code == EQ || code == NE || code == LT || code == GE)
3093 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
3094 return CC_NZmode;
3095
3096 /* A compare with a shifted operand. Because of canonicalization,
3097 the comparison will have to be swapped when we emit the assembly
3098 code. */
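  /* For instance, (compare (ashift x n) y) is output with the shifted operand
     second, as "cmp y, x, lsl n", so the condition returned for CC_SWPmode by
     aarch64_get_condition_code is the swapped one. */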
3099 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3100 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3101 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3102 || GET_CODE (x) == LSHIFTRT
3103 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3104 return CC_SWPmode;
3105
3106 /* A compare of a mode narrower than SI mode against zero can be done
3107 by extending the value in the comparison. */
3108 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3109 && y == const0_rtx)
3110 /* Only use sign-extension if we really need it. */
3111 return ((code == GT || code == GE || code == LE || code == LT)
3112 ? CC_SESWPmode : CC_ZESWPmode);
3113
3114 /* For everything else, return CCmode. */
3115 return CCmode;
3116 }
3117
3118 static unsigned
3119 aarch64_get_condition_code (rtx x)
3120 {
3121 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3122 enum rtx_code comp_code = GET_CODE (x);
3123
3124 if (GET_MODE_CLASS (mode) != MODE_CC)
3125 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3126
3127 switch (mode)
3128 {
3129 case CCFPmode:
3130 case CCFPEmode:
3131 switch (comp_code)
3132 {
3133 case GE: return AARCH64_GE;
3134 case GT: return AARCH64_GT;
3135 case LE: return AARCH64_LS;
3136 case LT: return AARCH64_MI;
3137 case NE: return AARCH64_NE;
3138 case EQ: return AARCH64_EQ;
3139 case ORDERED: return AARCH64_VC;
3140 case UNORDERED: return AARCH64_VS;
3141 case UNLT: return AARCH64_LT;
3142 case UNLE: return AARCH64_LE;
3143 case UNGT: return AARCH64_HI;
3144 case UNGE: return AARCH64_PL;
3145 default: gcc_unreachable ();
3146 }
3147 break;
3148
3149 case CCmode:
3150 switch (comp_code)
3151 {
3152 case NE: return AARCH64_NE;
3153 case EQ: return AARCH64_EQ;
3154 case GE: return AARCH64_GE;
3155 case GT: return AARCH64_GT;
3156 case LE: return AARCH64_LE;
3157 case LT: return AARCH64_LT;
3158 case GEU: return AARCH64_CS;
3159 case GTU: return AARCH64_HI;
3160 case LEU: return AARCH64_LS;
3161 case LTU: return AARCH64_CC;
3162 default: gcc_unreachable ();
3163 }
3164 break;
3165
3166 case CC_SWPmode:
3167 case CC_ZESWPmode:
3168 case CC_SESWPmode:
3169 switch (comp_code)
3170 {
3171 case NE: return AARCH64_NE;
3172 case EQ: return AARCH64_EQ;
3173 case GE: return AARCH64_LE;
3174 case GT: return AARCH64_LT;
3175 case LE: return AARCH64_GE;
3176 case LT: return AARCH64_GT;
3177 case GEU: return AARCH64_LS;
3178 case GTU: return AARCH64_CC;
3179 case LEU: return AARCH64_CS;
3180 case LTU: return AARCH64_HI;
3181 default: gcc_unreachable ();
3182 }
3183 break;
3184
3185 case CC_NZmode:
3186 switch (comp_code)
3187 {
3188 case NE: return AARCH64_NE;
3189 case EQ: return AARCH64_EQ;
3190 case GE: return AARCH64_PL;
3191 case LT: return AARCH64_MI;
3192 default: gcc_unreachable ();
3193 }
3194 break;
3195
3196 default:
3197 gcc_unreachable ();
3198 break;
3199 }
3200 }
3201
3202 static unsigned
3203 bit_count (unsigned HOST_WIDE_INT value)
3204 {
3205 unsigned count = 0;
3206
3207 while (value)
3208 {
3209 count++;
3210 value &= value - 1;
3211 }
3212
3213 return count;
3214 }
3215
3216 void
3217 aarch64_print_operand (FILE *f, rtx x, char code)
3218 {
3219 switch (code)
3220 {
3221 case 'e':
3222 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3223 {
3224 int n;
3225
3226 if (GET_CODE (x) != CONST_INT
3227 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3228 {
3229 output_operand_lossage ("invalid operand for '%%%c'", code);
3230 return;
3231 }
3232
3233 switch (n)
3234 {
3235 case 3:
3236 fputc ('b', f);
3237 break;
3238 case 4:
3239 fputc ('h', f);
3240 break;
3241 case 5:
3242 fputc ('w', f);
3243 break;
3244 default:
3245 output_operand_lossage ("invalid operand for '%%%c'", code);
3246 return;
3247 }
3248 }
3249 break;
3250
3251 case 'p':
3252 {
3253 int n;
3254
3255 /* Print N such that 2^N == X. */
3256 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3257 {
3258 output_operand_lossage ("invalid operand for '%%%c'", code);
3259 return;
3260 }
3261
3262 asm_fprintf (f, "%d", n);
3263 }
3264 break;
3265
3266 case 'P':
3267 /* Print the number of non-zero bits in X (a const_int). */
3268 if (GET_CODE (x) != CONST_INT)
3269 {
3270 output_operand_lossage ("invalid operand for '%%%c'", code);
3271 return;
3272 }
3273
3274 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3275 break;
3276
3277 case 'H':
3278 /* Print the higher numbered register of a pair (TImode) of regs. */
3279 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3280 {
3281 output_operand_lossage ("invalid operand for '%%%c'", code);
3282 return;
3283 }
3284
3285 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3286 break;
3287
3288 case 'Q':
3289 /* Print the least significant register of a pair (TImode) of regs. */
3290 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3291 {
3292 output_operand_lossage ("invalid operand for '%%%c'", code);
3293 return;
3294 }
3295 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3296 break;
3297
3298 case 'R':
3299 /* Print the most significant register of a pair (TImode) of regs. */
3300 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3301 {
3302 output_operand_lossage ("invalid operand for '%%%c'", code);
3303 return;
3304 }
3305 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3306 break;
3307
3308 case 'm':
3309 /* Print a condition (eq, ne, etc). */
3310
3311 /* CONST_TRUE_RTX means always -- that's the default. */
3312 if (x == const_true_rtx)
3313 return;
3314
3315 if (!COMPARISON_P (x))
3316 {
3317 output_operand_lossage ("invalid operand for '%%%c'", code);
3318 return;
3319 }
3320
3321 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3322 break;
3323
3324 case 'M':
3325 /* Print the inverse of a condition (eq <-> ne, etc). */
3326
3327 /* CONST_TRUE_RTX means never -- that's the default. */
3328 if (x == const_true_rtx)
3329 {
3330 fputs ("nv", f);
3331 return;
3332 }
3333
3334 if (!COMPARISON_P (x))
3335 {
3336 output_operand_lossage ("invalid operand for '%%%c'", code);
3337 return;
3338 }
3339
3340 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3341 (aarch64_get_condition_code (x))], f);
3342 break;
3343
3344 case 'b':
3345 case 'h':
3346 case 's':
3347 case 'd':
3348 case 'q':
3349 /* Print a scalar FP/SIMD register name. */
3350 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3351 {
3352 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3353 return;
3354 }
3355 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3356 break;
3357
3358 case 'S':
3359 case 'T':
3360 case 'U':
3361 case 'V':
3362 /* Print the first FP/SIMD register name in a list. */
3363 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3364 {
3365 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3366 return;
3367 }
3368 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3369 REGNO (x) - V0_REGNUM + (code - 'S'));
3370 break;
3371
3372 case 'X':
3373 /* Print integer constant in hex. */
3374 if (GET_CODE (x) != CONST_INT)
3375 {
3376 output_operand_lossage ("invalid operand for '%%%c'", code);
3377 return;
3378 }
3379 asm_fprintf (f, "0x%wx", UINTVAL (x));
3380 break;
3381
3382 case 'w':
3383 case 'x':
3384 /* Print a general register name or the zero register (32-bit or
3385 64-bit). */
3386 if (x == const0_rtx
3387 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3388 {
3389 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3390 break;
3391 }
3392
3393 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3394 {
3395 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3396 REGNO (x) - R0_REGNUM);
3397 break;
3398 }
3399
3400 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3401 {
3402 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3403 break;
3404 }
3405
3406 /* Fall through */
3407
3408 case 0:
3409     /* Print a normal operand. If it's a general register, then we
3410        assume DImode. */
3411 if (x == NULL)
3412 {
3413 output_operand_lossage ("missing operand");
3414 return;
3415 }
3416
3417 switch (GET_CODE (x))
3418 {
3419 case REG:
3420 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3421 break;
3422
3423 case MEM:
3424 aarch64_memory_reference_mode = GET_MODE (x);
3425 output_address (XEXP (x, 0));
3426 break;
3427
3428 case LABEL_REF:
3429 case SYMBOL_REF:
3430 output_addr_const (asm_out_file, x);
3431 break;
3432
3433 case CONST_INT:
3434 asm_fprintf (f, "%wd", INTVAL (x));
3435 break;
3436
3437 case CONST_VECTOR:
3438 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3439 {
3440 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3441 HOST_WIDE_INT_MIN,
3442 HOST_WIDE_INT_MAX));
3443 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3444 }
3445 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3446 {
3447 fputc ('0', f);
3448 }
3449 else
3450 gcc_unreachable ();
3451 break;
3452
3453 case CONST_DOUBLE:
3454 /* CONST_DOUBLE can represent a double-width integer.
3455 In this case, the mode of x is VOIDmode. */
3456 if (GET_MODE (x) == VOIDmode)
3457 ; /* Do Nothing. */
3458 else if (aarch64_float_const_zero_rtx_p (x))
3459 {
3460 fputc ('0', f);
3461 break;
3462 }
3463 else if (aarch64_float_const_representable_p (x))
3464 {
3465 #define buf_size 20
3466 char float_buf[buf_size] = {'\0'};
3467 REAL_VALUE_TYPE r;
3468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3469 real_to_decimal_for_mode (float_buf, &r,
3470 buf_size, buf_size,
3471 1, GET_MODE (x));
3472 asm_fprintf (asm_out_file, "%s", float_buf);
3473 break;
3474 #undef buf_size
3475 }
3476 output_operand_lossage ("invalid constant");
3477 return;
3478 default:
3479 output_operand_lossage ("invalid operand");
3480 return;
3481 }
3482 break;
3483
3484 case 'A':
3485 if (GET_CODE (x) == HIGH)
3486 x = XEXP (x, 0);
3487
3488 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3489 {
3490 case SYMBOL_SMALL_GOT:
3491 asm_fprintf (asm_out_file, ":got:");
3492 break;
3493
3494 case SYMBOL_SMALL_TLSGD:
3495 asm_fprintf (asm_out_file, ":tlsgd:");
3496 break;
3497
3498 case SYMBOL_SMALL_TLSDESC:
3499 asm_fprintf (asm_out_file, ":tlsdesc:");
3500 break;
3501
3502 case SYMBOL_SMALL_GOTTPREL:
3503 asm_fprintf (asm_out_file, ":gottprel:");
3504 break;
3505
3506 case SYMBOL_SMALL_TPREL:
3507 asm_fprintf (asm_out_file, ":tprel:");
3508 break;
3509
3510 default:
3511 break;
3512 }
3513 output_addr_const (asm_out_file, x);
3514 break;
3515
3516 case 'L':
3517 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3518 {
3519 case SYMBOL_SMALL_GOT:
3520 asm_fprintf (asm_out_file, ":lo12:");
3521 break;
3522
3523 case SYMBOL_SMALL_TLSGD:
3524 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3525 break;
3526
3527 case SYMBOL_SMALL_TLSDESC:
3528 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3529 break;
3530
3531 case SYMBOL_SMALL_GOTTPREL:
3532 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3533 break;
3534
3535 case SYMBOL_SMALL_TPREL:
3536 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3537 break;
3538
3539 default:
3540 break;
3541 }
3542 output_addr_const (asm_out_file, x);
3543 break;
3544
3545 case 'G':
3546
3547 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3548 {
3549 case SYMBOL_SMALL_TPREL:
3550 asm_fprintf (asm_out_file, ":tprel_hi12:");
3551 break;
3552 default:
3553 break;
3554 }
3555 output_addr_const (asm_out_file, x);
3556 break;
3557
3558 default:
3559 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3560 return;
3561 }
3562 }
3563
3564 void
3565 aarch64_print_operand_address (FILE *f, rtx x)
3566 {
3567 struct aarch64_address_info addr;
3568
3569 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3570 MEM, true))
3571 switch (addr.type)
3572 {
3573 case ADDRESS_REG_IMM:
3574 if (addr.offset == const0_rtx)
3575 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3576 else
3577 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3578 INTVAL (addr.offset));
3579 return;
3580
3581 case ADDRESS_REG_REG:
3582 if (addr.shift == 0)
3583 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3584 reg_names [REGNO (addr.offset)]);
3585 else
3586 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3587 reg_names [REGNO (addr.offset)], addr.shift);
3588 return;
3589
3590 case ADDRESS_REG_UXTW:
3591 if (addr.shift == 0)
3592 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3593 REGNO (addr.offset) - R0_REGNUM);
3594 else
3595 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3596 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3597 return;
3598
3599 case ADDRESS_REG_SXTW:
3600 if (addr.shift == 0)
3601 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3602 REGNO (addr.offset) - R0_REGNUM);
3603 else
3604 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3605 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3606 return;
3607
3608 case ADDRESS_REG_WB:
3609 switch (GET_CODE (x))
3610 {
3611 case PRE_INC:
3612 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3613 GET_MODE_SIZE (aarch64_memory_reference_mode));
3614 return;
3615 case POST_INC:
3616 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3617 GET_MODE_SIZE (aarch64_memory_reference_mode));
3618 return;
3619 case PRE_DEC:
3620 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3621 GET_MODE_SIZE (aarch64_memory_reference_mode));
3622 return;
3623 case POST_DEC:
3624 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3625 GET_MODE_SIZE (aarch64_memory_reference_mode));
3626 return;
3627 case PRE_MODIFY:
3628 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3629 INTVAL (addr.offset));
3630 return;
3631 case POST_MODIFY:
3632 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3633 INTVAL (addr.offset));
3634 return;
3635 default:
3636 break;
3637 }
3638 break;
3639
3640 case ADDRESS_LO_SUM:
3641 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3642 output_addr_const (f, addr.offset);
3643 asm_fprintf (f, "]");
3644 return;
3645
3646 case ADDRESS_SYMBOLIC:
3647 break;
3648 }
3649
3650 output_addr_const (f, x);
3651 }
3652
3653 void
3654 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3655 int labelno ATTRIBUTE_UNUSED)
3656 {
3657 sorry ("function profiling");
3658 }
3659
3660 bool
3661 aarch64_label_mentioned_p (rtx x)
3662 {
3663 const char *fmt;
3664 int i;
3665
3666 if (GET_CODE (x) == LABEL_REF)
3667 return true;
3668
3669 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3670 referencing instruction, but they are constant offsets, not
3671 symbols. */
3672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3673 return false;
3674
3675 fmt = GET_RTX_FORMAT (GET_CODE (x));
3676 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3677 {
3678 if (fmt[i] == 'E')
3679 {
3680 int j;
3681
3682 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3683 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3684 return 1;
3685 }
3686 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3687 return 1;
3688 }
3689
3690 return 0;
3691 }
3692
3693 /* Implement REGNO_REG_CLASS. */
3694
3695 enum reg_class
3696 aarch64_regno_regclass (unsigned regno)
3697 {
3698 if (GP_REGNUM_P (regno))
3699 return CORE_REGS;
3700
3701 if (regno == SP_REGNUM)
3702 return STACK_REG;
3703
3704 if (regno == FRAME_POINTER_REGNUM
3705 || regno == ARG_POINTER_REGNUM)
3706 return CORE_REGS;
3707
3708 if (FP_REGNUM_P (regno))
3709 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3710
3711 return NO_REGS;
3712 }
3713
3714 /* Try a machine-dependent way of reloading an illegitimate address
3715 operand. If we find one, push the reload and return the new rtx. */
3716
3717 rtx
3718 aarch64_legitimize_reload_address (rtx *x_p,
3719 enum machine_mode mode,
3720 int opnum, int type,
3721 int ind_levels ATTRIBUTE_UNUSED)
3722 {
3723 rtx x = *x_p;
3724
3725 /* Do not allow mem (plus (reg, const)) if vector mode. */
3726 if (aarch64_vector_mode_p (mode)
3727 && GET_CODE (x) == PLUS
3728 && REG_P (XEXP (x, 0))
3729 && CONST_INT_P (XEXP (x, 1)))
3730 {
3731 rtx orig_rtx = x;
3732 x = copy_rtx (x);
3733 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3734 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3735 opnum, (enum reload_type) type);
3736 return x;
3737 }
3738
3739 /* We must recognize output that we have already generated ourselves. */
3740 if (GET_CODE (x) == PLUS
3741 && GET_CODE (XEXP (x, 0)) == PLUS
3742 && REG_P (XEXP (XEXP (x, 0), 0))
3743 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3744 && CONST_INT_P (XEXP (x, 1)))
3745 {
3746 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3747 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3748 opnum, (enum reload_type) type);
3749 return x;
3750 }
3751
3752 /* We wish to handle large displacements off a base register by splitting
3753 the addend across an add and the mem insn. This can cut the number of
3754 extra insns needed from 3 to 1. It is only useful for load/store of a
3755 single register with 12 bit offset field. */
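  /* A worked example (a sketch, not an exhaustive description): for a
     DImode access to (plus (reg) (const_int 0x13008)) the offset is split
     into high = 0x13000, a 12-bit immediate shifted left by 12 that a
     single ADD can fold into the base register, and low = 0x8, which is
     small and aligned enough to stay in the memory reference itself.  */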
3756 if (GET_CODE (x) == PLUS
3757 && REG_P (XEXP (x, 0))
3758 && CONST_INT_P (XEXP (x, 1))
3759 && HARD_REGISTER_P (XEXP (x, 0))
3760 && mode != TImode
3761 && mode != TFmode
3762 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3763 {
3764 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3765 HOST_WIDE_INT low = val & 0xfff;
3766 HOST_WIDE_INT high = val - low;
3767 HOST_WIDE_INT offs;
3768 rtx cst;
3769
3770       /* Punt on zero-sized (BLKmode) references: we cannot ascertain
3771          BLKmode alignment, so leave them to the generic reload code.  */
3772 if (GET_MODE_SIZE (mode) == 0)
3773 return NULL_RTX;
3774
3775 offs = low % GET_MODE_SIZE (mode);
3776
3777 /* Align misaligned offset by adjusting high part to compensate. */
3778 if (offs != 0)
3779 {
3780 if (aarch64_uimm12_shift (high + offs))
3781 {
3782 /* Align down. */
3783 low = low - offs;
3784 high = high + offs;
3785 }
3786 else
3787 {
3788 /* Align up. */
3789 offs = GET_MODE_SIZE (mode) - offs;
3790 low = low + offs;
3791 high = high + (low & 0x1000) - offs;
3792 low &= 0xfff;
3793 }
3794 }
3795
3796 /* Check for overflow. */
3797 if (high + low != val)
3798 return NULL_RTX;
3799
3800 cst = GEN_INT (high);
3801 if (!aarch64_uimm12_shift (high))
3802 cst = force_const_mem (Pmode, cst);
3803
3804 /* Reload high part into base reg, leaving the low part
3805 in the mem instruction. */
3806 x = gen_rtx_PLUS (Pmode,
3807 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3808 GEN_INT (low));
3809
3810 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3811 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3812 opnum, (enum reload_type) type);
3813 return x;
3814 }
3815
3816 return NULL_RTX;
3817 }
3818
3819
3820 static reg_class_t
3821 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3822 reg_class_t rclass,
3823 enum machine_mode mode,
3824 secondary_reload_info *sri)
3825 {
3826 /* Address expressions of the form PLUS (SP, large_offset) need two
3827 scratch registers, one for the constant, and one for holding a
3828 copy of SP, since SP cannot be used on the RHS of an add-reg
3829 instruction. */
3830 if (mode == DImode
3831 && GET_CODE (x) == PLUS
3832 && XEXP (x, 0) == stack_pointer_rtx
3833 && CONST_INT_P (XEXP (x, 1))
3834 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3835 {
3836 sri->icode = CODE_FOR_reload_sp_immediate;
3837 return NO_REGS;
3838 }
3839
3840 /* Without the TARGET_SIMD instructions we cannot move a Q register
3841 to a Q register directly. We need a scratch. */
3842 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3843 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3844 && reg_class_subset_p (rclass, FP_REGS))
3845 {
3846 if (mode == TFmode)
3847 sri->icode = CODE_FOR_aarch64_reload_movtf;
3848 else if (mode == TImode)
3849 sri->icode = CODE_FOR_aarch64_reload_movti;
3850 return NO_REGS;
3851 }
3852
3853   /* A TFmode or TImode memory access should be handled via FP_REGS,
3854      because AArch64 has richer addressing modes for LDR/STR instructions
3855      than for LDP/STP instructions.  */
3856 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3857 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3858 return FP_REGS;
3859
3860 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3861 return CORE_REGS;
3862
3863 return NO_REGS;
3864 }
3865
3866 static bool
3867 aarch64_can_eliminate (const int from, const int to)
3868 {
3869 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3870 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3871
3872 if (frame_pointer_needed)
3873 {
3874 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3875 return true;
3876 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3877 return false;
3878 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3879 && !cfun->calls_alloca)
3880 return true;
3881 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3882 return true;
3883 return false;
3884 }
3885 else
3886 {
3887 /* If we decided that we didn't need a leaf frame pointer but then used
3888 LR in the function, then we'll want a frame pointer after all, so
3889 prevent this elimination to ensure a frame pointer is used.
3890
3891 NOTE: the original value of flag_omit_frame_pointer gets trashed
3892 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3893 of faked_omit_frame_pointer here (which is true when we always
3894 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3895 pointers when LR is clobbered). */
3896 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3897 && df_regs_ever_live_p (LR_REGNUM)
3898 && faked_omit_frame_pointer)
3899 return false;
3900 }
3901
3902 return true;
3903 }
3904
3905 HOST_WIDE_INT
3906 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3907 {
3908 HOST_WIDE_INT frame_size;
3909 HOST_WIDE_INT offset;
3910
3911 aarch64_layout_frame ();
3912 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3913 + crtl->outgoing_args_size
3914 + cfun->machine->saved_varargs_size);
3915
3916 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3917 offset = frame_size;
3918
3919 if (to == HARD_FRAME_POINTER_REGNUM)
3920 {
3921 if (from == ARG_POINTER_REGNUM)
3922 return offset - crtl->outgoing_args_size;
3923
3924 if (from == FRAME_POINTER_REGNUM)
3925 return cfun->machine->frame.saved_regs_size;
3926 }
3927
3928 if (to == STACK_POINTER_REGNUM)
3929 {
3930 if (from == FRAME_POINTER_REGNUM)
3931 {
3932 HOST_WIDE_INT elim = crtl->outgoing_args_size
3933 + cfun->machine->frame.saved_regs_size
3934 - cfun->machine->frame.fp_lr_offset;
3935 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3936 return elim;
3937 }
3938 }
3939
3940 return offset;
3941 }
3942
3943
3944 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3945 previous frame. */
3946
3947 rtx
3948 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3949 {
3950 if (count != 0)
3951 return const0_rtx;
3952 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3953 }
3954
3955
3956 static void
3957 aarch64_asm_trampoline_template (FILE *f)
3958 {
3959 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3960 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3961 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3962 assemble_aligned_integer (4, const0_rtx);
3963 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3964 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3965 }
3966
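/* Sketch of the trampoline laid out by aarch64_asm_trampoline_template
   above (byte offsets within the trampoline):
     0: ldr  IP1, .+16            load the target function address
     4: ldr  STATIC_CHAIN, .+20   load the static chain value
     8: br   IP1
    12: 4 bytes of padding
    16: function address, filled in by aarch64_trampoline_init
    24: static chain value, filled in by aarch64_trampoline_init  */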
3967 unsigned
3968 aarch64_trampoline_size (void)
3969 {
3970 return 32; /* 3 insns + padding + 2 dwords. */
3971 }
3972
3973 static void
3974 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3975 {
3976 rtx fnaddr, mem, a_tramp;
3977
3978   /* Don't need to copy the trailing D-words; we fill those in below.  */
3979 emit_block_move (m_tramp, assemble_trampoline_template (),
3980 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3981 mem = adjust_address (m_tramp, DImode, 16);
3982 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3983 emit_move_insn (mem, fnaddr);
3984
3985 mem = adjust_address (m_tramp, DImode, 24);
3986 emit_move_insn (mem, chain_value);
3987
3988 /* XXX We should really define a "clear_cache" pattern and use
3989 gen_clear_cache(). */
3990 a_tramp = XEXP (m_tramp, 0);
3991 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3992 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3993 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3994 }
3995
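/* Return how many registers of class REGCLASS are needed to hold a value
   of mode MODE.  For example, with the formula below a 16-byte vector
   mode fits in a single register, whereas 16-byte TImode (not a vector
   mode) needs two.  */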
3996 static unsigned char
3997 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3998 {
3999 switch (regclass)
4000 {
4001 case CORE_REGS:
4002 case POINTER_REGS:
4003 case GENERAL_REGS:
4004 case ALL_REGS:
4005 case FP_REGS:
4006 case FP_LO_REGS:
4007 return
4008 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4009 (GET_MODE_SIZE (mode) + 7) / 8;
4010 case STACK_REG:
4011 return 1;
4012
4013 case NO_REGS:
4014 return 0;
4015
4016 default:
4017 break;
4018 }
4019 gcc_unreachable ();
4020 }
4021
4022 static reg_class_t
4023 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4024 {
4025 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4026 ? GENERAL_REGS : regclass);
4027 }
4028
4029 void
4030 aarch64_asm_output_labelref (FILE* f, const char *name)
4031 {
4032 asm_fprintf (f, "%U%s", name);
4033 }
4034
4035 static void
4036 aarch64_elf_asm_constructor (rtx symbol, int priority)
4037 {
4038 if (priority == DEFAULT_INIT_PRIORITY)
4039 default_ctor_section_asm_out_constructor (symbol, priority);
4040 else
4041 {
4042 section *s;
4043 char buf[18];
4044 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4045 s = get_section (buf, SECTION_WRITE, NULL);
4046 switch_to_section (s);
4047 assemble_align (POINTER_SIZE);
4048 fputs ("\t.dword\t", asm_out_file);
4049 output_addr_const (asm_out_file, symbol);
4050 fputc ('\n', asm_out_file);
4051 }
4052 }
4053
4054 static void
4055 aarch64_elf_asm_destructor (rtx symbol, int priority)
4056 {
4057 if (priority == DEFAULT_INIT_PRIORITY)
4058 default_dtor_section_asm_out_destructor (symbol, priority);
4059 else
4060 {
4061 section *s;
4062 char buf[18];
4063 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4064 s = get_section (buf, SECTION_WRITE, NULL);
4065 switch_to_section (s);
4066 assemble_align (POINTER_SIZE);
4067 fputs ("\t.dword\t", asm_out_file);
4068 output_addr_const (asm_out_file, symbol);
4069 fputc ('\n', asm_out_file);
4070 }
4071 }
4072
4073 const char*
4074 aarch64_output_casesi (rtx *operands)
4075 {
4076 char buf[100];
4077 char label[100];
4078 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4079 int index;
4080 static const char *const patterns[4][2] =
4081 {
4082 {
4083 "ldrb\t%w3, [%0,%w1,uxtw]",
4084 "add\t%3, %4, %w3, sxtb #2"
4085 },
4086 {
4087 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4088 "add\t%3, %4, %w3, sxth #2"
4089 },
4090 {
4091 "ldr\t%w3, [%0,%w1,uxtw #2]",
4092 "add\t%3, %4, %w3, sxtw #2"
4093 },
4094 /* We assume that DImode is only generated when not optimizing and
4095 that we don't really need 64-bit address offsets. That would
4096 imply an object file with 8GB of code in a single function! */
4097 {
4098 "ldr\t%w3, [%0,%w1,uxtw #2]",
4099 "add\t%3, %4, %w3, sxtw #2"
4100 }
4101 };
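  /* A sketch of the emitted sequence for a 2-byte dispatch table, where
     operand 0 is the table base, 1 the index, 2 the table label and 3/4
     are scratch registers (label name illustrative):

         ldrh   %w3, [%0,%w1,uxtw #1]   // load the table entry
         adr    %4, .Lrtx<N>            // address of the anchor label below
         add    %3, %4, %w3, sxth #2    // entry, scaled by 4, added to anchor
         br     %3
     .Lrtx<N>:  */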
4102
4103 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4104
4105 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4106
4107 gcc_assert (index >= 0 && index <= 3);
4108
4109   /* Need to implement table size reduction, by changing the code below.  */
4110 output_asm_insn (patterns[index][0], operands);
4111 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4112 snprintf (buf, sizeof (buf),
4113 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4114 output_asm_insn (buf, operands);
4115 output_asm_insn (patterns[index][1], operands);
4116 output_asm_insn ("br\t%3", operands);
4117 assemble_label (asm_out_file, label);
4118 return "";
4119 }
4120
4121
4122 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4123 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4124 operator. */
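/* For example, aarch64_uxt_size (1, 0x1fe) returns 8: the mask is 0xff
   shifted left by one, i.e. a byte operand scaled by 2 (UXTB with LSL #1).
   A mask matching none of the patterns yields 0.  */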
4125
4126 int
4127 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4128 {
4129 if (shift >= 0 && shift <= 3)
4130 {
4131 int size;
4132 for (size = 8; size <= 32; size *= 2)
4133 {
4134 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4135 if (mask == bits << shift)
4136 return size;
4137 }
4138 }
4139 return 0;
4140 }
4141
4142 static bool
4143 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4144 const_rtx x ATTRIBUTE_UNUSED)
4145 {
4146 /* We can't use blocks for constants when we're using a per-function
4147 constant pool. */
4148 return false;
4149 }
4150
4151 static section *
4152 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4153 rtx x ATTRIBUTE_UNUSED,
4154 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4155 {
4156 /* Force all constant pool entries into the current function section. */
4157 return function_section (current_function_decl);
4158 }
4159
4160
4161 /* Costs. */
4162
4163 /* Helper function for rtx cost calculation. Strip a shift expression
4164 from X. Returns the inner operand if successful, or the original
4165 expression on failure. */
4166 static rtx
4167 aarch64_strip_shift (rtx x)
4168 {
4169 rtx op = x;
4170
4171 if ((GET_CODE (op) == ASHIFT
4172 || GET_CODE (op) == ASHIFTRT
4173 || GET_CODE (op) == LSHIFTRT)
4174 && CONST_INT_P (XEXP (op, 1)))
4175 return XEXP (op, 0);
4176
4177 if (GET_CODE (op) == MULT
4178 && CONST_INT_P (XEXP (op, 1))
4179 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4180 return XEXP (op, 0);
4181
4182 return x;
4183 }
4184
4185 /* Helper function for rtx cost calculation. Strip a shift or extend
4186 expression from X. Returns the inner operand if successful, or the
4187 original expression on failure. We deal with a number of possible
4188 canonicalization variations here. */
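/* For instance, (ashift (sign_extend (reg)) (const_int 2)) -- a
   sign-extended register scaled by 4 -- strips down to the inner
   register, since the shift amount falls in the 1..4 range handled
   below.  */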
4189 static rtx
4190 aarch64_strip_shift_or_extend (rtx x)
4191 {
4192 rtx op = x;
4193
4194 /* Zero and sign extraction of a widened value. */
4195 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4196 && XEXP (op, 2) == const0_rtx
4197 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4198 XEXP (op, 1)))
4199 return XEXP (XEXP (op, 0), 0);
4200
4201 /* It can also be represented (for zero-extend) as an AND with an
4202 immediate. */
4203 if (GET_CODE (op) == AND
4204 && GET_CODE (XEXP (op, 0)) == MULT
4205 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4206 && CONST_INT_P (XEXP (op, 1))
4207 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4208 INTVAL (XEXP (op, 1))) != 0)
4209 return XEXP (XEXP (op, 0), 0);
4210
4211 /* Now handle extended register, as this may also have an optional
4212 left shift by 1..4. */
4213 if (GET_CODE (op) == ASHIFT
4214 && CONST_INT_P (XEXP (op, 1))
4215 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4216 op = XEXP (op, 0);
4217
4218 if (GET_CODE (op) == ZERO_EXTEND
4219 || GET_CODE (op) == SIGN_EXTEND)
4220 op = XEXP (op, 0);
4221
4222 if (op != x)
4223 return op;
4224
4225 return aarch64_strip_shift (x);
4226 }
4227
4228 /* Calculate the cost of calculating X, storing it in *COST. Result
4229 is true if the total cost of the operation has now been calculated. */
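/* For example, (plus (mult (sign_extend a) (sign_extend b)) c) is costed
   below as a single multiply-extend-add (an SMADDL-style operation)
   rather than as an independent multiply followed by an add.  */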
4230 static bool
4231 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4232 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4233 {
4234 rtx op0, op1;
4235 const struct cpu_rtx_cost_table *extra_cost
4236 = aarch64_tune_params->insn_extra_cost;
4237
4238 switch (code)
4239 {
4240 case SET:
4241 op0 = SET_DEST (x);
4242 op1 = SET_SRC (x);
4243
4244 switch (GET_CODE (op0))
4245 {
4246 case MEM:
4247 if (speed)
4248 *cost += extra_cost->memory_store;
4249
4250 if (op1 != const0_rtx)
4251 *cost += rtx_cost (op1, SET, 1, speed);
4252 return true;
4253
4254 case SUBREG:
4255 if (! REG_P (SUBREG_REG (op0)))
4256 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4257 /* Fall through. */
4258 case REG:
4259 /* Cost is just the cost of the RHS of the set. */
4260 *cost += rtx_cost (op1, SET, 1, true);
4261 return true;
4262
4263 case ZERO_EXTRACT: /* Bit-field insertion. */
4264 case SIGN_EXTRACT:
4265 /* Strip any redundant widening of the RHS to meet the width of
4266 the target. */
4267 if (GET_CODE (op1) == SUBREG)
4268 op1 = SUBREG_REG (op1);
4269 if ((GET_CODE (op1) == ZERO_EXTEND
4270 || GET_CODE (op1) == SIGN_EXTEND)
4271 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4272 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4273 >= INTVAL (XEXP (op0, 1))))
4274 op1 = XEXP (op1, 0);
4275 *cost += rtx_cost (op1, SET, 1, speed);
4276 return true;
4277
4278 default:
4279 break;
4280 }
4281 return false;
4282
4283 case MEM:
4284 if (speed)
4285 *cost += extra_cost->memory_load;
4286
4287 return true;
4288
4289 case NEG:
4290 op0 = CONST0_RTX (GET_MODE (x));
4291 op1 = XEXP (x, 0);
4292 goto cost_minus;
4293
4294 case COMPARE:
4295 op0 = XEXP (x, 0);
4296 op1 = XEXP (x, 1);
4297
4298 if (op1 == const0_rtx
4299 && GET_CODE (op0) == AND)
4300 {
4301 x = op0;
4302 goto cost_logic;
4303 }
4304
4305 /* Comparisons can work if the order is swapped.
4306 Canonicalization puts the more complex operation first, but
4307 we want it in op1. */
4308 if (! (REG_P (op0)
4309 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4310 {
4311 op0 = XEXP (x, 1);
4312 op1 = XEXP (x, 0);
4313 }
4314 goto cost_minus;
4315
4316 case MINUS:
4317 op0 = XEXP (x, 0);
4318 op1 = XEXP (x, 1);
4319
4320 cost_minus:
4321 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4322 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4323 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4324 {
4325 if (op0 != const0_rtx)
4326 *cost += rtx_cost (op0, MINUS, 0, speed);
4327
4328 if (CONST_INT_P (op1))
4329 {
4330 if (!aarch64_uimm12_shift (INTVAL (op1)))
4331 *cost += rtx_cost (op1, MINUS, 1, speed);
4332 }
4333 else
4334 {
4335 op1 = aarch64_strip_shift_or_extend (op1);
4336 *cost += rtx_cost (op1, MINUS, 1, speed);
4337 }
4338 return true;
4339 }
4340
4341 return false;
4342
4343 case PLUS:
4344 op0 = XEXP (x, 0);
4345 op1 = XEXP (x, 1);
4346
4347 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4348 {
4349 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4350 {
4351 *cost += rtx_cost (op0, PLUS, 0, speed);
4352 }
4353 else
4354 {
4355 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4356
4357 if (new_op0 == op0
4358 && GET_CODE (op0) == MULT)
4359 {
4360 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4361 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4362 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4363 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4364 {
4365 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4366 speed)
4367 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4368 speed)
4369 + rtx_cost (op1, PLUS, 1, speed));
4370 if (speed)
4371 *cost += extra_cost->int_multiply_extend_add;
4372 return true;
4373 }
4374 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4375 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4376 + rtx_cost (op1, PLUS, 1, speed));
4377
4378 if (speed)
4379 *cost += extra_cost->int_multiply_add;
4380 }
4381
4382 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4383 + rtx_cost (op1, PLUS, 1, speed));
4384 }
4385 return true;
4386 }
4387
4388 return false;
4389
4390 case IOR:
4391 case XOR:
4392 case AND:
4393 cost_logic:
4394 op0 = XEXP (x, 0);
4395 op1 = XEXP (x, 1);
4396
4397 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4398 {
4399 if (CONST_INT_P (op1)
4400 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4401 {
4402 *cost += rtx_cost (op0, AND, 0, speed);
4403 }
4404 else
4405 {
4406 if (GET_CODE (op0) == NOT)
4407 op0 = XEXP (op0, 0);
4408 op0 = aarch64_strip_shift (op0);
4409 *cost += (rtx_cost (op0, AND, 0, speed)
4410 + rtx_cost (op1, AND, 1, speed));
4411 }
4412 return true;
4413 }
4414 return false;
4415
4416 case ZERO_EXTEND:
4417 if ((GET_MODE (x) == DImode
4418 && GET_MODE (XEXP (x, 0)) == SImode)
4419 || GET_CODE (XEXP (x, 0)) == MEM)
4420 {
4421 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4422 return true;
4423 }
4424 return false;
4425
4426 case SIGN_EXTEND:
4427 if (GET_CODE (XEXP (x, 0)) == MEM)
4428 {
4429 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4430 return true;
4431 }
4432 return false;
4433
4434 case ROTATE:
4435 if (!CONST_INT_P (XEXP (x, 1)))
4436 *cost += COSTS_N_INSNS (2);
4437 /* Fall through. */
4438 case ROTATERT:
4439 case LSHIFTRT:
4440 case ASHIFT:
4441 case ASHIFTRT:
4442
4443 /* Shifting by a register often takes an extra cycle. */
4444 if (speed && !CONST_INT_P (XEXP (x, 1)))
4445 *cost += extra_cost->register_shift;
4446
4447 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4448 return true;
4449
4450 case HIGH:
4451 if (!CONSTANT_P (XEXP (x, 0)))
4452 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4453 return true;
4454
4455 case LO_SUM:
4456 if (!CONSTANT_P (XEXP (x, 1)))
4457 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4458 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4459 return true;
4460
4461 case ZERO_EXTRACT:
4462 case SIGN_EXTRACT:
4463 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4464 return true;
4465
4466 case MULT:
4467 op0 = XEXP (x, 0);
4468 op1 = XEXP (x, 1);
4469
4470 *cost = COSTS_N_INSNS (1);
4471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4472 {
4473 if (CONST_INT_P (op1)
4474 && exact_log2 (INTVAL (op1)) > 0)
4475 {
4476 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4477 return true;
4478 }
4479
4480 if ((GET_CODE (op0) == ZERO_EXTEND
4481 && GET_CODE (op1) == ZERO_EXTEND)
4482 || (GET_CODE (op0) == SIGN_EXTEND
4483 && GET_CODE (op1) == SIGN_EXTEND))
4484 {
4485 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4486 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4487 if (speed)
4488 *cost += extra_cost->int_multiply_extend;
4489 return true;
4490 }
4491
4492 if (speed)
4493 *cost += extra_cost->int_multiply;
4494 }
4495 else if (speed)
4496 {
4497 if (GET_MODE (x) == DFmode)
4498 *cost += extra_cost->double_multiply;
4499 else if (GET_MODE (x) == SFmode)
4500 *cost += extra_cost->float_multiply;
4501 }
4502
4503 return false; /* All arguments need to be in registers. */
4504
4505 case MOD:
4506 case UMOD:
4507 *cost = COSTS_N_INSNS (2);
4508 if (speed)
4509 {
4510 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4511 *cost += (extra_cost->int_multiply_add
4512 + extra_cost->int_divide);
4513 else if (GET_MODE (x) == DFmode)
4514 *cost += (extra_cost->double_multiply
4515 + extra_cost->double_divide);
4516 else if (GET_MODE (x) == SFmode)
4517 *cost += (extra_cost->float_multiply
4518 + extra_cost->float_divide);
4519 }
4520 return false; /* All arguments need to be in registers. */
4521
4522 case DIV:
4523 case UDIV:
4524 *cost = COSTS_N_INSNS (1);
4525 if (speed)
4526 {
4527 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4528 *cost += extra_cost->int_divide;
4529 else if (GET_MODE (x) == DFmode)
4530 *cost += extra_cost->double_divide;
4531 else if (GET_MODE (x) == SFmode)
4532 *cost += extra_cost->float_divide;
4533 }
4534 return false; /* All arguments need to be in registers. */
4535
4536 default:
4537 break;
4538 }
4539 return false;
4540 }
4541
4542 static int
4543 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4544 enum machine_mode mode ATTRIBUTE_UNUSED,
4545 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4546 {
4547 enum rtx_code c = GET_CODE (x);
4548 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4549
4550 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4551 return addr_cost->pre_modify;
4552
4553 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4554 return addr_cost->post_modify;
4555
4556 if (c == PLUS)
4557 {
4558 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4559 return addr_cost->imm_offset;
4560 else if (GET_CODE (XEXP (x, 0)) == MULT
4561 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4562 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4563 return addr_cost->register_extend;
4564
4565 return addr_cost->register_offset;
4566 }
4567 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4568 return addr_cost->imm_offset;
4569
4570 return 0;
4571 }
4572
4573 static int
4574 aarch64_register_move_cost (enum machine_mode mode,
4575 reg_class_t from_i, reg_class_t to_i)
4576 {
4577 enum reg_class from = (enum reg_class) from_i;
4578 enum reg_class to = (enum reg_class) to_i;
4579 const struct cpu_regmove_cost *regmove_cost
4580 = aarch64_tune_params->regmove_cost;
4581
4582 if (from == GENERAL_REGS && to == GENERAL_REGS)
4583 return regmove_cost->GP2GP;
4584 else if (from == GENERAL_REGS)
4585 return regmove_cost->GP2FP;
4586 else if (to == GENERAL_REGS)
4587 return regmove_cost->FP2GP;
4588
4589 /* When AdvSIMD instructions are disabled it is not possible to move
4590 a 128-bit value directly between Q registers. This is handled in
4591 secondary reload. A general register is used as a scratch to move
4592 the upper DI value and the lower DI value is moved directly,
4593 hence the cost is the sum of three moves. */
4594   if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4595 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4596
4597 return regmove_cost->FP2FP;
4598 }
4599
4600 static int
4601 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4602 reg_class_t rclass ATTRIBUTE_UNUSED,
4603 bool in ATTRIBUTE_UNUSED)
4604 {
4605 return aarch64_tune_params->memmov_cost;
4606 }
4607
4608 static void initialize_aarch64_code_model (void);
4609
4610 /* Parse the architecture extension string. */
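/* For example, given "+fp+nosimd" the loop below turns on the "fp"
   entry's flags_on bits in aarch64_isa_flags and then, because of the
   "no" prefix, clears the "simd" entry's flags_off bits.  */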
4611
4612 static void
4613 aarch64_parse_extension (char *str)
4614 {
4615 /* The extension string is parsed left to right. */
4616 const struct aarch64_option_extension *opt = NULL;
4617
4618 /* Flag to say whether we are adding or removing an extension. */
4619 int adding_ext = -1;
4620
4621 while (str != NULL && *str != 0)
4622 {
4623 char *ext;
4624 size_t len;
4625
4626 str++;
4627 ext = strchr (str, '+');
4628
4629 if (ext != NULL)
4630 len = ext - str;
4631 else
4632 len = strlen (str);
4633
4634 if (len >= 2 && strncmp (str, "no", 2) == 0)
4635 {
4636 adding_ext = 0;
4637 len -= 2;
4638 str += 2;
4639 }
4640 else if (len > 0)
4641 adding_ext = 1;
4642
4643 if (len == 0)
4644 {
4645 error ("missing feature modifier after %qs", "+no");
4646 return;
4647 }
4648
4649 /* Scan over the extensions table trying to find an exact match. */
4650 for (opt = all_extensions; opt->name != NULL; opt++)
4651 {
4652 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4653 {
4654 /* Add or remove the extension. */
4655 if (adding_ext)
4656 aarch64_isa_flags |= opt->flags_on;
4657 else
4658 aarch64_isa_flags &= ~(opt->flags_off);
4659 break;
4660 }
4661 }
4662
4663 if (opt->name == NULL)
4664 {
4665 /* Extension not found in list. */
4666 error ("unknown feature modifier %qs", str);
4667 return;
4668 }
4669
4670 str = ext;
4671     }
4672
4673 return;
4674 }
4675
4676 /* Parse the ARCH string. */
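/* For example, -march=armv8-a+nofp matches the "armv8-a" entry in
   all_architectures, takes its ISA flags and core, and then hands the
   remaining "+nofp" string to aarch64_parse_extension.  */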
4677
4678 static void
4679 aarch64_parse_arch (void)
4680 {
4681 char *ext;
4682 const struct processor *arch;
4683 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4684 size_t len;
4685
4686 strcpy (str, aarch64_arch_string);
4687
4688 ext = strchr (str, '+');
4689
4690 if (ext != NULL)
4691 len = ext - str;
4692 else
4693 len = strlen (str);
4694
4695 if (len == 0)
4696 {
4697 error ("missing arch name in -march=%qs", str);
4698 return;
4699 }
4700
4701 /* Loop through the list of supported ARCHs to find a match. */
4702 for (arch = all_architectures; arch->name != NULL; arch++)
4703 {
4704 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4705 {
4706 selected_arch = arch;
4707 aarch64_isa_flags = selected_arch->flags;
4708 selected_cpu = &all_cores[selected_arch->core];
4709
4710 if (ext != NULL)
4711 {
4712 /* ARCH string contains at least one extension. */
4713 aarch64_parse_extension (ext);
4714 }
4715
4716 return;
4717 }
4718 }
4719
4720 /* ARCH name not found in list. */
4721 error ("unknown value %qs for -march", str);
4722 return;
4723 }
4724
4725 /* Parse the CPU string. */
4726
4727 static void
4728 aarch64_parse_cpu (void)
4729 {
4730 char *ext;
4731 const struct processor *cpu;
4732 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4733 size_t len;
4734
4735 strcpy (str, aarch64_cpu_string);
4736
4737 ext = strchr (str, '+');
4738
4739 if (ext != NULL)
4740 len = ext - str;
4741 else
4742 len = strlen (str);
4743
4744 if (len == 0)
4745 {
4746 error ("missing cpu name in -mcpu=%qs", str);
4747 return;
4748 }
4749
4750 /* Loop through the list of supported CPUs to find a match. */
4751 for (cpu = all_cores; cpu->name != NULL; cpu++)
4752 {
4753 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4754 {
4755 selected_cpu = cpu;
4756 aarch64_isa_flags = selected_cpu->flags;
4757
4758 if (ext != NULL)
4759 {
4760 /* CPU string contains at least one extension. */
4761 aarch64_parse_extension (ext);
4762 }
4763
4764 return;
4765 }
4766 }
4767
4768 /* CPU name not found in list. */
4769 error ("unknown value %qs for -mcpu", str);
4770 return;
4771 }
4772
4773 /* Parse the TUNE string. */
4774
4775 static void
4776 aarch64_parse_tune (void)
4777 {
4778 const struct processor *cpu;
4779 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4780 strcpy (str, aarch64_tune_string);
4781
4782 /* Loop through the list of supported CPUs to find a match. */
4783 for (cpu = all_cores; cpu->name != NULL; cpu++)
4784 {
4785 if (strcmp (cpu->name, str) == 0)
4786 {
4787 selected_tune = cpu;
4788 return;
4789 }
4790 }
4791
4792 /* CPU name not found in list. */
4793 error ("unknown value %qs for -mtune", str);
4794 return;
4795 }
4796
4797
4798 /* Implement TARGET_OPTION_OVERRIDE. */
4799
4800 static void
4801 aarch64_override_options (void)
4802 {
4803 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4804 otherwise march remains undefined. mtune can be used with either march or
4805 mcpu. */
4806
4807 if (aarch64_arch_string)
4808 {
4809 aarch64_parse_arch ();
4810 aarch64_cpu_string = NULL;
4811 }
4812
4813 if (aarch64_cpu_string)
4814 {
4815 aarch64_parse_cpu ();
4816 selected_arch = NULL;
4817 }
4818
4819 if (aarch64_tune_string)
4820 {
4821 aarch64_parse_tune ();
4822 }
4823
4824 initialize_aarch64_code_model ();
4825
4826 aarch64_build_bitmask_table ();
4827
4828 /* This target defaults to strict volatile bitfields. */
4829 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4830 flag_strict_volatile_bitfields = 1;
4831
4832 /* If the user did not specify a processor, choose the default
4833 one for them. This will be the CPU set during configuration using
4834 --with-cpu, otherwise it is "generic". */
4835 if (!selected_cpu)
4836 {
4837 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4838 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4839 }
4840
4841 gcc_assert (selected_cpu);
4842
4843   /* The selected cpu may be an architecture, so look up tuning by core ID.  */
4844 if (!selected_tune)
4845 selected_tune = &all_cores[selected_cpu->core];
4846
4847 aarch64_tune_flags = selected_tune->flags;
4848 aarch64_tune = selected_tune->core;
4849 aarch64_tune_params = selected_tune->tune;
4850
4851 if (aarch64_fix_a53_err835769 == 2)
4852 {
4853 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
4854 aarch64_fix_a53_err835769 = 1;
4855 #else
4856 aarch64_fix_a53_err835769 = 0;
4857 #endif
4858 }
4859
4860 aarch64_override_options_after_change ();
4861 }
4862
4863 /* Implement targetm.override_options_after_change. */
4864
4865 static void
4866 aarch64_override_options_after_change (void)
4867 {
4868 faked_omit_frame_pointer = false;
4869
4870 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4871 that aarch64_frame_pointer_required will be called. We need to remember
4872 whether flag_omit_frame_pointer was turned on normally or just faked. */
4873
4874 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4875 {
4876 flag_omit_frame_pointer = true;
4877 faked_omit_frame_pointer = true;
4878 }
4879 }
4880
4881 static struct machine_function *
4882 aarch64_init_machine_status (void)
4883 {
4884 struct machine_function *machine;
4885 machine = ggc_alloc_cleared_machine_function ();
4886 return machine;
4887 }
4888
4889 void
4890 aarch64_init_expanders (void)
4891 {
4892 init_machine_status = aarch64_init_machine_status;
4893 }
4894
4895 /* Validate and select the code model used for code generation, taking -fpic/-fPIC into account.  */
4896 static void
4897 initialize_aarch64_code_model (void)
4898 {
4899 if (flag_pic)
4900 {
4901 switch (aarch64_cmodel_var)
4902 {
4903 case AARCH64_CMODEL_TINY:
4904 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4905 break;
4906 case AARCH64_CMODEL_SMALL:
4907 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4908 break;
4909 case AARCH64_CMODEL_LARGE:
4910 sorry ("code model %qs with -f%s", "large",
4911 flag_pic > 1 ? "PIC" : "pic");
4912 default:
4913 gcc_unreachable ();
4914 }
4915 }
4916 else
4917 aarch64_cmodel = aarch64_cmodel_var;
4918 }
4919
4920 /* Return true if SYMBOL_REF X binds locally. */
4921
4922 static bool
4923 aarch64_symbol_binds_local_p (const_rtx x)
4924 {
4925 return (SYMBOL_REF_DECL (x)
4926 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4927 : SYMBOL_REF_LOCAL_P (x));
4928 }
4929
4930 /* Return true if SYMBOL_REF X is thread local.  */
4931 static bool
4932 aarch64_tls_symbol_p (rtx x)
4933 {
4934 if (! TARGET_HAVE_TLS)
4935 return false;
4936
4937 if (GET_CODE (x) != SYMBOL_REF)
4938 return false;
4939
4940 return SYMBOL_REF_TLS_MODEL (x) != 0;
4941 }
4942
4943 /* Classify a TLS symbol into one of the TLS kinds. */
4944 enum aarch64_symbol_type
4945 aarch64_classify_tls_symbol (rtx x)
4946 {
4947 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4948
4949 switch (tls_kind)
4950 {
4951 case TLS_MODEL_GLOBAL_DYNAMIC:
4952 case TLS_MODEL_LOCAL_DYNAMIC:
4953 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4954
4955 case TLS_MODEL_INITIAL_EXEC:
4956 return SYMBOL_SMALL_GOTTPREL;
4957
4958 case TLS_MODEL_LOCAL_EXEC:
4959 return SYMBOL_SMALL_TPREL;
4960
4961 case TLS_MODEL_EMULATED:
4962 case TLS_MODEL_NONE:
4963 return SYMBOL_FORCE_TO_MEM;
4964
4965 default:
4966 gcc_unreachable ();
4967 }
4968 }
4969
4970 /* Return the method that should be used to access SYMBOL_REF or
4971 LABEL_REF X in context CONTEXT. */
4972 enum aarch64_symbol_type
4973 aarch64_classify_symbol (rtx x,
4974 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4975 {
4976 if (GET_CODE (x) == LABEL_REF)
4977 {
4978 switch (aarch64_cmodel)
4979 {
4980 case AARCH64_CMODEL_LARGE:
4981 return SYMBOL_FORCE_TO_MEM;
4982
4983 case AARCH64_CMODEL_TINY_PIC:
4984 case AARCH64_CMODEL_TINY:
4985 case AARCH64_CMODEL_SMALL_PIC:
4986 case AARCH64_CMODEL_SMALL:
4987 return SYMBOL_SMALL_ABSOLUTE;
4988
4989 default:
4990 gcc_unreachable ();
4991 }
4992 }
4993
4994 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4995
4996 switch (aarch64_cmodel)
4997 {
4998 case AARCH64_CMODEL_LARGE:
4999 return SYMBOL_FORCE_TO_MEM;
5000
5001 case AARCH64_CMODEL_TINY:
5002 case AARCH64_CMODEL_SMALL:
5003
5004     /* This is needed to get DFmode and TImode constants loaded from
5005        the constant pool.  It is necessary to dump TImode values into
5006        the constant pool: we don't handle TImode constant loads properly
5007        yet and hence need to use the constant pool.  */
5008 if (CONSTANT_POOL_ADDRESS_P (x))
5009 return SYMBOL_FORCE_TO_MEM;
5010
5011 if (aarch64_tls_symbol_p (x))
5012 return aarch64_classify_tls_symbol (x);
5013
5014 if (SYMBOL_REF_WEAK (x))
5015 return SYMBOL_FORCE_TO_MEM;
5016
5017 return SYMBOL_SMALL_ABSOLUTE;
5018
5019 case AARCH64_CMODEL_TINY_PIC:
5020 case AARCH64_CMODEL_SMALL_PIC:
5021
5022 if (CONSTANT_POOL_ADDRESS_P (x))
5023 return SYMBOL_FORCE_TO_MEM;
5024
5025 if (aarch64_tls_symbol_p (x))
5026 return aarch64_classify_tls_symbol (x);
5027
5028 if (!aarch64_symbol_binds_local_p (x))
5029 return SYMBOL_SMALL_GOT;
5030
5031 return SYMBOL_SMALL_ABSOLUTE;
5032
5033 default:
5034 gcc_unreachable ();
5035 }
5036 /* By default push everything into the constant pool. */
5037 return SYMBOL_FORCE_TO_MEM;
5038 }
5039
5040 /* Return true if X is a symbolic constant that can be used in context
5041 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5042
5043 bool
5044 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5045 enum aarch64_symbol_type *symbol_type)
5046 {
5047 rtx offset;
5048 split_const (x, &x, &offset);
5049 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5050 *symbol_type = aarch64_classify_symbol (x, context);
5051 else
5052 return false;
5053
5054 /* No checking of offset at this point. */
5055 return true;
5056 }
5057
5058 bool
5059 aarch64_constant_address_p (rtx x)
5060 {
5061 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5062 }
5063
5064 bool
5065 aarch64_legitimate_pic_operand_p (rtx x)
5066 {
5067 if (GET_CODE (x) == SYMBOL_REF
5068 || (GET_CODE (x) == CONST
5069 && GET_CODE (XEXP (x, 0)) == PLUS
5070 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5071 return false;
5072
5073 return true;
5074 }
5075
5076 /* Return true if X holds either a quarter-precision or
5077 floating-point +0.0 constant. */
5078 static bool
5079 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5080 {
5081 if (!CONST_DOUBLE_P (x))
5082 return false;
5083
5084 /* TODO: We could handle moving 0.0 to a TFmode register,
5085 but first we would like to refactor the movtf_aarch64
5086 to be more amicable to split moves properly and
5087 correctly gate on TARGET_SIMD. For now - reject all
5088 constants which are not to SFmode or DFmode registers. */
5089 if (!(mode == SFmode || mode == DFmode))
5090 return false;
5091
5092 if (aarch64_float_const_zero_rtx_p (x))
5093 return true;
5094 return aarch64_float_const_representable_p (x);
5095 }
5096
5097 static bool
5098 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5099 {
5100 /* Do not allow vector struct mode constants. We could support
5101 0 and -1 easily, but they need support in aarch64-simd.md. */
5102 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5103 return false;
5104
5105 /* This could probably go away because
5106 we now decompose CONST_INTs according to expand_mov_immediate. */
5107 if ((GET_CODE (x) == CONST_VECTOR
5108 && aarch64_simd_valid_immediate (x, mode, false,
5109 NULL, NULL, NULL, NULL, NULL) != -1)
5110 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5111 return !targetm.cannot_force_const_mem (mode, x);
5112
5113 if (GET_CODE (x) == HIGH
5114 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5115 return true;
5116
5117 return aarch64_constant_address_p (x);
5118 }
5119
5120 rtx
5121 aarch64_load_tp (rtx target)
5122 {
5123 if (!target
5124 || GET_MODE (target) != Pmode
5125 || !register_operand (target, Pmode))
5126 target = gen_reg_rtx (Pmode);
5127
5128 /* Can return in any reg. */
5129 emit_insn (gen_aarch64_load_tp_hard (target));
5130 return target;
5131 }
5132
5133 /* On AAPCS systems, this is the "struct __va_list". */
5134 static GTY(()) tree va_list_type;
5135
5136 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5137 Return the type to use as __builtin_va_list.
5138
5139 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5140
5141 struct __va_list
5142 {
5143 void *__stack;
5144 void *__gr_top;
5145 void *__vr_top;
5146 int __gr_offs;
5147 int __vr_offs;
5148 }; */
5149
5150 static tree
5151 aarch64_build_builtin_va_list (void)
5152 {
5153 tree va_list_name;
5154 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5155
5156 /* Create the type. */
5157 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5158 /* Give it the required name. */
5159 va_list_name = build_decl (BUILTINS_LOCATION,
5160 TYPE_DECL,
5161 get_identifier ("__va_list"),
5162 va_list_type);
5163 DECL_ARTIFICIAL (va_list_name) = 1;
5164 TYPE_NAME (va_list_type) = va_list_name;
5165 TYPE_STUB_DECL (va_list_type) = va_list_name;
5166
5167 /* Create the fields. */
5168 f_stack = build_decl (BUILTINS_LOCATION,
5169 FIELD_DECL, get_identifier ("__stack"),
5170 ptr_type_node);
5171 f_grtop = build_decl (BUILTINS_LOCATION,
5172 FIELD_DECL, get_identifier ("__gr_top"),
5173 ptr_type_node);
5174 f_vrtop = build_decl (BUILTINS_LOCATION,
5175 FIELD_DECL, get_identifier ("__vr_top"),
5176 ptr_type_node);
5177 f_groff = build_decl (BUILTINS_LOCATION,
5178 FIELD_DECL, get_identifier ("__gr_offs"),
5179 integer_type_node);
5180 f_vroff = build_decl (BUILTINS_LOCATION,
5181 FIELD_DECL, get_identifier ("__vr_offs"),
5182 integer_type_node);
5183
5184 DECL_ARTIFICIAL (f_stack) = 1;
5185 DECL_ARTIFICIAL (f_grtop) = 1;
5186 DECL_ARTIFICIAL (f_vrtop) = 1;
5187 DECL_ARTIFICIAL (f_groff) = 1;
5188 DECL_ARTIFICIAL (f_vroff) = 1;
5189
5190 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5191 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5192 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5193 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5194 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5195
5196 TYPE_FIELDS (va_list_type) = f_stack;
5197 DECL_CHAIN (f_stack) = f_grtop;
5198 DECL_CHAIN (f_grtop) = f_vrtop;
5199 DECL_CHAIN (f_vrtop) = f_groff;
5200 DECL_CHAIN (f_groff) = f_vroff;
5201
5202 /* Compute its layout. */
5203 layout_type (va_list_type);
5204
5205 return va_list_type;
5206 }
5207
5208 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5209 static void
5210 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5211 {
5212 const CUMULATIVE_ARGS *cum;
5213 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5214 tree stack, grtop, vrtop, groff, vroff;
5215 tree t;
5216 int gr_save_area_size;
5217 int vr_save_area_size;
5218 int vr_offset;
5219
5220 cum = &crtl->args.info;
5221 gr_save_area_size
5222 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5223 vr_save_area_size
5224 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5225
5226 if (TARGET_GENERAL_REGS_ONLY)
5227 {
5228 if (cum->aapcs_nvrn > 0)
5229 sorry ("%qs and floating point or vector arguments",
5230 "-mgeneral-regs-only");
5231 vr_save_area_size = 0;
5232 }
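  /* A sketch of the register save area set up below and in
     aarch64_setup_incoming_varargs, with addresses given relative to
     virtual_incoming_args_rtx (VIA):

         __stack   = VIA + aapcs_stack_size words   (next stack argument)
         __gr_top  = VIA                            (end of the GR save area)
         __vr_top  = VIA - gr_save_area_size, rounded down to 16 bytes
         __gr_offs = -gr_save_area_size
         __vr_offs = -vr_save_area_size  */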
5233
5234 f_stack = TYPE_FIELDS (va_list_type_node);
5235 f_grtop = DECL_CHAIN (f_stack);
5236 f_vrtop = DECL_CHAIN (f_grtop);
5237 f_groff = DECL_CHAIN (f_vrtop);
5238 f_vroff = DECL_CHAIN (f_groff);
5239
5240 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5241 NULL_TREE);
5242 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5243 NULL_TREE);
5244 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5245 NULL_TREE);
5246 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5247 NULL_TREE);
5248 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5249 NULL_TREE);
5250
5251 /* Emit code to initialize STACK, which points to the next varargs stack
5252 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5253 by named arguments. STACK is 8-byte aligned. */
5254 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5255 if (cum->aapcs_stack_size > 0)
5256 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5257 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5259
5260 /* Emit code to initialize GRTOP, the top of the GR save area.
5261 virtual_incoming_args_rtx should have been 16 byte aligned. */
5262 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5263 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5265
5266 /* Emit code to initialize VRTOP, the top of the VR save area.
5267 This address is gr_save_area_bytes below GRTOP, rounded
5268 down to the next 16-byte boundary. */
5269 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5270 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5271 STACK_BOUNDARY / BITS_PER_UNIT);
5272
5273 if (vr_offset)
5274 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5275 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5277
5278 /* Emit code to initialize GROFF, the offset from GRTOP of the
5279 next GPR argument. */
5280 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5281 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5283
5284   /* Likewise emit code to initialize VROFF, the offset from VRTOP
5285      of the next VR argument.  */
5286 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5287 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5288 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5289 }
5290
5291 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
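/* A sketch of the expression tree built below (GR case shown; the VR
   case is analogous):

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;                          [cond1]
     ap.__gr_offs = off + <rounded size>;
     if (ap.__gr_offs > 0)
       goto on_stack;                          [cond2]
     addr = ap.__gr_top + off;                 register save area case,
                                               plus big-endian adjustments
     ...
   on_stack:
     addr = ap.__stack;
     ap.__stack += <size rounded up to 8 bytes>;  */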
5292
5293 static tree
5294 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5295 gimple_seq *post_p ATTRIBUTE_UNUSED)
5296 {
5297 tree addr;
5298 bool indirect_p;
5299 bool is_ha; /* is HFA or HVA. */
5300 bool dw_align; /* double-word align. */
5301 enum machine_mode ag_mode = VOIDmode;
5302 int nregs;
5303 enum machine_mode mode;
5304
5305 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5306 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5307 HOST_WIDE_INT size, rsize, adjust, align;
5308 tree t, u, cond1, cond2;
5309
5310 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5311 if (indirect_p)
5312 type = build_pointer_type (type);
5313
5314 mode = TYPE_MODE (type);
5315
5316 f_stack = TYPE_FIELDS (va_list_type_node);
5317 f_grtop = DECL_CHAIN (f_stack);
5318 f_vrtop = DECL_CHAIN (f_grtop);
5319 f_groff = DECL_CHAIN (f_vrtop);
5320 f_vroff = DECL_CHAIN (f_groff);
5321
5322 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5323 f_stack, NULL_TREE);
5324 size = int_size_in_bytes (type);
5325 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5326
5327 dw_align = false;
5328 adjust = 0;
5329 if (aarch64_vfp_is_call_or_return_candidate (mode,
5330 type,
5331 &ag_mode,
5332 &nregs,
5333 &is_ha))
5334 {
5335 /* TYPE passed in fp/simd registers. */
5336 if (TARGET_GENERAL_REGS_ONLY)
5337 sorry ("%qs and floating point or vector arguments",
5338 "-mgeneral-regs-only");
5339
5340 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5341 unshare_expr (valist), f_vrtop, NULL_TREE);
5342 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5343 unshare_expr (valist), f_vroff, NULL_TREE);
5344
5345 rsize = nregs * UNITS_PER_VREG;
5346
5347 if (is_ha)
5348 {
5349 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5350 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5351 }
5352 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5353 && size < UNITS_PER_VREG)
5354 {
5355 adjust = UNITS_PER_VREG - size;
5356 }
5357 }
5358 else
5359 {
5360 /* TYPE passed in general registers. */
5361 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5362 unshare_expr (valist), f_grtop, NULL_TREE);
5363 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5364 unshare_expr (valist), f_groff, NULL_TREE);
5365 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5366 nregs = rsize / UNITS_PER_WORD;
5367
5368 if (align > 8)
5369 dw_align = true;
5370
5371 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5372 && size < UNITS_PER_WORD)
5373 {
5374 adjust = UNITS_PER_WORD - size;
5375 }
5376 }
5377
5378 /* Get a local temporary for the field value. */
5379 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5380
5381 /* Emit code to branch if off >= 0. */
5382 t = build2 (GE_EXPR, boolean_type_node, off,
5383 build_int_cst (TREE_TYPE (off), 0));
5384 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5385
5386 if (dw_align)
5387 {
5388 /* Emit: offs = (offs + 15) & -16. */
5389 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5390 build_int_cst (TREE_TYPE (off), 15));
5391 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5392 build_int_cst (TREE_TYPE (off), -16));
5393 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5394 }
5395 else
5396 roundup = NULL;
5397
5398 /* Update ap.__[g|v]r_offs */
5399 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5400 build_int_cst (TREE_TYPE (off), rsize));
5401 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5402
5403 /* String up. */
5404 if (roundup)
5405 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5406
5407 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5408 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5409 build_int_cst (TREE_TYPE (f_off), 0));
5410 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5411
5412 /* String up: make sure the assignment happens before the use. */
5413 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5414 COND_EXPR_ELSE (cond1) = t;
5415
5416   /* Prepare the trees handling the argument that is passed on the stack;
5417      the top-level node is stored in ON_STACK.  */
5418 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5419 if (align > 8)
5420 {
5421 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5422 t = fold_convert (intDI_type_node, arg);
5423 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5424 build_int_cst (TREE_TYPE (t), 15));
5425 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5426 build_int_cst (TREE_TYPE (t), -16));
5427 t = fold_convert (TREE_TYPE (arg), t);
5428 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5429 }
5430 else
5431 roundup = NULL;
5432 /* Advance ap.__stack */
5433 t = fold_convert (intDI_type_node, arg);
5434 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5435 build_int_cst (TREE_TYPE (t), size + 7));
5436 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5437 build_int_cst (TREE_TYPE (t), -8));
5438 t = fold_convert (TREE_TYPE (arg), t);
5439 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5440 /* String up roundup and advance. */
5441 if (roundup)
5442 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5443 /* String up with arg */
5444 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5445 /* Big-endianness related address adjustment. */
5446 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5447 && size < UNITS_PER_WORD)
5448 {
5449 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5450 size_int (UNITS_PER_WORD - size));
5451 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5452 }
5453
5454 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5455 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5456
5457 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5458 t = off;
5459 if (adjust)
5460 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5461 build_int_cst (TREE_TYPE (off), adjust));
5462
5463 t = fold_convert (sizetype, t);
5464 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5465
5466 if (is_ha)
5467 {
5468 /* type ha; // treat as "struct {ftype field[n];}"
5469 ... [computing offs]
5470 for (i = 0; i <nregs; ++i, offs += 16)
5471 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5472 return ha; */
5473 int i;
5474 tree tmp_ha, field_t, field_ptr_t;
5475
5476 /* Declare a local variable. */
5477 tmp_ha = create_tmp_var_raw (type, "ha");
5478 gimple_add_tmp_var (tmp_ha);
5479
5480 /* Establish the base type. */
5481 switch (ag_mode)
5482 {
5483 case SFmode:
5484 field_t = float_type_node;
5485 field_ptr_t = float_ptr_type_node;
5486 break;
5487 case DFmode:
5488 field_t = double_type_node;
5489 field_ptr_t = double_ptr_type_node;
5490 break;
5491 case TFmode:
5492 field_t = long_double_type_node;
5493 field_ptr_t = long_double_ptr_type_node;
5494 break;
5495 /* The half precision and quad precision are not fully supported yet. Enable
5496 the following code after the support is complete. Need to find the correct
5497 type node for __fp16 *. */
5498 #if 0
5499 case HFmode:
5500 field_t = float_type_node;
5501 field_ptr_t = float_ptr_type_node;
5502 break;
5503 #endif
5504 case V2SImode:
5505 case V4SImode:
5506 {
5507 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5508 field_t = build_vector_type_for_mode (innertype, ag_mode);
5509 field_ptr_t = build_pointer_type (field_t);
5510 }
5511 break;
5512 default:
5513 gcc_assert (0);
5514 }
5515
5516       /* *(field_ptr_t) &ha = *((field_ptr_t) vr_saved_area).  */
5517 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5518 addr = t;
5519 t = fold_convert (field_ptr_t, addr);
5520 t = build2 (MODIFY_EXPR, field_t,
5521 build1 (INDIRECT_REF, field_t, tmp_ha),
5522 build1 (INDIRECT_REF, field_t, t));
5523
5524 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5525 for (i = 1; i < nregs; ++i)
5526 {
5527 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5528 u = fold_convert (field_ptr_t, addr);
5529 u = build2 (MODIFY_EXPR, field_t,
5530 build2 (MEM_REF, field_t, tmp_ha,
5531 build_int_cst (field_ptr_t,
5532 (i *
5533 int_size_in_bytes (field_t)))),
5534 build1 (INDIRECT_REF, field_t, u));
5535 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5536 }
5537
5538 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5539 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5540 }
5541
5542 COND_EXPR_ELSE (cond2) = t;
5543 addr = fold_convert (build_pointer_type (type), cond1);
5544 addr = build_va_arg_indirect_ref (addr);
5545
5546 if (indirect_p)
5547 addr = build_va_arg_indirect_ref (addr);
5548
5549 return addr;
5550 }
5551
5552 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5553
5554 static void
5555 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5556 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5557 int no_rtl)
5558 {
5559 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5560 CUMULATIVE_ARGS local_cum;
5561 int gr_saved, vr_saved;
5562
5563 /* The caller has advanced CUM up to, but not beyond, the last named
5564 argument. Advance a local copy of CUM past the last "real" named
5565 argument, to find out how many registers are left over. */
5566 local_cum = *cum;
5567 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5568
5569 /* Find out how many registers we need to save. */
5570 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5571 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5572
5573 if (TARGET_GENERAL_REGS_ONLY)
5574 {
5575 if (local_cum.aapcs_nvrn > 0)
5576 sorry ("%qs and floating point or vector arguments",
5577 "-mgeneral-regs-only");
5578 vr_saved = 0;
5579 }
5580
5581 if (!no_rtl)
5582 {
5583 if (gr_saved > 0)
5584 {
5585 rtx ptr, mem;
5586
5587 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5588 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5589 - gr_saved * UNITS_PER_WORD);
5590 mem = gen_frame_mem (BLKmode, ptr);
5591 set_mem_alias_set (mem, get_varargs_alias_set ());
5592
5593 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5594 mem, gr_saved);
5595 }
5596 if (vr_saved > 0)
5597 {
5598 /* We can't use move_block_from_reg, because it will use
5599 the wrong mode, storing D regs only. */
5600 enum machine_mode mode = TImode;
5601 int off, i;
5602
5603 /* Set OFF to the offset from virtual_incoming_args_rtx of
5604 the first vector register. The VR save area lies below
5605 the GR one, and is aligned to 16 bytes. */
5606 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5607 STACK_BOUNDARY / BITS_PER_UNIT);
5608 off -= vr_saved * UNITS_PER_VREG;
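/* A worked example (illustrative values only, not taken from any
   particular call site): with gr_saved == 3, UNITS_PER_WORD == 8 and a
   16-byte STACK_BOUNDARY, the GR save area occupies
   AARCH64_ROUND_UP (24, 16) == 32 bytes; with vr_saved == 2 and
   UNITS_PER_VREG == 16 the first vector register is therefore stored
   at OFF == -32 - 32 == -64 from virtual_incoming_args_rtx.  */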
5609
5610 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5611 {
5612 rtx ptr, mem;
5613
5614 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5615 mem = gen_frame_mem (mode, ptr);
5616 set_mem_alias_set (mem, get_varargs_alias_set ());
5617 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5618 off += UNITS_PER_VREG;
5619 }
5620 }
5621 }
5622
5623 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5624 any complication of having crtl->args.pretend_args_size changed. */
5625 cfun->machine->saved_varargs_size
5626 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5627 STACK_BOUNDARY / BITS_PER_UNIT)
5628 + vr_saved * UNITS_PER_VREG);
5629 }
5630
5631 static void
5632 aarch64_conditional_register_usage (void)
5633 {
5634 int i;
5635 if (!TARGET_FLOAT)
5636 {
5637 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5638 {
5639 fixed_regs[i] = 1;
5640 call_used_regs[i] = 1;
5641 }
5642 }
5643 }
5644
5645 /* Walk down the type tree of TYPE counting consecutive base elements.
5646 If *MODEP is VOIDmode, then set it to the first valid floating point
5647 type. If a non-floating point type is found, or if a floating point
5648 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5649 otherwise return the count in the sub-tree. */
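/* For example (a sketch of the classification, not code that is run here):

     struct hfa   { double x[2]; double y; };   // *MODEP = DFmode, returns 3
     struct mixed { double x; float y; };       // returns -1 (SFmode != DFmode)

   The first is a homogeneous floating-point aggregate candidate; the second
   mixes element modes and is rejected.  */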
5650 static int
5651 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5652 {
5653 enum machine_mode mode;
5654 HOST_WIDE_INT size;
5655
5656 switch (TREE_CODE (type))
5657 {
5658 case REAL_TYPE:
5659 mode = TYPE_MODE (type);
5660 if (mode != DFmode && mode != SFmode && mode != TFmode)
5661 return -1;
5662
5663 if (*modep == VOIDmode)
5664 *modep = mode;
5665
5666 if (*modep == mode)
5667 return 1;
5668
5669 break;
5670
5671 case COMPLEX_TYPE:
5672 mode = TYPE_MODE (TREE_TYPE (type));
5673 if (mode != DFmode && mode != SFmode && mode != TFmode)
5674 return -1;
5675
5676 if (*modep == VOIDmode)
5677 *modep = mode;
5678
5679 if (*modep == mode)
5680 return 2;
5681
5682 break;
5683
5684 case VECTOR_TYPE:
5685 /* Use V2SImode and V4SImode as representatives of all 64-bit
5686 and 128-bit vector types. */
5687 size = int_size_in_bytes (type);
5688 switch (size)
5689 {
5690 case 8:
5691 mode = V2SImode;
5692 break;
5693 case 16:
5694 mode = V4SImode;
5695 break;
5696 default:
5697 return -1;
5698 }
5699
5700 if (*modep == VOIDmode)
5701 *modep = mode;
5702
5703 /* Vector modes are considered to be opaque: two vectors are
5704 equivalent for the purposes of being homogeneous aggregates
5705 if they are the same size. */
5706 if (*modep == mode)
5707 return 1;
5708
5709 break;
5710
5711 case ARRAY_TYPE:
5712 {
5713 int count;
5714 tree index = TYPE_DOMAIN (type);
5715
5716 /* Can't handle incomplete types. */
5717 if (!COMPLETE_TYPE_P (type))
5718 return -1;
5719
5720 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5721 if (count == -1
5722 || !index
5723 || !TYPE_MAX_VALUE (index)
5724 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5725 || !TYPE_MIN_VALUE (index)
5726 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5727 || count < 0)
5728 return -1;
5729
5730 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5731 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5732
5733 /* There must be no padding. */
5734 if (!host_integerp (TYPE_SIZE (type), 1)
5735 || (tree_low_cst (TYPE_SIZE (type), 1)
5736 != count * GET_MODE_BITSIZE (*modep)))
5737 return -1;
5738
5739 return count;
5740 }
5741
5742 case RECORD_TYPE:
5743 {
5744 int count = 0;
5745 int sub_count;
5746 tree field;
5747
5748 /* Can't handle incomplete types. */
5749 if (!COMPLETE_TYPE_P (type))
5750 return -1;
5751
5752 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5753 {
5754 if (TREE_CODE (field) != FIELD_DECL)
5755 continue;
5756
5757 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5758 if (sub_count < 0)
5759 return -1;
5760 count += sub_count;
5761 }
5762
5763 /* There must be no padding. */
5764 if (!host_integerp (TYPE_SIZE (type), 1)
5765 || (tree_low_cst (TYPE_SIZE (type), 1)
5766 != count * GET_MODE_BITSIZE (*modep)))
5767 return -1;
5768
5769 return count;
5770 }
5771
5772 case UNION_TYPE:
5773 case QUAL_UNION_TYPE:
5774 {
5775 /* These aren't very interesting except in a degenerate case. */
5776 int count = 0;
5777 int sub_count;
5778 tree field;
5779
5780 /* Can't handle incomplete types. */
5781 if (!COMPLETE_TYPE_P (type))
5782 return -1;
5783
5784 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5785 {
5786 if (TREE_CODE (field) != FIELD_DECL)
5787 continue;
5788
5789 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5790 if (sub_count < 0)
5791 return -1;
5792 count = count > sub_count ? count : sub_count;
5793 }
5794
5795 /* There must be no padding. */
5796 if (!host_integerp (TYPE_SIZE (type), 1)
5797 || (tree_low_cst (TYPE_SIZE (type), 1)
5798 != count * GET_MODE_BITSIZE (*modep)))
5799 return -1;
5800
5801 return count;
5802 }
5803
5804 default:
5805 break;
5806 }
5807
5808 return -1;
5809 }
5810
5811 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5812 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5813 array types. The C99 floating-point complex types are also considered
5814 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5815 types, which are GCC extensions and out of the scope of AAPCS64, are
5816 treated as composite types here as well.
5817
5818 Note that MODE itself is not sufficient in determining whether a type
5819 is such a composite type or not. This is because
5820 stor-layout.c:compute_record_mode may have already changed the MODE
5821 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5822 structure with only one field may have its MODE set to the mode of the
5823 field. Also an integer mode whose size matches the size of the
5824 RECORD_TYPE type may be used to substitute the original mode
5825 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5826 solely relied on. */
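/* For illustration (the modes are what stor-layout.c may plausibly assign,
   not guaranteed): "struct s { float f; }" can end up with MODE == SFmode
   and "struct t { char c[4]; }" with MODE == SImode, yet both must still be
   classified as composite types, which is why TYPE is consulted rather than
   MODE alone.  */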
5827
5828 static bool
5829 aarch64_composite_type_p (const_tree type,
5830 enum machine_mode mode)
5831 {
5832 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5833 return true;
5834
5835 if (mode == BLKmode
5836 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5837 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5838 return true;
5839
5840 return false;
5841 }
5842
5843 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5844 type as described in AAPCS64 \S 4.1.2.
5845
5846 See the comment above aarch64_composite_type_p for the notes on MODE. */
5847
5848 static bool
5849 aarch64_short_vector_p (const_tree type,
5850 enum machine_mode mode)
5851 {
5852 HOST_WIDE_INT size = -1;
5853
5854 if (type && TREE_CODE (type) == VECTOR_TYPE)
5855 size = int_size_in_bytes (type);
5856 else if (!aarch64_composite_type_p (type, mode)
5857 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5858 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5859 size = GET_MODE_SIZE (mode);
5860
5861 return (size == 8 || size == 16);
5862 }
5863
5864 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5865 shall be passed or returned in simd/fp register(s) (providing these
5866 parameter passing registers are available).
5867
5868 Upon successful return, *COUNT returns the number of needed registers,
5869 *BASE_MODE returns the mode of the individual register and when IS_HA
5870 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5871 floating-point aggregate or a homogeneous short-vector aggregate. */
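/* Illustrative outcomes (a sketch, not exhaustive): a double _Complex
   argument gives *COUNT == 2, *BASE_MODE == DFmode and *IS_HA == true;
   "struct { float a, b, c; }" gives *COUNT == 3 and *BASE_MODE == SFmode;
   "struct { int i; double d; }" is rejected and is passed under the
   general-register rules instead.  */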
5872
5873 static bool
5874 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5875 const_tree type,
5876 enum machine_mode *base_mode,
5877 int *count,
5878 bool *is_ha)
5879 {
5880 enum machine_mode new_mode = VOIDmode;
5881 bool composite_p = aarch64_composite_type_p (type, mode);
5882
5883 if (is_ha != NULL) *is_ha = false;
5884
5885 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5886 || aarch64_short_vector_p (type, mode))
5887 {
5888 *count = 1;
5889 new_mode = mode;
5890 }
5891 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5892 {
5893 if (is_ha != NULL) *is_ha = true;
5894 *count = 2;
5895 new_mode = GET_MODE_INNER (mode);
5896 }
5897 else if (type && composite_p)
5898 {
5899 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5900
5901 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5902 {
5903 if (is_ha != NULL) *is_ha = true;
5904 *count = ag_count;
5905 }
5906 else
5907 return false;
5908 }
5909 else
5910 return false;
5911
5912 *base_mode = new_mode;
5913 return true;
5914 }
5915
5916 /* Implement TARGET_STRUCT_VALUE_RTX. */
5917
5918 static rtx
5919 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5920 int incoming ATTRIBUTE_UNUSED)
5921 {
5922 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5923 }
5924
5925 /* Implements target hook vector_mode_supported_p. */
5926 static bool
5927 aarch64_vector_mode_supported_p (enum machine_mode mode)
5928 {
5929 if (TARGET_SIMD
5930 && (mode == V4SImode || mode == V8HImode
5931 || mode == V16QImode || mode == V2DImode
5932 || mode == V2SImode || mode == V4HImode
5933 || mode == V8QImode || mode == V2SFmode
5934 || mode == V4SFmode || mode == V2DFmode))
5935 return true;
5936
5937 return false;
5938 }
5939
5940 /* Return quad mode as the preferred SIMD mode. */
5941 static enum machine_mode
5942 aarch64_preferred_simd_mode (enum machine_mode mode)
5943 {
5944 if (TARGET_SIMD)
5945 switch (mode)
5946 {
5947 case DFmode:
5948 return V2DFmode;
5949 case SFmode:
5950 return V4SFmode;
5951 case SImode:
5952 return V4SImode;
5953 case HImode:
5954 return V8HImode;
5955 case QImode:
5956 return V16QImode;
5957 case DImode:
5958 return V2DImode;
5959 break;
5960
5961 default:;
5962 }
5963 return word_mode;
5964 }
5965
5966 /* Return the bitmask of possible vector sizes for the vectorizer
5967 to iterate over. */
5968 static unsigned int
5969 aarch64_autovectorize_vector_sizes (void)
5970 {
5971 return (16 | 8);
5972 }
5973
5974 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5975 vector types in order to conform to the AAPCS64 (see "Procedure
5976 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5977 qualify for emission with the mangled names defined in that document,
5978 a vector type must not only be of the correct mode but also be
5979 composed of AdvSIMD vector element types (e.g.
5980 __builtin_aarch64_simd_qi); these types are registered by
5981 aarch64_init_simd_builtins (). In other words, vector types defined
5982 in other ways e.g. via vector_size attribute will get default
5983 mangled names. */
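/* For example (assuming the usual Itanium C++ ABI encoding): a C++ function
   "void f (int8x8_t)" using the <arm_neon.h> type mangles to
   _Z1f10__Int8x8_t, because int8x8_t is V8QImode and is built from
   __builtin_aarch64_simd_qi, whereas an equivalent type declared with
   __attribute__ ((vector_size (8))) falls through to the default
   mangling.  */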
5984 typedef struct
5985 {
5986 enum machine_mode mode;
5987 const char *element_type_name;
5988 const char *mangled_name;
5989 } aarch64_simd_mangle_map_entry;
5990
5991 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5992 /* 64-bit containerized types. */
5993 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5994 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5995 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5996 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5997 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5998 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5999 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6000 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6001 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6002 /* 128-bit containerized types. */
6003 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6004 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6005 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6006 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6007 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6008 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6009 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6010 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6011 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6012 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6013 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6014 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6015 { VOIDmode, NULL, NULL }
6016 };
6017
6018 /* Implement TARGET_MANGLE_TYPE. */
6019
6020 static const char *
6021 aarch64_mangle_type (const_tree type)
6022 {
6023 /* The AArch64 ABI documents say that "__va_list" has to be
6024 mangled as if it is in the "std" namespace. */
6025 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6026 return "St9__va_list";
6027
6028 /* Check the mode of the vector type, and the name of the vector
6029 element type, against the table. */
6030 if (TREE_CODE (type) == VECTOR_TYPE)
6031 {
6032 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6033
6034 while (pos->mode != VOIDmode)
6035 {
6036 tree elt_type = TREE_TYPE (type);
6037
6038 if (pos->mode == TYPE_MODE (type)
6039 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6040 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6041 pos->element_type_name))
6042 return pos->mangled_name;
6043
6044 pos++;
6045 }
6046 }
6047
6048 /* Use the default mangling. */
6049 return NULL;
6050 }
6051
6052
6053 /* Return true iff X is a MEM rtx. */
6054
6055 static int
6056 is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
6057 {
6058 return MEM_P (*x);
6059 }
6060
6061
6062 /* Return true if mem_insn contains a MEM RTX somewhere in it. */
6063
6064 static bool
6065 has_memory_op (rtx mem_insn)
6066 {
6067 rtx pattern = PATTERN (mem_insn);
6068 return for_each_rtx (&pattern, is_mem_p, NULL);
6069 }
6070
6071
6072 /* Find the first rtx before insn that will generate an assembly
6073 instruction. */
6074
6075 static rtx
6076 aarch64_prev_real_insn (rtx insn)
6077 {
6078 if (!insn)
6079 return NULL;
6080
6081 do
6082 {
6083 insn = prev_real_insn (insn);
6084 }
6085 while (insn && recog_memoized (insn) < 0);
6086
6087 return insn;
6088 }
6089
6090 /* Return true iff t1 is the v8type of a multiply-accumulate instruction. */
6091
6092 static bool
6093 is_madd_op (enum attr_v8type t1)
6094 {
6095 return t1 == V8TYPE_MADD
6096 || t1 == V8TYPE_MADDL;
6097 }
6098
6099
6100 /* Check if there is a register dependency between a load and the insn
6101 for which we hold recog_data. */
6102
6103 static bool
6104 dep_between_memop_and_curr (rtx memop)
6105 {
6106 rtx load_reg;
6107 int opno;
6108
6109 gcc_assert (GET_CODE (memop) == SET);
6110
6111 if (!REG_P (SET_DEST (memop)))
6112 return false;
6113
6114 load_reg = SET_DEST (memop);
6115 for (opno = 1; opno < recog_data.n_operands; opno++)
6116 {
6117 rtx operand = recog_data.operand[opno];
6118 if (REG_P (operand)
6119 && reg_overlap_mentioned_p (load_reg, operand))
6120 return true;
6121
6122 }
6123 return false;
6124 }
6125
6126
6127
6128 /* When working around the Cortex-A53 erratum 835769,
6129 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
6130 instruction and has a preceding memory instruction such that a NOP
6131 should be inserted between them. */
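/* Illustrative output (register numbers arbitrary) when the workaround is
   enabled (the -mfix-cortex-a53-835769 option, assuming the usual spelling):

	ldr	x1, [x2]
	nop	// between mem op and mult-accumulate
	madd	x0, x3, x4, x5

   The nop is only emitted for a DImode multiply-accumulate that has no
   register dependency on the preceding memory operation.  */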
6132
6133 bool
6134 aarch64_madd_needs_nop (rtx insn)
6135 {
6136 enum attr_v8type attr_type;
6137 rtx prev;
6138 rtx body;
6139
6140 if (!aarch64_fix_a53_err835769)
6141 return false;
6142
6143 if (recog_memoized (insn) < 0)
6144 return false;
6145
6146 attr_type = get_attr_v8type (insn);
6147 if (!is_madd_op (attr_type))
6148 return false;
6149
6150 prev = aarch64_prev_real_insn (insn);
6151 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
6152 Restore recog state to INSN to avoid state corruption. */
6153 extract_constrain_insn_cached (insn);
6154
6155 if (!prev || !has_memory_op (prev))
6156 return false;
6157
6158 body = single_set (prev);
6159
6160 /* If the previous insn is a memory op and there is no dependency between
6161 it and the madd, emit a nop between them. If we know it's a memop but
6162 body is NULL, return true to be safe. */
6163 if (GET_MODE (recog_data.operand[0]) == DImode
6164 && (!body || !dep_between_memop_and_curr (body)))
6165 return true;
6166
6167 return false;
6168
6169 }
6170
6171 /* Implement FINAL_PRESCAN_INSN. */
6172
6173 void
6174 aarch64_final_prescan_insn (rtx insn)
6175 {
6176 if (aarch64_madd_needs_nop (insn))
6177 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
6178 }
6179
6180
6181 /* Return the equivalent letter for size. */
6182 static unsigned char
6183 sizetochar (int size)
6184 {
6185 switch (size)
6186 {
6187 case 64: return 'd';
6188 case 32: return 's';
6189 case 16: return 'h';
6190 case 8 : return 'b';
6191 default: gcc_unreachable ();
6192 }
6193 }
6194
6195 /* Return true iff x is a uniform vector of floating-point
6196 constants, and the constant can be represented in
6197 quarter-precision form. Note, as aarch64_float_const_representable_p
6198 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6199 static bool
6200 aarch64_vect_float_const_representable_p (rtx x)
6201 {
6202 int i = 0;
6203 REAL_VALUE_TYPE r0, ri;
6204 rtx x0, xi;
6205
6206 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6207 return false;
6208
6209 x0 = CONST_VECTOR_ELT (x, 0);
6210 if (!CONST_DOUBLE_P (x0))
6211 return false;
6212
6213 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6214
6215 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6216 {
6217 xi = CONST_VECTOR_ELT (x, i);
6218 if (!CONST_DOUBLE_P (xi))
6219 return false;
6220
6221 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6222 if (!REAL_VALUES_EQUAL (r0, ri))
6223 return false;
6224 }
6225
6226 return aarch64_float_const_representable_p (x0);
6227 }
6228
6229 /* TODO: This function returns values similar to those
6230 returned by neon_valid_immediate in gcc/config/arm/arm.c
6231 but the API here is different enough that these magic numbers
6232 are not used. It should be sufficient to return true or false. */
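/* A worked example (not tied to any particular caller): for a V4SImode
   constant whose every element is 0x00ab0000, only byte 2 of each 32-bit
   lane is non-zero, so the class-2 CHECK below matches with ELSIZE == 32,
   SHIFT == 16 and NEG == 0, and *MODCONST becomes 0xab, which
   aarch64_output_simd_mov_immediate later emits with an "lsl 16" shift.  */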
6233 static int
6234 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6235 rtx *modconst, int *elementwidth,
6236 unsigned char *elementchar,
6237 int *mvn, int *shift)
6238 {
6239 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6240 matches = 1; \
6241 for (i = 0; i < idx; i += (STRIDE)) \
6242 if (!(TEST)) \
6243 matches = 0; \
6244 if (matches) \
6245 { \
6246 immtype = (CLASS); \
6247 elsize = (ELSIZE); \
6248 elchar = sizetochar (elsize); \
6249 eshift = (SHIFT); \
6250 emvn = (NEG); \
6251 break; \
6252 }
6253
6254 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6255 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6256 unsigned char bytes[16];
6257 unsigned char elchar = 0;
6258 int immtype = -1, matches;
6259 unsigned int invmask = inverse ? 0xff : 0;
6260 int eshift, emvn;
6261
6262 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6263 {
6264 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6265 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6266
6267 if (!(simd_imm_zero
6268 || aarch64_vect_float_const_representable_p (op)))
6269 return -1;
6270
6271 if (modconst)
6272 *modconst = CONST_VECTOR_ELT (op, 0);
6273
6274 if (elementwidth)
6275 *elementwidth = elem_width;
6276
6277 if (elementchar)
6278 *elementchar = sizetochar (elem_width);
6279
6280 if (shift)
6281 *shift = 0;
6282
6283 if (simd_imm_zero)
6284 return 19;
6285 else
6286 return 18;
6287 }
6288
6289 /* Splat vector constant out into a byte vector. */
6290 for (i = 0; i < n_elts; i++)
6291 {
6292 rtx el = CONST_VECTOR_ELT (op, i);
6293 unsigned HOST_WIDE_INT elpart;
6294 unsigned int part, parts;
6295
6296 if (GET_CODE (el) == CONST_INT)
6297 {
6298 elpart = INTVAL (el);
6299 parts = 1;
6300 }
6301 else if (GET_CODE (el) == CONST_DOUBLE)
6302 {
6303 elpart = CONST_DOUBLE_LOW (el);
6304 parts = 2;
6305 }
6306 else
6307 gcc_unreachable ();
6308
6309 for (part = 0; part < parts; part++)
6310 {
6311 unsigned int byte;
6312 for (byte = 0; byte < innersize; byte++)
6313 {
6314 bytes[idx++] = (elpart & 0xff) ^ invmask;
6315 elpart >>= BITS_PER_UNIT;
6316 }
6317 if (GET_CODE (el) == CONST_DOUBLE)
6318 elpart = CONST_DOUBLE_HIGH (el);
6319 }
6320 }
6321
6322 /* Sanity check. */
6323 gcc_assert (idx == GET_MODE_SIZE (mode));
6324
6325 do
6326 {
6327 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6328 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6329
6330 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6331 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6332
6333 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6334 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6335
6336 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6337 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6338
6339 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6340
6341 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6342
6343 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6344 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6345
6346 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6347 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6348
6349 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6350 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6351
6352 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6353 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6354
6355 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6356
6357 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6358
6359 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6360 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6361
6362 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6363 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6364
6365 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6366 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6367
6368 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6369 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6370
6371 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6372
6373 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6374 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6375 }
6376 while (0);
6377
6378 /* TODO: Currently the assembler cannot handle types 12 to 15.
6379 And there is no way to specify cmode through the compiler.
6380 Disable them till there is support in the assembler. */
6381 if (immtype == -1
6382 || (immtype >= 12 && immtype <= 15)
6383 || immtype == 18)
6384 return -1;
6385
6386
6387 if (elementwidth)
6388 *elementwidth = elsize;
6389
6390 if (elementchar)
6391 *elementchar = elchar;
6392
6393 if (mvn)
6394 *mvn = emvn;
6395
6396 if (shift)
6397 *shift = eshift;
6398
6399 if (modconst)
6400 {
6401 unsigned HOST_WIDE_INT imm = 0;
6402
6403 /* Un-invert bytes of recognized vector, if necessary. */
6404 if (invmask != 0)
6405 for (i = 0; i < idx; i++)
6406 bytes[i] ^= invmask;
6407
6408 if (immtype == 17)
6409 {
6410 /* FIXME: Broken on 32-bit H_W_I hosts. */
6411 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6412
6413 for (i = 0; i < 8; i++)
6414 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6415 << (i * BITS_PER_UNIT);
6416
6417 *modconst = GEN_INT (imm);
6418 }
6419 else
6420 {
6421 unsigned HOST_WIDE_INT imm = 0;
6422
6423 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6424 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6425
6426 /* Construct 'abcdefgh' because the assembler cannot handle
6427 generic constants. */
6428 gcc_assert (shift != NULL && mvn != NULL);
6429 if (*mvn)
6430 imm = ~imm;
6431 imm = (imm >> *shift) & 0xff;
6432 *modconst = GEN_INT (imm);
6433 }
6434 }
6435
6436 return immtype;
6437 #undef CHECK
6438 }
6439
6440 /* Return TRUE if rtx X is legal for use as an AdvSIMD MOVI instruction
6441 (or, implicitly, MVNI) immediate. Write back the width per element
6442 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6443 for a MOVI instruction) in *MODCONST. */
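/* For example (a sketch): a V4SImode vector with all elements 0xffffffab is
   recognised as class 6 with MVN set; the returned *MODCONST is 0x54
   (~0x54 == 0xffffffab in each 32-bit lane), so the value is materialised
   with MVNI rather than MOVI.  */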
6444 int
6445 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6446 rtx *modconst, int *elementwidth,
6447 unsigned char *elementchar,
6448 int *mvn, int *shift)
6449 {
6450 rtx tmpconst;
6451 int tmpwidth;
6452 unsigned char tmpwidthc;
6453 int tmpmvn = 0, tmpshift = 0;
6454 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6455 &tmpwidth, &tmpwidthc,
6456 &tmpmvn, &tmpshift);
6457
6458 if (retval == -1)
6459 return 0;
6460
6461 if (modconst)
6462 *modconst = tmpconst;
6463
6464 if (elementwidth)
6465 *elementwidth = tmpwidth;
6466
6467 if (elementchar)
6468 *elementchar = tmpwidthc;
6469
6470 if (mvn)
6471 *mvn = tmpmvn;
6472
6473 if (shift)
6474 *shift = tmpshift;
6475
6476 return 1;
6477 }
6478
6479 static bool
6480 aarch64_const_vec_all_same_int_p (rtx x,
6481 HOST_WIDE_INT minval,
6482 HOST_WIDE_INT maxval)
6483 {
6484 HOST_WIDE_INT firstval;
6485 int count, i;
6486
6487 if (GET_CODE (x) != CONST_VECTOR
6488 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6489 return false;
6490
6491 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6492 if (firstval < minval || firstval > maxval)
6493 return false;
6494
6495 count = CONST_VECTOR_NUNITS (x);
6496 for (i = 1; i < count; i++)
6497 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6498 return false;
6499
6500 return true;
6501 }
6502
6503 /* Check if immediate shift constants are within range. */
6504 bool
6505 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6506 {
6507 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6508 if (left)
6509 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6510 else
6511 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6512 }
6513
6514 /* Return true if X is a uniform vector where all elements
6515 are either the floating-point constant 0.0 or the
6516 integer constant 0. */
6517 bool
6518 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6519 {
6520 return x == CONST0_RTX (mode);
6521 }
6522
6523 bool
6524 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6525 {
6526 HOST_WIDE_INT imm = INTVAL (x);
6527 int i;
6528
6529 for (i = 0; i < 8; i++)
6530 {
6531 unsigned int byte = imm & 0xff;
6532 if (byte != 0xff && byte != 0)
6533 return false;
6534 imm >>= 8;
6535 }
6536
6537 return true;
6538 }
6539
6540 /* Return a const_int vector of VAL. */
6541 rtx
6542 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6543 {
6544 int nunits = GET_MODE_NUNITS (mode);
6545 rtvec v = rtvec_alloc (nunits);
6546 int i;
6547
6548 for (i=0; i < nunits; i++)
6549 RTVEC_ELT (v, i) = GEN_INT (val);
6550
6551 return gen_rtx_CONST_VECTOR (mode, v);
6552 }
6553
6554 /* Construct and return a PARALLEL RTX vector. */
6555 rtx
6556 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6557 {
6558 int nunits = GET_MODE_NUNITS (mode);
6559 rtvec v = rtvec_alloc (nunits / 2);
6560 int base = high ? nunits / 2 : 0;
6561 rtx t1;
6562 int i;
6563
6564 for (i=0; i < nunits / 2; i++)
6565 RTVEC_ELT (v, i) = GEN_INT (base + i);
6566
6567 t1 = gen_rtx_PARALLEL (mode, v);
6568 return t1;
6569 }
6570
6571 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6572 HIGH (exclusive). */
6573 void
6574 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6575 {
6576 HOST_WIDE_INT lane;
6577 gcc_assert (GET_CODE (operand) == CONST_INT);
6578 lane = INTVAL (operand);
6579
6580 if (lane < low || lane >= high)
6581 error ("lane out of range");
6582 }
6583
6584 void
6585 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6586 {
6587 gcc_assert (GET_CODE (operand) == CONST_INT);
6588 HOST_WIDE_INT lane = INTVAL (operand);
6589
6590 if (lane < low || lane >= high)
6591 error ("constant out of range");
6592 }
6593
6594 /* Emit code to reinterpret one AdvSIMD type as another,
6595 without altering bits. */
6596 void
6597 aarch64_simd_reinterpret (rtx dest, rtx src)
6598 {
6599 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6600 }
6601
6602 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6603 registers). */
6604 void
6605 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6606 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6607 rtx op1)
6608 {
6609 rtx mem = gen_rtx_MEM (mode, destaddr);
6610 rtx tmp1 = gen_reg_rtx (mode);
6611 rtx tmp2 = gen_reg_rtx (mode);
6612
6613 emit_insn (intfn (tmp1, op1, tmp2));
6614
6615 emit_move_insn (mem, tmp1);
6616 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6617 emit_move_insn (mem, tmp2);
6618 }
6619
6620 /* Return TRUE if OP is a valid vector addressing mode. */
6621 bool
6622 aarch64_simd_mem_operand_p (rtx op)
6623 {
6624 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6625 || GET_CODE (XEXP (op, 0)) == REG);
6626 }
6627
6628 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6629 not to early-clobber SRC registers in the process.
6630
6631 We assume that the operands described by SRC and DEST represent a
6632 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6633 number of components into which the copy has been decomposed. */
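/* For example (a sketch with arbitrary registers): for a 3-component copy
   with DEST == {v1, v2, v3} and SRC == {v0, v1, v2}, the destination starts
   at a higher register number than the overlapping source, so the moves are
   emitted in reverse order (v3 <- v2, v2 <- v1, v1 <- v0) and no source is
   clobbered before it has been read.  */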
6634 void
6635 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6636 rtx *src, unsigned int count)
6637 {
6638 unsigned int i;
6639
6640 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6641 || REGNO (operands[0]) < REGNO (operands[1]))
6642 {
6643 for (i = 0; i < count; i++)
6644 {
6645 operands[2 * i] = dest[i];
6646 operands[2 * i + 1] = src[i];
6647 }
6648 }
6649 else
6650 {
6651 for (i = 0; i < count; i++)
6652 {
6653 operands[2 * i] = dest[count - i - 1];
6654 operands[2 * i + 1] = src[count - i - 1];
6655 }
6656 }
6657 }
6658
6659 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6660 one of VSTRUCT modes: OI, CI or XI. */
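/* For example, a register-to-register OImode move is split into two
   Q-register moves and is therefore 8 bytes long, whereas the load/store
   forms remain a single instruction at the default 4 bytes.  */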
6661 int
6662 aarch64_simd_attr_length_move (rtx insn)
6663 {
6664 enum machine_mode mode;
6665
6666 extract_insn_cached (insn);
6667
6668 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6669 {
6670 mode = GET_MODE (recog_data.operand[0]);
6671 switch (mode)
6672 {
6673 case OImode:
6674 return 8;
6675 case CImode:
6676 return 12;
6677 case XImode:
6678 return 16;
6679 default:
6680 gcc_unreachable ();
6681 }
6682 }
6683 return 4;
6684 }
6685
6686 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6687 alignment of a vector to 128 bits. */
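/* For example, a type declared with __attribute__ ((vector_size (32))) is
   256 bits wide but is still given only 128-bit alignment here.  */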
6688 static HOST_WIDE_INT
6689 aarch64_simd_vector_alignment (const_tree type)
6690 {
6691 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6692 return MIN (align, 128);
6693 }
6694
6695 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6696 static bool
6697 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6698 {
6699 if (is_packed)
6700 return false;
6701
6702 /* We guarantee alignment for vectors up to 128-bits. */
6703 if (tree_int_cst_compare (TYPE_SIZE (type),
6704 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6705 return false;
6706
6707 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6708 return true;
6709 }
6710
6711 /* If VALS is a vector constant that can be loaded into a register
6712 using DUP, generate instructions to do so and return an RTX to
6713 assign to the register. Otherwise return NULL_RTX. */
6714 static rtx
6715 aarch64_simd_dup_constant (rtx vals)
6716 {
6717 enum machine_mode mode = GET_MODE (vals);
6718 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6719 int n_elts = GET_MODE_NUNITS (mode);
6720 bool all_same = true;
6721 rtx x;
6722 int i;
6723
6724 if (GET_CODE (vals) != CONST_VECTOR)
6725 return NULL_RTX;
6726
6727 for (i = 1; i < n_elts; ++i)
6728 {
6729 x = CONST_VECTOR_ELT (vals, i);
6730 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6731 all_same = false;
6732 }
6733
6734 if (!all_same)
6735 return NULL_RTX;
6736
6737 /* We can load this constant by using DUP and a constant in a
6738 single ARM register. This will be cheaper than a vector
6739 load. */
6740 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6741 return gen_rtx_VEC_DUPLICATE (mode, x);
6742 }
6743
6744
6745 /* Generate code to load VALS, which is a PARALLEL containing only
6746 constants (for vec_init) or CONST_VECTOR, efficiently into a
6747 register. Returns an RTX to copy into the register, or NULL_RTX
6748 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6749 static rtx
6750 aarch64_simd_make_constant (rtx vals)
6751 {
6752 enum machine_mode mode = GET_MODE (vals);
6753 rtx const_dup;
6754 rtx const_vec = NULL_RTX;
6755 int n_elts = GET_MODE_NUNITS (mode);
6756 int n_const = 0;
6757 int i;
6758
6759 if (GET_CODE (vals) == CONST_VECTOR)
6760 const_vec = vals;
6761 else if (GET_CODE (vals) == PARALLEL)
6762 {
6763 /* A CONST_VECTOR must contain only CONST_INTs and
6764 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6765 Only store valid constants in a CONST_VECTOR. */
6766 for (i = 0; i < n_elts; ++i)
6767 {
6768 rtx x = XVECEXP (vals, 0, i);
6769 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6770 n_const++;
6771 }
6772 if (n_const == n_elts)
6773 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6774 }
6775 else
6776 gcc_unreachable ();
6777
6778 if (const_vec != NULL_RTX
6779 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6780 NULL, NULL, NULL))
6781 /* Load using MOVI/MVNI. */
6782 return const_vec;
6783 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6784 /* Loaded using DUP. */
6785 return const_dup;
6786 else if (const_vec != NULL_RTX)
6787 /* Load from constant pool. We cannot take advantage of single-cycle
6788 LD1 because we need a PC-relative addressing mode. */
6789 return const_vec;
6790 else
6791 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6792 We cannot construct an initializer. */
6793 return NULL_RTX;
6794 }
6795
6796 void
6797 aarch64_expand_vector_init (rtx target, rtx vals)
6798 {
6799 enum machine_mode mode = GET_MODE (target);
6800 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6801 int n_elts = GET_MODE_NUNITS (mode);
6802 int n_var = 0, one_var = -1;
6803 bool all_same = true;
6804 rtx x, mem;
6805 int i;
6806
6807 x = XVECEXP (vals, 0, 0);
6808 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6809 n_var = 1, one_var = 0;
6810
6811 for (i = 1; i < n_elts; ++i)
6812 {
6813 x = XVECEXP (vals, 0, i);
6814 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6815 ++n_var, one_var = i;
6816
6817 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6818 all_same = false;
6819 }
6820
6821 if (n_var == 0)
6822 {
6823 rtx constant = aarch64_simd_make_constant (vals);
6824 if (constant != NULL_RTX)
6825 {
6826 emit_move_insn (target, constant);
6827 return;
6828 }
6829 }
6830
6831 /* Splat a single non-constant element if we can. */
6832 if (all_same)
6833 {
6834 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6835 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6836 return;
6837 }
6838
6839 /* One field is non-constant. Load constant then overwrite varying
6840 field. This is more efficient than using the stack. */
6841 if (n_var == 1)
6842 {
6843 rtx copy = copy_rtx (vals);
6844 rtx index = GEN_INT (one_var);
6845 enum insn_code icode;
6846
6847 /* Load constant part of vector, substitute neighboring value for
6848 varying element. */
6849 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6850 aarch64_expand_vector_init (target, copy);
6851
6852 /* Insert variable. */
6853 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6854 icode = optab_handler (vec_set_optab, mode);
6855 gcc_assert (icode != CODE_FOR_nothing);
6856 emit_insn (GEN_FCN (icode) (target, x, index));
6857 return;
6858 }
6859
6860 /* Construct the vector in memory one field at a time
6861 and load the whole vector. */
6862 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6863 for (i = 0; i < n_elts; i++)
6864 emit_move_insn (adjust_address_nv (mem, inner_mode,
6865 i * GET_MODE_SIZE (inner_mode)),
6866 XVECEXP (vals, 0, i));
6867 emit_move_insn (target, mem);
6868
6869 }
6870
6871 static unsigned HOST_WIDE_INT
6872 aarch64_shift_truncation_mask (enum machine_mode mode)
6873 {
6874 return
6875 (aarch64_vector_mode_supported_p (mode)
6876 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6877 }
6878
6879 #ifndef TLS_SECTION_ASM_FLAG
6880 #define TLS_SECTION_ASM_FLAG 'T'
6881 #endif
6882
6883 void
6884 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6885 tree decl ATTRIBUTE_UNUSED)
6886 {
6887 char flagchars[10], *f = flagchars;
6888
6889 /* If we have already declared this section, we can use an
6890 abbreviated form to switch back to it -- unless this section is
6891 part of a COMDAT group, in which case GAS requires the full
6892 declaration every time. */
6893 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6894 && (flags & SECTION_DECLARED))
6895 {
6896 fprintf (asm_out_file, "\t.section\t%s\n", name);
6897 return;
6898 }
6899
6900 if (!(flags & SECTION_DEBUG))
6901 *f++ = 'a';
6902 if (flags & SECTION_WRITE)
6903 *f++ = 'w';
6904 if (flags & SECTION_CODE)
6905 *f++ = 'x';
6906 if (flags & SECTION_SMALL)
6907 *f++ = 's';
6908 if (flags & SECTION_MERGE)
6909 *f++ = 'M';
6910 if (flags & SECTION_STRINGS)
6911 *f++ = 'S';
6912 if (flags & SECTION_TLS)
6913 *f++ = TLS_SECTION_ASM_FLAG;
6914 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6915 *f++ = 'G';
6916 *f = '\0';
6917
6918 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6919
6920 if (!(flags & SECTION_NOTYPE))
6921 {
6922 const char *type;
6923 const char *format;
6924
6925 if (flags & SECTION_BSS)
6926 type = "nobits";
6927 else
6928 type = "progbits";
6929
6930 #ifdef TYPE_OPERAND_FMT
6931 format = "," TYPE_OPERAND_FMT;
6932 #else
6933 format = ",@%s";
6934 #endif
6935
6936 fprintf (asm_out_file, format, type);
6937
6938 if (flags & SECTION_ENTSIZE)
6939 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6940 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6941 {
6942 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6943 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6944 else
6945 fprintf (asm_out_file, ",%s,comdat",
6946 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6947 }
6948 }
6949
6950 putc ('\n', asm_out_file);
6951 }
6952
6953 /* Select a format to encode pointers in exception handling data. */
6954 int
6955 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6956 {
6957 int type;
6958 switch (aarch64_cmodel)
6959 {
6960 case AARCH64_CMODEL_TINY:
6961 case AARCH64_CMODEL_TINY_PIC:
6962 case AARCH64_CMODEL_SMALL:
6963 case AARCH64_CMODEL_SMALL_PIC:
6964 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6965 for everything. */
6966 type = DW_EH_PE_sdata4;
6967 break;
6968 default:
6969 /* No assumptions here. 8-byte relocs required. */
6970 type = DW_EH_PE_sdata8;
6971 break;
6972 }
6973 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6974 }
6975
6976 /* Emit load exclusive. */
6977
6978 static void
6979 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6980 rtx mem, rtx model_rtx)
6981 {
6982 rtx (*gen) (rtx, rtx, rtx);
6983
6984 switch (mode)
6985 {
6986 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6987 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6988 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6989 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6990 default:
6991 gcc_unreachable ();
6992 }
6993
6994 emit_insn (gen (rval, mem, model_rtx));
6995 }
6996
6997 /* Emit store exclusive. */
6998
6999 static void
7000 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7001 rtx rval, rtx mem, rtx model_rtx)
7002 {
7003 rtx (*gen) (rtx, rtx, rtx, rtx);
7004
7005 switch (mode)
7006 {
7007 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7008 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7009 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7010 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7011 default:
7012 gcc_unreachable ();
7013 }
7014
7015 emit_insn (gen (bval, rval, mem, model_rtx));
7016 }
7017
7018 /* Emit the jump described by INSN and mark it as unlikely to be taken. */
7019
7020 static void
7021 aarch64_emit_unlikely_jump (rtx insn)
7022 {
7023 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7024
7025 insn = emit_jump_insn (insn);
7026 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7027 }
7028
7029 /* Expand a compare and swap pattern. */
7030
7031 void
7032 aarch64_expand_compare_and_swap (rtx operands[])
7033 {
7034 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7035 enum machine_mode mode, cmp_mode;
7036 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7037
7038 bval = operands[0];
7039 rval = operands[1];
7040 mem = operands[2];
7041 oldval = operands[3];
7042 newval = operands[4];
7043 is_weak = operands[5];
7044 mod_s = operands[6];
7045 mod_f = operands[7];
7046 mode = GET_MODE (mem);
7047 cmp_mode = mode;
7048
7049 /* Normally the succ memory model must be stronger than fail, but in the
7050 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7051 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7052
7053 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7054 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7055 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7056
7057 switch (mode)
7058 {
7059 case QImode:
7060 case HImode:
7061 /* For short modes, we're going to perform the comparison in SImode,
7062 so do the zero-extension now. */
7063 cmp_mode = SImode;
7064 rval = gen_reg_rtx (SImode);
7065 oldval = convert_modes (SImode, mode, oldval, true);
7066 /* Fall through. */
7067
7068 case SImode:
7069 case DImode:
7070 /* Force the value into a register if needed. */
7071 if (!aarch64_plus_operand (oldval, mode))
7072 oldval = force_reg (cmp_mode, oldval);
7073 break;
7074
7075 default:
7076 gcc_unreachable ();
7077 }
7078
7079 switch (mode)
7080 {
7081 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7082 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7083 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7084 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7085 default:
7086 gcc_unreachable ();
7087 }
7088
7089 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7090
7091 if (mode == QImode || mode == HImode)
7092 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7093
7094 x = gen_rtx_REG (CCmode, CC_REGNUM);
7095 x = gen_rtx_EQ (SImode, x, const0_rtx);
7096 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7097 }
7098
7099 /* Split a compare and swap pattern. */
7100
7101 void
7102 aarch64_split_compare_and_swap (rtx operands[])
7103 {
7104 rtx rval, mem, oldval, newval, scratch;
7105 enum machine_mode mode;
7106 bool is_weak;
7107 rtx label1, label2, x, cond;
7108
7109 rval = operands[0];
7110 mem = operands[1];
7111 oldval = operands[2];
7112 newval = operands[3];
7113 is_weak = (operands[4] != const0_rtx);
7114 scratch = operands[7];
7115 mode = GET_MODE (mem);
7116
7117 label1 = NULL_RTX;
7118 if (!is_weak)
7119 {
7120 label1 = gen_label_rtx ();
7121 emit_label (label1);
7122 }
7123 label2 = gen_label_rtx ();
7124
7125 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7126
7127 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7128 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7129 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7130 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7131 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7132
7133 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7134
7135 if (!is_weak)
7136 {
7137 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7138 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7139 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7140 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7141 }
7142 else
7143 {
7144 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7145 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7146 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7147 }
7148
7149 emit_label (label2);
7150 }
7151
7152 /* Split an atomic operation. */
7153
7154 void
7155 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7156 rtx value, rtx model_rtx, rtx cond)
7157 {
7158 enum machine_mode mode = GET_MODE (mem);
7159 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7160 rtx label, x;
7161
7162 label = gen_label_rtx ();
7163 emit_label (label);
7164
7165 if (new_out)
7166 new_out = gen_lowpart (wmode, new_out);
7167 if (old_out)
7168 old_out = gen_lowpart (wmode, old_out);
7169 else
7170 old_out = new_out;
7171 value = simplify_gen_subreg (wmode, value, mode, 0);
7172
7173 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7174
7175 switch (code)
7176 {
7177 case SET:
7178 new_out = value;
7179 break;
7180
7181 case NOT:
7182 x = gen_rtx_AND (wmode, old_out, value);
7183 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7184 x = gen_rtx_NOT (wmode, new_out);
7185 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7186 break;
7187
7188 case MINUS:
7189 if (CONST_INT_P (value))
7190 {
7191 value = GEN_INT (-INTVAL (value));
7192 code = PLUS;
7193 }
7194 /* Fall through. */
7195
7196 default:
7197 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7198 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7199 break;
7200 }
7201
7202 aarch64_emit_store_exclusive (mode, cond, mem,
7203 gen_lowpart (mode, new_out), model_rtx);
7204
7205 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7206 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7207 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7208 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7209 }
7210
7211 static void
7212 aarch64_print_extension (void)
7213 {
7214 const struct aarch64_option_extension *opt = NULL;
7215
7216 for (opt = all_extensions; opt->name != NULL; opt++)
7217 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7218 asm_fprintf (asm_out_file, "+%s", opt->name);
7219
7220 asm_fprintf (asm_out_file, "\n");
7221 }
7222
7223 static void
7224 aarch64_start_file (void)
7225 {
7226 if (selected_arch)
7227 {
7228 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7229 aarch64_print_extension ();
7230 }
7231 else if (selected_cpu)
7232 {
7233 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7234 aarch64_print_extension ();
7235 }
7236 default_file_start();
7237 }
7238
7239 /* Target hook for c_mode_for_suffix. */
7240 static enum machine_mode
7241 aarch64_c_mode_for_suffix (char suffix)
7242 {
7243 if (suffix == 'q')
7244 return TFmode;
7245
7246 return VOIDmode;
7247 }
7248
7249 /* We can only represent floating point constants which will fit in
7250 "quarter-precision" values. These values are characterised by
7251 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7252 by:
7253
7254 (-1)^s * (n/16) * 2^r
7255
7256 Where:
7257 's' is the sign bit.
7258 'n' is an integer in the range 16 <= n <= 31.
7259 'r' is an integer in the range -3 <= r <= 4. */
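/* For example: 1.5 = (-1)^0 * (24/16) * 2^0 and 0.125 = (-1)^0 * (16/16) *
   2^-3 are representable, while 0.1 is not (no n/16 with 16 <= n <= 31 can
   be scaled to it by a power of two).  The representable magnitudes
   therefore lie in the range [0.125, 31].  */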
7260
7261 /* Return true iff X can be represented by a quarter-precision
7262 floating point immediate operand. Note, we cannot represent 0.0. */
7263 bool
7264 aarch64_float_const_representable_p (rtx x)
7265 {
7266 /* This represents our current view of how many bits
7267 make up the mantissa. */
7268 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7269 int exponent;
7270 unsigned HOST_WIDE_INT mantissa, mask;
7271 HOST_WIDE_INT m1, m2;
7272 REAL_VALUE_TYPE r, m;
7273
7274 if (!CONST_DOUBLE_P (x))
7275 return false;
7276
7277 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7278
7279 /* We cannot represent infinities, NaNs or +/-zero. We won't
7280 know if we have +zero until we analyse the mantissa, but we
7281 can reject the other invalid values. */
7282 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7283 || REAL_VALUE_MINUS_ZERO (r))
7284 return false;
7285
7286 /* Extract exponent. */
7287 r = real_value_abs (&r);
7288 exponent = REAL_EXP (&r);
7289
7290 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7291 highest (sign) bit, with a fixed binary point at bit point_pos.
7292 m1 holds the low part of the mantissa, m2 the high part.
7293 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7294 bits for the mantissa, this can fail (low bits will be lost). */
7295 real_ldexp (&m, &r, point_pos - exponent);
7296 REAL_VALUE_TO_INT (&m1, &m2, m);
7297
7298 /* If the low part of the mantissa has bits set we cannot represent
7299 the value. */
7300 if (m1 != 0)
7301 return false;
7302 /* We have rejected the lower HOST_WIDE_INT, so update our
7303 understanding of how many bits lie in the mantissa and
7304 look only at the high HOST_WIDE_INT. */
7305 mantissa = m2;
7306 point_pos -= HOST_BITS_PER_WIDE_INT;
7307
7308 /* We can only represent values with a mantissa of the form 1.xxxx. */
7309 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7310 if ((mantissa & mask) != 0)
7311 return false;
7312
7313 /* Having filtered unrepresentable values, we may now remove all
7314 but the highest 5 bits. */
7315 mantissa >>= point_pos - 5;
7316
7317 /* We cannot represent the value 0.0, so reject it. This is handled
7318 elsewhere. */
7319 if (mantissa == 0)
7320 return false;
7321
7322 /* Then, as bit 4 is always set, we can mask it off, leaving
7323 the mantissa in the range [0, 15]. */
7324 mantissa &= ~(1 << 4);
7325 gcc_assert (mantissa <= 15);
7326
7327 /* GCC internally does not use IEEE754-like encoding (where normalized
7328 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7329 Our mantissa values are shifted 4 places to the left relative to
7330 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7331 by 5 places to correct for GCC's representation. */
7332 exponent = 5 - exponent;
7333
7334 return (exponent >= 0 && exponent <= 7);
7335 }
7336
7337 char*
7338 aarch64_output_simd_mov_immediate (rtx *const_vector,
7339 enum machine_mode mode,
7340 unsigned width)
7341 {
7342 int is_valid;
7343 unsigned char widthc;
7344 int lane_width_bits;
7345 static char templ[40];
7346 int shift = 0, mvn = 0;
7347 const char *mnemonic;
7348 unsigned int lane_count = 0;
7349
7350 is_valid =
7351 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7352 const_vector, &lane_width_bits,
7353 &widthc, &mvn, &shift);
7354 gcc_assert (is_valid);
7355
7356 mode = GET_MODE_INNER (mode);
7357 if (mode == SFmode || mode == DFmode)
7358 {
7359 bool zero_p =
7360 aarch64_float_const_zero_rtx_p (*const_vector);
7361 gcc_assert (shift == 0);
7362 mnemonic = zero_p ? "movi" : "fmov";
7363 }
7364 else
7365 mnemonic = mvn ? "mvni" : "movi";
7366
7367 gcc_assert (lane_width_bits != 0);
7368 lane_count = width / lane_width_bits;
7369
7370 if (lane_count == 1)
7371 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7372 else if (shift)
7373 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7374 mnemonic, lane_count, widthc, shift);
7375 else
7376 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7377 mnemonic, lane_count, widthc);
7378 return templ;
7379 }
7380
7381 /* Split operands into moves from op[1] + op[2] into op[0]. */
7382
7383 void
7384 aarch64_split_combinev16qi (rtx operands[3])
7385 {
7386 unsigned int dest = REGNO (operands[0]);
7387 unsigned int src1 = REGNO (operands[1]);
7388 unsigned int src2 = REGNO (operands[2]);
7389 enum machine_mode halfmode = GET_MODE (operands[1]);
7390 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7391 rtx destlo, desthi;
7392
7393 gcc_assert (halfmode == V16QImode);
7394
7395 if (src1 == dest && src2 == dest + halfregs)
7396 {
7397 /* No-op move. Can't split to nothing; emit something. */
7398 emit_note (NOTE_INSN_DELETED);
7399 return;
7400 }
7401
7402 /* Preserve register attributes for variable tracking. */
7403 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7404 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7405 GET_MODE_SIZE (halfmode));
7406
7407 /* Special case of reversed high/low parts. */
7408 if (reg_overlap_mentioned_p (operands[2], destlo)
7409 && reg_overlap_mentioned_p (operands[1], desthi))
7410 {
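/* Editorial note: the three XORs below swap the contents of the two
source registers without a scratch register (a ^= b; b ^= a; a ^= b).
Because the sources exactly mirror the destination halves here, the
swap alone completes the combine.  */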
7411 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7412 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7413 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7414 }
7415 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7416 {
7417 /* Try to avoid unnecessary moves if part of the result
7418 is in the right place already. */
7419 if (src1 != dest)
7420 emit_move_insn (destlo, operands[1]);
7421 if (src2 != dest + halfregs)
7422 emit_move_insn (desthi, operands[2]);
7423 }
7424 else
7425 {
7426 if (src2 != dest + halfregs)
7427 emit_move_insn (desthi, operands[2]);
7428 if (src1 != dest)
7429 emit_move_insn (destlo, operands[1]);
7430 }
7431 }
7432
7433 /* vec_perm support. */
7434
7435 #define MAX_VECT_LEN 16
7436
7437 struct expand_vec_perm_d
7438 {
7439 rtx target, op0, op1;
7440 unsigned char perm[MAX_VECT_LEN];
7441 enum machine_mode vmode;
7442 unsigned char nelt;
7443 bool one_vector_p;
7444 bool testing_p;
7445 };
7446
7447 /* Generate a variable permutation. */
7448
7449 static void
7450 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7451 {
7452 enum machine_mode vmode = GET_MODE (target);
7453 bool one_vector_p = rtx_equal_p (op0, op1);
7454
7455 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7456 gcc_checking_assert (GET_MODE (op0) == vmode);
7457 gcc_checking_assert (GET_MODE (op1) == vmode);
7458 gcc_checking_assert (GET_MODE (sel) == vmode);
7459 gcc_checking_assert (TARGET_SIMD);
7460
7461 if (one_vector_p)
7462 {
7463 if (vmode == V8QImode)
7464 {
7465 /* Expand the argument to a V16QI mode by duplicating it. */
7466 rtx pair = gen_reg_rtx (V16QImode);
7467 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7468 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7469 }
7470 else
7471 {
7472 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7473 }
7474 }
7475 else
7476 {
7477 rtx pair;
7478
7479 if (vmode == V8QImode)
7480 {
7481 pair = gen_reg_rtx (V16QImode);
7482 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7483 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7484 }
7485 else
7486 {
7487 pair = gen_reg_rtx (OImode);
7488 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7489 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7490 }
7491 }
7492 }
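/* Editorial note (illustrative): TBL indexes bytes across its table
   registers, so the two-input V8QImode case above first combines
   op0/op1 into a single V16QImode table (byte indices 0-15), while the
   two-input V16QImode case builds an OImode register pair and uses the
   two-table form of the instruction.  */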
7493
7494 void
7495 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7496 {
7497 enum machine_mode vmode = GET_MODE (target);
7498 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7499 bool one_vector_p = rtx_equal_p (op0, op1);
7500 rtx rmask[MAX_VECT_LEN], mask;
7501
7502 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7503
7504 /* The TBL instruction does not use a modulo index, so we must take care
7505 of that ourselves. */
7506 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7507 for (i = 0; i < nelt; ++i)
7508 rmask[i] = mask;
7509 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7510 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7511
7512 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7513 }
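/* Editorial note (illustrative, under the semantics described above):
   vec_perm treats selector values modulo the number of input elements,
   whereas TBL writes zero for out-of-range byte indices.  The AND above
   provides the wrap-around; e.g. for a single V8QImode input the mask
   is 7, so a selector byte of 9 picks element 1 instead of producing
   zero.  */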
7514
7515 /* Recognize patterns suitable for the TRN instructions. */
7516 static bool
7517 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7518 {
7519 unsigned int i, odd, mask, nelt = d->nelt;
7520 rtx out, in0, in1, x;
7521 rtx (*gen) (rtx, rtx, rtx);
7522 enum machine_mode vmode = d->vmode;
7523
7524 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7525 return false;
7526
7527 /* Note that these are little-endian tests.
7528 We correct for big-endian later. */
7529 if (d->perm[0] == 0)
7530 odd = 0;
7531 else if (d->perm[0] == 1)
7532 odd = 1;
7533 else
7534 return false;
7535 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7536
7537 for (i = 0; i < nelt; i += 2)
7538 {
7539 if (d->perm[i] != i + odd)
7540 return false;
7541 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7542 return false;
7543 }
7544
7545 /* Success! */
7546 if (d->testing_p)
7547 return true;
7548
7549 in0 = d->op0;
7550 in1 = d->op1;
7551 if (BYTES_BIG_ENDIAN)
7552 {
7553 x = in0, in0 = in1, in1 = x;
7554 odd = !odd;
7555 }
7556 out = d->target;
7557
7558 if (odd)
7559 {
7560 switch (vmode)
7561 {
7562 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7563 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7564 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7565 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7566 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7567 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7568 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7569 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7570 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7571 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7572 default:
7573 return false;
7574 }
7575 }
7576 else
7577 {
7578 switch (vmode)
7579 {
7580 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7581 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7582 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7583 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7584 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7585 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7586 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7587 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7588 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7589 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7590 default:
7591 return false;
7592 }
7593 }
7594
7595 emit_insn (gen (out, in0, in1));
7596 return true;
7597 }
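/* Editorial note (worked example, not from the original source): for a
   two-input V4SImode permutation the tests above accept {0, 4, 2, 6}
   (odd == 0, emitted as trn1) and {1, 5, 3, 7} (odd == 1, emitted as
   trn2), i.e. the even-numbered or odd-numbered lanes of the two
   inputs interleaved pairwise.  */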
7598
7599 /* Recognize patterns suitable for the UZP instructions. */
7600 static bool
7601 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7602 {
7603 unsigned int i, odd, mask, nelt = d->nelt;
7604 rtx out, in0, in1, x;
7605 rtx (*gen) (rtx, rtx, rtx);
7606 enum machine_mode vmode = d->vmode;
7607
7608 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7609 return false;
7610
7611 /* Note that these are little-endian tests.
7612 We correct for big-endian later. */
7613 if (d->perm[0] == 0)
7614 odd = 0;
7615 else if (d->perm[0] == 1)
7616 odd = 1;
7617 else
7618 return false;
7619 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7620
7621 for (i = 0; i < nelt; i++)
7622 {
7623 unsigned elt = (i * 2 + odd) & mask;
7624 if (d->perm[i] != elt)
7625 return false;
7626 }
7627
7628 /* Success! */
7629 if (d->testing_p)
7630 return true;
7631
7632 in0 = d->op0;
7633 in1 = d->op1;
7634 if (BYTES_BIG_ENDIAN)
7635 {
7636 x = in0, in0 = in1, in1 = x;
7637 odd = !odd;
7638 }
7639 out = d->target;
7640
7641 if (odd)
7642 {
7643 switch (vmode)
7644 {
7645 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7646 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7647 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7648 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7649 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7650 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7651 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7652 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7653 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7654 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7655 default:
7656 return false;
7657 }
7658 }
7659 else
7660 {
7661 switch (vmode)
7662 {
7663 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7664 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7665 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7666 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7667 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7668 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7669 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7670 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7671 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7672 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7673 default:
7674 return false;
7675 }
7676 }
7677
7678 emit_insn (gen (out, in0, in1));
7679 return true;
7680 }
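/* Editorial note (worked example, not from the original source): for a
   two-input V4SImode permutation the tests above accept {0, 2, 4, 6}
   (odd == 0, emitted as uzp1) and {1, 3, 5, 7} (odd == 1, emitted as
   uzp2), i.e. the concatenation of the even-numbered or odd-numbered
   lanes of both inputs.  */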
7681
7682 /* Recognize patterns suitable for the ZIP instructions. */
7683 static bool
7684 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7685 {
7686 unsigned int i, high, mask, nelt = d->nelt;
7687 rtx out, in0, in1, x;
7688 rtx (*gen) (rtx, rtx, rtx);
7689 enum machine_mode vmode = d->vmode;
7690
7691 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7692 return false;
7693
7694 /* Note that these are little-endian tests.
7695 We correct for big-endian later. */
7696 high = nelt / 2;
7697 if (d->perm[0] == high)
7698 /* Do Nothing. */
7699 ;
7700 else if (d->perm[0] == 0)
7701 high = 0;
7702 else
7703 return false;
7704 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7705
7706 for (i = 0; i < nelt / 2; i++)
7707 {
7708 unsigned elt = (i + high) & mask;
7709 if (d->perm[i * 2] != elt)
7710 return false;
7711 elt = (elt + nelt) & mask;
7712 if (d->perm[i * 2 + 1] != elt)
7713 return false;
7714 }
7715
7716 /* Success! */
7717 if (d->testing_p)
7718 return true;
7719
7720 in0 = d->op0;
7721 in1 = d->op1;
7722 if (BYTES_BIG_ENDIAN)
7723 {
7724 x = in0, in0 = in1, in1 = x;
7725 high = !high;
7726 }
7727 out = d->target;
7728
7729 if (high)
7730 {
7731 switch (vmode)
7732 {
7733 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7734 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7735 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7736 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7737 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7738 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7739 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7740 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7741 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7742 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7743 default:
7744 return false;
7745 }
7746 }
7747 else
7748 {
7749 switch (vmode)
7750 {
7751 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7752 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7753 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7754 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7755 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7756 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7757 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7758 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7759 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7760 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7761 default:
7762 return false;
7763 }
7764 }
7765
7766 emit_insn (gen (out, in0, in1));
7767 return true;
7768 }
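/* Editorial note (worked example, not from the original source): for a
   two-input V4SImode permutation the tests above accept {0, 4, 1, 5}
   (high == 0, emitted as zip1) and {2, 6, 3, 7} (high == nelt/2,
   emitted as zip2), i.e. the low or high halves of the two inputs
   interleaved element by element.  */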
7769
7770 static bool
7771 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7772 {
7773 rtx rperm[MAX_VECT_LEN], sel;
7774 enum machine_mode vmode = d->vmode;
7775 unsigned int i, nelt = d->nelt;
7776
7777 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7778 numbering of elements for big-endian, we must reverse the order. */
7779 if (BYTES_BIG_ENDIAN)
7780 return false;
7781
7782 if (d->testing_p)
7783 return true;
7784
7785 /* Generic code will try constant permutation twice. Once with the
7786 original mode and again with the elements lowered to QImode.
7787 So wait and don't do the selector expansion ourselves. */
7788 if (vmode != V8QImode && vmode != V16QImode)
7789 return false;
7790
7791 for (i = 0; i < nelt; ++i)
7792 rperm[i] = GEN_INT (d->perm[i]);
7793 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7794 sel = force_reg (vmode, sel);
7795
7796 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7797 return true;
7798 }
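/* Editorial note (illustrative): a permutation that matches none of the
   fixed patterns, say {3, 0, 7, 1, 4, 2, 6, 5} on V8QImode inputs, ends
   up here: the indices are materialised as a constant vector, forced
   into a register, and handed to the generic TBL expansion above.  */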
7799
7800 static bool
7801 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7802 {
7803 /* The pattern matching functions above are written to look for a small
7804 number to begin the sequence (0, 1, N/2). If we begin with an index
7805 from the second operand, we can swap the operands. */
7806 if (d->perm[0] >= d->nelt)
7807 {
7808 unsigned i, nelt = d->nelt;
7809 rtx x;
7810
7811 for (i = 0; i < nelt; ++i)
7812 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7813
7814 x = d->op0;
7815 d->op0 = d->op1;
7816 d->op1 = x;
7817 }
7818
7819 if (TARGET_SIMD)
7820 {
7821 if (aarch64_evpc_zip (d))
7822 return true;
7823 else if (aarch64_evpc_uzp (d))
7824 return true;
7825 else if (aarch64_evpc_trn (d))
7826 return true;
7827 return aarch64_evpc_tbl (d);
7828 }
7829 return false;
7830 }
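/* Editorial note (worked example, not from the original source): with
   nelt == 4, a selector {4, 0, 5, 1} starts with an index into the
   second operand; the loop above rewrites it to {0, 4, 1, 5} and swaps
   op0/op1, after which the ZIP recogniser fires.  */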
7831
7832 /* Expand a vec_perm_const pattern. */
7833
7834 bool
7835 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7836 {
7837 struct expand_vec_perm_d d;
7838 int i, nelt, which;
7839
7840 d.target = target;
7841 d.op0 = op0;
7842 d.op1 = op1;
7843
7844 d.vmode = GET_MODE (target);
7845 gcc_assert (VECTOR_MODE_P (d.vmode));
7846 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7847 d.testing_p = false;
7848
7849 for (i = which = 0; i < nelt; ++i)
7850 {
7851 rtx e = XVECEXP (sel, 0, i);
7852 int ei = INTVAL (e) & (2 * nelt - 1);
7853 which |= (ei < nelt ? 1 : 2);
7854 d.perm[i] = ei;
7855 }
7856
7857 switch (which)
7858 {
7859 default:
7860 gcc_unreachable ();
7861
7862 case 3:
7863 d.one_vector_p = false;
7864 if (!rtx_equal_p (op0, op1))
7865 break;
7866
7867 /* The elements of PERM do not suggest that only the first operand
7868 is used, but both operands are identical. Allow easier matching
7869 of the permutation by folding the permutation into the single
7870 input vector. */
7871 /* Fall Through. */
7872 case 2:
7873 for (i = 0; i < nelt; ++i)
7874 d.perm[i] &= nelt - 1;
7875 d.op0 = op1;
7876 d.one_vector_p = true;
7877 break;
7878
7879 case 1:
7880 d.op1 = op0;
7881 d.one_vector_p = true;
7882 break;
7883 }
7884
7885 return aarch64_expand_vec_perm_const_1 (&d);
7886 }
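/* Editorial note (worked example, not from the original source): the
   "which" bitmask above records whether indices reference the first
   operand (bit 0), the second (bit 1), or both.  With nelt == 4, a
   selector {5, 7, 4, 6} sets only bit 1, so case 2 folds it to the
   single-input permutation {1, 3, 0, 2} applied to op1.  */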
7887
7888 static bool
7889 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7890 const unsigned char *sel)
7891 {
7892 struct expand_vec_perm_d d;
7893 unsigned int i, nelt, which;
7894 bool ret;
7895
7896 d.vmode = vmode;
7897 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7898 d.testing_p = true;
7899 memcpy (d.perm, sel, nelt);
7900
7901 /* Calculate whether all elements are in one vector. */
7902 for (i = which = 0; i < nelt; ++i)
7903 {
7904 unsigned char e = d.perm[i];
7905 gcc_assert (e < 2 * nelt);
7906 which |= (e < nelt ? 1 : 2);
7907 }
7908
7909 /* If all elements are from the second vector, reindex as if from the
7910 first vector. */
7911 if (which == 2)
7912 for (i = 0; i < nelt; ++i)
7913 d.perm[i] -= nelt;
7914
7915 /* Check whether the mask can be applied to a single vector. */
7916 d.one_vector_p = (which != 3);
7917
7918 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7919 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7920 if (!d.one_vector_p)
7921 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7922
7923 start_sequence ();
7924 ret = aarch64_expand_vec_perm_const_1 (&d);
7925 end_sequence ();
7926
7927 return ret;
7928 }
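/* Editorial note (descriptive): the hook above answers the vectorizer's
   "can this constant permutation be expanded?" query without emitting
   RTL: testing_p makes the recognisers return before generating
   anything, the raw registers past LAST_VIRTUAL_REGISTER are mere
   placeholders, and the start_sequence/end_sequence pair discards any
   instructions that might still be created.  */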
7929
7930 #undef TARGET_ADDRESS_COST
7931 #define TARGET_ADDRESS_COST aarch64_address_cost
7932
7933 /* This hook determines whether unnamed bitfields affect the alignment
7934 of the containing structure. The hook returns true if the structure
7935 should inherit the alignment requirements of an unnamed bitfield's
7936 type. */
7937 #undef TARGET_ALIGN_ANON_BITFIELD
7938 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7939
7940 #undef TARGET_ASM_ALIGNED_DI_OP
7941 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7942
7943 #undef TARGET_ASM_ALIGNED_HI_OP
7944 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7945
7946 #undef TARGET_ASM_ALIGNED_SI_OP
7947 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7948
7949 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7950 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7951 hook_bool_const_tree_hwi_hwi_const_tree_true
7952
7953 #undef TARGET_ASM_FILE_START
7954 #define TARGET_ASM_FILE_START aarch64_start_file
7955
7956 #undef TARGET_ASM_OUTPUT_MI_THUNK
7957 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7958
7959 #undef TARGET_ASM_SELECT_RTX_SECTION
7960 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7961
7962 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7963 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7964
7965 #undef TARGET_BUILD_BUILTIN_VA_LIST
7966 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7967
7968 #undef TARGET_CALLEE_COPIES
7969 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7970
7971 #undef TARGET_CAN_ELIMINATE
7972 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7973
7974 #undef TARGET_CANNOT_FORCE_CONST_MEM
7975 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7976
7977 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7978 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7979
7980 /* Only the least significant bit is used for initialization guard
7981 variables. */
7982 #undef TARGET_CXX_GUARD_MASK_BIT
7983 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7984
7985 #undef TARGET_C_MODE_FOR_SUFFIX
7986 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7987
7988 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7989 #undef TARGET_DEFAULT_TARGET_FLAGS
7990 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7991 #endif
7992
7993 #undef TARGET_CLASS_MAX_NREGS
7994 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7995
7996 #undef TARGET_BUILTIN_DECL
7997 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7998
7999 #undef TARGET_EXPAND_BUILTIN
8000 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8001
8002 #undef TARGET_EXPAND_BUILTIN_VA_START
8003 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8004
8005 #undef TARGET_FUNCTION_ARG
8006 #define TARGET_FUNCTION_ARG aarch64_function_arg
8007
8008 #undef TARGET_FUNCTION_ARG_ADVANCE
8009 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8010
8011 #undef TARGET_FUNCTION_ARG_BOUNDARY
8012 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8013
8014 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8015 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8016
8017 #undef TARGET_FUNCTION_VALUE
8018 #define TARGET_FUNCTION_VALUE aarch64_function_value
8019
8020 #undef TARGET_FUNCTION_VALUE_REGNO_P
8021 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8022
8023 #undef TARGET_FRAME_POINTER_REQUIRED
8024 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8025
8026 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8027 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8028
8029 #undef TARGET_INIT_BUILTINS
8030 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8031
8032 #undef TARGET_LEGITIMATE_ADDRESS_P
8033 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8034
8035 #undef TARGET_LEGITIMATE_CONSTANT_P
8036 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8037
8038 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8039 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8040
8041 #undef TARGET_MANGLE_TYPE
8042 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8043
8044 #undef TARGET_MEMORY_MOVE_COST
8045 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8046
8047 #undef TARGET_MUST_PASS_IN_STACK
8048 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8049
8050 /* This target hook should return true if accesses to volatile bitfields
8051 should use the narrowest mode possible. It should return false if these
8052 accesses should use the bitfield container type. */
8053 #undef TARGET_NARROW_VOLATILE_BITFIELD
8054 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8055
8056 #undef TARGET_OPTION_OVERRIDE
8057 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8058
8059 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8060 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8061 aarch64_override_options_after_change
8062
8063 #undef TARGET_PASS_BY_REFERENCE
8064 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8065
8066 #undef TARGET_PREFERRED_RELOAD_CLASS
8067 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8068
8069 #undef TARGET_SECONDARY_RELOAD
8070 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8071
8072 #undef TARGET_SHIFT_TRUNCATION_MASK
8073 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8074
8075 #undef TARGET_SETUP_INCOMING_VARARGS
8076 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8077
8078 #undef TARGET_STRUCT_VALUE_RTX
8079 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8080
8081 #undef TARGET_REGISTER_MOVE_COST
8082 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8083
8084 #undef TARGET_RETURN_IN_MEMORY
8085 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8086
8087 #undef TARGET_RETURN_IN_MSB
8088 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8089
8090 #undef TARGET_RTX_COSTS
8091 #define TARGET_RTX_COSTS aarch64_rtx_costs
8092
8093 #undef TARGET_TRAMPOLINE_INIT
8094 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8095
8096 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8097 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8098
8099 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8100 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8101
8102 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8103 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8104
8105 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8106 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8107
8108 #undef TARGET_VECTORIZE_BUILTINS
8109 #define TARGET_VECTORIZE_BUILTINS
8110
8111 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8112 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8113 aarch64_builtin_vectorized_function
8114
8115 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8116 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8117 aarch64_autovectorize_vector_sizes
8118
8119 /* Section anchor support. */
8120
8121 #undef TARGET_MIN_ANCHOR_OFFSET
8122 #define TARGET_MIN_ANCHOR_OFFSET -256
8123
8124 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8125 byte offset; we can do much more for larger data types, but have no way
8126 to determine the size of the access. We assume accesses are aligned. */
8127 #undef TARGET_MAX_ANCHOR_OFFSET
8128 #define TARGET_MAX_ANCHOR_OFFSET 4095
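/* Editorial note (assumption about the chosen limits, not stated in the
   original source): -256..255 is the range of the 9-bit signed
   immediate used by the unscaled load/store forms, and 0..4095 is the
   12-bit unsigned immediate available to a byte-sized scaled access,
   which appears to be why the anchor offsets are bounded this way.  */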
8129
8130 #undef TARGET_VECTOR_ALIGNMENT
8131 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8132
8133 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8134 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8135 aarch64_simd_vector_alignment_reachable
8136
8137 /* vec_perm support. */
8138
8139 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8140 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8141 aarch64_vectorize_vec_perm_const_ok
8142
8143
8144 #undef TARGET_FIXED_CONDITION_CODE_REGS
8145 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8146
8147 struct gcc_target targetm = TARGET_INITIALIZER;
8148
8149 #include "gt-aarch64.h"
8150