1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2021 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not, see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72 
73 /* This file should be included last.  */
74 #include "target-def.h"
75 
76 /* Forward definitions of types.  */
77 typedef struct minipool_node    Mnode;
78 typedef struct minipool_fixup   Mfix;
79 
80 /* The last .arch and .fpu assembly strings that we printed.  */
81 static std::string arm_last_printed_arch_string;
82 static std::string arm_last_printed_fpu_string;
83 
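/* If set, a hook for outputting language-specific object attributes.  */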
84 void (*arm_lang_output_object_attributes_hook)(void);
85 
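/* A group of up to four integer values, used by optimal_immediate_sequence
   below to return a candidate sequence of immediates.  */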
86 struct four_ints
87 {
88   int i[4];
89 };
90 
91 /* Forward function declarations.  */
92 static bool arm_const_not_ok_for_debug_p (rtx);
93 static int arm_needs_doubleword_align (machine_mode, const_tree);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets *arm_get_frame_offsets (void);
96 static void arm_compute_frame_layout (void);
97 static void arm_add_gc_roots (void);
98 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
99 			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
100 static unsigned bit_count (unsigned long);
101 static unsigned bitmap_popcount (const sbitmap);
102 static int arm_address_register_rtx_p (rtx, int);
103 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
104 static bool is_called_in_ARM_mode (tree);
105 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
106 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
107 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
108 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
109 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
110 inline static int thumb1_index_register_rtx_p (rtx, int);
111 static int thumb_far_jump_used_p (void);
112 static bool thumb_force_lr_save (void);
113 static unsigned arm_size_return_regs (void);
114 static bool arm_assemble_integer (rtx, unsigned int, int);
115 static void arm_print_operand (FILE *, rtx, int);
116 static void arm_print_operand_address (FILE *, machine_mode, rtx);
117 static bool arm_print_operand_punct_valid_p (unsigned char code);
118 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
119 static arm_cc get_arm_condition_code (rtx);
120 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
121 static const char *output_multi_immediate (rtx *, const char *, const char *,
122 					   int, HOST_WIDE_INT);
123 static const char *shift_op (rtx, HOST_WIDE_INT *);
124 static struct machine_function *arm_init_machine_status (void);
125 static void thumb_exit (FILE *, int);
126 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
127 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_forward_ref (Mfix *);
129 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_backward_ref (Mfix *);
131 static void assign_minipool_offsets (Mfix *);
132 static void arm_print_value (FILE *, rtx);
133 static void dump_minipool (rtx_insn *);
134 static int arm_barrier_cost (rtx_insn *);
135 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
136 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
137 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
138 			       machine_mode, rtx);
139 static void arm_reorg (void);
140 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
141 static unsigned long arm_compute_save_reg0_reg12_mask (void);
142 static unsigned long arm_compute_save_core_reg_mask (void);
143 static unsigned long arm_isr_value (tree);
144 static unsigned long arm_compute_func_type (void);
145 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
148 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
149 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
150 #endif
151 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
152 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
153 static void arm_output_function_epilogue (FILE *);
154 static void arm_output_function_prologue (FILE *);
155 static int arm_comp_type_attributes (const_tree, const_tree);
156 static void arm_set_default_type_attributes (tree);
157 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
158 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
159 static int optimal_immediate_sequence (enum rtx_code code,
160 				       unsigned HOST_WIDE_INT val,
161 				       struct four_ints *return_sequence);
162 static int optimal_immediate_sequence_1 (enum rtx_code code,
163 					 unsigned HOST_WIDE_INT val,
164 					 struct four_ints *return_sequence,
165 					 int i);
166 static int arm_get_strip_length (int);
167 static bool arm_function_ok_for_sibcall (tree, tree);
168 static machine_mode arm_promote_function_mode (const_tree,
169 						    machine_mode, int *,
170 						    const_tree, int);
171 static bool arm_return_in_memory (const_tree, const_tree);
172 static rtx arm_function_value (const_tree, const_tree, bool);
173 static rtx arm_libcall_value_1 (machine_mode);
174 static rtx arm_libcall_value (machine_mode, const_rtx);
175 static bool arm_function_value_regno_p (const unsigned int);
176 static void arm_internal_label (FILE *, const char *, unsigned long);
177 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
178 				 tree);
179 static bool arm_have_conditional_execution (void);
180 static bool arm_cannot_force_const_mem (machine_mode, rtx);
181 static bool arm_legitimate_constant_p (machine_mode, rtx);
182 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
183 static int arm_insn_cost (rtx_insn *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static void arm_emit_multi_reg_pop (unsigned long);
192 static int vfp_emit_fstmd (int, int);
193 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
194 static int arm_arg_partial_bytes (cumulative_args_t,
195 				  const function_arg_info &);
196 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
197 static void arm_function_arg_advance (cumulative_args_t,
198 				      const function_arg_info &);
199 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
200 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
201 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
202 				      const_tree);
203 static rtx aapcs_libcall_value (machine_mode);
204 static int aapcs_select_return_coproc (const_tree, const_tree);
205 
206 #ifdef OBJECT_FORMAT_ELF
207 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
208 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
209 #endif
210 #ifndef ARM_PE
211 static void arm_encode_section_info (tree, rtx, int);
212 #endif
213 
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
217 
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 					const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 				   const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
235 
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 				struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool arm_array_mode_supported_p (machine_mode,
282 					unsigned HOST_WIDE_INT);
283 static machine_mode arm_preferred_simd_mode (scalar_mode);
284 static bool arm_class_likely_spilled_p (reg_class_t);
285 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
286 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
287 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
288 						     const_tree type,
289 						     int misalignment,
290 						     bool is_packed);
291 static void arm_conditional_register_usage (void);
292 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
298 static int arm_cortex_m7_branch_cost (bool, bool);
299 
300 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
301 					  const vec_perm_indices &);
302 
303 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304 
305 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
306 					   tree vectype,
307 					   int misalign ATTRIBUTE_UNUSED);
308 static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
309 				   enum vect_cost_for_stmt kind,
310 				   struct _stmt_vec_info *stmt_info,
311 				   tree vectype, int misalign,
312 				   enum vect_cost_model_location where);
313 
314 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
315 					 bool op0_preserve_value);
316 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
317 
318 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
319 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
320 				     const_tree);
321 static section *arm_function_section (tree, enum node_frequency, bool, bool);
322 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
323 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
324 						int reloc);
325 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
326 static opt_scalar_float_mode arm_floatn_mode (int, bool);
327 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
328 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
329 static bool arm_modes_tieable_p (machine_mode, machine_mode);
330 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
331 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
332 				       vec<machine_mode> &,
333 				       vec<const char *> &, vec<rtx> &,
334 				       HARD_REG_SET &);
335 
336 /* Table of machine attributes.  */
337 static const struct attribute_spec arm_attribute_table[] =
338 {
339   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
340        affects_type_identity, handler, exclude } */
341   /* Function calls made to this symbol must be done indirectly, because
342      it may lie outside the 26-bit addressing range of a normal function
343      call.  */
344   { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
345   /* These functions, by contrast, are always known to reside within the
346      26-bit addressing range.  */
347   { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
348   /* Specify the procedure call conventions for a function.  */
349   { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
350     NULL },
351   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
352   { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
353     NULL },
354   { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
355     NULL },
356   { "naked",        0, 0, true,  false, false, false,
357     arm_handle_fndecl_attribute, NULL },
358 #ifdef ARM_PE
359   /* ARM/PE has three new attributes:
360      interfacearm - ?
361      dllexport - for exporting a function/variable that will live in a dll
362      dllimport - for importing a function/variable from a dll
363 
364      Microsoft allows multiple declspecs in one __declspec, separating
365      them with spaces.  We do NOT support this.  Instead, use __declspec
366      multiple times.
367   */
368   { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
369   { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
370   { "interfacearm", 0, 0, true,  false, false, false,
371     arm_handle_fndecl_attribute, NULL },
372 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
373   { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
374     NULL },
375   { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
376     NULL },
377   { "notshared",    0, 0, false, true, false, false,
378     arm_handle_notshared_attribute, NULL },
379 #endif
380   /* ARMv8-M Security Extensions support.  */
381   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
382     arm_handle_cmse_nonsecure_entry, NULL },
383   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
384     arm_handle_cmse_nonsecure_call, NULL },
385   { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
386   { NULL, 0, 0, false, false, false, false, NULL, NULL }
387 };
388 
389 /* Initialize the GCC target structure.  */
390 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
391 #undef  TARGET_MERGE_DECL_ATTRIBUTES
392 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
393 #endif
394 
395 #undef TARGET_CHECK_BUILTIN_CALL
396 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
397 
398 #undef TARGET_LEGITIMIZE_ADDRESS
399 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
400 
401 #undef  TARGET_ATTRIBUTE_TABLE
402 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
403 
404 #undef  TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
406 
407 #undef TARGET_ASM_FILE_START
408 #define TARGET_ASM_FILE_START arm_file_start
409 #undef TARGET_ASM_FILE_END
410 #define TARGET_ASM_FILE_END arm_file_end
411 
412 #undef  TARGET_ASM_ALIGNED_SI_OP
413 #define TARGET_ASM_ALIGNED_SI_OP NULL
414 #undef  TARGET_ASM_INTEGER
415 #define TARGET_ASM_INTEGER arm_assemble_integer
416 
417 #undef TARGET_PRINT_OPERAND
418 #define TARGET_PRINT_OPERAND arm_print_operand
419 #undef TARGET_PRINT_OPERAND_ADDRESS
420 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
421 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
422 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
423 
424 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
425 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
426 
427 #undef  TARGET_ASM_FUNCTION_PROLOGUE
428 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
429 
430 #undef  TARGET_ASM_FUNCTION_EPILOGUE
431 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
432 
433 #undef TARGET_CAN_INLINE_P
434 #define TARGET_CAN_INLINE_P arm_can_inline_p
435 
436 #undef TARGET_RELAYOUT_FUNCTION
437 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
438 
439 #undef  TARGET_OPTION_OVERRIDE
440 #define TARGET_OPTION_OVERRIDE arm_option_override
441 
442 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
443 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
444 
445 #undef TARGET_OPTION_RESTORE
446 #define TARGET_OPTION_RESTORE arm_option_restore
447 
448 #undef TARGET_OPTION_PRINT
449 #define TARGET_OPTION_PRINT arm_option_print
450 
451 #undef  TARGET_COMP_TYPE_ATTRIBUTES
452 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
453 
454 #undef TARGET_SCHED_CAN_SPECULATE_INSN
455 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
456 
457 #undef TARGET_SCHED_MACRO_FUSION_P
458 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
459 
460 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
461 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
462 
463 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
464 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
465 
466 #undef  TARGET_SCHED_ADJUST_COST
467 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
468 
469 #undef TARGET_SET_CURRENT_FUNCTION
470 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
471 
472 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
473 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
474 
475 #undef TARGET_SCHED_REORDER
476 #define TARGET_SCHED_REORDER arm_sched_reorder
477 
478 #undef TARGET_REGISTER_MOVE_COST
479 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
480 
481 #undef TARGET_MEMORY_MOVE_COST
482 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
483 
484 #undef TARGET_ENCODE_SECTION_INFO
485 #ifdef ARM_PE
486 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
487 #else
488 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
489 #endif
490 
491 #undef  TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
493 
494 #undef  TARGET_ASM_INTERNAL_LABEL
495 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
496 
497 #undef TARGET_FLOATN_MODE
498 #define TARGET_FLOATN_MODE arm_floatn_mode
499 
500 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
501 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
502 
503 #undef  TARGET_FUNCTION_VALUE
504 #define TARGET_FUNCTION_VALUE arm_function_value
505 
506 #undef  TARGET_LIBCALL_VALUE
507 #define TARGET_LIBCALL_VALUE arm_libcall_value
508 
509 #undef TARGET_FUNCTION_VALUE_REGNO_P
510 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
511 
512 #undef  TARGET_ASM_OUTPUT_MI_THUNK
513 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
514 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
515 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
516 
517 #undef  TARGET_RTX_COSTS
518 #define TARGET_RTX_COSTS arm_rtx_costs
519 #undef  TARGET_ADDRESS_COST
520 #define TARGET_ADDRESS_COST arm_address_cost
521 #undef TARGET_INSN_COST
522 #define TARGET_INSN_COST arm_insn_cost
523 
524 #undef TARGET_SHIFT_TRUNCATION_MASK
525 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
527 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
528 #undef TARGET_ARRAY_MODE_SUPPORTED_P
529 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
530 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
531 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
532 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
533 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
534   arm_autovectorize_vector_modes
535 
536 #undef  TARGET_MACHINE_DEPENDENT_REORG
537 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
538 
539 #undef  TARGET_INIT_BUILTINS
540 #define TARGET_INIT_BUILTINS  arm_init_builtins
541 #undef  TARGET_EXPAND_BUILTIN
542 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
543 #undef  TARGET_BUILTIN_DECL
544 #define TARGET_BUILTIN_DECL arm_builtin_decl
545 
546 #undef TARGET_INIT_LIBFUNCS
547 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
548 
549 #undef TARGET_PROMOTE_FUNCTION_MODE
550 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
553 #undef TARGET_PASS_BY_REFERENCE
554 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
555 #undef TARGET_ARG_PARTIAL_BYTES
556 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
557 #undef TARGET_FUNCTION_ARG
558 #define TARGET_FUNCTION_ARG arm_function_arg
559 #undef TARGET_FUNCTION_ARG_ADVANCE
560 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
561 #undef TARGET_FUNCTION_ARG_PADDING
562 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
563 #undef TARGET_FUNCTION_ARG_BOUNDARY
564 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
565 
566 #undef  TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
568 
569 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
570 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
571 
572 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
573 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
574 #undef TARGET_TRAMPOLINE_INIT
575 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
576 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
577 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
578 
579 #undef TARGET_WARN_FUNC_RETURN
580 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
581 
582 #undef TARGET_DEFAULT_SHORT_ENUMS
583 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
584 
585 #undef TARGET_ALIGN_ANON_BITFIELD
586 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
587 
588 #undef TARGET_NARROW_VOLATILE_BITFIELD
589 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
590 
591 #undef TARGET_CXX_GUARD_TYPE
592 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
593 
594 #undef TARGET_CXX_GUARD_MASK_BIT
595 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
596 
597 #undef TARGET_CXX_GET_COOKIE_SIZE
598 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
599 
600 #undef TARGET_CXX_COOKIE_HAS_SIZE
601 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
602 
603 #undef TARGET_CXX_CDTOR_RETURNS_THIS
604 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
605 
606 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
607 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
608 
609 #undef TARGET_CXX_USE_AEABI_ATEXIT
610 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
611 
612 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
613 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
614   arm_cxx_determine_class_data_visibility
615 
616 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
617 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
618 
619 #undef TARGET_RETURN_IN_MSB
620 #define TARGET_RETURN_IN_MSB arm_return_in_msb
621 
622 #undef TARGET_RETURN_IN_MEMORY
623 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
624 
625 #undef TARGET_MUST_PASS_IN_STACK
626 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
627 
628 #if ARM_UNWIND_INFO
629 #undef TARGET_ASM_UNWIND_EMIT
630 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
631 
632 /* EABI unwinding tables use a different format for the typeinfo tables.  */
633 #undef TARGET_ASM_TTYPE
634 #define TARGET_ASM_TTYPE arm_output_ttype
635 
636 #undef TARGET_ARM_EABI_UNWINDER
637 #define TARGET_ARM_EABI_UNWINDER true
638 
639 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
640 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
641 
642 #endif /* ARM_UNWIND_INFO */
643 
644 #undef TARGET_ASM_INIT_SECTIONS
645 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
646 
647 #undef TARGET_DWARF_REGISTER_SPAN
648 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
649 
650 #undef  TARGET_CANNOT_COPY_INSN_P
651 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
652 
653 #ifdef HAVE_AS_TLS
654 #undef TARGET_HAVE_TLS
655 #define TARGET_HAVE_TLS true
656 #endif
657 
658 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
659 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
660 
661 #undef TARGET_LEGITIMATE_CONSTANT_P
662 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663 
664 #undef TARGET_CANNOT_FORCE_CONST_MEM
665 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666 
667 #undef TARGET_MAX_ANCHOR_OFFSET
668 #define TARGET_MAX_ANCHOR_OFFSET 4095
669 
670 /* The minimum is set such that the total size of the block
671    for a particular anchor is -4088 + 1 + 4095 bytes, which is
672    divisible by eight, ensuring natural spacing of anchors.  */
673 #undef TARGET_MIN_ANCHOR_OFFSET
674 #define TARGET_MIN_ANCHOR_OFFSET -4088
675 
676 #undef TARGET_SCHED_ISSUE_RATE
677 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678 
679 #undef TARGET_SCHED_VARIABLE_ISSUE
680 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681 
682 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
683 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
684   arm_first_cycle_multipass_dfa_lookahead
685 
686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
688   arm_first_cycle_multipass_dfa_lookahead_guard
689 
690 #undef TARGET_MANGLE_TYPE
691 #define TARGET_MANGLE_TYPE arm_mangle_type
692 
693 #undef TARGET_INVALID_CONVERSION
694 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695 
696 #undef TARGET_INVALID_UNARY_OP
697 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698 
699 #undef TARGET_INVALID_BINARY_OP
700 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701 
702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704 
705 #undef TARGET_BUILD_BUILTIN_VA_LIST
706 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
707 #undef TARGET_EXPAND_BUILTIN_VA_START
708 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
709 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
710 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711 
712 #ifdef HAVE_AS_TLS
713 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
714 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
715 #endif
716 
717 #undef TARGET_LEGITIMATE_ADDRESS_P
718 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
719 
720 #undef TARGET_PREFERRED_RELOAD_CLASS
721 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722 
723 #undef TARGET_PROMOTED_TYPE
724 #define TARGET_PROMOTED_TYPE arm_promoted_type
725 
726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
727 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728 
729 #undef TARGET_COMPUTE_FRAME_LAYOUT
730 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731 
732 #undef TARGET_FRAME_POINTER_REQUIRED
733 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734 
735 #undef TARGET_CAN_ELIMINATE
736 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737 
738 #undef TARGET_CONDITIONAL_REGISTER_USAGE
739 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740 
741 #undef TARGET_CLASS_LIKELY_SPILLED_P
742 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743 
744 #undef TARGET_VECTORIZE_BUILTINS
745 #define TARGET_VECTORIZE_BUILTINS
746 
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
749   arm_builtin_vectorized_function
750 
751 #undef TARGET_VECTOR_ALIGNMENT
752 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
753 
754 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
755 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
756   arm_vector_alignment_reachable
757 
758 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
759 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
760   arm_builtin_support_vector_misalignment
761 
762 #undef TARGET_PREFERRED_RENAME_CLASS
763 #define TARGET_PREFERRED_RENAME_CLASS \
764   arm_preferred_rename_class
765 
766 #undef TARGET_VECTORIZE_VEC_PERM_CONST
767 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
768 
769 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
770 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
771   arm_builtin_vectorization_cost
772 #undef TARGET_VECTORIZE_ADD_STMT_COST
773 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
774 
775 #undef TARGET_CANONICALIZE_COMPARISON
776 #define TARGET_CANONICALIZE_COMPARISON \
777   arm_canonicalize_comparison
778 
779 #undef TARGET_ASAN_SHADOW_OFFSET
780 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
781 
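/* The maximum number of insns that may be placed in an IT block;
   only one when arm_restrict_it is in effect.  */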
782 #undef MAX_INSN_PER_IT_BLOCK
783 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
784 
785 #undef TARGET_CAN_USE_DOLOOP_P
786 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
787 
788 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
789 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
790 
791 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
792 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
793 
794 #undef TARGET_SCHED_FUSION_PRIORITY
795 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
796 
797 #undef  TARGET_ASM_FUNCTION_SECTION
798 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
799 
800 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
801 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
802 
803 #undef TARGET_SECTION_TYPE_FLAGS
804 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
805 
806 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
807 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
808 
809 #undef TARGET_C_EXCESS_PRECISION
810 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
811 
812 /* Although the architecture reserves bits 0 and 1, only the former is
813    used for ARM/Thumb ISA selection in v7 and earlier versions.  */
814 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
815 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
816 
817 #undef TARGET_FIXED_CONDITION_CODE_REGS
818 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
819 
820 #undef TARGET_HARD_REGNO_NREGS
821 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
822 #undef TARGET_HARD_REGNO_MODE_OK
823 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
824 
825 #undef TARGET_MODES_TIEABLE_P
826 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
827 
828 #undef TARGET_CAN_CHANGE_MODE_CLASS
829 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
830 
831 #undef TARGET_CONSTANT_ALIGNMENT
832 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
833 
834 #undef TARGET_INVALID_WITHIN_DOLOOP
835 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
836 
837 #undef TARGET_MD_ASM_ADJUST
838 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
839 
840 /* Obstack for minipool constant handling.  */
841 static struct obstack minipool_obstack;
842 static char *         minipool_startobj;
843 
844 /* The maximum number of skipped insns that
845    will be conditionalised if possible.  */
846 static int max_insns_skipped = 5;
847 
848 extern FILE * asm_out_file;
849 
850 /* True if we are currently building a constant table.  */
851 int making_const_table;
852 
853 /* The processor for which instructions should be scheduled.  */
854 enum processor_type arm_tune = TARGET_CPU_arm_none;
855 
856 /* The current tuning set.  */
857 const struct tune_params *current_tune;
858 
859 /* Which floating point hardware to schedule for.  */
860 int arm_fpu_attr;
861 
862 /* Used for Thumb call_via trampolines.  */
863 rtx thumb_call_via_label[14];
864 static int thumb_call_reg_needed;
865 
866 /* The bits in this mask specify which instruction scheduling options should
867    be used.  */
868 unsigned int tune_flags = 0;
869 
870 /* The highest ARM architecture version supported by the
871    target.  */
872 enum base_architecture arm_base_arch = BASE_ARCH_0;
873 
874 /* Active target architecture and tuning.  */
875 
876 struct arm_build_target arm_active_target;
877 
878 /* The following are used in the arm.md file as equivalents to bits
879    in the above flag variables.  */
880 
881 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
882 int arm_arch4 = 0;
883 
884 /* Nonzero if this chip supports the ARM Architecture 4T extensions.  */
885 int arm_arch4t = 0;
886 
887 /* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
888 int arm_arch5t = 0;
889 
890 /* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
891 int arm_arch5te = 0;
892 
893 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
894 int arm_arch6 = 0;
895 
896 /* Nonzero if this chip supports the ARM 6K extensions.  */
897 int arm_arch6k = 0;
898 
899 /* Nonzero if this chip supports the ARM 6KZ extensions.  */
900 int arm_arch6kz = 0;
901 
902 /* Nonzero if instructions present in ARMv6-M can be used.  */
903 int arm_arch6m = 0;
904 
905 /* Nonzero if this chip supports the ARM 7 extensions.  */
906 int arm_arch7 = 0;
907 
908 /* Nonzero if this chip supports the Large Physical Address Extension.  */
909 int arm_arch_lpae = 0;
910 
911 /* Nonzero if instructions not present in the 'M' profile can be used.  */
912 int arm_arch_notm = 0;
913 
914 /* Nonzero if instructions present in ARMv7E-M can be used.  */
915 int arm_arch7em = 0;
916 
917 /* Nonzero if instructions present in ARMv8 can be used.  */
918 int arm_arch8 = 0;
919 
920 /* Nonzero if this chip supports the ARMv8.1 extensions.  */
921 int arm_arch8_1 = 0;
922 
923 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
924 int arm_arch8_2 = 0;
925 
926 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
927 int arm_arch8_3 = 0;
928 
929 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
930 int arm_arch8_4 = 0;
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932    extensions.  */
933 int arm_arch8_1m_main = 0;
934 
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936    Architecture 8.2.  */
937 int arm_fp16_inst = 0;
938 
939 /* Nonzero if this chip can benefit from load scheduling.  */
940 int arm_ld_sched = 0;
941 
942 /* Nonzero if this chip is a StrongARM.  */
943 int arm_tune_strongarm = 0;
944 
945 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
946 int arm_arch_iwmmxt = 0;
947 
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
949 int arm_arch_iwmmxt2 = 0;
950 
951 /* Nonzero if this chip is an XScale.  */
952 int arm_arch_xscale = 0;
953 
954 /* Nonzero if tuning for XScale.  */
955 int arm_tune_xscale = 0;
956 
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958    This typically means an ARM6 or ARM7 with MMU or MPU.  */
959 int arm_tune_wbuf = 0;
960 
961 /* Nonzero if tuning for Cortex-A9.  */
962 int arm_tune_cortex_a9 = 0;
963 
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965    preprocessor.
966    XXX This is a bit of a hack; it's intended to help work around
967    problems in GLD, which doesn't understand that armv5t code is
968    interworking clean.  */
969 int arm_cpp_interwork = 0;
970 
971 /* Nonzero if chip supports Thumb 1.  */
972 int arm_arch_thumb1;
973 
974 /* Nonzero if chip supports Thumb 2.  */
975 int arm_arch_thumb2;
976 
977 /* Nonzero if chip supports integer division instructions (ARM and Thumb).  */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980 
981 /* Nonzero if chip disallows volatile memory access in IT block.  */
982 int arm_arch_no_volatile_ce;
983 
984 /* Nonzero if we shouldn't use literal pools.  */
985 bool arm_disable_literal_pool = false;
986 
987 /* The register number to be used for the PIC offset register.  */
988 unsigned arm_pic_register = INVALID_REGNUM;
989 
990 enum arm_pcs arm_pcs_default;
991 
992 /* For an explanation of these variables, see final_prescan_insn below.  */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
995 enum arm_cond_code arm_current_cc;
996 
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn.  */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002    Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask.  */
1005 int arm_condexec_masklen = 0;
1006 
1007 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
1008 int arm_arch_crc = 0;
1009 
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
1011 int arm_arch_dotprod = 0;
1012 
1013 /* Nonzero if chip supports the ARMv8-M security extensions.  */
1014 int arm_arch_cmse = 0;
1015 
1016 /* Nonzero if the core has a very small, high-latency multiply unit.  */
1017 int arm_m_profile_small_mul = 0;
1018 
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
1020 int arm_arch_i8mm = 0;
1021 
1022 /* Nonzero if chip supports the BFloat16 instructions.  */
1023 int arm_arch_bf16 = 0;
1024 
1025 /* Nonzero if chip supports the Custom Datapath Extension.  */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
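/* One bit per coprocessor (0-7), presumably used to record in
   arm_arch_cde_coproc which coprocessors provide CDE.  */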
1028 const int arm_arch_cde_coproc_bits[] = {
1029   0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031 
1032 /* The ARM condition codes; the inverse of code i is at index i ^ 1.  */
1033 static const char * const arm_condition_codes[] =
1034 {
1035   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038 
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
1040 int arm_regs_in_sequence[] =
1041 {
1042   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044 
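/* Expand the FP_SYSREGS X-macro into a table of FP system register names.  */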
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047   FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050 
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053 
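/* The low registers (r0-r7) that Thumb-2 code may use as work registers,
   excluding the frame pointer, stack pointer, program counter and, when
   one is in use, the PIC register.  */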
1054 #define THUMB2_WORK_REGS					\
1055   (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)		\
1056 	    | (1 << SP_REGNUM)					\
1057 	    | (1 << PC_REGNUM)					\
1058 	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM	\
1059 	       ? (1 << PIC_OFFSET_TABLE_REGNUM)			\
1060 	       : 0)))
1061 
1062 /* Initialization code.  */
1063 
1064 struct cpu_tune
1065 {
1066   enum processor_type scheduler;
1067   unsigned int tune_flags;
1068   const struct tune_params *tune;
1069 };
1070 
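/* Convenience initializers for the prefetch tuning fields
   { num_slots, l1_size, l1_line_size }; the { 0, -1, -1 } triple marks
   cores for which software prefetching is not considered beneficial.  */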
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073   {								\
1074     num_slots,							\
1075     l1_size,							\
1076     l1_line_size						\
1077   }
1078 
1079 /* ARM generic vectorizer costs.  */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082   1,					/* scalar_stmt_cost.  */
1083   1,					/* scalar load_cost.  */
1084   1,					/* scalar_store_cost.  */
1085   1,					/* vec_stmt_cost.  */
1086   1,					/* vec_to_scalar_cost.  */
1087   1,					/* scalar_to_vec_cost.  */
1088   1,					/* vec_align_load_cost.  */
1089   1,					/* vec_unalign_load_cost.  */
1090   1,					/* vec_unalign_store_cost.  */
1091   1,					/* vec_store_cost.  */
1092   3,					/* cond_taken_branch_cost.  */
1093   1,					/* cond_not_taken_branch_cost.  */
1094 };
1095 
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h.  */
1097 #include "aarch-cost-tables.h"
1098 
1099 
1100 
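/* Per-core "extra cost" tables.  Each entry gives the cost of an operation
   on top of that of a single simple instruction, so a zero entry means the
   operation costs no more than COSTS_N_INSNS (1).  */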
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103   /* ALU */
1104   {
1105     0,			/* arith.  */
1106     0,			/* logical.  */
1107     0,			/* shift.  */
1108     COSTS_N_INSNS (1),	/* shift_reg.  */
1109     COSTS_N_INSNS (1),	/* arith_shift.  */
1110     COSTS_N_INSNS (2),	/* arith_shift_reg.  */
1111     0,			/* log_shift.  */
1112     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1113     COSTS_N_INSNS (1),	/* extend.  */
1114     COSTS_N_INSNS (2),	/* extend_arith.  */
1115     COSTS_N_INSNS (1),	/* bfi.  */
1116     COSTS_N_INSNS (1),	/* bfx.  */
1117     0,			/* clz.  */
1118     0,			/* rev.  */
1119     0,			/* non_exec.  */
1120     true		/* non_exec_costs_exec.  */
1121   },
1122   {
1123     /* MULT SImode */
1124     {
1125       COSTS_N_INSNS (3),	/* simple.  */
1126       COSTS_N_INSNS (3),	/* flag_setting.  */
1127       COSTS_N_INSNS (2),	/* extend.  */
1128       COSTS_N_INSNS (3),	/* add.  */
1129       COSTS_N_INSNS (2),	/* extend_add.  */
1130       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1131     },
1132     /* MULT DImode */
1133     {
1134       0,			/* simple (N/A).  */
1135       0,			/* flag_setting (N/A).  */
1136       COSTS_N_INSNS (4),	/* extend.  */
1137       0,			/* add (N/A).  */
1138       COSTS_N_INSNS (4),	/* extend_add.  */
1139       0				/* idiv (N/A).  */
1140     }
1141   },
1142   /* LD/ST */
1143   {
1144     COSTS_N_INSNS (2),	/* load.  */
1145     COSTS_N_INSNS (2),	/* load_sign_extend.  */
1146     COSTS_N_INSNS (2),	/* ldrd.  */
1147     COSTS_N_INSNS (2),	/* ldm_1st.  */
1148     1,			/* ldm_regs_per_insn_1st.  */
1149     2,			/* ldm_regs_per_insn_subsequent.  */
1150     COSTS_N_INSNS (5),	/* loadf.  */
1151     COSTS_N_INSNS (5),	/* loadd.  */
1152     COSTS_N_INSNS (1),  /* load_unaligned.  */
1153     COSTS_N_INSNS (2),	/* store.  */
1154     COSTS_N_INSNS (2),	/* strd.  */
1155     COSTS_N_INSNS (2),	/* stm_1st.  */
1156     1,			/* stm_regs_per_insn_1st.  */
1157     2,			/* stm_regs_per_insn_subsequent.  */
1158     COSTS_N_INSNS (1),	/* storef.  */
1159     COSTS_N_INSNS (1),	/* stored.  */
1160     COSTS_N_INSNS (1),	/* store_unaligned.  */
1161     COSTS_N_INSNS (1),	/* loadv.  */
1162     COSTS_N_INSNS (1)	/* storev.  */
1163   },
1164   {
1165     /* FP SFmode */
1166     {
1167       COSTS_N_INSNS (14),	/* div.  */
1168       COSTS_N_INSNS (4),	/* mult.  */
1169       COSTS_N_INSNS (7),	/* mult_addsub. */
1170       COSTS_N_INSNS (30),	/* fma.  */
1171       COSTS_N_INSNS (3),	/* addsub.  */
1172       COSTS_N_INSNS (1),	/* fpconst.  */
1173       COSTS_N_INSNS (1),	/* neg.  */
1174       COSTS_N_INSNS (3),	/* compare.  */
1175       COSTS_N_INSNS (3),	/* widen.  */
1176       COSTS_N_INSNS (3),	/* narrow.  */
1177       COSTS_N_INSNS (3),	/* toint.  */
1178       COSTS_N_INSNS (3),	/* fromint.  */
1179       COSTS_N_INSNS (3)		/* roundint.  */
1180     },
1181     /* FP DFmode */
1182     {
1183       COSTS_N_INSNS (24),	/* div.  */
1184       COSTS_N_INSNS (5),	/* mult.  */
1185       COSTS_N_INSNS (8),	/* mult_addsub.  */
1186       COSTS_N_INSNS (30),	/* fma.  */
1187       COSTS_N_INSNS (3),	/* addsub.  */
1188       COSTS_N_INSNS (1),	/* fpconst.  */
1189       COSTS_N_INSNS (1),	/* neg.  */
1190       COSTS_N_INSNS (3),	/* compare.  */
1191       COSTS_N_INSNS (3),	/* widen.  */
1192       COSTS_N_INSNS (3),	/* narrow.  */
1193       COSTS_N_INSNS (3),	/* toint.  */
1194       COSTS_N_INSNS (3),	/* fromint.  */
1195       COSTS_N_INSNS (3)		/* roundint.  */
1196     }
1197   },
1198   /* Vector */
1199   {
1200     COSTS_N_INSNS (1),	/* alu.  */
1201     COSTS_N_INSNS (4)	/* mult.  */
1202   }
1203 };
1204 
1205 const struct cpu_cost_table cortexa8_extra_costs =
1206 {
1207   /* ALU */
1208   {
1209     0,			/* arith.  */
1210     0,			/* logical.  */
1211     COSTS_N_INSNS (1),	/* shift.  */
1212     0,			/* shift_reg.  */
1213     COSTS_N_INSNS (1),	/* arith_shift.  */
1214     0,			/* arith_shift_reg.  */
1215     COSTS_N_INSNS (1),	/* log_shift.  */
1216     0,			/* log_shift_reg.  */
1217     0,			/* extend.  */
1218     0,			/* extend_arith.  */
1219     0,			/* bfi.  */
1220     0,			/* bfx.  */
1221     0,			/* clz.  */
1222     0,			/* rev.  */
1223     0,			/* non_exec.  */
1224     true		/* non_exec_costs_exec.  */
1225   },
1226   {
1227     /* MULT SImode */
1228     {
1229       COSTS_N_INSNS (1),	/* simple.  */
1230       COSTS_N_INSNS (1),	/* flag_setting.  */
1231       COSTS_N_INSNS (1),	/* extend.  */
1232       COSTS_N_INSNS (1),	/* add.  */
1233       COSTS_N_INSNS (1),	/* extend_add.  */
1234       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1235     },
1236     /* MULT DImode */
1237     {
1238       0,			/* simple (N/A).  */
1239       0,			/* flag_setting (N/A).  */
1240       COSTS_N_INSNS (2),	/* extend.  */
1241       0,			/* add (N/A).  */
1242       COSTS_N_INSNS (2),	/* extend_add.  */
1243       0				/* idiv (N/A).  */
1244     }
1245   },
1246   /* LD/ST */
1247   {
1248     COSTS_N_INSNS (1),	/* load.  */
1249     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1250     COSTS_N_INSNS (1),	/* ldrd.  */
1251     COSTS_N_INSNS (1),	/* ldm_1st.  */
1252     1,			/* ldm_regs_per_insn_1st.  */
1253     2,			/* ldm_regs_per_insn_subsequent.  */
1254     COSTS_N_INSNS (1),	/* loadf.  */
1255     COSTS_N_INSNS (1),	/* loadd.  */
1256     COSTS_N_INSNS (1),  /* load_unaligned.  */
1257     COSTS_N_INSNS (1),	/* store.  */
1258     COSTS_N_INSNS (1),	/* strd.  */
1259     COSTS_N_INSNS (1),	/* stm_1st.  */
1260     1,			/* stm_regs_per_insn_1st.  */
1261     2,			/* stm_regs_per_insn_subsequent.  */
1262     COSTS_N_INSNS (1),	/* storef.  */
1263     COSTS_N_INSNS (1),	/* stored.  */
1264     COSTS_N_INSNS (1),	/* store_unaligned.  */
1265     COSTS_N_INSNS (1),	/* loadv.  */
1266     COSTS_N_INSNS (1)	/* storev.  */
1267   },
1268   {
1269     /* FP SFmode */
1270     {
1271       COSTS_N_INSNS (36),	/* div.  */
1272       COSTS_N_INSNS (11),	/* mult.  */
1273       COSTS_N_INSNS (20),	/* mult_addsub. */
1274       COSTS_N_INSNS (30),	/* fma.  */
1275       COSTS_N_INSNS (9),	/* addsub.  */
1276       COSTS_N_INSNS (3),	/* fpconst.  */
1277       COSTS_N_INSNS (3),	/* neg.  */
1278       COSTS_N_INSNS (6),	/* compare.  */
1279       COSTS_N_INSNS (4),	/* widen.  */
1280       COSTS_N_INSNS (4),	/* narrow.  */
1281       COSTS_N_INSNS (8),	/* toint.  */
1282       COSTS_N_INSNS (8),	/* fromint.  */
1283       COSTS_N_INSNS (8)		/* roundint.  */
1284     },
1285     /* FP DFmode */
1286     {
1287       COSTS_N_INSNS (64),	/* div.  */
1288       COSTS_N_INSNS (16),	/* mult.  */
1289       COSTS_N_INSNS (25),	/* mult_addsub.  */
1290       COSTS_N_INSNS (30),	/* fma.  */
1291       COSTS_N_INSNS (9),	/* addsub.  */
1292       COSTS_N_INSNS (3),	/* fpconst.  */
1293       COSTS_N_INSNS (3),	/* neg.  */
1294       COSTS_N_INSNS (6),	/* compare.  */
1295       COSTS_N_INSNS (6),	/* widen.  */
1296       COSTS_N_INSNS (6),	/* narrow.  */
1297       COSTS_N_INSNS (8),	/* toint.  */
1298       COSTS_N_INSNS (8),	/* fromint.  */
1299       COSTS_N_INSNS (8)		/* roundint.  */
1300     }
1301   },
1302   /* Vector */
1303   {
1304     COSTS_N_INSNS (1),	/* alu.  */
1305     COSTS_N_INSNS (4)	/* mult.  */
1306   }
1307 };
1308 
1309 const struct cpu_cost_table cortexa5_extra_costs =
1310 {
1311   /* ALU */
1312   {
1313     0,			/* arith.  */
1314     0,			/* logical.  */
1315     COSTS_N_INSNS (1),	/* shift.  */
1316     COSTS_N_INSNS (1),	/* shift_reg.  */
1317     COSTS_N_INSNS (1),	/* arith_shift.  */
1318     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1319     COSTS_N_INSNS (1),	/* log_shift.  */
1320     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1321     COSTS_N_INSNS (1),	/* extend.  */
1322     COSTS_N_INSNS (1),	/* extend_arith.  */
1323     COSTS_N_INSNS (1),	/* bfi.  */
1324     COSTS_N_INSNS (1),	/* bfx.  */
1325     COSTS_N_INSNS (1),	/* clz.  */
1326     COSTS_N_INSNS (1),	/* rev.  */
1327     0,			/* non_exec.  */
1328     true		/* non_exec_costs_exec.  */
1329   },
1330 
1331   {
1332     /* MULT SImode */
1333     {
1334       0,			/* simple.  */
1335       COSTS_N_INSNS (1),	/* flag_setting.  */
1336       COSTS_N_INSNS (1),	/* extend.  */
1337       COSTS_N_INSNS (1),	/* add.  */
1338       COSTS_N_INSNS (1),	/* extend_add.  */
1339       COSTS_N_INSNS (7)		/* idiv.  */
1340     },
1341     /* MULT DImode */
1342     {
1343       0,			/* simple (N/A).  */
1344       0,			/* flag_setting (N/A).  */
1345       COSTS_N_INSNS (1),	/* extend.  */
1346       0,			/* add.  */
1347       COSTS_N_INSNS (2),	/* extend_add.  */
1348       0				/* idiv (N/A).  */
1349     }
1350   },
1351   /* LD/ST */
1352   {
1353     COSTS_N_INSNS (1),	/* load.  */
1354     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1355     COSTS_N_INSNS (6),	/* ldrd.  */
1356     COSTS_N_INSNS (1),	/* ldm_1st.  */
1357     1,			/* ldm_regs_per_insn_1st.  */
1358     2,			/* ldm_regs_per_insn_subsequent.  */
1359     COSTS_N_INSNS (2),	/* loadf.  */
1360     COSTS_N_INSNS (4),	/* loadd.  */
1361     COSTS_N_INSNS (1),	/* load_unaligned.  */
1362     COSTS_N_INSNS (1),	/* store.  */
1363     COSTS_N_INSNS (3),	/* strd.  */
1364     COSTS_N_INSNS (1),	/* stm_1st.  */
1365     1,			/* stm_regs_per_insn_1st.  */
1366     2,			/* stm_regs_per_insn_subsequent.  */
1367     COSTS_N_INSNS (2),	/* storef.  */
1368     COSTS_N_INSNS (2),	/* stored.  */
1369     COSTS_N_INSNS (1),	/* store_unaligned.  */
1370     COSTS_N_INSNS (1),	/* loadv.  */
1371     COSTS_N_INSNS (1)	/* storev.  */
1372   },
1373   {
1374     /* FP SFmode */
1375     {
1376       COSTS_N_INSNS (15),	/* div.  */
1377       COSTS_N_INSNS (3),	/* mult.  */
1378       COSTS_N_INSNS (7),	/* mult_addsub. */
1379       COSTS_N_INSNS (7),	/* fma.  */
1380       COSTS_N_INSNS (3),	/* addsub.  */
1381       COSTS_N_INSNS (3),	/* fpconst.  */
1382       COSTS_N_INSNS (3),	/* neg.  */
1383       COSTS_N_INSNS (3),	/* compare.  */
1384       COSTS_N_INSNS (3),	/* widen.  */
1385       COSTS_N_INSNS (3),	/* narrow.  */
1386       COSTS_N_INSNS (3),	/* toint.  */
1387       COSTS_N_INSNS (3),	/* fromint.  */
1388       COSTS_N_INSNS (3)		/* roundint.  */
1389     },
1390     /* FP DFmode */
1391     {
1392       COSTS_N_INSNS (30),	/* div.  */
1393       COSTS_N_INSNS (6),	/* mult.  */
1394       COSTS_N_INSNS (10),	/* mult_addsub.  */
1395       COSTS_N_INSNS (7),	/* fma.  */
1396       COSTS_N_INSNS (3),	/* addsub.  */
1397       COSTS_N_INSNS (3),	/* fpconst.  */
1398       COSTS_N_INSNS (3),	/* neg.  */
1399       COSTS_N_INSNS (3),	/* compare.  */
1400       COSTS_N_INSNS (3),	/* widen.  */
1401       COSTS_N_INSNS (3),	/* narrow.  */
1402       COSTS_N_INSNS (3),	/* toint.  */
1403       COSTS_N_INSNS (3),	/* fromint.  */
1404       COSTS_N_INSNS (3)		/* roundint.  */
1405     }
1406   },
1407   /* Vector */
1408   {
1409     COSTS_N_INSNS (1),	/* alu.  */
1410     COSTS_N_INSNS (4)	/* mult.  */
1411   }
1412 };
1413 
1414 
1415 const struct cpu_cost_table cortexa7_extra_costs =
1416 {
1417   /* ALU */
1418   {
1419     0,			/* arith.  */
1420     0,			/* logical.  */
1421     COSTS_N_INSNS (1),	/* shift.  */
1422     COSTS_N_INSNS (1),	/* shift_reg.  */
1423     COSTS_N_INSNS (1),	/* arith_shift.  */
1424     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1425     COSTS_N_INSNS (1),	/* log_shift.  */
1426     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1427     COSTS_N_INSNS (1),	/* extend.  */
1428     COSTS_N_INSNS (1),	/* extend_arith.  */
1429     COSTS_N_INSNS (1),	/* bfi.  */
1430     COSTS_N_INSNS (1),	/* bfx.  */
1431     COSTS_N_INSNS (1),	/* clz.  */
1432     COSTS_N_INSNS (1),	/* rev.  */
1433     0,			/* non_exec.  */
1434     true		/* non_exec_costs_exec.  */
1435   },
1436 
1437   {
1438     /* MULT SImode */
1439     {
1440       0,			/* simple.  */
1441       COSTS_N_INSNS (1),	/* flag_setting.  */
1442       COSTS_N_INSNS (1),	/* extend.  */
1443       COSTS_N_INSNS (1),	/* add.  */
1444       COSTS_N_INSNS (1),	/* extend_add.  */
1445       COSTS_N_INSNS (7)		/* idiv.  */
1446     },
1447     /* MULT DImode */
1448     {
1449       0,			/* simple (N/A).  */
1450       0,			/* flag_setting (N/A).  */
1451       COSTS_N_INSNS (1),	/* extend.  */
1452       0,			/* add.  */
1453       COSTS_N_INSNS (2),	/* extend_add.  */
1454       0				/* idiv (N/A).  */
1455     }
1456   },
1457   /* LD/ST */
1458   {
1459     COSTS_N_INSNS (1),	/* load.  */
1460     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1461     COSTS_N_INSNS (3),	/* ldrd.  */
1462     COSTS_N_INSNS (1),	/* ldm_1st.  */
1463     1,			/* ldm_regs_per_insn_1st.  */
1464     2,			/* ldm_regs_per_insn_subsequent.  */
1465     COSTS_N_INSNS (2),	/* loadf.  */
1466     COSTS_N_INSNS (2),	/* loadd.  */
1467     COSTS_N_INSNS (1),	/* load_unaligned.  */
1468     COSTS_N_INSNS (1),	/* store.  */
1469     COSTS_N_INSNS (3),	/* strd.  */
1470     COSTS_N_INSNS (1),	/* stm_1st.  */
1471     1,			/* stm_regs_per_insn_1st.  */
1472     2,			/* stm_regs_per_insn_subsequent.  */
1473     COSTS_N_INSNS (2),	/* storef.  */
1474     COSTS_N_INSNS (2),	/* stored.  */
1475     COSTS_N_INSNS (1),	/* store_unaligned.  */
1476     COSTS_N_INSNS (1),	/* loadv.  */
1477     COSTS_N_INSNS (1)	/* storev.  */
1478   },
1479   {
1480     /* FP SFmode */
1481     {
1482       COSTS_N_INSNS (15),	/* div.  */
1483       COSTS_N_INSNS (3),	/* mult.  */
1484       COSTS_N_INSNS (7),	/* mult_addsub. */
1485       COSTS_N_INSNS (7),	/* fma.  */
1486       COSTS_N_INSNS (3),	/* addsub.  */
1487       COSTS_N_INSNS (3),	/* fpconst.  */
1488       COSTS_N_INSNS (3),	/* neg.  */
1489       COSTS_N_INSNS (3),	/* compare.  */
1490       COSTS_N_INSNS (3),	/* widen.  */
1491       COSTS_N_INSNS (3),	/* narrow.  */
1492       COSTS_N_INSNS (3),	/* toint.  */
1493       COSTS_N_INSNS (3),	/* fromint.  */
1494       COSTS_N_INSNS (3)		/* roundint.  */
1495     },
1496     /* FP DFmode */
1497     {
1498       COSTS_N_INSNS (30),	/* div.  */
1499       COSTS_N_INSNS (6),	/* mult.  */
1500       COSTS_N_INSNS (10),	/* mult_addsub.  */
1501       COSTS_N_INSNS (7),	/* fma.  */
1502       COSTS_N_INSNS (3),	/* addsub.  */
1503       COSTS_N_INSNS (3),	/* fpconst.  */
1504       COSTS_N_INSNS (3),	/* neg.  */
1505       COSTS_N_INSNS (3),	/* compare.  */
1506       COSTS_N_INSNS (3),	/* widen.  */
1507       COSTS_N_INSNS (3),	/* narrow.  */
1508       COSTS_N_INSNS (3),	/* toint.  */
1509       COSTS_N_INSNS (3),	/* fromint.  */
1510       COSTS_N_INSNS (3)		/* roundint.  */
1511     }
1512   },
1513   /* Vector */
1514   {
1515     COSTS_N_INSNS (1),	/* alu.  */
1516     COSTS_N_INSNS (4)	/* mult.  */
1517   }
1518 };
1519 
1520 const struct cpu_cost_table cortexa12_extra_costs =
1521 {
1522   /* ALU */
1523   {
1524     0,			/* arith.  */
1525     0,			/* logical.  */
1526     0,			/* shift.  */
1527     COSTS_N_INSNS (1),	/* shift_reg.  */
1528     COSTS_N_INSNS (1),	/* arith_shift.  */
1529     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1530     COSTS_N_INSNS (1),	/* log_shift.  */
1531     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1532     0,			/* extend.  */
1533     COSTS_N_INSNS (1),	/* extend_arith.  */
1534     0,			/* bfi.  */
1535     COSTS_N_INSNS (1),	/* bfx.  */
1536     COSTS_N_INSNS (1),	/* clz.  */
1537     COSTS_N_INSNS (1),	/* rev.  */
1538     0,			/* non_exec.  */
1539     true		/* non_exec_costs_exec.  */
1540   },
1541   /* MULT SImode */
1542   {
1543     {
1544       COSTS_N_INSNS (2),	/* simple.  */
1545       COSTS_N_INSNS (3),	/* flag_setting.  */
1546       COSTS_N_INSNS (2),	/* extend.  */
1547       COSTS_N_INSNS (3),	/* add.  */
1548       COSTS_N_INSNS (2),	/* extend_add.  */
1549       COSTS_N_INSNS (18)	/* idiv.  */
1550     },
1551     /* MULT DImode */
1552     {
1553       0,			/* simple (N/A).  */
1554       0,			/* flag_setting (N/A).  */
1555       COSTS_N_INSNS (3),	/* extend.  */
1556       0,			/* add (N/A).  */
1557       COSTS_N_INSNS (3),	/* extend_add.  */
1558       0				/* idiv (N/A).  */
1559     }
1560   },
1561   /* LD/ST */
1562   {
1563     COSTS_N_INSNS (3),	/* load.  */
1564     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1565     COSTS_N_INSNS (3),	/* ldrd.  */
1566     COSTS_N_INSNS (3),	/* ldm_1st.  */
1567     1,			/* ldm_regs_per_insn_1st.  */
1568     2,			/* ldm_regs_per_insn_subsequent.  */
1569     COSTS_N_INSNS (3),	/* loadf.  */
1570     COSTS_N_INSNS (3),	/* loadd.  */
1571     0,			/* load_unaligned.  */
1572     0,			/* store.  */
1573     0,			/* strd.  */
1574     0,			/* stm_1st.  */
1575     1,			/* stm_regs_per_insn_1st.  */
1576     2,			/* stm_regs_per_insn_subsequent.  */
1577     COSTS_N_INSNS (2),	/* storef.  */
1578     COSTS_N_INSNS (2),	/* stored.  */
1579     0,			/* store_unaligned.  */
1580     COSTS_N_INSNS (1),	/* loadv.  */
1581     COSTS_N_INSNS (1)	/* storev.  */
1582   },
1583   {
1584     /* FP SFmode */
1585     {
1586       COSTS_N_INSNS (17),	/* div.  */
1587       COSTS_N_INSNS (4),	/* mult.  */
1588       COSTS_N_INSNS (8),	/* mult_addsub. */
1589       COSTS_N_INSNS (8),	/* fma.  */
1590       COSTS_N_INSNS (4),	/* addsub.  */
1591       COSTS_N_INSNS (2),	/* fpconst. */
1592       COSTS_N_INSNS (2),	/* neg.  */
1593       COSTS_N_INSNS (2),	/* compare.  */
1594       COSTS_N_INSNS (4),	/* widen.  */
1595       COSTS_N_INSNS (4),	/* narrow.  */
1596       COSTS_N_INSNS (4),	/* toint.  */
1597       COSTS_N_INSNS (4),	/* fromint.  */
1598       COSTS_N_INSNS (4)		/* roundint.  */
1599     },
1600     /* FP DFmode */
1601     {
1602       COSTS_N_INSNS (31),	/* div.  */
1603       COSTS_N_INSNS (4),	/* mult.  */
1604       COSTS_N_INSNS (8),	/* mult_addsub.  */
1605       COSTS_N_INSNS (8),	/* fma.  */
1606       COSTS_N_INSNS (4),	/* addsub.  */
1607       COSTS_N_INSNS (2),	/* fpconst.  */
1608       COSTS_N_INSNS (2),	/* neg.  */
1609       COSTS_N_INSNS (2),	/* compare.  */
1610       COSTS_N_INSNS (4),	/* widen.  */
1611       COSTS_N_INSNS (4),	/* narrow.  */
1612       COSTS_N_INSNS (4),	/* toint.  */
1613       COSTS_N_INSNS (4),	/* fromint.  */
1614       COSTS_N_INSNS (4)		/* roundint.  */
1615     }
1616   },
1617   /* Vector */
1618   {
1619     COSTS_N_INSNS (1),	/* alu.  */
1620     COSTS_N_INSNS (4)	/* mult.  */
1621   }
1622 };
1623 
1624 const struct cpu_cost_table cortexa15_extra_costs =
1625 {
1626   /* ALU */
1627   {
1628     0,			/* arith.  */
1629     0,			/* logical.  */
1630     0,			/* shift.  */
1631     0,			/* shift_reg.  */
1632     COSTS_N_INSNS (1),	/* arith_shift.  */
1633     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1634     COSTS_N_INSNS (1),	/* log_shift.  */
1635     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1636     0,			/* extend.  */
1637     COSTS_N_INSNS (1),	/* extend_arith.  */
1638     COSTS_N_INSNS (1),	/* bfi.  */
1639     0,			/* bfx.  */
1640     0,			/* clz.  */
1641     0,			/* rev.  */
1642     0,			/* non_exec.  */
1643     true		/* non_exec_costs_exec.  */
1644   },
1645   /* MULT SImode */
1646   {
1647     {
1648       COSTS_N_INSNS (2),	/* simple.  */
1649       COSTS_N_INSNS (3),	/* flag_setting.  */
1650       COSTS_N_INSNS (2),	/* extend.  */
1651       COSTS_N_INSNS (2),	/* add.  */
1652       COSTS_N_INSNS (2),	/* extend_add.  */
1653       COSTS_N_INSNS (18)	/* idiv.  */
1654     },
1655     /* MULT DImode */
1656     {
1657       0,			/* simple (N/A).  */
1658       0,			/* flag_setting (N/A).  */
1659       COSTS_N_INSNS (3),	/* extend.  */
1660       0,			/* add (N/A).  */
1661       COSTS_N_INSNS (3),	/* extend_add.  */
1662       0				/* idiv (N/A).  */
1663     }
1664   },
1665   /* LD/ST */
1666   {
1667     COSTS_N_INSNS (3),	/* load.  */
1668     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1669     COSTS_N_INSNS (3),	/* ldrd.  */
1670     COSTS_N_INSNS (4),	/* ldm_1st.  */
1671     1,			/* ldm_regs_per_insn_1st.  */
1672     2,			/* ldm_regs_per_insn_subsequent.  */
1673     COSTS_N_INSNS (4),	/* loadf.  */
1674     COSTS_N_INSNS (4),	/* loadd.  */
1675     0,			/* load_unaligned.  */
1676     0,			/* store.  */
1677     0,			/* strd.  */
1678     COSTS_N_INSNS (1),	/* stm_1st.  */
1679     1,			/* stm_regs_per_insn_1st.  */
1680     2,			/* stm_regs_per_insn_subsequent.  */
1681     0,			/* storef.  */
1682     0,			/* stored.  */
1683     0,			/* store_unaligned.  */
1684     COSTS_N_INSNS (1),	/* loadv.  */
1685     COSTS_N_INSNS (1)	/* storev.  */
1686   },
1687   {
1688     /* FP SFmode */
1689     {
1690       COSTS_N_INSNS (17),	/* div.  */
1691       COSTS_N_INSNS (4),	/* mult.  */
1692       COSTS_N_INSNS (8),	/* mult_addsub. */
1693       COSTS_N_INSNS (8),	/* fma.  */
1694       COSTS_N_INSNS (4),	/* addsub.  */
1695       COSTS_N_INSNS (2),	/* fpconst. */
1696       COSTS_N_INSNS (2),	/* neg.  */
1697       COSTS_N_INSNS (5),	/* compare.  */
1698       COSTS_N_INSNS (4),	/* widen.  */
1699       COSTS_N_INSNS (4),	/* narrow.  */
1700       COSTS_N_INSNS (4),	/* toint.  */
1701       COSTS_N_INSNS (4),	/* fromint.  */
1702       COSTS_N_INSNS (4)		/* roundint.  */
1703     },
1704     /* FP DFmode */
1705     {
1706       COSTS_N_INSNS (31),	/* div.  */
1707       COSTS_N_INSNS (4),	/* mult.  */
1708       COSTS_N_INSNS (8),	/* mult_addsub.  */
1709       COSTS_N_INSNS (8),	/* fma.  */
1710       COSTS_N_INSNS (4),	/* addsub.  */
1711       COSTS_N_INSNS (2),	/* fpconst.  */
1712       COSTS_N_INSNS (2),	/* neg.  */
1713       COSTS_N_INSNS (2),	/* compare.  */
1714       COSTS_N_INSNS (4),	/* widen.  */
1715       COSTS_N_INSNS (4),	/* narrow.  */
1716       COSTS_N_INSNS (4),	/* toint.  */
1717       COSTS_N_INSNS (4),	/* fromint.  */
1718       COSTS_N_INSNS (4)		/* roundint.  */
1719     }
1720   },
1721   /* Vector */
1722   {
1723     COSTS_N_INSNS (1),	/* alu.  */
1724     COSTS_N_INSNS (4)	/* mult.  */
1725   }
1726 };
1727 
1728 const struct cpu_cost_table v7m_extra_costs =
1729 {
1730   /* ALU */
1731   {
1732     0,			/* arith.  */
1733     0,			/* logical.  */
1734     0,			/* shift.  */
1735     0,			/* shift_reg.  */
1736     0,			/* arith_shift.  */
1737     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1738     0,			/* log_shift.  */
1739     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1740     0,			/* extend.  */
1741     COSTS_N_INSNS (1),	/* extend_arith.  */
1742     0,			/* bfi.  */
1743     0,			/* bfx.  */
1744     0,			/* clz.  */
1745     0,			/* rev.  */
1746     COSTS_N_INSNS (1),	/* non_exec.  */
1747     false		/* non_exec_costs_exec.  */
1748   },
1749   {
1750     /* MULT SImode */
1751     {
1752       COSTS_N_INSNS (1),	/* simple.  */
1753       COSTS_N_INSNS (1),	/* flag_setting.  */
1754       COSTS_N_INSNS (2),	/* extend.  */
1755       COSTS_N_INSNS (1),	/* add.  */
1756       COSTS_N_INSNS (3),	/* extend_add.  */
1757       COSTS_N_INSNS (8)		/* idiv.  */
1758     },
1759     /* MULT DImode */
1760     {
1761       0,			/* simple (N/A).  */
1762       0,			/* flag_setting (N/A).  */
1763       COSTS_N_INSNS (2),	/* extend.  */
1764       0,			/* add (N/A).  */
1765       COSTS_N_INSNS (3),	/* extend_add.  */
1766       0				/* idiv (N/A).  */
1767     }
1768   },
1769   /* LD/ST */
1770   {
1771     COSTS_N_INSNS (2),	/* load.  */
1772     0,			/* load_sign_extend.  */
1773     COSTS_N_INSNS (3),	/* ldrd.  */
1774     COSTS_N_INSNS (2),	/* ldm_1st.  */
1775     1,			/* ldm_regs_per_insn_1st.  */
1776     1,			/* ldm_regs_per_insn_subsequent.  */
1777     COSTS_N_INSNS (2),	/* loadf.  */
1778     COSTS_N_INSNS (3),	/* loadd.  */
1779     COSTS_N_INSNS (1),  /* load_unaligned.  */
1780     COSTS_N_INSNS (2),	/* store.  */
1781     COSTS_N_INSNS (3),	/* strd.  */
1782     COSTS_N_INSNS (2),	/* stm_1st.  */
1783     1,			/* stm_regs_per_insn_1st.  */
1784     1,			/* stm_regs_per_insn_subsequent.  */
1785     COSTS_N_INSNS (2),	/* storef.  */
1786     COSTS_N_INSNS (3),	/* stored.  */
1787     COSTS_N_INSNS (1),	/* store_unaligned.  */
1788     COSTS_N_INSNS (1),	/* loadv.  */
1789     COSTS_N_INSNS (1)	/* storev.  */
1790   },
1791   {
1792     /* FP SFmode */
1793     {
1794       COSTS_N_INSNS (7),	/* div.  */
1795       COSTS_N_INSNS (2),	/* mult.  */
1796       COSTS_N_INSNS (5),	/* mult_addsub.  */
1797       COSTS_N_INSNS (3),	/* fma.  */
1798       COSTS_N_INSNS (1),	/* addsub.  */
1799       0,			/* fpconst.  */
1800       0,			/* neg.  */
1801       0,			/* compare.  */
1802       0,			/* widen.  */
1803       0,			/* narrow.  */
1804       0,			/* toint.  */
1805       0,			/* fromint.  */
1806       0				/* roundint.  */
1807     },
1808     /* FP DFmode */
1809     {
1810       COSTS_N_INSNS (15),	/* div.  */
1811       COSTS_N_INSNS (5),	/* mult.  */
1812       COSTS_N_INSNS (7),	/* mult_addsub.  */
1813       COSTS_N_INSNS (7),	/* fma.  */
1814       COSTS_N_INSNS (3),	/* addsub.  */
1815       0,			/* fpconst.  */
1816       0,			/* neg.  */
1817       0,			/* compare.  */
1818       0,			/* widen.  */
1819       0,			/* narrow.  */
1820       0,			/* toint.  */
1821       0,			/* fromint.  */
1822       0				/* roundint.  */
1823     }
1824   },
1825   /* Vector */
1826   {
1827     COSTS_N_INSNS (1),	/* alu.  */
1828     COSTS_N_INSNS (4)	/* mult.  */
1829   }
1830 };
1831 
1832 const struct addr_mode_cost_table generic_addr_mode_costs =
1833 {
1834   /* int.  */
1835   {
1836     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1837     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1838     COSTS_N_INSNS (0)	/* AMO_WB.  */
1839   },
1840   /* float.  */
1841   {
1842     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1843     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1844     COSTS_N_INSNS (0)	/* AMO_WB.  */
1845   },
1846   /* vector.  */
1847   {
1848     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1849     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1850     COSTS_N_INSNS (0)	/* AMO_WB.  */
1851   }
1852 };
1853 
1854 const struct tune_params arm_slowmul_tune =
1855 {
1856   &generic_extra_costs,			/* Insn extra costs.  */
1857   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1858   NULL,					/* Sched adj cost.  */
1859   arm_default_branch_cost,
1860   &arm_default_vec_cost,
1861   3,						/* Constant limit.  */
1862   5,						/* Max cond insns.  */
1863   8,						/* Memset max inline.  */
1864   1,						/* Issue rate.  */
1865   ARM_PREFETCH_NOT_BENEFICIAL,
1866   tune_params::PREF_CONST_POOL_TRUE,
1867   tune_params::PREF_LDRD_FALSE,
1868   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1869   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1870   tune_params::DISPARAGE_FLAGS_NEITHER,
1871   tune_params::PREF_NEON_STRINGOPS_FALSE,
1872   tune_params::FUSE_NOTHING,
1873   tune_params::SCHED_AUTOPREF_OFF
1874 };
1875 
1876 const struct tune_params arm_fastmul_tune =
1877 {
1878   &generic_extra_costs,			/* Insn extra costs.  */
1879   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1880   NULL,					/* Sched adj cost.  */
1881   arm_default_branch_cost,
1882   &arm_default_vec_cost,
1883   1,						/* Constant limit.  */
1884   5,						/* Max cond insns.  */
1885   8,						/* Memset max inline.  */
1886   1,						/* Issue rate.  */
1887   ARM_PREFETCH_NOT_BENEFICIAL,
1888   tune_params::PREF_CONST_POOL_TRUE,
1889   tune_params::PREF_LDRD_FALSE,
1890   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1891   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1892   tune_params::DISPARAGE_FLAGS_NEITHER,
1893   tune_params::PREF_NEON_STRINGOPS_FALSE,
1894   tune_params::FUSE_NOTHING,
1895   tune_params::SCHED_AUTOPREF_OFF
1896 };
1897 
1898 /* StrongARM has early execution of branches, so a sequence that is worth
1899    skipping is shorter.  Set max_insns_skipped to a lower value.  */
1900 
1901 const struct tune_params arm_strongarm_tune =
1902 {
1903   &generic_extra_costs,			/* Insn extra costs.  */
1904   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1905   NULL,					/* Sched adj cost.  */
1906   arm_default_branch_cost,
1907   &arm_default_vec_cost,
1908   1,						/* Constant limit.  */
1909   3,						/* Max cond insns.  */
1910   8,						/* Memset max inline.  */
1911   1,						/* Issue rate.  */
1912   ARM_PREFETCH_NOT_BENEFICIAL,
1913   tune_params::PREF_CONST_POOL_TRUE,
1914   tune_params::PREF_LDRD_FALSE,
1915   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1916   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1917   tune_params::DISPARAGE_FLAGS_NEITHER,
1918   tune_params::PREF_NEON_STRINGOPS_FALSE,
1919   tune_params::FUSE_NOTHING,
1920   tune_params::SCHED_AUTOPREF_OFF
1921 };
1922 
1923 const struct tune_params arm_xscale_tune =
1924 {
1925   &generic_extra_costs,			/* Insn extra costs.  */
1926   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1927   xscale_sched_adjust_cost,
1928   arm_default_branch_cost,
1929   &arm_default_vec_cost,
1930   2,						/* Constant limit.  */
1931   3,						/* Max cond insns.  */
1932   8,						/* Memset max inline.  */
1933   1,						/* Issue rate.  */
1934   ARM_PREFETCH_NOT_BENEFICIAL,
1935   tune_params::PREF_CONST_POOL_TRUE,
1936   tune_params::PREF_LDRD_FALSE,
1937   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1938   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1939   tune_params::DISPARAGE_FLAGS_NEITHER,
1940   tune_params::PREF_NEON_STRINGOPS_FALSE,
1941   tune_params::FUSE_NOTHING,
1942   tune_params::SCHED_AUTOPREF_OFF
1943 };
1944 
1945 const struct tune_params arm_9e_tune =
1946 {
1947   &generic_extra_costs,			/* Insn extra costs.  */
1948   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1949   NULL,					/* Sched adj cost.  */
1950   arm_default_branch_cost,
1951   &arm_default_vec_cost,
1952   1,						/* Constant limit.  */
1953   5,						/* Max cond insns.  */
1954   8,						/* Memset max inline.  */
1955   1,						/* Issue rate.  */
1956   ARM_PREFETCH_NOT_BENEFICIAL,
1957   tune_params::PREF_CONST_POOL_TRUE,
1958   tune_params::PREF_LDRD_FALSE,
1959   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1960   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1961   tune_params::DISPARAGE_FLAGS_NEITHER,
1962   tune_params::PREF_NEON_STRINGOPS_FALSE,
1963   tune_params::FUSE_NOTHING,
1964   tune_params::SCHED_AUTOPREF_OFF
1965 };
1966 
1967 const struct tune_params arm_marvell_pj4_tune =
1968 {
1969   &generic_extra_costs,			/* Insn extra costs.  */
1970   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1971   NULL,					/* Sched adj cost.  */
1972   arm_default_branch_cost,
1973   &arm_default_vec_cost,
1974   1,						/* Constant limit.  */
1975   5,						/* Max cond insns.  */
1976   8,						/* Memset max inline.  */
1977   2,						/* Issue rate.  */
1978   ARM_PREFETCH_NOT_BENEFICIAL,
1979   tune_params::PREF_CONST_POOL_TRUE,
1980   tune_params::PREF_LDRD_FALSE,
1981   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1982   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1983   tune_params::DISPARAGE_FLAGS_NEITHER,
1984   tune_params::PREF_NEON_STRINGOPS_FALSE,
1985   tune_params::FUSE_NOTHING,
1986   tune_params::SCHED_AUTOPREF_OFF
1987 };
1988 
1989 const struct tune_params arm_v6t2_tune =
1990 {
1991   &generic_extra_costs,			/* Insn extra costs.  */
1992   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1993   NULL,					/* Sched adj cost.  */
1994   arm_default_branch_cost,
1995   &arm_default_vec_cost,
1996   1,						/* Constant limit.  */
1997   5,						/* Max cond insns.  */
1998   8,						/* Memset max inline.  */
1999   1,						/* Issue rate.  */
2000   ARM_PREFETCH_NOT_BENEFICIAL,
2001   tune_params::PREF_CONST_POOL_FALSE,
2002   tune_params::PREF_LDRD_FALSE,
2003   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2004   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2005   tune_params::DISPARAGE_FLAGS_NEITHER,
2006   tune_params::PREF_NEON_STRINGOPS_FALSE,
2007   tune_params::FUSE_NOTHING,
2008   tune_params::SCHED_AUTOPREF_OFF
2009 };
2010 
2011 
2012 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
2013 const struct tune_params arm_cortex_tune =
2014 {
2015   &generic_extra_costs,
2016   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2017   NULL,					/* Sched adj cost.  */
2018   arm_default_branch_cost,
2019   &arm_default_vec_cost,
2020   1,						/* Constant limit.  */
2021   5,						/* Max cond insns.  */
2022   8,						/* Memset max inline.  */
2023   2,						/* Issue rate.  */
2024   ARM_PREFETCH_NOT_BENEFICIAL,
2025   tune_params::PREF_CONST_POOL_FALSE,
2026   tune_params::PREF_LDRD_FALSE,
2027   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2028   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2029   tune_params::DISPARAGE_FLAGS_NEITHER,
2030   tune_params::PREF_NEON_STRINGOPS_FALSE,
2031   tune_params::FUSE_NOTHING,
2032   tune_params::SCHED_AUTOPREF_OFF
2033 };
2034 
2035 const struct tune_params arm_cortex_a8_tune =
2036 {
2037   &cortexa8_extra_costs,
2038   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2039   NULL,					/* Sched adj cost.  */
2040   arm_default_branch_cost,
2041   &arm_default_vec_cost,
2042   1,						/* Constant limit.  */
2043   5,						/* Max cond insns.  */
2044   8,						/* Memset max inline.  */
2045   2,						/* Issue rate.  */
2046   ARM_PREFETCH_NOT_BENEFICIAL,
2047   tune_params::PREF_CONST_POOL_FALSE,
2048   tune_params::PREF_LDRD_FALSE,
2049   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2050   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2051   tune_params::DISPARAGE_FLAGS_NEITHER,
2052   tune_params::PREF_NEON_STRINGOPS_TRUE,
2053   tune_params::FUSE_NOTHING,
2054   tune_params::SCHED_AUTOPREF_OFF
2055 };
2056 
2057 const struct tune_params arm_cortex_a7_tune =
2058 {
2059   &cortexa7_extra_costs,
2060   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2061   NULL,					/* Sched adj cost.  */
2062   arm_default_branch_cost,
2063   &arm_default_vec_cost,
2064   1,						/* Constant limit.  */
2065   5,						/* Max cond insns.  */
2066   8,						/* Memset max inline.  */
2067   2,						/* Issue rate.  */
2068   ARM_PREFETCH_NOT_BENEFICIAL,
2069   tune_params::PREF_CONST_POOL_FALSE,
2070   tune_params::PREF_LDRD_FALSE,
2071   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2072   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2073   tune_params::DISPARAGE_FLAGS_NEITHER,
2074   tune_params::PREF_NEON_STRINGOPS_TRUE,
2075   tune_params::FUSE_NOTHING,
2076   tune_params::SCHED_AUTOPREF_OFF
2077 };
2078 
2079 const struct tune_params arm_cortex_a15_tune =
2080 {
2081   &cortexa15_extra_costs,
2082   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2083   NULL,					/* Sched adj cost.  */
2084   arm_default_branch_cost,
2085   &arm_default_vec_cost,
2086   1,						/* Constant limit.  */
2087   2,						/* Max cond insns.  */
2088   8,						/* Memset max inline.  */
2089   3,						/* Issue rate.  */
2090   ARM_PREFETCH_NOT_BENEFICIAL,
2091   tune_params::PREF_CONST_POOL_FALSE,
2092   tune_params::PREF_LDRD_TRUE,
2093   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2094   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2095   tune_params::DISPARAGE_FLAGS_ALL,
2096   tune_params::PREF_NEON_STRINGOPS_TRUE,
2097   tune_params::FUSE_NOTHING,
2098   tune_params::SCHED_AUTOPREF_FULL
2099 };
2100 
2101 const struct tune_params arm_cortex_a35_tune =
2102 {
2103   &cortexa53_extra_costs,
2104   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2105   NULL,					/* Sched adj cost.  */
2106   arm_default_branch_cost,
2107   &arm_default_vec_cost,
2108   1,						/* Constant limit.  */
2109   5,						/* Max cond insns.  */
2110   8,						/* Memset max inline.  */
2111   1,						/* Issue rate.  */
2112   ARM_PREFETCH_NOT_BENEFICIAL,
2113   tune_params::PREF_CONST_POOL_FALSE,
2114   tune_params::PREF_LDRD_FALSE,
2115   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2116   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2117   tune_params::DISPARAGE_FLAGS_NEITHER,
2118   tune_params::PREF_NEON_STRINGOPS_TRUE,
2119   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2120   tune_params::SCHED_AUTOPREF_OFF
2121 };
2122 
2123 const struct tune_params arm_cortex_a53_tune =
2124 {
2125   &cortexa53_extra_costs,
2126   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2127   NULL,					/* Sched adj cost.  */
2128   arm_default_branch_cost,
2129   &arm_default_vec_cost,
2130   1,						/* Constant limit.  */
2131   5,						/* Max cond insns.  */
2132   8,						/* Memset max inline.  */
2133   2,						/* Issue rate.  */
2134   ARM_PREFETCH_NOT_BENEFICIAL,
2135   tune_params::PREF_CONST_POOL_FALSE,
2136   tune_params::PREF_LDRD_FALSE,
2137   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2138   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2139   tune_params::DISPARAGE_FLAGS_NEITHER,
2140   tune_params::PREF_NEON_STRINGOPS_TRUE,
2141   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2142   tune_params::SCHED_AUTOPREF_OFF
2143 };
2144 
2145 const struct tune_params arm_cortex_a57_tune =
2146 {
2147   &cortexa57_extra_costs,
2148   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2149   NULL,					/* Sched adj cost.  */
2150   arm_default_branch_cost,
2151   &arm_default_vec_cost,
2152   1,						/* Constant limit.  */
2153   2,						/* Max cond insns.  */
2154   8,						/* Memset max inline.  */
2155   3,						/* Issue rate.  */
2156   ARM_PREFETCH_NOT_BENEFICIAL,
2157   tune_params::PREF_CONST_POOL_FALSE,
2158   tune_params::PREF_LDRD_TRUE,
2159   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2160   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2161   tune_params::DISPARAGE_FLAGS_ALL,
2162   tune_params::PREF_NEON_STRINGOPS_TRUE,
2163   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2164   tune_params::SCHED_AUTOPREF_FULL
2165 };
2166 
2167 const struct tune_params arm_exynosm1_tune =
2168 {
2169   &exynosm1_extra_costs,
2170   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2171   NULL,						/* Sched adj cost.  */
2172   arm_default_branch_cost,
2173   &arm_default_vec_cost,
2174   1,						/* Constant limit.  */
2175   2,						/* Max cond insns.  */
2176   8,						/* Memset max inline.  */
2177   3,						/* Issue rate.  */
2178   ARM_PREFETCH_NOT_BENEFICIAL,
2179   tune_params::PREF_CONST_POOL_FALSE,
2180   tune_params::PREF_LDRD_TRUE,
2181   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
2182   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
2183   tune_params::DISPARAGE_FLAGS_ALL,
2184   tune_params::PREF_NEON_STRINGOPS_TRUE,
2185   tune_params::FUSE_NOTHING,
2186   tune_params::SCHED_AUTOPREF_OFF
2187 };
2188 
2189 const struct tune_params arm_xgene1_tune =
2190 {
2191   &xgene1_extra_costs,
2192   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2193   NULL,					/* Sched adj cost.  */
2194   arm_default_branch_cost,
2195   &arm_default_vec_cost,
2196   1,						/* Constant limit.  */
2197   2,						/* Max cond insns.  */
2198   32,						/* Memset max inline.  */
2199   4,						/* Issue rate.  */
2200   ARM_PREFETCH_NOT_BENEFICIAL,
2201   tune_params::PREF_CONST_POOL_FALSE,
2202   tune_params::PREF_LDRD_TRUE,
2203   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2204   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2205   tune_params::DISPARAGE_FLAGS_ALL,
2206   tune_params::PREF_NEON_STRINGOPS_FALSE,
2207   tune_params::FUSE_NOTHING,
2208   tune_params::SCHED_AUTOPREF_OFF
2209 };
2210 
2211 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2212    less appealing.  Set max_insns_skipped to a low value.  */
2213 
2214 const struct tune_params arm_cortex_a5_tune =
2215 {
2216   &cortexa5_extra_costs,
2217   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2218   NULL,					/* Sched adj cost.  */
2219   arm_cortex_a5_branch_cost,
2220   &arm_default_vec_cost,
2221   1,						/* Constant limit.  */
2222   1,						/* Max cond insns.  */
2223   8,						/* Memset max inline.  */
2224   2,						/* Issue rate.  */
2225   ARM_PREFETCH_NOT_BENEFICIAL,
2226   tune_params::PREF_CONST_POOL_FALSE,
2227   tune_params::PREF_LDRD_FALSE,
2228   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2229   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2230   tune_params::DISPARAGE_FLAGS_NEITHER,
2231   tune_params::PREF_NEON_STRINGOPS_TRUE,
2232   tune_params::FUSE_NOTHING,
2233   tune_params::SCHED_AUTOPREF_OFF
2234 };
2235 
2236 const struct tune_params arm_cortex_a9_tune =
2237 {
2238   &cortexa9_extra_costs,
2239   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2240   cortex_a9_sched_adjust_cost,
2241   arm_default_branch_cost,
2242   &arm_default_vec_cost,
2243   1,						/* Constant limit.  */
2244   5,						/* Max cond insns.  */
2245   8,						/* Memset max inline.  */
2246   2,						/* Issue rate.  */
2247   ARM_PREFETCH_BENEFICIAL(4,32,32),
2248   tune_params::PREF_CONST_POOL_FALSE,
2249   tune_params::PREF_LDRD_FALSE,
2250   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2251   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2252   tune_params::DISPARAGE_FLAGS_NEITHER,
2253   tune_params::PREF_NEON_STRINGOPS_FALSE,
2254   tune_params::FUSE_NOTHING,
2255   tune_params::SCHED_AUTOPREF_OFF
2256 };
2257 
2258 const struct tune_params arm_cortex_a12_tune =
2259 {
2260   &cortexa12_extra_costs,
2261   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2262   NULL,					/* Sched adj cost.  */
2263   arm_default_branch_cost,
2264   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2265   1,						/* Constant limit.  */
2266   2,						/* Max cond insns.  */
2267   8,						/* Memset max inline.  */
2268   2,						/* Issue rate.  */
2269   ARM_PREFETCH_NOT_BENEFICIAL,
2270   tune_params::PREF_CONST_POOL_FALSE,
2271   tune_params::PREF_LDRD_TRUE,
2272   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2273   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2274   tune_params::DISPARAGE_FLAGS_ALL,
2275   tune_params::PREF_NEON_STRINGOPS_TRUE,
2276   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2277   tune_params::SCHED_AUTOPREF_OFF
2278 };
2279 
2280 const struct tune_params arm_cortex_a73_tune =
2281 {
2282   &cortexa57_extra_costs,
2283   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2284   NULL,						/* Sched adj cost.  */
2285   arm_default_branch_cost,
2286   &arm_default_vec_cost,			/* Vectorizer costs.  */
2287   1,						/* Constant limit.  */
2288   2,						/* Max cond insns.  */
2289   8,						/* Memset max inline.  */
2290   2,						/* Issue rate.  */
2291   ARM_PREFETCH_NOT_BENEFICIAL,
2292   tune_params::PREF_CONST_POOL_FALSE,
2293   tune_params::PREF_LDRD_TRUE,
2294   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2295   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2296   tune_params::DISPARAGE_FLAGS_ALL,
2297   tune_params::PREF_NEON_STRINGOPS_TRUE,
2298   FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2299   tune_params::SCHED_AUTOPREF_FULL
2300 };
2301 
2302 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a
2303    single cycle, so a MOVW/MOVT pair costs two cycles.  An LDR from the
2304    constant pool also takes two cycles to execute, but mildly increases
2305    pipelining opportunity (consecutive loads/stores can be pipelined
2306    together, saving one cycle), and may also improve icache utilisation.
2307    Hence we prefer the constant pool for such processors.  */
2308 
2309 const struct tune_params arm_v7m_tune =
2310 {
2311   &v7m_extra_costs,
2312   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2313   NULL,					/* Sched adj cost.  */
2314   arm_cortex_m_branch_cost,
2315   &arm_default_vec_cost,
2316   1,						/* Constant limit.  */
2317   2,						/* Max cond insns.  */
2318   8,						/* Memset max inline.  */
2319   1,						/* Issue rate.  */
2320   ARM_PREFETCH_NOT_BENEFICIAL,
2321   tune_params::PREF_CONST_POOL_TRUE,
2322   tune_params::PREF_LDRD_FALSE,
2323   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2324   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2325   tune_params::DISPARAGE_FLAGS_NEITHER,
2326   tune_params::PREF_NEON_STRINGOPS_FALSE,
2327   tune_params::FUSE_NOTHING,
2328   tune_params::SCHED_AUTOPREF_OFF
2329 };
2330 
2331 /* Cortex-M7 tuning.  */
2332 
2333 const struct tune_params arm_cortex_m7_tune =
2334 {
2335   &v7m_extra_costs,
2336   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2337   NULL,					/* Sched adj cost.  */
2338   arm_cortex_m7_branch_cost,
2339   &arm_default_vec_cost,
2340   0,						/* Constant limit.  */
2341   1,						/* Max cond insns.  */
2342   8,						/* Memset max inline.  */
2343   2,						/* Issue rate.  */
2344   ARM_PREFETCH_NOT_BENEFICIAL,
2345   tune_params::PREF_CONST_POOL_TRUE,
2346   tune_params::PREF_LDRD_FALSE,
2347   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2348   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2349   tune_params::DISPARAGE_FLAGS_NEITHER,
2350   tune_params::PREF_NEON_STRINGOPS_FALSE,
2351   tune_params::FUSE_NOTHING,
2352   tune_params::SCHED_AUTOPREF_OFF
2353 };
2354 
2355 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2356    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
2357    cortex-m23.  */
2358 const struct tune_params arm_v6m_tune =
2359 {
2360   &generic_extra_costs,			/* Insn extra costs.  */
2361   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2362   NULL,					/* Sched adj cost.  */
2363   arm_default_branch_cost,
2364   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2365   1,						/* Constant limit.  */
2366   5,						/* Max cond insns.  */
2367   8,						/* Memset max inline.  */
2368   1,						/* Issue rate.  */
2369   ARM_PREFETCH_NOT_BENEFICIAL,
2370   tune_params::PREF_CONST_POOL_FALSE,
2371   tune_params::PREF_LDRD_FALSE,
2372   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2373   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2374   tune_params::DISPARAGE_FLAGS_NEITHER,
2375   tune_params::PREF_NEON_STRINGOPS_FALSE,
2376   tune_params::FUSE_NOTHING,
2377   tune_params::SCHED_AUTOPREF_OFF
2378 };
2379 
2380 const struct tune_params arm_fa726te_tune =
2381 {
2382   &generic_extra_costs,				/* Insn extra costs.  */
2383   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2384   fa726te_sched_adjust_cost,
2385   arm_default_branch_cost,
2386   &arm_default_vec_cost,
2387   1,						/* Constant limit.  */
2388   5,						/* Max cond insns.  */
2389   8,						/* Memset max inline.  */
2390   2,						/* Issue rate.  */
2391   ARM_PREFETCH_NOT_BENEFICIAL,
2392   tune_params::PREF_CONST_POOL_TRUE,
2393   tune_params::PREF_LDRD_FALSE,
2394   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2395   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2396   tune_params::DISPARAGE_FLAGS_NEITHER,
2397   tune_params::PREF_NEON_STRINGOPS_FALSE,
2398   tune_params::FUSE_NOTHING,
2399   tune_params::SCHED_AUTOPREF_OFF
2400 };
2401 
2402 /* Auto-generated CPU, FPU and architecture tables.  */
2403 #include "arm-cpu-data.h"
2404 
2405 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2406    is replaced by the architecture name (eg. 8A) in arm_option_override () and
2407    is thus chosen to be big enough to hold the longest architecture name.  */
2408 
2409 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2410 
2411 /* Supported TLS relocations.  */
2412 
2413 enum tls_reloc {
2414   TLS_GD32,
2415   TLS_GD32_FDPIC,
2416   TLS_LDM32,
2417   TLS_LDM32_FDPIC,
2418   TLS_LDO32,
2419   TLS_IE32,
2420   TLS_IE32_FDPIC,
2421   TLS_LE32,
2422   TLS_DESCSEQ	/* GNU scheme */
2423 };
2424 
2425 /* The maximum number of insns to be used when loading a constant.  */
2426 inline static int
2427 arm_constant_limit (bool size_p)
2428 {
2429   return size_p ? 1 : current_tune->constant_limit;
2430 }
2431 
2432 /* Emit an insn that's a simple single-set.  Both the operands must be known
2433    to be valid.  */
2434 inline static rtx_insn *
2435 emit_set_insn (rtx x, rtx y)
2436 {
2437   return emit_insn (gen_rtx_SET (x, y));
2438 }
2439 
2440 /* Return the number of bits set in VALUE.  */
2441 static unsigned
2442 bit_count (unsigned long value)
2443 {
2444   unsigned long count = 0;
2445 
2446   while (value)
2447     {
2448       count++;
2449       value &= value - 1;  /* Clear the least-significant set bit.  */
2450     }
2451 
2452   return count;
2453 }
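/* For example, for VALUE = 0b101100 the masking step walks
   0b101100 -> 0b101000 -> 0b100000 -> 0, so the function returns 3:
   each "value &= value - 1" clears exactly the lowest set bit per
   iteration.  */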
2454 
2455 /* Return the number of bits set in BMAP.  */
2456 static unsigned
2457 bitmap_popcount (const sbitmap bmap)
2458 {
2459   unsigned int count = 0;
2460   unsigned int n = 0;
2461   sbitmap_iterator sbi;
2462 
2463   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2464     count++;
2465   return count;
2466 }
2467 
2468 typedef struct
2469 {
2470   machine_mode mode;
2471   const char *name;
2472 } arm_fixed_mode_set;
2473 
2474 /* A small helper for setting fixed-point library libfuncs.  */
2475 
2476 static void
2477 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2478 			     const char *funcname, const char *modename,
2479 			     int num_suffix)
2480 {
2481   char buffer[50];
2482 
2483   if (num_suffix == 0)
2484     sprintf (buffer, "__gnu_%s%s", funcname, modename);
2485   else
2486     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2487 
2488   set_optab_libfunc (optable, mode, buffer);
2489 }
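/* For example, the fixed-point loop in arm_init_libfuncs below effectively
   calls arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3),
   which registers "__gnu_addqq3" as the addition libcall for QQmode,
   matching the __gnu_-prefixed names provided by libgcc's fixed-point
   routines.  */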
2490 
2491 static void
2492 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2493 			    machine_mode from, const char *funcname,
2494 			    const char *toname, const char *fromname)
2495 {
2496   char buffer[50];
2497   const char *maybe_suffix_2 = "";
2498 
2499   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2500   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2501       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2502       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2503     maybe_suffix_2 = "2";
2504 
2505   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2506 	   maybe_suffix_2);
2507 
2508   set_conv_libfunc (optable, to, from, buffer);
2509 }
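/* For example, a conversion from SFmode to SAmode ("fract", from "sf" to
   "sa") is registered as "__gnu_fractsfsa", while a QQmode to HQmode
   conversion gets the "2" suffix ("__gnu_fractqqhq2") because both modes
   are signed fract modes, mirroring the suffix selection in libgcc's
   fixed-bit.h.  */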
2510 
2511 static GTY(()) rtx speculation_barrier_libfunc;
2512 
2513 /* Record that we have no arithmetic or comparison libfuncs for
2514    machine mode MODE.  */
2515 
2516 static void
2517 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2518 {
2519   /* Arithmetic.  */
2520   set_optab_libfunc (add_optab, mode, NULL);
2521   set_optab_libfunc (sdiv_optab, mode, NULL);
2522   set_optab_libfunc (smul_optab, mode, NULL);
2523   set_optab_libfunc (neg_optab, mode, NULL);
2524   set_optab_libfunc (sub_optab, mode, NULL);
2525 
2526   /* Comparisons.  */
2527   set_optab_libfunc (eq_optab, mode, NULL);
2528   set_optab_libfunc (ne_optab, mode, NULL);
2529   set_optab_libfunc (lt_optab, mode, NULL);
2530   set_optab_libfunc (le_optab, mode, NULL);
2531   set_optab_libfunc (ge_optab, mode, NULL);
2532   set_optab_libfunc (gt_optab, mode, NULL);
2533   set_optab_libfunc (unord_optab, mode, NULL);
2534 }
2535 
2536 /* Set up library functions unique to ARM.  */
2537 static void
2538 arm_init_libfuncs (void)
2539 {
2540   machine_mode mode_iter;
2541 
2542   /* For Linux, we have access to kernel support for atomic operations.  */
2543   if (arm_abi == ARM_ABI_AAPCS_LINUX)
2544     init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2545 
2546   /* There are no special library functions unless we are using the
2547      ARM BPABI.  */
2548   if (!TARGET_BPABI)
2549     return;
2550 
2551   /* The functions below are described in Section 4 of the "Run-Time
2552      ABI for the ARM architecture", Version 1.0.  */
2553 
2554   /* Double-precision floating-point arithmetic.  Table 2.  */
2555   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2556   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2557   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2558   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2559   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2560 
2561   /* Double-precision comparisons.  Table 3.  */
2562   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2563   set_optab_libfunc (ne_optab, DFmode, NULL);
2564   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2565   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2566   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2567   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2568   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2569 
2570   /* Single-precision floating-point arithmetic.  Table 4.  */
2571   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2572   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2573   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2574   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2575   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2576 
2577   /* Single-precision comparisons.  Table 5.  */
2578   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2579   set_optab_libfunc (ne_optab, SFmode, NULL);
2580   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2581   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2582   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2583   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2584   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2585 
2586   /* Floating-point to integer conversions.  Table 6.  */
2587   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2588   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2589   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2590   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2591   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2592   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2593   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2594   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2595 
2596   /* Conversions between floating types.  Table 7.  */
2597   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2598   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2599 
2600   /* Integer to floating-point conversions.  Table 8.  */
2601   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2602   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2603   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2604   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2605   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2606   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2607   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2608   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2609 
2610   /* Long long.  Table 9.  */
2611   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2612   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2613   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2614   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2615   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2616   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2617   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2618   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2619 
2620   /* Integer (32/32->32) division.  \S 4.3.1.  */
2621   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2622   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2623 
2624   /* The divmod functions are designed so that they can be used for
2625      plain division, even though they return both the quotient and the
2626      remainder.  The quotient is returned in the usual location (i.e.,
2627      r0 for SImode, {r0, r1} for DImode), just as would be expected
2628      for an ordinary division routine.  Because the AAPCS calling
2629      conventions specify that all of { r0, r1, r2, r3 } are
2630      call-clobbered registers, there is no need to tell the compiler
2631      explicitly that those registers are clobbered by these
2632      routines.  */
2633   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2634   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
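  /* For example, a plain DImode division such as "long long q = a / b;" can
     therefore be expanded as a call to __aeabi_ldivmod: the quotient comes
     back in {r0, r1}, exactly where a division-only routine would leave it,
     and the remainder returned in {r2, r3} is simply ignored.  */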
2635 
2636   /* For SImode division the ABI provides div-without-mod routines,
2637      which are faster.  */
2638   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2639   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2640 
2641   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2642      divmod libcalls instead.  */
2643   set_optab_libfunc (smod_optab, DImode, NULL);
2644   set_optab_libfunc (umod_optab, DImode, NULL);
2645   set_optab_libfunc (smod_optab, SImode, NULL);
2646   set_optab_libfunc (umod_optab, SImode, NULL);
2647 
2648   /* Half-precision float operations.  The compiler handles all operations
2649      with NULL libfuncs by converting to SFmode.  */
2650   switch (arm_fp16_format)
2651     {
2652     case ARM_FP16_FORMAT_IEEE:
2653     case ARM_FP16_FORMAT_ALTERNATIVE:
2654 
2655       /* Conversions.  */
2656       set_conv_libfunc (trunc_optab, HFmode, SFmode,
2657 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2658 			 ? "__gnu_f2h_ieee"
2659 			 : "__gnu_f2h_alternative"));
2660       set_conv_libfunc (sext_optab, SFmode, HFmode,
2661 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2662 			 ? "__gnu_h2f_ieee"
2663 			 : "__gnu_h2f_alternative"));
2664 
2665       set_conv_libfunc (trunc_optab, HFmode, DFmode,
2666 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2667 			 ? "__gnu_d2h_ieee"
2668 			 : "__gnu_d2h_alternative"));
2669 
2670       arm_block_arith_comp_libfuncs_for_mode (HFmode);
2671       break;
2672 
2673     default:
2674       break;
2675     }
2676 
2677   /* For all possible libcalls in BFmode, record NULL.  */
2678   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2679     {
2680       set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2681       set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2682       set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2683       set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2684     }
2685   arm_block_arith_comp_libfuncs_for_mode (BFmode);
2686 
2687   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2688   {
2689     const arm_fixed_mode_set fixed_arith_modes[] =
2690       {
2691 	{ E_QQmode, "qq" },
2692 	{ E_UQQmode, "uqq" },
2693 	{ E_HQmode, "hq" },
2694 	{ E_UHQmode, "uhq" },
2695 	{ E_SQmode, "sq" },
2696 	{ E_USQmode, "usq" },
2697 	{ E_DQmode, "dq" },
2698 	{ E_UDQmode, "udq" },
2699 	{ E_TQmode, "tq" },
2700 	{ E_UTQmode, "utq" },
2701 	{ E_HAmode, "ha" },
2702 	{ E_UHAmode, "uha" },
2703 	{ E_SAmode, "sa" },
2704 	{ E_USAmode, "usa" },
2705 	{ E_DAmode, "da" },
2706 	{ E_UDAmode, "uda" },
2707 	{ E_TAmode, "ta" },
2708 	{ E_UTAmode, "uta" }
2709       };
2710     const arm_fixed_mode_set fixed_conv_modes[] =
2711       {
2712 	{ E_QQmode, "qq" },
2713 	{ E_UQQmode, "uqq" },
2714 	{ E_HQmode, "hq" },
2715 	{ E_UHQmode, "uhq" },
2716 	{ E_SQmode, "sq" },
2717 	{ E_USQmode, "usq" },
2718 	{ E_DQmode, "dq" },
2719 	{ E_UDQmode, "udq" },
2720 	{ E_TQmode, "tq" },
2721 	{ E_UTQmode, "utq" },
2722 	{ E_HAmode, "ha" },
2723 	{ E_UHAmode, "uha" },
2724 	{ E_SAmode, "sa" },
2725 	{ E_USAmode, "usa" },
2726 	{ E_DAmode, "da" },
2727 	{ E_UDAmode, "uda" },
2728 	{ E_TAmode, "ta" },
2729 	{ E_UTAmode, "uta" },
2730 	{ E_QImode, "qi" },
2731 	{ E_HImode, "hi" },
2732 	{ E_SImode, "si" },
2733 	{ E_DImode, "di" },
2734 	{ E_TImode, "ti" },
2735 	{ E_SFmode, "sf" },
2736 	{ E_DFmode, "df" }
2737       };
2738     unsigned int i, j;
2739 
2740     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2741       {
2742 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2743 				     "add", fixed_arith_modes[i].name, 3);
2744 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2745 				     "ssadd", fixed_arith_modes[i].name, 3);
2746 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2747 				     "usadd", fixed_arith_modes[i].name, 3);
2748 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2749 				     "sub", fixed_arith_modes[i].name, 3);
2750 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2751 				     "sssub", fixed_arith_modes[i].name, 3);
2752 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2753 				     "ussub", fixed_arith_modes[i].name, 3);
2754 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2755 				     "mul", fixed_arith_modes[i].name, 3);
2756 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2757 				     "ssmul", fixed_arith_modes[i].name, 3);
2758 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2759 				     "usmul", fixed_arith_modes[i].name, 3);
2760 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2761 				     "div", fixed_arith_modes[i].name, 3);
2762 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2763 				     "udiv", fixed_arith_modes[i].name, 3);
2764 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2765 				     "ssdiv", fixed_arith_modes[i].name, 3);
2766 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2767 				     "usdiv", fixed_arith_modes[i].name, 3);
2768 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2769 				     "neg", fixed_arith_modes[i].name, 2);
2770 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2771 				     "ssneg", fixed_arith_modes[i].name, 2);
2772 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2773 				     "usneg", fixed_arith_modes[i].name, 2);
2774 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2775 				     "ashl", fixed_arith_modes[i].name, 3);
2776 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2777 				     "ashr", fixed_arith_modes[i].name, 3);
2778 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2779 				     "lshr", fixed_arith_modes[i].name, 3);
2780 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2781 				     "ssashl", fixed_arith_modes[i].name, 3);
2782 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2783 				     "usashl", fixed_arith_modes[i].name, 3);
2784 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2785 				     "cmp", fixed_arith_modes[i].name, 2);
2786       }
2787 
2788     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2789       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2790 	{
2791 	  if (i == j
2792 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2793 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2794 	    continue;
2795 
2796 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2797 				      fixed_conv_modes[j].mode, "fract",
2798 				      fixed_conv_modes[i].name,
2799 				      fixed_conv_modes[j].name);
2800 	  arm_set_fixed_conv_libfunc (satfract_optab,
2801 				      fixed_conv_modes[i].mode,
2802 				      fixed_conv_modes[j].mode, "satfract",
2803 				      fixed_conv_modes[i].name,
2804 				      fixed_conv_modes[j].name);
2805 	  arm_set_fixed_conv_libfunc (fractuns_optab,
2806 				      fixed_conv_modes[i].mode,
2807 				      fixed_conv_modes[j].mode, "fractuns",
2808 				      fixed_conv_modes[i].name,
2809 				      fixed_conv_modes[j].name);
2810 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2811 				      fixed_conv_modes[i].mode,
2812 				      fixed_conv_modes[j].mode, "satfractuns",
2813 				      fixed_conv_modes[i].name,
2814 				      fixed_conv_modes[j].name);
2815 	}
2816   }
2817 
2818   if (TARGET_AAPCS_BASED)
2819     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2820 
2821   speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2822 }
2823 
2824 /* On AAPCS systems, this is the "struct __va_list".  */
2825 static GTY(()) tree va_list_type;
2826 
2827 /* Return the type to use as __builtin_va_list.  */
2828 static tree
2829 arm_build_builtin_va_list (void)
2830 {
2831   tree va_list_name;
2832   tree ap_field;
2833 
2834   if (!TARGET_AAPCS_BASED)
2835     return std_build_builtin_va_list ();
2836 
2837   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2838      defined as:
2839 
2840        struct __va_list
2841        {
2842 	 void *__ap;
2843        };
2844 
2845      The C Library ABI further reinforces this definition in \S
2846      4.1.
2847 
2848      We must follow this definition exactly.  The structure tag
2849      name is visible in C++ mangled names, and thus forms a part
2850      of the ABI.  The field name may be used by people who
2851      #include <stdarg.h>.  */
2852   /* Create the type.  */
2853   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2854   /* Give it the required name.  */
2855   va_list_name = build_decl (BUILTINS_LOCATION,
2856 			     TYPE_DECL,
2857 			     get_identifier ("__va_list"),
2858 			     va_list_type);
2859   DECL_ARTIFICIAL (va_list_name) = 1;
2860   TYPE_NAME (va_list_type) = va_list_name;
2861   TYPE_STUB_DECL (va_list_type) = va_list_name;
2862   /* Create the __ap field.  */
2863   ap_field = build_decl (BUILTINS_LOCATION,
2864 			 FIELD_DECL,
2865 			 get_identifier ("__ap"),
2866 			 ptr_type_node);
2867   DECL_ARTIFICIAL (ap_field) = 1;
2868   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2869   TYPE_FIELDS (va_list_type) = ap_field;
2870   /* Compute its layout.  */
2871   layout_type (va_list_type);
2872 
2873   return va_list_type;
2874 }
2875 
2876 /* Return an expression of type "void *" pointing to the next
2877    available argument in a variable-argument list.  VALIST is the
2878    user-level va_list object, of type __builtin_va_list.  */
2879 static tree
2880 arm_extract_valist_ptr (tree valist)
2881 {
2882   if (TREE_TYPE (valist) == error_mark_node)
2883     return error_mark_node;
2884 
2885   /* On an AAPCS target, the pointer is stored within "struct
2886      va_list".  */
2887   if (TARGET_AAPCS_BASED)
2888     {
2889       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2890       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2891 		       valist, ap_field, NULL_TREE);
2892     }
2893 
2894   return valist;
2895 }
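/* For example, for a user-level "va_list ap;" on an AAPCS target the
   expression returned above is the COMPONENT_REF "ap.__ap", so the standard
   va_start and va_arg expansions below operate directly on the embedded
   pointer.  */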
2896 
2897 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2898 static void
2899 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2900 {
2901   valist = arm_extract_valist_ptr (valist);
2902   std_expand_builtin_va_start (valist, nextarg);
2903 }
2904 
2905 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2906 static tree
2907 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2908 			  gimple_seq *post_p)
2909 {
2910   valist = arm_extract_valist_ptr (valist);
2911   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2912 }
2913 
2914 /* Check any incompatible options that the user has specified.  */
2915 static void
2916 arm_option_check_internal (struct gcc_options *opts)
2917 {
2918   int flags = opts->x_target_flags;
2919 
2920   /* iWMMXt and NEON are incompatible.  */
2921   if (TARGET_IWMMXT
2922       && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2923     error ("iWMMXt and NEON are incompatible");
2924 
2925   /* Make sure that the processor choice does not conflict with any of the
2926      other command line choices.  */
2927   if (TARGET_ARM_P (flags)
2928       && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2929     error ("target CPU does not support ARM mode");
2930 
2931   /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2932   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2933     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2934 
2935   if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2936     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2937 
2938   /* If this target is normally configured to use APCS frames, warn if they
2939      are turned off and debugging is turned on.  */
2940   if (TARGET_ARM_P (flags)
2941       && write_symbols != NO_DEBUG
2942       && !TARGET_APCS_FRAME
2943       && (TARGET_DEFAULT & MASK_APCS_FRAME))
2944     warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2945 	     "debugging");
2946 
2947   /* iWMMXt unsupported under Thumb mode.  */
2948   if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2949     error ("iWMMXt unsupported under Thumb mode");
2950 
2951   if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2952     error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2953 
2954   if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2955     {
2956       error ("RTP PIC is incompatible with Thumb");
2957       flag_pic = 0;
2958     }
2959 
2960   if (target_pure_code || target_slow_flash_data)
2961     {
2962       const char *flag = (target_pure_code ? "-mpure-code" :
2963 					     "-mslow-flash-data");
2964       bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2965 
2966       /* We only support -mslow-flash-data on M-profile targets with
2967 	 MOVT.  */
2968       if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2969 	error ("%s only supports non-pic code on M-profile targets with the "
2970 	       "MOVT instruction", flag);
2971 
2972       /* We only support -mpure-code on M-profile targets.  */
2973       if (target_pure_code && common_unsupported_modes)
2974 	error ("%s only supports non-pic code on M-profile targets", flag);
2975 
2976       /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2977 	 -mword-relocations forbids relocation of MOVT/MOVW.  */
2978       if (target_word_relocations)
2979 	error ("%s incompatible with %<-mword-relocations%>", flag);
2980     }
2981 }
2982 
2983 /* Recompute the global settings depending on target attribute options.  */
2984 
2985 static void
2986 arm_option_params_internal (void)
2987 {
2988   /* If we are not using the default (ARM mode) section anchor offset
2989      ranges, then set the correct ranges now.  */
2990   if (TARGET_THUMB1)
2991     {
2992       /* Thumb-1 LDR instructions cannot have negative offsets.
2993          Permissible positive offset ranges are 5-bit (for byte loads),
2994          6-bit (for halfword loads), or 7-bit (for word loads).
2995          Empirical results suggest a 7-bit anchor range gives the best
2996          overall code size.  */
2997       targetm.min_anchor_offset = 0;
2998       targetm.max_anchor_offset = 127;
2999     }
3000   else if (TARGET_THUMB2)
3001     {
3002       /* The minimum is set such that the total size of the block
3003          for a particular anchor is 248 + 1 + 4095 bytes, which is
3004          divisible by eight, ensuring natural spacing of anchors.  */
3005       targetm.min_anchor_offset = -248;
3006       targetm.max_anchor_offset = 4095;
3007     }
3008   else
3009     {
3010       targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3011       targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3012     }
3013 
3014   /* Increase the number of conditional instructions with -Os.  */
3015   max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3016 
3017   /* For THUMB2, we limit the conditional sequence to one IT block.  */
3018   if (TARGET_THUMB2)
3019     max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3020 
3021   if (TARGET_THUMB1)
3022     targetm.md_asm_adjust = thumb1_md_asm_adjust;
3023   else
3024     targetm.md_asm_adjust = arm_md_asm_adjust;
3025 }
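/* Editorial check of the figures above (not compiled): the Thumb-2 anchor
   block spans 248 + 1 + 4095 = 4344 bytes and 4344 = 8 * 543, so
   consecutive anchors stay naturally aligned; the Thumb-1 maximum of 127
   matches the 7-bit positive offset range available to word loads that
   the comment above describes.  */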
3026 
3027 /* True if -mflip-thumb should next add an attribute for the default
3028    mode, false if it should next add an attribute for the opposite mode.  */
3029 static GTY(()) bool thumb_flipper;
3030 
3031 /* Options after initial target override.  */
3032 static GTY(()) tree init_optimize;
3033 
3034 static void
3035 arm_override_options_after_change_1 (struct gcc_options *opts,
3036 				     struct gcc_options *opts_set)
3037 {
3038   /* -falign-functions without argument: supply one.  */
3039   if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3040     opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3041       && opts->x_optimize_size ? "2" : "4";
3042 }
3043 
3044 /* Implement targetm.override_options_after_change.  */
3045 
3046 static void
3047 arm_override_options_after_change (void)
3048 {
3049   arm_override_options_after_change_1 (&global_options, &global_options_set);
3050 }
3051 
3052 /* Implement TARGET_OPTION_RESTORE.  */
3053 static void
3054 arm_option_restore (struct gcc_options */* opts */,
3055 		    struct gcc_options */* opts_set */,
3056 		    struct cl_target_option *ptr)
3057 {
3058   arm_configure_build_target (&arm_active_target, ptr, false);
3059 }
3060 
3061 /* Reset options between modes that the user has specified.  */
3062 static void
3063 arm_option_override_internal (struct gcc_options *opts,
3064 			      struct gcc_options *opts_set)
3065 {
3066   arm_override_options_after_change_1 (opts, opts_set);
3067 
3068   if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3069     {
3070       /* The default is to enable interworking, so this warning message would
3071 	 be confusing to users who have just compiled with
3072 	 eg, -march=armv4.  */
3073       /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3074       opts->x_target_flags &= ~MASK_INTERWORK;
3075     }
3076 
3077   if (TARGET_THUMB_P (opts->x_target_flags)
3078       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3079     {
3080       warning (0, "target CPU does not support THUMB instructions");
3081       opts->x_target_flags &= ~MASK_THUMB;
3082     }
3083 
3084   if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3085     {
3086       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3087       opts->x_target_flags &= ~MASK_APCS_FRAME;
3088     }
3089 
3090   /* Callee super interworking implies thumb interworking.  Adding
3091      this to the flags here simplifies the logic elsewhere.  */
3092   if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3093     opts->x_target_flags |= MASK_INTERWORK;
3094 
3095   /* Need to remember initial values so combinations of options like
3096      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3097   cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3098 
3099   if (! opts_set->x_arm_restrict_it)
3100     opts->x_arm_restrict_it = arm_arch8;
3101 
3102   /* ARM execution state and M profile don't have [restrict] IT.  */
3103   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3104     opts->x_arm_restrict_it = 0;
3105 
3106   /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
3107   if (!opts_set->x_arm_restrict_it
3108       && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3109     opts->x_arm_restrict_it = 0;
3110 
3111   /* Enable -munaligned-access by default for
3112      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3113      i.e. Thumb2 and ARM state only.
3114      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3115      - ARMv8 architecture-based processors.
3116 
3117      Disable -munaligned-access by default for
3118      - all pre-ARMv6 architecture-based processors
3119      - ARMv6-M architecture-based processors
3120      - ARMv8-M Baseline processors.  */
3121 
3122   if (! opts_set->x_unaligned_access)
3123     {
3124       opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3125 			  && arm_arch6 && (arm_arch_notm || arm_arch7));
3126     }
3127   else if (opts->x_unaligned_access == 1
3128 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3129     {
3130       warning (0, "target CPU does not support unaligned accesses");
3131       opts->x_unaligned_access = 0;
3132     }
3133 
3134   /* Don't warn since it's on by default in -O2.  */
3135   if (TARGET_THUMB1_P (opts->x_target_flags))
3136     opts->x_flag_schedule_insns = 0;
3137   else
3138     opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3139 
3140   /* Disable shrink-wrap when optimizing function for size, since it tends to
3141      generate additional returns.  */
3142   if (optimize_function_for_size_p (cfun)
3143       && TARGET_THUMB2_P (opts->x_target_flags))
3144     opts->x_flag_shrink_wrap = false;
3145   else
3146     opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3147 
3148   /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3149      - epilogue_insns - does not accurately model the corresponding insns
3150      emitted in the asm file.  In particular, see the comment in thumb_exit
3151      'Find out how many of the (return) argument registers we can corrupt'.
3152      As a consequence, the epilogue may clobber registers without fipa-ra
3153      finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3154      TODO: Accurately model clobbers for epilogue_insns and reenable
3155      fipa-ra.  */
3156   if (TARGET_THUMB1_P (opts->x_target_flags))
3157     opts->x_flag_ipa_ra = 0;
3158   else
3159     opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3160 
3161   /* Thumb2 inline assembly code should always use unified syntax.
3162      This will apply to ARM and Thumb1 eventually.  */
3163   if (TARGET_THUMB2_P (opts->x_target_flags))
3164     opts->x_inline_asm_unified = true;
3165 
3166 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3167   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3168 #endif
3169 }
3170 
3171 static sbitmap isa_all_fpubits_internal;
3172 static sbitmap isa_all_fpbits;
3173 static sbitmap isa_quirkbits;
3174 
3175 /* Configure a build target TARGET from the user-specified options OPTS and
3176    OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3177    architecture have been specified, but the two are not identical.  */
3178 void
3179 arm_configure_build_target (struct arm_build_target *target,
3180 			    struct cl_target_option *opts,
3181 			    bool warn_compatible)
3182 {
3183   const cpu_option *arm_selected_tune = NULL;
3184   const arch_option *arm_selected_arch = NULL;
3185   const cpu_option *arm_selected_cpu = NULL;
3186   const arm_fpu_desc *arm_selected_fpu = NULL;
3187   const char *tune_opts = NULL;
3188   const char *arch_opts = NULL;
3189   const char *cpu_opts = NULL;
3190 
3191   bitmap_clear (target->isa);
3192   target->core_name = NULL;
3193   target->arch_name = NULL;
3194 
3195   if (opts->x_arm_arch_string)
3196     {
3197       arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3198 						      "-march",
3199 						      opts->x_arm_arch_string);
3200       arch_opts = strchr (opts->x_arm_arch_string, '+');
3201     }
3202 
3203   if (opts->x_arm_cpu_string)
3204     {
3205       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3206 						    opts->x_arm_cpu_string);
3207       cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3208       arm_selected_tune = arm_selected_cpu;
3209       /* If taking the tuning from -mcpu, we don't need to rescan the
3210 	 options for tuning.  */
3211     }
3212 
3213   if (opts->x_arm_tune_string)
3214     {
3215       arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3216 						     opts->x_arm_tune_string);
3217       tune_opts = strchr (opts->x_arm_tune_string, '+');
3218     }
3219 
3220   if (arm_selected_arch)
3221     {
3222       arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3223       arm_parse_option_features (target->isa, &arm_selected_arch->common,
3224 				 arch_opts);
3225 
3226       if (arm_selected_cpu)
3227 	{
3228 	  auto_sbitmap cpu_isa (isa_num_bits);
3229 	  auto_sbitmap isa_delta (isa_num_bits);
3230 
3231 	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3232 	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3233 				     cpu_opts);
3234 	  bitmap_xor (isa_delta, cpu_isa, target->isa);
3235 	  /* Ignore any bits that are quirk bits.  */
3236 	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3237 	  /* If the user (or the default configuration) has specified a
3238 	     specific FPU, then ignore any bits that depend on the FPU
3239 	     configuration.  Do similarly if using the soft-float
3240 	     ABI.  */
3241 	  if (opts->x_arm_fpu_index != TARGET_FPU_auto
3242 	      || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3243 	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3244 
3245 	  if (!bitmap_empty_p (isa_delta))
3246 	    {
3247 	      if (warn_compatible)
3248 		warning (0, "switch %<-mcpu=%s%> conflicts "
3249 			 "with switch %<-march=%s%>",
3250 			 opts->x_arm_cpu_string,
3251 			 opts->x_arm_arch_string);
3252 
3253 	      /* -march wins for code generation.
3254 		 -mcpu wins for default tuning.  */
3255 	      if (!arm_selected_tune)
3256 		arm_selected_tune = arm_selected_cpu;
3257 
3258 	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3259 	      target->arch_name = arm_selected_arch->common.name;
3260 	    }
3261 	  else
3262 	    {
3263 	      /* Architecture and CPU are essentially the same.
3264 		 Prefer the CPU setting.  */
3265 	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3266 	      target->core_name = arm_selected_cpu->common.name;
3267 	      /* Copy the CPU's capabilities, so that we inherit the
3268 		 appropriate extensions and quirks.  */
3269 	      bitmap_copy (target->isa, cpu_isa);
3270 	    }
3271 	}
3272       else
3273 	{
3274 	  /* Pick a CPU based on the architecture.  */
3275 	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3276 	  target->arch_name = arm_selected_arch->common.name;
3277 	  /* Note: target->core_name is left unset in this path.  */
3278 	}
3279     }
3280   else if (arm_selected_cpu)
3281     {
3282       target->core_name = arm_selected_cpu->common.name;
3283       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3284       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3285 				 cpu_opts);
3286       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3287     }
3288   /* If the user did not specify a processor or architecture, choose
3289      one for them.  */
3290   else
3291     {
3292       const cpu_option *sel;
3293       auto_sbitmap sought_isa (isa_num_bits);
3294       bitmap_clear (sought_isa);
3295       auto_sbitmap default_isa (isa_num_bits);
3296 
3297       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3298 						    TARGET_CPU_DEFAULT);
3299       cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3300       gcc_assert (arm_selected_cpu->common.name);
3301 
3302       /* RWE: All of the selection logic below (to the end of this
3303 	 'if' clause) looks somewhat suspect.  It appears to be mostly
3304 	 there to support forcing thumb support when the default CPU
3305 	 does not have thumb (somewhat dubious in terms of what the
3306 	 user might be expecting).  I think it should be removed once
3307 	 support for the pre-thumb era cores is removed.  */
3308       sel = arm_selected_cpu;
3309       arm_initialize_isa (default_isa, sel->common.isa_bits);
3310       arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3311 				 cpu_opts);
3312 
3313       /* Now check to see if the user has specified any command line
3314 	 switches that require certain abilities from the cpu.  */
3315 
3316       if (TARGET_INTERWORK || TARGET_THUMB)
3317 	bitmap_set_bit (sought_isa, isa_bit_thumb);
3318 
3319       /* If there are such requirements and the default CPU does not
3320 	 satisfy them, we need to run over the complete list of
3321 	 cores looking for one that is satisfactory.  */
3322       if (!bitmap_empty_p (sought_isa)
3323 	  && !bitmap_subset_p (sought_isa, default_isa))
3324 	{
3325 	  auto_sbitmap candidate_isa (isa_num_bits);
3326 	  /* We're only interested in a CPU with at least the
3327 	     capabilities of the default CPU and the required
3328 	     additional features.  */
3329 	  bitmap_ior (default_isa, default_isa, sought_isa);
3330 
3331 	  /* Try to locate a CPU type that supports all of the abilities
3332 	     of the default CPU, plus the extra abilities requested by
3333 	     the user.  */
3334 	  for (sel = all_cores; sel->common.name != NULL; sel++)
3335 	    {
3336 	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3337 	      /* An exact match?  */
3338 	      if (bitmap_equal_p (default_isa, candidate_isa))
3339 		break;
3340 	    }
3341 
3342 	  if (sel->common.name == NULL)
3343 	    {
3344 	      unsigned current_bit_count = isa_num_bits;
3345 	      const cpu_option *best_fit = NULL;
3346 
3347 	      /* Ideally we would like to issue an error message here
3348 		 saying that it was not possible to find a CPU compatible
3349 		 with the default CPU, but which also supports the command
3350 		 line options specified by the programmer, and so they
3351 		 ought to use the -mcpu=<name> command line option to
3352 		 override the default CPU type.
3353 
3354 		 If we cannot find a CPU that has exactly the
3355 		 characteristics of the default CPU and the given
3356 		 command line options we scan the array again looking
3357 		 for a best match.  The best match must have at least
3358 		 the capabilities of the perfect match.  */
3359 	      for (sel = all_cores; sel->common.name != NULL; sel++)
3360 		{
3361 		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3362 
3363 		  if (bitmap_subset_p (default_isa, candidate_isa))
3364 		    {
3365 		      unsigned count;
3366 
3367 		      bitmap_and_compl (candidate_isa, candidate_isa,
3368 					default_isa);
3369 		      count = bitmap_popcount (candidate_isa);
3370 
3371 		      if (count < current_bit_count)
3372 			{
3373 			  best_fit = sel;
3374 			  current_bit_count = count;
3375 			}
3376 		    }
3377 		}
3378 
3379 	      gcc_assert (best_fit);
3380 	      sel = best_fit;
3381 	    }
3382 	  arm_selected_cpu = sel;
3383 	}
3384 
3385       /* Now we know the CPU, we can finally initialize the target
3386 	 structure.  */
3387       target->core_name = arm_selected_cpu->common.name;
3388       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3389       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3390 				 cpu_opts);
3391       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3392     }
3393 
3394   gcc_assert (arm_selected_cpu);
3395   gcc_assert (arm_selected_arch);
3396 
3397   if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3398     {
3399       arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3400       auto_sbitmap fpu_bits (isa_num_bits);
3401 
3402       arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3403       /* This should clear out ALL bits relating to the FPU/simd
3404 	 extensions, to avoid potentially invalid combinations later on
3405 	 that we can't match.  At present we only clear out those bits
3406 	 that can be set by -mfpu.  This should be fixed in GCC-12.  */
3407       bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3408       bitmap_ior (target->isa, target->isa, fpu_bits);
3409     }
3410 
3411   /* There may be implied bits which we still need to enable. These are
3412      non-named features which are needed to complete other sets of features,
3413      but cannot be enabled from arm-cpus.in due to being shared between
3414      multiple fgroups. Each entry in all_implied_fbits is of the form
3415      ante -> cons, meaning that if the feature "ante" is enabled, we should
3416      implicitly enable "cons".  */
3417   const struct fbit_implication *impl = all_implied_fbits;
3418   while (impl->ante)
3419     {
3420       if (bitmap_bit_p (target->isa, impl->ante))
3421 	bitmap_set_bit (target->isa, impl->cons);
3422       impl++;
3423     }
3424 
3425   if (!arm_selected_tune)
3426     arm_selected_tune = arm_selected_cpu;
3427   else /* Validate the features passed to -mtune.  */
3428     arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3429 
3430   const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3431 
3432   /* Finish initializing the target structure.  */
3433   target->arch_pp_name = arm_selected_arch->arch;
3434   target->base_arch = arm_selected_arch->base_arch;
3435   target->profile = arm_selected_arch->profile;
3436 
3437   target->tune_flags = tune_data->tune_flags;
3438   target->tune = tune_data->tune;
3439   target->tune_core = tune_data->scheduler;
3440   arm_option_reconfigure_globals ();
3441 }
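/* Editorial example of the conflict handling above (hypothetical flags,
   not taken from the sources): with both -mcpu=cortex-a8 and
   -march=armv7-m on the command line the ISA delta is non-empty, so a
   warning is emitted, code is generated for the architecture (armv7-m,
   via its representative CPU) and tuning defaults to the named CPU
   (cortex-a8) -- the "-march wins for code generation, -mcpu wins for
   default tuning" rule above.  */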
3442 
3443 /* Fix up any incompatible options that the user has specified.  */
3444 static void
3445 arm_option_override (void)
3446 {
3447   static const enum isa_feature fpu_bitlist_internal[]
3448     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3449   /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
3450   static const enum isa_feature fp_bitlist[]
3451     = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3452   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3453   cl_target_option opts;
3454 
3455   isa_quirkbits = sbitmap_alloc (isa_num_bits);
3456   arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3457 
3458   isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3459   isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3460   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3461   arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3462 
3463   arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3464 
3465   if (!global_options_set.x_arm_fpu_index)
3466     {
3467       bool ok;
3468       int fpu_index;
3469 
3470       ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3471 				  CL_TARGET);
3472       gcc_assert (ok);
3473       arm_fpu_index = (enum fpu_type) fpu_index;
3474     }
3475 
3476   cl_target_option_save (&opts, &global_options, &global_options_set);
3477   arm_configure_build_target (&arm_active_target, &opts, true);
3478 
3479 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3480   SUBTARGET_OVERRIDE_OPTIONS;
3481 #endif
3482 
3483   /* Initialize boolean versions of the architectural flags, for use
3484      in the arm.md file and for enabling feature flags.  */
3485   arm_option_reconfigure_globals ();
3486 
3487   arm_tune = arm_active_target.tune_core;
3488   tune_flags = arm_active_target.tune_flags;
3489   current_tune = arm_active_target.tune;
3490 
3491   /* TBD: Dwarf info for apcs frame is not handled yet.  */
3492   if (TARGET_APCS_FRAME)
3493     flag_shrink_wrap = false;
3494 
3495   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3496     {
3497       warning (0, "%<-mapcs-stack-check%> incompatible with "
3498 	       "%<-mno-apcs-frame%>");
3499       target_flags |= MASK_APCS_FRAME;
3500     }
3501 
3502   if (TARGET_POKE_FUNCTION_NAME)
3503     target_flags |= MASK_APCS_FRAME;
3504 
3505   if (TARGET_APCS_REENT && flag_pic)
3506     error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3507 
3508   if (TARGET_APCS_REENT)
3509     warning (0, "APCS reentrant code not supported.  Ignored");
3510 
3511   /* Set up some tuning parameters.  */
3512   arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3513   arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3514   arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3515   arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3516   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3517   arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3518 
3519   /* For arm2/3 there is no need to do any scheduling if we are doing
3520      software floating-point.  */
3521   if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3522     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3523 
3524   /* Override the default structure alignment for AAPCS ABI.  */
3525   if (!global_options_set.x_arm_structure_size_boundary)
3526     {
3527       if (TARGET_AAPCS_BASED)
3528 	arm_structure_size_boundary = 8;
3529     }
3530   else
3531     {
3532       warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3533 
3534       if (arm_structure_size_boundary != 8
3535 	  && arm_structure_size_boundary != 32
3536 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3537 	{
3538 	  if (ARM_DOUBLEWORD_ALIGN)
3539 	    warning (0,
3540 		     "structure size boundary can only be set to 8, 32 or 64");
3541 	  else
3542 	    warning (0, "structure size boundary can only be set to 8 or 32");
3543 	  arm_structure_size_boundary
3544 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3545 	}
3546     }
3547 
3548   if (TARGET_VXWORKS_RTP)
3549     {
3550       if (!global_options_set.x_arm_pic_data_is_text_relative)
3551 	arm_pic_data_is_text_relative = 0;
3552     }
3553   else if (flag_pic
3554 	   && !arm_pic_data_is_text_relative
3555 	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3556     /* When text & data segments don't have a fixed displacement, the
3557        intended use is with a single, read only, pic base register.
3558        Unless the user explicitly requested not to do that, set
3559        it.  */
3560     target_flags |= MASK_SINGLE_PIC_BASE;
3561 
3562   /* If stack checking is disabled, we can use r10 as the PIC register,
3563      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3564   if (flag_pic && TARGET_SINGLE_PIC_BASE)
3565     {
3566       if (TARGET_VXWORKS_RTP)
3567 	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3568       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3569     }
3570 
3571   if (flag_pic && TARGET_VXWORKS_RTP)
3572     arm_pic_register = 9;
3573 
3574   /* If in FDPIC mode then force arm_pic_register to be r9.  */
3575   if (TARGET_FDPIC)
3576     {
3577       arm_pic_register = FDPIC_REGNUM;
3578       if (TARGET_THUMB1)
3579 	sorry ("FDPIC mode is not supported in Thumb-1 mode");
3580     }
3581 
3582   if (arm_pic_register_string != NULL)
3583     {
3584       int pic_register = decode_reg_name (arm_pic_register_string);
3585 
3586       if (!flag_pic)
3587 	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3588 
3589       /* Prevent the user from choosing an obviously stupid PIC register.  */
3590       else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3591 	       || pic_register == HARD_FRAME_POINTER_REGNUM
3592 	       || pic_register == STACK_POINTER_REGNUM
3593 	       || pic_register >= PC_REGNUM
3594 	       || (TARGET_VXWORKS_RTP
3595 		   && (unsigned int) pic_register != arm_pic_register))
3596 	error ("unable to use %qs for PIC register", arm_pic_register_string);
3597       else
3598 	arm_pic_register = pic_register;
3599     }
3600 
3601   if (flag_pic)
3602     target_word_relocations = 1;
3603 
3604   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3605   if (fix_cm3_ldrd == 2)
3606     {
3607       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3608 	fix_cm3_ldrd = 1;
3609       else
3610 	fix_cm3_ldrd = 0;
3611     }
3612 
3613   /* Hot/Cold partitioning is not currently supported, since we can't
3614      handle literal pool placement in that case.  */
3615   if (flag_reorder_blocks_and_partition)
3616     {
3617       inform (input_location,
3618 	      "%<-freorder-blocks-and-partition%> not supported "
3619 	      "on this architecture");
3620       flag_reorder_blocks_and_partition = 0;
3621       flag_reorder_blocks = 1;
3622     }
3623 
3624   if (flag_pic)
3625     /* Hoisting PIC address calculations more aggressively provides a small,
3626        but measurable, size reduction for PIC code.  Therefore, we decrease
3627        the bar for unrestricted expression hoisting to the cost of PIC address
3628        calculation, which is 2 instructions.  */
3629     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3630 			 param_gcse_unrestricted_cost, 2);
3631 
3632   /* ARM EABI defaults to strict volatile bitfields.  */
3633   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3634       && abi_version_at_least(2))
3635     flag_strict_volatile_bitfields = 1;
3636 
3637   /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3638      for which we have deemed it beneficial (signified by setting
3639      prefetch.num_slots to 1 or more).  */
3640   if (flag_prefetch_loop_arrays < 0
3641       && HAVE_prefetch
3642       && optimize >= 3
3643       && current_tune->prefetch.num_slots > 0)
3644     flag_prefetch_loop_arrays = 1;
3645 
3646   /* Set up parameters to be used in prefetching algorithm.  Do not
3647      override the defaults unless we are tuning for a core we have
3648      researched values for.  */
3649   if (current_tune->prefetch.num_slots > 0)
3650     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3651 			 param_simultaneous_prefetches,
3652 			 current_tune->prefetch.num_slots);
3653   if (current_tune->prefetch.l1_cache_line_size >= 0)
3654     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3655 			 param_l1_cache_line_size,
3656 			 current_tune->prefetch.l1_cache_line_size);
3657   if (current_tune->prefetch.l1_cache_size >= 0)
3658     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3659 			 param_l1_cache_size,
3660 			 current_tune->prefetch.l1_cache_size);
3661 
3662   /* Look through ready list and all of queue for instructions
3663      relevant for L2 auto-prefetcher.  */
3664   int sched_autopref_queue_depth;
3665 
3666   switch (current_tune->sched_autopref)
3667     {
3668     case tune_params::SCHED_AUTOPREF_OFF:
3669       sched_autopref_queue_depth = -1;
3670       break;
3671 
3672     case tune_params::SCHED_AUTOPREF_RANK:
3673       sched_autopref_queue_depth = 0;
3674       break;
3675 
3676     case tune_params::SCHED_AUTOPREF_FULL:
3677       sched_autopref_queue_depth = max_insn_queue_index + 1;
3678       break;
3679 
3680     default:
3681       gcc_unreachable ();
3682     }
3683 
3684   SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3685 		       param_sched_autopref_queue_depth,
3686 		       sched_autopref_queue_depth);
3687 
3688   /* Currently, for slow flash data, we just disable literal pools.  We also
3689      disable it for pure-code.  */
3690   if (target_slow_flash_data || target_pure_code)
3691     arm_disable_literal_pool = true;
3692 
3693   /* Disable scheduling fusion by default if the processor is not ARMv7
3694      or does not prefer ldrd/strd.  */
3695   if (flag_schedule_fusion == 2
3696       && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3697     flag_schedule_fusion = 0;
3698 
3699   /* Need to remember initial options before they are overridden.  */
3700   init_optimize = build_optimization_node (&global_options,
3701 					   &global_options_set);
3702 
3703   arm_options_perform_arch_sanity_checks ();
3704   arm_option_override_internal (&global_options, &global_options_set);
3705   arm_option_check_internal (&global_options);
3706   arm_option_params_internal ();
3707 
3708   /* Create the default target_options structure.  */
3709   target_option_default_node = target_option_current_node
3710     = build_target_option_node (&global_options, &global_options_set);
3711 
3712   /* Register global variables with the garbage collector.  */
3713   arm_add_gc_roots ();
3714 
3715   /* Init initial mode for testing.  */
3716   thumb_flipper = TARGET_THUMB;
3717 }
3718 
3719 
3720 /* Reconfigure global status flags from the active_target.isa.  */
3721 void
3722 arm_option_reconfigure_globals (void)
3723 {
3724   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3725   arm_base_arch = arm_active_target.base_arch;
3726 
3727   /* Initialize boolean versions of the architectural flags, for use
3728      in the arm.md file.  */
3729   arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3730   arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3731   arm_arch5t =  bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3732   arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3733   arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3734   arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3735   arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3736   arm_arch6m = arm_arch6 && !arm_arch_notm;
3737   arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3738   arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3739   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3740   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3741   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3742   arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3743   arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3744   arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3745 				    isa_bit_armv8_1m_main);
3746   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3747   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3748   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3749   arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3750   arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3751   arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3752   arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3753   arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3754   arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3755   arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3756   arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3757   arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3758 
3759   arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3760   if (arm_fp16_inst)
3761     {
3762       if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3763 	error ("selected fp16 options are incompatible");
3764       arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3765     }
3766 
3767   arm_arch_cde = 0;
3768   arm_arch_cde_coproc = 0;
3769   int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3770 		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3771 		    isa_bit_cdecp6, isa_bit_cdecp7};
3772   for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3773     {
3774       int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3775       if (cde_bit)
3776 	{
3777 	  arm_arch_cde |= cde_bit;
3778 	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3779 	}
3780     }
3781 
3782   /* And finally, set up some quirks.  */
3783   arm_arch_no_volatile_ce
3784     = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3785   arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3786 					    isa_bit_quirk_armv6kz);
3787 
3788   /* Use the cp15 method if it is available.  */
3789   if (target_thread_pointer == TP_AUTO)
3790     {
3791       if (arm_arch6k && !TARGET_THUMB1)
3792 	target_thread_pointer = TP_CP15;
3793       else
3794 	target_thread_pointer = TP_SOFT;
3795     }
3796 }
3797 
3798 /* Perform some validation between the desired architecture and the rest of the
3799    options.  */
3800 void
3801 arm_options_perform_arch_sanity_checks (void)
3802 {
3803   /* V5T code we generate is completely interworking capable, so we turn off
3804      TARGET_INTERWORK here to avoid many tests later on.  */
3805 
3806   /* XXX However, we must pass the right pre-processor defines to CPP
3807      or GLD can get confused.  This is a hack.  */
3808   if (TARGET_INTERWORK)
3809     arm_cpp_interwork = 1;
3810 
3811   if (arm_arch5t)
3812     target_flags &= ~MASK_INTERWORK;
3813 
3814   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3815     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3816 
3817   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3818     error ("iwmmxt abi requires an iwmmxt capable cpu");
3819 
3820   /* BPABI targets use linker tricks to allow interworking on cores
3821      without thumb support.  */
3822   if (TARGET_INTERWORK
3823       && !TARGET_BPABI
3824       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3825     {
3826       warning (0, "target CPU does not support interworking" );
3827       target_flags &= ~MASK_INTERWORK;
3828     }
3829 
3830   /* If soft-float is specified then don't use FPU.  */
3831   if (TARGET_SOFT_FLOAT)
3832     arm_fpu_attr = FPU_NONE;
3833   else
3834     arm_fpu_attr = FPU_VFP;
3835 
3836   if (TARGET_AAPCS_BASED)
3837     {
3838       if (TARGET_CALLER_INTERWORKING)
3839 	error ("AAPCS does not support %<-mcaller-super-interworking%>");
3840       else
3841 	if (TARGET_CALLEE_INTERWORKING)
3842 	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
3843     }
3844 
3845   /* __fp16 support currently assumes the core has ldrh.  */
3846   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3847     sorry ("__fp16 and no ldrh");
3848 
3849   if (use_cmse && !arm_arch_cmse)
3850     error ("target CPU does not support ARMv8-M Security Extensions");
3851 
3852   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3853      and ARMv8-M Baseline and Mainline do not allow such configuration.  */
3854   if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3855     error ("ARMv8-M Security Extensions incompatible with selected FPU");
3856 
3857 
3858   if (TARGET_AAPCS_BASED)
3859     {
3860       if (arm_abi == ARM_ABI_IWMMXT)
3861 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3862       else if (TARGET_HARD_FLOAT_ABI)
3863 	{
3864 	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
3865 	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3866 	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3867 	    error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3868 	}
3869       else
3870 	arm_pcs_default = ARM_PCS_AAPCS;
3871     }
3872   else
3873     {
3874       if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3875 	sorry ("%<-mfloat-abi=hard%> and VFP");
3876 
3877       if (arm_abi == ARM_ABI_APCS)
3878 	arm_pcs_default = ARM_PCS_APCS;
3879       else
3880 	arm_pcs_default = ARM_PCS_ATPCS;
3881     }
3882 }
3883 
3884 /* Test whether a local function descriptor is canonical, i.e.,
3885    whether we can use GOTOFFFUNCDESC to compute the address of the
3886    function.  */
3887 static bool
3888 arm_fdpic_local_funcdesc_p (rtx fnx)
3889 {
3890   tree fn;
3891   enum symbol_visibility vis;
3892   bool ret;
3893 
3894   if (!TARGET_FDPIC)
3895     return true;
3896 
3897   if (! SYMBOL_REF_LOCAL_P (fnx))
3898     return false;
3899 
3900   fn = SYMBOL_REF_DECL (fnx);
3901 
3902   if (! fn)
3903     return false;
3904 
3905   vis = DECL_VISIBILITY (fn);
3906 
3907   if (vis == VISIBILITY_PROTECTED)
3908     /* Private function descriptors for protected functions are not
3909        canonical.  Temporarily change the visibility to global so that
3910        we can ensure uniqueness of funcdesc pointers.  */
3911     DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3912 
3913   ret = default_binds_local_p_1 (fn, flag_pic);
3914 
3915   DECL_VISIBILITY (fn) = vis;
3916 
3917   return ret;
3918 }
3919 
3920 static void
3921 arm_add_gc_roots (void)
3922 {
3923   gcc_obstack_init(&minipool_obstack);
3924   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3925 }
3926 
3927 /* A table of known ARM exception types.
3928    For use with the interrupt function attribute.  */
3929 
3930 typedef struct
3931 {
3932   const char *const arg;
3933   const unsigned long return_value;
3934 }
3935 isr_attribute_arg;
3936 
3937 static const isr_attribute_arg isr_attribute_args [] =
3938 {
3939   { "IRQ",   ARM_FT_ISR },
3940   { "irq",   ARM_FT_ISR },
3941   { "FIQ",   ARM_FT_FIQ },
3942   { "fiq",   ARM_FT_FIQ },
3943   { "ABORT", ARM_FT_ISR },
3944   { "abort", ARM_FT_ISR },
3945   { "UNDEF", ARM_FT_EXCEPTION },
3946   { "undef", ARM_FT_EXCEPTION },
3947   { "SWI",   ARM_FT_EXCEPTION },
3948   { "swi",   ARM_FT_EXCEPTION },
3949   { NULL,    ARM_FT_NORMAL }
3950 };
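/* For reference (editorial example, not part of the table): these entries
   correspond to source-level usage along the lines of

       void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   The string argument is compared literally against the table by
   arm_isr_value below; an unrecognized string yields ARM_FT_UNKNOWN.  */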
3951 
3952 /* Returns the (interrupt) function type of the current
3953    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3954 
3955 static unsigned long
3956 arm_isr_value (tree argument)
3957 {
3958   const isr_attribute_arg * ptr;
3959   const char *              arg;
3960 
3961   if (!arm_arch_notm)
3962     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3963 
3964   /* No argument - default to IRQ.  */
3965   if (argument == NULL_TREE)
3966     return ARM_FT_ISR;
3967 
3968   /* Get the value of the argument.  */
3969   if (TREE_VALUE (argument) == NULL_TREE
3970       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3971     return ARM_FT_UNKNOWN;
3972 
3973   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3974 
3975   /* Check it against the list of known arguments.  */
3976   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3977     if (streq (arg, ptr->arg))
3978       return ptr->return_value;
3979 
3980   /* An unrecognized interrupt type.  */
3981   return ARM_FT_UNKNOWN;
3982 }
3983 
3984 /* Computes the type of the current function.  */
3985 
3986 static unsigned long
3987 arm_compute_func_type (void)
3988 {
3989   unsigned long type = ARM_FT_UNKNOWN;
3990   tree a;
3991   tree attr;
3992 
3993   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3994 
3995   /* Decide if the current function is volatile.  Such functions
3996      never return, and many memory cycles can be saved by not storing
3997      register values that will never be needed again.  This optimization
3998      was added to speed up context switching in a kernel application.  */
3999   if (optimize > 0
4000       && (TREE_NOTHROW (current_function_decl)
4001           || !(flag_unwind_tables
4002                || (flag_exceptions
4003 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4004       && TREE_THIS_VOLATILE (current_function_decl))
4005     type |= ARM_FT_VOLATILE;
4006 
4007   if (cfun->static_chain_decl != NULL)
4008     type |= ARM_FT_NESTED;
4009 
4010   attr = DECL_ATTRIBUTES (current_function_decl);
4011 
4012   a = lookup_attribute ("naked", attr);
4013   if (a != NULL_TREE)
4014     type |= ARM_FT_NAKED;
4015 
4016   a = lookup_attribute ("isr", attr);
4017   if (a == NULL_TREE)
4018     a = lookup_attribute ("interrupt", attr);
4019 
4020   if (a == NULL_TREE)
4021     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4022   else
4023     type |= arm_isr_value (TREE_VALUE (a));
4024 
4025   if (lookup_attribute ("cmse_nonsecure_entry", attr))
4026     type |= ARM_FT_CMSE_ENTRY;
4027 
4028   return type;
4029 }
4030 
4031 /* Returns the type of the current function.  */
4032 
4033 unsigned long
4034 arm_current_func_type (void)
4035 {
4036   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4037     cfun->machine->func_type = arm_compute_func_type ();
4038 
4039   return cfun->machine->func_type;
4040 }
4041 
4042 bool
4043 arm_allocate_stack_slots_for_args (void)
4044 {
4045   /* Naked functions should not allocate stack slots for arguments.  */
4046   return !IS_NAKED (arm_current_func_type ());
4047 }
4048 
4049 static bool
4050 arm_warn_func_return (tree decl)
4051 {
4052   /* Naked functions are implemented entirely in assembly, including the
4053      return sequence, so suppress warnings about this.  */
4054   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4055 }
4056 
4057 
4058 /* Output assembler code for a block containing the constant parts
4059    of a trampoline, leaving space for the variable parts.
4060 
4061    On the ARM, (if r8 is the static chain regnum, and remembering that
4062    referencing pc adds an offset of 8) the trampoline looks like:
4063 	   ldr 		r8, [pc, #0]
4064 	   ldr		pc, [pc]
4065 	   .word	static chain value
4066 	   .word	function's address
4067    XXX FIXME: When the trampoline returns, r8 will be clobbered.
4068 
4069    In FDPIC mode, the trampoline looks like:
4070 	   .word	trampoline address
4071 	   .word	trampoline GOT address
4072 	   ldr 		r12, [pc, #8] ; #4 for Arm mode
4073 	   ldr 		r9,  [pc, #8] ; #4 for Arm mode
4074 	   ldr		pc,  [pc, #8] ; #4 for Arm mode
4075 	   .word	static chain value
4076 	   .word	GOT address
4077 	   .word	function's address
4078 */
4079 
4080 static void
4081 arm_asm_trampoline_template (FILE *f)
4082 {
4083   fprintf (f, "\t.syntax unified\n");
4084 
4085   if (TARGET_FDPIC)
4086     {
4087       /* The first two words are a function descriptor pointing to the
4088 	 trampoline code just below.  */
4089       if (TARGET_ARM)
4090 	fprintf (f, "\t.arm\n");
4091       else if (TARGET_THUMB2)
4092 	fprintf (f, "\t.thumb\n");
4093       else
4094 	/* Only ARM and Thumb-2 are supported.  */
4095 	gcc_unreachable ();
4096 
4097       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4098       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4099       /* Trampoline code which sets the static chain register as well as
4100 	 the PIC register before jumping into the real code.  */
4101       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4102 		   STATIC_CHAIN_REGNUM, PC_REGNUM,
4103 		   TARGET_THUMB2 ? 8 : 4);
4104       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4105 		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4106 		   TARGET_THUMB2 ? 8 : 4);
4107       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4108 		   PC_REGNUM, PC_REGNUM,
4109 		   TARGET_THUMB2 ? 8 : 4);
4110       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4111     }
4112   else if (TARGET_ARM)
4113     {
4114       fprintf (f, "\t.arm\n");
4115       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4116       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4117     }
4118   else if (TARGET_THUMB2)
4119     {
4120       fprintf (f, "\t.thumb\n");
4121       /* The Thumb-2 trampoline is similar to the arm implementation.
4122 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
4123       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4124 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
4125       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4126     }
4127   else
4128     {
4129       ASM_OUTPUT_ALIGN (f, 2);
4130       fprintf (f, "\t.code\t16\n");
4131       fprintf (f, ".Ltrampoline_start:\n");
4132       asm_fprintf (f, "\tpush\t{r0, r1}\n");
4133       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4134       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4135       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4136       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4137       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4138     }
4139   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4140   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4141 }
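/* Editorial note on the FDPIC displacements above (derived from the layout
   comment, not from separate documentation): the three LDRs start at byte
   offset 8 of the trampoline and their data words live at offsets 20, 24
   and 28.  In ARM state the PC reads as the instruction address plus 8,
   so the LDR at offset 8 sees PC == 16 and needs a displacement of 4; in
   Thumb-2 state the PC reads as the instruction address plus 4, so the
   same LDR sees PC == 12 and needs 8.  Each later LDR and its data word
   are both four bytes further on, so one displacement serves all three.  */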
4142 
4143 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
4144 
4145 static void
4146 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4147 {
4148   rtx fnaddr, mem, a_tramp;
4149 
4150   emit_block_move (m_tramp, assemble_trampoline_template (),
4151 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4152 
4153   if (TARGET_FDPIC)
4154     {
4155       rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4156       rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4157       rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4158       /* The function start address is at offset 8, but in Thumb mode
4159 	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4160 	 below.  */
4161       rtx trampoline_code_start
4162 	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4163 
4164       /* Write initial funcdesc which points to the trampoline.  */
4165       mem = adjust_address (m_tramp, SImode, 0);
4166       emit_move_insn (mem, trampoline_code_start);
4167       mem = adjust_address (m_tramp, SImode, 4);
4168       emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4169       /* Setup static chain.  */
4170       mem = adjust_address (m_tramp, SImode, 20);
4171       emit_move_insn (mem, chain_value);
4172       /* GOT + real function entry point.  */
4173       mem = adjust_address (m_tramp, SImode, 24);
4174       emit_move_insn (mem, gotaddr);
4175       mem = adjust_address (m_tramp, SImode, 28);
4176       emit_move_insn (mem, fnaddr);
4177     }
4178   else
4179     {
4180       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4181       emit_move_insn (mem, chain_value);
4182 
4183       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4184       fnaddr = XEXP (DECL_RTL (fndecl), 0);
4185       emit_move_insn (mem, fnaddr);
4186     }
4187 
4188   a_tramp = XEXP (m_tramp, 0);
4189   maybe_emit_call_builtin___clear_cache (a_tramp,
4190 					 plus_constant (ptr_mode,
4191 							a_tramp,
4192 							TRAMPOLINE_SIZE));
4193 }
4194 
4195 /* Thumb trampolines should be entered in thumb mode, so set
4196    the bottom bit of the address.  */
4197 
4198 static rtx
4199 arm_trampoline_adjust_address (rtx addr)
4200 {
4201   /* For FDPIC don't fix trampoline address since it's a function
4202      descriptor and not a function address.  */
4203   if (TARGET_THUMB && !TARGET_FDPIC)
4204     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4205 				NULL, 0, OPTAB_LIB_WIDEN);
4206   return addr;
4207 }
4208 
4209 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4210    includes call-clobbered registers too.  If this is a leaf function
4211    we can just examine the registers used by the RTL, but otherwise we
4212    have to assume that whatever function is called might clobber
4213    anything, and so we have to save all the call-clobbered registers
4214    as well.  */
4215 static inline bool reg_needs_saving_p (unsigned reg)
4216 {
4217   unsigned long func_type = arm_current_func_type ();
4218 
4219   if (IS_INTERRUPT (func_type))
4220     if (df_regs_ever_live_p (reg)
4221 	/* Save call-clobbered core registers.  */
4222 	|| (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4223       return true;
4224     else
4225       return false;
4226   else
4227     if (!df_regs_ever_live_p (reg)
4228 	|| call_used_or_fixed_reg_p (reg))
4229       return false;
4230     else
4231       return true;
4232 }
4233 
4234 /* Return 1 if it is possible to return using a single instruction.
4235    If SIBLING is non-null, this is a test for a return before a sibling
4236    call.  SIBLING is the call insn, so we can examine its register usage.  */
4237 
4238 int
4239 use_return_insn (int iscond, rtx sibling)
4240 {
4241   int regno;
4242   unsigned int func_type;
4243   unsigned long saved_int_regs;
4244   unsigned HOST_WIDE_INT stack_adjust;
4245   arm_stack_offsets *offsets;
4246 
4247   /* Never use a return instruction before reload has run.  */
4248   if (!reload_completed)
4249     return 0;
4250 
4251   func_type = arm_current_func_type ();
4252 
4253   /* Naked, volatile and stack alignment functions need special
4254      consideration.  */
4255   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4256     return 0;
4257 
4258   /* So do interrupt functions that use the frame pointer and Thumb
4259      interrupt functions.  */
4260   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4261     return 0;
4262 
4263   if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4264       && !optimize_function_for_size_p (cfun))
4265     return 0;
4266 
4267   offsets = arm_get_frame_offsets ();
4268   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4269 
4270   /* As do variadic functions.  */
4271   if (crtl->args.pretend_args_size
4272       || cfun->machine->uses_anonymous_args
4273       /* Or if the function calls __builtin_eh_return () */
4274       || crtl->calls_eh_return
4275       /* Or if the function calls alloca */
4276       || cfun->calls_alloca
4277       /* Or if there is a stack adjustment.  However, if the stack pointer
4278 	 is saved on the stack, we can use a pre-incrementing stack load.  */
4279       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4280 				 && stack_adjust == 4))
4281       /* Or if the static chain register was saved above the frame, under the
4282 	 assumption that the stack pointer isn't saved on the stack.  */
4283       || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4284           && arm_compute_static_chain_stack_bytes() != 0))
4285     return 0;
4286 
4287   saved_int_regs = offsets->saved_regs_mask;
4288 
4289   /* Unfortunately, the insn
4290 
4291        ldmib sp, {..., sp, ...}
4292 
4293      triggers a bug on most SA-110 based devices, such that the stack
4294      pointer won't be correctly restored if the instruction takes a
4295      page fault.  We work around this problem by popping r3 along with
4296      the other registers, since that is never slower than executing
4297      another instruction.
4298 
4299      We test for !arm_arch5t here, because code for any architecture
4300      less than this could potentially be run on one of the buggy
4301      chips.  */
4302   if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4303     {
4304       /* Validate that r3 is a call-clobbered register (always true in
4305 	 the default abi) ...  */
4306       if (!call_used_or_fixed_reg_p (3))
4307 	return 0;
4308 
4309       /* ... that it isn't being used for a return value ... */
4310       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4311 	return 0;
4312 
4313       /* ... or for a tail-call argument ...  */
4314       if (sibling)
4315 	{
4316 	  gcc_assert (CALL_P (sibling));
4317 
4318 	  if (find_regno_fusage (sibling, USE, 3))
4319 	    return 0;
4320 	}
4321 
4322       /* ... and that there are no call-saved registers in r0-r2
4323 	 (always true in the default ABI).  */
4324       if (saved_int_regs & 0x7)
4325 	return 0;
4326     }
4327 
4328   /* Can't be done if interworking with Thumb, and any registers have been
4329      stacked.  */
4330   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4331     return 0;
4332 
4333   /* On StrongARM, conditional returns are expensive if they aren't
4334      taken and multiple registers have been stacked.  */
4335   if (iscond && arm_tune_strongarm)
4336     {
4337       /* Conditional return when just the LR is stored is a simple
4338 	 conditional-load instruction, that's not expensive.  */
4339       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4340 	return 0;
4341 
4342       if (flag_pic
4343 	  && arm_pic_register != INVALID_REGNUM
4344 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4345 	return 0;
4346     }
4347 
4348   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4349      several instructions if anything needs to be popped.  Armv8.1-M Mainline
4350      also needs several instructions to save and restore FP context.  */
4351   if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4352     return 0;
4353 
4354   /* If there are saved registers but the LR isn't saved, then we need
4355      two instructions for the return.  */
4356   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4357     return 0;
4358 
4359   /* Can't be done if any of the VFP regs are pushed,
4360      since this also requires an insn.  */
4361   if (TARGET_VFP_BASE)
4362     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4363       if (reg_needs_saving_p (regno))
4364 	return 0;
4365 
4366   if (TARGET_REALLY_IWMMXT)
4367     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4368       if (reg_needs_saving_p (regno))
4369 	return 0;
4370 
4371   return 1;
4372 }
4373 
4374 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4375    shrink-wrapping if possible.  This is the case if we need to emit a
4376    prologue, which we can test by looking at the offsets.  */
4377 bool
4378 use_simple_return_p (void)
4379 {
4380   arm_stack_offsets *offsets;
4381 
4382   /* Note this function can be called before or after reload.  */
4383   if (!reload_completed)
4384     arm_compute_frame_layout ();
4385 
4386   offsets = arm_get_frame_offsets ();
4387   return offsets->outgoing_args != 0;
4388 }
4389 
4390 /* Return TRUE if int I is a valid immediate ARM constant.  */
4391 
4392 int
4393 const_ok_for_arm (HOST_WIDE_INT i)
4394 {
4395   int lowbit;
4396 
4397   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4398      be all zero, or all one.  */
4399   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4400       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4401 	  != ((~(unsigned HOST_WIDE_INT) 0)
4402 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4403     return FALSE;
4404 
4405   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4406 
4407   /* Fast return for 0 and small values.  We must do this for zero, since
4408      the code below can't handle that one case.  */
4409   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4410     return TRUE;
4411 
4412   /* Get the number of trailing zeros.  */
4413   lowbit = ffs((int) i) - 1;
4414 
4415   /* Only even shifts are allowed in ARM mode so round down to the
4416      nearest even number.  */
4417   if (TARGET_ARM)
4418     lowbit &= ~1;
4419 
4420   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4421     return TRUE;
4422 
4423   if (TARGET_ARM)
4424     {
4425       /* Allow rotated constants in ARM mode.  */
4426       if (lowbit <= 4
4427 	   && ((i & ~0xc000003f) == 0
4428 	       || (i & ~0xf000000f) == 0
4429 	       || (i & ~0xfc000003) == 0))
4430 	return TRUE;
4431     }
4432   else if (TARGET_THUMB2)
4433     {
4434       HOST_WIDE_INT v;
4435 
4436       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
4437       v = i & 0xff;
4438       v |= v << 16;
4439       if (i == v || i == (v | (v << 8)))
4440 	return TRUE;
4441 
4442       /* Allow repeated pattern 0xXY00XY00.  */
4443       v = i & 0xff00;
4444       v |= v << 16;
4445       if (i == v)
4446 	return TRUE;
4447     }
4448   else if (TARGET_HAVE_MOVT)
4449     {
4450       /* Thumb-1 targets with MOVT.  */
4451       if (i > 0xffff)
4452 	return FALSE;
4453       else
4454 	return TRUE;
4455     }
4456 
4457   return FALSE;
4458 }
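
/* A few worked examples of the checks above (illustrative only, derived
   from the tests in const_ok_for_arm):

     0x000000ff  valid for ARM and Thumb-2: a plain 8-bit value.
     0x0000ff00  valid for ARM and Thumb-2: an 8-bit value shifted left by 8.
     0xc0000003  valid in ARM mode: 0x0f rotated right by two bits.
     0x000001fe  rejected in ARM mode, since it would need an odd rotation,
		 but accepted in Thumb-2, where any shift amount is allowed.
     0x00ff00ff  accepted only in Thumb-2, as the replicated pattern
		 0x00XY00XY.  */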
4459 
4460 /* Return true if I is a valid constant for the operation CODE.  */
4461 int
4462 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4463 {
4464   if (const_ok_for_arm (i))
4465     return 1;
4466 
4467   switch (code)
4468     {
4469     case SET:
4470       /* See if we can use movw.  */
4471       if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4472 	return 1;
4473       else
4474 	/* Otherwise, try mvn.  */
4475 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4476 
4477     case PLUS:
4478       /* See if we can use addw or subw.  */
4479       if (TARGET_THUMB2
4480 	  && ((i & 0xfffff000) == 0
4481 	      || ((-i) & 0xfffff000) == 0))
4482 	return 1;
4483       /* Fall through.  */
4484     case COMPARE:
4485     case EQ:
4486     case NE:
4487     case GT:
4488     case LE:
4489     case LT:
4490     case GE:
4491     case GEU:
4492     case LTU:
4493     case GTU:
4494     case LEU:
4495     case UNORDERED:
4496     case ORDERED:
4497     case UNEQ:
4498     case UNGE:
4499     case UNLT:
4500     case UNGT:
4501     case UNLE:
4502       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4503 
4504     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
4505     case XOR:
4506       return 0;
4507 
4508     case IOR:
4509       if (TARGET_THUMB2)
4510 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4511       return 0;
4512 
4513     case AND:
4514       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4515 
4516     default:
4517       gcc_unreachable ();
4518     }
4519 }
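
/* For example (values illustrative): a SET of 0xffff00ff is accepted here
   because its complement 0x0000ff00 is a valid immediate, so the constant
   can be loaded with a single MVN; and a PLUS of -10 is accepted because
   10 is a valid immediate, so the addition can be emitted as a SUB.  */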
4520 
4521 /* Return true if I is a valid di mode constant for the operation CODE.  */
4522 int
4523 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4524 {
4525   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4526   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4527   rtx hi = GEN_INT (hi_val);
4528   rtx lo = GEN_INT (lo_val);
4529 
4530   if (TARGET_THUMB1)
4531     return 0;
4532 
4533   switch (code)
4534     {
4535     case AND:
4536     case IOR:
4537     case XOR:
4538       return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4539 	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4540     case PLUS:
4541       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4542 
4543     default:
4544       return 0;
4545     }
4546 }
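
/* The value is simply split into its two 32-bit halves: for example,
   i = 0x00000001ffffff00 gives hi_val = 0x00000001 and lo_val = 0xffffff00.
   For the logical operations it is enough that either half is a valid
   immediate for the operation, or is all-ones.  */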
4547 
4548 /* Emit a sequence of insns to handle a large constant.
4549    CODE is the code of the operation required, it can be any of SET, PLUS,
4550    IOR, AND, XOR, MINUS;
4551    MODE is the mode in which the operation is being performed;
4552    VAL is the integer to operate on;
4553    SOURCE is the other operand (a register, or a null-pointer for SET);
4554    SUBTARGETS means it is safe to create scratch registers if that will
4555    either produce a simpler sequence or allow the values to be CSEd later.
4556    Return value is the number of insns emitted.  */
4557 
4558 /* ??? Tweak this for thumb2.  */
4559 int
4560 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4561 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4562 {
4563   rtx cond;
4564 
4565   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4566     cond = COND_EXEC_TEST (PATTERN (insn));
4567   else
4568     cond = NULL_RTX;
4569 
4570   if (subtargets || code == SET
4571       || (REG_P (target) && REG_P (source)
4572 	  && REGNO (target) != REGNO (source)))
4573     {
4574       /* After arm_reorg has been called, we can't fix up expensive
4575 	 constants by pushing them into memory so we must synthesize
4576 	 them in-line, regardless of the cost.  This is only likely to
4577 	 be more costly on chips that have load delay slots and we are
4578 	 compiling without running the scheduler (so no splitting
4579 	 occurred before the final instruction emission).
4580 
4581 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4582       */
4583       if (!cfun->machine->after_arm_reorg
4584 	  && !cond
4585 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4586 				1, 0)
4587 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
4588 		 + (code != SET))))
4589 	{
4590 	  if (code == SET)
4591 	    {
4592 	      /* Currently SET is the only monadic value for CODE, all
4593 		 the rest are dyadic.  */
4594 	      if (TARGET_USE_MOVT)
4595 		arm_emit_movpair (target, GEN_INT (val));
4596 	      else
4597 		emit_set_insn (target, GEN_INT (val));
4598 
4599 	      return 1;
4600 	    }
4601 	  else
4602 	    {
4603 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4604 
4605 	      if (TARGET_USE_MOVT)
4606 		arm_emit_movpair (temp, GEN_INT (val));
4607 	      else
4608 		emit_set_insn (temp, GEN_INT (val));
4609 
4610 	      /* For MINUS, the value is subtracted from, since we never
4611 		 have subtraction of a constant.  */
4612 	      if (code == MINUS)
4613 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4614 	      else
4615 		emit_set_insn (target,
4616 			       gen_rtx_fmt_ee (code, mode, source, temp));
4617 	      return 2;
4618 	    }
4619 	}
4620     }
4621 
4622   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4623 			   1);
4624 }
4625 
4626 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4627    ARM/THUMB2 immediates and add up to VAL.
4628    The function's return value gives the number of insns required.  */
4629 static int
4630 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4631 			    struct four_ints *return_sequence)
4632 {
4633   int best_consecutive_zeros = 0;
4634   int i;
4635   int best_start = 0;
4636   int insns1, insns2;
4637   struct four_ints tmp_sequence;
4638 
4639   /* If we aren't targeting ARM, the best place to start is always at
4640      the bottom, otherwise look more closely.  */
4641   if (TARGET_ARM)
4642     {
4643       for (i = 0; i < 32; i += 2)
4644 	{
4645 	  int consecutive_zeros = 0;
4646 
4647 	  if (!(val & (3 << i)))
4648 	    {
4649 	      while ((i < 32) && !(val & (3 << i)))
4650 		{
4651 		  consecutive_zeros += 2;
4652 		  i += 2;
4653 		}
4654 	      if (consecutive_zeros > best_consecutive_zeros)
4655 		{
4656 		  best_consecutive_zeros = consecutive_zeros;
4657 		  best_start = i - consecutive_zeros;
4658 		}
4659 	      i -= 2;
4660 	    }
4661 	}
4662     }
4663 
4664   /* So long as it won't require any more insns to do so, it's
4665      desirable to emit a small constant (in bits 0...9) in the last
4666      insn.  This way there is more chance that it can be combined with
4667      a later addressing insn to form a pre-indexed load or store
4668      operation.  Consider:
4669 
4670 	   *((volatile int *)0xe0000100) = 1;
4671 	   *((volatile int *)0xe0000110) = 2;
4672 
4673      We want this to wind up as:
4674 
4675 	    mov rA, #0xe0000000
4676 	    mov rB, #1
4677 	    str rB, [rA, #0x100]
4678 	    mov rB, #2
4679 	    str rB, [rA, #0x110]
4680 
4681      rather than having to synthesize both large constants from scratch.
4682 
4683      Therefore, we calculate how many insns would be required to emit
4684      the constant starting from `best_start', and also starting from
4685      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
4686      yield a shorter sequence, we may as well use zero.  */
4687   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4688   if (best_start != 0
4689       && ((HOST_WIDE_INT_1U << best_start) < val))
4690     {
4691       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4692       if (insns2 <= insns1)
4693 	{
4694 	  *return_sequence = tmp_sequence;
4695 	  insns1 = insns2;
4696 	}
4697     }
4698 
4699   return insns1;
4700 }
4701 
4702 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
4703 static int
4704 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4705 			     struct four_ints *return_sequence, int i)
4706 {
4707   int remainder = val & 0xffffffff;
4708   int insns = 0;
4709 
4710   /* Try and find a way of doing the job in either two or three
4711      instructions.
4712 
4713      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4714      location.  We start at position I.  This may be the MSB, or
4715      optimal_immediate_sequence may have positioned it at the largest block
4716      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4717      wrapping around to the top of the word when we drop off the bottom.
4718      In the worst case this code should produce no more than four insns.
4719 
4720      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4721      constants, shifted to any arbitrary location.  We should always start
4722      at the MSB.  */
4723   do
4724     {
4725       int end;
4726       unsigned int b1, b2, b3, b4;
4727       unsigned HOST_WIDE_INT result;
4728       int loc;
4729 
4730       gcc_assert (insns < 4);
4731 
4732       if (i <= 0)
4733 	i += 32;
4734 
4735       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
4736       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4737 	{
4738 	  loc = i;
4739 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4740 	    /* We can use addw/subw for the last 12 bits.  */
4741 	    result = remainder;
4742 	  else
4743 	    {
4744 	      /* Use an 8-bit shifted/rotated immediate.  */
4745 	      end = i - 8;
4746 	      if (end < 0)
4747 		end += 32;
4748 	      result = remainder & ((0x0ff << end)
4749 				   | ((i < end) ? (0xff >> (32 - end))
4750 						: 0));
4751 	      i -= 8;
4752 	    }
4753 	}
4754       else
4755 	{
4756 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
4757 	     arbitrary shifts.  */
4758 	  i -= TARGET_ARM ? 2 : 1;
4759 	  continue;
4760 	}
4761 
4762       /* Next, see if we can do a better job with a thumb2 replicated
4763 	 constant.
4764 
4765          We do it this way around to catch the cases like 0x01F001E0 where
4766 	 two 8-bit immediates would work, but a replicated constant would
4767 	 make it worse.
4768 
4769          TODO: 16-bit constants that don't clear all the bits, but still win.
4770          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
4771       if (TARGET_THUMB2)
4772 	{
4773 	  b1 = (remainder & 0xff000000) >> 24;
4774 	  b2 = (remainder & 0x00ff0000) >> 16;
4775 	  b3 = (remainder & 0x0000ff00) >> 8;
4776 	  b4 = remainder & 0xff;
4777 
4778 	  if (loc > 24)
4779 	    {
4780 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
4781 		 but must leave b3 and b4 alone.  */
4782 
4783 	      /* First try to find a 32-bit replicated constant that clears
4784 		 almost everything.  We can assume that we can't do it in one,
4785 		 or else we wouldn't be here.  */
4786 	      unsigned int tmp = b1 & b2 & b3 & b4;
4787 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4788 				  + (tmp << 24);
4789 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4790 					    + (tmp == b3) + (tmp == b4);
4791 	      if (tmp
4792 		  && (matching_bytes >= 3
4793 		      || (matching_bytes == 2
4794 			  && const_ok_for_op (remainder & ~tmp2, code))))
4795 		{
4796 		  /* At least 3 of the bytes match, and the fourth has at
4797 		     least as many bits set, or two of the bytes match
4798 		     and it will only require one more insn to finish.  */
4799 		  result = tmp2;
4800 		  i = tmp != b1 ? 32
4801 		      : tmp != b2 ? 24
4802 		      : tmp != b3 ? 16
4803 		      : 8;
4804 		}
4805 
4806 	      /* Second, try to find a 16-bit replicated constant that can
4807 		 leave three of the bytes clear.  If b2 or b4 is already
4808 		 zero, then we can.  If the 8-bit from above would not
4809 		 clear b2 anyway, then we still win.  */
4810 	      else if (b1 == b3 && (!b2 || !b4
4811 			       || (remainder & 0x00ff0000 & ~result)))
4812 		{
4813 		  result = remainder & 0xff00ff00;
4814 		  i = 24;
4815 		}
4816 	    }
4817 	  else if (loc > 16)
4818 	    {
4819 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
4820 		 and we don't get here unless b1 is already clear, but it will
4821 		 leave b4 unchanged.  */
4822 
4823 	      /* If we can clear b2 and b4 at once, then we win, since the
4824 		 8-bits couldn't possibly reach that far.  */
4825 	      if (b2 == b4)
4826 		{
4827 		  result = remainder & 0x00ff00ff;
4828 		  i = 16;
4829 		}
4830 	    }
4831 	}
4832 
4833       return_sequence->i[insns++] = result;
4834       remainder &= ~result;
4835 
4836       if (code == SET || code == MINUS)
4837 	code = PLUS;
4838     }
4839   while (remainder);
4840 
4841   return insns;
4842 }
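
/* Worked examples (illustrative): 0xe0000100, from the comment in
   optimal_immediate_sequence, is covered by the two 8-bit rotated
   immediates 0xe0000000 and 0x00000100; likewise 0x01f001e0 is covered by
   0x01f00000 and 0x000001e0, so either constant can be built or OR-ed in
   with two insns.  */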
4843 
4844 /* Emit an instruction with the indicated PATTERN.  If COND is
4845    non-NULL, conditionalize the execution of the instruction on COND
4846    being true.  */
4847 
4848 static void
4849 emit_constant_insn (rtx cond, rtx pattern)
4850 {
4851   if (cond)
4852     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4853   emit_insn (pattern);
4854 }
4855 
4856 /* As above, but extra parameter GENERATE which, if clear, suppresses
4857    RTL generation.  */
4858 
4859 static int
4860 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4861 		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
4862 		  int subtargets, int generate)
4863 {
4864   int can_invert = 0;
4865   int can_negate = 0;
4866   int final_invert = 0;
4867   int i;
4868   int set_sign_bit_copies = 0;
4869   int clear_sign_bit_copies = 0;
4870   int clear_zero_bit_copies = 0;
4871   int set_zero_bit_copies = 0;
4872   int insns = 0, neg_insns, inv_insns;
4873   unsigned HOST_WIDE_INT temp1, temp2;
4874   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4875   struct four_ints *immediates;
4876   struct four_ints pos_immediates, neg_immediates, inv_immediates;
4877 
4878   /* Find out which operations are safe for a given CODE.  Also do a quick
4879      check for degenerate cases; these can occur when DImode operations
4880      are split.  */
4881   switch (code)
4882     {
4883     case SET:
4884       can_invert = 1;
4885       break;
4886 
4887     case PLUS:
4888       can_negate = 1;
4889       break;
4890 
4891     case IOR:
4892       if (remainder == 0xffffffff)
4893 	{
4894 	  if (generate)
4895 	    emit_constant_insn (cond,
4896 				gen_rtx_SET (target,
4897 					     GEN_INT (ARM_SIGN_EXTEND (val))));
4898 	  return 1;
4899 	}
4900 
4901       if (remainder == 0)
4902 	{
4903 	  if (reload_completed && rtx_equal_p (target, source))
4904 	    return 0;
4905 
4906 	  if (generate)
4907 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4908 	  return 1;
4909 	}
4910       break;
4911 
4912     case AND:
4913       if (remainder == 0)
4914 	{
4915 	  if (generate)
4916 	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4917 	  return 1;
4918 	}
4919       if (remainder == 0xffffffff)
4920 	{
4921 	  if (reload_completed && rtx_equal_p (target, source))
4922 	    return 0;
4923 	  if (generate)
4924 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4925 	  return 1;
4926 	}
4927       can_invert = 1;
4928       break;
4929 
4930     case XOR:
4931       if (remainder == 0)
4932 	{
4933 	  if (reload_completed && rtx_equal_p (target, source))
4934 	    return 0;
4935 	  if (generate)
4936 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4937 	  return 1;
4938 	}
4939 
4940       if (remainder == 0xffffffff)
4941 	{
4942 	  if (generate)
4943 	    emit_constant_insn (cond,
4944 				gen_rtx_SET (target,
4945 					     gen_rtx_NOT (mode, source)));
4946 	  return 1;
4947 	}
4948       final_invert = 1;
4949       break;
4950 
4951     case MINUS:
4952       /* We treat MINUS as (val - source), since (source - val) is always
4953 	 passed as (source + (-val)).  */
4954       if (remainder == 0)
4955 	{
4956 	  if (generate)
4957 	    emit_constant_insn (cond,
4958 				gen_rtx_SET (target,
4959 					     gen_rtx_NEG (mode, source)));
4960 	  return 1;
4961 	}
4962       if (const_ok_for_arm (val))
4963 	{
4964 	  if (generate)
4965 	    emit_constant_insn (cond,
4966 				gen_rtx_SET (target,
4967 					     gen_rtx_MINUS (mode, GEN_INT (val),
4968 							    source)));
4969 	  return 1;
4970 	}
4971 
4972       break;
4973 
4974     default:
4975       gcc_unreachable ();
4976     }
4977 
4978   /* If we can do it in one insn get out quickly.  */
4979   if (const_ok_for_op (val, code))
4980     {
4981       if (generate)
4982 	emit_constant_insn (cond,
4983 			    gen_rtx_SET (target,
4984 					 (source
4985 					  ? gen_rtx_fmt_ee (code, mode, source,
4986 							    GEN_INT (val))
4987 					  : GEN_INT (val))));
4988       return 1;
4989     }
4990 
4991   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4992      insn.  */
4993   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4994       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4995     {
4996       if (generate)
4997 	{
4998 	  if (mode == SImode && i == 16)
4999 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5000 	       smaller insn.  */
5001 	    emit_constant_insn (cond,
5002 				gen_zero_extendhisi2
5003 				(target, gen_lowpart (HImode, source)));
5004 	  else
5005 	    /* Extz only supports SImode, but we can coerce the operands
5006 	       into that mode.  */
5007 	    emit_constant_insn (cond,
5008 				gen_extzv_t2 (gen_lowpart (SImode, target),
5009 					      gen_lowpart (SImode, source),
5010 					      GEN_INT (i), const0_rtx));
5011 	}
5012 
5013       return 1;
5014     }
5015 
5016   /* Calculate a few attributes that may be useful for specific
5017      optimizations.  */
5018   /* Count number of leading zeros.  */
5019   for (i = 31; i >= 0; i--)
5020     {
5021       if ((remainder & (1 << i)) == 0)
5022 	clear_sign_bit_copies++;
5023       else
5024 	break;
5025     }
5026 
5027   /* Count number of leading 1's.  */
5028   for (i = 31; i >= 0; i--)
5029     {
5030       if ((remainder & (1 << i)) != 0)
5031 	set_sign_bit_copies++;
5032       else
5033 	break;
5034     }
5035 
5036   /* Count number of trailing zero's.  */
5037   for (i = 0; i <= 31; i++)
5038     {
5039       if ((remainder & (1 << i)) == 0)
5040 	clear_zero_bit_copies++;
5041       else
5042 	break;
5043     }
5044 
5045   /* Count number of trailing 1's.  */
5046   for (i = 0; i <= 31; i++)
5047     {
5048       if ((remainder & (1 << i)) != 0)
5049 	set_zero_bit_copies++;
5050       else
5051 	break;
5052     }
5053 
5054   switch (code)
5055     {
5056     case SET:
5057       /* See if we can do this by sign_extending a constant that is known
5058 	 to be negative.  This is a good way of doing it, since the shift
5059 	 may well merge into a subsequent insn.  */
5060       if (set_sign_bit_copies > 1)
5061 	{
5062 	  if (const_ok_for_arm
5063 	      (temp1 = ARM_SIGN_EXTEND (remainder
5064 					<< (set_sign_bit_copies - 1))))
5065 	    {
5066 	      if (generate)
5067 		{
5068 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5069 		  emit_constant_insn (cond,
5070 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5071 		  emit_constant_insn (cond,
5072 				      gen_ashrsi3 (target, new_src,
5073 						   GEN_INT (set_sign_bit_copies - 1)));
5074 		}
5075 	      return 2;
5076 	    }
5077 	  /* For an inverted constant, we will need to set the low bits,
5078 	     these will be shifted out of harm's way.  */
5079 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5080 	  if (const_ok_for_arm (~temp1))
5081 	    {
5082 	      if (generate)
5083 		{
5084 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5085 		  emit_constant_insn (cond,
5086 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5087 		  emit_constant_insn (cond,
5088 				      gen_ashrsi3 (target, new_src,
5089 						   GEN_INT (set_sign_bit_copies - 1)));
5090 		}
5091 	      return 2;
5092 	    }
5093 	}
5094 
5095       /* See if we can calculate the value as the difference between two
5096 	 valid immediates.  */
5097       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5098 	{
5099 	  int topshift = clear_sign_bit_copies & ~1;
5100 
5101 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5102 				   & (0xff000000 >> topshift));
5103 
5104 	  /* If temp1 is zero, then that means the 9 most significant
5105 	     bits of remainder were 1 and we've caused it to overflow.
5106 	     When topshift is 0 we don't need to do anything since we
5107 	     can borrow from 'bit 32'.  */
5108 	  if (temp1 == 0 && topshift != 0)
5109 	    temp1 = 0x80000000 >> (topshift - 1);
5110 
5111 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5112 
5113 	  if (const_ok_for_arm (temp2))
5114 	    {
5115 	      if (generate)
5116 		{
5117 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5118 		  emit_constant_insn (cond,
5119 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5120 		  emit_constant_insn (cond,
5121 				      gen_addsi3 (target, new_src,
5122 						  GEN_INT (-temp2)));
5123 		}
5124 
5125 	      return 2;
5126 	    }
5127 	}
5128 
5129       /* See if we can generate this by setting the bottom (or the top)
5130 	 16 bits, and then shifting these into the other half of the
5131 	 word.  We only look for the simplest cases, to do more would cost
5132 	 too much.  Be careful, however, not to generate this when the
5133 	 alternative would take fewer insns.  */
5134       if (val & 0xffff0000)
5135 	{
5136 	  temp1 = remainder & 0xffff0000;
5137 	  temp2 = remainder & 0x0000ffff;
5138 
5139 	  /* Overlaps outside this range are best done using other methods.  */
5140 	  for (i = 9; i < 24; i++)
5141 	    {
5142 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5143 		  && !const_ok_for_arm (temp2))
5144 		{
5145 		  rtx new_src = (subtargets
5146 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5147 				 : target);
5148 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5149 					    source, subtargets, generate);
5150 		  source = new_src;
5151 		  if (generate)
5152 		    emit_constant_insn
5153 		      (cond,
5154 		       gen_rtx_SET
5155 		       (target,
5156 			gen_rtx_IOR (mode,
5157 				     gen_rtx_ASHIFT (mode, source,
5158 						     GEN_INT (i)),
5159 				     source)));
5160 		  return insns + 1;
5161 		}
5162 	    }
5163 
5164 	  /* Don't duplicate cases already considered.  */
5165 	  for (i = 17; i < 24; i++)
5166 	    {
5167 	      if (((temp1 | (temp1 >> i)) == remainder)
5168 		  && !const_ok_for_arm (temp1))
5169 		{
5170 		  rtx new_src = (subtargets
5171 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5172 				 : target);
5173 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5174 					    source, subtargets, generate);
5175 		  source = new_src;
5176 		  if (generate)
5177 		    emit_constant_insn
5178 		      (cond,
5179 		       gen_rtx_SET (target,
5180 				    gen_rtx_IOR
5181 				    (mode,
5182 				     gen_rtx_LSHIFTRT (mode, source,
5183 						       GEN_INT (i)),
5184 				     source)));
5185 		  return insns + 1;
5186 		}
5187 	    }
5188 	}
5189       break;
5190 
5191     case IOR:
5192     case XOR:
5193       /* If we have IOR or XOR, and the constant can be loaded in a
5194 	 single instruction, and we can find a temporary to put it in,
5195 	 then this can be done in two instructions instead of 3-4.  */
5196       if (subtargets
5197 	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
5198 	  || (reload_completed && !reg_mentioned_p (target, source)))
5199 	{
5200 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5201 	    {
5202 	      if (generate)
5203 		{
5204 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5205 
5206 		  emit_constant_insn (cond,
5207 				      gen_rtx_SET (sub, GEN_INT (val)));
5208 		  emit_constant_insn (cond,
5209 				      gen_rtx_SET (target,
5210 						   gen_rtx_fmt_ee (code, mode,
5211 								   source, sub)));
5212 		}
5213 	      return 2;
5214 	    }
5215 	}
5216 
5217       if (code == XOR)
5218 	break;
5219 
5220       /*  Convert
5221 	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
5222 	                    followed by 0s, e.g. 0xfff00000)
5223 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5224 
5225 	  This can be done in 2 instructions by using shifts with mov or mvn.
5226 	  e.g. for
5227 	  x = x | 0xfff00000;
5228 	  we generate:
5229 	  mvn	r0, r0, asl #12
5230 	  mvn	r0, r0, lsr #12  */
5231       if (set_sign_bit_copies > 8
5232 	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5233 	{
5234 	  if (generate)
5235 	    {
5236 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5237 	      rtx shift = GEN_INT (set_sign_bit_copies);
5238 
5239 	      emit_constant_insn
5240 		(cond,
5241 		 gen_rtx_SET (sub,
5242 			      gen_rtx_NOT (mode,
5243 					   gen_rtx_ASHIFT (mode,
5244 							   source,
5245 							   shift))));
5246 	      emit_constant_insn
5247 		(cond,
5248 		 gen_rtx_SET (target,
5249 			      gen_rtx_NOT (mode,
5250 					   gen_rtx_LSHIFTRT (mode, sub,
5251 							     shift))));
5252 	    }
5253 	  return 2;
5254 	}
5255 
5256       /* Convert
5257 	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
5258 	   to
5259 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5260 
5261 	  E.g. for r0 = r0 | 0xfff we generate:
5262 	       mvn	r0, r0, lsr #12
5263 	       mvn	r0, r0, asl #12
5264 
5265       */
5266       if (set_zero_bit_copies > 8
5267 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5268 	{
5269 	  if (generate)
5270 	    {
5271 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5272 	      rtx shift = GEN_INT (set_zero_bit_copies);
5273 
5274 	      emit_constant_insn
5275 		(cond,
5276 		 gen_rtx_SET (sub,
5277 			      gen_rtx_NOT (mode,
5278 					   gen_rtx_LSHIFTRT (mode,
5279 							     source,
5280 							     shift))));
5281 	      emit_constant_insn
5282 		(cond,
5283 		 gen_rtx_SET (target,
5284 			      gen_rtx_NOT (mode,
5285 					   gen_rtx_ASHIFT (mode, sub,
5286 							   shift))));
5287 	    }
5288 	  return 2;
5289 	}
5290 
5291       /* This will never be reached for Thumb2 because orn is a valid
5292 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
5293 
5294 	 x = y | constant (such that ~constant is a valid constant)
5295 	 Transform this to
5296 	 x = ~(~y & ~constant).
5297       */
5298       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5299 	{
5300 	  if (generate)
5301 	    {
5302 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5303 	      emit_constant_insn (cond,
5304 				  gen_rtx_SET (sub,
5305 					       gen_rtx_NOT (mode, source)));
5306 	      source = sub;
5307 	      if (subtargets)
5308 		sub = gen_reg_rtx (mode);
5309 	      emit_constant_insn (cond,
5310 				  gen_rtx_SET (sub,
5311 					       gen_rtx_AND (mode, source,
5312 							    GEN_INT (temp1))));
5313 	      emit_constant_insn (cond,
5314 				  gen_rtx_SET (target,
5315 					       gen_rtx_NOT (mode, sub)));
5316 	    }
5317 	  return 3;
5318 	}
5319       break;
5320 
5321     case AND:
5322       /* See if two shifts will do 2 or more insn's worth of work.  */
5323       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5324 	{
5325 	  HOST_WIDE_INT shift_mask = ((0xffffffff
5326 				       << (32 - clear_sign_bit_copies))
5327 				      & 0xffffffff);
5328 
5329 	  if ((remainder | shift_mask) != 0xffffffff)
5330 	    {
5331 	      HOST_WIDE_INT new_val
5332 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5333 
5334 	      if (generate)
5335 		{
5336 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5337 		  insns = arm_gen_constant (AND, SImode, cond, new_val,
5338 					    new_src, source, subtargets, 1);
5339 		  source = new_src;
5340 		}
5341 	      else
5342 		{
5343 		  rtx targ = subtargets ? NULL_RTX : target;
5344 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5345 					    targ, source, subtargets, 0);
5346 		}
5347 	    }
5348 
5349 	  if (generate)
5350 	    {
5351 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5352 	      rtx shift = GEN_INT (clear_sign_bit_copies);
5353 
5354 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
5355 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
5356 	    }
5357 
5358 	  return insns + 2;
5359 	}
5360 
5361       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5362 	{
5363 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5364 
5365 	  if ((remainder | shift_mask) != 0xffffffff)
5366 	    {
5367 	      HOST_WIDE_INT new_val
5368 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5369 	      if (generate)
5370 		{
5371 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5372 
5373 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5374 					    new_src, source, subtargets, 1);
5375 		  source = new_src;
5376 		}
5377 	      else
5378 		{
5379 		  rtx targ = subtargets ? NULL_RTX : target;
5380 
5381 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5382 					    targ, source, subtargets, 0);
5383 		}
5384 	    }
5385 
5386 	  if (generate)
5387 	    {
5388 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5389 	      rtx shift = GEN_INT (clear_zero_bit_copies);
5390 
5391 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
5392 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
5393 	    }
5394 
5395 	  return insns + 2;
5396 	}
5397 
5398       break;
5399 
5400     default:
5401       break;
5402     }
5403 
5404   /* Calculate what the instruction sequences would be if we generated it
5405      normally, negated, or inverted.  */
5406   if (code == AND)
5407     /* AND cannot be split into multiple insns, so invert and use BIC.  */
5408     insns = 99;
5409   else
5410     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5411 
5412   if (can_negate)
5413     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5414 					    &neg_immediates);
5415   else
5416     neg_insns = 99;
5417 
5418   if (can_invert || final_invert)
5419     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5420 					    &inv_immediates);
5421   else
5422     inv_insns = 99;
5423 
5424   immediates = &pos_immediates;
5425 
5426   /* Is the negated immediate sequence more efficient?  */
5427   if (neg_insns < insns && neg_insns <= inv_insns)
5428     {
5429       insns = neg_insns;
5430       immediates = &neg_immediates;
5431     }
5432   else
5433     can_negate = 0;
5434 
5435   /* Is the inverted immediate sequence more efficient?
5436      We must allow for an extra NOT instruction for XOR operations, although
5437      there is some chance that the final 'mvn' will get optimized later.  */
5438   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5439     {
5440       insns = inv_insns;
5441       immediates = &inv_immediates;
5442     }
5443   else
5444     {
5445       can_invert = 0;
5446       final_invert = 0;
5447     }
5448 
5449   /* Now output the chosen sequence as instructions.  */
5450   if (generate)
5451     {
5452       for (i = 0; i < insns; i++)
5453 	{
5454 	  rtx new_src, temp1_rtx;
5455 
5456 	  temp1 = immediates->i[i];
5457 
5458 	  if (code == SET || code == MINUS)
5459 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
5460 	  else if ((final_invert || i < (insns - 1)) && subtargets)
5461 	    new_src = gen_reg_rtx (mode);
5462 	  else
5463 	    new_src = target;
5464 
5465 	  if (can_invert)
5466 	    temp1 = ~temp1;
5467 	  else if (can_negate)
5468 	    temp1 = -temp1;
5469 
5470 	  temp1 = trunc_int_for_mode (temp1, mode);
5471 	  temp1_rtx = GEN_INT (temp1);
5472 
5473 	  if (code == SET)
5474 	    ;
5475 	  else if (code == MINUS)
5476 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5477 	  else
5478 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5479 
5480 	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5481 	  source = new_src;
5482 
5483 	  if (code == SET)
5484 	    {
5485 	      can_negate = can_invert;
5486 	      can_invert = 0;
5487 	      code = PLUS;
5488 	    }
5489 	  else if (code == MINUS)
5490 	    code = PLUS;
5491 	}
5492     }
5493 
5494   if (final_invert)
5495     {
5496       if (generate)
5497 	emit_constant_insn (cond, gen_rtx_SET (target,
5498 					       gen_rtx_NOT (mode, source)));
5499       insns++;
5500     }
5501 
5502   return insns;
5503 }
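
/* Two examples of the sequence selection above (illustrative): a PLUS of
   0x00ffff00 is emitted as two additions of 8-bit rotated immediates
   (e.g. #0x00ff0000 then #0x0000ff00), since neither the constant nor its
   negation fits in a single insn; and because an AND can never be split
   directly (insns is forced to 99), an AND that reaches this point is
   always realised through the inverted constant, i.e. as one or more
   BICs.  */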
5504 
5505 /* Return TRUE if op is a constant where both the low and top words are
5506    suitable for RSB/RSC instructions.  This is never true for Thumb, since
5507    we do not have RSC in that case.  */
5508 static bool
5509 arm_const_double_prefer_rsbs_rsc (rtx op)
5510 {
5511   /* Thumb lacks RSC, so we never prefer that sequence.  */
5512   if (TARGET_THUMB || !CONST_INT_P (op))
5513     return false;
5514   HOST_WIDE_INT hi, lo;
5515   lo = UINTVAL (op) & 0xffffffffULL;
5516   hi = UINTVAL (op) >> 32;
5517   return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5518 }
5519 
5520 /* Canonicalize a comparison so that we are more likely to recognize it.
5521    This can be done for a few constant compares, where we can make the
5522    immediate value easier to load.  */
5523 
5524 static void
5525 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5526 			     bool op0_preserve_value)
5527 {
5528   machine_mode mode;
5529   unsigned HOST_WIDE_INT i, maxval;
5530 
5531   mode = GET_MODE (*op0);
5532   if (mode == VOIDmode)
5533     mode = GET_MODE (*op1);
5534 
5535   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5536 
5537   /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
5538      ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
5539      either reversed or (for constant OP1) adjusted to GE/LT.
5540      Similarly for GTU/LEU in Thumb mode.  */
5541   if (mode == DImode)
5542     {
5543 
5544       if (*code == GT || *code == LE
5545 	  || *code == GTU || *code == LEU)
5546 	{
5547 	  /* Missing comparison.  First try to use an available
5548 	     comparison.  */
5549 	  if (CONST_INT_P (*op1))
5550 	    {
5551 	      i = INTVAL (*op1);
5552 	      switch (*code)
5553 		{
5554 		case GT:
5555 		case LE:
5556 		  if (i != maxval)
5557 		    {
5558 		      /* Try to convert to GE/LT, unless that would be more
5559 			 expensive.  */
5560 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5561 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5562 			return;
5563 		      *op1 = GEN_INT (i + 1);
5564 		      *code = *code == GT ? GE : LT;
5565 		    }
5566 		  else
5567 		    {
5568 		      /* GT maxval is always false, LE maxval is always true.
5569 			 We can't fold that away here as we must make a
5570 			 comparison, but we can fold them to comparisons
5571 			 with the same result that can be handled:
5572 			   op0 GT maxval -> op0 LT minval
5573 			   op0 LE maxval -> op0 GE minval
5574 			 where minval = (-maxval - 1).  */
5575 		      *op1 = GEN_INT (-maxval - 1);
5576 		      *code = *code == GT ? LT : GE;
5577 		    }
5578 		  return;
5579 
5580 		case GTU:
5581 		case LEU:
5582 		  if (i != ~((unsigned HOST_WIDE_INT) 0))
5583 		    {
5584 		      /* Try to convert to GEU/LTU, unless that would
5585 			 be more expensive.  */
5586 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5587 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5588 			return;
5589 		      *op1 = GEN_INT (i + 1);
5590 		      *code = *code == GTU ? GEU : LTU;
5591 		    }
5592 		  else
5593 		    {
5594 		      /* GTU ~0 is always false, LEU ~0 is always true.
5595 			 We can't fold that away here as we must make a
5596 			 comparison, but we can fold them to comparisons
5597 			 with the same result that can be handled:
5598 			   op0 GTU ~0 -> op0 LTU 0
5599 			   op0 LEU ~0 -> op0 GEU 0.  */
5600 		      *op1 = const0_rtx;
5601 		      *code = *code == GTU ? LTU : GEU;
5602 		    }
5603 		  return;
5604 
5605 		default:
5606 		  gcc_unreachable ();
5607 		}
5608 	    }
5609 
5610 	  if (!op0_preserve_value)
5611 	    {
5612 	      std::swap (*op0, *op1);
5613 	      *code = (int)swap_condition ((enum rtx_code)*code);
5614 	    }
5615 	}
5616       return;
5617     }
5618 
5619   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5620      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5621      to facilitate possible combining with a cmp into 'ands'.  */
5622   if (mode == SImode
5623       && GET_CODE (*op0) == ZERO_EXTEND
5624       && GET_CODE (XEXP (*op0, 0)) == SUBREG
5625       && GET_MODE (XEXP (*op0, 0)) == QImode
5626       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5627       && subreg_lowpart_p (XEXP (*op0, 0))
5628       && *op1 == const0_rtx)
5629     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5630 			GEN_INT (255));
5631 
5632   /* Comparisons smaller than DImode.  Only adjust comparisons against
5633      an out-of-range constant.  */
5634   if (!CONST_INT_P (*op1)
5635       || const_ok_for_arm (INTVAL (*op1))
5636       || const_ok_for_arm (- INTVAL (*op1)))
5637     return;
5638 
5639   i = INTVAL (*op1);
5640 
5641   switch (*code)
5642     {
5643     case EQ:
5644     case NE:
5645       return;
5646 
5647     case GT:
5648     case LE:
5649       if (i != maxval
5650 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5651 	{
5652 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5653 	  *code = *code == GT ? GE : LT;
5654 	  return;
5655 	}
5656       break;
5657 
5658     case GE:
5659     case LT:
5660       if (i != ~maxval
5661 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5662 	{
5663 	  *op1 = GEN_INT (i - 1);
5664 	  *code = *code == GE ? GT : LE;
5665 	  return;
5666 	}
5667       break;
5668 
5669     case GTU:
5670     case LEU:
5671       if (i != ~((unsigned HOST_WIDE_INT) 0)
5672 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5673 	{
5674 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5675 	  *code = *code == GTU ? GEU : LTU;
5676 	  return;
5677 	}
5678       break;
5679 
5680     case GEU:
5681     case LTU:
5682       if (i != 0
5683 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5684 	{
5685 	  *op1 = GEN_INT (i - 1);
5686 	  *code = *code == GEU ? GTU : LEU;
5687 	  return;
5688 	}
5689       break;
5690 
5691     default:
5692       gcc_unreachable ();
5693     }
5694 }
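
/* For instance (SImode, illustrative): (x > 0xfff) cannot use #0xfff
   directly, but 0x1000 is a valid immediate, so the comparison is
   rewritten as (x >= 0x1000); similarly a DImode (x GTU ~0), which is
   always false, becomes the equivalent (x LTU 0).  */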
5695 
5696 
5697 /* Define how to find the value returned by a function.  */
5698 
5699 static rtx
5700 arm_function_value (const_tree type, const_tree func,
5701 		   bool outgoing ATTRIBUTE_UNUSED)
5702 {
5703   machine_mode mode;
5704   int unsignedp ATTRIBUTE_UNUSED;
5705   rtx r ATTRIBUTE_UNUSED;
5706 
5707   mode = TYPE_MODE (type);
5708 
5709   if (TARGET_AAPCS_BASED)
5710     return aapcs_allocate_return_reg (mode, type, func);
5711 
5712   /* Promote integer types.  */
5713   if (INTEGRAL_TYPE_P (type))
5714     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5715 
5716   /* Promote small structs returned in a register to full-word size
5717      for big-endian AAPCS.  */
5718   if (arm_return_in_msb (type))
5719     {
5720       HOST_WIDE_INT size = int_size_in_bytes (type);
5721       if (size % UNITS_PER_WORD != 0)
5722 	{
5723 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5724 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5725 	}
5726     }
5727 
5728   return arm_libcall_value_1 (mode);
5729 }
5730 
5731 /* libcall hashtable helpers.  */
5732 
5733 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5734 {
5735   static inline hashval_t hash (const rtx_def *);
5736   static inline bool equal (const rtx_def *, const rtx_def *);
5737   static inline void remove (rtx_def *);
5738 };
5739 
5740 inline bool
5741 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5742 {
5743   return rtx_equal_p (p1, p2);
5744 }
5745 
5746 inline hashval_t
5747 libcall_hasher::hash (const rtx_def *p1)
5748 {
5749   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5750 }
5751 
5752 typedef hash_table<libcall_hasher> libcall_table_type;
5753 
5754 static void
5755 add_libcall (libcall_table_type *htab, rtx libcall)
5756 {
5757   *htab->find_slot (libcall, INSERT) = libcall;
5758 }
5759 
5760 static bool
5761 arm_libcall_uses_aapcs_base (const_rtx libcall)
5762 {
5763   static bool init_done = false;
5764   static libcall_table_type *libcall_htab = NULL;
5765 
5766   if (!init_done)
5767     {
5768       init_done = true;
5769 
5770       libcall_htab = new libcall_table_type (31);
5771       add_libcall (libcall_htab,
5772 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5773       add_libcall (libcall_htab,
5774 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5775       add_libcall (libcall_htab,
5776 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5777       add_libcall (libcall_htab,
5778 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5779 
5780       add_libcall (libcall_htab,
5781 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5782       add_libcall (libcall_htab,
5783 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5784       add_libcall (libcall_htab,
5785 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5786       add_libcall (libcall_htab,
5787 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5788 
5789       add_libcall (libcall_htab,
5790 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
5791       add_libcall (libcall_htab,
5792 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5793       add_libcall (libcall_htab,
5794 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
5795       add_libcall (libcall_htab,
5796 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
5797       add_libcall (libcall_htab,
5798 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
5799       add_libcall (libcall_htab,
5800 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
5801       add_libcall (libcall_htab,
5802 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
5803       add_libcall (libcall_htab,
5804 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
5805       add_libcall (libcall_htab,
5806 		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
5807       add_libcall (libcall_htab,
5808 		   convert_optab_libfunc (ufix_optab, SImode, SFmode));
5809 
5810       /* Values from double-precision helper functions are returned in core
5811 	 registers if the selected core only supports single-precision
5812 	 arithmetic, even if we are using the hard-float ABI.  The same is
5813 	 true for single-precision helpers except in case of MVE, because in
5814 	 MVE we will be using the hard-float ABI on a CPU which doesn't support
5815 	 single-precision operations in hardware.  In MVE the following check
5816 	 enables use of emulation for the single-precision arithmetic
5817 	 operations.  */
5818       if (TARGET_HAVE_MVE)
5819 	{
5820 	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5821 	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5822 	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5823 	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5824 	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5825 	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5826 	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5827 	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5828 	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5829 	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5830 	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5831 	}
5832       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5833       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5834       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5835       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5836       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5837       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5838       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5839       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5840       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5841       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5842       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5843       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5844 							SFmode));
5845       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5846 							DFmode));
5847       add_libcall (libcall_htab,
5848 		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5849     }
5850 
5851   return libcall && libcall_htab->find (libcall) != NULL;
5852 }
5853 
5854 static rtx
5855 arm_libcall_value_1 (machine_mode mode)
5856 {
5857   if (TARGET_AAPCS_BASED)
5858     return aapcs_libcall_value (mode);
5859   else if (TARGET_IWMMXT_ABI
5860 	   && arm_vector_mode_supported_p (mode))
5861     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5862   else
5863     return gen_rtx_REG (mode, ARG_REGISTER (1));
5864 }
5865 
5866 /* Define how to find the value returned by a library function
5867    assuming the value has mode MODE.  */
5868 
5869 static rtx
5870 arm_libcall_value (machine_mode mode, const_rtx libcall)
5871 {
5872   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5873       && GET_MODE_CLASS (mode) == MODE_FLOAT)
5874     {
5875       /* The following libcalls return their result in integer registers,
5876 	 even though they return a floating point value.  */
5877       if (arm_libcall_uses_aapcs_base (libcall))
5878 	return gen_rtx_REG (mode, ARG_REGISTER(1));
5879 
5880     }
5881 
5882   return arm_libcall_value_1 (mode);
5883 }
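
/* Concretely (illustrative): with the VFP variant of the AAPCS in effect,
   a call to the double-precision addition helper still returns its DFmode
   result in r0/r1 (the base-PCS rule), because that helper is listed in
   arm_libcall_uses_aapcs_base above, whereas an ordinary user function
   returning a double would use the VFP return register d0.  */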
5884 
5885 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5886 
5887 static bool
5888 arm_function_value_regno_p (const unsigned int regno)
5889 {
5890   if (regno == ARG_REGISTER (1)
5891       || (TARGET_32BIT
5892 	  && TARGET_AAPCS_BASED
5893 	  && TARGET_HARD_FLOAT
5894 	  && regno == FIRST_VFP_REGNUM)
5895       || (TARGET_IWMMXT_ABI
5896 	  && regno == FIRST_IWMMXT_REGNUM))
5897     return true;
5898 
5899   return false;
5900 }
5901 
5902 /* Determine the amount of memory needed to store the possible return
5903    registers of an untyped call.  */
5904 int
5905 arm_apply_result_size (void)
5906 {
5907   int size = 16;
5908 
5909   if (TARGET_32BIT)
5910     {
5911       if (TARGET_HARD_FLOAT_ABI)
5912 	size += 32;
5913       if (TARGET_IWMMXT_ABI)
5914 	size += 8;
5915     }
5916 
5917   return size;
5918 }
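
/* That is: 16 bytes always cover the core return registers r0-r3; a
   hard-float ABI adds 32 bytes for the VFP return registers, and the
   iWMMXt ABI adds another 8, for a worst case of 56 bytes.  */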
5919 
5920 /* Decide whether TYPE should be returned in memory (true)
5921    or in a register (false).  FNTYPE is the type of the function making
5922    the call.  */
5923 static bool
5924 arm_return_in_memory (const_tree type, const_tree fntype)
5925 {
5926   HOST_WIDE_INT size;
5927 
5928   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5929 
5930   if (TARGET_AAPCS_BASED)
5931     {
5932       /* Simple, non-aggregate types (i.e. not including vectors and
5933 	 complex) are always returned in a register (or registers).
5934 	 We don't care about which register here, so we can short-cut
5935 	 some of the detail.  */
5936       if (!AGGREGATE_TYPE_P (type)
5937 	  && TREE_CODE (type) != VECTOR_TYPE
5938 	  && TREE_CODE (type) != COMPLEX_TYPE)
5939 	return false;
5940 
5941       /* Any return value that is no larger than one word can be
5942 	 returned in r0.  */
5943       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5944 	return false;
5945 
5946       /* Check any available co-processors to see if they accept the
5947 	 type as a register candidate (VFP, for example, can return
5948 	 some aggregates in consecutive registers).  These aren't
5949 	 available if the call is variadic.  */
5950       if (aapcs_select_return_coproc (type, fntype) >= 0)
5951 	return false;
5952 
5953       /* Vector values should be returned using ARM registers, not
5954 	 memory (unless they're over 16 bytes, which will break since
5955 	 we only have four call-clobbered registers to play with).  */
5956       if (TREE_CODE (type) == VECTOR_TYPE)
5957 	return (size < 0 || size > (4 * UNITS_PER_WORD));
5958 
5959       /* The rest go in memory.  */
5960       return true;
5961     }
5962 
5963   if (TREE_CODE (type) == VECTOR_TYPE)
5964     return (size < 0 || size > (4 * UNITS_PER_WORD));
5965 
5966   if (!AGGREGATE_TYPE_P (type) &&
5967       (TREE_CODE (type) != VECTOR_TYPE))
5968     /* All simple types are returned in registers.  */
5969     return false;
5970 
5971   if (arm_abi != ARM_ABI_APCS)
5972     {
5973       /* ATPCS and later return aggregate types in memory only if they are
5974 	 larger than a word (or are variable size).  */
5975       return (size < 0 || size > UNITS_PER_WORD);
5976     }
5977 
5978   /* For the arm-wince targets we choose to be compatible with Microsoft's
5979      ARM and Thumb compilers, which always return aggregates in memory.  */
5980 #ifndef ARM_WINCE
5981   /* All structures/unions bigger than one word are returned in memory.
5982      Also catch the case where int_size_in_bytes returns -1.  In this case
5983      the aggregate is either huge or of variable size, and in either case
5984      we will want to return it via memory and not in a register.  */
5985   if (size < 0 || size > UNITS_PER_WORD)
5986     return true;
5987 
5988   if (TREE_CODE (type) == RECORD_TYPE)
5989     {
5990       tree field;
5991 
5992       /* For a struct the APCS says that we only return in a register
5993 	 if the type is 'integer like' and every addressable element
5994 	 has an offset of zero.  For practical purposes this means
5995 	 that the structure can have at most one non bit-field element
5996 	 and that this element must be the first one in the structure.  */
5997 
5998       /* Find the first field, ignoring non FIELD_DECL things which will
5999 	 have been created by C++.  */
6000       /* NOTE: This code is deprecated and has not been updated to handle
6001 	 DECL_FIELD_ABI_IGNORED.  */
6002       for (field = TYPE_FIELDS (type);
6003 	   field && TREE_CODE (field) != FIELD_DECL;
6004 	   field = DECL_CHAIN (field))
6005 	continue;
6006 
6007       if (field == NULL)
6008 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
6009 
6010       /* Check that the first field is valid for returning in a register.  */
6011 
6012       /* ... Floats are not allowed */
6013       if (FLOAT_TYPE_P (TREE_TYPE (field)))
6014 	return true;
6015 
6016       /* ... Aggregates that are not themselves valid for returning in
6017 	 a register are not allowed.  */
6018       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6019 	return true;
6020 
6021       /* Now check the remaining fields, if any.  Only bitfields are allowed,
6022 	 since they are not addressable.  */
6023       for (field = DECL_CHAIN (field);
6024 	   field;
6025 	   field = DECL_CHAIN (field))
6026 	{
6027 	  if (TREE_CODE (field) != FIELD_DECL)
6028 	    continue;
6029 
6030 	  if (!DECL_BIT_FIELD_TYPE (field))
6031 	    return true;
6032 	}
6033 
6034       return false;
6035     }
6036 
6037   if (TREE_CODE (type) == UNION_TYPE)
6038     {
6039       tree field;
6040 
6041       /* Unions can be returned in registers if every element is
6042 	 integral, or can be returned in an integer register.  */
6043       for (field = TYPE_FIELDS (type);
6044 	   field;
6045 	   field = DECL_CHAIN (field))
6046 	{
6047 	  if (TREE_CODE (field) != FIELD_DECL)
6048 	    continue;
6049 
6050 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
6051 	    return true;
6052 
6053 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6054 	    return true;
6055 	}
6056 
6057       return false;
6058     }
6059 #endif /* not ARM_WINCE */
6060 
6061   /* Return all other types in memory.  */
6062   return true;
6063 }
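
/* Illustrative examples of the APCS rules above (hypothetical types,
   shown only for their classification):

     struct a { int x; };                  returned in r0 (one word,
					   integer-like).
     struct b { float f; };                returned in memory (first field
					   is a float).
     struct c { int x : 8; int y : 24; };  returned in r0 (trailing fields
					   are bit-fields).
     struct d { int x; int y; };           returned in memory (larger than
					   one word).  */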
6064 
6065 const struct pcs_attribute_arg
6066 {
6067   const char *arg;
6068   enum arm_pcs value;
6069 } pcs_attribute_args[] =
6070   {
6071     {"aapcs", ARM_PCS_AAPCS},
6072     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6073 #if 0
6074     /* We could recognize these, but changes would be needed elsewhere
6075      * to implement them.  */
6076     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6077     {"atpcs", ARM_PCS_ATPCS},
6078     {"apcs", ARM_PCS_APCS},
6079 #endif
6080     {NULL, ARM_PCS_UNKNOWN}
6081   };
6082 
6083 static enum arm_pcs
6084 arm_pcs_from_attribute (tree attr)
6085 {
6086   const struct pcs_attribute_arg *ptr;
6087   const char *arg;
6088 
6089   /* Get the value of the argument.  */
6090   if (TREE_VALUE (attr) == NULL_TREE
6091       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6092     return ARM_PCS_UNKNOWN;
6093 
6094   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6095 
6096   /* Check it against the list of known arguments.  */
6097   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6098     if (streq (arg, ptr->arg))
6099       return ptr->value;
6100 
6101   /* An unrecognized PCS attribute argument.  */
6102   return ARM_PCS_UNKNOWN;
6103 }
6104 
6105 /* Get the PCS variant to use for this call.  TYPE is the function's type
6106    specification, DECL is the specific declaration.  DECL may be null if
6107    the call could be indirect or if this is a library call.  */
6108 static enum arm_pcs
6109 arm_get_pcs_model (const_tree type, const_tree decl)
6110 {
6111   bool user_convention = false;
6112   enum arm_pcs user_pcs = arm_pcs_default;
6113   tree attr;
6114 
6115   gcc_assert (type);
6116 
6117   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6118   if (attr)
6119     {
6120       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6121       user_convention = true;
6122     }
6123 
6124   if (TARGET_AAPCS_BASED)
6125     {
6126       /* Detect varargs functions.  These always use the base rules
6127 	 (no argument is ever a candidate for a co-processor
6128 	 register).  */
6129       bool base_rules = stdarg_p (type);
6130 
6131       if (user_convention)
6132 	{
6133 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6134 	    sorry ("non-AAPCS derived PCS variant");
6135 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6136 	    error ("variadic functions must use the base AAPCS variant");
6137 	}
6138 
6139       if (base_rules)
6140 	return ARM_PCS_AAPCS;
6141       else if (user_convention)
6142 	return user_pcs;
6143       else if (decl && flag_unit_at_a_time)
6144 	{
6145 	  /* Local functions never leak outside this compilation unit,
6146 	     so we are free to use whatever conventions are
6147 	     appropriate.  */
6148 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
6149 	  cgraph_node *local_info_node
6150 	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6151 	  if (local_info_node && local_info_node->local)
6152 	    return ARM_PCS_AAPCS_LOCAL;
6153 	}
6154     }
6155   else if (user_convention && user_pcs != arm_pcs_default)
6156     sorry ("PCS variant");
6157 
6158   /* For everything else we use the target's default.  */
6159   return arm_pcs_default;
6160 }
6161 
6162 
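/* Initialize the co-processor (VFP) argument-passing state in *PCUM at
   the start of laying out a call: all VFP argument registers start out
   free and none are allocated.  */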
6163 static void
6164 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6165 		    const_tree fntype ATTRIBUTE_UNUSED,
6166 		    rtx libcall ATTRIBUTE_UNUSED,
6167 		    const_tree fndecl ATTRIBUTE_UNUSED)
6168 {
6169   /* Record the unallocated VFP registers.  */
6170   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6171   pcum->aapcs_vfp_reg_alloc = 0;
6172 }
6173 
6174 /* Bitmasks that indicate whether earlier versions of GCC would have
6175    taken a different path through the ABI logic.  This should result in
6176    a -Wpsabi warning if the earlier path led to a different ABI decision.
6177 
6178    WARN_PSABI_EMPTY_CXX17_BASE
6179       Indicates that the type includes an artificial empty C++17 base field
6180       that, prior to GCC 10.1, would prevent the type from being treated as
6181       a HFA or HVA.  See PR94711 for details.
6182 
6183    WARN_PSABI_NO_UNIQUE_ADDRESS
6184       Indicates that the type includes an empty [[no_unique_address]] field
6185       that, prior to GCC 10.1, would prevent the type from being treated as
6186       a HFA or HVA.  */
6187 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6188 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
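/* For instance (an illustrative sketch), when compiling with -std=c++17:

       struct empty {};
       struct pair : empty { double x, y; };

   the artificial field created for the empty base used to stop "pair"
   from being treated as a homogeneous aggregate of two doubles before
   GCC 10.1; the bits above let that change be reported with -Wpsabi.  */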
6189 
6190 /* Walk down the type tree of TYPE counting consecutive base elements.
6191    If *MODEP is VOIDmode, then set it to the first valid floating point
6192    type.  If a non-floating point type is found, or if a floating point
6193    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6194    otherwise return the count in the sub-tree.
6195 
6196    The WARN_PSABI_FLAGS argument allows the caller to check whether this
6197    function has changed its behavior relative to earlier versions of GCC.
6198    Normally the argument should be nonnull and point to a zero-initialized
6199    variable.  The function then records whether the ABI decision might
6200    be affected by a known fix to the ABI logic, setting the associated
6201    WARN_PSABI_* bits if so.
6202 
6203    When the argument is instead a null pointer, the function tries to
6204    simulate the behavior of GCC before all such ABI fixes were made.
6205    This is useful to check whether the function returns something
6206    different after the ABI fixes.  */
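/* For example (illustrative), given

       struct hfa { float a; float b[2]; };

   the walk returns 3 with *MODEP set to SFmode, whereas

       struct mixed { float a; double b; };

   returns -1 because the element modes differ.  */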
6207 static int
6208 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6209 			 unsigned int *warn_psabi_flags)
6210 {
6211   machine_mode mode;
6212   HOST_WIDE_INT size;
6213 
6214   switch (TREE_CODE (type))
6215     {
6216     case REAL_TYPE:
6217       mode = TYPE_MODE (type);
6218       if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6219 	return -1;
6220 
6221       if (*modep == VOIDmode)
6222 	*modep = mode;
6223 
6224       if (*modep == mode)
6225 	return 1;
6226 
6227       break;
6228 
6229     case COMPLEX_TYPE:
6230       mode = TYPE_MODE (TREE_TYPE (type));
6231       if (mode != DFmode && mode != SFmode)
6232 	return -1;
6233 
6234       if (*modep == VOIDmode)
6235 	*modep = mode;
6236 
6237       if (*modep == mode)
6238 	return 2;
6239 
6240       break;
6241 
6242     case VECTOR_TYPE:
6243       /* Use V2SImode and V4SImode as representatives of all 64-bit
6244 	 and 128-bit vector types, whether or not those modes are
6245 	 supported with the present options.  */
6246       size = int_size_in_bytes (type);
6247       switch (size)
6248 	{
6249 	case 8:
6250 	  mode = V2SImode;
6251 	  break;
6252 	case 16:
6253 	  mode = V4SImode;
6254 	  break;
6255 	default:
6256 	  return -1;
6257 	}
6258 
6259       if (*modep == VOIDmode)
6260 	*modep = mode;
6261 
6262       /* Vector modes are considered to be opaque: two vectors are
6263 	 equivalent for the purposes of being homogeneous aggregates
6264 	 if they are the same size.  */
6265       if (*modep == mode)
6266 	return 1;
6267 
6268       break;
6269 
6270     case ARRAY_TYPE:
6271       {
6272 	int count;
6273 	tree index = TYPE_DOMAIN (type);
6274 
6275 	/* Can't handle incomplete types nor sizes that are not
6276 	   fixed.  */
6277 	if (!COMPLETE_TYPE_P (type)
6278 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6279 	  return -1;
6280 
6281 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6282 					 warn_psabi_flags);
6283 	if (count == -1
6284 	    || !index
6285 	    || !TYPE_MAX_VALUE (index)
6286 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6287 	    || !TYPE_MIN_VALUE (index)
6288 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6289 	    || count < 0)
6290 	  return -1;
6291 
6292 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6293 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6294 
6295 	/* There must be no padding.  */
6296 	if (wi::to_wide (TYPE_SIZE (type))
6297 	    != count * GET_MODE_BITSIZE (*modep))
6298 	  return -1;
6299 
6300 	return count;
6301       }
6302 
6303     case RECORD_TYPE:
6304       {
6305 	int count = 0;
6306 	int sub_count;
6307 	tree field;
6308 
6309 	/* Can't handle incomplete types nor sizes that are not
6310 	   fixed.  */
6311 	if (!COMPLETE_TYPE_P (type)
6312 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6313 	  return -1;
6314 
6315 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6316 	  {
6317 	    if (TREE_CODE (field) != FIELD_DECL)
6318 	      continue;
6319 
6320 	    if (DECL_FIELD_ABI_IGNORED (field))
6321 	      {
6322 		/* See whether this is something that earlier versions of
6323 		   GCC failed to ignore.  */
6324 		unsigned int flag;
6325 		if (lookup_attribute ("no_unique_address",
6326 				      DECL_ATTRIBUTES (field)))
6327 		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6328 		else if (cxx17_empty_base_field_p (field))
6329 		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
6330 		else
6331 		  /* No compatibility problem.  */
6332 		  continue;
6333 
6334 		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
6335 		if (warn_psabi_flags)
6336 		  {
6337 		    *warn_psabi_flags |= flag;
6338 		    continue;
6339 		  }
6340 	      }
6341 
6342 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6343 						 warn_psabi_flags);
6344 	    if (sub_count < 0)
6345 	      return -1;
6346 	    count += sub_count;
6347 	  }
6348 
6349 	/* There must be no padding.  */
6350 	if (wi::to_wide (TYPE_SIZE (type))
6351 	    != count * GET_MODE_BITSIZE (*modep))
6352 	  return -1;
6353 
6354 	return count;
6355       }
6356 
6357     case UNION_TYPE:
6358     case QUAL_UNION_TYPE:
6359       {
6360 	/* These aren't very interesting except in a degenerate case.  */
6361 	int count = 0;
6362 	int sub_count;
6363 	tree field;
6364 
6365 	/* Can't handle incomplete types nor sizes that are not
6366 	   fixed.  */
6367 	if (!COMPLETE_TYPE_P (type)
6368 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6369 	  return -1;
6370 
6371 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6372 	  {
6373 	    if (TREE_CODE (field) != FIELD_DECL)
6374 	      continue;
6375 
6376 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6377 						 warn_psabi_flags);
6378 	    if (sub_count < 0)
6379 	      return -1;
6380 	    count = count > sub_count ? count : sub_count;
6381 	  }
6382 
6383 	/* There must be no padding.  */
6384 	if (wi::to_wide (TYPE_SIZE (type))
6385 	    != count * GET_MODE_BITSIZE (*modep))
6386 	  return -1;
6387 
6388 	return count;
6389       }
6390 
6391     default:
6392       break;
6393     }
6394 
6395   return -1;
6396 }
6397 
6398 /* Return true if PCS_VARIANT should use VFP registers.  */
6399 static bool
6400 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6401 {
6402   if (pcs_variant == ARM_PCS_AAPCS_VFP)
6403     {
6404       static bool seen_thumb1_vfp = false;
6405 
6406       if (TARGET_THUMB1 && !seen_thumb1_vfp)
6407 	{
6408 	  sorry ("Thumb-1 hard-float VFP ABI");
6409 	  /* sorry() is not immediately fatal, so only display this once.  */
6410 	  seen_thumb1_vfp = true;
6411 	}
6412 
6413       return true;
6414     }
6415 
6416   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6417     return false;
6418 
6419   return (TARGET_32BIT && TARGET_HARD_FLOAT
6420 	  && (TARGET_VFP_DOUBLE || !is_double));
6421 }
6422 
6423 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6424    suitable for passing or returning in VFP registers for the PCS
6425    variant selected.  If it is, then *BASE_MODE is updated to contain
6426    a machine mode describing each element of the argument's type and
6427    *COUNT to hold the number of such elements.  */
6428 static bool
6429 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6430 				       machine_mode mode, const_tree type,
6431 				       machine_mode *base_mode, int *count)
6432 {
6433   machine_mode new_mode = VOIDmode;
6434 
6435   /* If we have the type information, prefer that to working things
6436      out from the mode.  */
6437   if (type)
6438     {
6439       unsigned int warn_psabi_flags = 0;
6440       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6441 					      &warn_psabi_flags);
6442       if (ag_count > 0 && ag_count <= 4)
6443 	{
6444 	  static unsigned last_reported_type_uid;
6445 	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6446 	  int alt;
6447 	  if (warn_psabi
6448 	      && warn_psabi_flags
6449 	      && uid != last_reported_type_uid
6450 	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6451 		  != ag_count))
6452 	    {
6453 	      const char *url
6454 		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6455 	      gcc_assert (alt == -1);
6456 	      last_reported_type_uid = uid;
6457 	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
6458 		 qualification.  */
6459 	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6460 		inform (input_location, "parameter passing for argument of "
6461 			"type %qT with %<[[no_unique_address]]%> members "
6462 			"changed %{in GCC 10.1%}",
6463 			TYPE_MAIN_VARIANT (type), url);
6464 	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6465 		inform (input_location, "parameter passing for argument of "
6466 			"type %qT when C++17 is enabled changed to match "
6467 			"C++14 %{in GCC 10.1%}",
6468 			TYPE_MAIN_VARIANT (type), url);
6469 	    }
6470 	  *count = ag_count;
6471 	}
6472       else
6473 	return false;
6474     }
6475   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6476 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6477 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6478     {
6479       *count = 1;
6480       new_mode = mode;
6481     }
6482   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6483     {
6484       *count = 2;
6485       new_mode = (mode == DCmode ? DFmode : SFmode);
6486     }
6487   else
6488     return false;
6489 
6490 
6491   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6492     return false;
6493 
6494   *base_mode = new_mode;
6495 
6496   if (TARGET_GENERAL_REGS_ONLY)
6497     error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6498 	   type);
6499 
6500   return true;
6501 }
6502 
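/* Return true if a value of mode MODE (and type TYPE, if known) can be
   returned in VFP registers under PCS_VARIANT.  */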
6503 static bool
6504 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6505 			       machine_mode mode, const_tree type)
6506 {
6507   int count ATTRIBUTE_UNUSED;
6508   machine_mode ag_mode ATTRIBUTE_UNUSED;
6509 
6510   if (!use_vfp_abi (pcs_variant, false))
6511     return false;
6512   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6513 						&ag_mode, &count);
6514 }
6515 
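/* Return true if the argument described by MODE and TYPE is a candidate
   for passing in VFP registers for this call, recording the element mode
   and count in *PCUM.  */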
6516 static bool
6517 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6518 			     const_tree type)
6519 {
6520   if (!use_vfp_abi (pcum->pcs_variant, false))
6521     return false;
6522 
6523   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6524 						&pcum->aapcs_vfp_rmode,
6525 						&pcum->aapcs_vfp_rcount);
6526 }
6527 
6528 /* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
6529    for the behaviour of this function.  */
6530 
6531 static bool
6532 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6533 		    const_tree type  ATTRIBUTE_UNUSED)
6534 {
6535   int rmode_size
6536     = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6537   int shift = rmode_size / GET_MODE_SIZE (SFmode);
6538   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6539   int regno;
6540 
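  /* Scan for the lowest block of free VFP registers that is aligned to
     the element size (SHIFT single-precision registers per element) and
     large enough for the whole argument.  For example (illustrative), a
     homogeneous aggregate of two doubles needs MASK == 0xf and a
     candidate REGNO of 0, 2, 4, ...  */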
6541   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6542     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6543       {
6544 	pcum->aapcs_vfp_reg_alloc = mask << regno;
6545 	if (mode == BLKmode
6546 	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6547 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6548 	  {
6549 	    int i;
6550 	    int rcount = pcum->aapcs_vfp_rcount;
6551 	    int rshift = shift;
6552 	    machine_mode rmode = pcum->aapcs_vfp_rmode;
6553 	    rtx par;
6554 	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
6555 	      {
6556 		/* Avoid using unsupported vector modes.  */
6557 		if (rmode == V2SImode)
6558 		  rmode = DImode;
6559 		else if (rmode == V4SImode)
6560 		  {
6561 		    rmode = DImode;
6562 		    rcount *= 2;
6563 		    rshift /= 2;
6564 		  }
6565 	      }
6566 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6567 	    for (i = 0; i < rcount; i++)
6568 	      {
6569 		rtx tmp = gen_rtx_REG (rmode,
6570 				       FIRST_VFP_REGNUM + regno + i * rshift);
6571 		tmp = gen_rtx_EXPR_LIST
6572 		  (VOIDmode, tmp,
6573 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
6574 		XVECEXP (par, 0, i) = tmp;
6575 	      }
6576 
6577 	    pcum->aapcs_reg = par;
6578 	  }
6579 	else
6580 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6581 	return true;
6582       }
6583   return false;
6584 }
6585 
6586 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
6587    comment there for the behaviour of this function.  */
6588 
6589 static rtx
6590 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6591 			       machine_mode mode,
6592 			       const_tree type ATTRIBUTE_UNUSED)
6593 {
6594   if (!use_vfp_abi (pcs_variant, false))
6595     return NULL;
6596 
6597   if (mode == BLKmode
6598       || (GET_MODE_CLASS (mode) == MODE_INT
6599 	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6600 	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
6601     {
6602       int count;
6603       machine_mode ag_mode;
6604       int i;
6605       rtx par;
6606       int shift;
6607 
6608       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6609 					     &ag_mode, &count);
6610 
6611       if (!(TARGET_NEON || TARGET_HAVE_MVE))
6612 	{
6613 	  if (ag_mode == V2SImode)
6614 	    ag_mode = DImode;
6615 	  else if (ag_mode == V4SImode)
6616 	    {
6617 	      ag_mode = DImode;
6618 	      count *= 2;
6619 	    }
6620 	}
6621       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6622       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6623       for (i = 0; i < count; i++)
6624 	{
6625 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6626 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6627 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6628 	  XVECEXP (par, 0, i) = tmp;
6629 	}
6630 
6631       return par;
6632     }
6633 
6634   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6635 }
6636 
6637 static void
6638 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6639 		   machine_mode mode  ATTRIBUTE_UNUSED,
6640 		   const_tree type  ATTRIBUTE_UNUSED)
6641 {
6642   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6643   pcum->aapcs_vfp_reg_alloc = 0;
6644   return;
6645 }
6646 
6647 #define AAPCS_CP(X)				\
6648   {						\
6649     aapcs_ ## X ## _cum_init,			\
6650     aapcs_ ## X ## _is_call_candidate,		\
6651     aapcs_ ## X ## _allocate,			\
6652     aapcs_ ## X ## _is_return_candidate,	\
6653     aapcs_ ## X ## _allocate_return_reg,	\
6654     aapcs_ ## X ## _advance			\
6655   }
6656 
6657 /* Table of co-processors that can be used to pass arguments in
6658    registers.  Ideally no argument should be a candidate for more than
6659    one co-processor table entry, but the table is processed in order
6660    and stops after the first match.  If that entry then fails to put
6661    the argument into a co-processor register, the argument will go on
6662    the stack.  */
6663 static struct
6664 {
6665   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
6666   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6667 
6668   /* Return true if an argument of mode MODE (or type TYPE if MODE is
6669      BLKmode) is a candidate for this co-processor's registers; this
6670      function should ignore any position-dependent state in
6671      CUMULATIVE_ARGS and only use call-type dependent information.  */
6672   bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6673 
6674   /* Return true if the argument does get a co-processor register; it
6675      should set aapcs_reg to an RTX of the register allocated as is
6676      required for a return from FUNCTION_ARG.  */
6677   bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6678 
6679   /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6680      be returned in this co-processor's registers.  */
6681   bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6682 
6683   /* Allocate and return an RTX element to hold the return type of a call.  This
6684      routine must not fail and will only be called if is_return_candidate
6685      returned true with the same parameters.  */
6686   rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6687 
6688   /* Finish processing this argument and prepare to start processing
6689      the next one.  */
6690   void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6691 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6692   {
6693     AAPCS_CP(vfp)
6694   };
6695 
6696 #undef AAPCS_CP
6697 
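/* Return the index of the co-processor rule (if any) that will accept
   an argument of mode MODE and type TYPE for this call, or -1 if the
   argument must follow the base AAPCS rules.  */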
6698 static int
6699 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6700 			  const_tree type)
6701 {
6702   int i;
6703 
6704   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6705     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6706       return i;
6707 
6708   return -1;
6709 }
6710 
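/* Likewise, but select the co-processor rule used to return a value of
   type TYPE from a function with type or decl FNTYPE; return -1 if the
   value is returned following the base rules.  */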
6711 static int
6712 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6713 {
6714   /* We aren't passed a decl, so we can't check that a call is local.
6715      However, it isn't clear that that would be a win anyway, since it
6716      might limit some tail-calling opportunities.  */
6717   enum arm_pcs pcs_variant;
6718 
6719   if (fntype)
6720     {
6721       const_tree fndecl = NULL_TREE;
6722 
6723       if (TREE_CODE (fntype) == FUNCTION_DECL)
6724 	{
6725 	  fndecl = fntype;
6726 	  fntype = TREE_TYPE (fntype);
6727 	}
6728 
6729       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6730     }
6731   else
6732     pcs_variant = arm_pcs_default;
6733 
6734   if (pcs_variant != ARM_PCS_AAPCS)
6735     {
6736       int i;
6737 
6738       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6739 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6740 							TYPE_MODE (type),
6741 							type))
6742 	  return i;
6743     }
6744   return -1;
6745 }
6746 
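/* Return an RTX describing where a value of mode MODE and type TYPE is
   returned from a function with type or decl FNTYPE, taking any
   co-processor return rules into account.  */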
6747 static rtx
6748 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6749 			   const_tree fntype)
6750 {
6751   /* We aren't passed a decl, so we can't check that a call is local.
6752      However, it isn't clear that that would be a win anyway, since it
6753      might limit some tail-calling opportunities.  */
6754   enum arm_pcs pcs_variant;
6755   int unsignedp ATTRIBUTE_UNUSED;
6756 
6757   if (fntype)
6758     {
6759       const_tree fndecl = NULL_TREE;
6760 
6761       if (TREE_CODE (fntype) == FUNCTION_DECL)
6762 	{
6763 	  fndecl = fntype;
6764 	  fntype = TREE_TYPE (fntype);
6765 	}
6766 
6767       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6768     }
6769   else
6770     pcs_variant = arm_pcs_default;
6771 
6772   /* Promote integer types.  */
6773   if (type && INTEGRAL_TYPE_P (type))
6774     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6775 
6776   if (pcs_variant != ARM_PCS_AAPCS)
6777     {
6778       int i;
6779 
6780       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6781 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6782 							type))
6783 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6784 							     mode, type);
6785     }
6786 
6787   /* Promote small structs returned in a register to full-word size
6788      for big-endian AAPCS.  */
6789   if (type && arm_return_in_msb (type))
6790     {
6791       HOST_WIDE_INT size = int_size_in_bytes (type);
6792       if (size % UNITS_PER_WORD != 0)
6793 	{
6794 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6795 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6796 	}
6797     }
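  /* For example (illustrative), a 3-byte structure returned on a
     big-endian AAPCS target is given SImode here, so that the value can
     later be placed in the most significant bytes of r0.  */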
6798 
6799   return gen_rtx_REG (mode, R0_REGNUM);
6800 }
6801 
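/* Return the register in which a library-call value of mode MODE is
   returned under the AAPCS.  */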
6802 static rtx
6803 aapcs_libcall_value (machine_mode mode)
6804 {
6805   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6806       && GET_MODE_SIZE (mode) <= 4)
6807     mode = SImode;
6808 
6809   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6810 }
6811 
6812 /* Lay out a function argument using the AAPCS rules.  The rule
6813    numbers referred to here are those in the AAPCS.  */
6814 static void
6815 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6816 		  const_tree type, bool named)
6817 {
6818   int nregs, nregs2;
6819   int ncrn;
6820 
6821   /* We only need to do this once per argument.  */
6822   if (pcum->aapcs_arg_processed)
6823     return;
6824 
6825   pcum->aapcs_arg_processed = true;
6826 
6827   /* Special case: if named is false then we are handling an incoming
6828      anonymous argument which is on the stack.  */
6829   if (!named)
6830     return;
6831 
6832   /* Is this a potential co-processor register candidate?  */
6833   if (pcum->pcs_variant != ARM_PCS_AAPCS)
6834     {
6835       int slot = aapcs_select_call_coproc (pcum, mode, type);
6836       pcum->aapcs_cprc_slot = slot;
6837 
6838       /* We don't have to apply any of the rules from part B of the
6839 	 preparation phase, these are handled elsewhere in the
6840 	 compiler.  */
6841 
6842       if (slot >= 0)
6843 	{
6844 	  /* A Co-processor register candidate goes either in its own
6845 	     class of registers or on the stack.  */
6846 	  if (!pcum->aapcs_cprc_failed[slot])
6847 	    {
6848 	      /* C1.cp - Try to allocate the argument to co-processor
6849 		 registers.  */
6850 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6851 		return;
6852 
6853 	      /* C2.cp - Put the argument on the stack and note that we
6854 		 can't assign any more candidates in this slot.  We also
6855 		 need to note that we have allocated stack space, so that
6856 		 we won't later try to split a non-cprc candidate between
6857 		 core registers and the stack.  */
6858 	      pcum->aapcs_cprc_failed[slot] = true;
6859 	      pcum->can_split = false;
6860 	    }
6861 
6862 	  /* We didn't get a register, so this argument goes on the
6863 	     stack.  */
6864 	  gcc_assert (pcum->can_split == false);
6865 	  return;
6866 	}
6867     }
6868 
6869   /* C3 - For double-word aligned arguments, round the NCRN up to the
6870      next even number.  */
6871   ncrn = pcum->aapcs_ncrn;
6872   if (ncrn & 1)
6873     {
6874       int res = arm_needs_doubleword_align (mode, type);
6875       /* Only warn during RTL expansion of call stmts, otherwise we would
6876 	 warn e.g. during gimplification even on functions that will be
6877 	 always inlined, and we'd warn multiple times.  Don't warn when
6878 	 called in expand_function_start either, as we warn instead in
6879 	 arm_function_arg_boundary in that case.  */
6880       if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6881 	inform (input_location, "parameter passing for argument of type "
6882 		"%qT changed in GCC 7.1", type);
6883       else if (res > 0)
6884 	ncrn++;
6885     }
6886 
6887   nregs = ARM_NUM_REGS2 (mode, type);
6888 
6889   /* Sigh, this test should really assert that nregs > 0, but a GCC
6890      extension allows empty structs and then gives them empty size; it
6891      then allows such a structure to be passed by value.  For some of
6892      the code below we have to pretend that such an argument has
6893      non-zero size so that we 'locate' it correctly either in
6894      registers or on the stack.  */
6895   gcc_assert (nregs >= 0);
6896 
6897   nregs2 = nregs ? nregs : 1;
6898 
6899   /* C4 - Argument fits entirely in core registers.  */
6900   if (ncrn + nregs2 <= NUM_ARG_REGS)
6901     {
6902       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6903       pcum->aapcs_next_ncrn = ncrn + nregs;
6904       return;
6905     }
6906 
6907   /* C5 - Some core registers left and there are no arguments already
6908      on the stack: split this argument between the remaining core
6909      registers and the stack.  */
6910   if (ncrn < NUM_ARG_REGS && pcum->can_split)
6911     {
6912       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6913       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6914       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6915       return;
6916     }
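  /* For example (illustrative): with NCRN == 3, an 8-byte structure
     that only needs word alignment is passed half in r3 and half on the
     stack by rule C5 above, whereas a doubleword-aligned argument would
     already have had NCRN rounded up to 4 by C3 and so goes entirely on
     the stack.  */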
6917 
6918   /* C6 - NCRN is set to 4.  */
6919   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6920 
6921   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6922   return;
6923 }
6924 
6925 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6926    for a call to a function whose data type is FNTYPE.
6927    For a library call, FNTYPE is NULL.  */
6928 void
6929 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6930 			  rtx libname,
6931 			  tree fndecl ATTRIBUTE_UNUSED)
6932 {
6933   /* Determine the calling convention (PCS variant) in use for this call.  */
6934   if (fntype)
6935     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6936   else
6937     pcum->pcs_variant = arm_pcs_default;
6938 
6939   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6940     {
6941       if (arm_libcall_uses_aapcs_base (libname))
6942 	pcum->pcs_variant = ARM_PCS_AAPCS;
6943 
6944       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6945       pcum->aapcs_reg = NULL_RTX;
6946       pcum->aapcs_partial = 0;
6947       pcum->aapcs_arg_processed = false;
6948       pcum->aapcs_cprc_slot = -1;
6949       pcum->can_split = true;
6950 
6951       if (pcum->pcs_variant != ARM_PCS_AAPCS)
6952 	{
6953 	  int i;
6954 
6955 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6956 	    {
6957 	      pcum->aapcs_cprc_failed[i] = false;
6958 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6959 	    }
6960 	}
6961       return;
6962     }
6963 
6964   /* Legacy ABIs.  */
6965 
6966   /* On the ARM, the offset starts at 0.  */
6967   pcum->nregs = 0;
6968   pcum->iwmmxt_nregs = 0;
6969   pcum->can_split = true;
6970 
6971   /* Varargs vectors are treated the same as long long.
6972      named_count avoids having to change the way arm handles 'named' */
6973   pcum->named_count = 0;
6974   pcum->nargs = 0;
6975 
6976   if (TARGET_REALLY_IWMMXT && fntype)
6977     {
6978       tree fn_arg;
6979 
6980       for (fn_arg = TYPE_ARG_TYPES (fntype);
6981 	   fn_arg;
6982 	   fn_arg = TREE_CHAIN (fn_arg))
6983 	pcum->named_count += 1;
6984 
6985       if (! pcum->named_count)
6986 	pcum->named_count = INT_MAX;
6987     }
6988 }
6989 
6990 /* Return 2 if double word alignment is required for argument passing,
6991    but wasn't required before the fix for PR88469.
6992    Return 1 if double word alignment is required for argument passing.
6993    Return -1 if double word alignment used to be required for argument
6994    passing before PR77728 ABI fix, but is not required anymore.
6995    Return 0 if double word alignment is not required and wasn't required
6996    before either.  */
6997 static int
6998 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6999 {
7000   if (!type)
7001     return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7002 
7003   /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
7004   if (!AGGREGATE_TYPE_P (type))
7005     return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7006 
7007   /* Array types: Use member alignment of element type.  */
7008   if (TREE_CODE (type) == ARRAY_TYPE)
7009     return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7010 
7011   int ret = 0;
7012   int ret2 = 0;
7013   /* Record/aggregate types: Use greatest member alignment of any member.
7014 
7015      Note that we explicitly consider zero-sized fields here, even though
7016      they don't map to AAPCS machine types.  For example, in:
7017 
7018 	 struct __attribute__((aligned(8))) empty {};
7019 
7020 	 struct s {
7021 	   [[no_unique_address]] empty e;
7022 	   int x;
7023 	 };
7024 
7025      "s" contains only one Fundamental Data Type (the int field)
7026      but gains 8-byte alignment and size thanks to "e".  */
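  /* Likewise (illustrative), a bit-field declared with a doubleword-
     aligned type, as in

	 struct t {
	   long long b : 1;
	 };

     forces doubleword alignment for argument passing; the separate
     "return 2" path below lets callers warn with -Wpsabi that this
     changed in GCC 9.1 (PR88469).  */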
7027   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7028     if (DECL_ALIGN (field) > PARM_BOUNDARY)
7029       {
7030 	if (TREE_CODE (field) == FIELD_DECL)
7031 	  return 1;
7032 	else
7033 	  /* Before PR77728 fix, we were incorrectly considering also
7034 	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7035 	     Make sure we can warn about that with -Wpsabi.  */
7036 	  ret = -1;
7037       }
7038     else if (TREE_CODE (field) == FIELD_DECL
7039 	     && DECL_BIT_FIELD_TYPE (field)
7040 	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7041       ret2 = 1;
7042 
7043   if (ret2)
7044     return 2;
7045 
7046   return ret;
7047 }
7048 
7049 
7050 /* Determine where to put an argument to a function.
7051    Value is zero to push the argument on the stack,
7052    or a hard register in which to store the argument.
7053 
7054    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7055     the preceding args and about the function being called.
7056    ARG is a description of the argument.
7057 
7058    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7059    other arguments are passed on the stack.  If (NAMED == 0) (which happens
7060    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7061    defined), say it is passed on the stack (function_prologue will
7062    indeed make it pass on the stack if necessary).  */
7063 
7064 static rtx
7065 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7066 {
7067   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7068   int nregs;
7069 
7070   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
7071      a call insn (op3 of a call_value insn).  */
7072   if (arg.end_marker_p ())
7073     return const0_rtx;
7074 
7075   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7076     {
7077       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7078       return pcum->aapcs_reg;
7079     }
7080 
7081   /* Varargs vectors are treated the same as long long.
7082      named_count avoids having to change the way arm handles 'named' */
7083   if (TARGET_IWMMXT_ABI
7084       && arm_vector_mode_supported_p (arg.mode)
7085       && pcum->named_count > pcum->nargs + 1)
7086     {
7087       if (pcum->iwmmxt_nregs <= 9)
7088 	return gen_rtx_REG (arg.mode,
7089 			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7090       else
7091 	{
7092 	  pcum->can_split = false;
7093 	  return NULL_RTX;
7094 	}
7095     }
7096 
7097   /* Put doubleword aligned quantities in even register pairs.  */
7098   if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7099     {
7100       int res = arm_needs_doubleword_align (arg.mode, arg.type);
7101       if (res < 0 && warn_psabi)
7102 	inform (input_location, "parameter passing for argument of type "
7103 		"%qT changed in GCC 7.1", arg.type);
7104       else if (res > 0)
7105 	{
7106 	  pcum->nregs++;
7107 	  if (res > 1 && warn_psabi)
7108 	    inform (input_location, "parameter passing for argument of type "
7109 		    "%qT changed in GCC 9.1", arg.type);
7110 	}
7111     }
7112 
7113   /* Only allow splitting an arg between regs and memory if all preceding
7114      args were allocated to regs.  For args passed by reference we only count
7115      the reference pointer.  */
7116   if (pcum->can_split)
7117     nregs = 1;
7118   else
7119     nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7120 
7121   if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7122     return NULL_RTX;
7123 
7124   return gen_rtx_REG (arg.mode, pcum->nregs);
7125 }
7126 
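/* Return the alignment, in bits, required when passing an argument of
   mode MODE and type TYPE: DOUBLEWORD_ALIGNMENT if the argument needs a
   doubleword-aligned slot, PARM_BOUNDARY otherwise.  */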
7127 static unsigned int
7128 arm_function_arg_boundary (machine_mode mode, const_tree type)
7129 {
7130   if (!ARM_DOUBLEWORD_ALIGN)
7131     return PARM_BOUNDARY;
7132 
7133   int res = arm_needs_doubleword_align (mode, type);
7134   if (res < 0 && warn_psabi)
7135     inform (input_location, "parameter passing for argument of type %qT "
7136 	    "changed in GCC 7.1", type);
7137   if (res > 1 && warn_psabi)
7138     inform (input_location, "parameter passing for argument of type "
7139 	    "%qT changed in GCC 9.1", type);
7140 
7141   return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7142 }
7143 
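/* Return the number of bytes of argument ARG that are passed in
   registers when the remainder of the argument is passed on the stack,
   or 0 if the argument is passed entirely in one place.  */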
7144 static int
7145 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7146 {
7147   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7148   int nregs = pcum->nregs;
7149 
7150   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7151     {
7152       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7153       return pcum->aapcs_partial;
7154     }
7155 
7156   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7157     return 0;
7158 
7159   if (NUM_ARG_REGS > nregs
7160       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7161       && pcum->can_split)
7162     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7163 
7164   return 0;
7165 }
7166 
7167 /* Update the data in PCUM to advance over argument ARG.  */
7168 
7169 static void
7170 arm_function_arg_advance (cumulative_args_t pcum_v,
7171 			  const function_arg_info &arg)
7172 {
7173   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7174 
7175   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7176     {
7177       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7178 
7179       if (pcum->aapcs_cprc_slot >= 0)
7180 	{
7181 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7182 							      arg.type);
7183 	  pcum->aapcs_cprc_slot = -1;
7184 	}
7185 
7186       /* Generic stuff.  */
7187       pcum->aapcs_arg_processed = false;
7188       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7189       pcum->aapcs_reg = NULL_RTX;
7190       pcum->aapcs_partial = 0;
7191     }
7192   else
7193     {
7194       pcum->nargs += 1;
7195       if (arm_vector_mode_supported_p (arg.mode)
7196 	  && pcum->named_count > pcum->nargs
7197 	  && TARGET_IWMMXT_ABI)
7198 	pcum->iwmmxt_nregs += 1;
7199       else
7200 	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7201     }
7202 }
7203 
7204 /* Variable sized types are passed by reference.  This is a GCC
7205    extension to the ARM ABI.  */
7206 
7207 static bool
7208 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7209 {
7210   return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7211 }
7212 
7213 /* Encode the current state of the #pragma [no_]long_calls.  */
7214 typedef enum
7215 {
7216   OFF,		/* No #pragma [no_]long_calls is in effect.  */
7217   LONG,		/* #pragma long_calls is in effect.  */
7218   SHORT		/* #pragma no_long_calls is in effect.  */
7219 } arm_pragma_enum;
7220 
7221 static arm_pragma_enum arm_pragma_long_calls = OFF;
7222 
7223 void
7224 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7225 {
7226   arm_pragma_long_calls = LONG;
7227 }
7228 
7229 void
7230 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7231 {
7232   arm_pragma_long_calls = SHORT;
7233 }
7234 
7235 void
7236 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7237 {
7238   arm_pragma_long_calls = OFF;
7239 }
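/* Typical usage of these pragmas (illustrative):

       #pragma long_calls
       void far_away (void);
       #pragma long_calls_off

   Calls to functions declared while "long_calls" is in effect use the
   long-call sequence unless overridden by a "short_call" attribute.  */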
7240 
7241 /* Handle an attribute requiring a FUNCTION_DECL;
7242    arguments as in struct attribute_spec.handler.  */
7243 static tree
7244 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7245 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7246 {
7247   if (TREE_CODE (*node) != FUNCTION_DECL)
7248     {
7249       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7250 	       name);
7251       *no_add_attrs = true;
7252     }
7253 
7254   return NULL_TREE;
7255 }
7256 
7257 /* Handle an "interrupt" or "isr" attribute;
7258    arguments as in struct attribute_spec.handler.  */
7259 static tree
7260 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7261 			  bool *no_add_attrs)
7262 {
7263   if (DECL_P (*node))
7264     {
7265       if (TREE_CODE (*node) != FUNCTION_DECL)
7266 	{
7267 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
7268 		   name);
7269 	  *no_add_attrs = true;
7270 	}
7271       else if (TARGET_VFP_BASE)
7272 	{
7273 	  warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7274 		   name);
7275 	}
7276       /* FIXME: the argument if any is checked for type attributes;
7277 	 should it be checked for decl ones?  */
7278     }
7279   else
7280     {
7281       if (TREE_CODE (*node) == FUNCTION_TYPE
7282 	  || TREE_CODE (*node) == METHOD_TYPE)
7283 	{
7284 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7285 	    {
7286 	      warning (OPT_Wattributes, "%qE attribute ignored",
7287 		       name);
7288 	      *no_add_attrs = true;
7289 	    }
7290 	}
7291       else if (TREE_CODE (*node) == POINTER_TYPE
7292 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7293 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7294 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
7295 	{
7296 	  *node = build_variant_type_copy (*node);
7297 	  TREE_TYPE (*node) = build_type_attribute_variant
7298 	    (TREE_TYPE (*node),
7299 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7300 	  *no_add_attrs = true;
7301 	}
7302       else
7303 	{
7304 	  /* Possibly pass this attribute on from the type to a decl.  */
7305 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
7306 		       | (int) ATTR_FLAG_FUNCTION_NEXT
7307 		       | (int) ATTR_FLAG_ARRAY_NEXT))
7308 	    {
7309 	      *no_add_attrs = true;
7310 	      return tree_cons (name, args, NULL_TREE);
7311 	    }
7312 	  else
7313 	    {
7314 	      warning (OPT_Wattributes, "%qE attribute ignored",
7315 		       name);
7316 	    }
7317 	}
7318     }
7319 
7320   return NULL_TREE;
7321 }
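/* For example (illustrative), the attribute handled above is typically
   written as:

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   where the argument names the kind of interrupt ("IRQ", "FIQ", "SWI",
   "ABORT" or "UNDEF").  */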
7322 
7323 /* Handle a "pcs" attribute; arguments as in struct
7324    attribute_spec.handler.  */
7325 static tree
7326 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7327 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7328 {
7329   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7330     {
7331       warning (OPT_Wattributes, "%qE attribute ignored", name);
7332       *no_add_attrs = true;
7333     }
7334   return NULL_TREE;
7335 }
7336 
7337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7338 /* Handle the "notshared" attribute.  This attribute is another way of
7339    requesting hidden visibility.  ARM's compiler supports
7340    "__declspec(notshared)"; we support the same thing via an
7341    attribute.  */
7342 
7343 static tree
7344 arm_handle_notshared_attribute (tree *node,
7345 				tree name ATTRIBUTE_UNUSED,
7346 				tree args ATTRIBUTE_UNUSED,
7347 				int flags ATTRIBUTE_UNUSED,
7348 				bool *no_add_attrs)
7349 {
7350   tree decl = TYPE_NAME (*node);
7351 
7352   if (decl)
7353     {
7354       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7355       DECL_VISIBILITY_SPECIFIED (decl) = 1;
7356       *no_add_attrs = false;
7357     }
7358   return NULL_TREE;
7359 }
7360 #endif
7361 
7362 /* This function returns true if a function with declaration FNDECL and type
7363    FNTYPE uses the stack to pass arguments or return variables and false
7364    otherwise.  This is used for functions with the attributes
7365    'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7366    diagnostic messages if the stack is used.  NAME is the name of the attribute
7367    used.  */
7368 
7369 static bool
7370 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7371 {
7372   function_args_iterator args_iter;
7373   CUMULATIVE_ARGS args_so_far_v;
7374   cumulative_args_t args_so_far;
7375   bool first_param = true;
7376   tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7377 
7378   /* Error out if any argument is passed on the stack.  */
7379   arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7380   args_so_far = pack_cumulative_args (&args_so_far_v);
7381   FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7382     {
7383       rtx arg_rtx;
7384 
7385       prev_arg_type = arg_type;
7386       if (VOID_TYPE_P (arg_type))
7387 	continue;
7388 
7389       function_arg_info arg (arg_type, /*named=*/true);
7390       if (!first_param)
7391 	/* ??? We should advance after processing the argument and pass
7392 	   the argument we're advancing past.  */
7393 	arm_function_arg_advance (args_so_far, arg);
7394       arg_rtx = arm_function_arg (args_so_far, arg);
7395       if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7396 	{
7397 	  error ("%qE attribute not available to functions with arguments "
7398 		 "passed on the stack", name);
7399 	  return true;
7400 	}
7401       first_param = false;
7402     }
7403 
7404   /* Error out for variadic functions since we cannot control how many
7405      arguments will be passed and thus the stack could be used.  stdarg_p () is not
7406      used for the checking to avoid browsing arguments twice.  */
7407   if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7408     {
7409       error ("%qE attribute not available to functions with variable number "
7410 	     "of arguments", name);
7411       return true;
7412     }
7413 
7414   /* Error out if return value is passed on the stack.  */
7415   ret_type = TREE_TYPE (fntype);
7416   if (arm_return_in_memory (ret_type, fntype))
7417     {
7418       error ("%qE attribute not available to functions that return value on "
7419 	     "the stack", name);
7420       return true;
7421     }
7422   return false;
7423 }
7424 
7425 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7426    function will check whether the attribute is allowed here and will add the
7427    attribute to the function declaration tree or otherwise issue a warning.  */
7428 
7429 static tree
7430 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7431 				 tree /* args */,
7432 				 int /* flags */,
7433 				 bool *no_add_attrs)
7434 {
7435   tree fndecl;
7436 
7437   if (!use_cmse)
7438     {
7439       *no_add_attrs = true;
7440       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7441 	       "option", name);
7442       return NULL_TREE;
7443     }
7444 
7445   /* Ignore attribute for function types.  */
7446   if (TREE_CODE (*node) != FUNCTION_DECL)
7447     {
7448       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7449 	       name);
7450       *no_add_attrs = true;
7451       return NULL_TREE;
7452     }
7453 
7454   fndecl = *node;
7455 
7456   /* Warn for static linkage functions.  */
7457   if (!TREE_PUBLIC (fndecl))
7458     {
7459       warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7460 	       "with static linkage", name);
7461       *no_add_attrs = true;
7462       return NULL_TREE;
7463     }
7464 
7465   *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7466 						TREE_TYPE (fndecl));
7467   return NULL_TREE;
7468 }
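/* For example (illustrative), with -mcmse:

       int __attribute__ ((cmse_nonsecure_entry)) get_key (int slot);

   is accepted, whereas an entry function that takes or returns a large
   structure by value is rejected above because the value would have to
   live on the stack.  */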
7469 
7470 
7471 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7472    function will check whether the attribute is allowed here and will add the
7473    attribute to the function type tree or otherwise issue a diagnostic.  The
7474    reason we check this at declaration time is to only allow the use of the
7475    attribute with declarations of function pointers and not function
7476    declarations.  This function checks NODE is of the expected type and issues
7477    diagnostics otherwise using NAME.  If it is not of the expected type
7478    *NO_ADD_ATTRS will be set to true.  */
7479 
7480 static tree
7481 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7482 				 tree /* args */,
7483 				 int /* flags */,
7484 				 bool *no_add_attrs)
7485 {
7486   tree decl = NULL_TREE, fntype = NULL_TREE;
7487   tree type;
7488 
7489   if (!use_cmse)
7490     {
7491       *no_add_attrs = true;
7492       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7493 	       "option", name);
7494       return NULL_TREE;
7495     }
7496 
7497   if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7498     {
7499       decl = *node;
7500       fntype = TREE_TYPE (decl);
7501     }
7502 
7503   while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7504     fntype = TREE_TYPE (fntype);
7505 
7506   if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7507     {
7508 	warning (OPT_Wattributes, "%qE attribute only applies to the base type of a "
7509 		 "function pointer", name);
7510 	*no_add_attrs = true;
7511 	return NULL_TREE;
7512     }
7513 
7514   *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7515 
7516   if (*no_add_attrs)
7517     return NULL_TREE;
7518 
7519   /* Prevent trees being shared among function types with and without
7520      cmse_nonsecure_call attribute.  */
7521   type = TREE_TYPE (decl);
7522 
7523   type = build_distinct_type_copy (type);
7524   TREE_TYPE (decl) = type;
7525   fntype = type;
7526 
7527   while (TREE_CODE (fntype) != FUNCTION_TYPE)
7528     {
7529       type = fntype;
7530       fntype = TREE_TYPE (fntype);
7531       fntype = build_distinct_type_copy (fntype);
7532       TREE_TYPE (type) = fntype;
7533     }
7534 
7535   /* Construct a type attribute and add it to the function type.  */
7536   tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7537 			  TYPE_ATTRIBUTES (fntype));
7538   TYPE_ATTRIBUTES (fntype) = attrs;
7539   return NULL_TREE;
7540 }
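/* For example (illustrative), with -mcmse:

       int __attribute__ ((cmse_nonsecure_call)) (*ns_get) (int);

   attaches the attribute to the function type that ns_get points to;
   applying it directly to a function declaration is rejected above.  */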
7541 
7542 /* Return 0 if the attributes for two types are incompatible, 1 if they
7543    are compatible, and 2 if they are nearly compatible (which causes a
7544    warning to be generated).  */
7545 static int
7546 arm_comp_type_attributes (const_tree type1, const_tree type2)
7547 {
7548   int l1, l2, s1, s2;
7549 
7550   tree attrs1 = lookup_attribute ("Advanced SIMD type",
7551 				  TYPE_ATTRIBUTES (type1));
7552   tree attrs2 = lookup_attribute ("Advanced SIMD type",
7553 				  TYPE_ATTRIBUTES (type2));
7554   if (bool (attrs1) != bool (attrs2))
7555     return 0;
7556   if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7557     return 0;
7558 
7559   /* Check for mismatch of non-default calling convention.  */
7560   if (TREE_CODE (type1) != FUNCTION_TYPE)
7561     return 1;
7562 
7563   /* Check for mismatched call attributes.  */
7564   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7565   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7566   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7567   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7568 
7569   /* Only bother to check if an attribute is defined.  */
7570   if (l1 | l2 | s1 | s2)
7571     {
7572       /* If one type has an attribute, the other must have the same attribute.  */
7573       if ((l1 != l2) || (s1 != s2))
7574 	return 0;
7575 
7576       /* Disallow mixed attributes.  */
7577       if ((l1 & s2) || (l2 & s1))
7578 	return 0;
7579     }
7580 
7581   /* Check for mismatched ISR attribute.  */
7582   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7583   if (! l1)
7584     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7585   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7586   if (! l2)
7587     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7588   if (l1 != l2)
7589     return 0;
7590 
7591   l1 = lookup_attribute ("cmse_nonsecure_call",
7592 			 TYPE_ATTRIBUTES (type1)) != NULL;
7593   l2 = lookup_attribute ("cmse_nonsecure_call",
7594 			 TYPE_ATTRIBUTES (type2)) != NULL;
7595 
7596   if (l1 != l2)
7597     return 0;
7598 
7599   return 1;
7600 }
7601 
7602 /*  Assign default attributes to a newly defined type.  This is used to
7603     set short_call/long_call attributes for function types of
7604     functions defined inside corresponding #pragma scopes.  */
7605 static void
7606 arm_set_default_type_attributes (tree type)
7607 {
7608   /* Add __attribute__ ((long_call)) to all functions when inside
7609      #pragma long_calls, or __attribute__ ((short_call)) when inside
7610      #pragma no_long_calls.  */
7611   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7612     {
7613       tree type_attr_list, attr_name;
7614       type_attr_list = TYPE_ATTRIBUTES (type);
7615 
7616       if (arm_pragma_long_calls == LONG)
7617  	attr_name = get_identifier ("long_call");
7618       else if (arm_pragma_long_calls == SHORT)
7619  	attr_name = get_identifier ("short_call");
7620       else
7621  	return;
7622 
7623       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7624       TYPE_ATTRIBUTES (type) = type_attr_list;
7625     }
7626 }
7627 
7628 /* Return true if DECL is known to be linked into section SECTION.  */
7629 
7630 static bool
7631 arm_function_in_section_p (tree decl, section *section)
7632 {
7633   /* We can only be certain about the prevailing symbol definition.  */
7634   if (!decl_binds_to_current_def_p (decl))
7635     return false;
7636 
7637   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
7638   if (!DECL_SECTION_NAME (decl))
7639     {
7640       /* Make sure that we will not create a unique section for DECL.  */
7641       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7642 	return false;
7643     }
7644 
7645   return function_section (decl) == section;
7646 }
7647 
7648 /* Return nonzero if a 32-bit "long_call" should be generated for
7649    a call from the current function to DECL.  We generate a long_call
7650    if the function:
7651 
7652         a.  has an __attribute__ ((long_call))
7653      or b.  is within the scope of a #pragma long_calls
7654      or c.  the -mlong-calls command line switch has been specified
7655 
7656    However we do not generate a long call if the function:
7657 
7658         d.  has an __attribute__ ((short_call))
7659      or e.  is inside the scope of a #pragma no_long_calls
7660      or f.  is defined in the same section as the current function.  */
7661 
7662 bool
7663 arm_is_long_call_p (tree decl)
7664 {
7665   tree attrs;
7666 
7667   if (!decl)
7668     return TARGET_LONG_CALLS;
7669 
7670   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7671   if (lookup_attribute ("short_call", attrs))
7672     return false;
7673 
7674   /* For "f", be conservative, and only cater for cases in which the
7675      whole of the current function is placed in the same section.  */
7676   if (!flag_reorder_blocks_and_partition
7677       && TREE_CODE (decl) == FUNCTION_DECL
7678       && arm_function_in_section_p (decl, current_function_section ()))
7679     return false;
7680 
7681   if (lookup_attribute ("long_call", attrs))
7682     return true;
7683 
7684   return TARGET_LONG_CALLS;
7685 }
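
/* A minimal usage sketch for the rules above (the declarations are
   hypothetical, not taken from any particular source):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     void plain_func (void);

   Calls to far_func always use the 32-bit long-call sequence, calls to
   near_func always use a plain BL, and calls to plain_func follow
   -mlong-calls / #pragma long_calls, subject to rule (f) above.  */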
7686 
7687 /* Return nonzero if it is ok to make a tail-call to DECL.  */
7688 static bool
7689 arm_function_ok_for_sibcall (tree decl, tree exp)
7690 {
7691   unsigned long func_type;
7692 
7693   if (cfun->machine->sibcall_blocked)
7694     return false;
7695 
7696   if (TARGET_FDPIC)
7697     {
7698       /* In FDPIC, never tailcall something for which we have no decl:
7699 	 the target function could be in a different module, requiring
7700 	 a different FDPIC register value.  */
7701       if (decl == NULL)
7702 	return false;
7703     }
7704 
7705   /* Never tailcall something if we are generating code for Thumb-1.  */
7706   if (TARGET_THUMB1)
7707     return false;
7708 
7709   /* The PIC register is live on entry to VxWorks PLT entries, so we
7710      must make the call before restoring the PIC register.  */
7711   if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7712     return false;
7713 
7714   /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7715      may be used both as target of the call and base register for restoring
7716      the VFP registers.  */
7717   if (TARGET_APCS_FRAME && TARGET_ARM
7718       && TARGET_HARD_FLOAT
7719       && decl && arm_is_long_call_p (decl))
7720     return false;
7721 
7722   /* If we are interworking and the function is not declared static
7723      then we can't tail-call it unless we know that it exists in this
7724      compilation unit (since it might be a Thumb routine).  */
7725   if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7726       && !TREE_ASM_WRITTEN (decl))
7727     return false;
7728 
7729   func_type = arm_current_func_type ();
7730   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7731   if (IS_INTERRUPT (func_type))
7732     return false;
7733 
7734   /* ARMv8-M non-secure entry functions need to return with bxns which is only
7735      generated for entry functions themselves.  */
7736   if (IS_CMSE_ENTRY (arm_current_func_type ()))
7737     return false;
7738 
7739   /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7740      this would complicate matters for later code generation.  */
7741   if (TREE_CODE (exp) == CALL_EXPR)
7742     {
7743       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7744       if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7745 	return false;
7746     }
7747 
7748   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7749     {
7750       /* Check that the return value locations are the same.  For
7751 	 example that we aren't returning a value from the sibling in
7752 	 a VFP register but then need to transfer it to a core
7753 	 register.  */
7754       rtx a, b;
7755       tree decl_or_type = decl;
7756 
7757       /* If it is an indirect function pointer, get the function type.  */
7758       if (!decl)
7759 	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7760 
7761       a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7762       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7763 			      cfun->decl, false);
7764       if (!rtx_equal_p (a, b))
7765 	return false;
7766     }
7767 
7768   /* Never tailcall if function may be called with a misaligned SP.  */
7769   if (IS_STACKALIGN (func_type))
7770     return false;
7771 
7772   /* The AAPCS says that, on bare-metal, calls to unresolved weak
7773      references should become a NOP.  Don't convert such calls into
7774      sibling calls.  */
7775   if (TARGET_AAPCS_BASED
7776       && arm_abi == ARM_ABI_AAPCS
7777       && decl
7778       && DECL_WEAK (decl))
7779     return false;
7780 
7781   /* We cannot do a tailcall for an indirect call by descriptor if all the
7782      argument registers are used because the only register left to load the
7783      address is IP and it will already contain the static chain.  */
7784   if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7785     {
7786       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7787       CUMULATIVE_ARGS cum;
7788       cumulative_args_t cum_v;
7789 
7790       arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7791       cum_v = pack_cumulative_args (&cum);
7792 
7793       for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7794 	{
7795 	  tree type = TREE_VALUE (t);
7796 	  if (!VOID_TYPE_P (type))
7797 	    {
7798 	      function_arg_info arg (type, /*named=*/true);
7799 	      arm_function_arg_advance (cum_v, arg);
7800 	    }
7801 	}
7802 
7803       function_arg_info arg (integer_type_node, /*named=*/true);
7804       if (!arm_function_arg (cum_v, arg))
7805 	return false;
7806     }
7807 
7808   /* Everything else is ok.  */
7809   return true;
7810 }
7811 
7812 
7813 /* Addressing mode support functions.  */
7814 
7815 /* Return nonzero if X is a legitimate immediate operand when compiling
7816    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
7817 int
7818 legitimate_pic_operand_p (rtx x)
7819 {
7820   if (SYMBOL_REF_P (x)
7821       || (GET_CODE (x) == CONST
7822 	  && GET_CODE (XEXP (x, 0)) == PLUS
7823 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7824     return 0;
7825 
7826   return 1;
7827 }
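
/* For illustration, under -fpic the function above classifies operands
   roughly as follows (RTL forms are schematic):

     (const_int 42)                                   -> 1, usable as-is
     (symbol_ref "foo")                               -> 0, needs a GOT load
     (const (plus (symbol_ref "foo") (const_int 4)))  -> 0, likewise  */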
7828 
7829 /* Record that the current function needs a PIC register.  If PIC_REG is null,
7830    a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
7831    both cases cfun->machine->pic_reg is initialized if we have not already done
7832    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7833    the PIC register is reloaded at the current position in the instruction stream
7834    regardless of whether it was loaded before.  Otherwise, it is only loaded
7835    if not already done so (crtl->uses_pic_offset_table is null).  Note that
7836    nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7837    is only supported iff COMPUTE_NOW is false.  */
7838 
7839 static void
7840 require_pic_register (rtx pic_reg, bool compute_now)
7841 {
7842   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7843 
7844   /* A lot of the logic here is made obscure by the fact that this
7845      routine gets called as part of the rtx cost estimation process.
7846      We don't want those calls to affect any assumptions about the real
7847      function; and further, we can't call entry_of_function() until we
7848      start the real expansion process.  */
7849   if (!crtl->uses_pic_offset_table || compute_now)
7850     {
7851       gcc_assert (can_create_pseudo_p ()
7852 		  || (pic_reg != NULL_RTX
7853 		      && REG_P (pic_reg)
7854 		      && GET_MODE (pic_reg) == Pmode));
7855       if (arm_pic_register != INVALID_REGNUM
7856 	  && !compute_now
7857 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7858 	{
7859 	  if (!cfun->machine->pic_reg)
7860 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7861 
7862 	  /* Play games to avoid marking the function as needing pic
7863 	     if we are being called as part of the cost-estimation
7864 	     process.  */
7865 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7866 	    crtl->uses_pic_offset_table = 1;
7867 	}
7868       else
7869 	{
7870 	  rtx_insn *seq, *insn;
7871 
7872 	  if (pic_reg == NULL_RTX)
7873 	    pic_reg = gen_reg_rtx (Pmode);
7874 	  if (!cfun->machine->pic_reg)
7875 	    cfun->machine->pic_reg = pic_reg;
7876 
7877 	  /* Play games to avoid marking the function as needing pic
7878 	     if we are being called as part of the cost-estimation
7879 	     process.  */
7880 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7881 	    {
7882 	      crtl->uses_pic_offset_table = 1;
7883 	      start_sequence ();
7884 
7885 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7886 		  && arm_pic_register > LAST_LO_REGNUM
7887 		  && !compute_now)
7888 		emit_move_insn (cfun->machine->pic_reg,
7889 				gen_rtx_REG (Pmode, arm_pic_register));
7890 	      else
7891 		arm_load_pic_register (0UL, pic_reg);
7892 
7893 	      seq = get_insns ();
7894 	      end_sequence ();
7895 
7896 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
7897 		if (INSN_P (insn))
7898 		  INSN_LOCATION (insn) = prologue_location;
7899 
7900 	      /* We can be called during expansion of PHI nodes, where
7901 	         we can't yet emit instructions directly in the final
7902 		 insn stream.  Queue the insns on the entry edge, they will
7903 		 be committed after everything else is expanded.  */
7904 	      if (currently_expanding_to_rtl)
7905 		insert_insn_on_edge (seq,
7906 				     single_succ_edge
7907 				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7908 	      else
7909 		emit_insn (seq);
7910 	    }
7911 	}
7912     }
7913 }
7914 
7915 /* Generate insns to calculate the address of ORIG in pic mode.  */
7916 static rtx_insn *
7917 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7918 {
7919   rtx pat;
7920   rtx mem;
7921 
7922   pat = gen_calculate_pic_address (reg, pic_reg, orig);
7923 
7924   /* Make the MEM as close to a constant as possible.  */
7925   mem = SET_SRC (pat);
7926   gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7927   MEM_READONLY_P (mem) = 1;
7928   MEM_NOTRAP_P (mem) = 1;
7929 
7930   return emit_insn (pat);
7931 }
7932 
7933 /* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
7934    created to hold the result of the load.  If not NULL, PIC_REG indicates
7935    which register to use as PIC register, otherwise it is decided by register
7936    allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
7937    location in the instruction stream, regardless of whether it was loaded
7938    previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7939    true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7940 
7941    Returns the register REG into which the PIC load is performed.  */
7942 
7943 rtx
7944 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7945 			bool compute_now)
7946 {
7947   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7948 
7949   if (SYMBOL_REF_P (orig)
7950       || LABEL_REF_P (orig))
7951     {
7952       if (reg == 0)
7953 	{
7954 	  gcc_assert (can_create_pseudo_p ());
7955 	  reg = gen_reg_rtx (Pmode);
7956 	}
7957 
7958       /* VxWorks does not impose a fixed gap between segments; the run-time
7959 	 gap can be different from the object-file gap.  We therefore can't
7960 	 use GOTOFF unless we are absolutely sure that the symbol is in the
7961 	 same segment as the GOT.  Unfortunately, the flexibility of linker
7962 	 scripts means that we can't be sure of that in general, so assume
7963 	 that GOTOFF is never valid on VxWorks.  */
7964       /* References to weak symbols cannot be resolved locally: they
7965 	 may be overridden by a non-weak definition at link time.  */
7966       rtx_insn *insn;
7967       if ((LABEL_REF_P (orig)
7968 	   || (SYMBOL_REF_P (orig)
7969 	       && SYMBOL_REF_LOCAL_P (orig)
7970 	       && (SYMBOL_REF_DECL (orig)
7971 		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7972 	       && (!SYMBOL_REF_FUNCTION_P (orig)
7973 		   || arm_fdpic_local_funcdesc_p (orig))))
7974 	  && NEED_GOT_RELOC
7975 	  && arm_pic_data_is_text_relative)
7976 	insn = arm_pic_static_addr (orig, reg);
7977       else
7978 	{
7979 	  /* If this function doesn't have a pic register, create one now.  */
7980 	  require_pic_register (pic_reg, compute_now);
7981 
7982 	  if (pic_reg == NULL_RTX)
7983 	    pic_reg = cfun->machine->pic_reg;
7984 
7985 	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
7986 	}
7987 
7988       /* Put a REG_EQUAL note on this insn, so that it can be optimized
7989 	 by loop.  */
7990       set_unique_reg_note (insn, REG_EQUAL, orig);
7991 
7992       return reg;
7993     }
7994   else if (GET_CODE (orig) == CONST)
7995     {
7996       rtx base, offset;
7997 
7998       if (GET_CODE (XEXP (orig, 0)) == PLUS
7999 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8000 	return orig;
8001 
8002       /* Handle the case where we have: const (UNSPEC_TLS).  */
8003       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8004 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8005 	return orig;
8006 
8007       /* Handle the case where we have:
8008          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
8009          CONST_INT.  */
8010       if (GET_CODE (XEXP (orig, 0)) == PLUS
8011           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8012           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8013         {
8014 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8015 	  return orig;
8016 	}
8017 
8018       if (reg == 0)
8019 	{
8020 	  gcc_assert (can_create_pseudo_p ());
8021 	  reg = gen_reg_rtx (Pmode);
8022 	}
8023 
8024       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8025 
8026       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8027 				     pic_reg, compute_now);
8028       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8029 				       base == reg ? 0 : reg, pic_reg,
8030 				       compute_now);
8031 
8032       if (CONST_INT_P (offset))
8033 	{
8034 	  /* The base register doesn't really matter, we only want to
8035 	     test the index for the appropriate mode.  */
8036 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
8037 	    {
8038 	      gcc_assert (can_create_pseudo_p ());
8039 	      offset = force_reg (Pmode, offset);
8040 	    }
8041 
8042 	  if (CONST_INT_P (offset))
8043 	    return plus_constant (Pmode, base, INTVAL (offset));
8044 	}
8045 
8046       if (GET_MODE_SIZE (mode) > 4
8047 	  && (GET_MODE_CLASS (mode) == MODE_INT
8048 	      || TARGET_SOFT_FLOAT))
8049 	{
8050 	  emit_insn (gen_addsi3 (reg, base, offset));
8051 	  return reg;
8052 	}
8053 
8054       return gen_rtx_PLUS (Pmode, base, offset);
8055     }
8056 
8057   return orig;
8058 }
8059 
8060 
8061 /* Whether a register is callee saved or not.  This is necessary because high
8062    registers are marked as caller saved when optimizing for size on Thumb-1
8063    targets despite being callee saved in order to avoid using them.  */
8064 #define callee_saved_reg_p(reg) \
8065   (!call_used_or_fixed_reg_p (reg) \
8066    || (TARGET_THUMB1 && optimize_size \
8067        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8068 
8069 /* Return a mask for the call-clobbered low registers that are unused
8070    at the end of the prologue.  */
8071 static unsigned long
8072 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8073 {
8074   unsigned long mask = 0;
8075   bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8076 
8077   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8078     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8079       mask |= 1 << (reg - FIRST_LO_REGNUM);
8080   return mask;
8081 }
8082 
8083 /* Similarly for the start of the epilogue.  */
8084 static unsigned long
8085 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8086 {
8087   unsigned long mask = 0;
8088   bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8089 
8090   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8091     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8092       mask |= 1 << (reg - FIRST_LO_REGNUM);
8093   return mask;
8094 }
8095 
8096 /* Find a spare register to use during the prologue of a function.  */
8097 
8098 static int
8099 thumb_find_work_register (unsigned long pushed_regs_mask)
8100 {
8101   int reg;
8102 
8103   unsigned long unused_regs
8104     = thumb1_prologue_unused_call_clobbered_lo_regs ();
8105 
8106   /* Check the argument registers first as these are call-used.  The
8107      register allocation order means that sometimes r3 might be used
8108      but earlier argument registers might not, so check them all.  */
8109   for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8110     if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8111       return reg;
8112 
8113   /* Otherwise look for a call-saved register that is going to be pushed.  */
8114   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8115     if (pushed_regs_mask & (1 << reg))
8116       return reg;
8117 
8118   if (TARGET_THUMB2)
8119     {
8120       /* Thumb-2 can use high regs.  */
8121       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8122 	if (pushed_regs_mask & (1 << reg))
8123 	  return reg;
8124     }
8125   /* Something went wrong - thumb_compute_save_reg_mask()
8126      should have arranged for a suitable register to be pushed.  */
8127   gcc_unreachable ();
8128 }
8129 
8130 static GTY(()) int pic_labelno;
8131 
8132 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
8133    low register.  */
8134 
8135 void
8136 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8137 {
8138   rtx l1, labelno, pic_tmp, pic_rtx;
8139 
8140   if (crtl->uses_pic_offset_table == 0
8141       || TARGET_SINGLE_PIC_BASE
8142       || TARGET_FDPIC)
8143     return;
8144 
8145   gcc_assert (flag_pic);
8146 
8147   if (pic_reg == NULL_RTX)
8148     pic_reg = cfun->machine->pic_reg;
8149   if (TARGET_VXWORKS_RTP)
8150     {
8151       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8152       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8153       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8154 
8155       emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8156 
8157       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8158       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8159     }
8160   else
8161     {
8162       /* We use an UNSPEC rather than a LABEL_REF because this label
8163 	 never appears in the code stream.  */
8164 
8165       labelno = GEN_INT (pic_labelno++);
8166       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8167       l1 = gen_rtx_CONST (VOIDmode, l1);
8168 
8169       /* On the ARM the PC register contains 'dot + 8' at the time of the
8170 	 addition, on the Thumb it is 'dot + 4'.  */
8171       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8172       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8173 				UNSPEC_GOTSYM_OFF);
8174       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8175 
8176       if (TARGET_32BIT)
8177 	{
8178 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8179 	}
8180       else /* TARGET_THUMB1 */
8181 	{
8182 	  if (arm_pic_register != INVALID_REGNUM
8183 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
8184 	    {
8185 	      /* We will have pushed the pic register, so we should always be
8186 		 able to find a work register.  */
8187 	      pic_tmp = gen_rtx_REG (SImode,
8188 				     thumb_find_work_register (saved_regs));
8189 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8190 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8191 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8192 	    }
8193 	  else if (arm_pic_register != INVALID_REGNUM
8194 		   && arm_pic_register > LAST_LO_REGNUM
8195 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
8196 	    {
8197 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8198 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8199 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8200 	    }
8201 	  else
8202 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8203 	}
8204     }
8205 
8206   /* Need to emit this whether or not we obey regdecls,
8207      since setjmp/longjmp can cause life info to screw up.  */
8208   emit_use (pic_reg);
8209 }
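
/* As a rough sketch, for ARM state the non-VxWorks path above typically
   produces a sequence of the shape (register and label numbers are
   arbitrary):

	ldr	r3, .LCn
   .LPICm:
	add	r3, pc, r3

   where .LCn holds _GLOBAL_OFFSET_TABLE_ - (.LPICm + 8); the 8 matches the
   'dot + 8' PC bias noted above for ARM state (4 for Thumb).  */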
8210 
8211 /* Try to determine whether an object, referenced via ORIG, will be
8212    placed in the text or data segment.  This is used in FDPIC mode, to
8213    decide which relocations to use when accessing ORIG.  *IS_READONLY
8214    is set to true if ORIG is a read-only location, false otherwise.
8215    Return true if we could determine the location of ORIG, false
8216    otherwise.  *IS_READONLY is valid only when we return true.  */
8217 static bool
8218 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8219 {
8220   *is_readonly = false;
8221 
8222   if (LABEL_REF_P (orig))
8223     {
8224       *is_readonly = true;
8225       return true;
8226     }
8227 
8228   if (SYMBOL_REF_P (orig))
8229     {
8230       if (CONSTANT_POOL_ADDRESS_P (orig))
8231 	{
8232 	  *is_readonly = true;
8233 	  return true;
8234 	}
8235       if (SYMBOL_REF_LOCAL_P (orig)
8236 	  && !SYMBOL_REF_EXTERNAL_P (orig)
8237 	  && SYMBOL_REF_DECL (orig)
8238 	  && (!DECL_P (SYMBOL_REF_DECL (orig))
8239 	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8240 	{
8241 	  tree decl = SYMBOL_REF_DECL (orig);
8242 	  tree init = (TREE_CODE (decl) == VAR_DECL)
8243 	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8244 	    ? decl : 0;
8245 	  int reloc = 0;
8246 	  bool named_section, readonly;
8247 
8248 	  if (init && init != error_mark_node)
8249 	    reloc = compute_reloc_for_constant (init);
8250 
8251 	  named_section = TREE_CODE (decl) == VAR_DECL
8252 	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8253 	  readonly = decl_readonly_section (decl, reloc);
8254 
8255 	  /* We don't know where the link script will put a named
8256 	     section, so return false in such a case.  */
8257 	  if (named_section)
8258 	    return false;
8259 
8260 	  *is_readonly = readonly;
8261 	  return true;
8262 	}
8263 
8264       /* We don't know.  */
8265       return false;
8266     }
8267 
8268   gcc_unreachable ();
8269 }
8270 
8271 /* Generate code to load the address of a static var when flag_pic is set.  */
8272 static rtx_insn *
8273 arm_pic_static_addr (rtx orig, rtx reg)
8274 {
8275   rtx l1, labelno, offset_rtx;
8276   rtx_insn *insn;
8277 
8278   gcc_assert (flag_pic);
8279 
8280   bool is_readonly = false;
8281   bool info_known = false;
8282 
8283   if (TARGET_FDPIC
8284       && SYMBOL_REF_P (orig)
8285       && !SYMBOL_REF_FUNCTION_P (orig))
8286     info_known = arm_is_segment_info_known (orig, &is_readonly);
8287 
8288   if (TARGET_FDPIC
8289       && SYMBOL_REF_P (orig)
8290       && !SYMBOL_REF_FUNCTION_P (orig)
8291       && !info_known)
8292     {
8293       /* We don't know where orig is stored, so we have to be
8294 	 pessimistic and use a GOT relocation.  */
8295       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8296 
8297       insn = calculate_pic_address_constant (reg, pic_reg, orig);
8298     }
8299   else if (TARGET_FDPIC
8300 	   && SYMBOL_REF_P (orig)
8301 	   && (SYMBOL_REF_FUNCTION_P (orig)
8302 	       || !is_readonly))
8303     {
8304       /* We use the GOTOFF relocation.  */
8305       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8306 
8307       rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8308       emit_insn (gen_movsi (reg, l1));
8309       insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8310     }
8311   else
8312     {
8313       /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8314 	 PC-relative access.  */
8315       /* We use an UNSPEC rather than a LABEL_REF because this label
8316 	 never appears in the code stream.  */
8317       labelno = GEN_INT (pic_labelno++);
8318       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8319       l1 = gen_rtx_CONST (VOIDmode, l1);
8320 
8321       /* On the ARM the PC register contains 'dot + 8' at the time of the
8322 	 addition, on the Thumb it is 'dot + 4'.  */
8323       offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8324       offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8325 				   UNSPEC_SYMBOL_OFFSET);
8326       offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8327 
8328       insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8329 						   labelno));
8330     }
8331 
8332   return insn;
8333 }
8334 
8335 /* Return nonzero if X is valid as an ARM state addressing register.  */
8336 static int
8337 arm_address_register_rtx_p (rtx x, int strict_p)
8338 {
8339   int regno;
8340 
8341   if (!REG_P (x))
8342     return 0;
8343 
8344   regno = REGNO (x);
8345 
8346   if (strict_p)
8347     return ARM_REGNO_OK_FOR_BASE_P (regno);
8348 
8349   return (regno <= LAST_ARM_REGNUM
8350 	  || regno >= FIRST_PSEUDO_REGISTER
8351 	  || regno == FRAME_POINTER_REGNUM
8352 	  || regno == ARG_POINTER_REGNUM);
8353 }
8354 
8355 /* Return TRUE if this rtx is the difference of a symbol and a label,
8356    and will reduce to a PC-relative relocation in the object file.
8357    Expressions like this can be left alone when generating PIC, rather
8358    than forced through the GOT.  */
8359 static int
8360 pcrel_constant_p (rtx x)
8361 {
8362   if (GET_CODE (x) == MINUS)
8363     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8364 
8365   return FALSE;
8366 }
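
/* e.g. a pool constant of the form (minus (symbol_ref "sym") (label_ref L))
   satisfies this test and can be left as a PC-relative relocation.  */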
8367 
8368 /* Return true if X will surely end up in an index register after next
8369    splitting pass.  */
8370 static bool
8371 will_be_in_index_register (const_rtx x)
8372 {
8373   /* arm.md: calculate_pic_address will split this into a register.  */
8374   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8375 }
8376 
8377 /* Return nonzero if X is a valid ARM state address operand.  */
8378 int
8379 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8380 			        int strict_p)
8381 {
8382   bool use_ldrd;
8383   enum rtx_code code = GET_CODE (x);
8384 
8385   if (arm_address_register_rtx_p (x, strict_p))
8386     return 1;
8387 
8388   use_ldrd = (TARGET_LDRD
8389 	      && (mode == DImode || mode == DFmode));
8390 
8391   if (code == POST_INC || code == PRE_DEC
8392       || ((code == PRE_INC || code == POST_DEC)
8393 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8394     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8395 
8396   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8397 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8398 	   && GET_CODE (XEXP (x, 1)) == PLUS
8399 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8400     {
8401       rtx addend = XEXP (XEXP (x, 1), 1);
8402 
8403       /* Don't allow ldrd post increment by register because it's hard
8404 	 to fixup invalid register choices.  */
8405       if (use_ldrd
8406 	  && GET_CODE (x) == POST_MODIFY
8407 	  && REG_P (addend))
8408 	return 0;
8409 
8410       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8411 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
8412     }
8413 
8414   /* After reload constants split into minipools will have addresses
8415      from a LABEL_REF.  */
8416   else if (reload_completed
8417 	   && (code == LABEL_REF
8418 	       || (code == CONST
8419 		   && GET_CODE (XEXP (x, 0)) == PLUS
8420 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8421 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8422     return 1;
8423 
8424   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8425     return 0;
8426 
8427   else if (code == PLUS)
8428     {
8429       rtx xop0 = XEXP (x, 0);
8430       rtx xop1 = XEXP (x, 1);
8431 
8432       return ((arm_address_register_rtx_p (xop0, strict_p)
8433 	       && ((CONST_INT_P (xop1)
8434 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8435 		   || (!strict_p && will_be_in_index_register (xop1))))
8436 	      || (arm_address_register_rtx_p (xop1, strict_p)
8437 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8438     }
8439 
8440 #if 0
8441   /* Reload currently can't handle MINUS, so disable this for now */
8442   else if (GET_CODE (x) == MINUS)
8443     {
8444       rtx xop0 = XEXP (x, 0);
8445       rtx xop1 = XEXP (x, 1);
8446 
8447       return (arm_address_register_rtx_p (xop0, strict_p)
8448 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8449     }
8450 #endif
8451 
8452   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8453 	   && code == SYMBOL_REF
8454 	   && CONSTANT_POOL_ADDRESS_P (x)
8455 	   && ! (flag_pic
8456 		 && symbol_mentioned_p (get_pool_constant (x))
8457 		 && ! pcrel_constant_p (get_pool_constant (x))))
8458     return 1;
8459 
8460   return 0;
8461 }
8462 
8463 /* Return true if we can avoid creating a constant pool entry for x.  */
8464 static bool
8465 can_avoid_literal_pool_for_label_p (rtx x)
8466 {
8467   /* Normally we can assign constant values to target registers without
8468      the help of constant pool.  But there are cases we have to use constant
8469      pool like:
8470      1) assign a label to register.
8471      2) sign-extend a 8bit value to 32bit and then assign to register.
8472 
8473      Constant pool access in format:
8474      (set (reg r0) (mem (symbol_ref (".LC0"))))
8475      will cause the use of literal pool (later in function arm_reorg).
8476      So here we mark such format as an invalid format, then the compiler
8477      will adjust it into:
8478      (set (reg r0) (symbol_ref (".LC0")))
8479      (set (reg r0) (mem (reg r0))).
8480      No extra register is required, and (mem (reg r0)) won't cause the use
8481      of literal pools.  */
8482   if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8483       && CONSTANT_POOL_ADDRESS_P (x))
8484     return 1;
8485   return 0;
8486 }
8487 
8488 
8489 /* Return nonzero if X is a valid Thumb-2 address operand.  */
8490 static int
8491 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8492 {
8493   bool use_ldrd;
8494   enum rtx_code code = GET_CODE (x);
8495 
8496   if (TARGET_HAVE_MVE
8497       && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8498     return mve_vector_mem_operand (mode, x, strict_p);
8499 
8500   if (arm_address_register_rtx_p (x, strict_p))
8501     return 1;
8502 
8503   use_ldrd = (TARGET_LDRD
8504 	      && (mode == DImode || mode == DFmode));
8505 
8506   if (code == POST_INC || code == PRE_DEC
8507       || ((code == PRE_INC || code == POST_DEC)
8508 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8509     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8510 
8511   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8512 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8513 	   && GET_CODE (XEXP (x, 1)) == PLUS
8514 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8515     {
8516       /* Thumb-2 only has autoincrement by constant.  */
8517       rtx addend = XEXP (XEXP (x, 1), 1);
8518       HOST_WIDE_INT offset;
8519 
8520       if (!CONST_INT_P (addend))
8521 	return 0;
8522 
8523       offset = INTVAL(addend);
8524       if (GET_MODE_SIZE (mode) <= 4)
8525 	return (offset > -256 && offset < 256);
8526 
8527       return (use_ldrd && offset > -1024 && offset < 1024
8528 	      && (offset & 3) == 0);
8529     }
8530 
8531   /* After reload constants split into minipools will have addresses
8532      from a LABEL_REF.  */
8533   else if (reload_completed
8534 	   && (code == LABEL_REF
8535 	       || (code == CONST
8536 		   && GET_CODE (XEXP (x, 0)) == PLUS
8537 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8538 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8539     return 1;
8540 
8541   else if (mode == TImode
8542 	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8543 	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8544     return 0;
8545 
8546   else if (code == PLUS)
8547     {
8548       rtx xop0 = XEXP (x, 0);
8549       rtx xop1 = XEXP (x, 1);
8550 
8551       return ((arm_address_register_rtx_p (xop0, strict_p)
8552 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8553 		   || (!strict_p && will_be_in_index_register (xop1))))
8554 	      || (arm_address_register_rtx_p (xop1, strict_p)
8555 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8556     }
8557 
8558   else if (can_avoid_literal_pool_for_label_p (x))
8559     return 0;
8560 
8561   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8562 	   && code == SYMBOL_REF
8563 	   && CONSTANT_POOL_ADDRESS_P (x)
8564 	   && ! (flag_pic
8565 		 && symbol_mentioned_p (get_pool_constant (x))
8566 		 && ! pcrel_constant_p (get_pool_constant (x))))
8567     return 1;
8568 
8569   return 0;
8570 }
8571 
8572 /* Return nonzero if INDEX is valid for an address index operand in
8573    ARM state.  */
8574 static int
8575 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8576 			int strict_p)
8577 {
8578   HOST_WIDE_INT range;
8579   enum rtx_code code = GET_CODE (index);
8580 
8581   /* Standard coprocessor addressing modes.  */
8582   if (TARGET_HARD_FLOAT
8583       && (mode == SFmode || mode == DFmode))
8584     return (code == CONST_INT && INTVAL (index) < 1024
8585 	    && INTVAL (index) > -1024
8586 	    && (INTVAL (index) & 3) == 0);
8587 
8588   /* For quad modes, we restrict the constant offset to be slightly less
8589      than what the instruction format permits.  We do this because for
8590      quad mode moves, we will actually decompose them into two separate
8591      double-mode reads or writes.  INDEX must therefore be a valid
8592      (double-mode) offset and so should INDEX+8.  */
8593   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8594     return (code == CONST_INT
8595 	    && INTVAL (index) < 1016
8596 	    && INTVAL (index) > -1024
8597 	    && (INTVAL (index) & 3) == 0);
8598 
8599   /* We have no such constraint on double mode offsets, so we permit the
8600      full range of the instruction format.  */
8601   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8602     return (code == CONST_INT
8603 	    && INTVAL (index) < 1024
8604 	    && INTVAL (index) > -1024
8605 	    && (INTVAL (index) & 3) == 0);
8606 
8607   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8608     return (code == CONST_INT
8609 	    && INTVAL (index) < 1024
8610 	    && INTVAL (index) > -1024
8611 	    && (INTVAL (index) & 3) == 0);
8612 
8613   if (arm_address_register_rtx_p (index, strict_p)
8614       && (GET_MODE_SIZE (mode) <= 4))
8615     return 1;
8616 
8617   if (mode == DImode || mode == DFmode)
8618     {
8619       if (code == CONST_INT)
8620 	{
8621 	  HOST_WIDE_INT val = INTVAL (index);
8622 
8623 	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8624 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8625 	  if (TARGET_LDRD)
8626 	    return val > -256 && val < 256;
8627 	  else
8628 	    return val > -4096 && val < 4092;
8629 	}
8630 
8631       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8632     }
8633 
8634   if (GET_MODE_SIZE (mode) <= 4
8635       && ! (arm_arch4
8636 	    && (mode == HImode
8637 		|| mode == HFmode
8638 		|| (mode == QImode && outer == SIGN_EXTEND))))
8639     {
8640       if (code == MULT)
8641 	{
8642 	  rtx xiop0 = XEXP (index, 0);
8643 	  rtx xiop1 = XEXP (index, 1);
8644 
8645 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
8646 		   && power_of_two_operand (xiop1, SImode))
8647 		  || (arm_address_register_rtx_p (xiop1, strict_p)
8648 		      && power_of_two_operand (xiop0, SImode)));
8649 	}
8650       else if (code == LSHIFTRT || code == ASHIFTRT
8651 	       || code == ASHIFT || code == ROTATERT)
8652 	{
8653 	  rtx op = XEXP (index, 1);
8654 
8655 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8656 		  && CONST_INT_P (op)
8657 		  && INTVAL (op) > 0
8658 		  && INTVAL (op) <= 31);
8659 	}
8660     }
8661 
8662   /* For ARM v4 we may be doing a sign-extend operation during the
8663      load.  */
8664   if (arm_arch4)
8665     {
8666       if (mode == HImode
8667 	  || mode == HFmode
8668 	  || (outer == SIGN_EXTEND && mode == QImode))
8669 	range = 256;
8670       else
8671 	range = 4096;
8672     }
8673   else
8674     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8675 
8676   return (code == CONST_INT
8677 	  && INTVAL (index) < range
8678 	  && INTVAL (index) > -range);
8679 }
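
/* Illustrative index forms accepted by the checks above for ARM state
   (register names are arbitrary):

     [rB, #1020]         DFmode with VFP: 8-bit immediate scaled by 4
     [rB, rI]            SImode register index
     [rB, rI, lsl #2]    SImode scaled register index (power of two)
     [rB, #4095]         SImode 12-bit immediate
     [rB, #255]          HImode on ARMv4 or later: 8-bit immediate  */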
8680 
8681 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8682    index operand.  i.e. 1, 2, 4 or 8.  */
8683 static bool
8684 thumb2_index_mul_operand (rtx op)
8685 {
8686   HOST_WIDE_INT val;
8687 
8688   if (!CONST_INT_P (op))
8689     return false;
8690 
8691   val = INTVAL(op);
8692   return (val == 1 || val == 2 || val == 4 || val == 8);
8693 }
8694 
8695 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
8696 static int
8697 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8698 {
8699   enum rtx_code code = GET_CODE (index);
8700 
8701   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
8702   /* Standard coprocessor addressing modes.  */
8703   if (TARGET_VFP_BASE
8704       && (mode == SFmode || mode == DFmode))
8705     return (code == CONST_INT && INTVAL (index) < 1024
8706 	    /* Thumb-2 allows only > -256 index range for its core register
8707 	       load/stores. Since we allow SF/DF in core registers, we have
8708 	       to use the intersection between -256~4096 (core) and -1024~1024
8709 	       (coprocessor).  */
8710 	    && INTVAL (index) > -256
8711 	    && (INTVAL (index) & 3) == 0);
8712 
8713   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8714     {
8715       /* For DImode assume values will usually live in core regs
8716 	 and only allow LDRD addressing modes.  */
8717       if (!TARGET_LDRD || mode != DImode)
8718 	return (code == CONST_INT
8719 		&& INTVAL (index) < 1024
8720 		&& INTVAL (index) > -1024
8721 		&& (INTVAL (index) & 3) == 0);
8722     }
8723 
8724   /* For quad modes, we restrict the constant offset to be slightly less
8725      than what the instruction format permits.  We do this because for
8726      quad mode moves, we will actually decompose them into two separate
8727      double-mode reads or writes.  INDEX must therefore be a valid
8728      (double-mode) offset and so should INDEX+8.  */
8729   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8730     return (code == CONST_INT
8731 	    && INTVAL (index) < 1016
8732 	    && INTVAL (index) > -1024
8733 	    && (INTVAL (index) & 3) == 0);
8734 
8735   /* We have no such constraint on double mode offsets, so we permit the
8736      full range of the instruction format.  */
8737   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8738     return (code == CONST_INT
8739 	    && INTVAL (index) < 1024
8740 	    && INTVAL (index) > -1024
8741 	    && (INTVAL (index) & 3) == 0);
8742 
8743   if (arm_address_register_rtx_p (index, strict_p)
8744       && (GET_MODE_SIZE (mode) <= 4))
8745     return 1;
8746 
8747   if (mode == DImode || mode == DFmode)
8748     {
8749       if (code == CONST_INT)
8750 	{
8751 	  HOST_WIDE_INT val = INTVAL (index);
8752 	  /* Thumb-2 ldrd only has reg+const addressing modes.
8753 	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8754 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8755 	  if (TARGET_LDRD)
8756 	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8757 	  else
8758 	    return IN_RANGE (val, -255, 4095 - 4);
8759 	}
8760       else
8761 	return 0;
8762     }
8763 
8764   if (code == MULT)
8765     {
8766       rtx xiop0 = XEXP (index, 0);
8767       rtx xiop1 = XEXP (index, 1);
8768 
8769       return ((arm_address_register_rtx_p (xiop0, strict_p)
8770 	       && thumb2_index_mul_operand (xiop1))
8771 	      || (arm_address_register_rtx_p (xiop1, strict_p)
8772 		  && thumb2_index_mul_operand (xiop0)));
8773     }
8774   else if (code == ASHIFT)
8775     {
8776       rtx op = XEXP (index, 1);
8777 
8778       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8779 	      && CONST_INT_P (op)
8780 	      && INTVAL (op) > 0
8781 	      && INTVAL (op) <= 3);
8782     }
8783 
8784   return (code == CONST_INT
8785 	  && INTVAL (index) < 4096
8786 	  && INTVAL (index) > -256);
8787 }
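
/* Illustrative index forms accepted by the checks above for Thumb-2
   (register names are arbitrary):

     [rB, rI, lsl #3]    shift amounts 1-3, or a multiplier of 1, 2, 4 or 8
     [rB, #4095]         positive 12-bit immediate
     [rB, #-255]         negative 8-bit immediate
     [rB, #1020]         DImode/DFmode with LDRD: multiple of 4 in +/-1020  */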
8788 
8789 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
8790 static int
8791 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8792 {
8793   int regno;
8794 
8795   if (!REG_P (x))
8796     return 0;
8797 
8798   regno = REGNO (x);
8799 
8800   if (strict_p)
8801     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8802 
8803   return (regno <= LAST_LO_REGNUM
8804 	  || regno > LAST_VIRTUAL_REGISTER
8805 	  || regno == FRAME_POINTER_REGNUM
8806 	  || (GET_MODE_SIZE (mode) >= 4
8807 	      && (regno == STACK_POINTER_REGNUM
8808 		  || regno >= FIRST_PSEUDO_REGISTER
8809 		  || x == hard_frame_pointer_rtx
8810 		  || x == arg_pointer_rtx)));
8811 }
8812 
8813 /* Return nonzero if x is a legitimate index register.  This is the case
8814    for any base register that can access a QImode object.  */
8815 inline static int
8816 thumb1_index_register_rtx_p (rtx x, int strict_p)
8817 {
8818   return thumb1_base_register_rtx_p (x, QImode, strict_p);
8819 }
8820 
8821 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8822 
8823    The AP may be eliminated to either the SP or the FP, so we use the
8824    least common denominator, e.g. SImode, and offsets from 0 to 64.
8825 
8826    ??? Verify whether the above is the right approach.
8827 
8828    ??? Also, the FP may be eliminated to the SP, so perhaps that
8829    needs special handling also.
8830 
8831    ??? Look at how the mips16 port solves this problem.  It probably uses
8832    better ways to solve some of these problems.
8833 
8834    Although it is not incorrect, we don't accept QImode and HImode
8835    addresses based on the frame pointer or arg pointer until the
8836    reload pass starts.  This is so that eliminating such addresses
8837    into stack based ones won't produce impossible code.  */
8838 int
8839 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8840 {
8841   if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8842     return 0;
8843 
8844   /* ??? Not clear if this is right.  Experiment.  */
8845   if (GET_MODE_SIZE (mode) < 4
8846       && !(reload_in_progress || reload_completed)
8847       && (reg_mentioned_p (frame_pointer_rtx, x)
8848 	  || reg_mentioned_p (arg_pointer_rtx, x)
8849 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
8850 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8851 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8852 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8853     return 0;
8854 
8855   /* Accept any base register.  SP only in SImode or larger.  */
8856   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8857     return 1;
8858 
8859   /* This is PC relative data before arm_reorg runs.  */
8860   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8861 	   && SYMBOL_REF_P (x)
8862 	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8863 	   && !arm_disable_literal_pool)
8864     return 1;
8865 
8866   /* This is PC relative data after arm_reorg runs.  */
8867   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8868 	   && reload_completed
8869 	   && (LABEL_REF_P (x)
8870 	       || (GET_CODE (x) == CONST
8871 		   && GET_CODE (XEXP (x, 0)) == PLUS
8872 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8873 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8874     return 1;
8875 
8876   /* Post-inc indexing only supported for SImode and larger.  */
8877   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8878 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8879     return 1;
8880 
8881   else if (GET_CODE (x) == PLUS)
8882     {
8883       /* REG+REG address can be any two index registers.  */
8884       /* We disallow FRAME+REG addressing since we know that FRAME
8885 	 will be replaced with STACK, and SP relative addressing only
8886 	 permits SP+OFFSET.  */
8887       if (GET_MODE_SIZE (mode) <= 4
8888 	  && XEXP (x, 0) != frame_pointer_rtx
8889 	  && XEXP (x, 1) != frame_pointer_rtx
8890 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8891 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8892 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8893 	return 1;
8894 
8895       /* REG+const has 5-7 bit offset for non-SP registers.  */
8896       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8897 		|| XEXP (x, 0) == arg_pointer_rtx)
8898 	       && CONST_INT_P (XEXP (x, 1))
8899 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8900 	return 1;
8901 
8902       /* REG+const has 10-bit offset for SP, but only SImode and
8903 	 larger are supported.  */
8904       /* ??? Should probably check for DI/DFmode overflow here
8905 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
8906       else if (REG_P (XEXP (x, 0))
8907 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8908 	       && GET_MODE_SIZE (mode) >= 4
8909 	       && CONST_INT_P (XEXP (x, 1))
8910 	       && INTVAL (XEXP (x, 1)) >= 0
8911 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8912 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8913 	return 1;
8914 
8915       else if (REG_P (XEXP (x, 0))
8916 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8917 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8918 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8919 		       && REGNO (XEXP (x, 0))
8920 			  <= LAST_VIRTUAL_POINTER_REGISTER))
8921 	       && GET_MODE_SIZE (mode) >= 4
8922 	       && CONST_INT_P (XEXP (x, 1))
8923 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8924 	return 1;
8925     }
8926 
8927   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8928 	   && GET_MODE_SIZE (mode) == 4
8929 	   && SYMBOL_REF_P (x)
8930 	   && CONSTANT_POOL_ADDRESS_P (x)
8931 	   && !arm_disable_literal_pool
8932 	   && ! (flag_pic
8933 		 && symbol_mentioned_p (get_pool_constant (x))
8934 		 && ! pcrel_constant_p (get_pool_constant (x))))
8935     return 1;
8936 
8937   return 0;
8938 }
8939 
8940 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8941    instruction of mode MODE.  */
8942 int
8943 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8944 {
8945   switch (GET_MODE_SIZE (mode))
8946     {
8947     case 1:
8948       return val >= 0 && val < 32;
8949 
8950     case 2:
8951       return val >= 0 && val < 64 && (val & 1) == 0;
8952 
8953     default:
8954       return (val >= 0
8955 	      && (val + GET_MODE_SIZE (mode)) <= 128
8956 	      && (val & 3) == 0);
8957     }
8958 }
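
/* For example, with the rule above a QImode access allows offsets 0-31, an
   HImode access even offsets 0-62, and an SImode access multiples of 4 up
   to 124, matching the Thumb-1 scaled 5-bit immediate forms of LDRB, LDRH
   and LDR respectively.  */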
8959 
8960 bool
8961 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8962 {
8963   if (TARGET_ARM)
8964     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8965   else if (TARGET_THUMB2)
8966     return thumb2_legitimate_address_p (mode, x, strict_p);
8967   else /* if (TARGET_THUMB1) */
8968     return thumb1_legitimate_address_p (mode, x, strict_p);
8969 }
8970 
8971 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8972 
8973    Given an rtx X being reloaded into a reg required to be
8974    in class CLASS, return the class of reg to actually use.
8975    In general this is just CLASS, but for the Thumb core registers and
8976    immediate constants we prefer a LO_REGS class or a subset.  */
8977 
8978 static reg_class_t
8979 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8980 {
8981   if (TARGET_32BIT)
8982     return rclass;
8983   else
8984     {
8985       if (rclass == GENERAL_REGS)
8986 	return LO_REGS;
8987       else
8988 	return rclass;
8989     }
8990 }
8991 
8992 /* Build the SYMBOL_REF for __tls_get_addr.  */
8993 
8994 static GTY(()) rtx tls_get_addr_libfunc;
8995 
8996 static rtx
8997 get_tls_get_addr (void)
8998 {
8999   if (!tls_get_addr_libfunc)
9000     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9001   return tls_get_addr_libfunc;
9002 }
9003 
9004 rtx
9005 arm_load_tp (rtx target)
9006 {
9007   if (!target)
9008     target = gen_reg_rtx (SImode);
9009 
9010   if (TARGET_HARD_TP)
9011     {
9012       /* Can return in any reg.  */
9013       emit_insn (gen_load_tp_hard (target));
9014     }
9015   else
9016     {
9017       /* Always returned in r0.  Immediately copy the result into a pseudo,
9018 	 otherwise other uses of r0 (e.g. setting up function arguments) may
9019 	 clobber the value.  */
9020 
9021       rtx tmp;
9022 
9023       if (TARGET_FDPIC)
9024 	{
9025 	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9026 	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9027 
9028 	  emit_insn (gen_load_tp_soft_fdpic ());
9029 
9030 	  /* Restore r9.  */
9031 	  emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9032 	}
9033       else
9034 	emit_insn (gen_load_tp_soft ());
9035 
9036       tmp = gen_rtx_REG (SImode, R0_REGNUM);
9037       emit_move_insn (target, tmp);
9038     }
9039   return target;
9040 }
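
/* Descriptive note only: on AAPCS GNU/Linux targets the soft path above
   amounts to a call to the __aeabi_read_tp helper, which returns the
   thread pointer in r0, while the hard path reads the CP15 thread ID
   register directly.  The exact expansions are given by the load_tp_*
   patterns in the machine description.  */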
9041 
9042 static rtx
9043 load_tls_operand (rtx x, rtx reg)
9044 {
9045   rtx tmp;
9046 
9047   if (reg == NULL_RTX)
9048     reg = gen_reg_rtx (SImode);
9049 
9050   tmp = gen_rtx_CONST (SImode, x);
9051 
9052   emit_move_insn (reg, tmp);
9053 
9054   return reg;
9055 }
9056 
9057 static rtx_insn *
9058 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9059 {
9060   rtx label, labelno = NULL_RTX, sum;
9061 
9062   gcc_assert (reloc != TLS_DESCSEQ);
9063   start_sequence ();
9064 
9065   if (TARGET_FDPIC)
9066     {
9067       sum = gen_rtx_UNSPEC (Pmode,
9068 			    gen_rtvec (2, x, GEN_INT (reloc)),
9069 			    UNSPEC_TLS);
9070     }
9071   else
9072     {
9073       labelno = GEN_INT (pic_labelno++);
9074       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9075       label = gen_rtx_CONST (VOIDmode, label);
9076 
9077       sum = gen_rtx_UNSPEC (Pmode,
9078 			    gen_rtvec (4, x, GEN_INT (reloc), label,
9079 				       GEN_INT (TARGET_ARM ? 8 : 4)),
9080 			    UNSPEC_TLS);
9081     }
9082   reg = load_tls_operand (sum, reg);
9083 
9084   if (TARGET_FDPIC)
9085       emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9086   else if (TARGET_ARM)
9087     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9088   else
9089     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9090 
9091   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9092 				     LCT_PURE, /* LCT_CONST?  */
9093 				     Pmode, reg, Pmode);
9094 
9095   rtx_insn *insns = get_insns ();
9096   end_sequence ();
9097 
9098   return insns;
9099 }
9100 
9101 static rtx
9102 arm_tls_descseq_addr (rtx x, rtx reg)
9103 {
9104   rtx labelno = GEN_INT (pic_labelno++);
9105   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9106   rtx sum = gen_rtx_UNSPEC (Pmode,
9107 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9108 				       gen_rtx_CONST (VOIDmode, label),
9109 				       GEN_INT (!TARGET_ARM)),
9110 			    UNSPEC_TLS);
9111   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9112 
9113   emit_insn (gen_tlscall (x, labelno));
9114   if (!reg)
9115     reg = gen_reg_rtx (SImode);
9116   else
9117     gcc_assert (REGNO (reg) != R0_REGNUM);
9118 
9119   emit_move_insn (reg, reg0);
9120 
9121   return reg;
9122 }
9123 
9124 
9125 rtx
9126 legitimize_tls_address (rtx x, rtx reg)
9127 {
9128   rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9129   rtx_insn *insns;
9130   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9131 
9132   switch (model)
9133     {
9134     case TLS_MODEL_GLOBAL_DYNAMIC:
9135       if (TARGET_GNU2_TLS)
9136 	{
9137 	  gcc_assert (!TARGET_FDPIC);
9138 
9139 	  reg = arm_tls_descseq_addr (x, reg);
9140 
9141 	  tp = arm_load_tp (NULL_RTX);
9142 
9143 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9144 	}
9145       else
9146 	{
9147 	  /* Original scheme */
9148 	  if (TARGET_FDPIC)
9149 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9150 	  else
9151 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9152 	  dest = gen_reg_rtx (Pmode);
9153 	  emit_libcall_block (insns, dest, ret, x);
9154 	}
9155       return dest;
9156 
9157     case TLS_MODEL_LOCAL_DYNAMIC:
9158       if (TARGET_GNU2_TLS)
9159 	{
9160 	  gcc_assert (!TARGET_FDPIC);
9161 
9162 	  reg = arm_tls_descseq_addr (x, reg);
9163 
9164 	  tp = arm_load_tp (NULL_RTX);
9165 
9166 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9167 	}
9168       else
9169 	{
9170 	  if (TARGET_FDPIC)
9171 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9172 	  else
9173 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9174 
9175 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9176 	     share the LDM result with other LD model accesses.  */
9177 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9178 				UNSPEC_TLS);
9179 	  dest = gen_reg_rtx (Pmode);
9180 	  emit_libcall_block (insns, dest, ret, eqv);
9181 
9182 	  /* Load the addend.  */
9183 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9184 						     GEN_INT (TLS_LDO32)),
9185 				   UNSPEC_TLS);
9186 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9187 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
9188 	}
9189       return dest;
9190 
9191     case TLS_MODEL_INITIAL_EXEC:
9192       if (TARGET_FDPIC)
9193 	{
9194 	  sum = gen_rtx_UNSPEC (Pmode,
9195 				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9196 				UNSPEC_TLS);
9197 	  reg = load_tls_operand (sum, reg);
9198 	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9199 	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9200 	}
9201       else
9202 	{
9203 	  labelno = GEN_INT (pic_labelno++);
9204 	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9205 	  label = gen_rtx_CONST (VOIDmode, label);
9206 	  sum = gen_rtx_UNSPEC (Pmode,
9207 				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9208 					   GEN_INT (TARGET_ARM ? 8 : 4)),
9209 				UNSPEC_TLS);
9210 	  reg = load_tls_operand (sum, reg);
9211 
9212 	  if (TARGET_ARM)
9213 	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9214 	  else if (TARGET_THUMB2)
9215 	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9216 	  else
9217 	    {
9218 	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9219 	      emit_move_insn (reg, gen_const_mem (SImode, reg));
9220 	    }
9221 	}
9222 
9223       tp = arm_load_tp (NULL_RTX);
9224 
9225       return gen_rtx_PLUS (Pmode, tp, reg);
9226 
9227     case TLS_MODEL_LOCAL_EXEC:
9228       tp = arm_load_tp (NULL_RTX);
9229 
9230       reg = gen_rtx_UNSPEC (Pmode,
9231 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9232 			    UNSPEC_TLS);
9233       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9234 
9235       return gen_rtx_PLUS (Pmode, tp, reg);
9236 
9237     default:
9238       abort ();
9239     }
9240 }
9241 
9242 /* Try machine-dependent ways of modifying an illegitimate address
9243    to be legitimate.  If we find one, return the new, valid address.  */
9244 rtx
9245 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9246 {
9247   if (arm_tls_referenced_p (x))
9248     {
9249       rtx addend = NULL;
9250 
9251       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9252 	{
9253 	  addend = XEXP (XEXP (x, 0), 1);
9254 	  x = XEXP (XEXP (x, 0), 0);
9255 	}
9256 
9257       if (!SYMBOL_REF_P (x))
9258 	return x;
9259 
9260       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9261 
9262       x = legitimize_tls_address (x, NULL_RTX);
9263 
9264       if (addend)
9265 	{
9266 	  x = gen_rtx_PLUS (SImode, x, addend);
9267 	  orig_x = x;
9268 	}
9269       else
9270 	return x;
9271     }
9272 
9273   if (TARGET_THUMB1)
9274     return thumb_legitimize_address (x, orig_x, mode);
9275 
9276   if (GET_CODE (x) == PLUS)
9277     {
9278       rtx xop0 = XEXP (x, 0);
9279       rtx xop1 = XEXP (x, 1);
9280 
9281       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9282 	xop0 = force_reg (SImode, xop0);
9283 
9284       if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9285 	  && !symbol_mentioned_p (xop1))
9286 	xop1 = force_reg (SImode, xop1);
9287 
9288       if (ARM_BASE_REGISTER_RTX_P (xop0)
9289 	  && CONST_INT_P (xop1))
9290 	{
9291 	  HOST_WIDE_INT n, low_n;
9292 	  rtx base_reg, val;
9293 	  n = INTVAL (xop1);
9294 
9295 	  /* VFP addressing modes actually allow greater offsets, but for
9296 	     now we just stick with the lowest common denominator.  */
9297 	  if (mode == DImode || mode == DFmode)
9298 	    {
9299 	      low_n = n & 0x0f;
9300 	      n &= ~0x0f;
9301 	      if (low_n > 4)
9302 		{
9303 		  n += 16;
9304 		  low_n -= 16;
9305 		}
9306 	    }
9307 	  else
9308 	    {
9309 	      low_n = ((mode) == TImode ? 0
9310 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9311 	      n -= low_n;
9312 	    }
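	  /* Purely illustrative example of the split above: for a DImode
	     access with n == 27, low_n is initially 11; since that is
	     greater than 4, the base becomes xop0 + 32 and the final
	     access uses an offset of -5.  */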
9313 
9314 	  base_reg = gen_reg_rtx (SImode);
9315 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9316 	  emit_move_insn (base_reg, val);
9317 	  x = plus_constant (Pmode, base_reg, low_n);
9318 	}
9319       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9320 	x = gen_rtx_PLUS (SImode, xop0, xop1);
9321     }
9322 
9323   /* XXX We don't allow MINUS any more -- see comment in
9324      arm_legitimate_address_outer_p ().  */
9325   else if (GET_CODE (x) == MINUS)
9326     {
9327       rtx xop0 = XEXP (x, 0);
9328       rtx xop1 = XEXP (x, 1);
9329 
9330       if (CONSTANT_P (xop0))
9331 	xop0 = force_reg (SImode, xop0);
9332 
9333       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9334 	xop1 = force_reg (SImode, xop1);
9335 
9336       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9337 	x = gen_rtx_MINUS (SImode, xop0, xop1);
9338     }
9339 
9340   /* Make sure to take full advantage of the pre-indexed addressing mode
9341      with absolute addresses, which often allows the base register to
9342      be factored out across multiple adjacent memory references, and might
9343      even allow the minipool to be avoided entirely.  */
9344   else if (CONST_INT_P (x) && optimize > 0)
9345     {
9346       unsigned int bits;
9347       HOST_WIDE_INT mask, base, index;
9348       rtx base_reg;
9349 
9350       /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9351 	 only use an 8-bit index.  So let's use a 12-bit index for
9352 	 SImode only and hope that arm_gen_constant will enable LDRB
9353 	 to use more bits. */
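      /* Purely illustrative example: x == 0x12345 in SImode gives
	 mask == 0xfff, so base == 0x12000 and index == 0x345.  */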
9354       bits = (mode == SImode) ? 12 : 8;
9355       mask = (1 << bits) - 1;
9356       base = INTVAL (x) & ~mask;
9357       index = INTVAL (x) & mask;
9358       if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9359 	{
9360 	  /* It'll most probably be more efficient to generate the
9361 	     base with more bits set and use a negative index instead.
9362 	     Don't do this for Thumb as negative offsets are much more
9363 	     limited.  */
9364 	  base |= mask;
9365 	  index -= mask;
9366 	}
9367       base_reg = force_reg (SImode, GEN_INT (base));
9368       x = plus_constant (Pmode, base_reg, index);
9369     }
9370 
9371   if (flag_pic)
9372     {
9373       /* We need to find and carefully transform any SYMBOL and LABEL
9374 	 references; so go back to the original address expression.  */
9375       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9376 					  false /*compute_now*/);
9377 
9378       if (new_x != orig_x)
9379 	x = new_x;
9380     }
9381 
9382   return x;
9383 }
9384 
9385 
9386 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9387    to be legitimate.  If we find one, return the new, valid address.  */
9388 rtx
9389 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9390 {
9391   if (GET_CODE (x) == PLUS
9392       && CONST_INT_P (XEXP (x, 1))
9393       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9394 	  || INTVAL (XEXP (x, 1)) < 0))
9395     {
9396       rtx xop0 = XEXP (x, 0);
9397       rtx xop1 = XEXP (x, 1);
9398       HOST_WIDE_INT offset = INTVAL (xop1);
9399 
9400       /* Try and fold the offset into a biasing of the base register and
9401 	 then offsetting that.  Don't do this when optimizing for space
9402 	 since it can cause too many CSEs.  */
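      /* Purely illustrative example: an SImode access at offset 300 is
	 rewritten as (base + 252) + 48, keeping the final offset within
	 the immediate range of a Thumb-1 load.  */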
9403       if (optimize_size && offset >= 0
9404 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
9405 	{
9406 	  HOST_WIDE_INT delta;
9407 
9408 	  if (offset >= 256)
9409 	    delta = offset - (256 - GET_MODE_SIZE (mode));
9410 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9411 	    delta = 31 * GET_MODE_SIZE (mode);
9412 	  else
9413 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
9414 
9415 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9416 				NULL_RTX);
9417 	  x = plus_constant (Pmode, xop0, delta);
9418 	}
9419       else if (offset < 0 && offset > -256)
9420 	/* Small negative offsets are best done with a subtract before the
9421 	   dereference; forcing these into a register normally takes two
9422 	   instructions.  */
9423 	x = force_operand (x, NULL_RTX);
9424       else
9425 	{
9426 	  /* For the remaining cases, force the constant into a register.  */
9427 	  xop1 = force_reg (SImode, xop1);
9428 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
9429 	}
9430     }
9431   else if (GET_CODE (x) == PLUS
9432 	   && s_register_operand (XEXP (x, 1), SImode)
9433 	   && !s_register_operand (XEXP (x, 0), SImode))
9434     {
9435       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9436 
9437       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9438     }
9439 
9440   if (flag_pic)
9441     {
9442       /* We need to find and carefully transform any SYMBOL and LABEL
9443 	 references; so go back to the original address expression.  */
9444       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9445 					  false /*compute_now*/);
9446 
9447       if (new_x != orig_x)
9448 	x = new_x;
9449     }
9450 
9451   return x;
9452 }
9453 
9454 /* Return TRUE if X contains any TLS symbol references.  */
9455 
9456 bool
9457 arm_tls_referenced_p (rtx x)
9458 {
9459   if (! TARGET_HAVE_TLS)
9460     return false;
9461 
9462   subrtx_iterator::array_type array;
9463   FOR_EACH_SUBRTX (iter, array, x, ALL)
9464     {
9465       const_rtx x = *iter;
9466       if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9467 	{
9468 	  /* ARM currently does not provide relocations to encode TLS variables
9469 	     into AArch32 instructions, only data, so there is currently no
9470 	     way to implement these if the literal pool is disabled.  */
9471 	  if (arm_disable_literal_pool)
9472 	    sorry ("accessing thread-local storage is not currently supported "
9473 		   "with %<-mpure-code%> or %<-mslow-flash-data%>");
9474 
9475 	  return true;
9476 	}
9477 
9478       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9479 	 TLS offsets, not real symbol references.  */
9480       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9481 	iter.skip_subrtxes ();
9482     }
9483   return false;
9484 }
9485 
9486 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9487 
9488    On the ARM, allow any integer (invalid ones are removed later by insn
9489    patterns), nice doubles and symbol_refs which refer to the function's
9490    constant pool XXX.
9491 
9492    When generating pic allow anything.  */
9493 
9494 static bool
9495 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9496 {
9497   if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9498     return false;
9499 
9500   return flag_pic || !label_mentioned_p (x);
9501 }
9502 
9503 static bool
9504 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9505 {
9506   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9507      RTXs.  These must therefore be allowed for Thumb-1 so that, when run
9508      for ARMv8-M Baseline or later, the result is valid.  */
9509   if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9510     x = XEXP (x, 0);
9511 
9512   return (CONST_INT_P (x)
9513 	  || CONST_DOUBLE_P (x)
9514 	  || CONSTANT_ADDRESS_P (x)
9515 	  || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9516 	  /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9517 	     we build the symbol address with upper/lower
9518 	     relocations.  */
9519 	  || (TARGET_THUMB1
9520 	      && !label_mentioned_p (x)
9521 	      && arm_valid_symbolic_address_p (x)
9522 	      && arm_disable_literal_pool)
9523 	  || flag_pic);
9524 }
9525 
9526 static bool
9527 arm_legitimate_constant_p (machine_mode mode, rtx x)
9528 {
9529   return (!arm_cannot_force_const_mem (mode, x)
9530 	  && (TARGET_32BIT
9531 	      ? arm_legitimate_constant_p_1 (mode, x)
9532 	      : thumb_legitimate_constant_p (mode, x)));
9533 }
9534 
9535 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
9536 
9537 static bool
9538 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9539 {
9540   rtx base, offset;
9541   split_const (x, &base, &offset);
9542 
9543   if (SYMBOL_REF_P (base))
9544     {
9545       /* Function symbols cannot have an offset due to the Thumb bit.  */
9546       if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9547 	  && INTVAL (offset) != 0)
9548 	return true;
9549 
9550       if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9551 	  && !offset_within_block_p (base, INTVAL (offset)))
9552 	return true;
9553     }
9554   return arm_tls_referenced_p (x);
9555 }
9556 
9557 #define REG_OR_SUBREG_REG(X)						\
9558   (REG_P (X)							\
9559    || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9560 
9561 #define REG_OR_SUBREG_RTX(X)			\
9562    (REG_P (X) ? (X) : SUBREG_REG (X))
9563 
9564 static inline int
9565 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9566 {
9567   machine_mode mode = GET_MODE (x);
9568   int total, words;
9569 
9570   switch (code)
9571     {
9572     case ASHIFT:
9573     case ASHIFTRT:
9574     case LSHIFTRT:
9575     case ROTATERT:
9576       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9577 
9578     case PLUS:
9579     case MINUS:
9580     case COMPARE:
9581     case NEG:
9582     case NOT:
9583       return COSTS_N_INSNS (1);
9584 
9585     case MULT:
9586       if (arm_arch6m && arm_m_profile_small_mul)
9587 	return COSTS_N_INSNS (32);
9588 
9589       if (CONST_INT_P (XEXP (x, 1)))
9590 	{
9591 	  int cycles = 0;
9592 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9593 
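	  /* Count roughly one cycle for every two significant bits of the
	     constant multiplier; a rough approximation of an
	     early-terminating multiply unit.  */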
9594 	  while (i)
9595 	    {
9596 	      i >>= 2;
9597 	      cycles++;
9598 	    }
9599 	  return COSTS_N_INSNS (2) + cycles;
9600 	}
9601       return COSTS_N_INSNS (1) + 16;
9602 
9603     case SET:
9604       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9605 	 the mode.  */
9606       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9607       return (COSTS_N_INSNS (words)
9608 	      + 4 * ((MEM_P (SET_SRC (x)))
9609 		     + MEM_P (SET_DEST (x))));
9610 
9611     case CONST_INT:
9612       if (outer == SET)
9613 	{
9614 	  if (UINTVAL (x) < 256
9615 	      /* 16-bit constant.  */
9616 	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9617 	    return 0;
9618 	  if (thumb_shiftable_const (INTVAL (x)))
9619 	    return COSTS_N_INSNS (2);
9620 	  return arm_disable_literal_pool
9621 	    ? COSTS_N_INSNS (8)
9622 	    : COSTS_N_INSNS (3);
9623 	}
9624       else if ((outer == PLUS || outer == COMPARE)
9625 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
9626 	return 0;
9627       else if ((outer == IOR || outer == XOR || outer == AND)
9628 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
9629 	return COSTS_N_INSNS (1);
9630       else if (outer == AND)
9631 	{
9632 	  int i;
9633 	  /* This duplicates the tests in the andsi3 expander.  */
9634 	  for (i = 9; i <= 31; i++)
9635 	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9636 		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9637 	      return COSTS_N_INSNS (2);
9638 	}
9639       else if (outer == ASHIFT || outer == ASHIFTRT
9640 	       || outer == LSHIFTRT)
9641 	return 0;
9642       return COSTS_N_INSNS (2);
9643 
9644     case CONST:
9645     case CONST_DOUBLE:
9646     case LABEL_REF:
9647     case SYMBOL_REF:
9648       return COSTS_N_INSNS (3);
9649 
9650     case UDIV:
9651     case UMOD:
9652     case DIV:
9653     case MOD:
9654       return 100;
9655 
9656     case TRUNCATE:
9657       return 99;
9658 
9659     case AND:
9660     case XOR:
9661     case IOR:
9662       /* XXX guess.  */
9663       return 8;
9664 
9665     case MEM:
9666       /* XXX another guess.  */
9667       /* Memory costs quite a lot for the first word, but subsequent words
9668 	 load at the equivalent of a single insn each.  */
9669       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9670 	      + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9671 		 ? 4 : 0));
9672 
9673     case IF_THEN_ELSE:
9674       /* XXX a guess.  */
9675       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9676 	return 14;
9677       return 2;
9678 
9679     case SIGN_EXTEND:
9680     case ZERO_EXTEND:
9681       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9682       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9683 
9684       if (mode == SImode)
9685 	return total;
9686 
9687       if (arm_arch6)
9688 	return total + COSTS_N_INSNS (1);
9689 
9690       /* Assume a two-shift sequence.  Increase the cost slightly so
9691 	 we prefer actual shifts over an extend operation.  */
9692       return total + 1 + COSTS_N_INSNS (2);
9693 
9694     default:
9695       return 99;
9696     }
9697 }
9698 
9699 /* Estimates the size cost of thumb1 instructions.
9700    For now most of the code is copied from thumb1_rtx_costs.  We need more
9701    fine-grained tuning when we have more related test cases.  */
9702 static inline int
9703 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9704 {
9705   machine_mode mode = GET_MODE (x);
9706   int words, cost;
9707 
9708   switch (code)
9709     {
9710     case ASHIFT:
9711     case ASHIFTRT:
9712     case LSHIFTRT:
9713     case ROTATERT:
9714       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9715 
9716     case PLUS:
9717     case MINUS:
9718       /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9719 	 defined by RTL expansion, especially for the expansion of
9720 	 multiplication.  */
9721       if ((GET_CODE (XEXP (x, 0)) == MULT
9722 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9723 	  || (GET_CODE (XEXP (x, 1)) == MULT
9724 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9725 	return COSTS_N_INSNS (2);
9726       /* Fall through.  */
9727     case COMPARE:
9728     case NEG:
9729     case NOT:
9730       return COSTS_N_INSNS (1);
9731 
9732     case MULT:
9733       if (CONST_INT_P (XEXP (x, 1)))
9734         {
9735           /* The Thumb-1 mul instruction can't operate on a constant; we must
9736              load it into a register first.  */
9737           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9738 	  /* For targets that have a very small, high-latency multiply
9739 	     unit, we prefer to synthesize the mult with up to 5 instructions,
9740 	     giving a good balance between size and performance.  */
9741 	  if (arm_arch6m && arm_m_profile_small_mul)
9742 	    return COSTS_N_INSNS (5);
9743 	  else
9744 	    return COSTS_N_INSNS (1) + const_size;
9745         }
9746       return COSTS_N_INSNS (1);
9747 
9748     case SET:
9749       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9750 	 the mode.  */
9751       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9752       cost = COSTS_N_INSNS (words);
9753       if (satisfies_constraint_J (SET_SRC (x))
9754 	  || satisfies_constraint_K (SET_SRC (x))
9755 	     /* Too big an immediate for a 2-byte mov; a 4-byte MOVW is needed.  */
9756 	  || (CONST_INT_P (SET_SRC (x))
9757 	      && UINTVAL (SET_SRC (x)) >= 256
9758 	      && TARGET_HAVE_MOVT
9759 	      && satisfies_constraint_j (SET_SRC (x)))
9760 	     /* thumb1_movdi_insn.  */
9761 	  || ((words > 1) && MEM_P (SET_SRC (x))))
9762 	cost += COSTS_N_INSNS (1);
9763       return cost;
9764 
9765     case CONST_INT:
9766       if (outer == SET)
9767         {
9768           if (UINTVAL (x) < 256)
9769             return COSTS_N_INSNS (1);
9770 	  /* MOVW is 4 bytes long.  */
9771 	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9772 	    return COSTS_N_INSNS (2);
9773 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9774 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9775             return COSTS_N_INSNS (2);
9776 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9777           if (thumb_shiftable_const (INTVAL (x)))
9778             return COSTS_N_INSNS (2);
9779 	  return arm_disable_literal_pool
9780 	    ? COSTS_N_INSNS (8)
9781 	    : COSTS_N_INSNS (3);
9782         }
9783       else if ((outer == PLUS || outer == COMPARE)
9784                && INTVAL (x) < 256 && INTVAL (x) > -256)
9785         return 0;
9786       else if ((outer == IOR || outer == XOR || outer == AND)
9787                && INTVAL (x) < 256 && INTVAL (x) >= -256)
9788         return COSTS_N_INSNS (1);
9789       else if (outer == AND)
9790         {
9791           int i;
9792           /* This duplicates the tests in the andsi3 expander.  */
9793           for (i = 9; i <= 31; i++)
9794             if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9795                 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9796               return COSTS_N_INSNS (2);
9797         }
9798       else if (outer == ASHIFT || outer == ASHIFTRT
9799                || outer == LSHIFTRT)
9800         return 0;
9801       return COSTS_N_INSNS (2);
9802 
9803     case CONST:
9804     case CONST_DOUBLE:
9805     case LABEL_REF:
9806     case SYMBOL_REF:
9807       return COSTS_N_INSNS (3);
9808 
9809     case UDIV:
9810     case UMOD:
9811     case DIV:
9812     case MOD:
9813       return 100;
9814 
9815     case TRUNCATE:
9816       return 99;
9817 
9818     case AND:
9819     case XOR:
9820     case IOR:
9821       return COSTS_N_INSNS (1);
9822 
9823     case MEM:
9824       return (COSTS_N_INSNS (1)
9825 	      + COSTS_N_INSNS (1)
9826 		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9827               + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9828                  ? COSTS_N_INSNS (1) : 0));
9829 
9830     case IF_THEN_ELSE:
9831       /* XXX a guess.  */
9832       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9833         return 14;
9834       return 2;
9835 
9836     case ZERO_EXTEND:
9837       /* XXX still guessing.  */
9838       switch (GET_MODE (XEXP (x, 0)))
9839         {
9840           case E_QImode:
9841             return (1 + (mode == DImode ? 4 : 0)
9842                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9843 
9844           case E_HImode:
9845             return (4 + (mode == DImode ? 4 : 0)
9846                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9847 
9848           case E_SImode:
9849             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9850 
9851           default:
9852             return 99;
9853         }
9854 
9855     default:
9856       return 99;
9857     }
9858 }
9859 
9860 /* Helper function for arm_rtx_costs.  If one operand of OP (a
9861    PLUS) is the carry flag, then return the other operand.  If
9862    neither is a carry, return OP unchanged.  */
9863 static rtx
9864 strip_carry_operation (rtx op)
9865 {
9866   gcc_assert (GET_CODE (op) == PLUS);
9867   if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9868     return XEXP (op, 1);
9869   else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9870     return XEXP (op, 0);
9871   return op;
9872 }
9873 
9874 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9875    operand, then return the operand that is being shifted.  If the shift
9876    is not by a constant, then set SHIFT_REG to point to the operand.
9877    Return NULL if OP is not a shifter operand.  */
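/* For example (illustrative): (mult X (const_int 4)) is treated as a left
   shift by two and X is returned, while for (ashift X Y) with Y a register,
   X is returned and *SHIFT_REG is set to Y.  */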
9878 static rtx
9879 shifter_op_p (rtx op, rtx *shift_reg)
9880 {
9881   enum rtx_code code = GET_CODE (op);
9882 
9883   if (code == MULT && CONST_INT_P (XEXP (op, 1))
9884       && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9885     return XEXP (op, 0);
9886   else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9887     return XEXP (op, 0);
9888   else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9889 	   || code == ASHIFTRT)
9890     {
9891       if (!CONST_INT_P (XEXP (op, 1)))
9892 	*shift_reg = XEXP (op, 1);
9893       return XEXP (op, 0);
9894     }
9895 
9896   return NULL;
9897 }
9898 
9899 static bool
9900 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9901 {
9902   const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9903   rtx_code code = GET_CODE (x);
9904   gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9905 
9906   switch (XINT (x, 1))
9907     {
9908     case UNSPEC_UNALIGNED_LOAD:
9909       /* We can only do unaligned loads into the integer unit, and we can't
9910 	 use LDM or LDRD.  */
9911       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9912       if (speed_p)
9913 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9914 		  + extra_cost->ldst.load_unaligned);
9915 
9916 #ifdef NOT_YET
9917       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9918 				 ADDR_SPACE_GENERIC, speed_p);
9919 #endif
9920       return true;
9921 
9922     case UNSPEC_UNALIGNED_STORE:
9923       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9924       if (speed_p)
9925 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9926 		  + extra_cost->ldst.store_unaligned);
9927 
9928       *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9929 #ifdef NOT_YET
9930       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9931 				 ADDR_SPACE_GENERIC, speed_p);
9932 #endif
9933       return true;
9934 
9935     case UNSPEC_VRINTZ:
9936     case UNSPEC_VRINTP:
9937     case UNSPEC_VRINTM:
9938     case UNSPEC_VRINTR:
9939     case UNSPEC_VRINTX:
9940     case UNSPEC_VRINTA:
9941       if (speed_p)
9942         *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9943 
9944       return true;
9945     default:
9946       *cost = COSTS_N_INSNS (2);
9947       break;
9948     }
9949   return true;
9950 }
9951 
9952 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9953    call (one insn for -Os) and then one for processing the result.  */
9954 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
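/* For instance, LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for size.  */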
9955 
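/* Helper used by arm_rtx_costs_internal for narrow-mode (QImode/HImode)
   PLUS and MINUS: if operand IDX of X is a left-shift-style shifter
   operand, add the arithmetic-with-shift cost plus the costs of both
   operands to *cost and return true from the enclosing function.  */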
9956 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9957 	do								\
9958 	  {								\
9959 	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9960 	    if (shift_op != NULL					\
9961 	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9962 	      {								\
9963 	        if (shift_reg)						\
9964 		  {							\
9965 		    if (speed_p)					\
9966 		      *cost += extra_cost->alu.arith_shift_reg;		\
9967 		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
9968 				       ASHIFT, 1, speed_p);		\
9969 		  }							\
9970 	        else if (speed_p)					\
9971 		  *cost += extra_cost->alu.arith_shift;			\
9972 									\
9973 		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
9974 				    ASHIFT, 0, speed_p)			\
9975 			  + rtx_cost (XEXP (x, 1 - IDX),		\
9976 				      GET_MODE (shift_op),		\
9977 			              OP, 1, speed_p));			\
9978 	        return true;						\
9979 	      }								\
9980 	  }								\
9981 	while (0)
9982 
9983 /* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
9984    considering the costs of the addressing mode and memory access
9985    separately.  */
9986 static bool
9987 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9988 	       int *cost, bool speed_p)
9989 {
9990   machine_mode mode = GET_MODE (x);
9991 
9992   *cost = COSTS_N_INSNS (1);
9993 
9994   if (flag_pic
9995       && GET_CODE (XEXP (x, 0)) == PLUS
9996       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9997     /* This will be split into two instructions.  Add the cost of the
9998        additional instruction here.  The cost of the memory access is computed
9999        below.  See arm.md:calculate_pic_address.  */
10000     *cost += COSTS_N_INSNS (1);
10001 
10002   /* Calculate cost of the addressing mode.  */
10003   if (speed_p)
10004     {
10005       arm_addr_mode_op op_type;
10006       switch (GET_CODE (XEXP (x, 0)))
10007 	{
10008 	default:
10009 	case REG:
10010 	  op_type = AMO_DEFAULT;
10011 	  break;
10012 	case MINUS:
10013 	  /* MINUS does not appear in RTL, but the architecture supports it,
10014 	     so handle this case defensively.  */
10015 	  /* fall through */
10016 	case PLUS:
10017 	  op_type = AMO_NO_WB;
10018 	  break;
10019 	case PRE_INC:
10020 	case PRE_DEC:
10021 	case POST_INC:
10022 	case POST_DEC:
10023 	case PRE_MODIFY:
10024 	case POST_MODIFY:
10025 	  op_type = AMO_WB;
10026 	  break;
10027 	}
10028 
10029       if (VECTOR_MODE_P (mode))
10030 	  *cost += current_tune->addr_mode_costs->vector[op_type];
10031       else if (FLOAT_MODE_P (mode))
10032 	  *cost += current_tune->addr_mode_costs->fp[op_type];
10033       else
10034 	  *cost += current_tune->addr_mode_costs->integer[op_type];
10035     }
10036 
10037   /* Calculate cost of memory access.  */
10038   if (speed_p)
10039     {
10040       if (FLOAT_MODE_P (mode))
10041 	{
10042 	  if (GET_MODE_SIZE (mode) == 8)
10043 	    *cost += extra_cost->ldst.loadd;
10044 	  else
10045 	    *cost += extra_cost->ldst.loadf;
10046 	}
10047       else if (VECTOR_MODE_P (mode))
10048 	*cost += extra_cost->ldst.loadv;
10049       else
10050 	{
10051 	  /* Integer modes */
10052 	  if (GET_MODE_SIZE (mode) == 8)
10053 	    *cost += extra_cost->ldst.ldrd;
10054 	  else
10055 	    *cost += extra_cost->ldst.load;
10056 	}
10057     }
10058 
10059   return true;
10060 }
10061 
10062 /* RTX costs.  Make an estimate of the cost of executing the operation
10063    X, which is contained within an operation with code OUTER_CODE.
10064    SPEED_P indicates whether the cost desired is the performance cost,
10065    or the size cost.  The estimate is stored in COST and the return
10066    value is TRUE if the cost calculation is final, or FALSE if the
10067    caller should recurse through the operands of X to add additional
10068    costs.
10069 
10070    We currently make no attempt to model the size savings of Thumb-2
10071    16-bit instructions.  At the normal points in compilation where
10072    this code is called we have no measure of whether the condition
10073    flags are live or not, and thus no realistic way to determine what
10074    the size will eventually be.  */
10075 static bool
10076 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10077 		   const struct cpu_cost_table *extra_cost,
10078 		   int *cost, bool speed_p)
10079 {
10080   machine_mode mode = GET_MODE (x);
10081 
10082   *cost = COSTS_N_INSNS (1);
10083 
10084   if (TARGET_THUMB1)
10085     {
10086       if (speed_p)
10087 	*cost = thumb1_rtx_costs (x, code, outer_code);
10088       else
10089 	*cost = thumb1_size_rtx_costs (x, code, outer_code);
10090       return true;
10091     }
10092 
10093   switch (code)
10094     {
10095     case SET:
10096       *cost = 0;
10097       /* SET RTXs don't have a mode so we get it from the destination.  */
10098       mode = GET_MODE (SET_DEST (x));
10099 
10100       if (REG_P (SET_SRC (x))
10101 	  && REG_P (SET_DEST (x)))
10102 	{
10103 	  /* Assume that most copies can be done with a single insn,
10104 	     unless we don't have HW FP, in which case everything
10105 	     larger than word mode will require two insns.  */
10106 	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10107 				   && GET_MODE_SIZE (mode) > 4)
10108 				  || mode == DImode)
10109 				 ? 2 : 1);
10110 	  /* Conditional register moves can be encoded
10111 	     in 16 bits in Thumb mode.  */
10112 	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10113 	    *cost >>= 1;
10114 
10115 	  return true;
10116 	}
10117 
10118       if (CONST_INT_P (SET_SRC (x)))
10119 	{
10120 	  /* Handle CONST_INT here, since the value doesn't have a mode
10121 	     and we would otherwise be unable to work out the true cost.  */
10122 	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10123 			    0, speed_p);
10124 	  outer_code = SET;
10125 	  /* Slightly lower the cost of setting a core reg to a constant.
10126 	     This helps break up chains and allows for better scheduling.  */
10127 	  if (REG_P (SET_DEST (x))
10128 	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
10129 	    *cost -= 1;
10130 	  x = SET_SRC (x);
10131 	  /* Immediate moves with an immediate in the range [0, 255] can be
10132 	     encoded in 16 bits in Thumb mode.  */
10133 	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10134 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10135 	    *cost >>= 1;
10136 	  goto const_int_cost;
10137 	}
10138 
10139       return false;
10140 
10141     case MEM:
10142       return arm_mem_costs (x, extra_cost, cost, speed_p);
10143 
10144     case PARALLEL:
10145     {
10146    /* Calculations of LDM costs are complex.  We assume an initial cost
10147    (ldm_1st) which will load the number of registers mentioned in
10148    ldm_regs_per_insn_1st registers; then each additional
10149    ldm_regs_per_insn_subsequent registers cost one more insn.  The
10150    formula for N regs is thus:
10151 
10152    ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10153 			     + ldm_regs_per_insn_subsequent - 1)
10154 			    / ldm_regs_per_insn_subsequent).
10155 
10156    Additional costs may also be added for addressing.  A similar
10157    formula is used for STM.  */
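      /* For example, with hypothetical tuning values ldm_regs_per_insn_1st
	 == 2 and ldm_regs_per_insn_subsequent == 2, a 6-register LDM adds
	 ldm_1st + COSTS_N_INSNS ((4 + 1) / 2), i.e. ldm_1st
	 + COSTS_N_INSNS (2), on top of any addressing costs.  */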
10158 
10159       bool is_ldm = load_multiple_operation (x, SImode);
10160       bool is_stm = store_multiple_operation (x, SImode);
10161 
10162       if (is_ldm || is_stm)
10163         {
10164 	  if (speed_p)
10165 	    {
10166 	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
10167 	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
10168 	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
10169 	                              : extra_cost->ldst.stm_regs_per_insn_1st;
10170 	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
10171 	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10172 	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
10173 
10174 	      *cost += regs_per_insn_1st
10175 	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10176 					    + regs_per_insn_sub - 1)
10177 					  / regs_per_insn_sub);
10178 	      return true;
10179 	    }
10180 
10181         }
10182       return false;
10183     }
10184     case DIV:
10185     case UDIV:
10186       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10187 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10188 	*cost += COSTS_N_INSNS (speed_p
10189 			       ? extra_cost->fp[mode != SFmode].div : 0);
10190       else if (mode == SImode && TARGET_IDIV)
10191 	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10192       else
10193 	*cost = LIBCALL_COST (2);
10194 
10195       /* Make the cost of sdiv more expensive so that udiv is preferred
10196 	 when both sdiv and udiv are possible.  */
10197       *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10198       return false;	/* All arguments must be in registers.  */
10199 
10200     case MOD:
10201       /* MOD by a power of 2 can be expanded as:
10202 	 rsbs    r1, r0, #0
10203 	 and     r0, r0, #(n - 1)
10204 	 and     r1, r1, #(n - 1)
10205 	 rsbpl   r0, r1, #0.  */
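      /* Purely illustrative check of the expansion above: with n == 8 and
	 r0 == -13, the sequence leaves r0 == -5, matching C's truncating
	 division semantics.  */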
10206       if (CONST_INT_P (XEXP (x, 1))
10207 	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10208 	  && mode == SImode)
10209 	{
10210 	  *cost += COSTS_N_INSNS (3);
10211 
10212 	  if (speed_p)
10213 	    *cost += 2 * extra_cost->alu.logical
10214 		     + extra_cost->alu.arith;
10215 	  return true;
10216 	}
10217 
10218     /* Fall-through.  */
10219     case UMOD:
10220       /* Make the cost of sdiv more expensive so that udiv is preferred
10221 	 when both sdiv and udiv are possible.  */
10222       *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10223       return false;	/* All arguments must be in registers.  */
10224 
10225     case ROTATE:
10226       if (mode == SImode && REG_P (XEXP (x, 1)))
10227 	{
10228 	  *cost += (COSTS_N_INSNS (1)
10229 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10230 	  if (speed_p)
10231 	    *cost += extra_cost->alu.shift_reg;
10232 	  return true;
10233 	}
10234       /* Fall through */
10235     case ROTATERT:
10236     case ASHIFT:
10237     case LSHIFTRT:
10238     case ASHIFTRT:
10239       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10240 	{
10241 	  *cost += (COSTS_N_INSNS (2)
10242 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10243 	  if (speed_p)
10244 	    *cost += 2 * extra_cost->alu.shift;
10245 	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
10246 	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10247 	    *cost += 1;
10248 	  return true;
10249 	}
10250       else if (mode == SImode)
10251 	{
10252 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10253 	  /* Slightly disparage register shifts at -Os, but not by much.  */
10254 	  if (!CONST_INT_P (XEXP (x, 1)))
10255 	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10256 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10257 	  return true;
10258 	}
10259       else if (GET_MODE_CLASS (mode) == MODE_INT
10260 	       && GET_MODE_SIZE (mode) < 4)
10261 	{
10262 	  if (code == ASHIFT)
10263 	    {
10264 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10265 	      /* Slightly disparage register shifts at -Os, but not by
10266 	         much.  */
10267 	      if (!CONST_INT_P (XEXP (x, 1)))
10268 		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
10269 			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10270 	    }
10271 	  else if (code == LSHIFTRT || code == ASHIFTRT)
10272 	    {
10273 	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10274 		{
10275 		  /* Can use SBFX/UBFX.  */
10276 		  if (speed_p)
10277 		    *cost += extra_cost->alu.bfx;
10278 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10279 		}
10280 	      else
10281 		{
10282 		  *cost += COSTS_N_INSNS (1);
10283 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10284 		  if (speed_p)
10285 		    {
10286 		      if (CONST_INT_P (XEXP (x, 1)))
10287 			*cost += 2 * extra_cost->alu.shift;
10288 		      else
10289 			*cost += (extra_cost->alu.shift
10290 				  + extra_cost->alu.shift_reg);
10291 		    }
10292 		  else
10293 		    /* Slightly disparage register shifts.  */
10294 		    *cost += !CONST_INT_P (XEXP (x, 1));
10295 		}
10296 	    }
10297 	  else /* Rotates.  */
10298 	    {
10299 	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10300 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10301 	      if (speed_p)
10302 		{
10303 		  if (CONST_INT_P (XEXP (x, 1)))
10304 		    *cost += (2 * extra_cost->alu.shift
10305 			      + extra_cost->alu.log_shift);
10306 		  else
10307 		    *cost += (extra_cost->alu.shift
10308 			      + extra_cost->alu.shift_reg
10309 			      + extra_cost->alu.log_shift_reg);
10310 		}
10311 	    }
10312 	  return true;
10313 	}
10314 
10315       *cost = LIBCALL_COST (2);
10316       return false;
10317 
10318     case BSWAP:
10319       if (arm_arch6)
10320         {
10321           if (mode == SImode)
10322             {
10323               if (speed_p)
10324                 *cost += extra_cost->alu.rev;
10325 
10326               return false;
10327             }
10328         }
10329       else
10330         {
10331         /* No rev instruction available.  Look at arm_legacy_rev
10332            and thumb_legacy_rev for the form of RTL used then.  */
10333           if (TARGET_THUMB)
10334             {
10335               *cost += COSTS_N_INSNS (9);
10336 
10337               if (speed_p)
10338                 {
10339                   *cost += 6 * extra_cost->alu.shift;
10340                   *cost += 3 * extra_cost->alu.logical;
10341                 }
10342             }
10343           else
10344             {
10345               *cost += COSTS_N_INSNS (4);
10346 
10347               if (speed_p)
10348                 {
10349                   *cost += 2 * extra_cost->alu.shift;
10350                   *cost += extra_cost->alu.arith_shift;
10351                   *cost += 2 * extra_cost->alu.logical;
10352                 }
10353             }
10354           return true;
10355         }
10356       return false;
10357 
10358     case MINUS:
10359       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10360 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10361 	{
10362 	  if (GET_CODE (XEXP (x, 0)) == MULT
10363 	      || GET_CODE (XEXP (x, 1)) == MULT)
10364 	    {
10365 	      rtx mul_op0, mul_op1, sub_op;
10366 
10367 	      if (speed_p)
10368 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10369 
10370 	      if (GET_CODE (XEXP (x, 0)) == MULT)
10371 		{
10372 		  mul_op0 = XEXP (XEXP (x, 0), 0);
10373 		  mul_op1 = XEXP (XEXP (x, 0), 1);
10374 		  sub_op = XEXP (x, 1);
10375 		}
10376 	      else
10377 		{
10378 		  mul_op0 = XEXP (XEXP (x, 1), 0);
10379 		  mul_op1 = XEXP (XEXP (x, 1), 1);
10380 		  sub_op = XEXP (x, 0);
10381 		}
10382 
10383 	      /* The first operand of the multiply may be optionally
10384 		 negated.  */
10385 	      if (GET_CODE (mul_op0) == NEG)
10386 		mul_op0 = XEXP (mul_op0, 0);
10387 
10388 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10389 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10390 			+ rtx_cost (sub_op, mode, code, 0, speed_p));
10391 
10392 	      return true;
10393 	    }
10394 
10395 	  if (speed_p)
10396 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10397 	  return false;
10398 	}
10399 
10400       if (mode == SImode)
10401 	{
10402 	  rtx shift_by_reg = NULL;
10403 	  rtx shift_op;
10404 	  rtx non_shift_op;
10405 	  rtx op0 = XEXP (x, 0);
10406 	  rtx op1 = XEXP (x, 1);
10407 
10408 	  /* Factor out any borrow operation.  There's more than one way
10409 	     of expressing this; try to recognize them all.  */
10410 	  if (GET_CODE (op0) == MINUS)
10411 	    {
10412 	      if (arm_borrow_operation (op1, SImode))
10413 		{
10414 		  op1 = XEXP (op0, 1);
10415 		  op0 = XEXP (op0, 0);
10416 		}
10417 	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10418 		op0 = XEXP (op0, 0);
10419 	    }
10420 	  else if (GET_CODE (op1) == PLUS
10421 		   && arm_borrow_operation (XEXP (op1, 0), SImode))
10422 	    op1 = XEXP (op1, 0);
10423 	  else if (GET_CODE (op0) == NEG
10424 		   && arm_borrow_operation (op1, SImode))
10425 	    {
10426 	      /* Negate with carry-in.  For Thumb2 this is done with
10427 		 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10428 		 RSC instruction that exists in Arm mode.  */
10429 	      if (speed_p)
10430 		*cost += (TARGET_THUMB2
10431 			  ? extra_cost->alu.arith_shift
10432 			  : extra_cost->alu.arith);
10433 	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10434 	      return true;
10435 	    }
10436 	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10437 	     Note we do mean ~borrow here.  */
10438 	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10439 	    {
10440 	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
10441 	      return true;
10442 	    }
10443 
10444 	  shift_op = shifter_op_p (op0, &shift_by_reg);
10445 	  if (shift_op == NULL)
10446 	    {
10447 	      shift_op = shifter_op_p (op1, &shift_by_reg);
10448 	      non_shift_op = op0;
10449 	    }
10450 	  else
10451 	    non_shift_op = op1;
10452 
10453 	  if (shift_op != NULL)
10454 	    {
10455 	      if (shift_by_reg != NULL)
10456 		{
10457 		  if (speed_p)
10458 		    *cost += extra_cost->alu.arith_shift_reg;
10459 		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10460 		}
10461 	      else if (speed_p)
10462 		*cost += extra_cost->alu.arith_shift;
10463 
10464 	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10465 	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10466 	      return true;
10467 	    }
10468 
10469 	  if (arm_arch_thumb2
10470 	      && GET_CODE (XEXP (x, 1)) == MULT)
10471 	    {
10472 	      /* MLS.  */
10473 	      if (speed_p)
10474 		*cost += extra_cost->mult[0].add;
10475 	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10476 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10477 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10478 	      return true;
10479 	    }
10480 
10481 	  if (CONST_INT_P (op0))
10482 	    {
10483 	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10484 					    INTVAL (op0), NULL_RTX,
10485 					    NULL_RTX, 1, 0);
10486 	      *cost = COSTS_N_INSNS (insns);
10487 	      if (speed_p)
10488 		*cost += insns * extra_cost->alu.arith;
10489 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10490 	      return true;
10491 	    }
10492 	  else if (speed_p)
10493 	    *cost += extra_cost->alu.arith;
10494 
10495 	  /* Don't recurse as we don't want to cost any borrow that
10496 	     we've stripped.  */
10497 	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10498 	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10499 	  return true;
10500 	}
10501 
10502       if (GET_MODE_CLASS (mode) == MODE_INT
10503 	  && GET_MODE_SIZE (mode) < 4)
10504 	{
10505 	  rtx shift_op, shift_reg;
10506 	  shift_reg = NULL;
10507 
10508 	  /* We check both sides of the MINUS for shifter operands since,
10509 	     unlike PLUS, it's not commutative.  */
10510 
10511 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10512 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10513 
10514 	  /* Slightly disparage, as we might need to widen the result.  */
10515 	  *cost += 1;
10516 	  if (speed_p)
10517 	    *cost += extra_cost->alu.arith;
10518 
10519 	  if (CONST_INT_P (XEXP (x, 0)))
10520 	    {
10521 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10522 	      return true;
10523 	    }
10524 
10525 	  return false;
10526 	}
10527 
10528       if (mode == DImode)
10529 	{
10530 	  *cost += COSTS_N_INSNS (1);
10531 
10532 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10533 	    {
10534 	      rtx op1 = XEXP (x, 1);
10535 
10536 	      if (speed_p)
10537 		*cost += 2 * extra_cost->alu.arith;
10538 
10539 	      if (GET_CODE (op1) == ZERO_EXTEND)
10540 		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10541 				   0, speed_p);
10542 	      else
10543 		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10544 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10545 				 0, speed_p);
10546 	      return true;
10547 	    }
10548 	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10549 	    {
10550 	      if (speed_p)
10551 		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10552 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10553 				  0, speed_p)
10554 			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10555 	      return true;
10556 	    }
10557 	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10558 		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10559 	    {
10560 	      if (speed_p)
10561 		*cost += (extra_cost->alu.arith
10562 			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10563 			     ? extra_cost->alu.arith
10564 			     : extra_cost->alu.arith_shift));
10565 	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10566 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10567 				    GET_CODE (XEXP (x, 1)), 0, speed_p));
10568 	      return true;
10569 	    }
10570 
10571 	  if (speed_p)
10572 	    *cost += 2 * extra_cost->alu.arith;
10573 	  return false;
10574 	}
10575 
10576       /* Vector mode?  */
10577 
10578       *cost = LIBCALL_COST (2);
10579       return false;
10580 
10581     case PLUS:
10582       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10583 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10584 	{
10585 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10586 	    {
10587 	      rtx mul_op0, mul_op1, add_op;
10588 
10589 	      if (speed_p)
10590 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10591 
10592 	      mul_op0 = XEXP (XEXP (x, 0), 0);
10593 	      mul_op1 = XEXP (XEXP (x, 0), 1);
10594 	      add_op = XEXP (x, 1);
10595 
10596 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10597 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10598 			+ rtx_cost (add_op, mode, code, 0, speed_p));
10599 
10600 	      return true;
10601 	    }
10602 
10603 	  if (speed_p)
10604 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10605 	  return false;
10606 	}
10607       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10608 	{
10609 	  *cost = LIBCALL_COST (2);
10610 	  return false;
10611 	}
10612 
10613 	/* Narrow modes can be synthesized in SImode, but the range
10614 	   of useful sub-operations is limited.  Check for shift operations
10615 	   on one of the operands.  Only left shifts can be used in the
10616 	   narrow modes.  */
10617       if (GET_MODE_CLASS (mode) == MODE_INT
10618 	  && GET_MODE_SIZE (mode) < 4)
10619 	{
10620 	  rtx shift_op, shift_reg;
10621 	  shift_reg = NULL;
10622 
10623 	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10624 
10625 	  if (CONST_INT_P (XEXP (x, 1)))
10626 	    {
10627 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10628 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10629 					    NULL_RTX, 1, 0);
10630 	      *cost = COSTS_N_INSNS (insns);
10631 	      if (speed_p)
10632 		*cost += insns * extra_cost->alu.arith;
10633 	      /* Slightly penalize a narrow operation as the result may
10634 		 need widening.  */
10635 	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10636 	      return true;
10637 	    }
10638 
10639 	  /* Slightly penalize a narrow operation as the result may
10640 	     need widening.  */
10641 	  *cost += 1;
10642 	  if (speed_p)
10643 	    *cost += extra_cost->alu.arith;
10644 
10645 	  return false;
10646 	}
10647 
10648       if (mode == SImode)
10649 	{
10650 	  rtx shift_op, shift_reg;
10651 
10652 	  if (TARGET_INT_SIMD
10653 	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10654 		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10655 	    {
10656 	      /* UXTA[BH] or SXTA[BH].  */
10657 	      if (speed_p)
10658 		*cost += extra_cost->alu.extend_arith;
10659 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10660 				  0, speed_p)
10661 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10662 	      return true;
10663 	    }
10664 
10665 	  rtx op0 = XEXP (x, 0);
10666 	  rtx op1 = XEXP (x, 1);
10667 
10668 	  /* Handle a side effect of adding in the carry to an addition.  */
10669 	  if (GET_CODE (op0) == PLUS
10670 	      && arm_carry_operation (op1, mode))
10671 	    {
10672 	      op1 = XEXP (op0, 1);
10673 	      op0 = XEXP (op0, 0);
10674 	    }
10675 	  else if (GET_CODE (op1) == PLUS
10676 		   && arm_carry_operation (op0, mode))
10677 	    {
10678 	      op0 = XEXP (op1, 0);
10679 	      op1 = XEXP (op1, 1);
10680 	    }
10681 	  else if (GET_CODE (op0) == PLUS)
10682 	    {
10683 	      op0 = strip_carry_operation (op0);
10684 	      if (swap_commutative_operands_p (op0, op1))
10685 		std::swap (op0, op1);
10686 	    }
10687 
10688 	  if (arm_carry_operation (op0, mode))
10689 	    {
10690 	      /* Adding the carry to a register is a canonicalization of
10691 		 adding 0 to the register plus the carry.  */
10692 	      if (speed_p)
10693 		*cost += extra_cost->alu.arith;
10694 	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10695 	      return true;
10696 	    }
10697 
10698 	  shift_reg = NULL;
10699 	  shift_op = shifter_op_p (op0, &shift_reg);
10700 	  if (shift_op != NULL)
10701 	    {
10702 	      if (shift_reg)
10703 		{
10704 		  if (speed_p)
10705 		    *cost += extra_cost->alu.arith_shift_reg;
10706 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10707 		}
10708 	      else if (speed_p)
10709 		*cost += extra_cost->alu.arith_shift;
10710 
10711 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10712 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10713 	      return true;
10714 	    }
10715 
10716 	  if (GET_CODE (op0) == MULT)
10717 	    {
10718 	      rtx mul_op = op0;
10719 
10720 	      if (TARGET_DSP_MULTIPLY
10721 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10722 		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10723 			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10724 			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10725 			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10726 		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10727 			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10728 			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10729 			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10730 			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10731 				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10732 				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10733 				      == 16))))))
10734 		{
10735 		  /* SMLA[BT][BT].  */
10736 		  if (speed_p)
10737 		    *cost += extra_cost->mult[0].extend_add;
10738 		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10739 				      SIGN_EXTEND, 0, speed_p)
10740 			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10741 					SIGN_EXTEND, 0, speed_p)
10742 			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
10743 		  return true;
10744 		}
10745 
10746 	      if (speed_p)
10747 		*cost += extra_cost->mult[0].add;
10748 	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10749 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10750 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10751 	      return true;
10752 	    }
10753 
10754 	  if (CONST_INT_P (op1))
10755 	    {
10756 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10757 					    INTVAL (op1), NULL_RTX,
10758 					    NULL_RTX, 1, 0);
10759 	      *cost = COSTS_N_INSNS (insns);
10760 	      if (speed_p)
10761 		*cost += insns * extra_cost->alu.arith;
10762 	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10763 	      return true;
10764 	    }
10765 
10766 	  if (speed_p)
10767 	    *cost += extra_cost->alu.arith;
10768 
10769 	  /* Don't recurse here because we want to test the operands
10770 	     without any carry operation.  */
10771 	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10772 	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10773 	  return true;
10774 	}
10775 
10776       if (mode == DImode)
10777 	{
10778 	  if (GET_CODE (XEXP (x, 0)) == MULT
10779 	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10780 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10781 		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10782 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10783 	    {
10784 	      if (speed_p)
10785 		*cost += extra_cost->mult[1].extend_add;
10786 	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10787 				  ZERO_EXTEND, 0, speed_p)
10788 			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10789 				    ZERO_EXTEND, 0, speed_p)
10790 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10791 	      return true;
10792 	    }
10793 
10794 	  *cost += COSTS_N_INSNS (1);
10795 
10796 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10797 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10798 	    {
10799 	      if (speed_p)
10800 		*cost += (extra_cost->alu.arith
10801 			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10802 			     ? extra_cost->alu.arith
10803 			     : extra_cost->alu.arith_shift));
10804 
10805 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10806 				  0, speed_p)
10807 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10808 	      return true;
10809 	    }
10810 
10811 	  if (speed_p)
10812 	    *cost += 2 * extra_cost->alu.arith;
10813 	  return false;
10814 	}
10815 
10816       /* Vector mode?  */
10817       *cost = LIBCALL_COST (2);
10818       return false;
10819     case IOR:
10820       if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10821         {
10822           if (speed_p)
10823             *cost += extra_cost->alu.rev;
10824 
10825           return true;
10826         }
10827     /* Fall through.  */
10828     case AND: case XOR:
10829       if (mode == SImode)
10830 	{
10831 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10832 	  rtx op0 = XEXP (x, 0);
10833 	  rtx shift_op, shift_reg;
10834 
10835 	  if (subcode == NOT
10836 	      && (code == AND
10837 		  || (code == IOR && TARGET_THUMB2)))
10838 	    op0 = XEXP (op0, 0);
10839 
10840 	  shift_reg = NULL;
10841 	  shift_op = shifter_op_p (op0, &shift_reg);
10842 	  if (shift_op != NULL)
10843 	    {
10844 	      if (shift_reg)
10845 		{
10846 		  if (speed_p)
10847 		    *cost += extra_cost->alu.log_shift_reg;
10848 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10849 		}
10850 	      else if (speed_p)
10851 		*cost += extra_cost->alu.log_shift;
10852 
10853 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10854 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10855 	      return true;
10856 	    }
10857 
10858 	  if (CONST_INT_P (XEXP (x, 1)))
10859 	    {
10860 	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10861 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10862 					    NULL_RTX, 1, 0);
10863 
10864 	      *cost = COSTS_N_INSNS (insns);
10865 	      if (speed_p)
10866 		*cost += insns * extra_cost->alu.logical;
10867 	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
10868 	      return true;
10869 	    }
10870 
10871 	  if (speed_p)
10872 	    *cost += extra_cost->alu.logical;
10873 	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10874 		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10875 	  return true;
10876 	}
10877 
10878       if (mode == DImode)
10879 	{
10880 	  rtx op0 = XEXP (x, 0);
10881 	  enum rtx_code subcode = GET_CODE (op0);
10882 
10883 	  *cost += COSTS_N_INSNS (1);
10884 
10885 	  if (subcode == NOT
10886 	      && (code == AND
10887 		  || (code == IOR && TARGET_THUMB2)))
10888 	    op0 = XEXP (op0, 0);
10889 
10890 	  if (GET_CODE (op0) == ZERO_EXTEND)
10891 	    {
10892 	      if (speed_p)
10893 		*cost += 2 * extra_cost->alu.logical;
10894 
10895 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10896 				  0, speed_p)
10897 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10898 	      return true;
10899 	    }
10900 	  else if (GET_CODE (op0) == SIGN_EXTEND)
10901 	    {
10902 	      if (speed_p)
10903 		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10904 
10905 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10906 				  0, speed_p)
10907 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10908 	      return true;
10909 	    }
10910 
10911 	  if (speed_p)
10912 	    *cost += 2 * extra_cost->alu.logical;
10913 
10914 	  return true;
10915 	}
10916       /* Vector mode?  */
10917 
10918       *cost = LIBCALL_COST (2);
10919       return false;
10920 
10921     case MULT:
10922       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10923 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10924 	{
10925 	  rtx op0 = XEXP (x, 0);
10926 
10927 	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10928 	    op0 = XEXP (op0, 0);
10929 
10930 	  if (speed_p)
10931 	    *cost += extra_cost->fp[mode != SFmode].mult;
10932 
10933 	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10934 		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10935 	  return true;
10936 	}
10937       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10938 	{
10939 	  *cost = LIBCALL_COST (2);
10940 	  return false;
10941 	}
10942 
10943       if (mode == SImode)
10944 	{
10945 	  if (TARGET_DSP_MULTIPLY
10946 	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10947 		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10948 		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10949 			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10950 			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10951 		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10952 		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10953 		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10954 		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10955 			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10956 			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10957 			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10958 				  == 16))))))
10959 	    {
10960 	      /* SMUL[TB][TB].  */
10961 	      if (speed_p)
10962 		*cost += extra_cost->mult[0].extend;
10963 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10964 				 SIGN_EXTEND, 0, speed_p);
10965 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10966 				 SIGN_EXTEND, 1, speed_p);
10967 	      return true;
10968 	    }
10969 	  if (speed_p)
10970 	    *cost += extra_cost->mult[0].simple;
10971 	  return false;
10972 	}
10973 
10974       if (mode == DImode)
10975 	{
10976 	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10977 		&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10978 	       || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10979 		   && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10980 	    {
10981 	      if (speed_p)
10982 		*cost += extra_cost->mult[1].extend;
10983 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10984 				  ZERO_EXTEND, 0, speed_p)
10985 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10986 				    ZERO_EXTEND, 0, speed_p));
10987 	      return true;
10988 	    }
10989 
10990 	  *cost = LIBCALL_COST (2);
10991 	  return false;
10992 	}
10993 
10994       /* Vector mode?  */
10995       *cost = LIBCALL_COST (2);
10996       return false;
10997 
10998     case NEG:
10999       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11000 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11001 	{
11002 	  if (GET_CODE (XEXP (x, 0)) == MULT)
11003 	    {
11004 	      /* VNMUL.  */
11005 	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11006 	      return true;
11007 	    }
11008 
11009 	  if (speed_p)
11010 	    *cost += extra_cost->fp[mode != SFmode].neg;
11011 
11012 	  return false;
11013 	}
11014       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11015 	{
11016 	  *cost = LIBCALL_COST (1);
11017 	  return false;
11018 	}
11019 
11020       if (mode == SImode)
11021 	{
11022 	  if (GET_CODE (XEXP (x, 0)) == ABS)
11023 	    {
11024 	      *cost += COSTS_N_INSNS (1);
11025 	      /* Assume the non-flag-changing variant.  */
11026 	      if (speed_p)
11027 		*cost += (extra_cost->alu.log_shift
11028 			  + extra_cost->alu.arith_shift);
11029 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11030 	      return true;
11031 	    }
11032 
11033 	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11034 	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11035 	    {
11036 	      *cost += COSTS_N_INSNS (1);
11037 	      /* No extra cost for MOV imm and MVN imm.  */
11038 	      /* If the comparison op is using the flags, there's no further
11039 		 cost, otherwise we need to add the cost of the comparison.  */
11040 	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
11041 		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11042 		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
11043 		{
11044 		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11045 		  *cost += (COSTS_N_INSNS (1)
11046 			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11047 					0, speed_p)
11048 			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11049 					1, speed_p));
11050 		  if (speed_p)
11051 		    *cost += extra_cost->alu.arith;
11052 		}
11053 	      return true;
11054 	    }
11055 
11056 	  if (speed_p)
11057 	    *cost += extra_cost->alu.arith;
11058 	  return false;
11059 	}
11060 
11061       if (GET_MODE_CLASS (mode) == MODE_INT
11062 	  && GET_MODE_SIZE (mode) < 4)
11063 	{
11064 	  /* Slightly disparage, as we might need an extend operation.  */
11065 	  *cost += 1;
11066 	  if (speed_p)
11067 	    *cost += extra_cost->alu.arith;
11068 	  return false;
11069 	}
11070 
11071       if (mode == DImode)
11072 	{
11073 	  *cost += COSTS_N_INSNS (1);
11074 	  if (speed_p)
11075 	    *cost += 2 * extra_cost->alu.arith;
11076 	  return false;
11077 	}
11078 
11079       /* Vector mode?  */
11080       *cost = LIBCALL_COST (1);
11081       return false;
11082 
11083     case NOT:
11084       if (mode == SImode)
11085 	{
11086 	  rtx shift_op;
11087 	  rtx shift_reg = NULL;
11088 
11089 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11090 
11091 	  if (shift_op)
11092 	    {
11093 	      if (shift_reg != NULL)
11094 		{
11095 		  if (speed_p)
11096 		    *cost += extra_cost->alu.log_shift_reg;
11097 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11098 		}
11099 	      else if (speed_p)
11100 		*cost += extra_cost->alu.log_shift;
11101 	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11102 	      return true;
11103 	    }
11104 
11105 	  if (speed_p)
11106 	    *cost += extra_cost->alu.logical;
11107 	  return false;
11108 	}
11109       if (mode == DImode)
11110 	{
11111 	  *cost += COSTS_N_INSNS (1);
11112 	  return false;
11113 	}
11114 
11115       /* Vector mode?  */
11116 
11117       *cost += LIBCALL_COST (1);
11118       return false;
11119 
11120     case IF_THEN_ELSE:
11121       {
11122         if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11123 	  {
11124 	    *cost += COSTS_N_INSNS (3);
11125 	    return true;
11126 	  }
11127 	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11128 	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11129 
11130 	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11131 	/* Assume that if one arm of the if_then_else is a register,
11132 	   it will be tied with the result and the conditional insn
11133 	   eliminated.  */
11134 	if (REG_P (XEXP (x, 1)))
11135 	  *cost += op2cost;
11136 	else if (REG_P (XEXP (x, 2)))
11137 	  *cost += op1cost;
11138 	else
11139 	  {
11140 	    if (speed_p)
11141 	      {
11142 		if (extra_cost->alu.non_exec_costs_exec)
11143 		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11144 		else
11145 		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11146 	      }
11147 	    else
11148 	      *cost += op1cost + op2cost;
11149 	  }
11150       }
11151       return true;
11152 
11153     case COMPARE:
11154       if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11155 	*cost = 0;
11156       else
11157 	{
11158 	  machine_mode op0mode;
11159 	  /* We'll mostly assume that the cost of a compare is the cost of the
11160 	     LHS.  However, there are some notable exceptions.  */
11161 
11162 	  /* Floating point compares are never done as side-effects.  */
11163 	  op0mode = GET_MODE (XEXP (x, 0));
11164 	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11165 	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11166 	    {
11167 	      if (speed_p)
11168 		*cost += extra_cost->fp[op0mode != SFmode].compare;
11169 
11170 	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
11171 		{
11172 		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11173 		  return true;
11174 		}
11175 
11176 	      return false;
11177 	    }
11178 	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11179 	    {
11180 	      *cost = LIBCALL_COST (2);
11181 	      return false;
11182 	    }
11183 
11184 	  /* DImode compares normally take two insns.  */
11185 	  if (op0mode == DImode)
11186 	    {
11187 	      *cost += COSTS_N_INSNS (1);
11188 	      if (speed_p)
11189 		*cost += 2 * extra_cost->alu.arith;
11190 	      return false;
11191 	    }
11192 
11193 	  if (op0mode == SImode)
11194 	    {
11195 	      rtx shift_op;
11196 	      rtx shift_reg;
11197 
11198 	      if (XEXP (x, 1) == const0_rtx
11199 		  && !(REG_P (XEXP (x, 0))
11200 		       || (GET_CODE (XEXP (x, 0)) == SUBREG
11201 			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
11202 		{
11203 		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11204 
11205 		  /* Multiply operations that set the flags are often
11206 		     significantly more expensive.  */
11207 		  if (speed_p
11208 		      && GET_CODE (XEXP (x, 0)) == MULT
11209 		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11210 		    *cost += extra_cost->mult[0].flag_setting;
11211 
11212 		  if (speed_p
11213 		      && GET_CODE (XEXP (x, 0)) == PLUS
11214 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11215 		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11216 							    0), 1), mode))
11217 		    *cost += extra_cost->mult[0].flag_setting;
11218 		  return true;
11219 		}
11220 
11221 	      shift_reg = NULL;
11222 	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11223 	      if (shift_op != NULL)
11224 		{
11225 		  if (shift_reg != NULL)
11226 		    {
11227 		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11228 					 1, speed_p);
11229 		      if (speed_p)
11230 			*cost += extra_cost->alu.arith_shift_reg;
11231 		    }
11232 		  else if (speed_p)
11233 		    *cost += extra_cost->alu.arith_shift;
11234 		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11235 		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11236 		  return true;
11237 		}
11238 
11239 	      if (speed_p)
11240 		*cost += extra_cost->alu.arith;
11241 	      if (CONST_INT_P (XEXP (x, 1))
11242 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11243 		{
11244 		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11245 		  return true;
11246 		}
11247 	      return false;
11248 	    }
11249 
11250 	  /* Vector mode?  */
11251 
11252 	  *cost = LIBCALL_COST (2);
11253 	  return false;
11254 	}
11255       return true;
11256 
11257     case EQ:
11258     case GE:
11259     case GT:
11260     case LE:
11261     case LT:
11262       /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11263 	 vcle and vclt). */
11264       if (TARGET_NEON
11265 	  && TARGET_HARD_FLOAT
11266 	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11267 	  && (XEXP (x, 1) == CONST0_RTX (mode)))
11268 	{
11269 	  *cost = 0;
11270 	  return true;
11271 	}
11272 
11273       /* Fall through.  */
11274     case NE:
11275     case LTU:
11276     case LEU:
11277     case GEU:
11278     case GTU:
11279     case ORDERED:
11280     case UNORDERED:
11281     case UNEQ:
11282     case UNLE:
11283     case UNLT:
11284     case UNGE:
11285     case UNGT:
11286     case LTGT:
11287       if (outer_code == SET)
11288 	{
11289 	  /* Is it a store-flag operation?  */
11290 	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11291 	      && XEXP (x, 1) == const0_rtx)
11292 	    {
11293 	      /* Thumb also needs an IT insn.  */
11294 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11295 	      return true;
11296 	    }
11297 	  if (XEXP (x, 1) == const0_rtx)
11298 	    {
11299 	      switch (code)
11300 		{
11301 		case LT:
11302 		  /* LSR Rd, Rn, #31.  */
11303 		  if (speed_p)
11304 		    *cost += extra_cost->alu.shift;
11305 		  break;
11306 
11307 		case EQ:
11308 		  /* RSBS T1, Rn, #0
11309 		     ADC  Rd, Rn, T1.  */
11310 
11311 		case NE:
11312 		  /* SUBS T1, Rn, #1
11313 		     SBC  Rd, Rn, T1.  */
11314 		  *cost += COSTS_N_INSNS (1);
11315 		  break;
11316 
11317 		case LE:
11318 		  /* RSBS T1, Rn, Rn, LSR #31
11319 		     ADC  Rd, Rn, T1. */
11320 		  *cost += COSTS_N_INSNS (1);
11321 		  if (speed_p)
11322 		    *cost += extra_cost->alu.arith_shift;
11323 		  break;
11324 
11325 		case GT:
11326 		  /* RSB  Rd, Rn, Rn, ASR #1
11327 		     LSR  Rd, Rd, #31.  */
11328 		  *cost += COSTS_N_INSNS (1);
11329 		  if (speed_p)
11330 		    *cost += (extra_cost->alu.arith_shift
11331 			      + extra_cost->alu.shift);
11332 		  break;
11333 
11334 		case GE:
11335 		  /* ASR  Rd, Rn, #31
11336 		     ADD  Rd, Rn, #1.  */
11337 		  *cost += COSTS_N_INSNS (1);
11338 		  if (speed_p)
11339 		    *cost += extra_cost->alu.shift;
11340 		  break;
11341 
11342 		default:
11343 		  /* Remaining cases are either meaningless or would take
11344 		     three insns anyway.  */
11345 		  *cost = COSTS_N_INSNS (3);
11346 		  break;
11347 		}
11348 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11349 	      return true;
11350 	    }
11351 	  else
11352 	    {
11353 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11354 	      if (CONST_INT_P (XEXP (x, 1))
11355 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11356 		{
11357 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11358 		  return true;
11359 		}
11360 
11361 	      return false;
11362 	    }
11363 	}
11364       /* Not directly inside a set.  If it involves the condition code
11365 	 register it must be the condition for a branch, cond_exec or
11366 	 I_T_E operation.  Since the comparison is performed elsewhere
11367 	 this is just the control part which has no additional
11368 	 cost.  */
11369       else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11370 	       && XEXP (x, 1) == const0_rtx)
11371 	{
11372 	  *cost = 0;
11373 	  return true;
11374 	}
11375       return false;
11376 
11377     case ABS:
11378       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11379 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11380 	{
11381 	  if (speed_p)
11382 	    *cost += extra_cost->fp[mode != SFmode].neg;
11383 
11384 	  return false;
11385 	}
11386       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11387 	{
11388 	  *cost = LIBCALL_COST (1);
11389 	  return false;
11390 	}
11391 
11392       if (mode == SImode)
11393 	{
11394 	  if (speed_p)
11395 	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11396 	  return false;
11397 	}
11398       /* Vector mode?  */
11399       *cost = LIBCALL_COST (1);
11400       return false;
11401 
11402     case SIGN_EXTEND:
11403       if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11404 	  && MEM_P (XEXP (x, 0)))
11405 	{
11406 	  if (mode == DImode)
11407 	    *cost += COSTS_N_INSNS (1);
11408 
11409 	  if (!speed_p)
11410 	    return true;
11411 
11412 	  if (GET_MODE (XEXP (x, 0)) == SImode)
11413 	    *cost += extra_cost->ldst.load;
11414 	  else
11415 	    *cost += extra_cost->ldst.load_sign_extend;
11416 
11417 	  if (mode == DImode)
11418 	    *cost += extra_cost->alu.shift;
11419 
11420 	  return true;
11421 	}
11422 
11423       /* Widening from less than 32-bits requires an extend operation.  */
11424       if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11425 	{
11426 	  /* We have SXTB/SXTH.  */
11427 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11428 	  if (speed_p)
11429 	    *cost += extra_cost->alu.extend;
11430 	}
11431       else if (GET_MODE (XEXP (x, 0)) != SImode)
11432 	{
11433 	  /* Needs two shifts.  */
11434 	  *cost += COSTS_N_INSNS (1);
11435 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11436 	  if (speed_p)
11437 	    *cost += 2 * extra_cost->alu.shift;
11438 	}
11439 
11440       /* Widening beyond 32-bits requires one more insn.  */
11441       if (mode == DImode)
11442 	{
11443 	  *cost += COSTS_N_INSNS (1);
11444 	  if (speed_p)
11445 	    *cost += extra_cost->alu.shift;
11446 	}
11447 
11448       return true;
11449 
11450     case ZERO_EXTEND:
11451       if ((arm_arch4
11452 	   || GET_MODE (XEXP (x, 0)) == SImode
11453 	   || GET_MODE (XEXP (x, 0)) == QImode)
11454 	  && MEM_P (XEXP (x, 0)))
11455 	{
11456 	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11457 
11458 	  if (mode == DImode)
11459 	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
11460 
11461 	  return true;
11462 	}
11463 
11464       /* Widening from less than 32-bits requires an extend operation.  */
11465       if (GET_MODE (XEXP (x, 0)) == QImode)
11466 	{
11467 	  /* UXTB can be a shorter instruction in Thumb2, but it might
11468 	     be slower than the AND Rd, Rn, #255 alternative.  When
11469 	     optimizing for speed it should never be slower to use
11470 	     AND, and we don't really model 16-bit vs 32-bit insns
11471 	     here.  */
11472 	  if (speed_p)
11473 	    *cost += extra_cost->alu.logical;
11474 	}
11475       else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11476 	{
11477 	  /* We have UXTB/UXTH.  */
11478 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11479 	  if (speed_p)
11480 	    *cost += extra_cost->alu.extend;
11481 	}
11482       else if (GET_MODE (XEXP (x, 0)) != SImode)
11483 	{
11484 	  /* Needs two shifts.  It's marginally preferable to use
11485 	     shifts rather than two BIC instructions as the second
11486 	     shift may merge with a subsequent insn as a shifter
11487 	     op.  */
11488 	  *cost = COSTS_N_INSNS (2);
11489 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11490 	  if (speed_p)
11491 	    *cost += 2 * extra_cost->alu.shift;
11492 	}
11493 
11494       /* Widening beyond 32-bits requires one more insn.  */
11495       if (mode == DImode)
11496 	{
11497 	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
11498 	}
11499 
11500       return true;
11501 
11502     case CONST_INT:
11503       *cost = 0;
11504       /* CONST_INT has no mode, so we cannot tell for sure how many
11505 	 insns are really going to be needed.  The best we can do is
11506 	 look at the value passed.  If it fits in SImode, then assume
11507 	 that's the mode it will be used for.  Otherwise assume it
11508 	 will be used in DImode.  */
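      /* For example, a CONST_INT of -1 survives trunc_int_for_mode for
	 SImode and so is costed as an SImode constant, while a value such
	 as ((HOST_WIDE_INT) 1 << 32) truncates to zero and is therefore
	 costed as a DImode constant below.  */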
11509       if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11510 	mode = SImode;
11511       else
11512 	mode = DImode;
11513 
11514       /* Avoid blowing up in arm_gen_constant ().  */
11515       if (!(outer_code == PLUS
11516 	    || outer_code == AND
11517 	    || outer_code == IOR
11518 	    || outer_code == XOR
11519 	    || outer_code == MINUS))
11520 	outer_code = SET;
11521 
11522     const_int_cost:
11523       if (mode == SImode)
11524 	{
11525 	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11526 						    INTVAL (x), NULL, NULL,
11527 						    0, 0));
11528 	  /* Extra costs?  */
11529 	}
11530       else
11531 	{
11532 	  *cost += COSTS_N_INSNS (arm_gen_constant
11533 				  (outer_code, SImode, NULL,
11534 				   trunc_int_for_mode (INTVAL (x), SImode),
11535 				   NULL, NULL, 0, 0)
11536 				  + arm_gen_constant (outer_code, SImode, NULL,
11537 						      INTVAL (x) >> 32, NULL,
11538 						      NULL, 0, 0));
11539 	  /* Extra costs?  */
11540 	}
11541 
11542       return true;
11543 
11544     case CONST:
11545     case LABEL_REF:
11546     case SYMBOL_REF:
11547       if (speed_p)
11548 	{
11549 	  if (arm_arch_thumb2 && !flag_pic)
11550 	    *cost += COSTS_N_INSNS (1);
11551 	  else
11552 	    *cost += extra_cost->ldst.load;
11553 	}
11554       else
11555 	*cost += COSTS_N_INSNS (1);
11556 
11557       if (flag_pic)
11558 	{
11559 	  *cost += COSTS_N_INSNS (1);
11560 	  if (speed_p)
11561 	    *cost += extra_cost->alu.arith;
11562 	}
11563 
11564       return true;
11565 
11566     case CONST_FIXED:
11567       *cost = COSTS_N_INSNS (4);
11568       /* Fixme.  */
11569       return true;
11570 
11571     case CONST_DOUBLE:
11572       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11573 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11574 	{
11575 	  if (vfp3_const_double_rtx (x))
11576 	    {
11577 	      if (speed_p)
11578 		*cost += extra_cost->fp[mode == DFmode].fpconst;
11579 	      return true;
11580 	    }
11581 
11582 	  if (speed_p)
11583 	    {
11584 	      if (mode == DFmode)
11585 		*cost += extra_cost->ldst.loadd;
11586 	      else
11587 		*cost += extra_cost->ldst.loadf;
11588 	    }
11589 	  else
11590 	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11591 
11592 	  return true;
11593 	}
11594       *cost = COSTS_N_INSNS (4);
11595       return true;
11596 
11597     case CONST_VECTOR:
11598       /* Fixme.  */
11599       if (((TARGET_NEON && TARGET_HARD_FLOAT
11600 	    && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11601 	   || TARGET_HAVE_MVE)
11602 	  && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11603 	*cost = COSTS_N_INSNS (1);
11604       else
11605 	*cost = COSTS_N_INSNS (4);
11606       return true;
11607 
11608     case HIGH:
11609     case LO_SUM:
11610       /* When optimizing for size, we prefer constant pool entries to
11611 	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
11612       if (!speed_p)
11613 	*cost += 1;
11614       return true;
11615 
11616     case CLZ:
11617       if (speed_p)
11618 	*cost += extra_cost->alu.clz;
11619       return false;
11620 
11621     case SMIN:
11622       if (XEXP (x, 1) == const0_rtx)
11623 	{
11624 	  if (speed_p)
11625 	    *cost += extra_cost->alu.log_shift;
11626 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11627 	  return true;
11628 	}
11629       /* Fall through.  */
11630     case SMAX:
11631     case UMIN:
11632     case UMAX:
11633       *cost += COSTS_N_INSNS (1);
11634       return false;
11635 
11636     case TRUNCATE:
11637       if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11638 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11639 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11640 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11641 	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11642 	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11643 	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11644 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11645 		      == ZERO_EXTEND))))
11646 	{
11647 	  if (speed_p)
11648 	    *cost += extra_cost->mult[1].extend;
11649 	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11650 			      ZERO_EXTEND, 0, speed_p)
11651 		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11652 				ZERO_EXTEND, 0, speed_p));
11653 	  return true;
11654 	}
11655       *cost = LIBCALL_COST (1);
11656       return false;
11657 
11658     case UNSPEC_VOLATILE:
11659     case UNSPEC:
11660       return arm_unspec_cost (x, outer_code, speed_p, cost);
11661 
11662     case PC:
11663       /* Reading the PC is like reading any other register.  Writing it
11664 	 is more expensive, but we take that into account elsewhere.  */
11665       *cost = 0;
11666       return true;
11667 
11668     case ZERO_EXTRACT:
11669       /* TODO: Simple zero_extract of bottom bits using AND.  */
11670       /* Fall through.  */
11671     case SIGN_EXTRACT:
11672       if (arm_arch6
11673 	  && mode == SImode
11674 	  && CONST_INT_P (XEXP (x, 1))
11675 	  && CONST_INT_P (XEXP (x, 2)))
11676 	{
11677 	  if (speed_p)
11678 	    *cost += extra_cost->alu.bfx;
11679 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11680 	  return true;
11681 	}
11682       /* Without UBFX/SBFX, need to resort to shift operations.  */
11683       *cost += COSTS_N_INSNS (1);
11684       if (speed_p)
11685 	*cost += 2 * extra_cost->alu.shift;
11686       *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11687       return true;
11688 
11689     case FLOAT_EXTEND:
11690       if (TARGET_HARD_FLOAT)
11691 	{
11692 	  if (speed_p)
11693 	    *cost += extra_cost->fp[mode == DFmode].widen;
11694 	  if (!TARGET_VFP5
11695 	      && GET_MODE (XEXP (x, 0)) == HFmode)
11696 	    {
11697 	      /* Pre v8, widening HF->DF is a two-step process, first
11698 	         widening to SFmode.  */
11699 	      *cost += COSTS_N_INSNS (1);
11700 	      if (speed_p)
11701 		*cost += extra_cost->fp[0].widen;
11702 	    }
11703 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11704 	  return true;
11705 	}
11706 
11707       *cost = LIBCALL_COST (1);
11708       return false;
11709 
11710     case FLOAT_TRUNCATE:
11711       if (TARGET_HARD_FLOAT)
11712 	{
11713 	  if (speed_p)
11714 	    *cost += extra_cost->fp[mode == DFmode].narrow;
11715 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11716 	  return true;
11717 	  /* Vector modes?  */
11718 	}
11719       *cost = LIBCALL_COST (1);
11720       return false;
11721 
11722     case FMA:
11723       if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11724         {
11725           rtx op0 = XEXP (x, 0);
11726           rtx op1 = XEXP (x, 1);
11727           rtx op2 = XEXP (x, 2);
11728 
11729 
11730           /* vfms or vfnma.  */
11731           if (GET_CODE (op0) == NEG)
11732             op0 = XEXP (op0, 0);
11733 
11734           /* vfnms or vfnma.  */
11735           if (GET_CODE (op2) == NEG)
11736             op2 = XEXP (op2, 0);
11737 
11738           *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11739           *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11740           *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11741 
11742           if (speed_p)
11743             *cost += extra_cost->fp[mode == DFmode].fma;
11744 
11745           return true;
11746         }
11747 
11748       *cost = LIBCALL_COST (3);
11749       return false;
11750 
11751     case FIX:
11752     case UNSIGNED_FIX:
11753       if (TARGET_HARD_FLOAT)
11754 	{
11755 	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
11756 	     a vcvt fixed-point conversion.  */
11757 	  if (code == FIX && mode == SImode
11758 	      && GET_CODE (XEXP (x, 0)) == FIX
11759 	      && GET_MODE (XEXP (x, 0)) == SFmode
11760 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11761 	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11762 		 > 0)
11763 	    {
11764 	      if (speed_p)
11765 		*cost += extra_cost->fp[0].toint;
11766 
11767 	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11768 				 code, 0, speed_p);
11769 	      return true;
11770 	    }
11771 
11772 	  if (GET_MODE_CLASS (mode) == MODE_INT)
11773 	    {
11774 	      mode = GET_MODE (XEXP (x, 0));
11775 	      if (speed_p)
11776 		*cost += extra_cost->fp[mode == DFmode].toint;
11777 	      /* Strip off the 'cost' of rounding towards zero.  */
11778 	      if (GET_CODE (XEXP (x, 0)) == FIX)
11779 		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11780 				   0, speed_p);
11781 	      else
11782 		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11783 	      /* ??? Increase the cost to deal with transferring from
11784 		 FP -> CORE registers?  */
11785 	      return true;
11786 	    }
11787 	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11788 		   && TARGET_VFP5)
11789 	    {
11790 	      if (speed_p)
11791 		*cost += extra_cost->fp[mode == DFmode].roundint;
11792 	      return false;
11793 	    }
11794 	  /* Vector costs? */
11795 	}
11796       *cost = LIBCALL_COST (1);
11797       return false;
11798 
11799     case FLOAT:
11800     case UNSIGNED_FLOAT:
11801       if (TARGET_HARD_FLOAT)
11802 	{
11803 	  /* ??? Increase the cost to deal with transferring from CORE
11804 	     -> FP registers?  */
11805 	  if (speed_p)
11806 	    *cost += extra_cost->fp[mode == DFmode].fromint;
11807 	  return false;
11808 	}
11809       *cost = LIBCALL_COST (1);
11810       return false;
11811 
11812     case CALL:
11813       return true;
11814 
11815     case ASM_OPERANDS:
11816       {
11817         /* Just a guess: the number of instructions in the asm template
11818            plus one insn per input, but always at least COSTS_N_INSNS (1)
11819            (see PR60663).  */
11820         int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11821         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11822 
11823         *cost = COSTS_N_INSNS (asm_length + num_operands);
11824         return true;
11825       }
11826     default:
11827       if (mode != VOIDmode)
11828 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11829       else
11830 	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11831       return false;
11832     }
11833 }
11834 
11835 #undef HANDLE_NARROW_SHIFT_ARITH
11836 
11837 /* RTX costs entry point.  */
11838 
11839 static bool
11840 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11841 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11842 {
11843   bool result;
11844   int code = GET_CODE (x);
11845   gcc_assert (current_tune->insn_extra_cost);
11846 
11847   result =  arm_rtx_costs_internal (x, (enum rtx_code) code,
11848 				(enum rtx_code) outer_code,
11849 				current_tune->insn_extra_cost,
11850 				total, speed);
11851 
11852   if (dump_file && arm_verbose_cost)
11853     {
11854       print_rtl_single (dump_file, x);
11855       fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11856 	       *total, result ? "final" : "partial");
11857     }
11858   return result;
11859 }
11860 
11861 static int
11862 arm_insn_cost (rtx_insn *insn, bool speed)
11863 {
11864   int cost;
11865 
11866   /* Don't cost a simple reg-reg move at a full insn cost: such moves
11867      will likely disappear during register allocation.  */
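  /* The value 2 is below COSTS_N_INSNS (1), so such a move is treated as
     cheaper than any real instruction.  */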
11868   if (!reload_completed
11869       && GET_CODE (PATTERN (insn)) == SET
11870       && REG_P (SET_DEST (PATTERN (insn)))
11871       && REG_P (SET_SRC (PATTERN (insn))))
11872     return 2;
11873   cost = pattern_cost (PATTERN (insn), speed);
11874   /* If the cost is zero, then it's likely a complex insn.  We don't want the
11875      cost of these to be less than something we know about.  */
11876   return cost ? cost : COSTS_N_INSNS (2);
11877 }
11878 
11879 /* All address computations that can be done are free, but rtx cost returns
11880    the same for practically all of them.  So we weight the different types
11881    of address here in the order (most pref first):
11882    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
11883 static inline int
11884 arm_arm_address_cost (rtx x)
11885 {
11886   enum rtx_code c  = GET_CODE (x);
11887 
11888   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11889     return 0;
11890   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11891     return 10;
11892 
11893   if (c == PLUS)
11894     {
11895       if (CONST_INT_P (XEXP (x, 1)))
11896 	return 2;
11897 
11898       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11899 	return 3;
11900 
11901       return 4;
11902     }
11903 
11904   return 6;
11905 }
11906 
11907 static inline int
11908 arm_thumb_address_cost (rtx x)
11909 {
11910   enum rtx_code c  = GET_CODE (x);
11911 
11912   if (c == REG)
11913     return 1;
11914   if (c == PLUS
11915       && REG_P (XEXP (x, 0))
11916       && CONST_INT_P (XEXP (x, 1)))
11917     return 1;
11918 
11919   return 2;
11920 }
11921 
11922 static int
11923 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11924 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11925 {
11926   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11927 }
11928 
11929 /* Adjust cost hook for XScale.  */
11930 static bool
11931 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11932 			  int * cost)
11933 {
11934   /* Some true dependencies can have a higher cost depending
11935      on precisely how certain input operands are used.  */
11936   if (dep_type == 0
11937       && recog_memoized (insn) >= 0
11938       && recog_memoized (dep) >= 0)
11939     {
11940       int shift_opnum = get_attr_shift (insn);
11941       enum attr_type attr_type = get_attr_type (dep);
11942 
11943       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11944 	 operand for INSN.  If we have a shifted input operand and the
11945 	 instruction we depend on is another ALU instruction, then we may
11946 	 have to account for an additional stall.  */
11947       if (shift_opnum != 0
11948 	  && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
11949 	      || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
11950 	      || attr_type == TYPE_ALUS_SHIFT_IMM
11951 	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11952 	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11953 	      || attr_type == TYPE_ALU_SHIFT_REG
11954 	      || attr_type == TYPE_ALUS_SHIFT_REG
11955 	      || attr_type == TYPE_LOGIC_SHIFT_REG
11956 	      || attr_type == TYPE_LOGICS_SHIFT_REG
11957 	      || attr_type == TYPE_MOV_SHIFT
11958 	      || attr_type == TYPE_MVN_SHIFT
11959 	      || attr_type == TYPE_MOV_SHIFT_REG
11960 	      || attr_type == TYPE_MVN_SHIFT_REG))
11961 	{
11962 	  rtx shifted_operand;
11963 	  int opno;
11964 
11965 	  /* Get the shifted operand.  */
11966 	  extract_insn (insn);
11967 	  shifted_operand = recog_data.operand[shift_opnum];
11968 
11969 	  /* Iterate over all the operands in DEP.  If we write an operand
11970 	     that overlaps with SHIFTED_OPERAND, then we have to increase
11971 	     the cost of this dependency.  */
11972 	  extract_insn (dep);
11973 	  preprocess_constraints (dep);
11974 	  for (opno = 0; opno < recog_data.n_operands; opno++)
11975 	    {
11976 	      /* We can ignore strict inputs.  */
11977 	      if (recog_data.operand_type[opno] == OP_IN)
11978 		continue;
11979 
11980 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11981 					   shifted_operand))
11982 		{
11983 		  *cost = 2;
11984 		  return false;
11985 		}
11986 	    }
11987 	}
11988     }
11989   return true;
11990 }
11991 
11992 /* Adjust cost hook for Cortex A9.  */
11993 static bool
11994 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11995 			     int * cost)
11996 {
11997   switch (dep_type)
11998     {
11999     case REG_DEP_ANTI:
12000       *cost = 0;
12001       return false;
12002 
12003     case REG_DEP_TRUE:
12004     case REG_DEP_OUTPUT:
12005 	if (recog_memoized (insn) >= 0
12006 	    && recog_memoized (dep) >= 0)
12007 	  {
12008 	    if (GET_CODE (PATTERN (insn)) == SET)
12009 	      {
12010 		if (GET_MODE_CLASS
12011 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12012 		  || GET_MODE_CLASS
12013 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12014 		  {
12015 		    enum attr_type attr_type_insn = get_attr_type (insn);
12016 		    enum attr_type attr_type_dep = get_attr_type (dep);
12017 
12018 		    /* By default all dependencies of the form
12019 		       s0 = s0 <op> s1
12020 		       s0 = s0 <op> s2
12021 		       have an extra latency of 1 cycle because
12022 		       of the input and output dependency in this
12023 		       case.  However this gets modeled as a true
12024 		       dependency and hence all these checks.  */
12025 		    if (REG_P (SET_DEST (PATTERN (insn)))
12026 			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
12027 		      {
12028 			/* FMACS is a special case where the dependent
12029 			   instruction can be issued 3 cycles before
12030 			   the normal latency in case of an output
12031 			   dependency.  */
12032 			if ((attr_type_insn == TYPE_FMACS
12033 			     || attr_type_insn == TYPE_FMACD)
12034 			    && (attr_type_dep == TYPE_FMACS
12035 				|| attr_type_dep == TYPE_FMACD))
12036 			  {
12037 			    if (dep_type == REG_DEP_OUTPUT)
12038 			      *cost = insn_default_latency (dep) - 3;
12039 			    else
12040 			      *cost = insn_default_latency (dep);
12041 			    return false;
12042 			  }
12043 			else
12044 			  {
12045 			    if (dep_type == REG_DEP_OUTPUT)
12046 			      *cost = insn_default_latency (dep) + 1;
12047 			    else
12048 			      *cost = insn_default_latency (dep);
12049 			  }
12050 			return false;
12051 		      }
12052 		  }
12053 	      }
12054 	  }
12055 	break;
12056 
12057     default:
12058       gcc_unreachable ();
12059     }
12060 
12061   return true;
12062 }
12063 
12064 /* Adjust cost hook for FA726TE.  */
12065 static bool
12066 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12067 			   int * cost)
12068 {
12069   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn
12070      followed by a predicated one) has a penalty of 3.  */
12071   if (dep_type == REG_DEP_TRUE
12072       && recog_memoized (insn) >= 0
12073       && recog_memoized (dep) >= 0
12074       && get_attr_conds (dep) == CONDS_SET)
12075     {
12076       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
12077       if (get_attr_conds (insn) == CONDS_USE
12078           && get_attr_type (insn) != TYPE_BRANCH)
12079         {
12080           *cost = 3;
12081           return false;
12082         }
12083 
12084       if (GET_CODE (PATTERN (insn)) == COND_EXEC
12085           || get_attr_conds (insn) == CONDS_USE)
12086         {
12087           *cost = 0;
12088           return false;
12089         }
12090     }
12091 
12092   return true;
12093 }
12094 
12095 /* Implement TARGET_REGISTER_MOVE_COST.
12096 
12097    Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
12098    a move is typically more expensive than a single memory access.  We
12099    set the cost to less than two memory accesses so that floating
12100    point to integer conversion does not go through memory.  */
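/* For example, with the 32-bit memory move cost of 10 defined below, the
   VFP<->core cost of 15 stays under the cost of two memory accesses.  */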
12101 
12102 int
12103 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12104 			reg_class_t from, reg_class_t to)
12105 {
12106   if (TARGET_32BIT)
12107     {
12108       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12109 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12110 	return 15;
12111       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12112 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12113 	return 4;
12114       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12115 	return 20;
12116       else
12117 	return 2;
12118     }
12119   else
12120     {
12121       if (from == HI_REGS || to == HI_REGS)
12122 	return 4;
12123       else
12124 	return 2;
12125     }
12126 }
12127 
12128 /* Implement TARGET_MEMORY_MOVE_COST.  */
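/* For example, in Thumb-1 an SImode (4-byte) value costs 2 * 4 = 8 to move
   to or from memory when the register class is LO_REGS and twice that
   otherwise, while 32-bit cores use a flat cost of 10.  */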
12129 
12130 int
12131 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12132 		      bool in ATTRIBUTE_UNUSED)
12133 {
12134   if (TARGET_32BIT)
12135     return 10;
12136   else
12137     {
12138       if (GET_MODE_SIZE (mode) < 4)
12139 	return 8;
12140       else
12141 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12142     }
12143 }
12144 
12145 /* Vectorizer cost model implementation.  */
12146 
12147 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
12148 static int
12149 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12150 				tree vectype,
12151 				int misalign ATTRIBUTE_UNUSED)
12152 {
12153   unsigned elements;
12154 
12155   switch (type_of_cost)
12156     {
12157       case scalar_stmt:
12158         return current_tune->vec_costs->scalar_stmt_cost;
12159 
12160       case scalar_load:
12161         return current_tune->vec_costs->scalar_load_cost;
12162 
12163       case scalar_store:
12164         return current_tune->vec_costs->scalar_store_cost;
12165 
12166       case vector_stmt:
12167         return current_tune->vec_costs->vec_stmt_cost;
12168 
12169       case vector_load:
12170         return current_tune->vec_costs->vec_align_load_cost;
12171 
12172       case vector_store:
12173         return current_tune->vec_costs->vec_store_cost;
12174 
12175       case vec_to_scalar:
12176         return current_tune->vec_costs->vec_to_scalar_cost;
12177 
12178       case scalar_to_vec:
12179         return current_tune->vec_costs->scalar_to_vec_cost;
12180 
12181       case unaligned_load:
12182       case vector_gather_load:
12183         return current_tune->vec_costs->vec_unalign_load_cost;
12184 
12185       case unaligned_store:
12186       case vector_scatter_store:
12187         return current_tune->vec_costs->vec_unalign_store_cost;
12188 
12189       case cond_branch_taken:
12190         return current_tune->vec_costs->cond_taken_branch_cost;
12191 
12192       case cond_branch_not_taken:
12193         return current_tune->vec_costs->cond_not_taken_branch_cost;
12194 
12195       case vec_perm:
12196       case vec_promote_demote:
12197         return current_tune->vec_costs->vec_stmt_cost;
12198 
12199       case vec_construct:
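	/* For example, constructing a four-element vector costs
	   4 / 2 + 1 = 3 units.  */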
12200 	elements = TYPE_VECTOR_SUBPARTS (vectype);
12201 	return elements / 2 + 1;
12202 
12203       default:
12204         gcc_unreachable ();
12205     }
12206 }
12207 
12208 /* Implement targetm.vectorize.add_stmt_cost.  */
12209 
12210 static unsigned
12211 arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
12212 		   enum vect_cost_for_stmt kind,
12213 		   struct _stmt_vec_info *stmt_info, tree vectype,
12214 		   int misalign, enum vect_cost_model_location where)
12215 {
12216   unsigned *cost = (unsigned *) data;
12217   unsigned retval = 0;
12218 
12219   if (flag_vect_cost_model)
12220     {
12221       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12222 
12223       /* Statements in an inner loop relative to the loop being
12224 	 vectorized are weighted more heavily.  The value here is
12225 	 arbitrary and could potentially be improved with analysis.  */
12226       if (where == vect_body && stmt_info
12227 	  && stmt_in_inner_loop_p (vinfo, stmt_info))
12228 	count *= 50;  /* FIXME.  */
12229 
12230       retval = (unsigned) (count * stmt_cost);
12231       cost[where] += retval;
12232     }
12233 
12234   return retval;
12235 }
12236 
12237 /* Return true if and only if this insn can dual-issue only as older.  */
12238 static bool
12239 cortexa7_older_only (rtx_insn *insn)
12240 {
12241   if (recog_memoized (insn) < 0)
12242     return false;
12243 
12244   switch (get_attr_type (insn))
12245     {
12246     case TYPE_ALU_DSP_REG:
12247     case TYPE_ALU_SREG:
12248     case TYPE_ALUS_SREG:
12249     case TYPE_LOGIC_REG:
12250     case TYPE_LOGICS_REG:
12251     case TYPE_ADC_REG:
12252     case TYPE_ADCS_REG:
12253     case TYPE_ADR:
12254     case TYPE_BFM:
12255     case TYPE_REV:
12256     case TYPE_MVN_REG:
12257     case TYPE_SHIFT_IMM:
12258     case TYPE_SHIFT_REG:
12259     case TYPE_LOAD_BYTE:
12260     case TYPE_LOAD_4:
12261     case TYPE_STORE_4:
12262     case TYPE_FFARITHS:
12263     case TYPE_FADDS:
12264     case TYPE_FFARITHD:
12265     case TYPE_FADDD:
12266     case TYPE_FMOV:
12267     case TYPE_F_CVT:
12268     case TYPE_FCMPS:
12269     case TYPE_FCMPD:
12270     case TYPE_FCONSTS:
12271     case TYPE_FCONSTD:
12272     case TYPE_FMULS:
12273     case TYPE_FMACS:
12274     case TYPE_FMULD:
12275     case TYPE_FMACD:
12276     case TYPE_FDIVS:
12277     case TYPE_FDIVD:
12278     case TYPE_F_MRC:
12279     case TYPE_F_MRRC:
12280     case TYPE_F_FLAG:
12281     case TYPE_F_LOADS:
12282     case TYPE_F_STORES:
12283       return true;
12284     default:
12285       return false;
12286     }
12287 }
12288 
12289 /* Return true if and only if this insn can dual-issue as younger.  */
12290 static bool
12291 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12292 {
12293   if (recog_memoized (insn) < 0)
12294     {
12295       if (verbose > 5)
12296         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12297       return false;
12298     }
12299 
12300   switch (get_attr_type (insn))
12301     {
12302     case TYPE_ALU_IMM:
12303     case TYPE_ALUS_IMM:
12304     case TYPE_LOGIC_IMM:
12305     case TYPE_LOGICS_IMM:
12306     case TYPE_EXTEND:
12307     case TYPE_MVN_IMM:
12308     case TYPE_MOV_IMM:
12309     case TYPE_MOV_REG:
12310     case TYPE_MOV_SHIFT:
12311     case TYPE_MOV_SHIFT_REG:
12312     case TYPE_BRANCH:
12313     case TYPE_CALL:
12314       return true;
12315     default:
12316       return false;
12317     }
12318 }
12319 
12320 
12321 /* Look for an instruction that can dual issue only as an older
12322    instruction, and move it in front of any instructions that can
12323    dual-issue as younger, while preserving the relative order of all
12324    other instructions in the ready list.  This is a heuristic to help
12325    dual-issue in later cycles, by postponing issue of more flexible
12326    instructions.  This heuristic may affect dual issue opportunities
12327    in the current cycle.  */
12328 static void
12329 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12330 			int *n_readyp, int clock)
12331 {
12332   int i;
12333   int first_older_only = -1, first_younger = -1;
12334 
12335   if (verbose > 5)
12336     fprintf (file,
12337              ";; sched_reorder for cycle %d with %d insns in ready list\n",
12338              clock,
12339              *n_readyp);
12340 
12341   /* Traverse the ready list from the head (the instruction to issue
12342      first), looking for the first instruction that can issue as
12343      younger and the first instruction that can dual-issue only as
12344      older.  */
12345   for (i = *n_readyp - 1; i >= 0; i--)
12346     {
12347       rtx_insn *insn = ready[i];
12348       if (cortexa7_older_only (insn))
12349         {
12350           first_older_only = i;
12351           if (verbose > 5)
12352             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12353           break;
12354         }
12355       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12356         first_younger = i;
12357     }
12358 
12359   /* Nothing to reorder: either no younger insn was found, or the insn
12360      that can dual-issue only as older already appears before any insn
12361      that can dual-issue as younger.  */
12362   if (first_younger == -1)
12363     {
12364       if (verbose > 5)
12365         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12366       return;
12367     }
12368 
12369   /* Nothing to reorder because no older-only insn in the ready list.  */
12370   if (first_older_only == -1)
12371     {
12372       if (verbose > 5)
12373         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12374       return;
12375     }
12376 
12377   /* Move first_older_only insn before first_younger.  */
12378   if (verbose > 5)
12379     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12380              INSN_UID(ready [first_older_only]),
12381              INSN_UID(ready [first_younger]));
12382   rtx_insn *first_older_only_insn = ready [first_older_only];
12383   for (i = first_older_only; i < first_younger; i++)
12384     {
12385       ready[i] = ready[i+1];
12386     }
12387 
12388   ready[i] = first_older_only_insn;
12389   return;
12390 }
12391 
12392 /* Implement TARGET_SCHED_REORDER. */
12393 static int
12394 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12395                    int clock)
12396 {
12397   switch (arm_tune)
12398     {
12399     case TARGET_CPU_cortexa7:
12400       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12401       break;
12402     default:
12403       /* Do nothing for other cores.  */
12404       break;
12405     }
12406 
12407   return arm_issue_rate ();
12408 }
12409 
12410 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12411    It corrects the value of COST based on the relationship between
12412    INSN and DEP and the dependence type DEP_TYPE, and returns the new
12413    value.  There is a per-core adjust_cost hook to adjust scheduler costs,
12414    and the per-core hook can choose to completely override the generic
12415    adjust_cost function.  Only put bits of code into arm_adjust_cost that
12416    are common across all cores.  */
12417 static int
12418 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12419 		 unsigned int)
12420 {
12421   rtx i_pat, d_pat;
12422 
12423  /* When generating Thumb-1 code, we want to place flag-setting operations
12424     close to a conditional branch which depends on them, so that we can
12425     omit the comparison. */
12426   if (TARGET_THUMB1
12427       && dep_type == 0
12428       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12429       && recog_memoized (dep) >= 0
12430       && get_attr_conds (dep) == CONDS_SET)
12431     return 0;
12432 
12433   if (current_tune->sched_adjust_cost != NULL)
12434     {
12435       if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12436 	return cost;
12437     }
12438 
12439   /* XXX Is this strictly true?  */
12440   if (dep_type == REG_DEP_ANTI
12441       || dep_type == REG_DEP_OUTPUT)
12442     return 0;
12443 
12444   /* Call insns don't incur a stall, even if they follow a load.  */
12445   if (dep_type == 0
12446       && CALL_P (insn))
12447     return 1;
12448 
12449   if ((i_pat = single_set (insn)) != NULL
12450       && MEM_P (SET_SRC (i_pat))
12451       && (d_pat = single_set (dep)) != NULL
12452       && MEM_P (SET_DEST (d_pat)))
12453     {
12454       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12455       /* This is a load after a store; there is no conflict if the load reads
12456 	 from a cached area.  Assume that loads from the stack and from the
12457 	 constant pool are cached, and that others will miss.  This is a
12458 	 hack.  */
12459 
12460       if ((SYMBOL_REF_P (src_mem)
12461 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
12462 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
12463 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
12464 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12465 	return 1;
12466     }
12467 
12468   return cost;
12469 }
12470 
12471 int
12472 arm_max_conditional_execute (void)
12473 {
12474   return max_insns_skipped;
12475 }
12476 
12477 static int
12478 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12479 {
12480   if (TARGET_32BIT)
12481     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12482   else
12483     return (optimize > 0) ? 2 : 0;
12484 }
12485 
12486 static int
12487 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12488 {
12489   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12490 }
12491 
12492 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12493    on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12494    sequences of non-executed instructions in IT blocks probably take the same
12495    amount of time as executed instructions (and the IT instruction itself takes
12496    space in icache).  This function was experimentally determined to give good
12497    results on a popular embedded benchmark.  */
12498 
12499 static int
12500 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12501 {
12502   return (TARGET_32BIT && speed_p) ? 1
12503          : arm_default_branch_cost (speed_p, predictable_p);
12504 }
12505 
12506 static int
12507 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12508 {
12509   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12510 }
12511 
12512 static bool fp_consts_inited = false;
12513 
12514 static REAL_VALUE_TYPE value_fp0;
12515 
12516 static void
12517 init_fp_table (void)
12518 {
12519   REAL_VALUE_TYPE r;
12520 
12521   r = REAL_VALUE_ATOF ("0", DFmode);
12522   value_fp0 = r;
12523   fp_consts_inited = true;
12524 }
12525 
12526 /* Return TRUE if rtx X is a valid immediate FP constant.  */
12527 int
12528 arm_const_double_rtx (rtx x)
12529 {
12530   const REAL_VALUE_TYPE *r;
12531 
12532   if (!fp_consts_inited)
12533     init_fp_table ();
12534 
12535   r = CONST_DOUBLE_REAL_VALUE (x);
12536   if (REAL_VALUE_MINUS_ZERO (*r))
12537     return 0;
12538 
12539   if (real_equal (r, &value_fp0))
12540     return 1;
12541 
12542   return 0;
12543 }
12544 
12545 /* VFPv3 has a fairly wide range of representable immediates, formed from
12546    "quarter-precision" floating-point values. These can be evaluated using this
12547    formula (with ^ for exponentiation):
12548 
12549      -1^s * n * 2^-r
12550 
12551    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12552    16 <= n <= 31 and 0 <= r <= 7.
12553 
12554    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12555 
12556      - A (most-significant) is the sign bit.
12557      - BCD are the exponent (encoded as r XOR 3).
12558      - EFGH are the mantissa (encoded as n - 16).
12559 */
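/* For example, using the formula and encoding above, 1.0 is
   +1 * 16 * 2^-4 (s = 0, n = 16, r = 4) and encodes as 0x70, while
   -0.5 is -1 * 16 * 2^-5 (s = 1, n = 16, r = 5) and encodes as 0xe0.  */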
12560 
12561 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12562    fconst[sd] instruction, or -1 if X isn't suitable.  */
12563 static int
12564 vfp3_const_double_index (rtx x)
12565 {
12566   REAL_VALUE_TYPE r, m;
12567   int sign, exponent;
12568   unsigned HOST_WIDE_INT mantissa, mant_hi;
12569   unsigned HOST_WIDE_INT mask;
12570   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12571   bool fail;
12572 
12573   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12574     return -1;
12575 
12576   r = *CONST_DOUBLE_REAL_VALUE (x);
12577 
12578   /* We can't represent these things, so detect them first.  */
12579   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12580     return -1;
12581 
12582   /* Extract sign, exponent and mantissa.  */
12583   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12584   r = real_value_abs (&r);
12585   exponent = REAL_EXP (&r);
12586   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12587      highest (sign) bit, with a fixed binary point at bit point_pos.
12588      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12589      bits for the mantissa, this may fail (low bits would be lost).  */
12590   real_ldexp (&m, &r, point_pos - exponent);
12591   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12592   mantissa = w.elt (0);
12593   mant_hi = w.elt (1);
12594 
12595   /* If there are bits set in the low part of the mantissa, we can't
12596      represent this value.  */
12597   if (mantissa != 0)
12598     return -1;
12599 
12600   /* Now make it so that mantissa contains the most-significant bits, and move
12601      the point_pos to indicate that the least-significant bits have been
12602      discarded.  */
12603   point_pos -= HOST_BITS_PER_WIDE_INT;
12604   mantissa = mant_hi;
12605 
12606   /* We can permit four significant bits of mantissa only, plus a high bit
12607      which is always 1.  */
12608   mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12609   if ((mantissa & mask) != 0)
12610     return -1;
12611 
12612   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
12613   mantissa >>= point_pos - 5;
12614 
12615   /* The mantissa may be zero. Disallow that case. (It's possible to load the
12616      floating-point immediate zero with Neon using an integer-zero load, but
12617      that case is handled elsewhere.)  */
12618   if (mantissa == 0)
12619     return -1;
12620 
12621   gcc_assert (mantissa >= 16 && mantissa <= 31);
12622 
12623   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12624      normalized significands are in the range [1, 2). (Our mantissa is shifted
12625      left 4 places at this point relative to normalized IEEE754 values).  GCC
12626      internally uses [0.5, 1) (see real.c), so the exponent returned from
12627      REAL_EXP must be altered.  */
12628   exponent = 5 - exponent;
12629 
12630   if (exponent < 0 || exponent > 7)
12631     return -1;
12632 
12633   /* Sign, mantissa and exponent are now in the correct form to plug into the
12634      formula described in the comment above.  */
12635   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12636 }
12637 
12638 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
12639 int
12640 vfp3_const_double_rtx (rtx x)
12641 {
12642   if (!TARGET_VFP3)
12643     return 0;
12644 
12645   return vfp3_const_double_index (x) != -1;
12646 }
12647 
12648 /* Recognize immediates which can be used in various Neon and MVE instructions.
12649    Legal immediates are described by the following table (for VMVN variants, the
12650    bitwise inverse of the constant shown is recognized. In either case, VMOV
12651    is output and the correct instruction to use for a given constant is chosen
12652    by the assembler). The constant shown is replicated across all elements of
12653    the destination vector.
12654 
12655    insn elems variant constant (binary)
12656    ---- ----- ------- -----------------
12657    vmov  i32     0    00000000 00000000 00000000 abcdefgh
12658    vmov  i32     1    00000000 00000000 abcdefgh 00000000
12659    vmov  i32     2    00000000 abcdefgh 00000000 00000000
12660    vmov  i32     3    abcdefgh 00000000 00000000 00000000
12661    vmov  i16     4    00000000 abcdefgh
12662    vmov  i16     5    abcdefgh 00000000
12663    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
12664    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
12665    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
12666    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
12667    vmvn  i16    10    00000000 abcdefgh
12668    vmvn  i16    11    abcdefgh 00000000
12669    vmov  i32    12    00000000 00000000 abcdefgh 11111111
12670    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
12671    vmov  i32    14    00000000 abcdefgh 11111111 11111111
12672    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
12673    vmov   i8    16    abcdefgh
12674    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
12675                       eeeeeeee ffffffff gggggggg hhhhhhhh
12676    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
12677    vmov  f32    19    00000000 00000000 00000000 00000000
12678 
12679    For case 18, B = !b. Representable values are exactly those accepted by
12680    vfp3_const_double_index, but are output as floating-point numbers rather
12681    than indices.
12682 
12683    For case 19, we will change it to vmov.i32 when assembling.
12684 
12685    Variants 0-5 (inclusive) may also be used as immediates for the second
12686    operand of VORR/VBIC instructions.
12687 
12688    The INVERSE argument causes the bitwise inverse of the given operand to be
12689    recognized instead (used for recognizing legal immediates for the VAND/VORN
12690    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12691    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12692    output, rather than the real insns vbic/vorr).
12693 
12694    INVERSE makes no difference to the recognition of float vectors.
12695 
12696    The return value is the variant of immediate as shown in the above table, or
12697    -1 if the given value doesn't match any of the listed patterns.
12698 */
12699 static int
12700 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12701 		      rtx *modconst, int *elementwidth)
12702 {
12703 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
12704   matches = 1;					\
12705   for (i = 0; i < idx; i += (STRIDE))		\
12706     if (!(TEST))				\
12707       matches = 0;				\
12708   if (matches)					\
12709     {						\
12710       immtype = (CLASS);			\
12711       elsize = (ELSIZE);			\
12712       break;					\
12713     }
12714 
12715   unsigned int i, elsize = 0, idx = 0, n_elts;
12716   unsigned int innersize;
12717   unsigned char bytes[16] = {};
12718   int immtype = -1, matches;
12719   unsigned int invmask = inverse ? 0xff : 0;
12720   bool vector = GET_CODE (op) == CONST_VECTOR;
12721 
12722   if (vector)
12723     n_elts = CONST_VECTOR_NUNITS (op);
12724   else
12725     {
12726       n_elts = 1;
12727       gcc_assert (mode != VOIDmode);
12728     }
12729 
12730   innersize = GET_MODE_UNIT_SIZE (mode);
12731 
12732   /* Only support 128-bit vectors for MVE.  */
12733   if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12734     return -1;
12735 
12736   /* Vectors of float constants.  */
12737   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12738     {
12739       rtx el0 = CONST_VECTOR_ELT (op, 0);
12740 
12741       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12742         return -1;
12743 
12744       /* FP16 vectors cannot be represented.  */
12745       if (GET_MODE_INNER (mode) == HFmode)
12746 	return -1;
12747 
12748       /* All elements in the vector must be the same.  Note that 0.0 and -0.0
12749 	 are distinct in this context.  */
12750       if (!const_vec_duplicate_p (op))
12751 	return -1;
12752 
12753       if (modconst)
12754         *modconst = CONST_VECTOR_ELT (op, 0);
12755 
12756       if (elementwidth)
12757         *elementwidth = 0;
12758 
12759       if (el0 == CONST0_RTX (GET_MODE (el0)))
12760 	return 19;
12761       else
12762 	return 18;
12763     }
12764 
12765   /* The tricks done in the code below apply for little-endian vector layout.
12766      For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12767      FIXME: Implement logic for big-endian vectors.  */
12768   if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12769     return -1;
12770 
12771   /* Splat vector constant out into a byte vector.  */
12772   for (i = 0; i < n_elts; i++)
12773     {
12774       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12775       unsigned HOST_WIDE_INT elpart;
12776 
12777       gcc_assert (CONST_INT_P (el));
12778       elpart = INTVAL (el);
12779 
12780       for (unsigned int byte = 0; byte < innersize; byte++)
12781 	{
12782 	  bytes[idx++] = (elpart & 0xff) ^ invmask;
12783 	  elpart >>= BITS_PER_UNIT;
12784 	}
12785     }
12786 
12787   /* Sanity check.  */
12788   gcc_assert (idx == GET_MODE_SIZE (mode));
12789 
12790   do
12791     {
12792       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12793 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12794 
12795       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12796 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12797 
12798       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12799 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12800 
12801       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12802 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12803 
12804       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12805 
12806       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12807 
12808       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12809 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12810 
12811       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12812 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12813 
12814       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12815 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12816 
12817       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12818 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12819 
12820       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12821 
12822       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12823 
12824       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12825 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12826 
12827       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12828 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12829 
12830       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12831 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12832 
12833       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12834 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12835 
12836       CHECK (1, 8, 16, bytes[i] == bytes[0]);
12837 
12838       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12839 			&& bytes[i] == bytes[(i + 8) % idx]);
12840     }
12841   while (0);
12842 
12843   if (immtype == -1)
12844     return -1;
12845 
12846   if (elementwidth)
12847     *elementwidth = elsize;
12848 
12849   if (modconst)
12850     {
12851       unsigned HOST_WIDE_INT imm = 0;
12852 
12853       /* Un-invert bytes of recognized vector, if necessary.  */
12854       if (invmask != 0)
12855         for (i = 0; i < idx; i++)
12856           bytes[i] ^= invmask;
12857 
12858       if (immtype == 17)
12859         {
12860           /* FIXME: Broken on 32-bit H_W_I hosts.  */
12861           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12862 
12863           for (i = 0; i < 8; i++)
12864             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12865                    << (i * BITS_PER_UNIT);
12866 
12867           *modconst = GEN_INT (imm);
12868         }
12869       else
12870         {
12871           unsigned HOST_WIDE_INT imm = 0;
12872 
12873           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12874             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12875 
12876           *modconst = GEN_INT (imm);
12877         }
12878     }
12879 
12880   return immtype;
12881 #undef CHECK
12882 }
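/* Illustrative sketch only (not part of the compiler): classify a splatted
   32-bit element against the first four "vmov i32" variants of the table
   above, mirroring what the CHECK macro does over the byte-splatted form.
   The function name is hypothetical and the logic is deliberately reduced
   to a single replicated 32-bit element.  */
#if 0
static int
classify_vmov_i32_example (unsigned int elt)
{
  unsigned char b[4] = { (unsigned char) (elt & 0xff),
			 (unsigned char) ((elt >> 8) & 0xff),
			 (unsigned char) ((elt >> 16) & 0xff),
			 (unsigned char) ((elt >> 24) & 0xff) };

  if (b[1] == 0 && b[2] == 0 && b[3] == 0)
    return 0;	/* 00000000 00000000 00000000 abcdefgh  */
  if (b[0] == 0 && b[2] == 0 && b[3] == 0)
    return 1;	/* 00000000 00000000 abcdefgh 00000000  */
  if (b[0] == 0 && b[1] == 0 && b[3] == 0)
    return 2;	/* 00000000 abcdefgh 00000000 00000000  */
  if (b[0] == 0 && b[1] == 0 && b[2] == 0)
    return 3;	/* abcdefgh 00000000 00000000 00000000  */
  return -1;	/* Needs one of the other variants (or isn't valid).  */
}

/* classify_vmov_i32_example (0x0000ab00) returns 1, so a vector splat of
   0x0000ab00 can be emitted as a single VMOV immediate.  */
#endif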
12883 
12884 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12885    implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
12886    (or zero for float elements), and a modified constant (whatever should be
12887    output for a VMOV) in *MODCONST.  This function was renamed from
12888    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move"
12889    because it is now used by both Neon and MVE.  */
12890 int
12891 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12892 			       rtx *modconst, int *elementwidth)
12893 {
12894   rtx tmpconst;
12895   int tmpwidth;
12896   int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12897 
12898   if (retval == -1)
12899     return 0;
12900 
12901   if (modconst)
12902     *modconst = tmpconst;
12903 
12904   if (elementwidth)
12905     *elementwidth = tmpwidth;
12906 
12907   return 1;
12908 }
12909 
12910 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
12911    the immediate is valid, write a constant suitable for using as an operand
12912    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12913    *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */
12914 
12915 int
12916 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12917 				rtx *modconst, int *elementwidth)
12918 {
12919   rtx tmpconst;
12920   int tmpwidth;
12921   int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12922 
12923   if (retval < 0 || retval > 5)
12924     return 0;
12925 
12926   if (modconst)
12927     *modconst = tmpconst;
12928 
12929   if (elementwidth)
12930     *elementwidth = tmpwidth;
12931 
12932   return 1;
12933 }
12934 
12935 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12936    the immediate is valid, write a constant suitable for using as an operand
12937    to VSHR/VSHL to *MODCONST and the corresponding element width to
12938    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left or a right
12939    shift, because the two have different immediate ranges.  */
12940 
12941 int
12942 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12943 				rtx *modconst, int *elementwidth,
12944 				bool isleftshift)
12945 {
12946   unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12947   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12948   unsigned HOST_WIDE_INT last_elt = 0;
12949   unsigned HOST_WIDE_INT maxshift;
12950 
12951   /* Split vector constant out into a byte vector.  */
12952   for (i = 0; i < n_elts; i++)
12953     {
12954       rtx el = CONST_VECTOR_ELT (op, i);
12955       unsigned HOST_WIDE_INT elpart;
12956 
12957       if (CONST_INT_P (el))
12958         elpart = INTVAL (el);
12959       else if (CONST_DOUBLE_P (el))
12960         return 0;
12961       else
12962         gcc_unreachable ();
12963 
12964       if (i != 0 && elpart != last_elt)
12965         return 0;
12966 
12967       last_elt = elpart;
12968     }
12969 
12970   /* Shift less than element size.  */
12971   maxshift = innersize * 8;
12972 
12973   if (isleftshift)
12974     {
12975       /* Left shift immediate value can be from 0 to <size>-1.  */
12976       if (last_elt >= maxshift)
12977         return 0;
12978     }
12979   else
12980     {
12981       /* Right shift immediate value can be from 1 to <size>.  */
12982       if (last_elt == 0 || last_elt > maxshift)
12983 	return 0;
12984     }
12985 
12986   if (elementwidth)
12987     *elementwidth = innersize * 8;
12988 
12989   if (modconst)
12990     *modconst = CONST_VECTOR_ELT (op, 0);
12991 
12992   return 1;
12993 }
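/* Illustrative sketch only: the immediate ranges enforced above, restated as
   a plain predicate.  For an element ELEM_BITS wide, VSHL accepts shift
   counts 0 .. ELEM_BITS - 1 while VSHR accepts 1 .. ELEM_BITS.  The helper
   name is hypothetical.  */
#if 0
static int
neon_shift_imm_ok_example (int elem_bits, int count, int is_left)
{
  return is_left ? (count >= 0 && count < elem_bits)
		 : (count >= 1 && count <= elem_bits);
}
#endif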
12994 
12995 /* Return a string suitable for output of Neon immediate logic operation
12996    MNEM.  */
12997 
12998 char *
12999 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13000 			     int inverse, int quad)
13001 {
13002   int width, is_valid;
13003   static char templ[40];
13004 
13005   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13006 
13007   gcc_assert (is_valid != 0);
13008 
13009   if (quad)
13010     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13011   else
13012     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13013 
13014   return templ;
13015 }
13016 
13017 /* Return a string suitable for output of Neon immediate shift operation
13018    (VSHR or VSHL) MNEM.  */
13019 
13020 char *
13021 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13022 			     machine_mode mode, int quad,
13023 			     bool isleftshift)
13024 {
13025   int width, is_valid;
13026   static char templ[40];
13027 
13028   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13029   gcc_assert (is_valid != 0);
13030 
13031   if (quad)
13032     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13033   else
13034     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13035 
13036   return templ;
13037 }
13038 
13039 /* Output a sequence of pairwise operations to implement a reduction.
13040    NOTE: We do "too much work" here, because pairwise operations work on two
13041    registers-worth of operands in one go.  Unfortunately, those extra
13042    calculations do not seem exploitable to do the full operation in fewer steps.
13043    Although all vector elements of the result but the first are ignored, we
13044    actually calculate the same result in each of the elements. An alternative
13045    such as initially loading a vector with zero to use as each of the second
13046    operands would use up an additional register and take an extra instruction,
13047    for no particular gain.  */
13048 
13049 void
13050 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13051 		      rtx (*reduc) (rtx, rtx, rtx))
13052 {
13053   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13054   rtx tmpsum = op1;
13055 
13056   for (i = parts / 2; i >= 1; i /= 2)
13057     {
13058       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13059       emit_insn (reduc (dest, tmpsum, tmpsum));
13060       tmpsum = dest;
13061     }
13062 }
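/* Illustrative sketch only: the loop above halves the element count on each
   iteration, so reducing a vector of PARTS elements emits log2 (PARTS)
   pairwise instructions (e.g. three for an 8-element vector).  The helper
   name below is hypothetical.  */
#if 0
static int
pairwise_reduce_step_count_example (unsigned int parts)
{
  int steps = 0;
  for (unsigned int i = parts / 2; i >= 1; i /= 2)
    steps++;
  return steps;	/* parts == 8 gives 3; parts == 4 gives 2.  */
}
#endif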
13063 
13064 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13065    loaded into a register using VDUP.
13066 
13067    If this is the case, and GENERATE is set, we also generate
13068    instructions to do this and return an RTX to assign to the register.  */
13069 
13070 static rtx
13071 neon_vdup_constant (rtx vals, bool generate)
13072 {
13073   machine_mode mode = GET_MODE (vals);
13074   machine_mode inner_mode = GET_MODE_INNER (mode);
13075   rtx x;
13076 
13077   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13078     return NULL_RTX;
13079 
13080   if (!const_vec_duplicate_p (vals, &x))
13081     /* The elements are not all the same.  We could handle repeating
13082        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13083        {0, C, 0, C, 0, C, 0, C} which can be loaded using
13084        vdup.i16).  */
13085     return NULL_RTX;
13086 
13087   if (!generate)
13088     return x;
13089 
13090   /* We can load this constant by using VDUP and a constant in a
13091      single ARM register.  This will be cheaper than a vector
13092      load.  */
13093 
13094   x = copy_to_mode_reg (inner_mode, x);
13095   return gen_vec_duplicate (mode, x);
13096 }
13097 
13098 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13099    constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13100    into a register.
13101 
13102    If this is the case, and GENERATE is set, we also generate code to do
13103    this and return an RTX to copy into the register.  */
13104 
13105 rtx
13106 neon_make_constant (rtx vals, bool generate)
13107 {
13108   machine_mode mode = GET_MODE (vals);
13109   rtx target;
13110   rtx const_vec = NULL_RTX;
13111   int n_elts = GET_MODE_NUNITS (mode);
13112   int n_const = 0;
13113   int i;
13114 
13115   if (GET_CODE (vals) == CONST_VECTOR)
13116     const_vec = vals;
13117   else if (GET_CODE (vals) == PARALLEL)
13118     {
13119       /* A CONST_VECTOR must contain only CONST_INTs and
13120 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13121 	 Only store valid constants in a CONST_VECTOR.  */
13122       for (i = 0; i < n_elts; ++i)
13123 	{
13124 	  rtx x = XVECEXP (vals, 0, i);
13125 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13126 	    n_const++;
13127 	}
13128       if (n_const == n_elts)
13129 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13130     }
13131   else
13132     gcc_unreachable ();
13133 
13134   if (const_vec != NULL
13135       && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13136     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
13137     return const_vec;
13138   else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13139     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
13140        pipeline cycle; creating the constant takes one or two ARM
13141        pipeline cycles.  */
13142     return target;
13143   else if (const_vec != NULL_RTX)
13144     /* Load from constant pool.  On Cortex-A8 this takes two cycles
13145        (for either double or quad vectors).  We cannot take advantage
13146        of single-cycle VLD1 because we need a PC-relative addressing
13147        mode.  */
13148     return arm_disable_literal_pool ? NULL_RTX : const_vec;
13149   else
13150     /* A PARALLEL containing something not valid inside CONST_VECTOR.
13151        We cannot construct an initializer.  */
13152     return NULL_RTX;
13153 }
13154 
13155 /* Initialize vector TARGET to VALS.  */
13156 
13157 void
13158 neon_expand_vector_init (rtx target, rtx vals)
13159 {
13160   machine_mode mode = GET_MODE (target);
13161   machine_mode inner_mode = GET_MODE_INNER (mode);
13162   int n_elts = GET_MODE_NUNITS (mode);
13163   int n_var = 0, one_var = -1;
13164   bool all_same = true;
13165   rtx x, mem;
13166   int i;
13167 
13168   for (i = 0; i < n_elts; ++i)
13169     {
13170       x = XVECEXP (vals, 0, i);
13171       if (!CONSTANT_P (x))
13172 	++n_var, one_var = i;
13173 
13174       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13175 	all_same = false;
13176     }
13177 
13178   if (n_var == 0)
13179     {
13180       rtx constant = neon_make_constant (vals);
13181       if (constant != NULL_RTX)
13182 	{
13183 	  emit_move_insn (target, constant);
13184 	  return;
13185 	}
13186     }
13187 
13188   /* Splat a single non-constant element if we can.  */
13189   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13190     {
13191       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13192       emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13193       return;
13194     }
13195 
13196   /* One field is non-constant.  Load constant then overwrite varying
13197      field.  This is more efficient than using the stack.  */
13198   if (n_var == 1)
13199     {
13200       rtx copy = copy_rtx (vals);
13201       rtx merge_mask = GEN_INT (1 << one_var);
13202 
13203       /* Load constant part of vector, substitute neighboring value for
13204 	 varying element.  */
13205       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13206       neon_expand_vector_init (target, copy);
13207 
13208       /* Insert variable.  */
13209       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13210       emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13211       return;
13212     }
13213 
13214   /* Construct the vector in memory one field at a time
13215      and load the whole vector.  */
13216   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13217   for (i = 0; i < n_elts; i++)
13218     emit_move_insn (adjust_address_nv (mem, inner_mode,
13219 				    i * GET_MODE_SIZE (inner_mode)),
13220 		    XVECEXP (vals, 0, i));
13221   emit_move_insn (target, mem);
13222 }
13223 
13224 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
13225    ERR if it doesn't.  EXP indicates the source location, which includes the
13226    inlining history for intrinsics.  */
13227 
13228 static void
13229 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13230 	      const_tree exp, const char *desc)
13231 {
13232   HOST_WIDE_INT lane;
13233 
13234   gcc_assert (CONST_INT_P (operand));
13235 
13236   lane = INTVAL (operand);
13237 
13238   if (lane < low || lane >= high)
13239     {
13240       if (exp)
13241 	error ("%K%s %wd out of range %wd - %wd",
13242 	       exp, desc, lane, low, high - 1);
13243       else
13244 	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13245     }
13246 }
13247 
13248 /* Bounds-check lanes.  */
13249 
13250 void
13251 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13252 		  const_tree exp)
13253 {
13254   bounds_check (operand, low, high, exp, "lane");
13255 }
13256 
13257 /* Bounds-check constants.  */
13258 
13259 void
13260 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13261 {
13262   bounds_check (operand, low, high, NULL_TREE, "constant");
13263 }
13264 
13265 HOST_WIDE_INT
13266 neon_element_bits (machine_mode mode)
13267 {
13268   return GET_MODE_UNIT_BITSIZE (mode);
13269 }
13270 
13271 
13272 /* Predicates for `match_operand' and `match_operator'.  */
13273 
13274 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13275    WB level is 2 if full writeback address modes are allowed, 1
13276    if limited writeback address modes (POST_INC and PRE_DEC) are
13277    allowed and 0 if no writeback at all is supported.  */
13278 
13279 int
13280 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13281 {
13282   gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13283   rtx ind;
13284 
13285   /* Reject eliminable registers.  */
13286   if (! (reload_in_progress || reload_completed || lra_in_progress)
13287       && (   reg_mentioned_p (frame_pointer_rtx, op)
13288 	  || reg_mentioned_p (arg_pointer_rtx, op)
13289 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13290 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13291 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13292 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13293     return FALSE;
13294 
13295   /* Constants are converted into offsets from labels.  */
13296   if (!MEM_P (op))
13297     return FALSE;
13298 
13299   ind = XEXP (op, 0);
13300 
13301   if (reload_completed
13302       && (LABEL_REF_P (ind)
13303 	  || (GET_CODE (ind) == CONST
13304 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13305 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13306 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13307     return TRUE;
13308 
13309   /* Match: (mem (reg)).  */
13310   if (REG_P (ind))
13311     return arm_address_register_rtx_p (ind, 0);
13312 
13313   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13314      acceptable whenever any level of writeback is permitted (subject
13315      to verification by arm_address_register_rtx_p).  Full writeback
13316      (WB_LEVEL of 2) is required to also accept PRE_INC and
13317      POST_DEC.  */
13318   if (wb_level > 0
13319       && (GET_CODE (ind) == POST_INC
13320 	  || GET_CODE (ind) == PRE_DEC
13321 	  || (wb_level > 1
13322 	      && (GET_CODE (ind) == PRE_INC
13323 		  || GET_CODE (ind) == POST_DEC))))
13324     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13325 
13326   if (wb_level > 1
13327       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13328       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13329       && GET_CODE (XEXP (ind, 1)) == PLUS
13330       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13331     ind = XEXP (ind, 1);
13332 
13333   /* Match:
13334      (plus (reg)
13335 	   (const))
13336 
13337      The encoded immediate for 16-bit modes is multiplied by 2,
13338      while the encoded immediate for 32-bit and 64-bit modes is
13339      multiplied by 4.  */
13340   int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13341   if (GET_CODE (ind) == PLUS
13342       && REG_P (XEXP (ind, 0))
13343       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13344       && CONST_INT_P (XEXP (ind, 1))
13345       && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13346       && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13347     return TRUE;
13348 
13349   return FALSE;
13350 }
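/* Illustrative sketch only: the [Rn, #imm] offset range accepted above,
   restated as a predicate over the access size in bytes.  Because the
   factor is capped at 4, 16-bit accesses allow multiples of 2 up to +/-510
   while 32-bit and wider accesses allow multiples of 4 up to +/-1020.  The
   function name is hypothetical.  */
#if 0
static int
coproc_offset_ok_example (int size_bytes, int offset)
{
  int factor = size_bytes < 4 ? size_bytes : 4;	/* MIN (size, 4).  */
  return offset >= -255 * factor && offset <= 255 * factor
	 && (offset & (factor - 1)) == 0;
}
#endif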
13351 
13352 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13353    WB is true if full writeback address modes are allowed and is false
13354    if limited writeback address modes (POST_INC and PRE_DEC) are
13355    allowed.  */
13356 
13357 int arm_coproc_mem_operand (rtx op, bool wb)
13358 {
13359   return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13360 }
13361 
13362 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13363    context in which no writeback address modes are allowed.  */
13364 
13365 int
13366 arm_coproc_mem_operand_no_writeback (rtx op)
13367 {
13368   return arm_coproc_mem_operand_wb (op, 0);
13369 }
13370 
13371 /* This function returns TRUE on matching mode and op.
13372 1. For the narrow modes (V8QI, V4QI and V4HI), check for [Rn]; return TRUE only if Rn <= LO_REGS.
13373 2. For other modes, check for [Rn]; return TRUE for Rn < R15 (except R13).  */
13374 int
13375 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13376 {
13377   enum rtx_code code;
13378   int val, reg_no;
13379 
13380   /* Match: (mem (reg)).  */
13381   if (REG_P (op))
13382     {
13383       int reg_no = REGNO (op);
13384       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13385 	       ? reg_no <= LAST_LO_REGNUM
13386 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13387 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13388     }
13389   code = GET_CODE (op);
13390 
13391   if (code == POST_INC || code == PRE_DEC
13392       || code == PRE_INC || code == POST_DEC)
13393     {
13394       reg_no = REGNO (XEXP (op, 0));
13395       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13396 	       ? reg_no <= LAST_LO_REGNUM
13397 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13398 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13399     }
13400   else if ((code == POST_MODIFY || code == PRE_MODIFY)
13401 	   && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
13402     {
13403       reg_no = REGNO (XEXP (op, 0));
13404       val = INTVAL (XEXP ( XEXP (op, 1), 1));
13405       switch (mode)
13406 	{
13407 	  case E_V16QImode:
13408 	    if (abs (val) <= 127)
13409 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13410 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13411 	    return FALSE;
13412 	  case E_V8HImode:
13413 	  case E_V8HFmode:
13414 	    if (abs (val) <= 255)
13415 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13416 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13417 	    return FALSE;
13418 	  case E_V8QImode:
13419 	  case E_V4QImode:
13420 	    if (abs (val) <= 127)
13421 	      return (reg_no <= LAST_LO_REGNUM
13422 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13423 	    return FALSE;
13424 	  case E_V4HImode:
13425 	  case E_V4HFmode:
13426 	    if (val % 2 == 0 && abs (val) <= 254)
13427 	      return (reg_no <= LAST_LO_REGNUM
13428 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13429 	    return FALSE;
13430 	  case E_V4SImode:
13431 	  case E_V4SFmode:
13432 	    if (val % 4 == 0 && abs (val) <= 508)
13433 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13434 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13435 	    return FALSE;
13436 	  case E_V2DImode:
13437 	  case E_V2DFmode:
13438 	  case E_TImode:
13439 	    if (val % 4 == 0 && val >= 0 && val <= 1020)
13440 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13441 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13442 	    return FALSE;
13443 	  default:
13444 	    return FALSE;
13445 	}
13446     }
13447   return FALSE;
13448 }
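/* Illustrative sketch only: the offset ranges accepted above for the
   {POST,PRE}_MODIFY forms, keyed by a hypothetical mode enumeration that
   simply restates the switch (the base-register restrictions are left
   out).  */
#if 0
enum mve_mode_example { V16QI_EX, V8HI_EX, V4HI_EX, V4SI_EX, V2DI_EX };

static int
mve_modify_offset_ok_example (enum mve_mode_example m, int val)
{
  switch (m)
    {
    case V16QI_EX: return val >= -127 && val <= 127;
    case V8HI_EX:  return val >= -255 && val <= 255;
    case V4HI_EX:  return val % 2 == 0 && val >= -254 && val <= 254;
    case V4SI_EX:  return val % 4 == 0 && val >= -508 && val <= 508;
    case V2DI_EX:  return val % 4 == 0 && val >= 0 && val <= 1020;
    }
  return 0;
}
#endif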
13449 
13450 /* Return TRUE if OP is a memory operand which we can load or store a vector
13451    to/from. TYPE is one of the following values:
13452     0 - Vector load/store (vldr)
13453     1 - Core registers (ldm)
13454     2 - Element/structure loads (vld1)
13455  */
13456 int
13457 neon_vector_mem_operand (rtx op, int type, bool strict)
13458 {
13459   rtx ind;
13460 
13461   /* Reject eliminable registers.  */
13462   if (strict && ! (reload_in_progress || reload_completed)
13463       && (reg_mentioned_p (frame_pointer_rtx, op)
13464 	  || reg_mentioned_p (arg_pointer_rtx, op)
13465 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13466 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13467 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13468 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13469     return FALSE;
13470 
13471   /* Constants are converted into offsets from labels.  */
13472   if (!MEM_P (op))
13473     return FALSE;
13474 
13475   ind = XEXP (op, 0);
13476 
13477   if (reload_completed
13478       && (LABEL_REF_P (ind)
13479 	  || (GET_CODE (ind) == CONST
13480 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13481 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13482 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13483     return TRUE;
13484 
13485   /* Match: (mem (reg)).  */
13486   if (REG_P (ind))
13487     return arm_address_register_rtx_p (ind, 0);
13488 
13489   /* Allow post-increment with Neon registers (and pre-decrement for type 0).  */
13490   if ((type != 1 && GET_CODE (ind) == POST_INC)
13491       || (type == 0 && GET_CODE (ind) == PRE_DEC))
13492     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13493 
13494   /* Allow post-increment by register for VLDn.  */
13495   if (type == 2 && GET_CODE (ind) == POST_MODIFY
13496       && GET_CODE (XEXP (ind, 1)) == PLUS
13497       && REG_P (XEXP (XEXP (ind, 1), 1))
13498       && REG_P (XEXP (ind, 0))
13499       && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13500      return true;
13501 
13502   /* Match:
13503      (plus (reg)
13504           (const)).  */
13505   if (type == 0
13506       && GET_CODE (ind) == PLUS
13507       && REG_P (XEXP (ind, 0))
13508       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13509       && CONST_INT_P (XEXP (ind, 1))
13510       && INTVAL (XEXP (ind, 1)) > -1024
13511       /* For quad modes, we restrict the constant offset to be slightly less
13512 	 than what the instruction format permits.  We have no such constraint
13513 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
13514       && (INTVAL (XEXP (ind, 1))
13515 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13516       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13517     return TRUE;
13518 
13519   return FALSE;
13520 }
13521 
13522 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13523    type.  */
13524 int
13525 neon_struct_mem_operand (rtx op)
13526 {
13527   rtx ind;
13528 
13529   /* Reject eliminable registers.  */
13530   if (! (reload_in_progress || reload_completed)
13531       && (   reg_mentioned_p (frame_pointer_rtx, op)
13532 	  || reg_mentioned_p (arg_pointer_rtx, op)
13533 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13534 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13535 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13536 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13537     return FALSE;
13538 
13539   /* Constants are converted into offsets from labels.  */
13540   if (!MEM_P (op))
13541     return FALSE;
13542 
13543   ind = XEXP (op, 0);
13544 
13545   if (reload_completed
13546       && (LABEL_REF_P (ind)
13547 	  || (GET_CODE (ind) == CONST
13548 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13549 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13550 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13551     return TRUE;
13552 
13553   /* Match: (mem (reg)).  */
13554   if (REG_P (ind))
13555     return arm_address_register_rtx_p (ind, 0);
13556 
13557   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
13558   if (GET_CODE (ind) == POST_INC
13559       || GET_CODE (ind) == PRE_DEC)
13560     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13561 
13562   return FALSE;
13563 }
13564 
13565 /* Prepares the operands for the VCMLA by lane instruction such that the right
13566    register number is selected.  This instruction is special in that it always
13567    requires a D register, however there is a choice to be made between Dn[0],
13568    Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13569 
13570    The VCMLA by lane function always selects two values. For instance given D0
13571    and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13572    used by the instruction.  However, given V4SF, indices 0 and 1 are both
13573    valid, since either D0[0] or D1[0] can be selected.
13574 
13575    This function centralizes that information based on OPERANDS; OPERANDS[3]
13576    will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13577    updated to contain the right index.  */
13578 
13579 rtx *
13580 neon_vcmla_lane_prepare_operands (rtx *operands)
13581 {
13582   int lane = INTVAL (operands[4]);
13583   machine_mode constmode = SImode;
13584   machine_mode mode = GET_MODE (operands[3]);
13585   int regno = REGNO (operands[3]);
13586   regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13587   if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13588     {
13589       operands[3] = gen_int_mode (regno + 1, constmode);
13590       operands[4]
13591 	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13592     }
13593   else
13594     {
13595       operands[3] = gen_int_mode (regno, constmode);
13596       operands[4] = gen_int_mode (lane, constmode);
13597     }
13598   return operands;
13599 }
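/* Illustrative sketch only: the remapping performed above, on plain
   integers.  GET_MODE_NUNITS (V4SF) / 4 == 1, so for a V4SF operand lane 0
   stays on the first D register with index 0 while lane 1 moves to the
   following D register, also with index 0.  The helper name is
   hypothetical.  */
#if 0
static void
vcmla_lane_remap_example (int regno, int nunits, int lane,
			  int *out_reg, int *out_lane)
{
  int lanes_per_reg = nunits / 4;
  if (lane > 0 && lane >= lanes_per_reg)
    {
      *out_reg = regno + 1;
      *out_lane = lane - lanes_per_reg;
    }
  else
    {
      *out_reg = regno;
      *out_lane = lane;
    }
}

/* vcmla_lane_remap_example (0, 4, 1, &r, &l) gives r == 1 and l == 0.  */
#endif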
13600 
13601 
13602 /* Return true if X is a register that will be eliminated later on.  */
13603 int
13604 arm_eliminable_register (rtx x)
13605 {
13606   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13607 		       || REGNO (x) == ARG_POINTER_REGNUM
13608 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13609 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13610 }
13611 
13612 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13613    coprocessor registers.  Otherwise return NO_REGS.  */
13614 
13615 enum reg_class
13616 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13617 {
13618   if (mode == HFmode)
13619     {
13620       if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13621 	return GENERAL_REGS;
13622       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13623 	return NO_REGS;
13624       return GENERAL_REGS;
13625     }
13626 
13627   /* The neon move patterns handle all legitimate vector and struct
13628      addresses.  */
13629   if (TARGET_NEON
13630       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13631       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13632 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13633 	  || VALID_NEON_STRUCT_MODE (mode)))
13634     return NO_REGS;
13635 
13636   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13637     return NO_REGS;
13638 
13639   return GENERAL_REGS;
13640 }
13641 
13642 /* Values which must be returned in the most-significant end of the return
13643    register.  */
13644 
13645 static bool
13646 arm_return_in_msb (const_tree valtype)
13647 {
13648   return (TARGET_AAPCS_BASED
13649           && BYTES_BIG_ENDIAN
13650 	  && (AGGREGATE_TYPE_P (valtype)
13651 	      || TREE_CODE (valtype) == COMPLEX_TYPE
13652 	      || FIXED_POINT_TYPE_P (valtype)));
13653 }
13654 
13655 /* Return TRUE if X references a SYMBOL_REF.  */
13656 int
13657 symbol_mentioned_p (rtx x)
13658 {
13659   const char * fmt;
13660   int i;
13661 
13662   if (SYMBOL_REF_P (x))
13663     return 1;
13664 
13665   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13666      are constant offsets, not symbols.  */
13667   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13668     return 0;
13669 
13670   fmt = GET_RTX_FORMAT (GET_CODE (x));
13671 
13672   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13673     {
13674       if (fmt[i] == 'E')
13675 	{
13676 	  int j;
13677 
13678 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13679 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
13680 	      return 1;
13681 	}
13682       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13683 	return 1;
13684     }
13685 
13686   return 0;
13687 }
13688 
13689 /* Return TRUE if X references a LABEL_REF.  */
13690 int
13691 label_mentioned_p (rtx x)
13692 {
13693   const char * fmt;
13694   int i;
13695 
13696   if (LABEL_REF_P (x))
13697     return 1;
13698 
13699   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13700      instruction, but they are constant offsets, not symbols.  */
13701   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13702     return 0;
13703 
13704   fmt = GET_RTX_FORMAT (GET_CODE (x));
13705   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13706     {
13707       if (fmt[i] == 'E')
13708 	{
13709 	  int j;
13710 
13711 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13712 	    if (label_mentioned_p (XVECEXP (x, i, j)))
13713 	      return 1;
13714 	}
13715       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13716 	return 1;
13717     }
13718 
13719   return 0;
13720 }
13721 
13722 int
13723 tls_mentioned_p (rtx x)
13724 {
13725   switch (GET_CODE (x))
13726     {
13727     case CONST:
13728       return tls_mentioned_p (XEXP (x, 0));
13729 
13730     case UNSPEC:
13731       if (XINT (x, 1) == UNSPEC_TLS)
13732 	return 1;
13733 
13734     /* Fall through.  */
13735     default:
13736       return 0;
13737     }
13738 }
13739 
13740 /* Must not copy any rtx that uses a pc-relative address.
13741    Also, disallow copying of load-exclusive instructions that
13742    may appear after splitting of compare-and-swap-style operations
13743    so as to prevent those loops from being transformed away from their
13744    canonical forms (see PR 69904).  */
13745 
13746 static bool
13747 arm_cannot_copy_insn_p (rtx_insn *insn)
13748 {
13749   /* The tls call insn cannot be copied, as it is paired with a data
13750      word.  */
13751   if (recog_memoized (insn) == CODE_FOR_tlscall)
13752     return true;
13753 
13754   subrtx_iterator::array_type array;
13755   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13756     {
13757       const_rtx x = *iter;
13758       if (GET_CODE (x) == UNSPEC
13759 	  && (XINT (x, 1) == UNSPEC_PIC_BASE
13760 	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13761 	return true;
13762     }
13763 
13764   rtx set = single_set (insn);
13765   if (set)
13766     {
13767       rtx src = SET_SRC (set);
13768       if (GET_CODE (src) == ZERO_EXTEND)
13769 	src = XEXP (src, 0);
13770 
13771       /* Catch the load-exclusive and load-acquire operations.  */
13772       if (GET_CODE (src) == UNSPEC_VOLATILE
13773 	  && (XINT (src, 1) == VUNSPEC_LL
13774 	      || XINT (src, 1) == VUNSPEC_LAX))
13775 	return true;
13776     }
13777   return false;
13778 }
13779 
13780 enum rtx_code
13781 minmax_code (rtx x)
13782 {
13783   enum rtx_code code = GET_CODE (x);
13784 
13785   switch (code)
13786     {
13787     case SMAX:
13788       return GE;
13789     case SMIN:
13790       return LE;
13791     case UMIN:
13792       return LEU;
13793     case UMAX:
13794       return GEU;
13795     default:
13796       gcc_unreachable ();
13797     }
13798 }
13799 
13800 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
13801 
13802 bool
13803 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13804 			int *mask, bool *signed_sat)
13805 {
13806   /* The high bound must be a power of two minus one.  */
13807   int log = exact_log2 (INTVAL (hi_bound) + 1);
13808   if (log == -1)
13809     return false;
13810 
13811   /* The low bound is either zero (for usat) or one less than the
13812      negation of the high bound (for ssat).  */
13813   if (INTVAL (lo_bound) == 0)
13814     {
13815       if (mask)
13816         *mask = log;
13817       if (signed_sat)
13818         *signed_sat = false;
13819 
13820       return true;
13821     }
13822 
13823   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13824     {
13825       if (mask)
13826         *mask = log + 1;
13827       if (signed_sat)
13828         *signed_sat = true;
13829 
13830       return true;
13831     }
13832 
13833   return false;
13834 }
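/* Illustrative sketch only: the bound pairs recognised above, mirrored on
   plain integers with a hypothetical helper name.  Clamping to [0, 255]
   yields a USAT width of 8, while clamping to [-128, 127] yields an SSAT
   width of 8 (log2 (127 + 1) + 1).  */
#if 0
static int
sat_match_example (long lo, long hi, int *width, int *is_signed)
{
  int log = -1;
  for (int i = 0; i < 32; i++)		/* Poor man's exact_log2 (hi + 1).  */
    if (hi + 1 == (1L << i))
      log = i;
  if (log == -1)
    return 0;
  if (lo == 0)
    {
      *width = log;			/* USAT #log.  */
      *is_signed = 0;
      return 1;
    }
  if (lo == -hi - 1)
    {
      *width = log + 1;			/* SSAT #(log + 1).  */
      *is_signed = 1;
      return 1;
    }
  return 0;
}
#endif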
13835 
13836 /* Return 1 if memory locations are adjacent.  */
13837 int
13838 adjacent_mem_locations (rtx a, rtx b)
13839 {
13840   /* We don't guarantee to preserve the order of these memory refs.  */
13841   if (volatile_refs_p (a) || volatile_refs_p (b))
13842     return 0;
13843 
13844   if ((REG_P (XEXP (a, 0))
13845        || (GET_CODE (XEXP (a, 0)) == PLUS
13846 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13847       && (REG_P (XEXP (b, 0))
13848 	  || (GET_CODE (XEXP (b, 0)) == PLUS
13849 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13850     {
13851       HOST_WIDE_INT val0 = 0, val1 = 0;
13852       rtx reg0, reg1;
13853       int val_diff;
13854 
13855       if (GET_CODE (XEXP (a, 0)) == PLUS)
13856         {
13857 	  reg0 = XEXP (XEXP (a, 0), 0);
13858 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13859         }
13860       else
13861 	reg0 = XEXP (a, 0);
13862 
13863       if (GET_CODE (XEXP (b, 0)) == PLUS)
13864         {
13865 	  reg1 = XEXP (XEXP (b, 0), 0);
13866 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13867         }
13868       else
13869 	reg1 = XEXP (b, 0);
13870 
13871       /* Don't accept any offset that will require multiple
13872 	 instructions to handle, since this would cause the
13873 	 arith_adjacentmem pattern to output an overlong sequence.  */
13874       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13875 	return 0;
13876 
13877       /* Don't allow an eliminable register: register elimination can make
13878 	 the offset too large.  */
13879       if (arm_eliminable_register (reg0))
13880 	return 0;
13881 
13882       val_diff = val1 - val0;
13883 
13884       if (arm_ld_sched)
13885 	{
13886 	  /* If the target has load delay slots, then there's no benefit
13887 	     to using an ldm instruction unless the offset is zero and
13888 	     we are optimizing for size.  */
13889 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13890 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13891 		  && (val_diff == 4 || val_diff == -4));
13892 	}
13893 
13894       return ((REGNO (reg0) == REGNO (reg1))
13895 	      && (val_diff == 4 || val_diff == -4));
13896     }
13897 
13898   return 0;
13899 }
13900 
13901 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
13902    for load operations, false for store operations.  CONSECUTIVE is true
13903    if the register numbers in the operation must be consecutive in the register
13904    bank.  RETURN_PC is true if the value is to be loaded into the PC.
13905    The pattern we are trying to match for load is:
13906      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13907       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13908        :
13909        :
13910       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13911      ]
13912      where
13913      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13914      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13915      3.  If consecutive is TRUE, then for kth register being loaded,
13916          REGNO (R_dk) = REGNO (R_d0) + k.
13917    The pattern for store is similar.  */
13918 bool
13919 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13920                      bool consecutive, bool return_pc)
13921 {
13922   HOST_WIDE_INT count = XVECLEN (op, 0);
13923   rtx reg, mem, addr;
13924   unsigned regno;
13925   unsigned first_regno;
13926   HOST_WIDE_INT i = 1, base = 0, offset = 0;
13927   rtx elt;
13928   bool addr_reg_in_reglist = false;
13929   bool update = false;
13930   int reg_increment;
13931   int offset_adj;
13932   int regs_per_val;
13933 
13934   /* If not in SImode, then registers must be consecutive
13935      (e.g., VLDM instructions for DFmode).  */
13936   gcc_assert ((mode == SImode) || consecutive);
13937   /* Setting return_pc for stores is illegal.  */
13938   gcc_assert (!return_pc || load);
13939 
13940   /* Set up the increments and the regs per val based on the mode.  */
13941   reg_increment = GET_MODE_SIZE (mode);
13942   regs_per_val = reg_increment / 4;
13943   offset_adj = return_pc ? 1 : 0;
13944 
13945   if (count <= 1
13946       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13947       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13948     return false;
13949 
13950   /* Check if this is a write-back.  */
13951   elt = XVECEXP (op, 0, offset_adj);
13952   if (GET_CODE (SET_SRC (elt)) == PLUS)
13953     {
13954       i++;
13955       base = 1;
13956       update = true;
13957 
13958       /* The offset adjustment must be the number of registers being
13959          popped times the size of a single register.  */
13960       if (!REG_P (SET_DEST (elt))
13961           || !REG_P (XEXP (SET_SRC (elt), 0))
13962           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13963           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13964           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13965              ((count - 1 - offset_adj) * reg_increment))
13966         return false;
13967     }
13968 
13969   i = i + offset_adj;
13970   base = base + offset_adj;
13971   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13972      success depends on the type: VLDM can do just one reg,
13973      LDM must do at least two.  */
13974   if ((count <= i) && (mode == SImode))
13975       return false;
13976 
13977   elt = XVECEXP (op, 0, i - 1);
13978   if (GET_CODE (elt) != SET)
13979     return false;
13980 
13981   if (load)
13982     {
13983       reg = SET_DEST (elt);
13984       mem = SET_SRC (elt);
13985     }
13986   else
13987     {
13988       reg = SET_SRC (elt);
13989       mem = SET_DEST (elt);
13990     }
13991 
13992   if (!REG_P (reg) || !MEM_P (mem))
13993     return false;
13994 
13995   regno = REGNO (reg);
13996   first_regno = regno;
13997   addr = XEXP (mem, 0);
13998   if (GET_CODE (addr) == PLUS)
13999     {
14000       if (!CONST_INT_P (XEXP (addr, 1)))
14001 	return false;
14002 
14003       offset = INTVAL (XEXP (addr, 1));
14004       addr = XEXP (addr, 0);
14005     }
14006 
14007   if (!REG_P (addr))
14008     return false;
14009 
14010   /* Don't allow SP to be loaded unless it is also the base register. It
14011      guarantees that SP is reset correctly when an LDM instruction
14012      is interrupted. Otherwise, we might end up with a corrupt stack.  */
14013   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14014     return false;
14015 
14016   if (regno == REGNO (addr))
14017     addr_reg_in_reglist = true;
14018 
14019   for (; i < count; i++)
14020     {
14021       elt = XVECEXP (op, 0, i);
14022       if (GET_CODE (elt) != SET)
14023         return false;
14024 
14025       if (load)
14026         {
14027           reg = SET_DEST (elt);
14028           mem = SET_SRC (elt);
14029         }
14030       else
14031         {
14032           reg = SET_SRC (elt);
14033           mem = SET_DEST (elt);
14034         }
14035 
14036       if (!REG_P (reg)
14037           || GET_MODE (reg) != mode
14038           || REGNO (reg) <= regno
14039           || (consecutive
14040               && (REGNO (reg) !=
14041                   (unsigned int) (first_regno + regs_per_val * (i - base))))
14042           /* Don't allow SP to be loaded unless it is also the base register. It
14043              guarantees that SP is reset correctly when an LDM instruction
14044              is interrupted. Otherwise, we might end up with a corrupt stack.  */
14045           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14046           || !MEM_P (mem)
14047           || GET_MODE (mem) != mode
14048           || ((GET_CODE (XEXP (mem, 0)) != PLUS
14049 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14050 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14051 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14052                    offset + (i - base) * reg_increment))
14053 	      && (!REG_P (XEXP (mem, 0))
14054 		  || offset + (i - base) * reg_increment != 0)))
14055         return false;
14056 
14057       regno = REGNO (reg);
14058       if (regno == REGNO (addr))
14059         addr_reg_in_reglist = true;
14060     }
14061 
14062   if (load)
14063     {
14064       if (update && addr_reg_in_reglist)
14065         return false;
14066 
14067       /* For Thumb-1, address register is always modified - either by write-back
14068          or by explicit load.  If the pattern does not describe an update,
14069          then the address register must be in the list of loaded registers.  */
14070       if (TARGET_THUMB1)
14071         return update || addr_reg_in_reglist;
14072     }
14073 
14074   return true;
14075 }
14076 
14077 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14078    or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
14079    following form:
14080 
14081    [(set (reg:SI <N>) (const_int 0))
14082     (set (reg:SI <M>) (const_int 0))
14083     ...
14084     (unspec_volatile [(const_int 0)]
14085 		     VUNSPEC_CLRM_APSR)
14086     (clobber (reg:CC CC_REGNUM))
14087    ]
14088 
14089    Any number (including 0) of set expressions is valid; the volatile unspec is
14090    optional.  All registers but SP and PC are allowed and registers must be in
14091    strict increasing order.
14092 
14093    To be a valid VSCCLRM pattern, OP must have the following form:
14094 
14095    [(unspec_volatile [(const_int 0)]
14096 		     VUNSPEC_VSCCLRM_VPR)
14097     (set (reg:SF <N>) (const_int 0))
14098     (set (reg:SF <M>) (const_int 0))
14099     ...
14100    ]
14101 
14102    As with CLRM, any number (including 0) of set expressions is valid; however,
14103    the volatile unspec is mandatory here.  Any VFP single-precision register is
14104    accepted, but all registers must be consecutive and in increasing order.  */
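/* For instance (register numbers purely illustrative), a parallel of the form

   [(set (reg:SI 1) (const_int 0))
    (set (reg:SI 2) (const_int 0))
    (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   is accepted here and corresponds roughly to "clrm {r1, r2, APSR}".  */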
14105 
14106 bool
14107 clear_operation_p (rtx op, bool vfp)
14108 {
14109   unsigned regno;
14110   unsigned last_regno = INVALID_REGNUM;
14111   rtx elt, reg, zero;
14112   int count = XVECLEN (op, 0);
14113   int first_set = vfp ? 1 : 0;
14114   machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14115 
14116   for (int i = first_set; i < count; i++)
14117     {
14118       elt = XVECEXP (op, 0, i);
14119 
14120       if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14121 	{
14122 	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14123 	      || XVECLEN (elt, 0) != 1
14124 	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14125 	      || i != count - 2)
14126 	    return false;
14127 
14128 	  continue;
14129 	}
14130 
14131       if (GET_CODE (elt) == CLOBBER)
14132 	continue;
14133 
14134       if (GET_CODE (elt) != SET)
14135 	return false;
14136 
14137       reg = SET_DEST (elt);
14138       zero = SET_SRC (elt);
14139 
14140       if (!REG_P (reg)
14141 	  || GET_MODE (reg) != expected_mode
14142 	  || zero != CONST0_RTX (SImode))
14143 	return false;
14144 
14145       regno = REGNO (reg);
14146 
14147       if (vfp)
14148 	{
14149 	  if (i != first_set && regno != last_regno + 1)
14150 	    return false;
14151 	}
14152       else
14153 	{
14154 	  if (regno == SP_REGNUM || regno == PC_REGNUM)
14155 	    return false;
14156 	  if (i != first_set && regno <= last_regno)
14157 	    return false;
14158 	}
14159 
14160       last_regno = regno;
14161     }
14162 
14163   return true;
14164 }
14165 
14166 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14167    or stores (depending on IS_STORE) into a load-multiple or store-multiple
14168    instruction.  ADD_OFFSET is nonzero if the base address register needs
14169    to be modified with an add instruction before we can use it.  */
14170 
14171 static bool
14172 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14173 				 int nops, HOST_WIDE_INT add_offset)
14174 {
14175   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14176      if the offset isn't small enough.  The reason 2 ldrs are faster
14177      is because these ARMs are able to do more than one cache access
14178      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
14179      whilst the ARM8 has a double bandwidth cache.  This means that
14180      these cores can do both an instruction fetch and a data fetch in
14181      a single cycle, so the trick of calculating the address into a
14182      scratch register (one of the result regs) and then doing a load
14183      multiple actually becomes slower (and no smaller in code size).
14184      That is the transformation
14185 
14186  	ldr	rd1, [rbase + offset]
14187  	ldr	rd2, [rbase + offset + 4]
14188 
14189      to
14190 
14191  	add	rd1, rbase, offset
14192  	ldmia	rd1, {rd1, rd2}
14193 
14194      produces worse code -- '3 cycles + any stalls on rd2' instead of
14195      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
14196      access per cycle, the first sequence could never complete in less
14197      than 6 cycles, whereas the ldm sequence would only take 5 and
14198      would make better use of sequential accesses if not hitting the
14199      cache.
14200 
14201      We cheat here and test 'arm_ld_sched' which we currently know to
14202      only be true for the ARM8, ARM9 and StrongARM.  If this ever
14203      changes, then the test below needs to be reworked.  */
14204   if (nops == 2 && arm_ld_sched && add_offset != 0)
14205     return false;
14206 
14207   /* XScale has load-store double instructions, but they have stricter
14208      alignment requirements than load-store multiple, so we cannot
14209      use them.
14210 
14211      For XScale ldm requires 2 + NREGS cycles to complete and blocks
14212      the pipeline until completion.
14213 
14214 	NREGS		CYCLES
14215 	  1		  3
14216 	  2		  4
14217 	  3		  5
14218 	  4		  6
14219 
14220      An ldr instruction takes 1-3 cycles, but does not block the
14221      pipeline.
14222 
14223 	NREGS		CYCLES
14224 	  1		 1-3
14225 	  2		 2-6
14226 	  3		 3-9
14227 	  4		 4-12
14228 
14229      Best case ldr will always win.  However, the more ldr instructions
14230      we issue, the less likely we are to be able to schedule them well.
14231      Using ldr instructions also increases code size.
14232 
14233      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14234      for counts of 3 or 4 regs.  */
14235   if (nops <= 2 && arm_tune_xscale && !optimize_size)
14236     return false;
14237   return true;
14238 }
14239 
14240 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14241    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14242    an array ORDER that describes the sequence in which to access the
14243    offsets so that they are visited in ascending order.  In this sequence,
14244    each offset must be larger by exactly 4 than the previous one.  ORDER[0]
14245    must have been filled in by the caller with the index of the lowest offset.
14246    If UNSORTED_REGS is nonnull, it is an array of register numbers that
14247    we use to verify that ORDER produces an ascending order of registers.
14248    Return true if it was possible to construct such an order, false if
14249    not.  */
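/* As a worked example (values illustrative): with NOPS == 4,
   UNSORTED_OFFSETS == {8, 0, 4, 12} and ORDER[0] preset to 1 (the index of
   offset 0), successive iterations pick the offsets 4, 8 and 12, giving
   ORDER == {1, 2, 0, 3}.  If an offset were missing or duplicated, or if
   UNSORTED_REGS did not ascend along that order, the function would return
   false.  */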
14250 
14251 static bool
14252 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14253 		      int *unsorted_regs)
14254 {
14255   int i;
14256   for (i = 1; i < nops; i++)
14257     {
14258       int j;
14259 
14260       order[i] = order[i - 1];
14261       for (j = 0; j < nops; j++)
14262 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14263 	  {
14264 	    /* We must find exactly one offset that is higher than the
14265 	       previous one by 4.  */
14266 	    if (order[i] != order[i - 1])
14267 	      return false;
14268 	    order[i] = j;
14269 	  }
14270       if (order[i] == order[i - 1])
14271 	return false;
14272       /* The register numbers must be ascending.  */
14273       if (unsorted_regs != NULL
14274 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14275 	return false;
14276     }
14277   return true;
14278 }
14279 
14280 /* Used to determine in a peephole whether a sequence of load
14281    instructions can be changed into a load-multiple instruction.
14282    NOPS is the number of separate load instructions we are examining.  The
14283    first NOPS entries in OPERANDS are the destination registers, the
14284    next NOPS entries are memory operands.  If this function is
14285    successful, *BASE is set to the common base register of the memory
14286    accesses; *LOAD_OFFSET is set to the first memory location's offset
14287    from that base register.
14288    REGS is an array filled in with the destination register numbers.
14289    SAVED_ORDER (if nonnull), is an array filled in with an order that maps
14290    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14291    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
14292    locations, and the function verifies that the register numbers are
14293    themselves ascending.  If CHECK_REGS is false, the register numbers
14294    are stored in the order they are found in the operands.  */
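/* For example (registers and offsets illustrative), the peephole window

	ldr	r0, [r5]
	ldr	r1, [r5, #4]

   yields *BASE == 5, *LOAD_OFFSET == 0, REGS == {0, 1} and a return value of
   1 (the ldmia case), so the pair can later be rewritten as
   "ldmia r5, {r0, r1}".  */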
14295 static int
14296 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14297 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14298 {
14299   int unsorted_regs[MAX_LDM_STM_OPS];
14300   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14301   int order[MAX_LDM_STM_OPS];
14302   int base_reg = -1;
14303   int i, ldm_case;
14304 
14305   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14306      easily extended if required.  */
14307   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14308 
14309   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14310 
14311   /* Loop over the operands and check that the memory references are
14312      suitable (i.e. immediate offsets from the same base register).  At
14313      the same time, extract the target register, and the memory
14314      offsets.  */
14315   for (i = 0; i < nops; i++)
14316     {
14317       rtx reg;
14318       rtx offset;
14319 
14320       /* Convert a subreg of a mem into the mem itself.  */
14321       if (GET_CODE (operands[nops + i]) == SUBREG)
14322 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14323 
14324       gcc_assert (MEM_P (operands[nops + i]));
14325 
14326       /* Don't reorder volatile memory references; it doesn't seem worth
14327 	 looking for the case where the order is ok anyway.  */
14328       if (MEM_VOLATILE_P (operands[nops + i]))
14329 	return 0;
14330 
14331       offset = const0_rtx;
14332 
14333       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14334 	   || (SUBREG_P (reg)
14335 	       && REG_P (reg = SUBREG_REG (reg))))
14336 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14337 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14338 		  || (SUBREG_P (reg)
14339 		      && REG_P (reg = SUBREG_REG (reg))))
14340 	      && (CONST_INT_P (offset
14341 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14342 	{
14343 	  if (i == 0)
14344 	    {
14345 	      base_reg = REGNO (reg);
14346 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14347 		return 0;
14348 	    }
14349 	  else if (base_reg != (int) REGNO (reg))
14350 	    /* Not addressed from the same base register.  */
14351 	    return 0;
14352 
14353 	  unsorted_regs[i] = (REG_P (operands[i])
14354 			      ? REGNO (operands[i])
14355 			      : REGNO (SUBREG_REG (operands[i])));
14356 
14357 	  /* If it isn't an integer register, or if it overwrites the
14358 	     base register but isn't the last insn in the list, then
14359 	     we can't do this.  */
14360 	  if (unsorted_regs[i] < 0
14361 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14362 	      || unsorted_regs[i] > 14
14363 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
14364 	    return 0;
14365 
14366           /* Don't allow SP to be loaded unless it is also the base
14367              register.  It guarantees that SP is reset correctly when
14368              an LDM instruction is interrupted.  Otherwise, we might
14369              end up with a corrupt stack.  */
14370           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14371             return 0;
14372 
14373 	  unsorted_offsets[i] = INTVAL (offset);
14374 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14375 	    order[0] = i;
14376 	}
14377       else
14378 	/* Not a suitable memory address.  */
14379 	return 0;
14380     }
14381 
14382   /* All the useful information has now been extracted from the
14383      operands into unsorted_regs and unsorted_offsets; additionally,
14384      order[0] has been set to the lowest offset in the list.  Sort
14385      the offsets into order, verifying that they are adjacent, and
14386      check that the register numbers are ascending.  */
14387   if (!compute_offset_order (nops, unsorted_offsets, order,
14388 			     check_regs ? unsorted_regs : NULL))
14389     return 0;
14390 
14391   if (saved_order)
14392     memcpy (saved_order, order, sizeof order);
14393 
14394   if (base)
14395     {
14396       *base = base_reg;
14397 
14398       for (i = 0; i < nops; i++)
14399 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
14400 
14401       *load_offset = unsorted_offsets[order[0]];
14402     }
14403 
14404   if (unsorted_offsets[order[0]] == 0)
14405     ldm_case = 1; /* ldmia */
14406   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14407     ldm_case = 2; /* ldmib */
14408   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14409     ldm_case = 3; /* ldmda */
14410   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14411     ldm_case = 4; /* ldmdb */
14412   else if (const_ok_for_arm (unsorted_offsets[order[0]])
14413 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
14414     ldm_case = 5;
14415   else
14416     return 0;
14417 
14418   if (!multiple_operation_profitable_p (false, nops,
14419 					ldm_case == 5
14420 					? unsorted_offsets[order[0]] : 0))
14421     return 0;
14422 
14423   return ldm_case;
14424 }
14425 
14426 /* Used to determine in a peephole whether a sequence of store instructions can
14427    be changed into a store-multiple instruction.
14428    NOPS is the number of separate store instructions we are examining.
14429    NOPS_TOTAL is the total number of instructions recognized by the peephole
14430    pattern.
14431    The first NOPS entries in OPERANDS are the source registers, the next
14432    NOPS entries are memory operands.  If this function is successful, *BASE is
14433    set to the common base register of the memory accesses; *LOAD_OFFSET is set
14434    to the first memory location's offset from that base register.  REGS is an
14435    array filled in with the source register numbers, REG_RTXS (if nonnull) is
14436    likewise filled with the corresponding rtx's.
14437    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14438    numbers to an ascending order of stores.
14439    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14440    from ascending memory locations, and the function verifies that the register
14441    numbers are themselves ascending.  If CHECK_REGS is false, the register
14442    numbers are stored in the order they are found in the operands.  */
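/* For example (registers and offsets illustrative), the store pair

	str	r0, [r5]
	str	r1, [r5, #4]

   yields *BASE == 5, *LOAD_OFFSET == 0, REGS == {0, 1} and a return value of
   1 (the stmia case), so the pair can later be rewritten as
   "stmia r5, {r0, r1}".  */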
14443 static int
14444 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14445 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14446 			 HOST_WIDE_INT *load_offset, bool check_regs)
14447 {
14448   int unsorted_regs[MAX_LDM_STM_OPS];
14449   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14450   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14451   int order[MAX_LDM_STM_OPS];
14452   int base_reg = -1;
14453   rtx base_reg_rtx = NULL;
14454   int i, stm_case;
14455 
14456   /* Write back of base register is currently only supported for Thumb 1.  */
14457   int base_writeback = TARGET_THUMB1;
14458 
14459   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14460      easily extended if required.  */
14461   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14462 
14463   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14464 
14465   /* Loop over the operands and check that the memory references are
14466      suitable (i.e. immediate offsets from the same base register).  At
14467      the same time, extract the target register, and the memory
14468      offsets.  */
14469   for (i = 0; i < nops; i++)
14470     {
14471       rtx reg;
14472       rtx offset;
14473 
14474       /* Convert a subreg of a mem into the mem itself.  */
14475       if (GET_CODE (operands[nops + i]) == SUBREG)
14476 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14477 
14478       gcc_assert (MEM_P (operands[nops + i]));
14479 
14480       /* Don't reorder volatile memory references; it doesn't seem worth
14481 	 looking for the case where the order is ok anyway.  */
14482       if (MEM_VOLATILE_P (operands[nops + i]))
14483 	return 0;
14484 
14485       offset = const0_rtx;
14486 
14487       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14488 	   || (SUBREG_P (reg)
14489 	       && REG_P (reg = SUBREG_REG (reg))))
14490 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14491 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14492 		  || (SUBREG_P (reg)
14493 		      && REG_P (reg = SUBREG_REG (reg))))
14494 	      && (CONST_INT_P (offset
14495 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14496 	{
14497 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
14498 				  ? operands[i] : SUBREG_REG (operands[i]));
14499 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14500 
14501 	  if (i == 0)
14502 	    {
14503 	      base_reg = REGNO (reg);
14504 	      base_reg_rtx = reg;
14505 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14506 		return 0;
14507 	    }
14508 	  else if (base_reg != (int) REGNO (reg))
14509 	    /* Not addressed from the same base register.  */
14510 	    return 0;
14511 
14512 	  /* If it isn't an integer register, then we can't do this.  */
14513 	  if (unsorted_regs[i] < 0
14514 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14515 	      /* The effects are unpredictable if the base register is
14516 		 both updated and stored.  */
14517 	      || (base_writeback && unsorted_regs[i] == base_reg)
14518 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14519 	      || unsorted_regs[i] > 14)
14520 	    return 0;
14521 
14522 	  unsorted_offsets[i] = INTVAL (offset);
14523 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14524 	    order[0] = i;
14525 	}
14526       else
14527 	/* Not a suitable memory address.  */
14528 	return 0;
14529     }
14530 
14531   /* All the useful information has now been extracted from the
14532      operands into unsorted_regs and unsorted_offsets; additionally,
14533      order[0] has been set to the lowest offset in the list.  Sort
14534      the offsets into order, verifying that they are adjacent, and
14535      check that the register numbers are ascending.  */
14536   if (!compute_offset_order (nops, unsorted_offsets, order,
14537 			     check_regs ? unsorted_regs : NULL))
14538     return 0;
14539 
14540   if (saved_order)
14541     memcpy (saved_order, order, sizeof order);
14542 
14543   if (base)
14544     {
14545       *base = base_reg;
14546 
14547       for (i = 0; i < nops; i++)
14548 	{
14549 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
14550 	  if (reg_rtxs)
14551 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14552 	}
14553 
14554       *load_offset = unsorted_offsets[order[0]];
14555     }
14556 
14557   if (TARGET_THUMB1
14558       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14559     return 0;
14560 
14561   if (unsorted_offsets[order[0]] == 0)
14562     stm_case = 1; /* stmia */
14563   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14564     stm_case = 2; /* stmib */
14565   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14566     stm_case = 3; /* stmda */
14567   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14568     stm_case = 4; /* stmdb */
14569   else
14570     return 0;
14571 
14572   if (!multiple_operation_profitable_p (false, nops, 0))
14573     return 0;
14574 
14575   return stm_case;
14576 }
14577 
14578 /* Routines for use in generating RTL.  */
14579 
14580 /* Generate a load-multiple instruction.  COUNT is the number of loads in
14581    the instruction; REGS and MEMS are arrays containing the operands.
14582    BASEREG is the base register to be used in addressing the memory operands.
14583    WBACK_OFFSET is nonzero if the instruction should update the base
14584    register.  */
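/* Roughly, with COUNT == 2, REGS == {0, 1} and WBACK_OFFSET == 8 the result
   has the shape

   (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
	      (set (reg:SI 0) (mem ...))
	      (set (reg:SI 1) (mem ...))])

   whereas, if the operation is judged unprofitable, a plain sequence of
   single loads (plus the base update) is returned instead.  */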
14585 
14586 static rtx
14587 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14588 			 HOST_WIDE_INT wback_offset)
14589 {
14590   int i = 0, j;
14591   rtx result;
14592 
14593   if (!multiple_operation_profitable_p (false, count, 0))
14594     {
14595       rtx seq;
14596 
14597       start_sequence ();
14598 
14599       for (i = 0; i < count; i++)
14600 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14601 
14602       if (wback_offset != 0)
14603 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14604 
14605       seq = get_insns ();
14606       end_sequence ();
14607 
14608       return seq;
14609     }
14610 
14611   result = gen_rtx_PARALLEL (VOIDmode,
14612 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14613   if (wback_offset != 0)
14614     {
14615       XVECEXP (result, 0, 0)
14616 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14617       i = 1;
14618       count++;
14619     }
14620 
14621   for (j = 0; i < count; i++, j++)
14622     XVECEXP (result, 0, i)
14623       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14624 
14625   return result;
14626 }
14627 
14628 /* Generate a store-multiple instruction.  COUNT is the number of stores in
14629    the instruction; REGS and MEMS are arrays containing the operands.
14630    BASEREG is the base register to be used in addressing the memory operands.
14631    WBACK_OFFSET is nonzero if the instruction should update the base
14632    register.  */
14633 
14634 static rtx
14635 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14636 			  HOST_WIDE_INT wback_offset)
14637 {
14638   int i = 0, j;
14639   rtx result;
14640 
14641   if (GET_CODE (basereg) == PLUS)
14642     basereg = XEXP (basereg, 0);
14643 
14644   if (!multiple_operation_profitable_p (false, count, 0))
14645     {
14646       rtx seq;
14647 
14648       start_sequence ();
14649 
14650       for (i = 0; i < count; i++)
14651 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14652 
14653       if (wback_offset != 0)
14654 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14655 
14656       seq = get_insns ();
14657       end_sequence ();
14658 
14659       return seq;
14660     }
14661 
14662   result = gen_rtx_PARALLEL (VOIDmode,
14663 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14664   if (wback_offset != 0)
14665     {
14666       XVECEXP (result, 0, 0)
14667 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14668       i = 1;
14669       count++;
14670     }
14671 
14672   for (j = 0; i < count; i++, j++)
14673     XVECEXP (result, 0, i)
14674       = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14675 
14676   return result;
14677 }
14678 
14679 /* Generate either a load-multiple or a store-multiple instruction.  This
14680    function can be used in situations where we can start with a single MEM
14681    rtx and adjust its address upwards.
14682    COUNT is the number of operations in the instruction, not counting a
14683    possible update of the base register.  REGS is an array containing the
14684    register operands.
14685    BASEREG is the base register to be used in addressing the memory operands,
14686    which are constructed from BASEMEM.
14687    WRITE_BACK specifies whether the generated instruction should include an
14688    update of the base register.
14689    OFFSETP is used to pass an offset to and from this function; this offset
14690    is not used when constructing the address (instead BASEMEM should have an
14691    appropriate offset in its address), it is used only for setting
14692    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
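/* For instance, with COUNT == 3 the memory operands are built at BASEREG,
   BASEREG + 4 and BASEREG + 8 (their MEM_OFFSETs derived from *OFFSETP),
   and when WRITE_BACK is true *OFFSETP is advanced by 12 and the emitted
   instruction updates the base register by the same amount.  */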
14693 
14694 static rtx
14695 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14696 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14697 {
14698   rtx mems[MAX_LDM_STM_OPS];
14699   HOST_WIDE_INT offset = *offsetp;
14700   int i;
14701 
14702   gcc_assert (count <= MAX_LDM_STM_OPS);
14703 
14704   if (GET_CODE (basereg) == PLUS)
14705     basereg = XEXP (basereg, 0);
14706 
14707   for (i = 0; i < count; i++)
14708     {
14709       rtx addr = plus_constant (Pmode, basereg, i * 4);
14710       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14711       offset += 4;
14712     }
14713 
14714   if (write_back)
14715     *offsetp = offset;
14716 
14717   if (is_load)
14718     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14719 				    write_back ? 4 * count : 0);
14720   else
14721     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14722 				     write_back ? 4 * count : 0);
14723 }
14724 
14725 rtx
14726 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14727 		       rtx basemem, HOST_WIDE_INT *offsetp)
14728 {
14729   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14730 			      offsetp);
14731 }
14732 
14733 rtx
14734 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14735 			rtx basemem, HOST_WIDE_INT *offsetp)
14736 {
14737   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14738 			      offsetp);
14739 }
14740 
14741 /* Called from a peephole2 expander to turn a sequence of loads into an
14742    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
14743    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
14744    is true if we can reorder the registers because they are used commutatively
14745    subsequently.
14746    Returns true iff we could generate a new instruction.  */
14747 
14748 bool
14749 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14750 {
14751   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14752   rtx mems[MAX_LDM_STM_OPS];
14753   int i, j, base_reg;
14754   rtx base_reg_rtx;
14755   HOST_WIDE_INT offset;
14756   int write_back = FALSE;
14757   int ldm_case;
14758   rtx addr;
14759 
14760   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14761 				     &base_reg, &offset, !sort_regs);
14762 
14763   if (ldm_case == 0)
14764     return false;
14765 
14766   if (sort_regs)
14767     for (i = 0; i < nops - 1; i++)
14768       for (j = i + 1; j < nops; j++)
14769 	if (regs[i] > regs[j])
14770 	  {
14771 	    int t = regs[i];
14772 	    regs[i] = regs[j];
14773 	    regs[j] = t;
14774 	  }
14775   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14776 
14777   if (TARGET_THUMB1)
14778     {
14779       gcc_assert (ldm_case == 1 || ldm_case == 5);
14780 
14781       /* Thumb-1 ldm uses writeback except if the base is loaded.  */
14782       write_back = true;
14783       for (i = 0; i < nops; i++)
14784 	if (base_reg == regs[i])
14785 	  write_back = false;
14786 
14787       /* Ensure the base is dead if it is updated.  */
14788       if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14789 	return false;
14790     }
14791 
14792   if (ldm_case == 5)
14793     {
14794       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14795       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14796       offset = 0;
14797       base_reg_rtx = newbase;
14798     }
14799 
14800   for (i = 0; i < nops; i++)
14801     {
14802       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14803       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14804 					      SImode, addr, 0);
14805     }
14806   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14807 				      write_back ? offset + i * 4 : 0));
14808   return true;
14809 }
14810 
14811 /* Called from a peephole2 expander to turn a sequence of stores into an
14812    STM instruction.  OPERANDS are the operands found by the peephole matcher;
14813    NOPS indicates how many separate stores we are trying to combine.
14814    Returns true iff we could generate a new instruction.  */
14815 
14816 bool
14817 gen_stm_seq (rtx *operands, int nops)
14818 {
14819   int i;
14820   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14821   rtx mems[MAX_LDM_STM_OPS];
14822   int base_reg;
14823   rtx base_reg_rtx;
14824   HOST_WIDE_INT offset;
14825   int write_back = FALSE;
14826   int stm_case;
14827   rtx addr;
14828   bool base_reg_dies;
14829 
14830   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14831 				      mem_order, &base_reg, &offset, true);
14832 
14833   if (stm_case == 0)
14834     return false;
14835 
14836   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14837 
14838   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14839   if (TARGET_THUMB1)
14840     {
14841       gcc_assert (base_reg_dies);
14842       write_back = TRUE;
14843     }
14844 
14845   if (stm_case == 5)
14846     {
14847       gcc_assert (base_reg_dies);
14848       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14849       offset = 0;
14850     }
14851 
14852   addr = plus_constant (Pmode, base_reg_rtx, offset);
14853 
14854   for (i = 0; i < nops; i++)
14855     {
14856       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14857       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14858 					      SImode, addr, 0);
14859     }
14860   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14861 				       write_back ? offset + i * 4 : 0));
14862   return true;
14863 }
14864 
14865 /* Called from a peephole2 expander to turn a sequence of stores that are
14866    preceded by constant loads into an STM instruction.  OPERANDS are the
14867    operands found by the peephole matcher; NOPS indicates how many
14868    separate stores we are trying to combine; there are 2 * NOPS
14869    instructions in the peephole.
14870    Returns true iff we could generate a new instruction.  */
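/* For example (registers illustrative), the four-insn window

	mov	r0, #1
	str	r0, [r4]
	mov	r0, #2
	str	r0, [r4, #4]

   reuses r0, so a free register (say r1) is substituted and the sequence can
   be rewritten as "mov r0, #1; mov r1, #2; stmia r4, {r0, r1}".  */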
14871 
14872 bool
14873 gen_const_stm_seq (rtx *operands, int nops)
14874 {
14875   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14876   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14877   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14878   rtx mems[MAX_LDM_STM_OPS];
14879   int base_reg;
14880   rtx base_reg_rtx;
14881   HOST_WIDE_INT offset;
14882   int write_back = FALSE;
14883   int stm_case;
14884   rtx addr;
14885   bool base_reg_dies;
14886   int i, j;
14887   HARD_REG_SET allocated;
14888 
14889   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14890 				      mem_order, &base_reg, &offset, false);
14891 
14892   if (stm_case == 0)
14893     return false;
14894 
14895   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14896 
14897   /* If the same register is used more than once, try to find a free
14898      register.  */
14899   CLEAR_HARD_REG_SET (allocated);
14900   for (i = 0; i < nops; i++)
14901     {
14902       for (j = i + 1; j < nops; j++)
14903 	if (regs[i] == regs[j])
14904 	  {
14905 	    rtx t = peep2_find_free_register (0, nops * 2,
14906 					      TARGET_THUMB1 ? "l" : "r",
14907 					      SImode, &allocated);
14908 	    if (t == NULL_RTX)
14909 	      return false;
14910 	    reg_rtxs[i] = t;
14911 	    regs[i] = REGNO (t);
14912 	  }
14913     }
14914 
14915   /* Compute an ordering that maps the register numbers to an ascending
14916      sequence.  */
14917   reg_order[0] = 0;
14918   for (i = 0; i < nops; i++)
14919     if (regs[i] < regs[reg_order[0]])
14920       reg_order[0] = i;
14921 
14922   for (i = 1; i < nops; i++)
14923     {
14924       int this_order = reg_order[i - 1];
14925       for (j = 0; j < nops; j++)
14926 	if (regs[j] > regs[reg_order[i - 1]]
14927 	    && (this_order == reg_order[i - 1]
14928 		|| regs[j] < regs[this_order]))
14929 	  this_order = j;
14930       reg_order[i] = this_order;
14931     }
14932 
14933   /* Ensure that registers that must be live after the instruction end
14934      up with the correct value.  */
14935   for (i = 0; i < nops; i++)
14936     {
14937       int this_order = reg_order[i];
14938       if ((this_order != mem_order[i]
14939 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14940 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14941 	return false;
14942     }
14943 
14944   /* Load the constants.  */
14945   for (i = 0; i < nops; i++)
14946     {
14947       rtx op = operands[2 * nops + mem_order[i]];
14948       sorted_regs[i] = regs[reg_order[i]];
14949       emit_move_insn (reg_rtxs[reg_order[i]], op);
14950     }
14951 
14952   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14953 
14954   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14955   if (TARGET_THUMB1)
14956     {
14957       gcc_assert (base_reg_dies);
14958       write_back = TRUE;
14959     }
14960 
14961   if (stm_case == 5)
14962     {
14963       gcc_assert (base_reg_dies);
14964       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14965       offset = 0;
14966     }
14967 
14968   addr = plus_constant (Pmode, base_reg_rtx, offset);
14969 
14970   for (i = 0; i < nops; i++)
14971     {
14972       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14973       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14974 					      SImode, addr, 0);
14975     }
14976   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14977 				       write_back ? offset + i * 4 : 0));
14978   return true;
14979 }
14980 
14981 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14982    unaligned copies on processors which support unaligned semantics for those
14983    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
14984    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14985    An interleave factor of 1 (the minimum) will perform no interleaving.
14986    Load/store multiple are used for aligned addresses where possible.  */
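/* With INTERLEAVE_FACTOR == 2, for example, each iteration of the main copy
   loop below issues two loads followed by two stores for an 8-byte chunk
   (load/load/store/store), whereas a factor of 1 alternates a single load
   and a single store for every 4 bytes.  */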
14987 
14988 static void
14989 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14990 				   HOST_WIDE_INT length,
14991 				   unsigned int interleave_factor)
14992 {
14993   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14994   int *regnos = XALLOCAVEC (int, interleave_factor);
14995   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14996   HOST_WIDE_INT i, j;
14997   HOST_WIDE_INT remaining = length, words;
14998   rtx halfword_tmp = NULL, byte_tmp = NULL;
14999   rtx dst, src;
15000   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15001   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15002   HOST_WIDE_INT srcoffset, dstoffset;
15003   HOST_WIDE_INT src_autoinc, dst_autoinc;
15004   rtx mem, addr;
15005 
15006   gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15007 
15008   /* Use hard registers if we have aligned source or destination so we can use
15009      load/store multiple with contiguous registers.  */
15010   if (dst_aligned || src_aligned)
15011     for (i = 0; i < interleave_factor; i++)
15012       regs[i] = gen_rtx_REG (SImode, i);
15013   else
15014     for (i = 0; i < interleave_factor; i++)
15015       regs[i] = gen_reg_rtx (SImode);
15016 
15017   dst = copy_addr_to_reg (XEXP (dstbase, 0));
15018   src = copy_addr_to_reg (XEXP (srcbase, 0));
15019 
15020   srcoffset = dstoffset = 0;
15021 
15022   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15023      For copying the last bytes we want to subtract this offset again.  */
15024   src_autoinc = dst_autoinc = 0;
15025 
15026   for (i = 0; i < interleave_factor; i++)
15027     regnos[i] = i;
15028 
15029   /* Copy BLOCK_SIZE_BYTES chunks.  */
15030 
15031   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15032     {
15033       /* Load words.  */
15034       if (src_aligned && interleave_factor > 1)
15035 	{
15036 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15037 					    TRUE, srcbase, &srcoffset));
15038 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
15039 	}
15040       else
15041 	{
15042 	  for (j = 0; j < interleave_factor; j++)
15043 	    {
15044 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15045 						 - src_autoinc));
15046 	      mem = adjust_automodify_address (srcbase, SImode, addr,
15047 					       srcoffset + j * UNITS_PER_WORD);
15048 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
15049 	    }
15050 	  srcoffset += block_size_bytes;
15051 	}
15052 
15053       /* Store words.  */
15054       if (dst_aligned && interleave_factor > 1)
15055 	{
15056 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15057 					     TRUE, dstbase, &dstoffset));
15058 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
15059 	}
15060       else
15061 	{
15062 	  for (j = 0; j < interleave_factor; j++)
15063 	    {
15064 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15065 						 - dst_autoinc));
15066 	      mem = adjust_automodify_address (dstbase, SImode, addr,
15067 					       dstoffset + j * UNITS_PER_WORD);
15068 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
15069 	    }
15070 	  dstoffset += block_size_bytes;
15071 	}
15072 
15073       remaining -= block_size_bytes;
15074     }
15075 
15076   /* Copy any whole words left (note these aren't interleaved with any
15077      subsequent halfword/byte load/stores in the interests of simplicity).  */
15078 
15079   words = remaining / UNITS_PER_WORD;
15080 
15081   gcc_assert (words < interleave_factor);
15082 
15083   if (src_aligned && words > 1)
15084     {
15085       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15086 					&srcoffset));
15087       src_autoinc += UNITS_PER_WORD * words;
15088     }
15089   else
15090     {
15091       for (j = 0; j < words; j++)
15092 	{
15093 	  addr = plus_constant (Pmode, src,
15094 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
15095 	  mem = adjust_automodify_address (srcbase, SImode, addr,
15096 					   srcoffset + j * UNITS_PER_WORD);
15097 	  if (src_aligned)
15098 	    emit_move_insn (regs[j], mem);
15099 	  else
15100 	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
15101 	}
15102       srcoffset += words * UNITS_PER_WORD;
15103     }
15104 
15105   if (dst_aligned && words > 1)
15106     {
15107       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15108 					 &dstoffset));
15109       dst_autoinc += words * UNITS_PER_WORD;
15110     }
15111   else
15112     {
15113       for (j = 0; j < words; j++)
15114 	{
15115 	  addr = plus_constant (Pmode, dst,
15116 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15117 	  mem = adjust_automodify_address (dstbase, SImode, addr,
15118 					   dstoffset + j * UNITS_PER_WORD);
15119 	  if (dst_aligned)
15120 	    emit_move_insn (mem, regs[j]);
15121 	  else
15122 	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
15123 	}
15124       dstoffset += words * UNITS_PER_WORD;
15125     }
15126 
15127   remaining -= words * UNITS_PER_WORD;
15128 
15129   gcc_assert (remaining < 4);
15130 
15131   /* Copy a halfword if necessary.  */
15132 
15133   if (remaining >= 2)
15134     {
15135       halfword_tmp = gen_reg_rtx (SImode);
15136 
15137       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15138       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15139       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15140 
15141       /* Either write out immediately, or delay until we've loaded the last
15142 	 byte, depending on interleave factor.  */
15143       if (interleave_factor == 1)
15144 	{
15145 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15146 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15147 	  emit_insn (gen_unaligned_storehi (mem,
15148 		       gen_lowpart (HImode, halfword_tmp)));
15149 	  halfword_tmp = NULL;
15150 	  dstoffset += 2;
15151 	}
15152 
15153       remaining -= 2;
15154       srcoffset += 2;
15155     }
15156 
15157   gcc_assert (remaining < 2);
15158 
15159   /* Copy last byte.  */
15160 
15161   if ((remaining & 1) != 0)
15162     {
15163       byte_tmp = gen_reg_rtx (SImode);
15164 
15165       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15166       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15167       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15168 
15169       if (interleave_factor == 1)
15170 	{
15171 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15172 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15173 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15174 	  byte_tmp = NULL;
15175 	  dstoffset++;
15176 	}
15177 
15178       remaining--;
15179       srcoffset++;
15180     }
15181 
15182   /* Store last halfword if we haven't done so already.  */
15183 
15184   if (halfword_tmp)
15185     {
15186       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15187       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15188       emit_insn (gen_unaligned_storehi (mem,
15189 		   gen_lowpart (HImode, halfword_tmp)));
15190       dstoffset += 2;
15191     }
15192 
15193   /* Likewise for last byte.  */
15194 
15195   if (byte_tmp)
15196     {
15197       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15198       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15199       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15200       dstoffset++;
15201     }
15202 
15203   gcc_assert (remaining == 0 && srcoffset == dstoffset);
15204 }
15205 
15206 /* From mips_adjust_block_mem:
15207 
15208    Helper function for doing a loop-based block operation on memory
15209    reference MEM.  Each iteration of the loop will operate on LENGTH
15210    bytes of MEM.
15211 
15212    Create a new base register for use within the loop and point it to
15213    the start of MEM.  Create a new memory reference that uses this
15214    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
15215 
15216 static void
15217 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15218 		      rtx *loop_mem)
15219 {
15220   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15221 
15222   /* Although the new mem does not refer to a known location,
15223      it does keep up to LENGTH bytes of alignment.  */
15224   *loop_mem = change_address (mem, BLKmode, *loop_reg);
15225   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15226 }
15227 
15228 /* From mips_block_move_loop:
15229 
15230    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15231    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
15232    the memory regions do not overlap.  */
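/* For example, LENGTH == 70 with BYTES_PER_ITER == 16 produces a loop that
   copies 64 bytes in four iterations, after which the remaining 6 bytes are
   handled by a straight (non-looping) copy.  */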
15233 
15234 static void
15235 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15236 			       unsigned int interleave_factor,
15237 			       HOST_WIDE_INT bytes_per_iter)
15238 {
15239   rtx src_reg, dest_reg, final_src, test;
15240   HOST_WIDE_INT leftover;
15241 
15242   leftover = length % bytes_per_iter;
15243   length -= leftover;
15244 
15245   /* Create registers and memory references for use within the loop.  */
15246   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15247   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15248 
15249   /* Calculate the value that SRC_REG should have after the last iteration of
15250      the loop.  */
15251   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15252 				   0, 0, OPTAB_WIDEN);
15253 
15254   /* Emit the start of the loop.  */
15255   rtx_code_label *label = gen_label_rtx ();
15256   emit_label (label);
15257 
15258   /* Emit the loop body.  */
15259   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15260 				     interleave_factor);
15261 
15262   /* Move on to the next block.  */
15263   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15264   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15265 
15266   /* Emit the loop condition.  */
15267   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15268   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15269 
15270   /* Mop up any left-over bytes.  */
15271   if (leftover)
15272     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15273 }
15274 
15275 /* Emit a block move when either the source or destination is unaligned (not
15276    aligned to a four-byte boundary).  This may need further tuning depending on
15277    core type, optimize_size setting, etc.  */
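/* For instance, a 40-byte unaligned copy compiled without -Os falls into the
   looping case below (length > 32): two 16-byte iterations with an
   interleave factor of 4, followed by a straight copy of the remaining
   8 bytes.  */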
15278 
15279 static int
15280 arm_cpymemqi_unaligned (rtx *operands)
15281 {
15282   HOST_WIDE_INT length = INTVAL (operands[2]);
15283 
15284   if (optimize_size)
15285     {
15286       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15287       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15288       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15289 	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
15290 	 or dst_aligned though: allow more interleaving in those cases since the
15291 	 resulting code can be smaller.  */
15292       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15293       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15294 
15295       if (length > 12)
15296 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
15297 				       interleave_factor, bytes_per_iter);
15298       else
15299 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
15300 					   interleave_factor);
15301     }
15302   else
15303     {
15304       /* Note that the loop created by arm_block_move_unaligned_loop may be
15305 	 subject to loop unrolling, which makes tuning this condition a little
15306 	 redundant.  */
15307       if (length > 32)
15308 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15309       else
15310 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15311     }
15312 
15313   return 1;
15314 }
15315 
15316 int
15317 arm_gen_cpymemqi (rtx *operands)
15318 {
15319   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15320   HOST_WIDE_INT srcoffset, dstoffset;
15321   rtx src, dst, srcbase, dstbase;
15322   rtx part_bytes_reg = NULL;
15323   rtx mem;
15324 
15325   if (!CONST_INT_P (operands[2])
15326       || !CONST_INT_P (operands[3])
15327       || INTVAL (operands[2]) > 64)
15328     return 0;
15329 
15330   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15331     return arm_cpymemqi_unaligned (operands);
15332 
15333   if (INTVAL (operands[3]) & 3)
15334     return 0;
15335 
15336   dstbase = operands[0];
15337   srcbase = operands[1];
15338 
15339   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15340   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15341 
15342   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15343   out_words_to_go = INTVAL (operands[2]) / 4;
15344   last_bytes = INTVAL (operands[2]) & 3;
15345   dstoffset = srcoffset = 0;
15346 
15347   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15348     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15349 
15350   while (in_words_to_go >= 2)
15351     {
15352       if (in_words_to_go > 4)
15353 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15354 					  TRUE, srcbase, &srcoffset));
15355       else
15356 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15357 					  src, FALSE, srcbase,
15358 					  &srcoffset));
15359 
15360       if (out_words_to_go)
15361 	{
15362 	  if (out_words_to_go > 4)
15363 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15364 					       TRUE, dstbase, &dstoffset));
15365 	  else if (out_words_to_go != 1)
15366 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15367 					       out_words_to_go, dst,
15368 					       (last_bytes == 0
15369 						? FALSE : TRUE),
15370 					       dstbase, &dstoffset));
15371 	  else
15372 	    {
15373 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15374 	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15375 	      if (last_bytes != 0)
15376 		{
15377 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15378 		  dstoffset += 4;
15379 		}
15380 	    }
15381 	}
15382 
15383       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15384       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15385     }
15386 
15387   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
15388   if (out_words_to_go)
15389     {
15390       rtx sreg;
15391 
15392       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15393       sreg = copy_to_reg (mem);
15394 
15395       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15396       emit_move_insn (mem, sreg);
15397       in_words_to_go--;
15398 
15399       gcc_assert (!in_words_to_go);	/* Sanity check */
15400     }
15401 
15402   if (in_words_to_go)
15403     {
15404       gcc_assert (in_words_to_go > 0);
15405 
15406       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15407       part_bytes_reg = copy_to_mode_reg (SImode, mem);
15408     }
15409 
15410   gcc_assert (!last_bytes || part_bytes_reg);
15411 
15412   if (BYTES_BIG_ENDIAN && last_bytes)
15413     {
15414       rtx tmp = gen_reg_rtx (SImode);
15415 
15416       /* The bytes we want are in the top end of the word.  */
15417       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15418 			      GEN_INT (8 * (4 - last_bytes))));
15419       part_bytes_reg = tmp;
15420 
15421       while (last_bytes)
15422 	{
15423 	  mem = adjust_automodify_address (dstbase, QImode,
15424 					   plus_constant (Pmode, dst,
15425 							  last_bytes - 1),
15426 					   dstoffset + last_bytes - 1);
15427 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15428 
15429 	  if (--last_bytes)
15430 	    {
15431 	      tmp = gen_reg_rtx (SImode);
15432 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15433 	      part_bytes_reg = tmp;
15434 	    }
15435 	}
15436 
15437     }
15438   else
15439     {
15440       if (last_bytes > 1)
15441 	{
15442 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15443 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15444 	  last_bytes -= 2;
15445 	  if (last_bytes)
15446 	    {
15447 	      rtx tmp = gen_reg_rtx (SImode);
15448 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15449 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15450 	      part_bytes_reg = tmp;
15451 	      dstoffset += 2;
15452 	    }
15453 	}
15454 
15455       if (last_bytes)
15456 	{
15457 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15458 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15459 	}
15460     }
15461 
15462   return 1;
15463 }
15464 
15465 /* Helper for gen_cpymem_ldrd_strd.  Increase the address of memory rtx
15466    by mode size.  */
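/* E.g. an SImode mem whose address is (plus (reg r4) (const_int 8)) becomes
   an SImode mem at (plus (reg r4) (const_int 12)) (register illustrative).  */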
15467 inline static rtx
15468 next_consecutive_mem (rtx mem)
15469 {
15470   machine_mode mode = GET_MODE (mem);
15471   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15472   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15473 
15474   return adjust_automodify_address (mem, mode, addr, offset);
15475 }
15476 
15477 /* Copy using LDRD/STRD instructions whenever possible.
15478    Returns true upon success. */
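/* For instance, a 14-byte copy with both buffers at least word-aligned is
   expanded below as one 8-byte double-word move (LDRD/STRD where available),
   one 4-byte word move and one 2-byte halfword move, all through fresh
   pseudo registers.  */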
15479 bool
15480 gen_cpymem_ldrd_strd (rtx *operands)
15481 {
15482   unsigned HOST_WIDE_INT len;
15483   HOST_WIDE_INT align;
15484   rtx src, dst, base;
15485   rtx reg0;
15486   bool src_aligned, dst_aligned;
15487   bool src_volatile, dst_volatile;
15488 
15489   gcc_assert (CONST_INT_P (operands[2]));
15490   gcc_assert (CONST_INT_P (operands[3]));
15491 
15492   len = UINTVAL (operands[2]);
15493   if (len > 64)
15494     return false;
15495 
15496   /* Maximum alignment we can assume for both src and dst buffers.  */
15497   align = INTVAL (operands[3]);
15498 
15499   if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15500     return false;
15501 
15502   /* Place src and dst addresses in registers
15503      and update the corresponding mem rtx.  */
15504   dst = operands[0];
15505   dst_volatile = MEM_VOLATILE_P (dst);
15506   dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15507   base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15508   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15509 
15510   src = operands[1];
15511   src_volatile = MEM_VOLATILE_P (src);
15512   src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15513   base = copy_to_mode_reg (SImode, XEXP (src, 0));
15514   src = adjust_automodify_address (src, VOIDmode, base, 0);
15515 
15516   if (!unaligned_access && !(src_aligned && dst_aligned))
15517     return false;
15518 
15519   if (src_volatile || dst_volatile)
15520     return false;
15521 
15522   /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
15523   if (!(dst_aligned || src_aligned))
15524     return arm_gen_cpymemqi (operands);
15525 
15526   /* If either src or dst is unaligned, we'll be accessing it as pairs
15527      of unaligned SImode accesses.  Otherwise we can generate DImode
15528      ldrd/strd instructions.  */
15529   src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15530   dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15531 
15532   while (len >= 8)
15533     {
15534       len -= 8;
15535       reg0 = gen_reg_rtx (DImode);
15536       rtx low_reg = NULL_RTX;
15537       rtx hi_reg = NULL_RTX;
15538 
15539       if (!src_aligned || !dst_aligned)
15540 	{
15541 	  low_reg = gen_lowpart (SImode, reg0);
15542 	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15543 	}
15544       if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15545 	emit_move_insn (reg0, src);
15546       else if (src_aligned)
15547 	emit_insn (gen_unaligned_loaddi (reg0, src));
15548       else
15549 	{
15550 	  emit_insn (gen_unaligned_loadsi (low_reg, src));
15551 	  src = next_consecutive_mem (src);
15552 	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
15553 	}
15554 
15555       if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15556 	emit_move_insn (dst, reg0);
15557       else if (dst_aligned)
15558 	emit_insn (gen_unaligned_storedi (dst, reg0));
15559       else
15560 	{
15561 	  emit_insn (gen_unaligned_storesi (dst, low_reg));
15562 	  dst = next_consecutive_mem (dst);
15563 	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
15564 	}
15565 
15566       src = next_consecutive_mem (src);
15567       dst = next_consecutive_mem (dst);
15568     }
15569 
15570   gcc_assert (len < 8);
15571   if (len >= 4)
15572     {
15573       /* More than a word but less than a double-word to copy.  Copy a word.  */
15574       reg0 = gen_reg_rtx (SImode);
15575       src = adjust_address (src, SImode, 0);
15576       dst = adjust_address (dst, SImode, 0);
15577       if (src_aligned)
15578         emit_move_insn (reg0, src);
15579       else
15580         emit_insn (gen_unaligned_loadsi (reg0, src));
15581 
15582       if (dst_aligned)
15583         emit_move_insn (dst, reg0);
15584       else
15585         emit_insn (gen_unaligned_storesi (dst, reg0));
15586 
15587       src = next_consecutive_mem (src);
15588       dst = next_consecutive_mem (dst);
15589       len -= 4;
15590     }
15591 
15592   if (len == 0)
15593     return true;
15594 
15595   /* Copy the remaining bytes.  */
15596   if (len >= 2)
15597     {
15598       dst = adjust_address (dst, HImode, 0);
15599       src = adjust_address (src, HImode, 0);
15600       reg0 = gen_reg_rtx (SImode);
15601       if (src_aligned)
15602         emit_insn (gen_zero_extendhisi2 (reg0, src));
15603       else
15604         emit_insn (gen_unaligned_loadhiu (reg0, src));
15605 
15606       if (dst_aligned)
15607         emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15608       else
15609         emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15610 
15611       src = next_consecutive_mem (src);
15612       dst = next_consecutive_mem (dst);
15613       if (len == 2)
15614         return true;
15615     }
15616 
15617   dst = adjust_address (dst, QImode, 0);
15618   src = adjust_address (src, QImode, 0);
15619   reg0 = gen_reg_rtx (QImode);
15620   emit_move_insn (reg0, src);
15621   emit_move_insn (dst, reg0);
15622   return true;
15623 }
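
/* As a rough illustration (not taken verbatim from any generated code), a
   15-byte copy between two doubleword-aligned, non-volatile buffers is
   expanded by the routine above into one DImode chunk followed by
   progressively smaller tail copies, i.e. approximately:

	ldrd	r4, r5, [r1]		@ bytes 0-7
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]		@ bytes 8-11
	str	r4, [r0, #8]
	ldrh	r4, [r1, #12]		@ bytes 12-13
	strh	r4, [r0, #12]
	ldrb	r4, [r1, #14]		@ byte 14
	strb	r4, [r0, #14]

   The exact registers, offsets and addressing forms depend on register
   allocation and on whether unaligned accesses are permitted.  */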
15624 
15625 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15626    into its component 32-bit subregs.  OP2 may be an immediate
15627    constant and we want to simplify it in that case.  */
15628 void
15629 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15630 			rtx *lo_op2, rtx *hi_op2)
15631 {
15632   *lo_op1 = gen_lowpart (SImode, op1);
15633   *hi_op1 = gen_highpart (SImode, op1);
15634   *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15635 				 subreg_lowpart_offset (SImode, DImode));
15636   *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15637 				 subreg_highpart_offset (SImode, DImode));
15638 }
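
/* For instance (illustrative only), if OP2 is the DImode constant
   0x0000000100000005, the simplify_gen_subreg calls above return
   (const_int 5) in *LO_OP2 and (const_int 1) in *HI_OP2; the
   lowpart/highpart offsets always select the least- and
   most-significant words respectively, whatever the endianness.  */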
15639 
15640 /* Select a dominance comparison mode if possible for a test of the general
15641    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
15642    COND_OR == DOM_CC_X_AND_Y => (X && Y)
15643    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15644    COND_OR == DOM_CC_X_OR_Y => (X || Y)
15645    In all cases OP will be either EQ or NE, but we don't need to know which
15646    here.  If we are unable to support a dominance comparison we return
15647    CC mode.  This will then fail to match for the RTL expressions that
15648    generate this call.  */
15649 machine_mode
15650 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15651 {
15652   enum rtx_code cond1, cond2;
15653   int swapped = 0;
15654 
15655   /* Currently we will probably get the wrong result if the individual
15656      comparisons are not simple.  This also ensures that it is safe to
15657      reverse a comparison if necessary.  */
15658   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15659        != CCmode)
15660       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15661 	  != CCmode))
15662     return CCmode;
15663 
15664   /* The if_then_else variant of this tests the second condition if the
15665      first passes, but is true if the first fails.  Reverse the first
15666      condition to get a true "inclusive-or" expression.  */
15667   if (cond_or == DOM_CC_NX_OR_Y)
15668     cond1 = reverse_condition (cond1);
15669 
15670   /* If the comparisons are not equal, and one doesn't dominate the other,
15671      then we can't do this.  */
15672   if (cond1 != cond2
15673       && !comparison_dominates_p (cond1, cond2)
15674       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15675     return CCmode;
15676 
15677   if (swapped)
15678     std::swap (cond1, cond2);
15679 
15680   switch (cond1)
15681     {
15682     case EQ:
15683       if (cond_or == DOM_CC_X_AND_Y)
15684 	return CC_DEQmode;
15685 
15686       switch (cond2)
15687 	{
15688 	case EQ: return CC_DEQmode;
15689 	case LE: return CC_DLEmode;
15690 	case LEU: return CC_DLEUmode;
15691 	case GE: return CC_DGEmode;
15692 	case GEU: return CC_DGEUmode;
15693 	default: gcc_unreachable ();
15694 	}
15695 
15696     case LT:
15697       if (cond_or == DOM_CC_X_AND_Y)
15698 	return CC_DLTmode;
15699 
15700       switch (cond2)
15701 	{
15702 	case  LT:
15703 	    return CC_DLTmode;
15704 	case LE:
15705 	  return CC_DLEmode;
15706 	case NE:
15707 	  return CC_DNEmode;
15708 	default:
15709 	  gcc_unreachable ();
15710 	}
15711 
15712     case GT:
15713       if (cond_or == DOM_CC_X_AND_Y)
15714 	return CC_DGTmode;
15715 
15716       switch (cond2)
15717 	{
15718 	case GT:
15719 	  return CC_DGTmode;
15720 	case GE:
15721 	  return CC_DGEmode;
15722 	case NE:
15723 	  return CC_DNEmode;
15724 	default:
15725 	  gcc_unreachable ();
15726 	}
15727 
15728     case LTU:
15729       if (cond_or == DOM_CC_X_AND_Y)
15730 	return CC_DLTUmode;
15731 
15732       switch (cond2)
15733 	{
15734 	case LTU:
15735 	  return CC_DLTUmode;
15736 	case LEU:
15737 	  return CC_DLEUmode;
15738 	case NE:
15739 	  return CC_DNEmode;
15740 	default:
15741 	  gcc_unreachable ();
15742 	}
15743 
15744     case GTU:
15745       if (cond_or == DOM_CC_X_AND_Y)
15746 	return CC_DGTUmode;
15747 
15748       switch (cond2)
15749 	{
15750 	case GTU:
15751 	  return CC_DGTUmode;
15752 	case GEU:
15753 	  return CC_DGEUmode;
15754 	case NE:
15755 	  return CC_DNEmode;
15756 	default:
15757 	  gcc_unreachable ();
15758 	}
15759 
15760     /* The remaining cases only occur when both comparisons are the
15761        same.  */
15762     case NE:
15763       gcc_assert (cond1 == cond2);
15764       return CC_DNEmode;
15765 
15766     case LE:
15767       gcc_assert (cond1 == cond2);
15768       return CC_DLEmode;
15769 
15770     case GE:
15771       gcc_assert (cond1 == cond2);
15772       return CC_DGEmode;
15773 
15774     case LEU:
15775       gcc_assert (cond1 == cond2);
15776       return CC_DLEUmode;
15777 
15778     case GEU:
15779       gcc_assert (cond1 == cond2);
15780       return CC_DGEUmode;
15781 
15782     default:
15783       gcc_unreachable ();
15784     }
15785 }
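
/* Worked example (illustrative): for a test of the form
   (ne (ior (eq r0 r1) (ge r2 r3)) (const_int 0)) with
   COND_OR == DOM_CC_X_OR_Y, COND1 is EQ and COND2 is GE.  Since
   comparison_dominates_p (EQ, GE) holds (x == y implies x >= y), no
   swap is needed and the EQ/GE entry in the switch above yields
   CC_DGEmode.  Had the second comparison been LT instead, neither
   condition would dominate the other and CCmode would be returned,
   making the corresponding conditional-compare pattern fail to
   match.  */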
15786 
15787 machine_mode
15788 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15789 {
15790   /* All floating point compares return CCFP if it is an equality
15791      comparison, and CCFPE otherwise.  */
15792   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15793     {
15794       switch (op)
15795 	{
15796 	case EQ:
15797 	case NE:
15798 	case UNORDERED:
15799 	case ORDERED:
15800 	case UNLT:
15801 	case UNLE:
15802 	case UNGT:
15803 	case UNGE:
15804 	case UNEQ:
15805 	case LTGT:
15806 	  return CCFPmode;
15807 
15808 	case LT:
15809 	case LE:
15810 	case GT:
15811 	case GE:
15812 	  return CCFPEmode;
15813 
15814 	default:
15815 	  gcc_unreachable ();
15816 	}
15817     }
15818 
15819   /* A compare with a shifted operand.  Because of canonicalization, the
15820      comparison will have to be swapped when we emit the assembler.  */
15821   if (GET_MODE (y) == SImode
15822       && (REG_P (y) || (SUBREG_P (y)))
15823       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15824 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15825 	  || GET_CODE (x) == ROTATERT))
15826     return CC_SWPmode;
15827 
15828   /* A widened compare of the sum of a value plus a carry against a
15829      constant.  This is a representation of RSC.  We want to swap the
15830      result of the comparison at output.  Not valid if the Z bit is
15831      needed.  */
15832   if (GET_MODE (x) == DImode
15833       && GET_CODE (x) == PLUS
15834       && arm_borrow_operation (XEXP (x, 1), DImode)
15835       && CONST_INT_P (y)
15836       && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15837 	   && (op == LE || op == GT))
15838 	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15839 	      && (op == LEU || op == GTU))))
15840     return CC_SWPmode;
15841 
15842   /* If X is a constant we want to use CC_RSBmode.  This is
15843      non-canonical, but arm_gen_compare_reg uses this to generate the
15844      correct canonical form.  */
15845   if (GET_MODE (y) == SImode
15846       && (REG_P (y) || SUBREG_P (y))
15847       && CONST_INT_P (x))
15848     return CC_RSBmode;
15849 
15850   /* This operation is performed swapped, but since we only rely on the Z
15851      flag we don't need an additional mode.  */
15852   if (GET_MODE (y) == SImode
15853       && (REG_P (y) || (SUBREG_P (y)))
15854       && GET_CODE (x) == NEG
15855       && (op == EQ || op == NE))
15856     return CC_Zmode;
15857 
15858   /* This is a special case that is used by combine to allow a
15859      comparison of a shifted byte load to be split into a zero-extend
15860      followed by a comparison of the shifted integer (only valid for
15861      equalities and unsigned inequalities).  */
15862   if (GET_MODE (x) == SImode
15863       && GET_CODE (x) == ASHIFT
15864       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15865       && GET_CODE (XEXP (x, 0)) == SUBREG
15866       && MEM_P (SUBREG_REG (XEXP (x, 0)))
15867       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15868       && (op == EQ || op == NE
15869 	  || op == GEU || op == GTU || op == LTU || op == LEU)
15870       && CONST_INT_P (y))
15871     return CC_Zmode;
15872 
15873   /* A construct for a conditional compare, if the false arm contains
15874      0, then both conditions must be true, otherwise either condition
15875      must be true.  Not all conditions are possible, so CCmode is
15876      returned if it can't be done.  */
15877   if (GET_CODE (x) == IF_THEN_ELSE
15878       && (XEXP (x, 2) == const0_rtx
15879 	  || XEXP (x, 2) == const1_rtx)
15880       && COMPARISON_P (XEXP (x, 0))
15881       && COMPARISON_P (XEXP (x, 1)))
15882     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15883 					 INTVAL (XEXP (x, 2)));
15884 
15885   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
15886   if (GET_CODE (x) == AND
15887       && (op == EQ || op == NE)
15888       && COMPARISON_P (XEXP (x, 0))
15889       && COMPARISON_P (XEXP (x, 1)))
15890     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15891 					 DOM_CC_X_AND_Y);
15892 
15893   if (GET_CODE (x) == IOR
15894       && (op == EQ || op == NE)
15895       && COMPARISON_P (XEXP (x, 0))
15896       && COMPARISON_P (XEXP (x, 1)))
15897     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15898 					 DOM_CC_X_OR_Y);
15899 
15900   /* An operation (on Thumb) where we want to test for a single bit.
15901      This is done by shifting that bit up into the top bit of a
15902      scratch register; we can then branch on the sign bit.  */
15903   if (TARGET_THUMB1
15904       && GET_MODE (x) == SImode
15905       && (op == EQ || op == NE)
15906       && GET_CODE (x) == ZERO_EXTRACT
15907       && XEXP (x, 1) == const1_rtx)
15908     return CC_Nmode;
15909 
15910   /* An operation that sets the condition codes as a side-effect, the
15911      V flag is not set correctly, so we can only use comparisons where
15912      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
15913      instead.)  */
15914   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
15915   if (GET_MODE (x) == SImode
15916       && y == const0_rtx
15917       && (op == EQ || op == NE || op == LT || op == GE)
15918       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15919 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
15920 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15921 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15922 	  || GET_CODE (x) == LSHIFTRT
15923 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15924 	  || GET_CODE (x) == ROTATERT
15925 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15926     return CC_NZmode;
15927 
15928   /* A comparison of ~reg with a const is really a special
15929      canonicalization of compare (~const, reg), which is a reverse
15930      subtract operation.  We may not get here if CONST is 0, but that
15931      doesn't matter because ~0 isn't a valid immediate for RSB.  */
15932   if (GET_MODE (x) == SImode
15933       && GET_CODE (x) == NOT
15934       && CONST_INT_P (y))
15935     return CC_RSBmode;
15936 
15937   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15938     return CC_Zmode;
15939 
15940   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15941       && GET_CODE (x) == PLUS
15942       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15943     return CC_Cmode;
15944 
15945   if (GET_MODE (x) == DImode
15946       && GET_CODE (x) == PLUS
15947       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15948       && CONST_INT_P (y)
15949       && UINTVAL (y) == 0x800000000
15950       && (op == GEU || op == LTU))
15951     return CC_ADCmode;
15952 
15953   if (GET_MODE (x) == DImode
15954       && (op == GE || op == LT)
15955       && GET_CODE (x) == SIGN_EXTEND
15956       && ((GET_CODE (y) == PLUS
15957 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15958 	  || arm_borrow_operation (y, DImode)))
15959     return CC_NVmode;
15960 
15961   if (GET_MODE (x) == DImode
15962       && (op == GEU || op == LTU)
15963       && GET_CODE (x) == ZERO_EXTEND
15964       && ((GET_CODE (y) == PLUS
15965 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15966 	  || arm_borrow_operation (y, DImode)))
15967     return CC_Bmode;
15968 
15969   if (GET_MODE (x) == DImode
15970       && (op == EQ || op == NE)
15971       && (GET_CODE (x) == PLUS
15972 	  || GET_CODE (x) == MINUS)
15973       && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15974 	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15975       && GET_CODE (y) == SIGN_EXTEND
15976       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15977     return CC_Vmode;
15978 
15979   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15980     return GET_MODE (x);
15981 
15982   return CCmode;
15983 }
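
/* Two illustrative cases (not exhaustive): comparing
   (ashift r1 (const_int 2)) against r0 falls into the first SImode test
   above and returns CC_SWPmode, because the shifted operand must become
   the second operand of the compare we finally emit; comparing
   (plus r0 r1) against (const_int 0) for EQ returns CC_NZmode, since
   the flags can be set as a side-effect of the ADDS itself, where only
   the N and Z bits describe a comparison of the result with zero.  */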
15984 
15985 /* X and Y are two (DImode) things to compare for the condition CODE.  Emit
15986    the sequence of instructions needed to generate a suitable condition
15987    code register.  Return the CC register result.  */
15988 static rtx
15989 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15990 {
15991   machine_mode mode;
15992   rtx cc_reg;
15993 
15994   /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
15995   gcc_assert (TARGET_32BIT);
15996   gcc_assert (!CONST_INT_P (x));
15997 
15998   rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15999 				  subreg_lowpart_offset (SImode, DImode));
16000   rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16001 				  subreg_highpart_offset (SImode, DImode));
16002   rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16003 				  subreg_lowpart_offset (SImode, DImode));
16004   rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16005 				  subreg_highpart_offset (SImode, DImode));
16006   switch (code)
16007     {
16008     case EQ:
16009     case NE:
16010       {
16011 	if (y_lo == const0_rtx || y_hi == const0_rtx)
16012 	  {
16013 	    if (y_lo != const0_rtx)
16014 	      {
16015 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16016 
16017 		gcc_assert (y_hi == const0_rtx);
16018 		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16019 		if (!arm_add_operand (y_lo, SImode))
16020 		  y_lo = force_reg (SImode, y_lo);
16021 		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16022 		x_lo = scratch2;
16023 	      }
16024 	    else if (y_hi != const0_rtx)
16025 	      {
16026 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16027 
16028 		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16029 		if (!arm_add_operand (y_hi, SImode))
16030 		  y_hi = force_reg (SImode, y_hi);
16031 		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16032 		x_hi = scratch2;
16033 	      }
16034 
16035 	    if (!scratch)
16036 	      {
16037 		gcc_assert (!reload_completed);
16038 		scratch = gen_rtx_SCRATCH (SImode);
16039 	      }
16040 
16041 	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16042 	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16043 
16044 	    rtx set
16045 	      = gen_rtx_SET (cc_reg,
16046 			     gen_rtx_COMPARE (CC_NZmode,
16047 					      gen_rtx_IOR (SImode, x_lo, x_hi),
16048 					      const0_rtx));
16049 	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16050 							      clobber)));
16051 	    return cc_reg;
16052 	  }
16053 
16054 	if (!arm_add_operand (y_lo, SImode))
16055 	  y_lo = force_reg (SImode, y_lo);
16056 
16057 	if (!arm_add_operand (y_hi, SImode))
16058 	  y_hi = force_reg (SImode, y_hi);
16059 
16060 	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16061 	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16062 	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16063 	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16064 	cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16065 
16066 	emit_insn (gen_rtx_SET (cc_reg,
16067 				gen_rtx_COMPARE (mode, conjunction,
16068 						 const0_rtx)));
16069 	return cc_reg;
16070       }
16071 
16072     case LT:
16073     case GE:
16074       {
16075 	if (y_lo == const0_rtx)
16076 	  {
16077 	    /* If the low word of y is 0, then this is simply a normal
16078 	       compare of the upper words.  */
16079 	    if (!arm_add_operand (y_hi, SImode))
16080 	      y_hi = force_reg (SImode, y_hi);
16081 
16082 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16083 	  }
16084 
16085 	if (!arm_add_operand (y_lo, SImode))
16086 	  y_lo = force_reg (SImode, y_lo);
16087 
16088 	rtx cmp1
16089 	  = gen_rtx_LTU (DImode,
16090 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16091 			 const0_rtx);
16092 
16093 	if (!scratch)
16094 	  scratch = gen_rtx_SCRATCH (SImode);
16095 
16096 	if (!arm_not_operand (y_hi, SImode))
16097 	  y_hi = force_reg (SImode, y_hi);
16098 
16099 	rtx_insn *insn;
16100 	if (y_hi == const0_rtx)
16101 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16102 							   cmp1));
16103 	else if (CONST_INT_P (y_hi))
16104 	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16105 							     y_hi, cmp1));
16106 	else
16107 	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16108 							 cmp1));
16109 	return SET_DEST (single_set (insn));
16110       }
16111 
16112     case LE:
16113     case GT:
16114       {
16115 	/* During expansion, we only expect to get here if y is a
16116 	   constant that we want to handle, otherwise we should have
16117 	   swapped the operands already.  */
16118 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16119 
16120 	if (!const_ok_for_arm (INTVAL (y_lo)))
16121 	  y_lo = force_reg (SImode, y_lo);
16122 
16123 	/* Perform a reverse subtract and compare.  */
16124 	rtx cmp1
16125 	  = gen_rtx_LTU (DImode,
16126 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16127 			 const0_rtx);
16128 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16129 								 x_hi, cmp1));
16130 	return SET_DEST (single_set (insn));
16131       }
16132 
16133     case LTU:
16134     case GEU:
16135       {
16136 	if (y_lo == const0_rtx)
16137 	  {
16138 	    /* If the low word of y is 0, then this is simply a normal
16139 	       compare of the upper words.  */
16140 	    if (!arm_add_operand (y_hi, SImode))
16141 	      y_hi = force_reg (SImode, y_hi);
16142 
16143 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16144 	  }
16145 
16146 	if (!arm_add_operand (y_lo, SImode))
16147 	  y_lo = force_reg (SImode, y_lo);
16148 
16149 	rtx cmp1
16150 	  = gen_rtx_LTU (DImode,
16151 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16152 			 const0_rtx);
16153 
16154 	if (!scratch)
16155 	  scratch = gen_rtx_SCRATCH (SImode);
16156 	if (!arm_not_operand (y_hi, SImode))
16157 	  y_hi = force_reg (SImode, y_hi);
16158 
16159 	rtx_insn *insn;
16160 	if (y_hi == const0_rtx)
16161 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16162 							  cmp1));
16163 	else if (CONST_INT_P (y_hi))
16164 	  {
16165 	    /* Constant is viewed as unsigned when zero-extended.  */
16166 	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16167 	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16168 							      y_hi, cmp1));
16169 	  }
16170 	else
16171 	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16172 							cmp1));
16173 	return SET_DEST (single_set (insn));
16174       }
16175 
16176     case LEU:
16177     case GTU:
16178       {
16179 	/* During expansion, we only expect to get here if y is a
16180 	   constant that we want to handle, otherwise we should have
16181 	   swapped the operands already.  */
16182 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16183 
16184 	if (!const_ok_for_arm (INTVAL (y_lo)))
16185 	  y_lo = force_reg (SImode, y_lo);
16186 
16187 	/* Perform a reverse subtract and compare.  */
16188 	rtx cmp1
16189 	  = gen_rtx_LTU (DImode,
16190 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16191 			 const0_rtx);
16192 	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16193 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16194 								x_hi, cmp1));
16195 	return SET_DEST (single_set (insn));
16196       }
16197 
16198     default:
16199       gcc_unreachable ();
16200     }
16201 }
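
/* Illustrative sketch: for an EQ or NE comparison of a DImode value X
   against zero, the code above builds (compare:CC_NZ (ior:SI x_lo x_hi)
   (const_int 0)) with a clobbered scratch, which is ultimately emitted
   as something like

	orrs	tmp, x_lo, x_hi

   so the branch only needs to test the Z flag.  The register names are
   placeholders filled in by register allocation.  */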
16202 
16203 /* X and Y are two things to compare using CODE.  Emit the compare insn and
16204    return the rtx for register 0 in the proper mode.  */
16205 rtx
16206 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16207 {
16208   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16209     return arm_gen_dicompare_reg (code, x, y, scratch);
16210 
16211   machine_mode mode = SELECT_CC_MODE (code, x, y);
16212   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16213   if (mode == CC_RSBmode)
16214     {
16215       if (!scratch)
16216 	scratch = gen_rtx_SCRATCH (SImode);
16217       emit_insn (gen_rsb_imm_compare_scratch (scratch,
16218 					      GEN_INT (~UINTVAL (x)), y));
16219     }
16220   else
16221     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16222 
16223   return cc_reg;
16224 }
16225 
16226 /* Generate a sequence of insns that will generate the correct return
16227    address mask depending on the physical architecture that the program
16228    is running on.  */
16229 rtx
16230 arm_gen_return_addr_mask (void)
16231 {
16232   rtx reg = gen_reg_rtx (Pmode);
16233 
16234   emit_insn (gen_return_addr_mask (reg));
16235   return reg;
16236 }
16237 
16238 void
16239 arm_reload_in_hi (rtx *operands)
16240 {
16241   rtx ref = operands[1];
16242   rtx base, scratch;
16243   HOST_WIDE_INT offset = 0;
16244 
16245   if (SUBREG_P (ref))
16246     {
16247       offset = SUBREG_BYTE (ref);
16248       ref = SUBREG_REG (ref);
16249     }
16250 
16251   if (REG_P (ref))
16252     {
16253       /* We have a pseudo which has been spilt onto the stack; there
16254 	 are two cases here: the first where there is a simple
16255 	 stack-slot replacement and a second where the stack-slot is
16256 	 out of range, or is used as a subreg.  */
16257       if (reg_equiv_mem (REGNO (ref)))
16258 	{
16259 	  ref = reg_equiv_mem (REGNO (ref));
16260 	  base = find_replacement (&XEXP (ref, 0));
16261 	}
16262       else
16263 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16264 	base = reg_equiv_address (REGNO (ref));
16265 
16266       /* PR 62554: If there is no equivalent memory location then just move
16267 	 the value as an SImode register move.  This happens when the target
16268 	 architecture variant does not have an HImode register move.  */
16269       if (base == NULL)
16270 	{
16271 	  gcc_assert (REG_P (operands[0]));
16272 	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16273 				gen_rtx_SUBREG (SImode, ref, 0)));
16274 	  return;
16275 	}
16276     }
16277   else
16278     base = find_replacement (&XEXP (ref, 0));
16279 
16280   /* Handle the case where the address is too complex to be offset by 1.  */
16281   if (GET_CODE (base) == MINUS
16282       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16283     {
16284       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16285 
16286       emit_set_insn (base_plus, base);
16287       base = base_plus;
16288     }
16289   else if (GET_CODE (base) == PLUS)
16290     {
16291       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16292       HOST_WIDE_INT hi, lo;
16293 
16294       offset += INTVAL (XEXP (base, 1));
16295       base = XEXP (base, 0);
16296 
16297       /* Rework the address into a legal sequence of insns.  */
16298       /* Valid range for lo is -4095 -> 4095 */
16299       lo = (offset >= 0
16300 	    ? (offset & 0xfff)
16301 	    : -((-offset) & 0xfff));
16302 
16303       /* Corner case, if lo is the max offset then we would be out of range
16304 	 once we have added the additional 1 below, so bump the msb into the
16305 	 pre-loading insn(s).  */
16306       if (lo == 4095)
16307 	lo &= 0x7ff;
16308 
16309       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16310 	     ^ (HOST_WIDE_INT) 0x80000000)
16311 	    - (HOST_WIDE_INT) 0x80000000);
16312 
16313       gcc_assert (hi + lo == offset);
16314 
16315       if (hi != 0)
16316 	{
16317 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16318 
16319 	  /* Get the base address; addsi3 knows how to handle constants
16320 	     that require more than one insn.  */
16321 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16322 	  base = base_plus;
16323 	  offset = lo;
16324 	}
16325     }
16326 
16327   /* Operands[2] may overlap operands[0] (though it won't overlap
16328      operands[1]), that's why we asked for a DImode reg -- so we can
16329      use the bit that does not overlap.  */
16330   if (REGNO (operands[2]) == REGNO (operands[0]))
16331     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16332   else
16333     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16334 
16335   emit_insn (gen_zero_extendqisi2 (scratch,
16336 				   gen_rtx_MEM (QImode,
16337 						plus_constant (Pmode, base,
16338 							       offset))));
16339   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16340 				   gen_rtx_MEM (QImode,
16341 						plus_constant (Pmode, base,
16342 							       offset + 1))));
16343   if (!BYTES_BIG_ENDIAN)
16344     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16345 		   gen_rtx_IOR (SImode,
16346 				gen_rtx_ASHIFT
16347 				(SImode,
16348 				 gen_rtx_SUBREG (SImode, operands[0], 0),
16349 				 GEN_INT (8)),
16350 				scratch));
16351   else
16352     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16353 		   gen_rtx_IOR (SImode,
16354 				gen_rtx_ASHIFT (SImode, scratch,
16355 						GEN_INT (8)),
16356 				gen_rtx_SUBREG (SImode, operands[0], 0)));
16357 }
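
/* Rough sketch of the sequence emitted above for a little-endian
   target (register names are placeholders):

	ldrb	rS, [rB, #off]		@ low byte into the scratch
	ldrb	rD, [rB, #off+1]	@ high byte into the destination
	orr	rD, rS, rD, lsl #8	@ combine into a halfword

   Big-endian targets shift the scratch instead, so the first byte
   loaded ends up in the most significant half.  */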
16358 
16359 /* Handle storing a half-word to memory during reload by synthesizing as two
16360    byte stores.  Take care not to clobber the input values until after we
16361    have moved them somewhere safe.  This code assumes that if the DImode
16362    scratch in operands[2] overlaps either the input value or output address
16363    in some way, then that value must die in this insn (we absolutely need
16364    two scratch registers for some corner cases).  */
16365 void
16366 arm_reload_out_hi (rtx *operands)
16367 {
16368   rtx ref = operands[0];
16369   rtx outval = operands[1];
16370   rtx base, scratch;
16371   HOST_WIDE_INT offset = 0;
16372 
16373   if (SUBREG_P (ref))
16374     {
16375       offset = SUBREG_BYTE (ref);
16376       ref = SUBREG_REG (ref);
16377     }
16378 
16379   if (REG_P (ref))
16380     {
16381       /* We have a pseudo which has been spilt onto the stack; there
16382 	 are two cases here: the first where there is a simple
16383 	 stack-slot replacement and a second where the stack-slot is
16384 	 out of range, or is used as a subreg.  */
16385       if (reg_equiv_mem (REGNO (ref)))
16386 	{
16387 	  ref = reg_equiv_mem (REGNO (ref));
16388 	  base = find_replacement (&XEXP (ref, 0));
16389 	}
16390       else
16391 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16392 	base = reg_equiv_address (REGNO (ref));
16393 
16394       /* PR 62254: If there is no equivalent memory location then just move
16395 	 the value as an SImode register move.  This happens when the target
16396 	 architecture variant does not have an HImode register move.  */
16397       if (base == NULL)
16398 	{
16399 	  gcc_assert (REG_P (outval) || SUBREG_P (outval));
16400 
16401 	  if (REG_P (outval))
16402 	    {
16403 	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16404 				    gen_rtx_SUBREG (SImode, outval, 0)));
16405 	    }
16406 	  else /* SUBREG_P (outval)  */
16407 	    {
16408 	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
16409 		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16410 				      SUBREG_REG (outval)));
16411 	      else
16412 		/* FIXME: Handle other cases ?  */
16413 		gcc_unreachable ();
16414 	    }
16415 	  return;
16416 	}
16417     }
16418   else
16419     base = find_replacement (&XEXP (ref, 0));
16420 
16421   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16422 
16423   /* Handle the case where the address is too complex to be offset by 1.  */
16424   if (GET_CODE (base) == MINUS
16425       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16426     {
16427       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16428 
16429       /* Be careful not to destroy OUTVAL.  */
16430       if (reg_overlap_mentioned_p (base_plus, outval))
16431 	{
16432 	  /* Updating base_plus might destroy outval, see if we can
16433 	     swap the scratch and base_plus.  */
16434 	  if (!reg_overlap_mentioned_p (scratch, outval))
16435 	    std::swap (scratch, base_plus);
16436 	  else
16437 	    {
16438 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16439 
16440 	      /* Be conservative and copy OUTVAL into the scratch now,
16441 		 this should only be necessary if outval is a subreg
16442 		 of something larger than a word.  */
16443 	      /* XXX Might this clobber base?  I can't see how it can,
16444 		 since scratch is known to overlap with OUTVAL, and
16445 		 must be wider than a word.  */
16446 	      emit_insn (gen_movhi (scratch_hi, outval));
16447 	      outval = scratch_hi;
16448 	    }
16449 	}
16450 
16451       emit_set_insn (base_plus, base);
16452       base = base_plus;
16453     }
16454   else if (GET_CODE (base) == PLUS)
16455     {
16456       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16457       HOST_WIDE_INT hi, lo;
16458 
16459       offset += INTVAL (XEXP (base, 1));
16460       base = XEXP (base, 0);
16461 
16462       /* Rework the address into a legal sequence of insns.  */
16463       /* Valid range for lo is -4095 -> 4095 */
16464       lo = (offset >= 0
16465 	    ? (offset & 0xfff)
16466 	    : -((-offset) & 0xfff));
16467 
16468       /* Corner case, if lo is the max offset then we would be out of range
16469 	 once we have added the additional 1 below, so bump the msb into the
16470 	 pre-loading insn(s).  */
16471       if (lo == 4095)
16472 	lo &= 0x7ff;
16473 
16474       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16475 	     ^ (HOST_WIDE_INT) 0x80000000)
16476 	    - (HOST_WIDE_INT) 0x80000000);
16477 
16478       gcc_assert (hi + lo == offset);
16479 
16480       if (hi != 0)
16481 	{
16482 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16483 
16484 	  /* Be careful not to destroy OUTVAL.  */
16485 	  if (reg_overlap_mentioned_p (base_plus, outval))
16486 	    {
16487 	      /* Updating base_plus might destroy outval, see if we
16488 		 can swap the scratch and base_plus.  */
16489 	      if (!reg_overlap_mentioned_p (scratch, outval))
16490 	        std::swap (scratch, base_plus);
16491 	      else
16492 		{
16493 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16494 
16495 		  /* Be conservative and copy outval into scratch now,
16496 		     this should only be necessary if outval is a
16497 		     subreg of something larger than a word.  */
16498 		  /* XXX Might this clobber base?  I can't see how it
16499 		     can, since scratch is known to overlap with
16500 		     outval.  */
16501 		  emit_insn (gen_movhi (scratch_hi, outval));
16502 		  outval = scratch_hi;
16503 		}
16504 	    }
16505 
16506 	  /* Get the base address; addsi3 knows how to handle constants
16507 	     that require more than one insn.  */
16508 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16509 	  base = base_plus;
16510 	  offset = lo;
16511 	}
16512     }
16513 
16514   if (BYTES_BIG_ENDIAN)
16515     {
16516       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16517 					 plus_constant (Pmode, base,
16518 							offset + 1)),
16519 			    gen_lowpart (QImode, outval)));
16520       emit_insn (gen_lshrsi3 (scratch,
16521 			      gen_rtx_SUBREG (SImode, outval, 0),
16522 			      GEN_INT (8)));
16523       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16524 								offset)),
16525 			    gen_lowpart (QImode, scratch)));
16526     }
16527   else
16528     {
16529       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16530 								offset)),
16531 			    gen_lowpart (QImode, outval)));
16532       emit_insn (gen_lshrsi3 (scratch,
16533 			      gen_rtx_SUBREG (SImode, outval, 0),
16534 			      GEN_INT (8)));
16535       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16536 					 plus_constant (Pmode, base,
16537 							offset + 1)),
16538 			    gen_lowpart (QImode, scratch)));
16539     }
16540 }
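
/* The little-endian store sequence produced above is, approximately,

	strb	rV, [rB, #off]		@ low byte of the value
	lsr	rS, rV, #8
	strb	rS, [rB, #off+1]	@ high byte

   with the two byte stores swapped on big-endian targets.  Register
   names are again placeholders.  */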
16541 
16542 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16543    (padded to the size of a word) should be passed in a register.  */
16544 
16545 static bool
16546 arm_must_pass_in_stack (const function_arg_info &arg)
16547 {
16548   if (TARGET_AAPCS_BASED)
16549     return must_pass_in_stack_var_size (arg);
16550   else
16551     return must_pass_in_stack_var_size_or_pad (arg);
16552 }
16553 
16554 
16555 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16556    byte of a stack argument has useful data.  For legacy APCS ABIs we use
16557    the default.  For AAPCS based ABIs small aggregate types are placed
16558    in the lowest memory address.  */
16559 
16560 static pad_direction
16561 arm_function_arg_padding (machine_mode mode, const_tree type)
16562 {
16563   if (!TARGET_AAPCS_BASED)
16564     return default_function_arg_padding (mode, type);
16565 
16566   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16567     return PAD_DOWNWARD;
16568 
16569   return PAD_UPWARD;
16570 }
16571 
16572 
16573 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16574    Return !BYTES_BIG_ENDIAN if the least significant byte of the
16575    register has useful data, and return the opposite if the most
16576    significant byte does.  */
16577 
16578 bool
16579 arm_pad_reg_upward (machine_mode mode,
16580                     tree type, int first ATTRIBUTE_UNUSED)
16581 {
16582   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16583     {
16584       /* For AAPCS, small aggregates, small fixed-point types,
16585 	 and small complex types are always padded upwards.  */
16586       if (type)
16587 	{
16588 	  if ((AGGREGATE_TYPE_P (type)
16589 	       || TREE_CODE (type) == COMPLEX_TYPE
16590 	       || FIXED_POINT_TYPE_P (type))
16591 	      && int_size_in_bytes (type) <= 4)
16592 	    return true;
16593 	}
16594       else
16595 	{
16596 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16597 	      && GET_MODE_SIZE (mode) <= 4)
16598 	    return true;
16599 	}
16600     }
16601 
16602   /* Otherwise, use default padding.  */
16603   return !BYTES_BIG_ENDIAN;
16604 }
16605 
16606 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16607    assuming that the address in the base register is word aligned.  */
16608 bool
16609 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16610 {
16611   HOST_WIDE_INT max_offset;
16612 
16613   /* Offset must be a multiple of 4 in Thumb mode.  */
16614   if (TARGET_THUMB2 && ((offset & 3) != 0))
16615     return false;
16616 
16617   if (TARGET_THUMB2)
16618     max_offset = 1020;
16619   else if (TARGET_ARM)
16620     max_offset = 255;
16621   else
16622     return false;
16623 
16624   return ((offset <= max_offset) && (offset >= -max_offset));
16625 }
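
/* Examples: an offset of 1020 is accepted in Thumb-2 state but not in
   ARM state (where the LDRD/STRD immediate range is only +/-255), while
   an offset of 255 is accepted in ARM state but not in Thumb-2 state
   (it is not a multiple of 4).  */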
16626 
16627 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16628    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
16629    Assumes that the address in the base register RN is word aligned.  Pattern
16630    guarantees that both memory accesses use the same base register,
16631    the offsets are constants within the range, and the gap between the offsets is 4.
16632    If reload is complete then check that the registers are legal.  WBACK indicates whether
16633    address is updated.  LOAD indicates whether memory access is load or store.  */
16634 bool
16635 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16636                        bool wback, bool load)
16637 {
16638   unsigned int t, t2, n;
16639 
16640   if (!reload_completed)
16641     return true;
16642 
16643   if (!offset_ok_for_ldrd_strd (offset))
16644     return false;
16645 
16646   t = REGNO (rt);
16647   t2 = REGNO (rt2);
16648   n = REGNO (rn);
16649 
16650   if ((TARGET_THUMB2)
16651       && ((wback && (n == t || n == t2))
16652           || (t == SP_REGNUM)
16653           || (t == PC_REGNUM)
16654           || (t2 == SP_REGNUM)
16655           || (t2 == PC_REGNUM)
16656           || (!load && (n == PC_REGNUM))
16657           || (load && (t == t2))
16658           /* Triggers Cortex-M3 LDRD errata.  */
16659           || (!wback && load && fix_cm3_ldrd && (n == t))))
16660     return false;
16661 
16662   if ((TARGET_ARM)
16663       && ((wback && (n == t || n == t2))
16664           || (t2 == PC_REGNUM)
16665           || (t % 2 != 0)   /* First destination register is not even.  */
16666           || (t2 != t + 1)
16667           /* PC can be used as base register (for offset addressing only),
16668              but it is deprecated.  */
16669           || (n == PC_REGNUM)))
16670     return false;
16671 
16672   return true;
16673 }
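
/* For example, in ARM state the transfer registers must form an
   even/odd pair: r4/r5 is acceptable, whereas r5/r6 or r5/r4 is not.
   In Thumb-2 state almost any pair of core registers is usable, subject
   to the SP/PC, writeback and Cortex-M3 erratum checks above.  */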
16674 
16675 /* Return true if a 64-bit access with alignment ALIGN and with a
16676    constant offset OFFSET from the base pointer is permitted on this
16677    architecture.  */
16678 static bool
16679 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16680 {
16681   return (unaligned_access
16682 	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16683 	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16684 }
16685 
16686 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
16687    operand MEM's address contains an immediate offset from the base
16688    register and has no side effects, in which case it sets BASE,
16689    OFFSET and ALIGN accordingly.  */
16690 static bool
16691 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16692 {
16693   rtx addr;
16694 
16695   gcc_assert (base != NULL && offset != NULL);
16696 
16697   /* TODO: Handle more general memory operand patterns, such as
16698      PRE_DEC and PRE_INC.  */
16699 
16700   if (side_effects_p (mem))
16701     return false;
16702 
16703   /* Can't deal with subregs.  */
16704   if (SUBREG_P (mem))
16705     return false;
16706 
16707   gcc_assert (MEM_P (mem));
16708 
16709   *offset = const0_rtx;
16710   *align = MEM_ALIGN (mem);
16711 
16712   addr = XEXP (mem, 0);
16713 
16714   /* If addr isn't valid for DImode, then we can't handle it.  */
16715   if (!arm_legitimate_address_p (DImode, addr,
16716 				 reload_in_progress || reload_completed))
16717     return false;
16718 
16719   if (REG_P (addr))
16720     {
16721       *base = addr;
16722       return true;
16723     }
16724   else if (GET_CODE (addr) == PLUS)
16725     {
16726       *base = XEXP (addr, 0);
16727       *offset = XEXP (addr, 1);
16728       return (REG_P (*base) && CONST_INT_P (*offset));
16729     }
16730 
16731   return false;
16732 }
16733 
16734 /* Called from a peephole2 to replace two word-size accesses with a
16735    single LDRD/STRD instruction.  Returns true iff we can generate a
16736    new instruction sequence.  That is, both accesses use the same base
16737    register and the gap between constant offsets is 4.  This function
16738    may reorder its operands to match ldrd/strd RTL templates.
16739    OPERANDS are the operands found by the peephole matcher;
16740    OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16741    corresponding memory operands.  LOAD indicates whether the access
16742    is load or store.  CONST_STORE indicates a store of constant
16743    integer values held in OPERANDS[4,5] and assumes that the pattern
16744    is of length 4 insn, for the purpose of checking dead registers.
16745    COMMUTE indicates that register operands may be reordered.  */
16746 bool
16747 gen_operands_ldrd_strd (rtx *operands, bool load,
16748                         bool const_store, bool commute)
16749 {
16750   int nops = 2;
16751   HOST_WIDE_INT offsets[2], offset, align[2];
16752   rtx base = NULL_RTX;
16753   rtx cur_base, cur_offset, tmp;
16754   int i, gap;
16755   HARD_REG_SET regset;
16756 
16757   gcc_assert (!const_store || !load);
16758   /* Check that the memory references are immediate offsets from the
16759      same base register.  Extract the base register, the destination
16760      registers, and the corresponding memory offsets.  */
16761   for (i = 0; i < nops; i++)
16762     {
16763       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16764 				 &align[i]))
16765         return false;
16766 
16767       if (i == 0)
16768         base = cur_base;
16769       else if (REGNO (base) != REGNO (cur_base))
16770         return false;
16771 
16772       offsets[i] = INTVAL (cur_offset);
16773       if (GET_CODE (operands[i]) == SUBREG)
16774         {
16775           tmp = SUBREG_REG (operands[i]);
16776           gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16777           operands[i] = tmp;
16778         }
16779     }
16780 
16781   /* Make sure there is no dependency between the individual loads.  */
16782   if (load && REGNO (operands[0]) == REGNO (base))
16783     return false; /* RAW */
16784 
16785   if (load && REGNO (operands[0]) == REGNO (operands[1]))
16786     return false; /* WAW */
16787 
16788   /* If the same input register is used in both stores
16789      when storing different constants, try to find a free register.
16790      For example, the code
16791 	mov r0, 0
16792 	str r0, [r2]
16793 	mov r0, 1
16794 	str r0, [r2, #4]
16795      can be transformed into
16796 	mov r1, 0
16797 	mov r0, 1
16798 	strd r1, r0, [r2]
16799      in Thumb mode assuming that r1 is free.
16800      For ARM mode do the same but only if the starting register
16801      can be made to be even.  */
16802   if (const_store
16803       && REGNO (operands[0]) == REGNO (operands[1])
16804       && INTVAL (operands[4]) != INTVAL (operands[5]))
16805     {
16806     if (TARGET_THUMB2)
16807       {
16808         CLEAR_HARD_REG_SET (regset);
16809         tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16810         if (tmp == NULL_RTX)
16811           return false;
16812 
16813         /* Use the new register in the first load to ensure that
16814            if the original input register is not dead after peephole,
16815            then it will have the correct constant value.  */
16816         operands[0] = tmp;
16817       }
16818     else if (TARGET_ARM)
16819       {
16820         int regno = REGNO (operands[0]);
16821         if (!peep2_reg_dead_p (4, operands[0]))
16822           {
16823             /* When the input register is even and is not dead after the
16824                pattern, it has to hold the second constant but we cannot
16825                form a legal STRD in ARM mode with this register as the second
16826                register.  */
16827             if (regno % 2 == 0)
16828               return false;
16829 
16830             /* Is regno-1 free? */
16831             SET_HARD_REG_SET (regset);
16832             CLEAR_HARD_REG_BIT(regset, regno - 1);
16833             tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16834             if (tmp == NULL_RTX)
16835               return false;
16836 
16837             operands[0] = tmp;
16838           }
16839         else
16840           {
16841             /* Find a DImode register.  */
16842             CLEAR_HARD_REG_SET (regset);
16843             tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16844             if (tmp != NULL_RTX)
16845               {
16846                 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16847                 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16848               }
16849             else
16850               {
16851                 /* Can we use the input register to form a DI register?  */
16852                 SET_HARD_REG_SET (regset);
16853                 CLEAR_HARD_REG_BIT(regset,
16854                                    regno % 2 == 0 ? regno + 1 : regno - 1);
16855                 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16856                 if (tmp == NULL_RTX)
16857                   return false;
16858                 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16859               }
16860           }
16861 
16862         gcc_assert (operands[0] != NULL_RTX);
16863         gcc_assert (operands[1] != NULL_RTX);
16864         gcc_assert (REGNO (operands[0]) % 2 == 0);
16865         gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16866       }
16867     }
16868 
16869   /* Make sure the instructions are ordered with lower memory access first.  */
16870   if (offsets[0] > offsets[1])
16871     {
16872       gap = offsets[0] - offsets[1];
16873       offset = offsets[1];
16874 
16875       /* Swap the instructions such that lower memory is accessed first.  */
16876       std::swap (operands[0], operands[1]);
16877       std::swap (operands[2], operands[3]);
16878       std::swap (align[0], align[1]);
16879       if (const_store)
16880         std::swap (operands[4], operands[5]);
16881     }
16882   else
16883     {
16884       gap = offsets[1] - offsets[0];
16885       offset = offsets[0];
16886     }
16887 
16888   /* Make sure accesses are to consecutive memory locations.  */
16889   if (gap != GET_MODE_SIZE (SImode))
16890     return false;
16891 
16892   if (!align_ok_ldrd_strd (align[0], offset))
16893     return false;
16894 
16895   /* Make sure we generate legal instructions.  */
16896   if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16897                              false, load))
16898     return true;
16899 
16900   /* In Thumb state, where registers are almost unconstrained, there
16901      is little hope to fix it.  */
16902   if (TARGET_THUMB2)
16903     return false;
16904 
16905   if (load && commute)
16906     {
16907       /* Try reordering registers.  */
16908       std::swap (operands[0], operands[1]);
16909       if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16910                                  false, load))
16911         return true;
16912     }
16913 
16914   if (const_store)
16915     {
16916       /* If input registers are dead after this pattern, they can be
16917          reordered or replaced by other registers that are free in the
16918          current pattern.  */
16919       if (!peep2_reg_dead_p (4, operands[0])
16920           || !peep2_reg_dead_p (4, operands[1]))
16921         return false;
16922 
16923       /* Try to reorder the input registers.  */
16924       /* For example, the code
16925            mov r0, 0
16926            mov r1, 1
16927            str r1, [r2]
16928            str r0, [r2, #4]
16929          can be transformed into
16930            mov r1, 0
16931            mov r0, 1
16932            strd r0, r1, [r2]
16933       */
16934       if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16935                                   false, false))
16936         {
16937           std::swap (operands[0], operands[1]);
16938           return true;
16939         }
16940 
16941       /* Try to find a free DI register.  */
16942       CLEAR_HARD_REG_SET (regset);
16943       add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16944       add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16945       while (true)
16946         {
16947           tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16948           if (tmp == NULL_RTX)
16949             return false;
16950 
16951           /* DREG must be an even-numbered register in DImode.
16952              Split it into SI registers.  */
16953           operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16954           operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16955           gcc_assert (operands[0] != NULL_RTX);
16956           gcc_assert (operands[1] != NULL_RTX);
16957           gcc_assert (REGNO (operands[0]) % 2 == 0);
16958           gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16959 
16960           return (operands_ok_ldrd_strd (operands[0], operands[1],
16961                                          base, offset,
16962                                          false, load));
16963         }
16964     }
16965 
16966   return false;
16967 }
16968 
16969 
16970 /* Return true if parallel execution of the two word-size accesses provided
16971    could be satisfied with a single LDRD/STRD instruction.  Two word-size
16972    accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16973    register operands and OPERANDS[2,3] are the corresponding memory operands.
16974    */
16975 bool
16976 valid_operands_ldrd_strd (rtx *operands, bool load)
16977 {
16978   int nops = 2;
16979   HOST_WIDE_INT offsets[2], offset, align[2];
16980   rtx base = NULL_RTX;
16981   rtx cur_base, cur_offset;
16982   int i, gap;
16983 
16984   /* Check that the memory references are immediate offsets from the
16985      same base register.  Extract the base register, the destination
16986      registers, and the corresponding memory offsets.  */
16987   for (i = 0; i < nops; i++)
16988     {
16989       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16990 				 &align[i]))
16991 	return false;
16992 
16993       if (i == 0)
16994 	base = cur_base;
16995       else if (REGNO (base) != REGNO (cur_base))
16996 	return false;
16997 
16998       offsets[i] = INTVAL (cur_offset);
16999       if (GET_CODE (operands[i]) == SUBREG)
17000 	return false;
17001     }
17002 
17003   if (offsets[0] > offsets[1])
17004     return false;
17005 
17006   gap = offsets[1] - offsets[0];
17007   offset = offsets[0];
17008 
17009   /* Make sure accesses are to consecutive memory locations.  */
17010   if (gap != GET_MODE_SIZE (SImode))
17011     return false;
17012 
17013   if (!align_ok_ldrd_strd (align[0], offset))
17014     return false;
17015 
17016   return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17017 				false, load);
17018 }
17019 
17020 
17021 /* Print a symbolic form of X to the debug file, F.  */
17022 static void
17023 arm_print_value (FILE *f, rtx x)
17024 {
17025   switch (GET_CODE (x))
17026     {
17027     case CONST_INT:
17028       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17029       return;
17030 
17031     case CONST_DOUBLE:
17032       {
17033 	char fpstr[20];
17034 	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17035 			 sizeof (fpstr), 0, 1);
17036 	fputs (fpstr, f);
17037       }
17038       return;
17039 
17040     case CONST_VECTOR:
17041       {
17042 	int i;
17043 
17044 	fprintf (f, "<");
17045 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17046 	  {
17047 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17048 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
17049 	      fputc (',', f);
17050 	  }
17051 	fprintf (f, ">");
17052       }
17053       return;
17054 
17055     case CONST_STRING:
17056       fprintf (f, "\"%s\"", XSTR (x, 0));
17057       return;
17058 
17059     case SYMBOL_REF:
17060       fprintf (f, "`%s'", XSTR (x, 0));
17061       return;
17062 
17063     case LABEL_REF:
17064       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17065       return;
17066 
17067     case CONST:
17068       arm_print_value (f, XEXP (x, 0));
17069       return;
17070 
17071     case PLUS:
17072       arm_print_value (f, XEXP (x, 0));
17073       fprintf (f, "+");
17074       arm_print_value (f, XEXP (x, 1));
17075       return;
17076 
17077     case PC:
17078       fprintf (f, "pc");
17079       return;
17080 
17081     default:
17082       fprintf (f, "????");
17083       return;
17084     }
17085 }
17086 
17087 /* Routines for manipulation of the constant pool.  */
17088 
17089 /* Arm instructions cannot load a large constant directly into a
17090    register; they have to come from a pc relative load.  The constant
17091    must therefore be placed in the addressable range of the pc
17092    relative load.  Depending on the precise pc relative load
17093    instruction the range is somewhere between 256 bytes and 4k.  This
17094    means that we often have to dump a constant inside a function, and
17095    generate code to branch around it.
17096 
17097    It is important to minimize this, since the branches will slow
17098    things down and make the code larger.
17099 
17100    Normally we can hide the table after an existing unconditional
17101    branch so that there is no interruption of the flow, but in the
17102    worst case the code looks like this:
17103 
17104 	ldr	rn, L1
17105 	...
17106 	b	L2
17107 	align
17108 	L1:	.long value
17109 	L2:
17110 	...
17111 
17112 	ldr	rn, L3
17113 	...
17114 	b	L4
17115 	align
17116 	L3:	.long value
17117 	L4:
17118 	...
17119 
17120    We fix this by performing a scan after scheduling, which notices
17121    which instructions need to have their operands fetched from the
17122    constant table and builds the table.
17123 
17124    The algorithm starts by building a table of all the constants that
17125    need fixing up and all the natural barriers in the function (places
17126    where a constant table can be dropped without breaking the flow).
17127    For each fixup we note how far the pc-relative replacement will be
17128    able to reach and the offset of the instruction into the function.
17129 
17130    Having built the table we then group the fixes together to form
17131    tables that are as large as possible (subject to addressing
17132    constraints) and emit each table of constants after the last
17133    barrier that is within range of all the instructions in the group.
17134    If a group does not contain a barrier, then we forcibly create one
17135    by inserting a jump instruction into the flow.  Once the table has
17136    been inserted, the insns are then modified to reference the
17137    relevant entry in the pool.
17138 
17139    Possible enhancements to the algorithm (not implemented) are:
17140 
17141    1) For some processors and object formats, there may be benefit in
17142    aligning the pools to the start of cache lines; this alignment
17143    would need to be taken into account when calculating addressability
17144    of a pool.  */
17145 
17146 /* These typedefs are located at the start of this file, so that
17147    they can be used in the prototypes there.  This comment is to
17148    remind readers of that fact so that the following structures
17149    can be understood more easily.
17150 
17151      typedef struct minipool_node    Mnode;
17152      typedef struct minipool_fixup   Mfix;  */
17153 
17154 struct minipool_node
17155 {
17156   /* Doubly linked chain of entries.  */
17157   Mnode * next;
17158   Mnode * prev;
17159   /* The maximum offset into the code at which this entry can be placed.
17160      While pushing fixes for forward references, all entries are sorted in
17161      order of increasing max_address.  */
17162   HOST_WIDE_INT max_address;
17163   /* Similarly for an entry inserted for a backwards ref.  */
17164   HOST_WIDE_INT min_address;
17165   /* The number of fixes referencing this entry.  This can become zero
17166      if we "unpush" an entry.  In this case we ignore the entry when we
17167      come to emit the code.  */
17168   int refcount;
17169   /* The offset from the start of the minipool.  */
17170   HOST_WIDE_INT offset;
17171   /* The value in the table.  */
17172   rtx value;
17173   /* The mode of value.  */
17174   machine_mode mode;
17175   /* The size of the value.  With iWMMXt enabled,
17176      sizes > 4 also imply an alignment of 8 bytes.  */
17177   int fix_size;
17178 };
17179 
17180 struct minipool_fixup
17181 {
17182   Mfix *            next;
17183   rtx_insn *        insn;
17184   HOST_WIDE_INT     address;
17185   rtx *             loc;
17186   machine_mode mode;
17187   int               fix_size;
17188   rtx               value;
17189   Mnode *           minipool;
17190   HOST_WIDE_INT     forwards;
17191   HOST_WIDE_INT     backwards;
17192 };
17193 
17194 /* Fixes less than a word need padding out to a word boundary.  */
17195 #define MINIPOOL_FIX_SIZE(mode) \
17196   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
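
/* For instance (with the usual mode sizes), MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while
   MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */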
17197 
17198 static Mnode *	minipool_vector_head;
17199 static Mnode *	minipool_vector_tail;
17200 static rtx_code_label	*minipool_vector_label;
17201 static int	minipool_pad;
17202 
17203 /* The linked list of all minipool fixes required for this function.  */
17204 Mfix * 		minipool_fix_head;
17205 Mfix * 		minipool_fix_tail;
17206 /* The fix entry for the current minipool, once it has been placed.  */
17207 Mfix *		minipool_barrier;
17208 
17209 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17210 #define JUMP_TABLES_IN_TEXT_SECTION 0
17211 #endif
17212 
17213 static HOST_WIDE_INT
17214 get_jump_table_size (rtx_jump_table_data *insn)
17215 {
17216   /* ADDR_VECs only take room if read-only data goes into the text
17217      section.  */
17218   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17219     {
17220       rtx body = PATTERN (insn);
17221       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17222       HOST_WIDE_INT size;
17223       HOST_WIDE_INT modesize;
17224 
17225       modesize = GET_MODE_SIZE (GET_MODE (body));
17226       size = modesize * XVECLEN (body, elt);
17227       switch (modesize)
17228 	{
17229 	case 1:
17230 	  /* Round up the size of a TBB table to a halfword boundary.  */
17231 	  size = (size + 1) & ~HOST_WIDE_INT_1;
17232 	  break;
17233 	case 2:
17234 	  /* No padding necessary for TBH.  */
17235 	  break;
17236 	case 4:
17237 	  /* Add two bytes for alignment on Thumb.  */
17238 	  if (TARGET_THUMB)
17239 	    size += 2;
17240 	  break;
17241 	default:
17242 	  gcc_unreachable ();
17243 	}
17244       return size;
17245     }
17246 
17247   return 0;
17248 }
17249 
17250 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17251    function descriptor) into a register and the GOT address into the
17252    FDPIC register, returning an rtx for the register holding the
17253    function address.  */
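/* The descriptor is assumed to be a pair of words, with the entry point at
   offset 0 and the GOT address at offset 4, which is what the two loads
   below rely on.  */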
17254 
17255 rtx
17256 arm_load_function_descriptor (rtx funcdesc)
17257 {
17258   rtx fnaddr_reg = gen_reg_rtx (Pmode);
17259   rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17260   rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17261   rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17262 
17263   emit_move_insn (fnaddr_reg, fnaddr);
17264 
17265   /* The ABI requires the entry point address to be loaded first, but
17266      since we cannot support lazy binding for lack of atomic load of
17267      two 32-bit values, we do not need to bother to prevent the
17268      previous load from being moved after that of the GOT address.  */
17269   emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17270 
17271   return fnaddr_reg;
17272 }
17273 
17274 /* Return the maximum amount of padding that will be inserted before
17275    label LABEL.  */
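/* For example, on a Thumb target (minimum instruction size 2) a label aligned
   to an 8-byte boundary can be preceded by up to 6 bytes of padding, while on
   an ARM target (minimum instruction size 4) the same alignment gives at most
   4 bytes.  */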
17276 static HOST_WIDE_INT
17277 get_label_padding (rtx label)
17278 {
17279   HOST_WIDE_INT align, min_insn_size;
17280 
17281   align = 1 << label_to_alignment (label).levels[0].log;
17282   min_insn_size = TARGET_THUMB ? 2 : 4;
17283   return align > min_insn_size ? align - min_insn_size : 0;
17284 }
17285 
17286 /* Move a minipool fix MP from its current location to before MAX_MP.
17287    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17288    constraints may need updating.  */
17289 static Mnode *
17290 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17291 			       HOST_WIDE_INT max_address)
17292 {
17293   /* The code below assumes these are different.  */
17294   gcc_assert (mp != max_mp);
17295 
17296   if (max_mp == NULL)
17297     {
17298       if (max_address < mp->max_address)
17299 	mp->max_address = max_address;
17300     }
17301   else
17302     {
17303       if (max_address > max_mp->max_address - mp->fix_size)
17304 	mp->max_address = max_mp->max_address - mp->fix_size;
17305       else
17306 	mp->max_address = max_address;
17307 
17308       /* Unlink MP from its current position.  Since max_mp is non-null,
17309        mp->prev must be non-null.  */
17310       mp->prev->next = mp->next;
17311       if (mp->next != NULL)
17312 	mp->next->prev = mp->prev;
17313       else
17314 	minipool_vector_tail = mp->prev;
17315 
17316       /* Re-insert it before MAX_MP.  */
17317       mp->next = max_mp;
17318       mp->prev = max_mp->prev;
17319       max_mp->prev = mp;
17320 
17321       if (mp->prev != NULL)
17322 	mp->prev->next = mp;
17323       else
17324 	minipool_vector_head = mp;
17325     }
17326 
17327   /* Save the new entry.  */
17328   max_mp = mp;
17329 
17330   /* Scan over the preceding entries and adjust their addresses as
17331      required.  */
17332   while (mp->prev != NULL
17333 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17334     {
17335       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17336       mp = mp->prev;
17337     }
17338 
17339   return max_mp;
17340 }
17341 
17342 /* Add a constant to the minipool for a forward reference.  Returns the
17343    node added or NULL if the constant will not fit in this pool.  */
17344 static Mnode *
17345 add_minipool_forward_ref (Mfix *fix)
17346 {
17347   /* If set, max_mp is the first pool_entry that has a lower
17348      constraint than the one we are trying to add.  */
17349   Mnode *       max_mp = NULL;
17350   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17351   Mnode *       mp;
17352 
17353   /* If the minipool starts before the end of FIX->INSN then this FIX
17354      cannot be placed into the current pool.  Furthermore, adding the
17355      new constant pool entry may cause the pool to start FIX_SIZE bytes
17356      earlier.  */
17357   if (minipool_vector_head &&
17358       (fix->address + get_attr_length (fix->insn)
17359        >= minipool_vector_head->max_address - fix->fix_size))
17360     return NULL;
17361 
17362   /* Scan the pool to see if a constant with the same value has
17363      already been added.  While we are doing this, also note the
17364      location where we must insert the constant if it doesn't already
17365      exist.  */
17366   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17367     {
17368       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17369 	  && fix->mode == mp->mode
17370 	  && (!LABEL_P (fix->value)
17371 	      || (CODE_LABEL_NUMBER (fix->value)
17372 		  == CODE_LABEL_NUMBER (mp->value)))
17373 	  && rtx_equal_p (fix->value, mp->value))
17374 	{
17375 	  /* More than one fix references this entry.  */
17376 	  mp->refcount++;
17377 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17378 	}
17379 
17380       /* Note the insertion point if necessary.  */
17381       if (max_mp == NULL
17382 	  && mp->max_address > max_address)
17383 	max_mp = mp;
17384 
17385       /* If we are inserting an 8-byte aligned quantity and
17386 	 we have not already found an insertion point, then
17387 	 make sure that all such 8-byte aligned quantities are
17388 	 placed at the start of the pool.  */
17389       if (ARM_DOUBLEWORD_ALIGN
17390 	  && max_mp == NULL
17391 	  && fix->fix_size >= 8
17392 	  && mp->fix_size < 8)
17393 	{
17394 	  max_mp = mp;
17395 	  max_address = mp->max_address;
17396 	}
17397     }
17398 
17399   /* The value is not currently in the minipool, so we need to create
17400      a new entry for it.  If MAX_MP is NULL, the entry will be put on
17401      the end of the list since the placement is less constrained than
17402      any existing entry.  Otherwise, we insert the new fix before
17403      MAX_MP and, if necessary, adjust the constraints on the other
17404      entries.  */
17405   mp = XNEW (Mnode);
17406   mp->fix_size = fix->fix_size;
17407   mp->mode = fix->mode;
17408   mp->value = fix->value;
17409   mp->refcount = 1;
17410   /* Not yet required for a backwards ref.  */
17411   mp->min_address = -65536;
17412 
17413   if (max_mp == NULL)
17414     {
17415       mp->max_address = max_address;
17416       mp->next = NULL;
17417       mp->prev = minipool_vector_tail;
17418 
17419       if (mp->prev == NULL)
17420 	{
17421 	  minipool_vector_head = mp;
17422 	  minipool_vector_label = gen_label_rtx ();
17423 	}
17424       else
17425 	mp->prev->next = mp;
17426 
17427       minipool_vector_tail = mp;
17428     }
17429   else
17430     {
17431       if (max_address > max_mp->max_address - mp->fix_size)
17432 	mp->max_address = max_mp->max_address - mp->fix_size;
17433       else
17434 	mp->max_address = max_address;
17435 
17436       mp->next = max_mp;
17437       mp->prev = max_mp->prev;
17438       max_mp->prev = mp;
17439       if (mp->prev != NULL)
17440 	mp->prev->next = mp;
17441       else
17442 	minipool_vector_head = mp;
17443     }
17444 
17445   /* Save the new entry.  */
17446   max_mp = mp;
17447 
17448   /* Scan over the preceding entries and adjust their addresses as
17449      required.  */
17450   while (mp->prev != NULL
17451 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17452     {
17453       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17454       mp = mp->prev;
17455     }
17456 
17457   return max_mp;
17458 }
17459 
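/* Like move_minipool_fix_forward_ref, but for an entry constrained by a
   backwards reference: apply the new minimum-address constraint MIN_ADDRESS
   to MP and, if MIN_MP is non-null, relink MP immediately after MIN_MP, then
   recompute the offsets and minimum addresses of the entries that follow.
   Returns the new insertion point.  */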
17460 static Mnode *
17461 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17462 				HOST_WIDE_INT  min_address)
17463 {
17464   HOST_WIDE_INT offset;
17465 
17466   /* The code below assumes these are different.  */
17467   gcc_assert (mp != min_mp);
17468 
17469   if (min_mp == NULL)
17470     {
17471       if (min_address > mp->min_address)
17472 	mp->min_address = min_address;
17473     }
17474   else
17475     {
17476       /* We will adjust this below if it is too loose.  */
17477       mp->min_address = min_address;
17478 
17479       /* Unlink MP from its current position.  Since min_mp is non-null,
17480 	 mp->next must be non-null.  */
17481       mp->next->prev = mp->prev;
17482       if (mp->prev != NULL)
17483 	mp->prev->next = mp->next;
17484       else
17485 	minipool_vector_head = mp->next;
17486 
17487       /* Reinsert it after MIN_MP.  */
17488       mp->prev = min_mp;
17489       mp->next = min_mp->next;
17490       min_mp->next = mp;
17491       if (mp->next != NULL)
17492 	mp->next->prev = mp;
17493       else
17494 	minipool_vector_tail = mp;
17495     }
17496 
17497   min_mp = mp;
17498 
17499   offset = 0;
17500   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17501     {
17502       mp->offset = offset;
17503       if (mp->refcount > 0)
17504 	offset += mp->fix_size;
17505 
17506       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17507 	mp->next->min_address = mp->min_address + mp->fix_size;
17508     }
17509 
17510   return min_mp;
17511 }
17512 
17513 /* Add a constant to the minipool for a backward reference.  Returns the
17514    node added or NULL if the constant will not fit in this pool.
17515 
17516    Note that the code for inserting a backwards reference can be
17517    somewhat confusing because the calculated offsets for each fix do
17518    not take into account the size of the pool (which is still under
17519    construction).  */
17520 static Mnode *
17521 add_minipool_backward_ref (Mfix *fix)
17522 {
17523   /* If set, min_mp is the last pool_entry that has a lower constraint
17524      than the one we are trying to add.  */
17525   Mnode *min_mp = NULL;
17526   /* This can be negative, since it is only a constraint.  */
17527   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
17528   Mnode *mp;
17529 
17530   /* If we can't reach the current pool from this insn, or if we can't
17531      insert this entry at the end of the pool without pushing other
17532      fixes out of range, then we don't try.  This ensures that we
17533      can't fail later on.  */
17534   if (min_address >= minipool_barrier->address
17535       || (minipool_vector_tail->min_address + fix->fix_size
17536 	  >= minipool_barrier->address))
17537     return NULL;
17538 
17539   /* Scan the pool to see if a constant with the same value has
17540      already been added.  While we are doing this, also note the
17541      location where we must insert the constant if it doesn't already
17542      exist.  */
17543   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17544     {
17545       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17546 	  && fix->mode == mp->mode
17547 	  && (!LABEL_P (fix->value)
17548 	      || (CODE_LABEL_NUMBER (fix->value)
17549 		  == CODE_LABEL_NUMBER (mp->value)))
17550 	  && rtx_equal_p (fix->value, mp->value)
17551 	  /* Check that there is enough slack to move this entry to the
17552 	     end of the table (this is conservative).  */
17553 	  && (mp->max_address
17554 	      > (minipool_barrier->address
17555 		 + minipool_vector_tail->offset
17556 		 + minipool_vector_tail->fix_size)))
17557 	{
17558 	  mp->refcount++;
17559 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17560 	}
17561 
17562       if (min_mp != NULL)
17563 	mp->min_address += fix->fix_size;
17564       else
17565 	{
17566 	  /* Note the insertion point if necessary.  */
17567 	  if (mp->min_address < min_address)
17568 	    {
17569 	      /* For now, we do not allow the insertion of 8-byte alignment
17570 		 requiring nodes anywhere but at the start of the pool.  */
17571 	      if (ARM_DOUBLEWORD_ALIGN
17572 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17573 		return NULL;
17574 	      else
17575 		min_mp = mp;
17576 	    }
17577 	  else if (mp->max_address
17578 		   < minipool_barrier->address + mp->offset + fix->fix_size)
17579 	    {
17580 	      /* Inserting before this entry would push the fix beyond
17581 		 its maximum address (which can happen if we have
17582 		 re-located a forwards fix); force the new fix to come
17583 		 after it.  */
17584 	      if (ARM_DOUBLEWORD_ALIGN
17585 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17586 		return NULL;
17587 	      else
17588 		{
17589 		  min_mp = mp;
17590 		  min_address = mp->min_address + fix->fix_size;
17591 		}
17592 	    }
17593 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
17594 	     aligned quantities.  */
17595 	  else if (ARM_DOUBLEWORD_ALIGN
17596 		   && fix->fix_size < 8
17597 		   && mp->fix_size >= 8)
17598 	    {
17599 	      min_mp = mp;
17600 	      min_address = mp->min_address + fix->fix_size;
17601 	    }
17602 	}
17603     }
17604 
17605   /* We need to create a new entry.  */
17606   mp = XNEW (Mnode);
17607   mp->fix_size = fix->fix_size;
17608   mp->mode = fix->mode;
17609   mp->value = fix->value;
17610   mp->refcount = 1;
17611   mp->max_address = minipool_barrier->address + 65536;
17612 
17613   mp->min_address = min_address;
17614 
17615   if (min_mp == NULL)
17616     {
17617       mp->prev = NULL;
17618       mp->next = minipool_vector_head;
17619 
17620       if (mp->next == NULL)
17621 	{
17622 	  minipool_vector_tail = mp;
17623 	  minipool_vector_label = gen_label_rtx ();
17624 	}
17625       else
17626 	mp->next->prev = mp;
17627 
17628       minipool_vector_head = mp;
17629     }
17630   else
17631     {
17632       mp->next = min_mp->next;
17633       mp->prev = min_mp;
17634       min_mp->next = mp;
17635 
17636       if (mp->next != NULL)
17637 	mp->next->prev = mp;
17638       else
17639 	minipool_vector_tail = mp;
17640     }
17641 
17642   /* Save the new entry.  */
17643   min_mp = mp;
17644 
17645   if (mp->prev)
17646     mp = mp->prev;
17647   else
17648     mp->offset = 0;
17649 
17650   /* Scan over the following entries and adjust their offsets.  */
17651   while (mp->next != NULL)
17652     {
17653       if (mp->next->min_address < mp->min_address + mp->fix_size)
17654 	mp->next->min_address = mp->min_address + mp->fix_size;
17655 
17656       if (mp->refcount)
17657 	mp->next->offset = mp->offset + mp->fix_size;
17658       else
17659 	mp->next->offset = mp->offset;
17660 
17661       mp = mp->next;
17662     }
17663 
17664   return min_mp;
17665 }
17666 
17667 static void
17668 assign_minipool_offsets (Mfix *barrier)
17669 {
17670   HOST_WIDE_INT offset = 0;
17671   Mnode *mp;
17672 
17673   minipool_barrier = barrier;
17674 
17675   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17676     {
17677       mp->offset = offset;
17678 
17679       if (mp->refcount > 0)
17680 	offset += mp->fix_size;
17681     }
17682 }
17683 
17684 /* Output the literal table.  */
17685 static void
17686 dump_minipool (rtx_insn *scan)
17687 {
17688   Mnode * mp;
17689   Mnode * nmp;
17690   int align64 = 0;
17691 
17692   if (ARM_DOUBLEWORD_ALIGN)
17693     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17694       if (mp->refcount > 0 && mp->fix_size >= 8)
17695 	{
17696 	  align64 = 1;
17697 	  break;
17698 	}
17699 
17700   if (dump_file)
17701     fprintf (dump_file,
17702 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17703 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17704 
17705   scan = emit_label_after (gen_label_rtx (), scan);
17706   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17707   scan = emit_label_after (minipool_vector_label, scan);
17708 
17709   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17710     {
17711       if (mp->refcount > 0)
17712 	{
17713 	  if (dump_file)
17714 	    {
17715 	      fprintf (dump_file,
17716 		       ";;  Offset %u, min %ld, max %ld ",
17717 		       (unsigned) mp->offset, (unsigned long) mp->min_address,
17718 		       (unsigned long) mp->max_address);
17719 	      arm_print_value (dump_file, mp->value);
17720 	      fputc ('\n', dump_file);
17721 	    }
17722 
17723 	  rtx val = copy_rtx (mp->value);
17724 
17725 	  switch (GET_MODE_SIZE (mp->mode))
17726 	    {
17727 #ifdef HAVE_consttable_1
17728 	    case 1:
17729 	      scan = emit_insn_after (gen_consttable_1 (val), scan);
17730 	      break;
17731 
17732 #endif
17733 #ifdef HAVE_consttable_2
17734 	    case 2:
17735 	      scan = emit_insn_after (gen_consttable_2 (val), scan);
17736 	      break;
17737 
17738 #endif
17739 #ifdef HAVE_consttable_4
17740 	    case 4:
17741 	      scan = emit_insn_after (gen_consttable_4 (val), scan);
17742 	      break;
17743 
17744 #endif
17745 #ifdef HAVE_consttable_8
17746 	    case 8:
17747 	      scan = emit_insn_after (gen_consttable_8 (val), scan);
17748 	      break;
17749 
17750 #endif
17751 #ifdef HAVE_consttable_16
17752 	    case 16:
17753               scan = emit_insn_after (gen_consttable_16 (val), scan);
17754               break;
17755 
17756 #endif
17757 	    default:
17758 	      gcc_unreachable ();
17759 	    }
17760 	}
17761 
17762       nmp = mp->next;
17763       free (mp);
17764     }
17765 
17766   minipool_vector_head = minipool_vector_tail = NULL;
17767   scan = emit_insn_after (gen_consttable_end (), scan);
17768   scan = emit_barrier_after (scan);
17769 }
17770 
17771 /* Return the cost of forcibly inserting a barrier after INSN.  */
17772 static int
17773 arm_barrier_cost (rtx_insn *insn)
17774 {
17775   /* Basing the location of the pool on the loop depth is preferable,
17776      but at the moment, the basic block information seems to be
17777      corrupted by this stage of the compilation.  */
17778   int base_cost = 50;
17779   rtx_insn *next = next_nonnote_insn (insn);
17780 
17781   if (next != NULL && LABEL_P (next))
17782     base_cost -= 20;
17783 
17784   switch (GET_CODE (insn))
17785     {
17786     case CODE_LABEL:
17787       /* It will always be better to place the table before the label, rather
17788 	 than after it.  */
17789       return 50;
17790 
17791     case INSN:
17792     case CALL_INSN:
17793       return base_cost;
17794 
17795     case JUMP_INSN:
17796       return base_cost - 10;
17797 
17798     default:
17799       return base_cost + 10;
17800     }
17801 }
17802 
17803 /* Find the best place in the insn stream in the range
17804    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17805    Create the barrier by inserting a jump and add a new fix entry for
17806    it.  */
17807 static Mfix *
17808 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17809 {
17810   HOST_WIDE_INT count = 0;
17811   rtx_barrier *barrier;
17812   rtx_insn *from = fix->insn;
17813   /* The instruction after which we will insert the jump.  */
17814   rtx_insn *selected = NULL;
17815   int selected_cost;
17816   /* The address at which the jump instruction will be placed.  */
17817   HOST_WIDE_INT selected_address;
17818   Mfix * new_fix;
17819   HOST_WIDE_INT max_count = max_address - fix->address;
17820   rtx_code_label *label = gen_label_rtx ();
17821 
17822   selected_cost = arm_barrier_cost (from);
17823   selected_address = fix->address;
17824 
17825   while (from && count < max_count)
17826     {
17827       rtx_jump_table_data *tmp;
17828       int new_cost;
17829 
17830       /* This code shouldn't have been called if there was a natural barrier
17831 	 within range.  */
17832       gcc_assert (!BARRIER_P (from));
17833 
17834       /* Count the length of this insn.  This must stay in sync with the
17835 	 code that pushes minipool fixes.  */
17836       if (LABEL_P (from))
17837 	count += get_label_padding (from);
17838       else
17839 	count += get_attr_length (from);
17840 
17841       /* If there is a jump table, add its length.  */
17842       if (tablejump_p (from, NULL, &tmp))
17843 	{
17844 	  count += get_jump_table_size (tmp);
17845 
17846 	  /* Jump tables aren't in a basic block, so base the cost on
17847 	     the dispatch insn.  If we select this location, we will
17848 	     still put the pool after the table.  */
17849 	  new_cost = arm_barrier_cost (from);
17850 
17851 	  if (count < max_count
17852 	      && (!selected || new_cost <= selected_cost))
17853 	    {
17854 	      selected = tmp;
17855 	      selected_cost = new_cost;
17856 	      selected_address = fix->address + count;
17857 	    }
17858 
17859 	  /* Continue after the dispatch table.  */
17860 	  from = NEXT_INSN (tmp);
17861 	  continue;
17862 	}
17863 
17864       new_cost = arm_barrier_cost (from);
17865 
17866       if (count < max_count
17867 	  && (!selected || new_cost <= selected_cost))
17868 	{
17869 	  selected = from;
17870 	  selected_cost = new_cost;
17871 	  selected_address = fix->address + count;
17872 	}
17873 
17874       from = NEXT_INSN (from);
17875     }
17876 
17877   /* Make sure that we found a place to insert the jump.  */
17878   gcc_assert (selected);
17879 
17880   /* Create a new JUMP_INSN that branches around a barrier.  */
17881   from = emit_jump_insn_after (gen_jump (label), selected);
17882   JUMP_LABEL (from) = label;
17883   barrier = emit_barrier_after (from);
17884   emit_label_after (label, barrier);
17885 
17886   /* Create a minipool barrier entry for the new barrier.  */
17887   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17888   new_fix->insn = barrier;
17889   new_fix->address = selected_address;
17890   new_fix->next = fix->next;
17891   fix->next = new_fix;
17892 
17893   return new_fix;
17894 }
17895 
17896 /* Record that there is a natural barrier in the insn stream at
17897    ADDRESS.  */
17898 static void
17899 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17900 {
17901   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17902 
17903   fix->insn = insn;
17904   fix->address = address;
17905 
17906   fix->next = NULL;
17907   if (minipool_fix_head != NULL)
17908     minipool_fix_tail->next = fix;
17909   else
17910     minipool_fix_head = fix;
17911 
17912   minipool_fix_tail = fix;
17913 }
17914 
17915 /* Record INSN, which will need fixing up to load a value from the
17916    minipool.  ADDRESS is the offset of the insn since the start of the
17917    function; LOC is a pointer to the part of the insn which requires
17918    fixing; VALUE is the constant that must be loaded, which is of type
17919    MODE.  */
17920 static void
17921 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17922 		   machine_mode mode, rtx value)
17923 {
17924   gcc_assert (!arm_disable_literal_pool);
17925   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17926 
17927   fix->insn = insn;
17928   fix->address = address;
17929   fix->loc = loc;
17930   fix->mode = mode;
17931   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17932   fix->value = value;
17933   fix->forwards = get_attr_pool_range (insn);
17934   fix->backwards = get_attr_neg_pool_range (insn);
17935   fix->minipool = NULL;
17936 
17937   /* If an insn doesn't have a range defined for it, then it isn't
17938      expecting to be reworked by this code.  Better to stop now than
17939      to generate duff assembly code.  */
17940   gcc_assert (fix->forwards || fix->backwards);
17941 
17942   /* If an entry requires 8-byte alignment then assume all constant pools
17943      require 4 bytes of padding.  Trying to do this later on a per-pool
17944      basis is awkward because existing pool entries have to be modified.  */
17945   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17946     minipool_pad = 4;
17947 
17948   if (dump_file)
17949     {
17950       fprintf (dump_file,
17951 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17952 	       GET_MODE_NAME (mode),
17953 	       INSN_UID (insn), (unsigned long) address,
17954 	       -1 * (long)fix->backwards, (long)fix->forwards);
17955       arm_print_value (dump_file, fix->value);
17956       fprintf (dump_file, "\n");
17957     }
17958 
17959   /* Add it to the chain of fixes.  */
17960   fix->next = NULL;
17961 
17962   if (minipool_fix_head != NULL)
17963     minipool_fix_tail->next = fix;
17964   else
17965     minipool_fix_head = fix;
17966 
17967   minipool_fix_tail = fix;
17968 }
17969 
17970 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
17971    constant inline; a constant whose synthesis would cost more than this
17972    is not worth expanding inline.  */
17973 int
17974 arm_max_const_double_inline_cost ()
17975 {
17976   return ((optimize_size || arm_ld_sched) ? 3 : 4);
17977 }
17978 
17979 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17980    Returns the number of insns needed, or 99 if we don't know how to
17981    do it.  */
17982 int
17983 arm_const_double_inline_cost (rtx val)
17984 {
17985   rtx lowpart, highpart;
17986   machine_mode mode;
17987 
17988   mode = GET_MODE (val);
17989 
17990   if (mode == VOIDmode)
17991     mode = DImode;
17992 
17993   gcc_assert (GET_MODE_SIZE (mode) == 8);
17994 
17995   lowpart = gen_lowpart (SImode, val);
17996   highpart = gen_highpart_mode (SImode, mode, val);
17997 
17998   gcc_assert (CONST_INT_P (lowpart));
17999   gcc_assert (CONST_INT_P (highpart));
18000 
18001   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18002 			    NULL_RTX, NULL_RTX, 0, 0)
18003 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18004 			      NULL_RTX, NULL_RTX, 0, 0));
18005 }
18006 
18007 /* Cost of loading a SImode constant.  */
18008 static inline int
18009 arm_const_inline_cost (enum rtx_code code, rtx val)
18010 {
18011   return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18012                            NULL_RTX, NULL_RTX, 1, 0);
18013 }
18014 
18015 /* Return true if it is worthwhile to split a 64-bit constant into two
18016    32-bit operations.  This is the case if optimizing for size, or
18017    if we have load delay slots, or if one 32-bit part can be done with
18018    a single data operation.  */
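/* For example (illustrative only), 0x1234567800000001 would be split because
   its low word, 1, is a valid ARM data-processing immediate even though its
   high word is not.  */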
18019 bool
18020 arm_const_double_by_parts (rtx val)
18021 {
18022   machine_mode mode = GET_MODE (val);
18023   rtx part;
18024 
18025   if (optimize_size || arm_ld_sched)
18026     return true;
18027 
18028   if (mode == VOIDmode)
18029     mode = DImode;
18030 
18031   part = gen_highpart_mode (SImode, mode, val);
18032 
18033   gcc_assert (CONST_INT_P (part));
18034 
18035   if (const_ok_for_arm (INTVAL (part))
18036       || const_ok_for_arm (~INTVAL (part)))
18037     return true;
18038 
18039   part = gen_lowpart (SImode, val);
18040 
18041   gcc_assert (CONST_INT_P (part));
18042 
18043   if (const_ok_for_arm (INTVAL (part))
18044       || const_ok_for_arm (~INTVAL (part)))
18045     return true;
18046 
18047   return false;
18048 }
18049 
18050 /* Return true if it is possible to inline both the high and low parts
18051    of a 64-bit constant into 32-bit data processing instructions.  */
18052 bool
18053 arm_const_double_by_immediates (rtx val)
18054 {
18055   machine_mode mode = GET_MODE (val);
18056   rtx part;
18057 
18058   if (mode == VOIDmode)
18059     mode = DImode;
18060 
18061   part = gen_highpart_mode (SImode, mode, val);
18062 
18063   gcc_assert (CONST_INT_P (part));
18064 
18065   if (!const_ok_for_arm (INTVAL (part)))
18066     return false;
18067 
18068   part = gen_lowpart (SImode, val);
18069 
18070   gcc_assert (CONST_INT_P (part));
18071 
18072   if (!const_ok_for_arm (INTVAL (part)))
18073     return false;
18074 
18075   return true;
18076 }
18077 
18078 /* Scan INSN and note any of its operands that need fixing.
18079    If DO_PUSHES is false we do not actually push any of the fixups
18080    needed.  */
18081 static void
18082 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18083 {
18084   int opno;
18085 
18086   extract_constrain_insn (insn);
18087 
18088   if (recog_data.n_alternatives == 0)
18089     return;
18090 
18091   /* Fill in recog_op_alt with information about the constraints of
18092      this insn.  */
18093   preprocess_constraints (insn);
18094 
18095   const operand_alternative *op_alt = which_op_alt ();
18096   for (opno = 0; opno < recog_data.n_operands; opno++)
18097     {
18098       /* Things we need to fix can only occur in inputs.  */
18099       if (recog_data.operand_type[opno] != OP_IN)
18100 	continue;
18101 
18102       /* If this alternative is a memory reference, then any mention
18103 	 of constants in this alternative is really to fool reload
18104 	 into allowing us to accept one there.  We need to fix them up
18105 	 now so that we output the right code.  */
18106       if (op_alt[opno].memory_ok)
18107 	{
18108 	  rtx op = recog_data.operand[opno];
18109 
18110 	  if (CONSTANT_P (op))
18111 	    {
18112 	      if (do_pushes)
18113 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18114 				   recog_data.operand_mode[opno], op);
18115 	    }
18116 	  else if (MEM_P (op)
18117 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18118 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18119 	    {
18120 	      if (do_pushes)
18121 		{
18122 		  rtx cop = avoid_constant_pool_reference (op);
18123 
18124 		  /* Casting the address of something to a mode narrower
18125 		     than a word can cause avoid_constant_pool_reference()
18126 		     to return the pool reference itself.  That's no good to
18127 		     us here.  Let's just hope that we can use the
18128 		     constant pool value directly.  */
18129 		  if (op == cop)
18130 		    cop = get_pool_constant (XEXP (op, 0));
18131 
18132 		  push_minipool_fix (insn, address,
18133 				     recog_data.operand_loc[opno],
18134 				     recog_data.operand_mode[opno], cop);
18135 		}
18136 
18137 	    }
18138 	}
18139     }
18140 
18141   return;
18142 }
18143 
18144 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18145    and unions in the context of ARMv8-M Security Extensions.  It is used as a
18146    helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18147    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18148    or four masks, depending on whether it is being computed for a
18149    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18150    respectively.  The tree for the type of the argument or a field within an
18151    argument is passed in ARG_TYPE, the current register this argument or field
18152    starts in is kept in the pointer REGNO and updated accordingly, the bit this
18153    argument or field starts at is passed in STARTING_BIT and the last used bit
18154    is kept in LAST_USED_BIT which is also updated accordingly.  */
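/* Purely as an illustration: for an argument of type
   struct { uint8_t a; uint16_t b; } starting at bit 0 of r0, field A occupies
   bits 0-7 and field B (halfword aligned) bits 16-31, so bits 8-15 are padding
   and the mask 0xff00 is accumulated into the padding_bits_to_clear entry for
   r0.  */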
18155 
18156 static unsigned HOST_WIDE_INT
18157 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18158 			       uint32_t * padding_bits_to_clear,
18159 			       unsigned starting_bit, int * last_used_bit)
18160 
18161 {
18162   unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18163 
18164   if (TREE_CODE (arg_type) == RECORD_TYPE)
18165     {
18166       unsigned current_bit = starting_bit;
18167       tree field;
18168       long int offset, size;
18169 
18170 
18171       field = TYPE_FIELDS (arg_type);
18172       while (field)
18173 	{
18174 	  /* The offset within a structure is always an offset from
18175 	     the start of that structure.  Make sure we take that into account
18176 	     in the calculation of the register-based offset that we use here.  */
18177 	  offset = starting_bit;
18178 	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18179 	  offset %= 32;
18180 
18181 	  /* This is the actual size of the field, for bitfields this is the
18182 	     bitfield width and not the container size.  */
18183 	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18184 
18185 	  if (*last_used_bit != offset)
18186 	    {
18187 	      if (offset < *last_used_bit)
18188 		{
18189 		  /* This field's offset is before the 'last_used_bit'; that
18190 		     means this field goes in the next register.  So we need to
18191 		     pad the rest of the current register and increase the
18192 		     register number.  */
18193 		  uint32_t mask;
18194 		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18195 		  mask++;
18196 
18197 		  padding_bits_to_clear[*regno] |= mask;
18198 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18199 		  (*regno)++;
18200 		}
18201 	      else
18202 		{
18203 		  /* Otherwise we pad the bits between the last field's end and
18204 		     the start of the new field.  */
18205 		  uint32_t mask;
18206 
18207 		  mask = ((uint32_t)-1) >> (32 - offset);
18208 		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18209 		  padding_bits_to_clear[*regno] |= mask;
18210 		}
18211 	      current_bit = offset;
18212 	    }
18213 
18214 	  /* Calculate further padding bits for inner structs/unions too.  */
18215 	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18216 	    {
18217 	      *last_used_bit = current_bit;
18218 	      not_to_clear_reg_mask
18219 		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18220 						  padding_bits_to_clear, offset,
18221 						  last_used_bit);
18222 	    }
18223 	  else
18224 	    {
18225 	      /* Update 'current_bit' with this field's size.  If the
18226 		 'current_bit' lies in a subsequent register, update 'regno' and
18227 		 reset 'current_bit' to point to the current bit in that new
18228 		 register.  */
18229 	      current_bit += size;
18230 	      while (current_bit >= 32)
18231 		{
18232 		  current_bit -= 32;
18233 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18234 		  (*regno)++;
18235 		}
18236 	      *last_used_bit = current_bit;
18237 	    }
18238 
18239 	  field = TREE_CHAIN (field);
18240 	}
18241       not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18242     }
18243   else if (TREE_CODE (arg_type) == UNION_TYPE)
18244     {
18245       tree field, field_t;
18246       int i, regno_t, field_size;
18247       int max_reg = -1;
18248       int max_bit = -1;
18249       uint32_t mask;
18250       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18251 	= {-1, -1, -1, -1};
18252 
18253       /* To compute the padding bits in a union we only consider bits as
18254 	 padding bits if they are always either a padding bit or fall outside a
18255 	 field's size for all fields in the union.  */
18256       field = TYPE_FIELDS (arg_type);
18257       while (field)
18258 	{
18259 	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18260 	    = {0U, 0U, 0U, 0U};
18261 	  int last_used_bit_t = *last_used_bit;
18262 	  regno_t = *regno;
18263 	  field_t = TREE_TYPE (field);
18264 
18265 	  /* If the field's type is either a record or a union make sure to
18266 	     compute their padding bits too.  */
18267 	  if (RECORD_OR_UNION_TYPE_P (field_t))
18268 	    not_to_clear_reg_mask
18269 	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18270 						&padding_bits_to_clear_t[0],
18271 						starting_bit, &last_used_bit_t);
18272 	  else
18273 	    {
18274 	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18275 	      regno_t = (field_size / 32) + *regno;
18276 	      last_used_bit_t = (starting_bit + field_size) % 32;
18277 	    }
18278 
18279 	  for (i = *regno; i < regno_t; i++)
18280 	    {
18281 	      /* For all but the last register used by this field only keep the
18282 		 padding bits that were padding bits in this field.  */
18283 	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18284 	    }
18285 
18286 	    /* For the last register, keep all padding bits that were padding
18287 	       bits in this field and any padding bits that are still valid
18288 	       as padding bits but fall outside of this field's size.  */
18289 	    mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18290 	    padding_bits_to_clear_res[regno_t]
18291 	      &= padding_bits_to_clear_t[regno_t] | mask;
18292 
18293 	  /* Update the maximum size of the fields in terms of registers used
18294 	     ('max_reg') and the 'last_used_bit' in said register.  */
18295 	  if (max_reg < regno_t)
18296 	    {
18297 	      max_reg = regno_t;
18298 	      max_bit = last_used_bit_t;
18299 	    }
18300 	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
18301 	    max_bit = last_used_bit_t;
18302 
18303 	  field = TREE_CHAIN (field);
18304 	}
18305 
18306       /* Update the current padding_bits_to_clear using the intersection of the
18307 	 padding bits of all the fields.  */
18308       for (i = *regno; i < max_reg; i++)
18309 	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18310 
18311       /* Do not keep trailing padding bits, we do not know yet whether this
18312 	 is the end of the argument.  */
18313       mask = ((uint32_t) 1 << max_bit) - 1;
18314       padding_bits_to_clear[max_reg]
18315 	|= padding_bits_to_clear_res[max_reg] & mask;
18316 
18317       *regno = max_reg;
18318       *last_used_bit = max_bit;
18319     }
18320   else
18321     /* This function should only be used for structs and unions.  */
18322     gcc_unreachable ();
18323 
18324   return not_to_clear_reg_mask;
18325 }
18326 
18327 /* In the context of ARMv8-M Security Extensions, this function is used for both
18328    'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18329    registers are used when returning or passing arguments, which is then
18330    returned as a mask.  It will also compute a mask to indicate padding/unused
18331    bits for each of these registers, and passes this through the
18332    PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
18333    ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18334    the starting register used to pass this argument or return value is passed
18335    in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18336    for struct and union types.  */
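/* Purely illustrative: a DImode argument passed in r0 produces a mask with the
   bits for r0 and r1 set (ARM_NUM_REGS (DImode) being 2), while a
   struct { uint8_t a; uint16_t b; } passed in r0 produces a mask covering only
   r0, together with padding_bits_to_clear[0] == 0xff00.  */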
18337 
18338 static unsigned HOST_WIDE_INT
18339 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18340 			     uint32_t * padding_bits_to_clear)
18341 
18342 {
18343   int last_used_bit = 0;
18344   unsigned HOST_WIDE_INT not_to_clear_mask;
18345 
18346   if (RECORD_OR_UNION_TYPE_P (arg_type))
18347     {
18348       not_to_clear_mask
18349 	= comp_not_to_clear_mask_str_un (arg_type, &regno,
18350 					 padding_bits_to_clear, 0,
18351 					 &last_used_bit);
18352 
18353 
18354       /* If the 'last_used_bit' is not zero, that means we are still using a
18355 	 part of the last 'regno'.  In such cases we must clear the trailing
18356 	 bits.  Otherwise we are not using regno and we should mark it to be
18357 	 cleared.  */
18358       if (last_used_bit != 0)
18359 	padding_bits_to_clear[regno]
18360 	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18361       else
18362 	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18363     }
18364   else
18365     {
18366       not_to_clear_mask = 0;
18367       /* We are not dealing with structs or unions, so these arguments may be
18368 	 passed in floating point registers too.  In some cases a BLKmode is
18369 	 used when returning or passing arguments in multiple VFP registers.  */
18370       if (GET_MODE (arg_rtx) == BLKmode)
18371 	{
18372 	  int i, arg_regs;
18373 	  rtx reg;
18374 
18375 	  /* This should really only occur when dealing with the hard-float
18376 	     ABI.  */
18377 	  gcc_assert (TARGET_HARD_FLOAT_ABI);
18378 
18379 	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18380 	    {
18381 	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18382 	      gcc_assert (REG_P (reg));
18383 
18384 	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18385 
18386 	      /* If we are dealing with DF mode, make sure we don't
18387 		 clear either of the registers it addresses.  */
18388 	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18389 	      if (arg_regs > 1)
18390 		{
18391 		  unsigned HOST_WIDE_INT mask;
18392 		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18393 		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
18394 		  not_to_clear_mask |= mask;
18395 		}
18396 	    }
18397 	}
18398       else
18399 	{
18400 	  /* Otherwise we can rely on the MODE to determine how many registers
18401 	     are being used by this argument.  */
18402 	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18403 	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18404 	  if (arg_regs > 1)
18405 	    {
18406 	      unsigned HOST_WIDE_INT
18407 	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18408 	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18409 	      not_to_clear_mask |= mask;
18410 	    }
18411 	}
18412     }
18413 
18414   return not_to_clear_mask;
18415 }
18416 
18417 /* Clear any secret values from registers before doing a cmse_nonsecure_call or
18418    returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates
18419    which registers are to be fully cleared, using the value in register
18420    CLEARING_REG if more efficient.  The array PADDING_BITS_TO_CLEAR, which has
18421    PADDING_BITS_LEN entries, gives the bits that need to be cleared in
18422    caller-saved core registers, with SCRATCH_REG used as a scratch register.
18423 
18424    NOTE: one of the three following conditions must hold:
18425    - SCRATCH_REG is a low register
18426    - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18427      in TO_CLEAR_BITMAP)
18428    - CLEARING_REG is a low register.  */
18429 
18430 static void
18431 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18432 		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18433 {
18434   bool saved_clearing = false;
18435   rtx saved_clearing_reg = NULL_RTX;
18436   int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18437 
18438   gcc_assert (arm_arch_cmse);
18439 
18440   if (!bitmap_empty_p (to_clear_bitmap))
18441     {
18442       minregno = bitmap_first_set_bit (to_clear_bitmap);
18443       maxregno = bitmap_last_set_bit (to_clear_bitmap);
18444     }
18445   clearing_regno = REGNO (clearing_reg);
18446 
18447   /* Clear padding bits.  */
18448   gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18449   for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18450     {
18451       uint64_t mask;
18452       rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18453 
18454       if (padding_bits_to_clear[i] == 0)
18455 	continue;
18456 
18457       /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18458 	 CLEARING_REG as scratch.  */
18459       if (TARGET_THUMB1
18460 	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
18461 	{
18462 	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
18463 	     such that we can use clearing_reg to clear the unused bits in the
18464 	     arguments.  */
18465 	  if ((clearing_regno > maxregno
18466 	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18467 	      && !saved_clearing)
18468 	    {
18469 	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18470 	      emit_move_insn (scratch_reg, clearing_reg);
18471 	      saved_clearing = true;
18472 	      saved_clearing_reg = scratch_reg;
18473 	    }
18474 	  scratch_reg = clearing_reg;
18475 	}
18476 
18477       /* Fill the lower half of the negated padding_bits_to_clear[i].  */
18478       mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18479       emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18480 
18481       /* Fill the top half of the negated padding_bits_to_clear[i].  */
18482       mask = (~padding_bits_to_clear[i]) >> 16;
18483       rtx16 = gen_int_mode (16, SImode);
18484       dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18485       if (mask)
18486 	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18487 
18488       emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18489     }
18490   if (saved_clearing)
18491     emit_move_insn (clearing_reg, saved_clearing_reg);
18492 
18493 
18494   /* Clear full registers.  */
18495 
18496   if (TARGET_HAVE_FPCXT_CMSE)
18497     {
18498       rtvec vunspec_vec;
18499       int i, j, k, nb_regs;
18500       rtx use_seq, par, reg, set, vunspec;
18501       int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18502       auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18503       auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18504 
18505       for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18506 	{
18507 	  /* Find next register to clear and exit if none.  */
18508 	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18509 	  if (i > maxregno)
18510 	    break;
18511 
18512 	  /* Compute number of consecutive registers to clear.  */
18513 	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18514 	       j++);
18515 	  nb_regs = j - i;
18516 
18517 	  /* Create VSCCLRM RTX pattern.  */
18518 	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18519 	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18520 	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18521 					     VUNSPEC_VSCCLRM_VPR);
18522 	  XVECEXP (par, 0, 0) = vunspec;
18523 
18524 	  /* Insert VFP register clearing RTX in the pattern.  */
18525 	  start_sequence ();
18526 	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18527 	    {
18528 	      if (!bitmap_bit_p (to_clear_bitmap, j))
18529 		continue;
18530 
18531 	      reg = gen_rtx_REG (SFmode, j);
18532 	      set = gen_rtx_SET (reg, const0_rtx);
18533 	      XVECEXP (par, 0, k++) = set;
18534 	      emit_use (reg);
18535 	    }
18536 	  use_seq = get_insns ();
18537 	  end_sequence ();
18538 
18539 	  emit_insn_after (use_seq, emit_insn (par));
18540 	}
18541 
18542       /* Get set of core registers to clear.  */
18543       bitmap_clear (core_regs_bitmap);
18544       bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18545 			IP_REGNUM - R0_REGNUM + 1);
18546       bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18547 		  core_regs_bitmap);
18548       gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18549 
18550       if (bitmap_empty_p (to_clear_core_bitmap))
18551 	return;
18552 
18553       /* Create clrm RTX pattern.  */
18554       nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18555       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18556 
18557       /* Insert core register clearing RTX in the pattern.  */
18558       start_sequence ();
18559       for (j = 0, i = minregno; j < nb_regs; i++)
18560 	{
18561 	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
18562 	    continue;
18563 
18564 	  reg = gen_rtx_REG (SImode, i);
18565 	  set = gen_rtx_SET (reg, const0_rtx);
18566 	  XVECEXP (par, 0, j++) = set;
18567 	  emit_use (reg);
18568 	}
18569 
18570       /* Insert APSR register clearing RTX in the pattern
18571 	 along with clobbering CC.  */
18572       vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18573       vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18574 					 VUNSPEC_CLRM_APSR);
18575 
18576       XVECEXP (par, 0, j++) = vunspec;
18577 
18578       rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18579       rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18580       XVECEXP (par, 0, j) = clobber;
18581 
18582       use_seq = get_insns ();
18583       end_sequence ();
18584 
18585       emit_insn_after (use_seq, emit_insn (par));
18586     }
18587   else
18588     {
18589       /* If not marked for clearing, clearing_reg already does not contain
18590 	 any secret.  */
18591       if (clearing_regno <= maxregno
18592 	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18593 	{
18594 	  emit_move_insn (clearing_reg, const0_rtx);
18595 	  emit_use (clearing_reg);
18596 	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18597 	}
18598 
18599       for (regno = minregno; regno <= maxregno; regno++)
18600 	{
18601 	  if (!bitmap_bit_p (to_clear_bitmap, regno))
18602 	    continue;
18603 
18604 	  if (IS_VFP_REGNUM (regno))
18605 	    {
18606 	      /* If regno is an even vfp register and its successor is also to
18607 		 be cleared, use vmov.  */
18608 	      if (TARGET_VFP_DOUBLE
18609 		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
18610 		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
18611 		{
18612 		  emit_move_insn (gen_rtx_REG (DFmode, regno),
18613 				  CONST1_RTX (DFmode));
18614 		  emit_use (gen_rtx_REG (DFmode, regno));
18615 		  regno++;
18616 		}
18617 	      else
18618 		{
18619 		  emit_move_insn (gen_rtx_REG (SFmode, regno),
18620 				  CONST1_RTX (SFmode));
18621 		  emit_use (gen_rtx_REG (SFmode, regno));
18622 		}
18623 	    }
18624 	  else
18625 	    {
18626 	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18627 	      emit_use (gen_rtx_REG (SImode, regno));
18628 	    }
18629 	}
18630     }
18631 }
18632 
18633 /* Clear core and caller-saved VFP registers not used to pass arguments before
18634    a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
18635    registers is done in the __gnu_cmse_nonsecure_call libcall.  See
18636    libgcc/config/arm/cmse_nonsecure_call.S.  */
18637 
18638 static void
18639 cmse_nonsecure_call_inline_register_clear (void)
18640 {
18641   basic_block bb;
18642 
18643   FOR_EACH_BB_FN (bb, cfun)
18644     {
18645       rtx_insn *insn;
18646 
18647       FOR_BB_INSNS (bb, insn)
18648 	{
18649 	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18650 	  /* frame = VFP regs + FPSCR + VPR.  */
18651 	  unsigned lazy_store_stack_frame_size
18652 	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18653 	  unsigned long callee_saved_mask
18654 	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18655 	    & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18656 	  unsigned address_regnum, regno;
18657 	  unsigned max_int_regno
18658 	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18659 	  unsigned max_fp_regno
18660 	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18661 	  unsigned maxregno
18662 	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18663 	  auto_sbitmap to_clear_bitmap (maxregno + 1);
18664 	  rtx_insn *seq;
18665 	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18666 	  rtx address;
18667 	  CUMULATIVE_ARGS args_so_far_v;
18668 	  cumulative_args_t args_so_far;
18669 	  tree arg_type, fntype;
18670 	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18671 	  function_args_iterator args_iter;
18672 	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18673 
18674 	  if (!NONDEBUG_INSN_P (insn))
18675 	    continue;
18676 
18677 	  if (!CALL_P (insn))
18678 	    continue;
18679 
18680 	  pat = PATTERN (insn);
18681 	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18682 	  call = XVECEXP (pat, 0, 0);
18683 
18684 	  /* Get the real call RTX if the insn sets a value, i.e. returns.  */
18685 	  if (GET_CODE (call) == SET)
18686 	      call = SET_SRC (call);
18687 
18688 	  /* Check if it is a cmse_nonsecure_call.  */
18689 	  unspec = XEXP (call, 0);
18690 	  if (GET_CODE (unspec) != UNSPEC
18691 	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18692 	    continue;
18693 
18694 	  /* Mark registers that need to be cleared.  Those that hold a
18695 	     parameter are removed from the set further below.  */
18696 	  bitmap_clear (to_clear_bitmap);
18697 	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18698 			    max_int_regno - R0_REGNUM + 1);
18699 
18700 	  /* Only look at the caller-saved floating point registers in case of
18701 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
18702 	     lazy store and loads which clear both caller- and callee-saved
18703 	     registers.  */
18704 	  if (!lazy_fpclear)
18705 	    {
18706 	      auto_sbitmap float_bitmap (maxregno + 1);
18707 
18708 	      bitmap_clear (float_bitmap);
18709 	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18710 				max_fp_regno - FIRST_VFP_REGNUM + 1);
18711 	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18712 	    }
18713 
18714 	  /* Make sure the register used to hold the function address is not
18715 	     cleared.  */
18716 	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
18717 	  gcc_assert (MEM_P (address));
18718 	  gcc_assert (REG_P (XEXP (address, 0)));
18719 	  address_regnum = REGNO (XEXP (address, 0));
18720 	  if (address_regnum <= max_int_regno)
18721 	    bitmap_clear_bit (to_clear_bitmap, address_regnum);
18722 
18723 	  /* Set basic block of call insn so that df rescan is performed on
18724 	     insns inserted here.  */
18725 	  set_block_for_insn (insn, bb);
18726 	  df_set_flags (DF_DEFER_INSN_RESCAN);
18727 	  start_sequence ();
18728 
18729 	  /* Make sure the scheduler doesn't schedule other insns beyond
18730 	     here.  */
18731 	  emit_insn (gen_blockage ());
18732 
18733 	  /* Walk through all arguments and clear registers appropriately.  */
18735 	  fntype = TREE_TYPE (MEM_EXPR (address));
18736 	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18737 				    NULL_TREE);
18738 	  args_so_far = pack_cumulative_args (&args_so_far_v);
18739 	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18740 	    {
18741 	      rtx arg_rtx;
18742 	      uint64_t to_clear_args_mask;
18743 
18744 	      if (VOID_TYPE_P (arg_type))
18745 		continue;
18746 
18747 	      function_arg_info arg (arg_type, /*named=*/true);
18748 	      if (!first_param)
18749 		/* ??? We should advance after processing the argument and pass
18750 		   the argument we're advancing past.  */
18751 		arm_function_arg_advance (args_so_far, arg);
18752 
18753 	      arg_rtx = arm_function_arg (args_so_far, arg);
18754 	      gcc_assert (REG_P (arg_rtx));
18755 	      to_clear_args_mask
18756 		= compute_not_to_clear_mask (arg_type, arg_rtx,
18757 					     REGNO (arg_rtx),
18758 					     &padding_bits_to_clear[0]);
18759 	      if (to_clear_args_mask)
18760 		{
18761 		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
18762 		    {
18763 		      if (to_clear_args_mask & (1ULL << regno))
18764 			bitmap_clear_bit (to_clear_bitmap, regno);
18765 		    }
18766 		}
18767 
18768 	      first_param = false;
18769 	    }
18770 
18771 	  /* We use right shift and left shift to clear the LSB of the address
18772 	     we jump to instead of using bic, to avoid having to use an extra
18773 	     register on Thumb-1.  */
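	  /* Illustrative example: with the address in r4 this emits
	     "lsrs r4, r4, #1" followed by "lsls r4, r4, #1", which zeroes
	     bit 0 without needing a scratch register for a BIC mask.  */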
18774 	  clearing_reg = XEXP (address, 0);
18775 	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18776 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18777 	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18778 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18779 
18780 	  if (clear_callee_saved)
18781 	    {
18782 	      rtx push_insn =
18783 		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18784 	      /* Disable frame debug info in push because it needs to be
18785 		 disabled for pop (see below).  */
18786 	      RTX_FRAME_RELATED_P (push_insn) = 0;
18787 
18788 	      /* Lazy store multiple.  */
18789 	      if (lazy_fpclear)
18790 		{
18791 		  rtx imm;
18792 		  rtx_insn *add_insn;
18793 
18794 		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18795 		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18796 						    stack_pointer_rtx, imm));
18797 		  /* If we have the frame pointer, then it will be the
18798 		     CFA reg.  Otherwise, the stack pointer is the CFA
18799 		     reg, so we need to emit a CFA adjust.  */
18800 		  if (!frame_pointer_needed)
18801 		    arm_add_cfa_adjust_cfa_note (add_insn,
18802 						 - lazy_store_stack_frame_size,
18803 						 stack_pointer_rtx,
18804 						 stack_pointer_rtx);
18805 		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18806 		}
18807 	      /* Save VFP callee-saved registers.  */
18808 	      else
18809 		{
18810 		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18811 				  (max_fp_regno - D7_VFP_REGNUM) / 2);
18812 		  /* Disable frame debug info in push because it needs to be
18813 		     disabled for vpop (see below).  */
18814 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18815 		}
18816 	    }
18817 
18818 	  /* Clear caller-saved registers that could leak information before
18819 	     doing a non-secure call.  */
18820 	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18821 	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18822 				NUM_ARG_REGS, ip_reg, clearing_reg);
18823 
18824 	  seq = get_insns ();
18825 	  end_sequence ();
18826 	  emit_insn_before (seq, insn);
18827 
18828 	  if (TARGET_HAVE_FPCXT_CMSE)
18829 	    {
18830 	      rtx_insn *last, *pop_insn, *after = insn;
18831 
18832 	      start_sequence ();
18833 
18834 	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
18835 	      if (lazy_fpclear)
18836 		{
18837 		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18838 		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18839 		  rtx_insn *add_insn =
18840 		    emit_insn (gen_addsi3 (stack_pointer_rtx,
18841 					   stack_pointer_rtx, imm));
18842 		  if (!frame_pointer_needed)
18843 		    arm_add_cfa_adjust_cfa_note (add_insn,
18844 						 lazy_store_stack_frame_size,
18845 						 stack_pointer_rtx,
18846 						 stack_pointer_rtx);
18847 		}
18848 	      /* Restore VFP callee-saved registers.  */
18849 	      else
18850 		{
18851 		  int nb_callee_saved_vfp_regs =
18852 		    (max_fp_regno - D7_VFP_REGNUM) / 2;
18853 		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18854 					      nb_callee_saved_vfp_regs,
18855 					      stack_pointer_rtx);
18856 		  /* Disable frame debug info in vpop because the SP adjustment
18857 		     is made using a CFA adjustment note while CFA used is
18858 		     sometimes R7.  This then causes an assert failure in the
18859 		     CFI note creation code.  */
18860 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18861 		}
18862 
18863 	      arm_emit_multi_reg_pop (callee_saved_mask);
18864 	      pop_insn = get_last_insn ();
18865 
18866 	      /* Disable frame debug info in pop because it resets the state
18867 		 of the popped registers to what it was at the beginning of the
18868 		 function, before the prologue.  This leads to incorrect state
18869 		 when doing the pop after the nonsecure call for registers that
18870 		 are pushed both in prologue and before the nonsecure call.
18871 
18872 		 It also occasionally triggers an assert failure in CFI note
18873 		 creation code when there are two codepaths to the epilogue,
18874 		 one of which does not go through the nonsecure call.
18875 		 Obviously this means that debugging between the push and pop is
18876 		 not reliable.  */
18877 	      RTX_FRAME_RELATED_P (pop_insn) = 0;
18878 
18879 	      seq = get_insns ();
18880 	      last = get_last_insn ();
18881 	      end_sequence ();
18882 
18883 	      emit_insn_after (seq, after);
18884 
18885 	      /* Skip the pop we have just inserted after the nonsecure call;
18886 		 we know it does not contain a nonsecure call.  */
18887 	      insn = last;
18888 	    }
18889 	}
18890     }
18891 }
18892 
18893 /* Rewrite move insn into subtract of 0 if the condition codes will
18894    be useful in next conditional jump insn.  */
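/* Illustrative example: a sequence along the lines of
       movs  r1, r0
       ...
       cmp   r0, #0
       bne   .L1
   can have the move rewritten as "subs r1, r0, #0", which sets the
   condition codes itself so that the separate compare against zero can
   later become redundant.  */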
18895 
18896 static void
18897 thumb1_reorg (void)
18898 {
18899   basic_block bb;
18900 
18901   FOR_EACH_BB_FN (bb, cfun)
18902     {
18903       rtx dest, src;
18904       rtx cmp, op0, op1, set = NULL;
18905       rtx_insn *prev, *insn = BB_END (bb);
18906       bool insn_clobbered = false;
18907 
18908       while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18909 	insn = PREV_INSN (insn);
18910 
18911       /* Find the last cbranchsi4_insn in basic block BB.  */
18912       if (insn == BB_HEAD (bb)
18913 	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18914 	continue;
18915 
18916       /* Get the register with which we are comparing.  */
18917       cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18918       op0 = XEXP (cmp, 0);
18919       op1 = XEXP (cmp, 1);
18920 
18921       /* Check that comparison is against ZERO.  */
18922       if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18923 	continue;
18924 
18925       /* Find the first flag setting insn before INSN in basic block BB.  */
18926       gcc_assert (insn != BB_HEAD (bb));
18927       for (prev = PREV_INSN (insn);
18928 	   (!insn_clobbered
18929 	    && prev != BB_HEAD (bb)
18930 	    && (NOTE_P (prev)
18931 		|| DEBUG_INSN_P (prev)
18932 		|| ((set = single_set (prev)) != NULL
18933 		    && get_attr_conds (prev) == CONDS_NOCOND)));
18934 	   prev = PREV_INSN (prev))
18935 	{
18936 	  if (reg_set_p (op0, prev))
18937 	    insn_clobbered = true;
18938 	}
18939 
18940       /* Skip if op0 is clobbered by insn other than prev. */
18941       if (insn_clobbered)
18942 	continue;
18943 
18944       if (!set)
18945 	continue;
18946 
18947       dest = SET_DEST (set);
18948       src = SET_SRC (set);
18949       if (!low_register_operand (dest, SImode)
18950 	  || !low_register_operand (src, SImode))
18951 	continue;
18952 
18953       /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18954 	 in INSN.  Both src and dest of the move insn are checked.  */
18955       if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18956 	{
18957 	  dest = copy_rtx (dest);
18958 	  src = copy_rtx (src);
18959 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
18960 	  PATTERN (prev) = gen_rtx_SET (dest, src);
18961 	  INSN_CODE (prev) = -1;
18962 	  /* Set test register in INSN to dest.  */
18963 	  XEXP (cmp, 0) = copy_rtx (dest);
18964 	  INSN_CODE (insn) = -1;
18965 	}
18966     }
18967 }
18968 
18969 /* Convert instructions to their cc-clobbering variant if possible, since
18970    that allows us to use smaller encodings.  */
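/* Illustrative example: rewriting "add r0, r1, r2" as the flag-setting
   "adds r0, r1, r2" when the condition codes are known to be dead allows
   the 16-bit Thumb-2 encoding to be used instead of the 32-bit one.  */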
18971 
18972 static void
18973 thumb2_reorg (void)
18974 {
18975   basic_block bb;
18976   regset_head live;
18977 
18978   INIT_REG_SET (&live);
18979 
18980   /* We are freeing block_for_insn in the toplev to keep compatibility
18981      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
18982   compute_bb_for_insn ();
18983   df_analyze ();
18984 
18985   enum Convert_Action {SKIP, CONV, SWAP_CONV};
18986 
18987   FOR_EACH_BB_FN (bb, cfun)
18988     {
18989       if ((current_tune->disparage_flag_setting_t16_encodings
18990 	   == tune_params::DISPARAGE_FLAGS_ALL)
18991 	  && optimize_bb_for_speed_p (bb))
18992 	continue;
18993 
18994       rtx_insn *insn;
18995       Convert_Action action = SKIP;
18996       Convert_Action action_for_partial_flag_setting
18997 	= ((current_tune->disparage_flag_setting_t16_encodings
18998 	    != tune_params::DISPARAGE_FLAGS_NEITHER)
18999 	   && optimize_bb_for_speed_p (bb))
19000 	  ? SKIP : CONV;
19001 
19002       COPY_REG_SET (&live, DF_LR_OUT (bb));
19003       df_simulate_initialize_backwards (bb, &live);
19004       FOR_BB_INSNS_REVERSE (bb, insn)
19005 	{
19006 	  if (NONJUMP_INSN_P (insn)
19007 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
19008 	      && GET_CODE (PATTERN (insn)) == SET)
19009 	    {
19010 	      action = SKIP;
19011 	      rtx pat = PATTERN (insn);
19012 	      rtx dst = XEXP (pat, 0);
19013 	      rtx src = XEXP (pat, 1);
19014 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
19015 
19016 	      if (UNARY_P (src) || BINARY_P (src))
19017 		  op0 = XEXP (src, 0);
19018 
19019 	      if (BINARY_P (src))
19020 		  op1 = XEXP (src, 1);
19021 
19022 	      if (low_register_operand (dst, SImode))
19023 		{
19024 		  switch (GET_CODE (src))
19025 		    {
19026 		    case PLUS:
19027 		      /* Adding two registers and storing the result
19028 			 in the first source is already a 16-bit
19029 			 operation.  */
19030 		      if (rtx_equal_p (dst, op0)
19031 			  && register_operand (op1, SImode))
19032 			break;
19033 
19034 		      if (low_register_operand (op0, SImode))
19035 			{
19036 			  /* ADDS <Rd>,<Rn>,<Rm>  */
19037 			  if (low_register_operand (op1, SImode))
19038 			    action = CONV;
19039 			  /* ADDS <Rdn>,#<imm8>  */
19040 			  /* SUBS <Rdn>,#<imm8>  */
19041 			  else if (rtx_equal_p (dst, op0)
19042 				   && CONST_INT_P (op1)
19043 				   && IN_RANGE (INTVAL (op1), -255, 255))
19044 			    action = CONV;
19045 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
19046 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
19047 			  else if (CONST_INT_P (op1)
19048 				   && IN_RANGE (INTVAL (op1), -7, 7))
19049 			    action = CONV;
19050 			}
19051 		      /* ADCS <Rd>, <Rn>  */
19052 		      else if (GET_CODE (XEXP (src, 0)) == PLUS
19053 			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19054 			      && low_register_operand (XEXP (XEXP (src, 0), 1),
19055 						       SImode)
19056 			      && COMPARISON_P (op1)
19057 			      && cc_register (XEXP (op1, 0), VOIDmode)
19058 			      && maybe_get_arm_condition_code (op1) == ARM_CS
19059 			      && XEXP (op1, 1) == const0_rtx)
19060 		        action = CONV;
19061 		      break;
19062 
19063 		    case MINUS:
19064 		      /* RSBS <Rd>,<Rn>,#0
19065 			 Not handled here: see NEG below.  */
19066 		      /* SUBS <Rd>,<Rn>,#<imm3>
19067 			 SUBS <Rdn>,#<imm8>
19068 			 Not handled here: see PLUS above.  */
19069 		      /* SUBS <Rd>,<Rn>,<Rm>  */
19070 		      if (low_register_operand (op0, SImode)
19071 			  && low_register_operand (op1, SImode))
19072 			    action = CONV;
19073 		      break;
19074 
19075 		    case MULT:
19076 		      /* MULS <Rdm>,<Rn>,<Rdm>
19077 			 As an exception to the rule, this is only used
19078 			 when optimizing for size since MULS is slow on all
19079 			 known implementations.  We do not even want to use
19080 			 MULS in cold code, if optimizing for speed, so we
19081 			 test the global flag here.  */
19082 		      if (!optimize_size)
19083 			break;
19084 		      /* Fall through.  */
19085 		    case AND:
19086 		    case IOR:
19087 		    case XOR:
19088 		      /* ANDS <Rdn>,<Rm>  */
19089 		      if (rtx_equal_p (dst, op0)
19090 			  && low_register_operand (op1, SImode))
19091 			action = action_for_partial_flag_setting;
19092 		      else if (rtx_equal_p (dst, op1)
19093 			       && low_register_operand (op0, SImode))
19094 			action = action_for_partial_flag_setting == SKIP
19095 				 ? SKIP : SWAP_CONV;
19096 		      break;
19097 
19098 		    case ASHIFTRT:
19099 		    case ASHIFT:
19100 		    case LSHIFTRT:
19101 		      /* ASRS <Rdn>,<Rm> */
19102 		      /* LSRS <Rdn>,<Rm> */
19103 		      /* LSLS <Rdn>,<Rm> */
19104 		      if (rtx_equal_p (dst, op0)
19105 			  && low_register_operand (op1, SImode))
19106 			action = action_for_partial_flag_setting;
19107 		      /* ASRS <Rd>,<Rm>,#<imm5> */
19108 		      /* LSRS <Rd>,<Rm>,#<imm5> */
19109 		      /* LSLS <Rd>,<Rm>,#<imm5> */
19110 		      else if (low_register_operand (op0, SImode)
19111 			       && CONST_INT_P (op1)
19112 			       && IN_RANGE (INTVAL (op1), 0, 31))
19113 			action = action_for_partial_flag_setting;
19114 		      break;
19115 
19116 		    case ROTATERT:
19117 		      /* RORS <Rdn>,<Rm>  */
19118 		      if (rtx_equal_p (dst, op0)
19119 			  && low_register_operand (op1, SImode))
19120 			action = action_for_partial_flag_setting;
19121 		      break;
19122 
19123 		    case NOT:
19124 		      /* MVNS <Rd>,<Rm>  */
19125 		      if (low_register_operand (op0, SImode))
19126 			action = action_for_partial_flag_setting;
19127 		      break;
19128 
19129 		    case NEG:
19130 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
19131 		      if (low_register_operand (op0, SImode))
19132 			action = CONV;
19133 		      break;
19134 
19135 		    case CONST_INT:
19136 		      /* MOVS <Rd>,#<imm8>  */
19137 		      if (CONST_INT_P (src)
19138 			  && IN_RANGE (INTVAL (src), 0, 255))
19139 			action = action_for_partial_flag_setting;
19140 		      break;
19141 
19142 		    case REG:
19143 		      /* MOVS and MOV<c> with registers have different
19144 			 encodings, so are not relevant here.  */
19145 		      break;
19146 
19147 		    default:
19148 		      break;
19149 		    }
19150 		}
19151 
19152 	      if (action != SKIP)
19153 		{
19154 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19155 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19156 		  rtvec vec;
19157 
19158 		  if (action == SWAP_CONV)
19159 		    {
19160 		      src = copy_rtx (src);
19161 		      XEXP (src, 0) = op1;
19162 		      XEXP (src, 1) = op0;
19163 		      pat = gen_rtx_SET (dst, src);
19164 		      vec = gen_rtvec (2, pat, clobber);
19165 		    }
19166 		  else /* action == CONV */
19167 		    vec = gen_rtvec (2, pat, clobber);
19168 
19169 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19170 		  INSN_CODE (insn) = -1;
19171 		}
19172 	    }
19173 
19174 	  if (NONDEBUG_INSN_P (insn))
19175 	    df_simulate_one_insn_backwards (bb, insn, &live);
19176 	}
19177     }
19178 
19179   CLEAR_REG_SET (&live);
19180 }
19181 
19182 /* GCC puts the pool in the wrong place for ARM, since we can only
19183    load addresses a limited distance around the pc.  We do some
19184    special munging to move the constant pool values to the correct
19185    point in the code.  */
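/* Illustrative example: a load of a constant that has no immediate encoding
   ends up as "ldr r0, .LCP0", with ".LCP0: .word <constant>" emitted in a
   minipool that this pass places within the load's pc-relative addressing
   range, adding a branch around the pool where no natural barrier exists.  */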
19186 static void
19187 arm_reorg (void)
19188 {
19189   rtx_insn *insn;
19190   HOST_WIDE_INT address = 0;
19191   Mfix * fix;
19192 
19193   if (use_cmse)
19194     cmse_nonsecure_call_inline_register_clear ();
19195 
19196   /* We cannot run the Thumb passes for thunks because there is no CFG.  */
19197   if (cfun->is_thunk)
19198     ;
19199   else if (TARGET_THUMB1)
19200     thumb1_reorg ();
19201   else if (TARGET_THUMB2)
19202     thumb2_reorg ();
19203 
19204   /* Ensure all insns that must be split have been split at this point.
19205      Otherwise, the pool placement code below may compute incorrect
19206      insn lengths.  Note that when optimizing, all insns have already
19207      been split at this point.  */
19208   if (!optimize)
19209     split_all_insns_noflow ();
19210 
19211   /* Make sure we do not attempt to create a literal pool even though it should
19212      no longer be necessary to create any.  */
19213   if (arm_disable_literal_pool)
19214     return;
19215 
19216   minipool_fix_head = minipool_fix_tail = NULL;
19217 
19218   /* The first insn must always be a note, or the code below won't
19219      scan it properly.  */
19220   insn = get_insns ();
19221   gcc_assert (NOTE_P (insn));
19222   minipool_pad = 0;
19223 
19224   /* Scan all the insns and record the operands that will need fixing.  */
19225   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19226     {
19227       if (BARRIER_P (insn))
19228 	push_minipool_barrier (insn, address);
19229       else if (INSN_P (insn))
19230 	{
19231 	  rtx_jump_table_data *table;
19232 
19233 	  note_invalid_constants (insn, address, true);
19234 	  address += get_attr_length (insn);
19235 
19236 	  /* If the insn is a vector jump, add the size of the table
19237 	     and skip the table.  */
19238 	  if (tablejump_p (insn, NULL, &table))
19239 	    {
19240 	      address += get_jump_table_size (table);
19241 	      insn = table;
19242 	    }
19243 	}
19244       else if (LABEL_P (insn))
19245 	/* Add the worst-case padding due to alignment.  We don't add
19246 	   the _current_ padding because the minipool insertions
19247 	   themselves might change it.  */
19248 	address += get_label_padding (insn);
19249     }
19250 
19251   fix = minipool_fix_head;
19252 
19253   /* Now scan the fixups and perform the required changes.  */
19254   while (fix)
19255     {
19256       Mfix * ftmp;
19257       Mfix * fdel;
19258       Mfix *  last_added_fix;
19259       Mfix * last_barrier = NULL;
19260       Mfix * this_fix;
19261 
19262       /* Skip any further barriers before the next fix.  */
19263       while (fix && BARRIER_P (fix->insn))
19264 	fix = fix->next;
19265 
19266       /* No more fixes.  */
19267       if (fix == NULL)
19268 	break;
19269 
19270       last_added_fix = NULL;
19271 
19272       for (ftmp = fix; ftmp; ftmp = ftmp->next)
19273 	{
19274 	  if (BARRIER_P (ftmp->insn))
19275 	    {
19276 	      if (ftmp->address >= minipool_vector_head->max_address)
19277 		break;
19278 
19279 	      last_barrier = ftmp;
19280 	    }
19281 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19282 	    break;
19283 
19284 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
19285 	}
19286 
19287       /* If we found a barrier, drop back to that; any fixes that we
19288 	 could have reached but come after the barrier will now go in
19289 	 the next mini-pool.  */
19290       if (last_barrier != NULL)
19291 	{
19292 	  /* Reduce the refcount for those fixes that won't go into this
19293 	     pool after all.  */
19294 	  for (fdel = last_barrier->next;
19295 	       fdel && fdel != ftmp;
19296 	       fdel = fdel->next)
19297 	    {
19298 	      fdel->minipool->refcount--;
19299 	      fdel->minipool = NULL;
19300 	    }
19301 
19302 	  ftmp = last_barrier;
19303 	}
19304       else
19305         {
19306 	  /* ftmp is the first fix that we can't fit into this pool and
19307 	     there are no natural barriers that we could use.  Insert a
19308 	     new barrier in the code somewhere between the previous
19309 	     fix and this one, and arrange to jump around it.  */
19310 	  HOST_WIDE_INT max_address;
19311 
19312 	  /* The last item on the list of fixes must be a barrier, so
19313 	     we can never run off the end of the list of fixes without
19314 	     last_barrier being set.  */
19315 	  gcc_assert (ftmp);
19316 
19317 	  max_address = minipool_vector_head->max_address;
19318 	  /* Check that there isn't another fix that is in range that
19319 	     we couldn't fit into this pool because the pool was
19320 	     already too large: we need to put the pool before such an
19321 	     instruction.  The pool itself may come just after the
19322 	     fix because create_fix_barrier also allows space for a
19323 	     jump instruction.  */
19324 	  if (ftmp->address < max_address)
19325 	    max_address = ftmp->address + 1;
19326 
19327 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
19328 	}
19329 
19330       assign_minipool_offsets (last_barrier);
19331 
19332       while (ftmp)
19333 	{
19334 	  if (!BARRIER_P (ftmp->insn)
19335 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19336 		  == NULL))
19337 	    break;
19338 
19339 	  ftmp = ftmp->next;
19340 	}
19341 
19342       /* Scan over the fixes we have identified for this pool, fixing them
19343 	 up and adding the constants to the pool itself.  */
19344       for (this_fix = fix; this_fix && ftmp != this_fix;
19345 	   this_fix = this_fix->next)
19346 	if (!BARRIER_P (this_fix->insn))
19347 	  {
19348 	    rtx addr
19349 	      = plus_constant (Pmode,
19350 			       gen_rtx_LABEL_REF (VOIDmode,
19351 						  minipool_vector_label),
19352 			       this_fix->minipool->offset);
19353 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19354 	  }
19355 
19356       dump_minipool (last_barrier->insn);
19357       fix = ftmp;
19358     }
19359 
19360   /* From now on we must synthesize any constants that we can't handle
19361      directly.  This can happen if the RTL gets split during final
19362      instruction generation.  */
19363   cfun->machine->after_arm_reorg = 1;
19364 
19365   /* Free the minipool memory.  */
19366   obstack_free (&minipool_obstack, minipool_startobj);
19367 }
19368 
19369 /* Routines to output assembly language.  */
19370 
19371 /* Return the string representation of the passed-in real value.  */
19372 static const char *
19373 fp_const_from_val (REAL_VALUE_TYPE *r)
19374 {
19375   if (!fp_consts_inited)
19376     init_fp_table ();
19377 
19378   gcc_assert (real_equal (r, &value_fp0));
19379   return "0";
19380 }
19381 
19382 /* OPERANDS[0] is the entire list of insns that constitute pop,
19383    OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19384    is in the list, UPDATE is true iff the list contains explicit
19385    update of base register.  */
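/* Illustrative example: a pop of r4-r6 and PC with an SP update is printed
   as "pop {r4, r5, r6, pc}", whereas the same register list loaded through
   SP without an explicit update is printed as "ldmfd sp, {r4, r5, r6, pc}"
   (with a trailing "^" when returning from an interrupt handler).  */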
19386 void
19387 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19388                          bool update)
19389 {
19390   int i;
19391   char pattern[100];
19392   int offset;
19393   const char *conditional;
19394   int num_saves = XVECLEN (operands[0], 0);
19395   unsigned int regno;
19396   unsigned int regno_base = REGNO (operands[1]);
19397   bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19398 
19399   offset = 0;
19400   offset += update ? 1 : 0;
19401   offset += return_pc ? 1 : 0;
19402 
19403   /* Is the base register in the list?  */
19404   for (i = offset; i < num_saves; i++)
19405     {
19406       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19407       /* If SP is in the list, then the base register must be SP.  */
19408       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19409       /* If base register is in the list, there must be no explicit update.  */
19410       if (regno == regno_base)
19411         gcc_assert (!update);
19412     }
19413 
19414   conditional = reverse ? "%?%D0" : "%?%d0";
19415   /* Can't use POP if returning from an interrupt.  */
19416   if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19417     sprintf (pattern, "pop%s\t{", conditional);
19418   else
19419     {
19420       /* Output ldmfd when the base register is SP, otherwise output ldmia.
19421          This is just a convention; their semantics are identical.  */
19422       if (regno_base == SP_REGNUM)
19423 	sprintf (pattern, "ldmfd%s\t", conditional);
19424       else if (update)
19425 	sprintf (pattern, "ldmia%s\t", conditional);
19426       else
19427 	sprintf (pattern, "ldm%s\t", conditional);
19428 
19429       strcat (pattern, reg_names[regno_base]);
19430       if (update)
19431         strcat (pattern, "!, {");
19432       else
19433         strcat (pattern, ", {");
19434     }
19435 
19436   /* Output the first destination register.  */
19437   strcat (pattern,
19438           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19439 
19440   /* Output the rest of the destination registers.  */
19441   for (i = offset + 1; i < num_saves; i++)
19442     {
19443       strcat (pattern, ", ");
19444       strcat (pattern,
19445               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19446     }
19447 
19448   strcat (pattern, "}");
19449 
19450   if (interrupt_p && return_pc)
19451     strcat (pattern, "^");
19452 
19453   output_asm_insn (pattern, &cond);
19454 }
19455 
19456 
19457 /* Output the assembly for a store multiple.  */
19458 
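/* Illustrative example: a push of d8-d9 through SP is printed as
   "vpush.64 {d8, d9}", while the same store through another base register,
   say r3, is printed as "vstmdb.64 r3!, {d8, d9}".  */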
19459 const char *
19460 vfp_output_vstmd (rtx * operands)
19461 {
19462   char pattern[100];
19463   int p;
19464   int base;
19465   int i;
19466   rtx addr_reg = REG_P (XEXP (operands[0], 0))
19467 		   ? XEXP (operands[0], 0)
19468 		   : XEXP (XEXP (operands[0], 0), 0);
19469   bool push_p =  REGNO (addr_reg) == SP_REGNUM;
19470 
19471   if (push_p)
19472     strcpy (pattern, "vpush%?.64\t{%P1");
19473   else
19474     strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19475 
19476   p = strlen (pattern);
19477 
19478   gcc_assert (REG_P (operands[1]));
19479 
19480   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19481   for (i = 1; i < XVECLEN (operands[2], 0); i++)
19482     {
19483       p += sprintf (&pattern[p], ", d%d", base + i);
19484     }
19485   strcpy (&pattern[p], "}");
19486 
19487   output_asm_insn (pattern, operands);
19488   return "";
19489 }
19490 
19491 
19492 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
19493    number of bytes pushed.  */
19494 
19495 static int
19496 vfp_emit_fstmd (int base_reg, int count)
19497 {
19498   rtx par;
19499   rtx dwarf;
19500   rtx tmp, reg;
19501   int i;
19502 
19503   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
19504      register pairs are stored by a store multiple insn.  We avoid this
19505      by pushing an extra pair.  */
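  /* Illustrative example: on affected cores a request to store two D
     registers is widened to store three (moving the start down one D
     register if the block would otherwise run past the last VFP register),
     so that exactly two register pairs are never stored by one insn.  */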
19506   if (count == 2 && !arm_arch6)
19507     {
19508       if (base_reg == LAST_VFP_REGNUM - 3)
19509 	base_reg -= 2;
19510       count++;
19511     }
19512 
19513   /* FSTMD may not store more than 16 doubleword registers at once.  Split
19514      larger stores into multiple parts (up to a maximum of two, in
19515      practice).  */
19516   if (count > 16)
19517     {
19518       int saved;
19519       /* NOTE: base_reg is an internal register number, so each D register
19520          counts as 2.  */
19521       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19522       saved += vfp_emit_fstmd (base_reg, 16);
19523       return saved;
19524     }
19525 
19526   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19527   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19528 
19529   reg = gen_rtx_REG (DFmode, base_reg);
19530   base_reg += 2;
19531 
19532   XVECEXP (par, 0, 0)
19533     = gen_rtx_SET (gen_frame_mem
19534 		   (BLKmode,
19535 		    gen_rtx_PRE_MODIFY (Pmode,
19536 					stack_pointer_rtx,
19537 					plus_constant
19538 					(Pmode, stack_pointer_rtx,
19539 					 - (count * 8)))
19540 		    ),
19541 		   gen_rtx_UNSPEC (BLKmode,
19542 				   gen_rtvec (1, reg),
19543 				   UNSPEC_PUSH_MULT));
19544 
19545   tmp = gen_rtx_SET (stack_pointer_rtx,
19546 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19547   RTX_FRAME_RELATED_P (tmp) = 1;
19548   XVECEXP (dwarf, 0, 0) = tmp;
19549 
19550   tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19551   RTX_FRAME_RELATED_P (tmp) = 1;
19552   XVECEXP (dwarf, 0, 1) = tmp;
19553 
19554   for (i = 1; i < count; i++)
19555     {
19556       reg = gen_rtx_REG (DFmode, base_reg);
19557       base_reg += 2;
19558       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19559 
19560       tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19561 					plus_constant (Pmode,
19562 						       stack_pointer_rtx,
19563 						       i * 8)),
19564 			 reg);
19565       RTX_FRAME_RELATED_P (tmp) = 1;
19566       XVECEXP (dwarf, 0, i + 1) = tmp;
19567     }
19568 
19569   par = emit_insn (par);
19570   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19571   RTX_FRAME_RELATED_P (par) = 1;
19572 
19573   return count * 8;
19574 }
19575 
19576 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
19577    has the cmse_nonsecure_call attribute; return FALSE otherwise.  */
19578 
19579 bool
19580 detect_cmse_nonsecure_call (tree addr)
19581 {
19582   if (!addr)
19583     return FALSE;
19584 
19585   tree fntype = TREE_TYPE (addr);
19586   if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19587 				    TYPE_ATTRIBUTES (fntype)))
19588     return TRUE;
19589   return FALSE;
19590 }
19591 
19592 
19593 /* Emit a call instruction with pattern PAT.  ADDR is the address of
19594    the call target.  */
19595 
19596 void
19597 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19598 {
19599   rtx insn;
19600 
19601   insn = emit_call_insn (pat);
19602 
19603   /* The PIC register is live on entry to VxWorks PIC PLT entries.
19604      If the call might use such an entry, add a use of the PIC register
19605      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
19606   if (TARGET_VXWORKS_RTP
19607       && flag_pic
19608       && !sibcall
19609       && SYMBOL_REF_P (addr)
19610       && (SYMBOL_REF_DECL (addr)
19611 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19612 	  : !SYMBOL_REF_LOCAL_P (addr)))
19613     {
19614       require_pic_register (NULL_RTX, false /*compute_now*/);
19615       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19616     }
19617 
19618   if (TARGET_FDPIC)
19619     {
19620       rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19621       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19622     }
19623 
19624   if (TARGET_AAPCS_BASED)
19625     {
19626       /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19627 	 linker.  We need to add an IP clobber to allow setting
19628 	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
19629 	 is not needed since it's a fixed register.  */
19630       rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19631       clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19632     }
19633 }
19634 
19635 /* Output a 'call' insn.  */
19636 const char *
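/* Illustrative example (pre-ARMv5t targets only): a call through r2 is
   printed as "mov lr, pc" followed by "bx r2" when interworking or ARMv4t
   is available, or by "mov pc, r2" otherwise; a call through LR is first
   moved into IP.  */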
19637 output_call (rtx *operands)
19638 {
19639   gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */
19640 
19641   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
19642   if (REGNO (operands[0]) == LR_REGNUM)
19643     {
19644       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19645       output_asm_insn ("mov%?\t%0, %|lr", operands);
19646     }
19647 
19648   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19649 
19650   if (TARGET_INTERWORK || arm_arch4t)
19651     output_asm_insn ("bx%?\t%0", operands);
19652   else
19653     output_asm_insn ("mov%?\t%|pc, %0", operands);
19654 
19655   return "";
19656 }
19657 
19658 /* Output a move from ARM registers to ARM registers of a long double.
19659    OPERANDS[0] is the destination.
19660    OPERANDS[1] is the source.  */
19661 const char *
19662 output_mov_long_double_arm_from_arm (rtx *operands)
19663 {
19664   /* We have to be careful here because the two might overlap.  */
19665   int dest_start = REGNO (operands[0]);
19666   int src_start = REGNO (operands[1]);
19667   rtx ops[2];
19668   int i;
19669 
19670   if (dest_start < src_start)
19671     {
19672       for (i = 0; i < 3; i++)
19673 	{
19674 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19675 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19676 	  output_asm_insn ("mov%?\t%0, %1", ops);
19677 	}
19678     }
19679   else
19680     {
19681       for (i = 2; i >= 0; i--)
19682 	{
19683 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19684 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19685 	  output_asm_insn ("mov%?\t%0, %1", ops);
19686 	}
19687     }
19688 
19689   return "";
19690 }
19691 
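/* Emit the instruction pair that materializes SRC in DEST: for a constant,
   a set of the low 16 bits followed, when the high half is nonzero, by an
   insertion of the top 16 bits; otherwise a HIGH/LO_SUM pair (typically
   assembled as a movw/movt sequence).  */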
19692 void
19693 arm_emit_movpair (rtx dest, rtx src)
19694  {
19695   /* If the src is an immediate, simplify it.  */
19696   if (CONST_INT_P (src))
19697     {
19698       HOST_WIDE_INT val = INTVAL (src);
19699       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19700       if ((val >> 16) & 0x0000ffff)
19701 	{
19702 	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19703 					       GEN_INT (16)),
19704 			 GEN_INT ((val >> 16) & 0x0000ffff));
19705 	  rtx_insn *insn = get_last_insn ();
19706 	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19707 	}
19708       return;
19709     }
19710    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19711    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19712    rtx_insn *insn = get_last_insn ();
19713    set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19714  }
19715 
19716 /* Output a move between double words.  It must be REG<-MEM
19717    or MEM<-REG.  */
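/* Illustrative example: a DImode load of r0/r1 from [r2] is printed as
   "ldrd r0, [r2]" when LDRD is usable (Thumb-2, or an even first register),
   and as "ldmia r2, {r0, r1}" otherwise.  */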
19718 const char *
19719 output_move_double (rtx *operands, bool emit, int *count)
19720 {
19721   enum rtx_code code0 = GET_CODE (operands[0]);
19722   enum rtx_code code1 = GET_CODE (operands[1]);
19723   rtx otherops[3];
19724   if (count)
19725     *count = 1;
19726 
19727   /* The only case when this might happen is when
19728      you are looking at the length of a DImode instruction
19729      that has an invalid constant in it.  */
19730   if (code0 == REG && code1 != MEM)
19731     {
19732       gcc_assert (!emit);
19733       *count = 2;
19734       return "";
19735     }
19736 
19737   if (code0 == REG)
19738     {
19739       unsigned int reg0 = REGNO (operands[0]);
19740       const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19741 
19742       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19743 
19744       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
19745 
19746       switch (GET_CODE (XEXP (operands[1], 0)))
19747 	{
19748 	case REG:
19749 
19750 	  if (emit)
19751 	    {
19752 	      if (can_ldrd
19753 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19754 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19755 	      else
19756 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19757 	    }
19758 	  break;
19759 
19760 	case PRE_INC:
19761 	  gcc_assert (can_ldrd);
19762 	  if (emit)
19763 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19764 	  break;
19765 
19766 	case PRE_DEC:
19767 	  if (emit)
19768 	    {
19769 	      if (can_ldrd)
19770 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19771 	      else
19772 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19773 	    }
19774 	  break;
19775 
19776 	case POST_INC:
19777 	  if (emit)
19778 	    {
19779 	      if (can_ldrd)
19780 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19781 	      else
19782 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19783 	    }
19784 	  break;
19785 
19786 	case POST_DEC:
19787 	  gcc_assert (can_ldrd);
19788 	  if (emit)
19789 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19790 	  break;
19791 
19792 	case PRE_MODIFY:
19793 	case POST_MODIFY:
19794 	  /* Autoincrement addressing modes should never have overlapping
19795 	     base and destination registers, and overlapping index registers
19796 	     are already prohibited, so this doesn't need to worry about
19797 	     fix_cm3_ldrd.  */
19798 	  otherops[0] = operands[0];
19799 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19800 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19801 
19802 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19803 	    {
19804 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19805 		{
19806 		  /* Registers overlap so split out the increment.  */
19807 		  if (emit)
19808 		    {
19809 		      gcc_assert (can_ldrd);
19810 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
19811 		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19812 		    }
19813 		  if (count)
19814 		    *count = 2;
19815 		}
19816 	      else
19817 		{
19818 		  /* Use a single insn if we can.
19819 		     FIXME: IWMMXT allows offsets larger than ldrd can
19820 		     handle, fix these up with a pair of ldr.  */
19821 		  if (can_ldrd
19822 		      && (TARGET_THUMB2
19823 		      || !CONST_INT_P (otherops[2])
19824 		      || (INTVAL (otherops[2]) > -256
19825 			  && INTVAL (otherops[2]) < 256)))
19826 		    {
19827 		      if (emit)
19828 			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19829 		    }
19830 		  else
19831 		    {
19832 		      if (emit)
19833 			{
19834 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19835 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19836 			}
19837 		      if (count)
19838 			*count = 2;
19839 
19840 		    }
19841 		}
19842 	    }
19843 	  else
19844 	    {
19845 	      /* Use a single insn if we can.
19846 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19847 		 fix these up with a pair of ldr.  */
19848 	      if (can_ldrd
19849 		  && (TARGET_THUMB2
19850 		  || !CONST_INT_P (otherops[2])
19851 		  || (INTVAL (otherops[2]) > -256
19852 		      && INTVAL (otherops[2]) < 256)))
19853 		{
19854 		  if (emit)
19855 		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19856 		}
19857 	      else
19858 		{
19859 		  if (emit)
19860 		    {
19861 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19862 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19863 		    }
19864 		  if (count)
19865 		    *count = 2;
19866 		}
19867 	    }
19868 	  break;
19869 
19870 	case LABEL_REF:
19871 	case CONST:
19872 	  /* We might be able to use ldrd %0, %1 here.  However the range is
19873 	     different to ldr/adr, and it is broken on some ARMv7-M
19874 	     implementations.  */
19875 	  /* Use the second register of the pair to avoid problematic
19876 	     overlap.  */
19877 	  otherops[1] = operands[1];
19878 	  if (emit)
19879 	    output_asm_insn ("adr%?\t%0, %1", otherops);
19880 	  operands[1] = otherops[0];
19881 	  if (emit)
19882 	    {
19883 	      if (can_ldrd)
19884 		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19885 	      else
19886 		output_asm_insn ("ldmia%?\t%1, %M0", operands);
19887 	    }
19888 
19889 	  if (count)
19890 	    *count = 2;
19891 	  break;
19892 
19893 	  /* ??? This needs checking for thumb2.  */
19894 	default:
19895 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19896 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19897 	    {
19898 	      otherops[0] = operands[0];
19899 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19900 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19901 
19902 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19903 		{
19904 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19905 		    {
19906 		      switch ((int) INTVAL (otherops[2]))
19907 			{
19908 			case -8:
19909 			  if (emit)
19910 			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19911 			  return "";
19912 			case -4:
19913 			  if (TARGET_THUMB2)
19914 			    break;
19915 			  if (emit)
19916 			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19917 			  return "";
19918 			case 4:
19919 			  if (TARGET_THUMB2)
19920 			    break;
19921 			  if (emit)
19922 			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19923 			  return "";
19924 			}
19925 		    }
19926 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19927 		  operands[1] = otherops[0];
19928 		  if (can_ldrd
19929 		      && (REG_P (otherops[2])
19930 			  || TARGET_THUMB2
19931 			  || (CONST_INT_P (otherops[2])
19932 			      && INTVAL (otherops[2]) > -256
19933 			      && INTVAL (otherops[2]) < 256)))
19934 		    {
19935 		      if (reg_overlap_mentioned_p (operands[0],
19936 						   otherops[2]))
19937 			{
19938 			  /* Swap base and index registers over to
19939 			     avoid a conflict.  */
19940 			  std::swap (otherops[1], otherops[2]);
19941 			}
19942 		      /* If both registers conflict, it will usually
19943 			 have been fixed by a splitter.  */
19944 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
19945 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19946 			{
19947 			  if (emit)
19948 			    {
19949 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
19950 			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19951 			    }
19952 			  if (count)
19953 			    *count = 2;
19954 			}
19955 		      else
19956 			{
19957 			  otherops[0] = operands[0];
19958 			  if (emit)
19959 			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19960 			}
19961 		      return "";
19962 		    }
19963 
19964 		  if (CONST_INT_P (otherops[2]))
19965 		    {
19966 		      if (emit)
19967 			{
19968 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19969 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19970 			  else
19971 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
19972 			}
19973 		    }
19974 		  else
19975 		    {
19976 		      if (emit)
19977 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
19978 		    }
19979 		}
19980 	      else
19981 		{
19982 		  if (emit)
19983 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19984 		}
19985 
19986 	      if (count)
19987 		*count = 2;
19988 
19989 	      if (can_ldrd)
19990 		return "ldrd%?\t%0, [%1]";
19991 
19992 	      return "ldmia%?\t%1, %M0";
19993 	    }
19994 	  else
19995 	    {
19996 	      otherops[1] = adjust_address (operands[1], SImode, 4);
19997 	      /* Take care of overlapping base/data reg.  */
19998 	      if (reg_mentioned_p (operands[0], operands[1]))
19999 		{
20000 		  if (emit)
20001 		    {
20002 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
20003 		      output_asm_insn ("ldr%?\t%0, %1", operands);
20004 		    }
20005 		  if (count)
20006 		    *count = 2;
20007 
20008 		}
20009 	      else
20010 		{
20011 		  if (emit)
20012 		    {
20013 		      output_asm_insn ("ldr%?\t%0, %1", operands);
20014 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
20015 		    }
20016 		  if (count)
20017 		    *count = 2;
20018 		}
20019 	    }
20020 	}
20021     }
20022   else
20023     {
20024       /* Constraints should ensure this.  */
20025       gcc_assert (code0 == MEM && code1 == REG);
20026       gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20027                   || (TARGET_ARM && TARGET_LDRD));
20028 
20029       /* For TARGET_ARM the first source register of an STRD
20030 	 must be even.  This is usually the case for double-word
20031 	 values but user assembly constraints can force an odd
20032 	 starting register.  */
20033       bool allow_strd = TARGET_LDRD
20034 			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20035       switch (GET_CODE (XEXP (operands[0], 0)))
20036         {
20037 	case REG:
20038 	  if (emit)
20039 	    {
20040 	      if (allow_strd)
20041 		output_asm_insn ("strd%?\t%1, [%m0]", operands);
20042 	      else
20043 		output_asm_insn ("stm%?\t%m0, %M1", operands);
20044 	    }
20045 	  break;
20046 
20047         case PRE_INC:
20048 	  gcc_assert (allow_strd);
20049 	  if (emit)
20050 	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20051 	  break;
20052 
20053         case PRE_DEC:
20054 	  if (emit)
20055 	    {
20056 	      if (allow_strd)
20057 		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20058 	      else
20059 		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20060 	    }
20061 	  break;
20062 
20063         case POST_INC:
20064 	  if (emit)
20065 	    {
20066 	      if (allow_strd)
20067 		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20068 	      else
20069 		output_asm_insn ("stm%?\t%m0!, %M1", operands);
20070 	    }
20071 	  break;
20072 
20073         case POST_DEC:
20074 	  gcc_assert (allow_strd);
20075 	  if (emit)
20076 	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20077 	  break;
20078 
20079 	case PRE_MODIFY:
20080 	case POST_MODIFY:
20081 	  otherops[0] = operands[1];
20082 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20083 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20084 
20085 	  /* IWMMXT allows offsets larger than strd can handle,
20086 	     fix these up with a pair of str.  */
20087 	  if (!TARGET_THUMB2
20088 	      && CONST_INT_P (otherops[2])
20089 	      && (INTVAL(otherops[2]) <= -256
20090 		  || INTVAL(otherops[2]) >= 256))
20091 	    {
20092 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20093 		{
20094 		  if (emit)
20095 		    {
20096 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20097 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20098 		    }
20099 		  if (count)
20100 		    *count = 2;
20101 		}
20102 	      else
20103 		{
20104 		  if (emit)
20105 		    {
20106 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20107 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20108 		    }
20109 		  if (count)
20110 		    *count = 2;
20111 		}
20112 	    }
20113 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20114 	    {
20115 	      if (emit)
20116 		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20117 	    }
20118 	  else
20119 	    {
20120 	      if (emit)
20121 		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20122 	    }
20123 	  break;
20124 
20125 	case PLUS:
20126 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20127 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20128 	    {
20129 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20130 		{
20131 		case -8:
20132 		  if (emit)
20133 		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20134 		  return "";
20135 
20136 		case -4:
20137 		  if (TARGET_THUMB2)
20138 		    break;
20139 		  if (emit)
20140 		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
20141 		  return "";
20142 
20143 		case 4:
20144 		  if (TARGET_THUMB2)
20145 		    break;
20146 		  if (emit)
20147 		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
20148 		  return "";
20149 		}
20150 	    }
20151 	  if (allow_strd
20152 	      && (REG_P (otherops[2])
20153 		  || TARGET_THUMB2
20154 		  || (CONST_INT_P (otherops[2])
20155 		      && INTVAL (otherops[2]) > -256
20156 		      && INTVAL (otherops[2]) < 256)))
20157 	    {
20158 	      otherops[0] = operands[1];
20159 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20160 	      if (emit)
20161 		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20162 	      return "";
20163 	    }
20164 	  /* Fall through */
20165 
20166         default:
20167 	  otherops[0] = adjust_address (operands[0], SImode, 4);
20168 	  otherops[1] = operands[1];
20169 	  if (emit)
20170 	    {
20171 	      output_asm_insn ("str%?\t%1, %0", operands);
20172 	      output_asm_insn ("str%?\t%H1, %0", otherops);
20173 	    }
20174 	  if (count)
20175 	    *count = 2;
20176 	}
20177     }
20178 
20179   return "";
20180 }
20181 
20182 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
20183    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
20184 
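/* Illustrative example: a quad-word load into r0-r3 from [r4] is printed as
   "ldmia r4, {r0, r1, r2, r3}", and a register-to-register move becomes four
   "mov" instructions ordered so that overlapping source registers are not
   clobbered before they are read.  */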
20185 const char *
20186 output_move_quad (rtx *operands)
20187 {
20188   if (REG_P (operands[0]))
20189     {
20190       /* Load, or reg->reg move.  */
20191 
20192       if (MEM_P (operands[1]))
20193         {
20194           switch (GET_CODE (XEXP (operands[1], 0)))
20195             {
20196             case REG:
20197               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20198               break;
20199 
20200             case LABEL_REF:
20201             case CONST:
20202               output_asm_insn ("adr%?\t%0, %1", operands);
20203               output_asm_insn ("ldmia%?\t%0, %M0", operands);
20204               break;
20205 
20206             default:
20207               gcc_unreachable ();
20208             }
20209         }
20210       else
20211         {
20212           rtx ops[2];
20213           int dest, src, i;
20214 
20215           gcc_assert (REG_P (operands[1]));
20216 
20217           dest = REGNO (operands[0]);
20218           src = REGNO (operands[1]);
20219 
20220           /* This seems pretty dumb, but hopefully GCC won't try to do it
20221              very often.  */
20222           if (dest < src)
20223             for (i = 0; i < 4; i++)
20224               {
20225                 ops[0] = gen_rtx_REG (SImode, dest + i);
20226                 ops[1] = gen_rtx_REG (SImode, src + i);
20227                 output_asm_insn ("mov%?\t%0, %1", ops);
20228               }
20229           else
20230             for (i = 3; i >= 0; i--)
20231               {
20232                 ops[0] = gen_rtx_REG (SImode, dest + i);
20233                 ops[1] = gen_rtx_REG (SImode, src + i);
20234                 output_asm_insn ("mov%?\t%0, %1", ops);
20235               }
20236         }
20237     }
20238   else
20239     {
20240       gcc_assert (MEM_P (operands[0]));
20241       gcc_assert (REG_P (operands[1]));
20242       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20243 
20244       switch (GET_CODE (XEXP (operands[0], 0)))
20245         {
20246         case REG:
20247           output_asm_insn ("stm%?\t%m0, %M1", operands);
20248           break;
20249 
20250         default:
20251           gcc_unreachable ();
20252         }
20253     }
20254 
20255   return "";
20256 }
20257 
20258 /* Output a VFP load or store instruction.  */
20259 
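/* Illustrative example: an SFmode load is printed as "vldr.32 s0, [r1]",
   a DFmode pre-decrement store as "vstmdb.64 r2!, {d8}", and a DFmode
   post-increment load as "vldmia.64 r2!, {d8}".  */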
20260 const char *
20261 output_move_vfp (rtx *operands)
20262 {
20263   rtx reg, mem, addr, ops[2];
20264   int load = REG_P (operands[0]);
20265   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20266   int sp = (!TARGET_VFP_FP16INST
20267 	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20268   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20269   const char *templ;
20270   char buff[50];
20271   machine_mode mode;
20272 
20273   reg = operands[!load];
20274   mem = operands[load];
20275 
20276   mode = GET_MODE (reg);
20277 
20278   gcc_assert (REG_P (reg));
20279   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20280   gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20281 	      || mode == SFmode
20282 	      || mode == DFmode
20283 	      || mode == HImode
20284 	      || mode == SImode
20285 	      || mode == DImode
20286               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20287   gcc_assert (MEM_P (mem));
20288 
20289   addr = XEXP (mem, 0);
20290 
20291   switch (GET_CODE (addr))
20292     {
20293     case PRE_DEC:
20294       templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20295       ops[0] = XEXP (addr, 0);
20296       ops[1] = reg;
20297       break;
20298 
20299     case POST_INC:
20300       templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20301       ops[0] = XEXP (addr, 0);
20302       ops[1] = reg;
20303       break;
20304 
20305     default:
20306       templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20307       ops[0] = reg;
20308       ops[1] = mem;
20309       break;
20310     }
20311 
20312   sprintf (buff, templ,
20313 	   load ? "ld" : "st",
20314 	   dp ? "64" : sp ? "32" : "16",
20315 	   dp ? "P" : "",
20316 	   integer_p ? "\t%@ int" : "");
20317   output_asm_insn (buff, ops);
20318 
20319   return "";
20320 }
20321 
20322 /* Output a Neon double-word or quad-word load or store, or a load
20323    or store for larger structure modes.
20324 
20325    WARNING: The ordering of elements is weird in big-endian mode,
20326    because the EABI requires that vectors stored in memory appear
20327    as though they were stored by a VSTM.
20328    GCC RTL defines element ordering based on in-memory order.
20329    This can be different from the architectural ordering of elements
20330    within a NEON register. The intrinsics defined in arm_neon.h use the
20331    NEON register element ordering, not the GCC RTL element ordering.
20332 
20333    For example, the in-memory ordering of a big-endian quadword
20334    vector with 16-bit elements when stored from register pair {d0,d1}
20335    will be (lowest address first, d0[N] is NEON register element N):
20336 
20337      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20338 
20339    When necessary, quadword registers (dN, dN+1) are moved to ARM
20340    registers from rN in the order:
20341 
20342      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20343 
20344    This ensures that STM/LDM can be used on vectors in ARM registers, and
20345    that the same memory layout results as if VSTM/VLDM were used.
20346 
20347    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20348    possible, which allows use of appropriate alignment tags.
20349    Note that the choice of "64" is independent of the actual vector
20350    element size; this size simply ensures that the behavior is
20351    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20352 
20353    Due to limitations of those instructions, use of VST1.64/VLD1.64
20354    is not possible if:
20355     - the address contains PRE_DEC, or
20356     - the mode refers to more than 4 double-word registers
20357 
20358    In those cases, it would be possible to replace VSTM/VLDM by a
20359    sequence of instructions; this is not currently implemented since
20360    this is not certain to actually improve performance.  */
20361 
20362 const char *
20363 output_move_neon (rtx *operands)
20364 {
20365   rtx reg, mem, addr, ops[2];
20366   int regno, nregs, load = REG_P (operands[0]);
20367   const char *templ;
20368   char buff[50];
20369   machine_mode mode;
20370 
20371   reg = operands[!load];
20372   mem = operands[load];
20373 
20374   mode = GET_MODE (reg);
20375 
20376   gcc_assert (REG_P (reg));
20377   regno = REGNO (reg);
20378   nregs = REG_NREGS (reg) / 2;
20379   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20380 	      || NEON_REGNO_OK_FOR_QUAD (regno));
20381   gcc_assert (VALID_NEON_DREG_MODE (mode)
20382 	      || VALID_NEON_QREG_MODE (mode)
20383 	      || VALID_NEON_STRUCT_MODE (mode));
20384   gcc_assert (MEM_P (mem));
20385 
20386   addr = XEXP (mem, 0);
20387 
20388   /* Strip off const from addresses like (const (plus (...))).  */
20389   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20390     addr = XEXP (addr, 0);
20391 
20392   switch (GET_CODE (addr))
20393     {
20394     case POST_INC:
20395       /* We have to use vldm / vstm for too-large modes.  */
20396       if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20397 	{
20398 	  templ = "v%smia%%?\t%%0!, %%h1";
20399 	  ops[0] = XEXP (addr, 0);
20400 	}
20401       else
20402 	{
20403 	  templ = "v%s1.64\t%%h1, %%A0";
20404 	  ops[0] = mem;
20405 	}
20406       ops[1] = reg;
20407       break;
20408 
20409     case PRE_DEC:
20410       /* We have to use vldm / vstm in this case, since there is no
20411 	 pre-decrement form of the vld1 / vst1 instructions.  */
20412       templ = "v%smdb%%?\t%%0!, %%h1";
20413       ops[0] = XEXP (addr, 0);
20414       ops[1] = reg;
20415       break;
20416 
20417     case POST_MODIFY:
20418       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
20419       gcc_unreachable ();
20420 
20421     case REG:
20422       /* We have to use vldm / vstm for too-large modes.  */
20423       if (nregs > 1)
20424 	{
20425 	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20426 	    templ = "v%smia%%?\t%%m0, %%h1";
20427 	  else
20428 	    templ = "v%s1.64\t%%h1, %%A0";
20429 
20430 	  ops[0] = mem;
20431 	  ops[1] = reg;
20432 	  break;
20433 	}
20434       /* Fall through.  */
20435     case PLUS:
20436       if (GET_CODE (addr) == PLUS)
20437 	addr = XEXP (addr, 0);
20438       /* Fall through.  */
20439     case LABEL_REF:
20440       {
20441 	int i;
20442 	int overlap = -1;
20443 	for (i = 0; i < nregs; i++)
20444 	  {
20445 	    /* We're only using DImode here because it's a convenient
20446 	       size.  */
20447 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20448 	    ops[1] = adjust_address (mem, DImode, 8 * i);
20449 	    if (reg_overlap_mentioned_p (ops[0], mem))
20450 	      {
20451 		gcc_assert (overlap == -1);
20452 		overlap = i;
20453 	      }
20454 	    else
20455 	      {
20456 		if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20457 		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20458 		else
20459 		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20460 		output_asm_insn (buff, ops);
20461 	      }
20462 	  }
20463 	if (overlap != -1)
20464 	  {
20465 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20466 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
20467 	    if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20468 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20469 	    else
20470 	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20471 	    output_asm_insn (buff, ops);
20472 	  }
20473 
20474         return "";
20475       }
20476 
20477     default:
20478       gcc_unreachable ();
20479     }
20480 
20481   sprintf (buff, templ, load ? "ld" : "st");
20482   output_asm_insn (buff, ops);
20483 
20484   return "";
20485 }
20486 
20487 /* Compute and return the length of neon_mov<mode>, where <mode> is
20488    one of VSTRUCT modes: EI, OI, CI or XI.  */
20489 int
20490 arm_attr_length_move_neon (rtx_insn *insn)
20491 {
20492   rtx reg, mem, addr;
20493   int load;
20494   machine_mode mode;
20495 
20496   extract_insn_cached (insn);
20497 
20498   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20499     {
20500       mode = GET_MODE (recog_data.operand[0]);
20501       switch (mode)
20502 	{
20503 	case E_EImode:
20504 	case E_OImode:
20505 	  return 8;
20506 	case E_CImode:
20507 	  return 12;
20508 	case E_XImode:
20509 	  return 16;
20510 	default:
20511 	  gcc_unreachable ();
20512 	}
20513     }
20514 
20515   load = REG_P (recog_data.operand[0]);
20516   reg = recog_data.operand[!load];
20517   mem = recog_data.operand[load];
20518 
20519   gcc_assert (MEM_P (mem));
20520 
20521   addr = XEXP (mem, 0);
20522 
20523   /* Strip off const from addresses like (const (plus (...))).  */
20524   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20525     addr = XEXP (addr, 0);
20526 
20527   if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20528     {
20529       int insns = REG_NREGS (reg) / 2;
20530       return insns * 4;
20531     }
20532   else
20533     return 4;
20534 }
20535 
20536 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
20537    return zero.  */
20538 
20539 int
20540 arm_address_offset_is_imm (rtx_insn *insn)
20541 {
20542   rtx mem, addr;
20543 
20544   extract_insn_cached (insn);
20545 
20546   if (REG_P (recog_data.operand[0]))
20547     return 0;
20548 
20549   mem = recog_data.operand[0];
20550 
20551   gcc_assert (MEM_P (mem));
20552 
20553   addr = XEXP (mem, 0);
20554 
20555   if (REG_P (addr)
20556       || (GET_CODE (addr) == PLUS
20557 	  && REG_P (XEXP (addr, 0))
20558 	  && CONST_INT_P (XEXP (addr, 1))))
20559     return 1;
20560   else
20561     return 0;
20562 }
20563 
20564 /* Output an ADD r, s, #n where n may be too big for one instruction.
20565    If n is zero and r is the same register as s, output nothing.  */
20566 const char *
20567 output_add_immediate (rtx *operands)
20568 {
20569   HOST_WIDE_INT n = INTVAL (operands[2]);
20570 
20571   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20572     {
20573       if (n < 0)
20574 	output_multi_immediate (operands,
20575 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20576 				-n);
20577       else
20578 	output_multi_immediate (operands,
20579 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20580 				n);
20581     }
20582 
20583   return "";
20584 }
20585 
20586 /* Output a multiple immediate operation.
20587    OPERANDS is the vector of operands referred to in the output patterns.
20588    INSTR1 is the output pattern to use for the first constant.
20589    INSTR2 is the output pattern to use for subsequent constants.
20590    IMMED_OP is the index of the constant slot in OPERANDS.
20591    N is the constant value.  */
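/* For example (illustrative only; the register numbers are arbitrary),
   splitting N == 0x1234 finds a set pair of bits at i == 2 and emits the
   chunk 0x234 (an 8-bit value sitting at an even bit position, hence a
   valid ARM rotated immediate), then emits the remaining chunk 0x1000 at
   i == 12, giving roughly:

	add	r0, r1, #564	@ 0x234
	add	r0, r0, #4096	@ 0x1000  */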
20592 static const char *
20593 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20594 			int immed_op, HOST_WIDE_INT n)
20595 {
20596 #if HOST_BITS_PER_WIDE_INT > 32
20597   n &= 0xffffffff;
20598 #endif
20599 
20600   if (n == 0)
20601     {
20602       /* Quick and easy output.  */
20603       operands[immed_op] = const0_rtx;
20604       output_asm_insn (instr1, operands);
20605     }
20606   else
20607     {
20608       int i;
20609       const char * instr = instr1;
20610 
20611       /* Note that n is never zero here (which would give no output).  */
20612       for (i = 0; i < 32; i += 2)
20613 	{
20614 	  if (n & (3 << i))
20615 	    {
20616 	      operands[immed_op] = GEN_INT (n & (255 << i));
20617 	      output_asm_insn (instr, operands);
20618 	      instr = instr2;
20619 	      i += 6;
20620 	    }
20621 	}
20622     }
20623 
20624   return "";
20625 }
20626 
20627 /* Return the name of a shifter operation.  */
20628 static const char *
20629 arm_shift_nmem (enum rtx_code code)
20630 {
20631   switch (code)
20632     {
20633     case ASHIFT:
20634       return ARM_LSL_NAME;
20635 
20636     case ASHIFTRT:
20637       return "asr";
20638 
20639     case LSHIFTRT:
20640       return "lsr";
20641 
20642     case ROTATERT:
20643       return "ror";
20644 
20645     default:
20646       abort();
20647     }
20648 }
20649 
20650 /* Return the appropriate ARM instruction for the operation code.
20651    The returned result should not be overwritten.  OP is the rtx of the
20652    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20653    was shifted.  */
20654 const char *
20655 arithmetic_instr (rtx op, int shift_first_arg)
20656 {
20657   switch (GET_CODE (op))
20658     {
20659     case PLUS:
20660       return "add";
20661 
20662     case MINUS:
20663       return shift_first_arg ? "rsb" : "sub";
20664 
20665     case IOR:
20666       return "orr";
20667 
20668     case XOR:
20669       return "eor";
20670 
20671     case AND:
20672       return "and";
20673 
20674     case ASHIFT:
20675     case ASHIFTRT:
20676     case LSHIFTRT:
20677     case ROTATERT:
20678       return arm_shift_nmem(GET_CODE(op));
20679 
20680     default:
20681       gcc_unreachable ();
20682     }
20683 }
20684 
20685 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20686    for the operation code.  The returned result should not be overwritten.
20687    OP is the rtx code of the shift.
20688    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20689    constant shift amount if the shift is by a constant.  */
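/* For example, (mult x 8) reaching this code is rewritten as a left
   shift: the MULT case below verifies that 8 is a power of two and sets
   *AMOUNTP to exact_log2 (8) == 3, so the caller ends up printing an
   "lsl #3" rather than a multiply (a sketch of the intent; the exact
   output depends on the caller).  */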
20690 static const char *
20691 shift_op (rtx op, HOST_WIDE_INT *amountp)
20692 {
20693   const char * mnem;
20694   enum rtx_code code = GET_CODE (op);
20695 
20696   switch (code)
20697     {
20698     case ROTATE:
20699       if (!CONST_INT_P (XEXP (op, 1)))
20700 	{
20701 	  output_operand_lossage ("invalid shift operand");
20702 	  return NULL;
20703 	}
20704 
20705       code = ROTATERT;
20706       *amountp = 32 - INTVAL (XEXP (op, 1));
20707       mnem = "ror";
20708       break;
20709 
20710     case ASHIFT:
20711     case ASHIFTRT:
20712     case LSHIFTRT:
20713     case ROTATERT:
20714       mnem = arm_shift_nmem(code);
20715       if (CONST_INT_P (XEXP (op, 1)))
20716 	{
20717 	  *amountp = INTVAL (XEXP (op, 1));
20718 	}
20719       else if (REG_P (XEXP (op, 1)))
20720 	{
20721 	  *amountp = -1;
20722 	  return mnem;
20723 	}
20724       else
20725 	{
20726 	  output_operand_lossage ("invalid shift operand");
20727 	  return NULL;
20728 	}
20729       break;
20730 
20731     case MULT:
20732       /* We never have to worry about the amount being other than a
20733 	 power of 2, since this case can never be reloaded from a reg.  */
20734       if (!CONST_INT_P (XEXP (op, 1)))
20735 	{
20736 	  output_operand_lossage ("invalid shift operand");
20737 	  return NULL;
20738 	}
20739 
20740       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20741 
20742       /* Amount must be a power of two.  */
20743       if (*amountp & (*amountp - 1))
20744 	{
20745 	  output_operand_lossage ("invalid shift operand");
20746 	  return NULL;
20747 	}
20748 
20749       *amountp = exact_log2 (*amountp);
20750       gcc_assert (IN_RANGE (*amountp, 0, 31));
20751       return ARM_LSL_NAME;
20752 
20753     default:
20754       output_operand_lossage ("invalid shift operand");
20755       return NULL;
20756     }
20757 
20758   /* This is not 100% correct, but follows from the desire to merge
20759      multiplication by a power of 2 with the recognizer for a
20760      shift.  >=32 is not a valid shift for "lsl", so we must try and
20761      output a shift that produces the correct arithmetical result.
20762      Using lsr #32 is identical except for the fact that the carry bit
20763      is not set correctly if we set the flags; but we never use the
20764      carry bit from such an operation, so we can ignore that.  */
20765   if (code == ROTATERT)
20766     /* Rotate is just modulo 32.  */
20767     *amountp &= 31;
20768   else if (*amountp != (*amountp & 31))
20769     {
20770       if (code == ASHIFT)
20771 	mnem = "lsr";
20772       *amountp = 32;
20773     }
20774 
20775   /* Shifts of 0 are no-ops.  */
20776   if (*amountp == 0)
20777     return NULL;
20778 
20779   return mnem;
20780 }
20781 
20782 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
20783    because /bin/as is horribly restrictive.  The judgement about
20784    whether or not each character is 'printable' (and can be output as
20785    is) or not (and must be printed with an octal escape) must be made
20786    with reference to the *host* character set -- the situation is
20787    similar to that discussed in the comments above pp_c_char in
20788    c-pretty-print.c.  */
20789 
20790 #define MAX_ASCII_LEN 51
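/* For example (a sketch; the exact escapes depend on the host character
   set), the three input bytes 'a', '"' and '\n' would be emitted as:

	.ascii	"a\"\012"

   i.e. the quote is backslash-escaped and the non-printable newline is
   printed as a three-digit octal escape.  */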
20791 
20792 void
20793 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20794 {
20795   int i;
20796   int len_so_far = 0;
20797 
20798   fputs ("\t.ascii\t\"", stream);
20799 
20800   for (i = 0; i < len; i++)
20801     {
20802       int c = p[i];
20803 
20804       if (len_so_far >= MAX_ASCII_LEN)
20805 	{
20806 	  fputs ("\"\n\t.ascii\t\"", stream);
20807 	  len_so_far = 0;
20808 	}
20809 
20810       if (ISPRINT (c))
20811 	{
20812 	  if (c == '\\' || c == '\"')
20813 	    {
20814 	      putc ('\\', stream);
20815 	      len_so_far++;
20816 	    }
20817 	  putc (c, stream);
20818 	  len_so_far++;
20819 	}
20820       else
20821 	{
20822 	  fprintf (stream, "\\%03o", c);
20823 	  len_so_far += 4;
20824 	}
20825     }
20826 
20827   fputs ("\"\n", stream);
20828 }
20829 
20830 
20831 /* Compute the register save mask for registers 0 through 12
20832    inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */
20833 
20834 static unsigned long
20835 arm_compute_save_reg0_reg12_mask (void)
20836 {
20837   unsigned long func_type = arm_current_func_type ();
20838   unsigned long save_reg_mask = 0;
20839   unsigned int reg;
20840 
20841   if (IS_INTERRUPT (func_type))
20842     {
20843       unsigned int max_reg;
20844       /* Interrupt functions must not corrupt any registers,
20845 	 even call clobbered ones.  If this is a leaf function
20846 	 we can just examine the registers used by the RTL, but
20847 	 otherwise we have to assume that whatever function is
20848 	 called might clobber anything, and so we have to save
20849 	 all the call-clobbered registers as well.  */
20850       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20851 	/* FIQ handlers have registers r8 - r12 banked, so
20852 	   we only need to check r0 - r7.  Normal ISRs only
20853 	   bank r14 and r15, so we must check up to r12.
20854 	   r13 is the stack pointer which is always preserved,
20855 	   so we do not need to consider it here.  */
20856 	max_reg = 7;
20857       else
20858 	max_reg = 12;
20859 
20860       for (reg = 0; reg <= max_reg; reg++)
20861 	if (reg_needs_saving_p (reg))
20862 	  save_reg_mask |= (1 << reg);
20863 
20864       /* Also save the pic base register if necessary.  */
20865       if (PIC_REGISTER_MAY_NEED_SAVING
20866 	  && crtl->uses_pic_offset_table)
20867 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20868     }
20869   else if (IS_VOLATILE(func_type))
20870     {
20871       /* For noreturn functions we historically omitted register saves
20872 	 altogether.  However this really messes up debugging.  As a
20873 	 compromise save just the frame pointers.  Combined with the link
20874 	 register saved elsewhere this should be sufficient to get
20875 	 a backtrace.  */
20876       if (frame_pointer_needed)
20877 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20878       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20879 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20880       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20881 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20882     }
20883   else
20884     {
20885       /* In the normal case we only need to save those registers
20886 	 which are call saved and which are used by this function.  */
20887       for (reg = 0; reg <= 11; reg++)
20888 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20889 	  save_reg_mask |= (1 << reg);
20890 
20891       /* Handle the frame pointer as a special case.  */
20892       if (frame_pointer_needed)
20893 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20894 
20895       /* If we aren't loading the PIC register,
20896 	 don't stack it even though it may be live.  */
20897       if (PIC_REGISTER_MAY_NEED_SAVING
20898 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20899 	      || crtl->uses_pic_offset_table))
20900 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20901 
20902       /* The prologue will copy SP into R0, so save it.  */
20903       if (IS_STACKALIGN (func_type))
20904 	save_reg_mask |= 1;
20905     }
20906 
20907   /* Save registers so the exception handler can modify them.  */
20908   if (crtl->calls_eh_return)
20909     {
20910       unsigned int i;
20911 
20912       for (i = 0; ; i++)
20913 	{
20914 	  reg = EH_RETURN_DATA_REGNO (i);
20915 	  if (reg == INVALID_REGNUM)
20916 	    break;
20917 	  save_reg_mask |= 1 << reg;
20918 	}
20919     }
20920 
20921   return save_reg_mask;
20922 }
20923 
20924 /* Return true if r3 is live at the start of the function.  */
20925 
20926 static bool
20927 arm_r3_live_at_start_p (void)
20928 {
20929   /* Just look at cfg info, which is still close enough to correct at this
20930      point.  This gives false positives for broken functions that might use
20931      uninitialized data that happens to be allocated in r3, but who cares?  */
20932   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20933 }
20934 
20935 /* Compute the number of bytes used to store the static chain register on the
20936    stack, above the stack frame.  We need to know this accurately to get the
20937    alignment of the rest of the stack frame correct.  */
20938 
20939 static int
20940 arm_compute_static_chain_stack_bytes (void)
20941 {
20942   /* Once the value is updated from the init value of -1, do not
20943      re-compute.  */
20944   if (cfun->machine->static_chain_stack_bytes != -1)
20945     return cfun->machine->static_chain_stack_bytes;
20946 
20947   /* See the defining assertion in arm_expand_prologue.  */
20948   if (IS_NESTED (arm_current_func_type ())
20949       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20950 	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20951 	       || flag_stack_clash_protection)
20952 	      && !df_regs_ever_live_p (LR_REGNUM)))
20953       && arm_r3_live_at_start_p ()
20954       && crtl->args.pretend_args_size == 0)
20955     return 4;
20956 
20957   return 0;
20958 }
20959 
20960 /* Compute a bit mask of which core registers need to be
20961    saved on the stack for the current function.
20962    This is used by arm_compute_frame_layout, which may add extra registers.  */
20963 
20964 static unsigned long
20965 arm_compute_save_core_reg_mask (void)
20966 {
20967   unsigned int save_reg_mask = 0;
20968   unsigned long func_type = arm_current_func_type ();
20969   unsigned int reg;
20970 
20971   if (IS_NAKED (func_type))
20972     /* This should never really happen.  */
20973     return 0;
20974 
20975   /* If we are creating a stack frame, then we must save the frame pointer,
20976      IP (which will hold the old stack pointer), LR and the PC.  */
20977   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20978     save_reg_mask |=
20979       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20980       | (1 << IP_REGNUM)
20981       | (1 << LR_REGNUM)
20982       | (1 << PC_REGNUM);
20983 
20984   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20985 
20986   /* Decide if we need to save the link register.
20987      Interrupt routines have their own banked link register,
20988      so they never need to save it.
20989      Otherwise if we do not use the link register we do not need to save
20990      it.  If we are pushing other registers onto the stack however, we
20991      can save an instruction in the epilogue by pushing the link register
20992      now and then popping it back into the PC.  This incurs extra memory
20993      accesses though, so we only do it when optimizing for size, and only
20994      if we know that we will not need a fancy return sequence.  */
20995   if (df_regs_ever_live_p (LR_REGNUM)
20996       || (save_reg_mask
20997 	  && optimize_size
20998 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20999 	  && !crtl->tail_call_emit
21000 	  && !crtl->calls_eh_return))
21001     save_reg_mask |= 1 << LR_REGNUM;
21002 
21003   if (cfun->machine->lr_save_eliminated)
21004     save_reg_mask &= ~ (1 << LR_REGNUM);
21005 
21006   if (TARGET_REALLY_IWMMXT
21007       && ((bit_count (save_reg_mask)
21008 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
21009 			   arm_compute_static_chain_stack_bytes())
21010 	   ) % 2) != 0)
21011     {
21012       /* The total number of registers that are going to be pushed
21013 	 onto the stack is odd.  We need to ensure that the stack
21014 	 is 64-bit aligned before we start to save iWMMXt registers,
21015 	 and also before we start to create locals.  (A local variable
21016 	 might be a double or long long which we will load/store using
21017 	 an iWMMXt instruction).  Therefore we need to push another
21018 	 ARM register, so that the stack will be 64-bit aligned.  We
21019 	 try to avoid using the arg registers (r0 -r3) as they might be
21020 	 used to pass values in a tail call.  */
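      /* For example, if {r4, r5, lr} were being pushed (12 bytes) with no
	 pretend args, the stack would only be 4-byte aligned at that point;
	 adding r6 brings the push to 16 bytes and restores 64-bit alignment
	 (an illustrative case, not tied to any particular function).  */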
21021       for (reg = 4; reg <= 12; reg++)
21022 	if ((save_reg_mask & (1 << reg)) == 0)
21023 	  break;
21024 
21025       if (reg <= 12)
21026 	save_reg_mask |= (1 << reg);
21027       else
21028 	{
21029 	  cfun->machine->sibcall_blocked = 1;
21030 	  save_reg_mask |= (1 << 3);
21031 	}
21032     }
21033 
21034   /* We may need to push an additional register for use when
21035      initializing the PIC base register.  */
21036   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21037       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21038     {
21039       reg = thumb_find_work_register (1 << 4);
21040       if (!call_used_or_fixed_reg_p (reg))
21041 	save_reg_mask |= (1 << reg);
21042     }
21043 
21044   return save_reg_mask;
21045 }
21046 
21047 /* Compute a bit mask of which core registers need to be
21048    saved on the stack for the current function.  */
21049 static unsigned long
21050 thumb1_compute_save_core_reg_mask (void)
21051 {
21052   unsigned long mask;
21053   unsigned reg;
21054 
21055   mask = 0;
21056   for (reg = 0; reg < 12; reg ++)
21057     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21058       mask |= 1 << reg;
21059 
21060   /* Handle the frame pointer as a special case.  */
21061   if (frame_pointer_needed)
21062     mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21063 
21064   if (flag_pic
21065       && !TARGET_SINGLE_PIC_BASE
21066       && arm_pic_register != INVALID_REGNUM
21067       && crtl->uses_pic_offset_table)
21068     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21069 
21070   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
21071   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21072     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21073 
21074   /* LR will also be pushed if any lo regs are pushed.  */
21075   if (mask & 0xff || thumb_force_lr_save ())
21076     mask |= (1 << LR_REGNUM);
21077 
21078   bool call_clobbered_scratch
21079     = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21080        && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21081 
21082   /* Make sure we have a low work register if we need one.  We will
21083      need one if we are going to push a high register, but we are not
21084      currently intending to push a low register.  However if both the
21085      prologue and epilogue have a spare call-clobbered low register,
21086      then we won't need to find an additional work register.  It does
21087      not need to be the same register in the prologue and
21088      epilogue.  */
21089   if ((mask & 0xff) == 0
21090       && !call_clobbered_scratch
21091       && ((mask & 0x0f00) || TARGET_BACKTRACE))
21092     {
21093       /* Use thumb_find_work_register to choose which register
21094 	 we will use.  If the register is live then we will
21095 	 have to push it.  Use LAST_LO_REGNUM as our fallback
21096 	 choice for the register to select.  */
21097       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21098       /* Make sure the register returned by thumb_find_work_register is
21099 	 not part of the return value.  */
21100       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21101 	reg = LAST_LO_REGNUM;
21102 
21103       if (callee_saved_reg_p (reg))
21104 	mask |= 1 << reg;
21105     }
21106 
21107   /* The 504 below is 8 bytes less than 512 because there are two possible
21108      alignment words.  We can't tell here if they will be present or not so we
21109      have to play it safe and assume that they are. */
21110   if ((CALLER_INTERWORKING_SLOT_SIZE +
21111        ROUND_UP_WORD (get_frame_size ()) +
21112        crtl->outgoing_args_size) >= 504)
21113     {
21114       /* This is the same as the code in thumb1_expand_prologue() which
21115 	 determines which register to use for stack decrement. */
21116       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21117 	if (mask & (1 << reg))
21118 	  break;
21119 
21120       if (reg > LAST_LO_REGNUM)
21121 	{
21122 	  /* Make sure we have a register available for stack decrement. */
21123 	  mask |= 1 << LAST_LO_REGNUM;
21124 	}
21125     }
21126 
21127   return mask;
21128 }
21129 
21130 /* Return the number of bytes required to save VFP registers.  */
21131 static int
21132 arm_get_vfp_saved_size (void)
21133 {
21134   unsigned int regno;
21135   int count;
21136   int saved;
21137 
21138   saved = 0;
21139   /* Space for saved VFP registers.  */
21140   if (TARGET_VFP_BASE)
21141     {
21142       count = 0;
21143       for (regno = FIRST_VFP_REGNUM;
21144 	   regno < LAST_VFP_REGNUM;
21145 	   regno += 2)
21146 	{
21147 	  if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21148 	    {
21149 	      if (count > 0)
21150 		{
21151 		  /* Workaround ARM10 VFPr1 bug.  */
21152 		  if (count == 2 && !arm_arch6)
21153 		    count++;
21154 		  saved += count * 8;
21155 		}
21156 	      count = 0;
21157 	    }
21158 	  else
21159 	    count++;
21160 	}
21161       if (count > 0)
21162 	{
21163 	  if (count == 2 && !arm_arch6)
21164 	    count++;
21165 	  saved += count * 8;
21166 	}
21167     }
21168   return saved;
21169 }
21170 
21171 
21172 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
21173    everything bar the final return instruction.  If simple_return is true,
21174    then do not output epilogue, because it has already been emitted in RTL.
21175 
21176    Note: do not forget to update length attribute of corresponding insn pattern
21177    when changing assembly output (eg. length attribute of
21178    thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21179    register clearing sequences).  */
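/* As a rough sketch of the kinds of sequences produced below (the exact
   registers and conditionalisation depend on the function): a function
   that saved only LR can return with a single post-indexed load,

	ldr	pc, [sp], #4

   while one that saved several registers pops them together with the
   return address,

	pop	{r4, r5, pc}  */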
21180 const char *
21181 output_return_instruction (rtx operand, bool really_return, bool reverse,
21182                            bool simple_return)
21183 {
21184   char conditional[10];
21185   char instr[100];
21186   unsigned reg;
21187   unsigned long live_regs_mask;
21188   unsigned long func_type;
21189   arm_stack_offsets *offsets;
21190 
21191   func_type = arm_current_func_type ();
21192 
21193   if (IS_NAKED (func_type))
21194     return "";
21195 
21196   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21197     {
21198       /* If this function was declared non-returning, and we have
21199 	 found a tail call, then we have to trust that the called
21200 	 function won't return.  */
21201       if (really_return)
21202 	{
21203 	  rtx ops[2];
21204 
21205 	  /* Otherwise, trap an attempted return by aborting.  */
21206 	  ops[0] = operand;
21207 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21208 				       : "abort");
21209 	  assemble_external_libcall (ops[1]);
21210 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21211 	}
21212 
21213       return "";
21214     }
21215 
21216   gcc_assert (!cfun->calls_alloca || really_return);
21217 
21218   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21219 
21220   cfun->machine->return_used_this_function = 1;
21221 
21222   offsets = arm_get_frame_offsets ();
21223   live_regs_mask = offsets->saved_regs_mask;
21224 
21225   if (!simple_return && live_regs_mask)
21226     {
21227       const char * return_reg;
21228 
21229       /* If we do not have any special requirements for function exit
21230 	 (e.g. interworking) then we can load the return address
21231 	 directly into the PC.  Otherwise we must load it into LR.  */
21232       if (really_return
21233 	  && !IS_CMSE_ENTRY (func_type)
21234 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21235 	return_reg = reg_names[PC_REGNUM];
21236       else
21237 	return_reg = reg_names[LR_REGNUM];
21238 
21239       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21240 	{
21241 	  /* There are three possible reasons for the IP register
21242 	     being saved.  1) a stack frame was created, in which case
21243 	     IP contains the old stack pointer, or 2) an ISR routine
21244 	     corrupted it, or 3) it was saved to align the stack on
21245 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
21246 	     restore IP.  */
21247 	  if (frame_pointer_needed)
21248 	    {
21249 	      live_regs_mask &= ~ (1 << IP_REGNUM);
21250 	      live_regs_mask |=   (1 << SP_REGNUM);
21251 	    }
21252 	  else
21253 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21254 	}
21255 
21256       /* On some ARM architectures it is faster to use LDR rather than
21257 	 LDM to load a single register.  On other architectures, the
21258 	 cost is the same.  In 26 bit mode, or for exception handlers,
21259 	 we have to use LDM to load the PC so that the CPSR is also
21260 	 restored.  */
21261       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21262 	if (live_regs_mask == (1U << reg))
21263 	  break;
21264 
21265       if (reg <= LAST_ARM_REGNUM
21266 	  && (reg != LR_REGNUM
21267 	      || ! really_return
21268 	      || ! IS_INTERRUPT (func_type)))
21269 	{
21270 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21271 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21272 	}
21273       else
21274 	{
21275 	  char *p;
21276 	  int first = 1;
21277 
21278 	  /* Generate the load multiple instruction to restore the
21279 	     registers.  Note we can get here, even if
21280 	     frame_pointer_needed is true, but only if sp already
21281 	     points to the base of the saved core registers.  */
21282 	  if (live_regs_mask & (1 << SP_REGNUM))
21283 	    {
21284 	      unsigned HOST_WIDE_INT stack_adjust;
21285 
21286 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21287 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21288 
21289 	      if (stack_adjust && arm_arch5t && TARGET_ARM)
21290 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21291 	      else
21292 		{
21293 		  /* If we can't use ldmib (SA110 bug),
21294 		     then try to pop r3 instead.  */
21295 		  if (stack_adjust)
21296 		    live_regs_mask |= 1 << 3;
21297 
21298 		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21299 		}
21300 	    }
21301 	  /* For interrupt returns we have to use an LDM rather than
21302 	     a POP so that we can use the exception return variant.  */
21303 	  else if (IS_INTERRUPT (func_type))
21304 	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21305 	  else
21306 	    sprintf (instr, "pop%s\t{", conditional);
21307 
21308 	  p = instr + strlen (instr);
21309 
21310 	  for (reg = 0; reg <= SP_REGNUM; reg++)
21311 	    if (live_regs_mask & (1 << reg))
21312 	      {
21313 		int l = strlen (reg_names[reg]);
21314 
21315 		if (first)
21316 		  first = 0;
21317 		else
21318 		  {
21319 		    memcpy (p, ", ", 2);
21320 		    p += 2;
21321 		  }
21322 
21323 		memcpy (p, "%|", 2);
21324 		memcpy (p + 2, reg_names[reg], l);
21325 		p += l + 2;
21326 	      }
21327 
21328 	  if (live_regs_mask & (1 << LR_REGNUM))
21329 	    {
21330 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21331 	      /* If returning from an interrupt, restore the CPSR.  */
21332 	      if (IS_INTERRUPT (func_type))
21333 		strcat (p, "^");
21334 	    }
21335 	  else
21336 	    strcpy (p, "}");
21337 	}
21338 
21339       output_asm_insn (instr, & operand);
21340 
21341       /* See if we need to generate an extra instruction to
21342 	 perform the actual function return.  */
21343       if (really_return
21344 	  && func_type != ARM_FT_INTERWORKED
21345 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21346 	{
21347 	  /* The return has already been handled
21348 	     by loading the LR into the PC.  */
21349           return "";
21350 	}
21351     }
21352 
21353   if (really_return)
21354     {
21355       switch ((int) ARM_FUNC_TYPE (func_type))
21356 	{
21357 	case ARM_FT_ISR:
21358 	case ARM_FT_FIQ:
21359 	  /* ??? This is wrong for unified assembly syntax.  */
21360 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21361 	  break;
21362 
21363 	case ARM_FT_INTERWORKED:
21364 	  gcc_assert (arm_arch5t || arm_arch4t);
21365 	  sprintf (instr, "bx%s\t%%|lr", conditional);
21366 	  break;
21367 
21368 	case ARM_FT_EXCEPTION:
21369 	  /* ??? This is wrong for unified assembly syntax.  */
21370 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21371 	  break;
21372 
21373 	default:
21374 	  if (IS_CMSE_ENTRY (func_type))
21375 	    {
21376 	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21377 		 emitted by cmse_nonsecure_entry_clear_before_return () and the
21378 		 VSTR/VLDR instructions in the prologue and epilogue.  */
21379 	      if (!TARGET_HAVE_FPCXT_CMSE)
21380 		{
21381 		  /* Check if we have to clear the 'GE bits', which are only used if
21382 		     parallel add and subtraction instructions are available.  */
21383 		  if (TARGET_INT_SIMD)
21384 		    snprintf (instr, sizeof (instr),
21385 			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21386 		  else
21387 		    snprintf (instr, sizeof (instr),
21388 			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21389 
21390 		  output_asm_insn (instr, & operand);
21391 		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21392 		     care of it.  */
21393 		  if (TARGET_HARD_FLOAT)
21394 		    {
21395 		      /* Clear the cumulative exception-status bits (0-4,7) and
21396 			 the condition code bits (28-31) of the FPSCR.  We need
21397 			 to remember to clear the first scratch register used
21398 			 (IP) and save and restore the second (r4).
21399 
21400 			 Important note: the length of the
21401 			 thumb2_cmse_entry_return insn pattern must account for
21402 			 the size of the below instructions.  */
21403 		      output_asm_insn ("push\t{%|r4}", & operand);
21404 		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21405 		      output_asm_insn ("movw\t%|r4, #65376", & operand);
21406 		      output_asm_insn ("movt\t%|r4, #4095", & operand);
21407 		      output_asm_insn ("and\t%|ip, %|r4", & operand);
21408 		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21409 		      output_asm_insn ("pop\t{%|r4}", & operand);
21410 		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
21411 		    }
21412 		}
21413 	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21414 	    }
21415 	  /* Use bx if it's available.  */
21416 	  else if (arm_arch5t || arm_arch4t)
21417 	    sprintf (instr, "bx%s\t%%|lr", conditional);
21418 	  else
21419 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21420 	  break;
21421 	}
21422 
21423       output_asm_insn (instr, & operand);
21424     }
21425 
21426   return "";
21427 }
21428 
21429 /* Output in FILE asm statements needed to declare the NAME of the function
21430    defined by its DECL node.  */
21431 
21432 void
21433 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21434 {
21435   size_t cmse_name_len;
21436   char *cmse_name = 0;
21437   char cmse_prefix[] = "__acle_se_";
21438 
21439   /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21440      extra function label for each function with the 'cmse_nonsecure_entry'
21441      attribute.  This extra function label should be prepended with
21442      '__acle_se_', telling the linker that it needs to create secure gateway
21443      veneers for this function.  */
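  /* For a function named "foo" this results, roughly, in the extra label
     being emitted alongside the normal one, so both denote the same
     address (a sketch of the intent, not the exact directive sequence):

     foo:
     __acle_se_foo:
	...function body...  */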
21444   if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21445 				    DECL_ATTRIBUTES (decl)))
21446     {
21447       cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21448       cmse_name = XALLOCAVEC (char, cmse_name_len);
21449       snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21450       targetm.asm_out.globalize_label (file, cmse_name);
21451 
21452       ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21453       ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21454     }
21455 
21456   ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21457   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21458   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21459   ASM_OUTPUT_LABEL (file, name);
21460 
21461   if (cmse_name)
21462     ASM_OUTPUT_LABEL (file, cmse_name);
21463 
21464   ARM_OUTPUT_FN_UNWIND (file, TRUE);
21465 }
21466 
21467 /* Write the function name into the code section, directly preceding
21468    the function prologue.
21469 
21470    Code will be output similar to this:
21471      t0
21472 	 .ascii "arm_poke_function_name", 0
21473 	 .align
21474      t1
21475 	 .word 0xff000000 + (t1 - t0)
21476      arm_poke_function_name
21477 	 mov     ip, sp
21478 	 stmfd   sp!, {fp, ip, lr, pc}
21479 	 sub     fp, ip, #4
21480 
21481    When performing a stack backtrace, code can inspect the value
21482    of 'pc' stored at 'fp' + 0.  If the trace function then looks
21483    at location pc - 12 and the top 8 bits are set, then we know
21484    that there is a function name embedded immediately preceding this
21485    location, and that its length is ((pc[-3]) & 0x00ffffff).
21486 
21487    We assume that pc is declared as a pointer to an unsigned long.
21488 
21489    It is of no benefit to output the function name if we are assembling
21490    a leaf function.  These function types will not contain a stack
21491    backtrace structure, therefore it is not possible to determine the
21492    function name.  */
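/* For instance, for NAME == "foobar" the string occupies 7 bytes
   including the terminating NUL, which rounds up to 8, so the marker
   word emitted after the name is 0xff000000 + 8 == 0xff000008 (a worked
   example of the encoding described above).  */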
21493 void
21494 arm_poke_function_name (FILE *stream, const char *name)
21495 {
21496   unsigned long alignlength;
21497   unsigned long length;
21498   rtx           x;
21499 
21500   length      = strlen (name) + 1;
21501   alignlength = ROUND_UP_WORD (length);
21502 
21503   ASM_OUTPUT_ASCII (stream, name, length);
21504   ASM_OUTPUT_ALIGN (stream, 2);
21505   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21506   assemble_aligned_integer (UNITS_PER_WORD, x);
21507 }
21508 
21509 /* Place some comments into the assembler stream
21510    describing the current function.  */
21511 static void
21512 arm_output_function_prologue (FILE *f)
21513 {
21514   unsigned long func_type;
21515 
21516   /* Sanity check.  */
21517   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21518 
21519   func_type = arm_current_func_type ();
21520 
21521   switch ((int) ARM_FUNC_TYPE (func_type))
21522     {
21523     default:
21524     case ARM_FT_NORMAL:
21525       break;
21526     case ARM_FT_INTERWORKED:
21527       asm_fprintf (f, "\t%@ Function supports interworking.\n");
21528       break;
21529     case ARM_FT_ISR:
21530       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21531       break;
21532     case ARM_FT_FIQ:
21533       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21534       break;
21535     case ARM_FT_EXCEPTION:
21536       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21537       break;
21538     }
21539 
21540   if (IS_NAKED (func_type))
21541     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21542 
21543   if (IS_VOLATILE (func_type))
21544     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21545 
21546   if (IS_NESTED (func_type))
21547     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21548   if (IS_STACKALIGN (func_type))
21549     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21550   if (IS_CMSE_ENTRY (func_type))
21551     asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21552 
21553   asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21554 	       (HOST_WIDE_INT) crtl->args.size,
21555 	       crtl->args.pretend_args_size,
21556 	       (HOST_WIDE_INT) get_frame_size ());
21557 
21558   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21559 	       frame_pointer_needed,
21560 	       cfun->machine->uses_anonymous_args);
21561 
21562   if (cfun->machine->lr_save_eliminated)
21563     asm_fprintf (f, "\t%@ link register save eliminated.\n");
21564 
21565   if (crtl->calls_eh_return)
21566     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21567 
21568 }
21569 
21570 static void
21571 arm_output_function_epilogue (FILE *)
21572 {
21573   arm_stack_offsets *offsets;
21574 
21575   if (TARGET_THUMB1)
21576     {
21577       int regno;
21578 
21579       /* Emit any call-via-reg trampolines that are needed for v4t support
21580 	 of call_reg and call_value_reg type insns.  */
21581       for (regno = 0; regno < LR_REGNUM; regno++)
21582 	{
21583 	  rtx label = cfun->machine->call_via[regno];
21584 
21585 	  if (label != NULL)
21586 	    {
21587 	      switch_to_section (function_section (current_function_decl));
21588 	      targetm.asm_out.internal_label (asm_out_file, "L",
21589 					      CODE_LABEL_NUMBER (label));
21590 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21591 	    }
21592 	}
21593 
21594       /* ??? Probably not safe to set this here, since it assumes that a
21595 	 function will be emitted as assembly immediately after we generate
21596 	 RTL for it.  This does not happen for inline functions.  */
21597       cfun->machine->return_used_this_function = 0;
21598     }
21599   else /* TARGET_32BIT */
21600     {
21601       /* We need to take into account any stack-frame rounding.  */
21602       offsets = arm_get_frame_offsets ();
21603 
21604       gcc_assert (!use_return_insn (FALSE, NULL)
21605 		  || (cfun->machine->return_used_this_function != 0)
21606 		  || offsets->saved_regs == offsets->outgoing_args
21607 		  || frame_pointer_needed);
21608     }
21609 }
21610 
21611 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21612    STR and STRD.  If an even number of registers are being pushed, one
21613    or more STRD patterns are created for each register pair.  If an
21614    odd number of registers are pushed, emit an initial STR followed by
21615    as many STRD instructions as are needed.  This works best when the
21616    stack is initially 64-bit aligned (the normal case), since it
21617    ensures that each STRD is also 64-bit aligned.  */
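/* For example (an illustrative sketch, assuming SP starts 64-bit
   aligned), pushing the odd-sized set {r4, r5, r6} would come out as a
   single writeback store followed by one dword-aligned STRD:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]  */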
21618 static void
21619 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21620 {
21621   int num_regs = 0;
21622   int i;
21623   int regno;
21624   rtx par = NULL_RTX;
21625   rtx dwarf = NULL_RTX;
21626   rtx tmp;
21627   bool first = true;
21628 
21629   num_regs = bit_count (saved_regs_mask);
21630 
21631   /* Must be at least one register to save, and can't save SP or PC.  */
21632   gcc_assert (num_regs > 0 && num_regs <= 14);
21633   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21634   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21635 
21636   /* Create sequence for DWARF info.  All the frame-related data for
21637      debugging is held in this wrapper.  */
21638   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21639 
21640   /* Describe the stack adjustment.  */
21641   tmp = gen_rtx_SET (stack_pointer_rtx,
21642 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21643   RTX_FRAME_RELATED_P (tmp) = 1;
21644   XVECEXP (dwarf, 0, 0) = tmp;
21645 
21646   /* Find the first register.  */
21647   for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21648     ;
21649 
21650   i = 0;
21651 
21652   /* If there's an odd number of registers to push, start off by
21653      pushing a single register.  This ensures that subsequent strd
21654      operations are dword aligned (assuming that SP was originally
21655      64-bit aligned).  */
21656   if ((num_regs & 1) != 0)
21657     {
21658       rtx reg, mem, insn;
21659 
21660       reg = gen_rtx_REG (SImode, regno);
21661       if (num_regs == 1)
21662 	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21663 						     stack_pointer_rtx));
21664       else
21665 	mem = gen_frame_mem (Pmode,
21666 			     gen_rtx_PRE_MODIFY
21667 			     (Pmode, stack_pointer_rtx,
21668 			      plus_constant (Pmode, stack_pointer_rtx,
21669 					     -4 * num_regs)));
21670 
21671       tmp = gen_rtx_SET (mem, reg);
21672       RTX_FRAME_RELATED_P (tmp) = 1;
21673       insn = emit_insn (tmp);
21674       RTX_FRAME_RELATED_P (insn) = 1;
21675       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21676       tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21677       RTX_FRAME_RELATED_P (tmp) = 1;
21678       i++;
21679       regno++;
21680       XVECEXP (dwarf, 0, i) = tmp;
21681       first = false;
21682     }
21683 
21684   while (i < num_regs)
21685     if (saved_regs_mask & (1 << regno))
21686       {
21687 	rtx reg1, reg2, mem1, mem2;
21688 	rtx tmp0, tmp1, tmp2;
21689 	int regno2;
21690 
21691 	/* Find the register to pair with this one.  */
21692 	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21693 	     regno2++)
21694 	  ;
21695 
21696 	reg1 = gen_rtx_REG (SImode, regno);
21697 	reg2 = gen_rtx_REG (SImode, regno2);
21698 
21699 	if (first)
21700 	  {
21701 	    rtx insn;
21702 
21703 	    first = false;
21704 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21705 							stack_pointer_rtx,
21706 							-4 * num_regs));
21707 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21708 							stack_pointer_rtx,
21709 							-4 * (num_regs - 1)));
21710 	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
21711 				plus_constant (Pmode, stack_pointer_rtx,
21712 					       -4 * (num_regs)));
21713 	    tmp1 = gen_rtx_SET (mem1, reg1);
21714 	    tmp2 = gen_rtx_SET (mem2, reg2);
21715 	    RTX_FRAME_RELATED_P (tmp0) = 1;
21716 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21717 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21718 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21719 	    XVECEXP (par, 0, 0) = tmp0;
21720 	    XVECEXP (par, 0, 1) = tmp1;
21721 	    XVECEXP (par, 0, 2) = tmp2;
21722 	    insn = emit_insn (par);
21723 	    RTX_FRAME_RELATED_P (insn) = 1;
21724 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21725 	  }
21726 	else
21727 	  {
21728 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21729 							stack_pointer_rtx,
21730 							4 * i));
21731 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21732 							stack_pointer_rtx,
21733 							4 * (i + 1)));
21734 	    tmp1 = gen_rtx_SET (mem1, reg1);
21735 	    tmp2 = gen_rtx_SET (mem2, reg2);
21736 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21737 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21738 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21739 	    XVECEXP (par, 0, 0) = tmp1;
21740 	    XVECEXP (par, 0, 1) = tmp2;
21741 	    emit_insn (par);
21742 	  }
21743 
21744 	/* Create unwind information.  This is an approximation.  */
21745 	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21746 					   plus_constant (Pmode,
21747 							  stack_pointer_rtx,
21748 							  4 * i)),
21749 			    reg1);
21750 	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21751 					   plus_constant (Pmode,
21752 							  stack_pointer_rtx,
21753 							  4 * (i + 1))),
21754 			    reg2);
21755 
21756 	RTX_FRAME_RELATED_P (tmp1) = 1;
21757 	RTX_FRAME_RELATED_P (tmp2) = 1;
21758 	XVECEXP (dwarf, 0, i + 1) = tmp1;
21759 	XVECEXP (dwarf, 0, i + 2) = tmp2;
21760 	i += 2;
21761 	regno = regno2 + 1;
21762       }
21763     else
21764       regno++;
21765 
21766   return;
21767 }
21768 
21769 /* STRD in ARM mode requires consecutive registers.  This function emits STRD
21770    whenever possible, otherwise it emits single-word stores.  The first store
21771    also allocates stack space for all saved registers, using writeback with
21772    post-addressing mode.  All other stores use offset addressing.  If no STRD
21773    can be emitted, this function emits a sequence of single-word stores,
21774    and not an STM as before, because single-word stores provide more
21775    scheduling freedom and can be turned into an STM by peephole optimizations.  */
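/* Illustrative sketch only: for the mask {r4, r5, r7} the loop below
   allocates the whole 12-byte area with the first (writeback) store and
   then uses offset addressing for the rest:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]  */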
21776 static void
21777 arm_emit_strd_push (unsigned long saved_regs_mask)
21778 {
21779   int num_regs = 0;
21780   int i, j, dwarf_index  = 0;
21781   int offset = 0;
21782   rtx dwarf = NULL_RTX;
21783   rtx insn = NULL_RTX;
21784   rtx tmp, mem;
21785 
21786   /* TODO: More efficient code could be emitted by changing the
21787      layout, e.g., first push all pairs that can use STRD to keep the
21788      stack aligned, and then push all other registers.  */
21789   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21790     if (saved_regs_mask & (1 << i))
21791       num_regs++;
21792 
21793   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21794   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21795   gcc_assert (num_regs > 0);
21796 
21797   /* Create sequence for DWARF info.  */
21798   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21799 
21800   /* For dwarf info, we generate explicit stack update.  */
21801   tmp = gen_rtx_SET (stack_pointer_rtx,
21802                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21803   RTX_FRAME_RELATED_P (tmp) = 1;
21804   XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21805 
21806   /* Save registers.  */
21807   offset = - 4 * num_regs;
21808   j = 0;
21809   while (j <= LAST_ARM_REGNUM)
21810     if (saved_regs_mask & (1 << j))
21811       {
21812         if ((j % 2 == 0)
21813             && (saved_regs_mask & (1 << (j + 1))))
21814           {
21815             /* The current register and the next register form a register
21816                pair for which STRD can be generated.  */
21817             if (offset < 0)
21818               {
21819                 /* Allocate stack space for all saved registers.  */
21820                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21821                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21822                 mem = gen_frame_mem (DImode, tmp);
21823                 offset = 0;
21824               }
21825             else if (offset > 0)
21826               mem = gen_frame_mem (DImode,
21827                                    plus_constant (Pmode,
21828                                                   stack_pointer_rtx,
21829                                                   offset));
21830             else
21831               mem = gen_frame_mem (DImode, stack_pointer_rtx);
21832 
21833             tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21834             RTX_FRAME_RELATED_P (tmp) = 1;
21835             tmp = emit_insn (tmp);
21836 
21837             /* Record the first store insn.  */
21838             if (dwarf_index == 1)
21839               insn = tmp;
21840 
21841             /* Generate dwarf info.  */
21842             mem = gen_frame_mem (SImode,
21843                                  plus_constant (Pmode,
21844                                                 stack_pointer_rtx,
21845                                                 offset));
21846             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21847             RTX_FRAME_RELATED_P (tmp) = 1;
21848             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21849 
21850             mem = gen_frame_mem (SImode,
21851                                  plus_constant (Pmode,
21852                                                 stack_pointer_rtx,
21853                                                 offset + 4));
21854             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21855             RTX_FRAME_RELATED_P (tmp) = 1;
21856             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21857 
21858             offset += 8;
21859             j += 2;
21860           }
21861         else
21862           {
21863             /* Emit a single word store.  */
21864             if (offset < 0)
21865               {
21866                 /* Allocate stack space for all saved registers.  */
21867                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21868                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21869                 mem = gen_frame_mem (SImode, tmp);
21870                 offset = 0;
21871               }
21872             else if (offset > 0)
21873               mem = gen_frame_mem (SImode,
21874                                    plus_constant (Pmode,
21875                                                   stack_pointer_rtx,
21876                                                   offset));
21877             else
21878               mem = gen_frame_mem (SImode, stack_pointer_rtx);
21879 
21880             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21881             RTX_FRAME_RELATED_P (tmp) = 1;
21882             tmp = emit_insn (tmp);
21883 
21884             /* Record the first store insn.  */
21885             if (dwarf_index == 1)
21886               insn = tmp;
21887 
21888             /* Generate dwarf info.  */
21889             mem = gen_frame_mem (SImode,
21890                                  plus_constant(Pmode,
21891                                                stack_pointer_rtx,
21892                                                offset));
21893             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21894             RTX_FRAME_RELATED_P (tmp) = 1;
21895             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21896 
21897             offset += 4;
21898             j += 1;
21899           }
21900       }
21901     else
21902       j++;
21903 
21904   /* Attach dwarf info to the first insn we generate.  */
21905   gcc_assert (insn != NULL_RTX);
21906   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21907   RTX_FRAME_RELATED_P (insn) = 1;
21908 }
21909 
21910 /* Generate and emit an insn that we will recognize as a push_multi.
21911    Unfortunately, since this insn does not reflect very well the actual
21912    semantics of the operation, we need to annotate the insn for the benefit
21913    of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
21914    MASK for registers that should be annotated for DWARF2 frame unwind
21915    information.  */
21916 static rtx
21917 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21918 {
21919   int num_regs = 0;
21920   int num_dwarf_regs = 0;
21921   int i, j;
21922   rtx par;
21923   rtx dwarf;
21924   int dwarf_par_index;
21925   rtx tmp, reg;
21926 
21927   /* We don't record the PC in the dwarf frame information.  */
21928   dwarf_regs_mask &= ~(1 << PC_REGNUM);
21929 
21930   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21931     {
21932       if (mask & (1 << i))
21933 	num_regs++;
21934       if (dwarf_regs_mask & (1 << i))
21935 	num_dwarf_regs++;
21936     }
21937 
21938   gcc_assert (num_regs && num_regs <= 16);
21939   gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21940 
21941   /* For the body of the insn we are going to generate an UNSPEC in
21942      parallel with several USEs.  This allows the insn to be recognized
21943      by the push_multi pattern in the arm.md file.
21944 
21945      The body of the insn looks something like this:
21946 
21947        (parallel [
21948            (set (mem:BLK (pre_modify:SI (reg:SI sp)
21949 	                                (const_int:SI <num>)))
21950 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21951            (use (reg:SI XX))
21952            (use (reg:SI YY))
21953 	   ...
21954         ])
21955 
21956      For the frame note however, we try to be more explicit and actually
21957      show each register being stored into the stack frame, plus a (single)
21958      decrement of the stack pointer.  We do it this way in order to be
21959      friendly to the stack unwinding code, which only wants to see a single
21960      stack decrement per instruction.  The RTL we generate for the note looks
21961      something like this:
21962 
21963       (sequence [
21964            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21965            (set (mem:SI (reg:SI sp)) (reg:SI r4))
21966            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21967            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21968 	   ...
21969         ])
21970 
21971      FIXME: In an ideal world the PRE_MODIFY would not exist and
21972      instead we'd have a parallel expression detailing all
21973      the stores to the various memory addresses so that debug
21974      information is more up-to-date.  Bear in mind, however, that any
21975      such scheme must still satisfy the constraints of the push instruction.
21976 
21977      Note also that this has to be taken care of for the VFP registers.
21978 
21979      For more see PR43399.  */
21980 
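  /* As a worked example (not tied to any particular caller): with MASK
     covering {r4, r5, lr} and DWARF_REGS_MASK equal to MASK, the emitted
     insn assembles to a single "push {r4, r5, lr}", while the note built
     below describes one 12-byte stack decrement followed by word stores of
     r4, r5 and lr at sp, sp + 4 and sp + 8 respectively.  */
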
21981   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21982   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21983   dwarf_par_index = 1;
21984 
21985   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21986     {
21987       if (mask & (1 << i))
21988 	{
21989 	  reg = gen_rtx_REG (SImode, i);
21990 
21991 	  XVECEXP (par, 0, 0)
21992 	    = gen_rtx_SET (gen_frame_mem
21993 			   (BLKmode,
21994 			    gen_rtx_PRE_MODIFY (Pmode,
21995 						stack_pointer_rtx,
21996 						plus_constant
21997 						(Pmode, stack_pointer_rtx,
21998 						 -4 * num_regs))
21999 			    ),
22000 			   gen_rtx_UNSPEC (BLKmode,
22001 					   gen_rtvec (1, reg),
22002 					   UNSPEC_PUSH_MULT));
22003 
22004 	  if (dwarf_regs_mask & (1 << i))
22005 	    {
22006 	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22007 				 reg);
22008 	      RTX_FRAME_RELATED_P (tmp) = 1;
22009 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22010 	    }
22011 
22012 	  break;
22013 	}
22014     }
22015 
22016   for (j = 1, i++; j < num_regs; i++)
22017     {
22018       if (mask & (1 << i))
22019 	{
22020 	  reg = gen_rtx_REG (SImode, i);
22021 
22022 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22023 
22024 	  if (dwarf_regs_mask & (1 << i))
22025 	    {
22026 	      tmp
22027 		= gen_rtx_SET (gen_frame_mem
22028 			       (SImode,
22029 				plus_constant (Pmode, stack_pointer_rtx,
22030 					       4 * j)),
22031 			       reg);
22032 	      RTX_FRAME_RELATED_P (tmp) = 1;
22033 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22034 	    }
22035 
22036 	  j++;
22037 	}
22038     }
22039 
22040   par = emit_insn (par);
22041 
22042   tmp = gen_rtx_SET (stack_pointer_rtx,
22043 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22044   RTX_FRAME_RELATED_P (tmp) = 1;
22045   XVECEXP (dwarf, 0, 0) = tmp;
22046 
22047   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22048 
22049   return par;
22050 }
22051 
22052 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22053    SIZE is the offset to be adjusted.
22054    DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
22055 static void
22056 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22057 {
22058   rtx dwarf;
22059 
22060   RTX_FRAME_RELATED_P (insn) = 1;
22061   dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22062   add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22063 }
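
/* For example, a caller that has just emitted "add sp, sp, #16" to pop four
   words can call arm_add_cfa_adjust_cfa_note (insn, 16, stack_pointer_rtx,
   stack_pointer_rtx); the unwinder then sees the CFA adjustment as
   (set sp (plus sp 16)) instead of having to interpret the insn itself.  */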
22064 
22065 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22066    SAVED_REGS_MASK shows which registers need to be restored.
22067 
22068    Unfortunately, since this insn does not reflect very well the actual
22069    semantics of the operation, we need to annotate the insn for the benefit
22070    of DWARF2 frame unwind information.  */
22071 static void
22072 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22073 {
22074   int num_regs = 0;
22075   int i, j;
22076   rtx par;
22077   rtx dwarf = NULL_RTX;
22078   rtx tmp, reg;
22079   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22080   int offset_adj;
22081   int emit_update;
22082 
22083   offset_adj = return_in_pc ? 1 : 0;
22084   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22085     if (saved_regs_mask & (1 << i))
22086       num_regs++;
22087 
22088   gcc_assert (num_regs && num_regs <= 16);
22089 
22090   /* If SP is in the register list, then we don't emit an SP update insn.  */
22091   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22092 
22093   /* The parallel needs to hold num_regs SETs
22094      and one SET for the stack update.  */
22095   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22096 
22097   if (return_in_pc)
22098     XVECEXP (par, 0, 0) = ret_rtx;
22099 
22100   if (emit_update)
22101     {
22102       /* Increment the stack pointer, based on there being
22103          num_regs 4-byte registers to restore.  */
22104       tmp = gen_rtx_SET (stack_pointer_rtx,
22105                          plus_constant (Pmode,
22106                                         stack_pointer_rtx,
22107                                         4 * num_regs));
22108       RTX_FRAME_RELATED_P (tmp) = 1;
22109       XVECEXP (par, 0, offset_adj) = tmp;
22110     }
22111 
22112   /* Now restore every reg, which may include PC.  */
22113   for (j = 0, i = 0; j < num_regs; i++)
22114     if (saved_regs_mask & (1 << i))
22115       {
22116         reg = gen_rtx_REG (SImode, i);
22117         if ((num_regs == 1) && emit_update && !return_in_pc)
22118           {
22119             /* Emit single load with writeback.  */
22120             tmp = gen_frame_mem (SImode,
22121                                  gen_rtx_POST_INC (Pmode,
22122                                                    stack_pointer_rtx));
22123             tmp = emit_insn (gen_rtx_SET (reg, tmp));
22124             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22125             return;
22126           }
22127 
22128         tmp = gen_rtx_SET (reg,
22129                            gen_frame_mem
22130                            (SImode,
22131                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22132         RTX_FRAME_RELATED_P (tmp) = 1;
22133         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22134 
22135         /* We need to maintain a sequence for DWARF info too.  As dwarf info
22136            should not have PC, skip PC.  */
22137         if (i != PC_REGNUM)
22138           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22139 
22140         j++;
22141       }
22142 
22143   if (return_in_pc)
22144     par = emit_jump_insn (par);
22145   else
22146     par = emit_insn (par);
22147 
22148   REG_NOTES (par) = dwarf;
22149   if (!return_in_pc)
22150     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22151 				 stack_pointer_rtx, stack_pointer_rtx);
22152 }
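
/* As an illustration, SAVED_REGS_MASK covering {r4, r5, pc} produces a
   parallel holding the return, the 12-byte stack increment and the three
   loads, which is effectively a "pop {r4, r5, pc}"; the REG_CFA_RESTORE
   notes cover only r4 and r5, since the PC is never described in the
   unwind info.  */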
22153 
22154 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22155    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22156 
22157    Unfortunately, since this insn does not reflect very well the actual
22158    semantics of the operation, we need to annotate the insn for the benefit
22159    of DWARF2 frame unwind information.  */
22160 static void
22161 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22162 {
22163   int i, j;
22164   rtx par;
22165   rtx dwarf = NULL_RTX;
22166   rtx tmp, reg;
22167 
22168   gcc_assert (num_regs && num_regs <= 32);
22169 
22170   /* Workaround ARM10 VFPr1 bug.  */
22171   if (num_regs == 2 && !arm_arch6)
22172     {
22173       if (first_reg == 15)
22174         first_reg--;
22175 
22176       num_regs++;
22177     }
22178 
22179   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22180      there could be up to 32 D-registers to restore.
22181      If there are more than 16 D-registers, make two recursive calls,
22182      each of which emits one pop_multi instruction.  */
22183   if (num_regs > 16)
22184     {
22185       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22186       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22187       return;
22188     }
22189 
22190   /* The parallel needs to hold num_regs SETs
22191      and one SET for the stack update.  */
22192   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22193 
22194   /* Increment the stack pointer, based on there being
22195      num_regs 8-byte registers to restore.  */
22196   tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22197   RTX_FRAME_RELATED_P (tmp) = 1;
22198   XVECEXP (par, 0, 0) = tmp;
22199 
22200   /* Now show every reg that will be restored, using a SET for each.  */
22201   for (j = 0, i=first_reg; j < num_regs; i += 2)
22202     {
22203       reg = gen_rtx_REG (DFmode, i);
22204 
22205       tmp = gen_rtx_SET (reg,
22206                          gen_frame_mem
22207                          (DFmode,
22208                           plus_constant (Pmode, base_reg, 8 * j)));
22209       RTX_FRAME_RELATED_P (tmp) = 1;
22210       XVECEXP (par, 0, j + 1) = tmp;
22211 
22212       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22213 
22214       j++;
22215     }
22216 
22217   par = emit_insn (par);
22218   REG_NOTES (par) = dwarf;
22219 
22220   /* Make sure the CFA doesn't stay on IP_REGNUM, so unwinding from FP works.  */
22221   if (REGNO (base_reg) == IP_REGNUM)
22222     {
22223       RTX_FRAME_RELATED_P (par) = 1;
22224       add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22225     }
22226   else
22227     arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22228 				 base_reg, base_reg);
22229 }
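
/* For instance, a request to restore 20 D registers is split into one
   pop_multi of 16 registers followed by one of the remaining 4, each with
   its own 8 * NUM_REGS increment of BASE_REG; a request for exactly two
   D registers on a core without arm_arch6 is widened to three registers to
   dodge the ARM10 VFPr1 erratum handled above.  */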
22230 
22231 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
22232    even number of registers is being popped, multiple LDRD patterns are created
22233    for all register pairs.  If an odd number of registers is popped, the last
22234    register is loaded using an LDR pattern.  */
22235 static void
22236 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22237 {
22238   int num_regs = 0;
22239   int i, j;
22240   rtx par = NULL_RTX;
22241   rtx dwarf = NULL_RTX;
22242   rtx tmp, reg, tmp1;
22243   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22244 
22245   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22246     if (saved_regs_mask & (1 << i))
22247       num_regs++;
22248 
22249   gcc_assert (num_regs && num_regs <= 16);
22250 
22251   /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
22252      to be popped.  So, if num_regs is even, now it will become odd,
22253      and we can generate pop with PC.  If num_regs is odd, it will be
22254      even now, and ldr with return can be generated for PC.  */
22255   if (return_in_pc)
22256     num_regs--;
22257 
22258   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22259 
22260   /* Var j iterates over all the registers to gather all the registers in
22261      saved_regs_mask.  Var i gives index of saved registers in stack frame.
22262      A PARALLEL RTX of register-pair is created here, so that pattern for
22263      LDRD can be matched.  As PC is always last register to be popped, and
22264      we have already decremented num_regs if PC, we don't have to worry
22265      about PC in this loop.  */
22266   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22267     if (saved_regs_mask & (1 << j))
22268       {
22269         /* Create RTX for memory load.  */
22270         reg = gen_rtx_REG (SImode, j);
22271         tmp = gen_rtx_SET (reg,
22272                            gen_frame_mem (SImode,
22273                                plus_constant (Pmode,
22274                                               stack_pointer_rtx, 4 * i)));
22275         RTX_FRAME_RELATED_P (tmp) = 1;
22276 
22277         if (i % 2 == 0)
22278           {
22279             /* When saved-register index (i) is even, the RTX to be emitted is
22280                yet to be created.  Hence create it first.  The LDRD pattern we
22281                are generating is :
22282                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22283                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22284                where target registers need not be consecutive.  */
22285             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22286             dwarf = NULL_RTX;
22287           }
22288 
22289         /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
22290            added as 0th element and if i is odd, reg_i is added as 1st element
22291            of LDRD pattern shown above.  */
22292         XVECEXP (par, 0, (i % 2)) = tmp;
22293         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22294 
22295         if ((i % 2) == 1)
22296           {
22297             /* When saved-register index (i) is odd, RTXs for both the registers
22298                to be loaded are generated in above given LDRD pattern, and the
22299                pattern can be emitted now.  */
22300             par = emit_insn (par);
22301             REG_NOTES (par) = dwarf;
22302 	    RTX_FRAME_RELATED_P (par) = 1;
22303           }
22304 
22305         i++;
22306       }
22307 
22308   /* If an odd number of non-PC registers remains and return_in_pc is false, OR
22309      an even number remains and return_in_pc is true, the last register is
22310      popped using LDR.  It can be the PC itself.  Hence, adjust the stack first
22311      and then use LDR with post-increment.  */
22312 
22313   /* Increment the stack pointer past the registers that the LDRD pairs
22314      above have already restored (4 * i bytes).  */
22315   tmp = gen_rtx_SET (stack_pointer_rtx,
22316                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22317   RTX_FRAME_RELATED_P (tmp) = 1;
22318   tmp = emit_insn (tmp);
22319   if (!return_in_pc)
22320     {
22321       arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22322 				   stack_pointer_rtx, stack_pointer_rtx);
22323     }
22324 
22325   dwarf = NULL_RTX;
22326 
22327   if (((num_regs % 2) == 1 && !return_in_pc)
22328       || ((num_regs % 2) == 0 && return_in_pc))
22329     {
22330       /* Scan for the single register to be popped.  Skip until the saved
22331          register is found.  */
22332       for (; (saved_regs_mask & (1 << j)) == 0; j++);
22333 
22334       /* Gen LDR with post increment here.  */
22335       tmp1 = gen_rtx_MEM (SImode,
22336                           gen_rtx_POST_INC (SImode,
22337                                             stack_pointer_rtx));
22338       set_mem_alias_set (tmp1, get_frame_alias_set ());
22339 
22340       reg = gen_rtx_REG (SImode, j);
22341       tmp = gen_rtx_SET (reg, tmp1);
22342       RTX_FRAME_RELATED_P (tmp) = 1;
22343       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22344 
22345       if (return_in_pc)
22346         {
22347           /* If return_in_pc, j must be PC_REGNUM.  */
22348           gcc_assert (j == PC_REGNUM);
22349           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22350           XVECEXP (par, 0, 0) = ret_rtx;
22351           XVECEXP (par, 0, 1) = tmp;
22352           par = emit_jump_insn (par);
22353         }
22354       else
22355         {
22356           par = emit_insn (tmp);
22357 	  REG_NOTES (par) = dwarf;
22358 	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22359 				       stack_pointer_rtx, stack_pointer_rtx);
22360         }
22361 
22362     }
22363   else if ((num_regs % 2) == 1 && return_in_pc)
22364     {
22365       /* There are 2 registers to be popped.  So, generate the pattern
22366          pop_multiple_with_stack_update_and_return to pop in PC.  */
22367       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22368     }
22369 
22370   return;
22371 }
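
/* Worked example: for SAVED_REGS_MASK covering {r4, r5, r6, pc}, NUM_REGS
   drops from 4 to 3 because of the PC.  The pair loop emits one LDRD for
   r4/r5 at offsets 0 and 4, the stack pointer is then bumped by 8, and the
   remaining {r6, pc} are handed to arm_emit_multi_reg_pop, which produces
   the final "pop {r6, pc}" return.  */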
22372 
22373 /* LDRD in ARM mode needs consecutive registers as operands.  This function
22374    emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
22375    offset addressing and then generates one separate stack update.  This provides
22376    more scheduling freedom, compared to writeback on every load.  However,
22377    if the function returns by loading directly into the PC
22378    (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22379    before the last load.  TODO: Add a peephole optimization to recognize
22380    the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
22381    a peephole optimization to merge the load at stack-offset zero
22382    with the stack update instruction using load with writeback
22383    in post-index addressing mode.  */
22384 static void
22385 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22386 {
22387   int j = 0;
22388   int offset = 0;
22389   rtx par = NULL_RTX;
22390   rtx dwarf = NULL_RTX;
22391   rtx tmp, mem;
22392 
22393   /* Restore saved registers.  */
22394   gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22395   j = 0;
22396   while (j <= LAST_ARM_REGNUM)
22397     if (saved_regs_mask & (1 << j))
22398       {
22399         if ((j % 2) == 0
22400             && (saved_regs_mask & (1 << (j + 1)))
22401             && (j + 1) != PC_REGNUM)
22402           {
22403             /* Current register and next register form register pair for which
22404                LDRD can be generated. PC is always the last register popped, and
22405                we handle it separately.  */
22406             if (offset > 0)
22407               mem = gen_frame_mem (DImode,
22408                                    plus_constant (Pmode,
22409                                                   stack_pointer_rtx,
22410                                                   offset));
22411             else
22412               mem = gen_frame_mem (DImode, stack_pointer_rtx);
22413 
22414             tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22415             tmp = emit_insn (tmp);
22416 	    RTX_FRAME_RELATED_P (tmp) = 1;
22417 
22418             /* Generate dwarf info.  */
22419 
22420             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22421                                     gen_rtx_REG (SImode, j),
22422                                     NULL_RTX);
22423             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22424                                     gen_rtx_REG (SImode, j + 1),
22425                                     dwarf);
22426 
22427             REG_NOTES (tmp) = dwarf;
22428 
22429             offset += 8;
22430             j += 2;
22431           }
22432         else if (j != PC_REGNUM)
22433           {
22434             /* Emit a single word load.  */
22435             if (offset > 0)
22436               mem = gen_frame_mem (SImode,
22437                                    plus_constant (Pmode,
22438                                                   stack_pointer_rtx,
22439                                                   offset));
22440             else
22441               mem = gen_frame_mem (SImode, stack_pointer_rtx);
22442 
22443             tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22444             tmp = emit_insn (tmp);
22445 	    RTX_FRAME_RELATED_P (tmp) = 1;
22446 
22447             /* Generate dwarf info.  */
22448             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22449                                               gen_rtx_REG (SImode, j),
22450                                               NULL_RTX);
22451 
22452             offset += 4;
22453             j += 1;
22454           }
22455         else /* j == PC_REGNUM */
22456           j++;
22457       }
22458     else
22459       j++;
22460 
22461   /* Update the stack.  */
22462   if (offset > 0)
22463     {
22464       tmp = gen_rtx_SET (stack_pointer_rtx,
22465                          plus_constant (Pmode,
22466                                         stack_pointer_rtx,
22467                                         offset));
22468       tmp = emit_insn (tmp);
22469       arm_add_cfa_adjust_cfa_note (tmp, offset,
22470 				   stack_pointer_rtx, stack_pointer_rtx);
22471       offset = 0;
22472     }
22473 
22474   if (saved_regs_mask & (1 << PC_REGNUM))
22475     {
22476       /* Only PC is to be popped.  */
22477       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22478       XVECEXP (par, 0, 0) = ret_rtx;
22479       tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22480                          gen_frame_mem (SImode,
22481                                         gen_rtx_POST_INC (SImode,
22482                                                           stack_pointer_rtx)));
22483       RTX_FRAME_RELATED_P (tmp) = 1;
22484       XVECEXP (par, 0, 1) = tmp;
22485       par = emit_jump_insn (par);
22486 
22487       /* Generate dwarf info.  */
22488       dwarf = alloc_reg_note (REG_CFA_RESTORE,
22489                               gen_rtx_REG (SImode, PC_REGNUM),
22490                               NULL_RTX);
22491       REG_NOTES (par) = dwarf;
22492       arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22493 				   stack_pointer_rtx, stack_pointer_rtx);
22494     }
22495 }
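
/* Worked example: for SAVED_REGS_MASK covering {r4, r5, r6, r7}, the loop
   emits two double-word loads, roughly "ldrd r4, r5, [sp]" and
   "ldrd r6, r7, [sp, #8]", and the single trailing update adds 16 to the
   stack pointer with a matching REG_CFA_ADJUST_CFA note.  */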
22496 
22497 /* Calculate the size of the return value that is passed in registers.  */
22498 static unsigned
22499 arm_size_return_regs (void)
22500 {
22501   machine_mode mode;
22502 
22503   if (crtl->return_rtx != 0)
22504     mode = GET_MODE (crtl->return_rtx);
22505   else
22506     mode = DECL_MODE (DECL_RESULT (current_function_decl));
22507 
22508   return GET_MODE_SIZE (mode);
22509 }
22510 
22511 /* Return true if the current function needs to save/restore LR.  */
22512 static bool
22513 thumb_force_lr_save (void)
22514 {
22515   return !cfun->machine->lr_save_eliminated
22516 	 && (!crtl->is_leaf
22517 	     || thumb_far_jump_used_p ()
22518 	     || df_regs_ever_live_p (LR_REGNUM));
22519 }
22520 
22521 /* Return true if CALL is an indirect tail call.  In that case we
22522    cannot tell whether r3 will be available at the point of the
22523    call.  */
22524 static bool
22525 is_indirect_tailcall_p (rtx call)
22526 {
22527   rtx pat = PATTERN (call);
22528 
22529   /* Indirect tail call.  */
22530   pat = XVECEXP (pat, 0, 0);
22531   if (GET_CODE (pat) == SET)
22532     pat = SET_SRC (pat);
22533 
22534   pat = XEXP (XEXP (pat, 0), 0);
22535   return REG_P (pat);
22536 }
22537 
22538 /* Return true if r3 could be used by any of the tail call insns in the
22539    current function, either explicitly or because the call is indirect.  */
22540 static bool
22541 any_sibcall_could_use_r3 (void)
22542 {
22543   edge_iterator ei;
22544   edge e;
22545 
22546   if (!crtl->tail_call_emit)
22547     return false;
22548   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22549     if (e->flags & EDGE_SIBCALL)
22550       {
22551 	rtx_insn *call = BB_END (e->src);
22552 	if (!CALL_P (call))
22553 	  call = prev_nonnote_nondebug_insn (call);
22554 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22555 	if (find_regno_fusage (call, USE, 3)
22556 	    || is_indirect_tailcall_p (call))
22557 	  return true;
22558       }
22559   return false;
22560 }
22561 
22562 
22563 /* Compute the distance from register FROM to register TO.
22564    These can be the arg pointer (26), the soft frame pointer (25),
22565    the stack pointer (13) or the hard frame pointer (11).
22566    In thumb mode r7 is used as the soft frame pointer, if needed.
22567    Typical stack layout looks like this:
22568 
22569        old stack pointer -> |    |
22570                              ----
22571                             |    | \
22572                             |    |   saved arguments for
22573                             |    |   vararg functions
22574 			    |    | /
22575                               --
22576    hard FP & arg pointer -> |    | \
22577                             |    |   stack
22578                             |    |   frame
22579                             |    | /
22580                               --
22581                             |    | \
22582                             |    |   call saved
22583                             |    |   registers
22584       soft frame pointer -> |    | /
22585                               --
22586                             |    | \
22587                             |    |   local
22588                             |    |   variables
22589      locals base pointer -> |    | /
22590                               --
22591                             |    | \
22592                             |    |   outgoing
22593                             |    |   arguments
22594    current stack pointer -> |    | /
22595                               --
22596 
22597   For a given function some or all of these stack components
22598   may not be needed, giving rise to the possibility of
22599   eliminating some of the registers.
22600 
22601   The values returned by this function must reflect the behavior
22602   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22603 
22604   The sign of the number returned reflects the direction of stack
22605   growth, so the values are positive for all eliminations except
22606   from the soft frame pointer to the hard frame pointer.
22607 
22608   SFP may point just inside the local variables block to ensure correct
22609   alignment.  */
22610 
22611 
22612 /* Return cached stack offsets.  */
22613 
22614 static arm_stack_offsets *
22615 arm_get_frame_offsets (void)
22616 {
22617   struct arm_stack_offsets *offsets;
22618 
22619   offsets = &cfun->machine->stack_offsets;
22620 
22621   return offsets;
22622 }
22623 
22624 
22625 /* Calculate stack offsets.  These are used to calculate register elimination
22626    offsets and in prologue/epilogue code.  Also calculates which registers
22627    should be saved.  */
22628 
22629 static void
22630 arm_compute_frame_layout (void)
22631 {
22632   struct arm_stack_offsets *offsets;
22633   unsigned long func_type;
22634   int saved;
22635   int core_saved;
22636   HOST_WIDE_INT frame_size;
22637   int i;
22638 
22639   offsets = &cfun->machine->stack_offsets;
22640 
22641   /* Initially this is the size of the local variables.  It will be translated
22642      into an offset once we have determined the size of preceding data.  */
22643   frame_size = ROUND_UP_WORD (get_frame_size ());
22644 
22645   /* Space for variadic functions.  */
22646   offsets->saved_args = crtl->args.pretend_args_size;
22647 
22648   /* In Thumb mode this is incorrect, but never used.  */
22649   offsets->frame
22650     = (offsets->saved_args
22651        + arm_compute_static_chain_stack_bytes ()
22652        + (frame_pointer_needed ? 4 : 0));
22653 
22654   if (TARGET_32BIT)
22655     {
22656       unsigned int regno;
22657 
22658       offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22659       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22660       saved = core_saved;
22661 
22662       /* We know that SP will be doubleword aligned on entry, and we must
22663 	 preserve that condition at any subroutine call.  We also require the
22664 	 soft frame pointer to be doubleword aligned.  */
22665 
22666       if (TARGET_REALLY_IWMMXT)
22667 	{
22668 	  /* Check for the call-saved iWMMXt registers.  */
22669 	  for (regno = FIRST_IWMMXT_REGNUM;
22670 	       regno <= LAST_IWMMXT_REGNUM;
22671 	       regno++)
22672 	    if (reg_needs_saving_p (regno))
22673 	      saved += 8;
22674 	}
22675 
22676       func_type = arm_current_func_type ();
22677       /* Space for saved VFP registers.  */
22678       if (! IS_VOLATILE (func_type)
22679 	  && TARGET_VFP_BASE)
22680 	saved += arm_get_vfp_saved_size ();
22681 
22682       /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22683 	 nonsecure entry functions with VSTR/VLDR.  */
22684       if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22685 	saved += 4;
22686     }
22687   else /* TARGET_THUMB1 */
22688     {
22689       offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22690       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22691       saved = core_saved;
22692       if (TARGET_BACKTRACE)
22693 	saved += 16;
22694     }
22695 
22696   /* Saved registers include the stack frame.  */
22697   offsets->saved_regs
22698     = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22699   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22700 
22701   /* A leaf function does not need any stack alignment if it has nothing
22702      on the stack.  */
22703   if (crtl->is_leaf && frame_size == 0
22704       /* However if it calls alloca(), we have a dynamically allocated
22705 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
22706       && ! cfun->calls_alloca)
22707     {
22708       offsets->outgoing_args = offsets->soft_frame;
22709       offsets->locals_base = offsets->soft_frame;
22710       return;
22711     }
22712 
22713   /* Ensure SFP has the correct alignment.  */
22714   if (ARM_DOUBLEWORD_ALIGN
22715       && (offsets->soft_frame & 7))
22716     {
22717       offsets->soft_frame += 4;
22718       /* Try to align stack by pushing an extra reg.  Don't bother doing this
22719          when there is a stack frame as the alignment will be rolled into
22720 	 the normal stack adjustment.  */
22721       if (frame_size + crtl->outgoing_args_size == 0)
22722 	{
22723 	  int reg = -1;
22724 
22725 	  /* Register r3 is caller-saved.  Normally it does not need to be
22726 	     saved on entry by the prologue.  However if we choose to save
22727 	     it for padding then we may confuse the compiler into thinking
22728 	     a prologue sequence is required when in fact it is not.  This
22729 	     will occur when shrink-wrapping if r3 is used as a scratch
22730 	     register and there are no other callee-saved writes.
22731 
22732 	     This situation can be avoided when other callee-saved registers
22733 	     are available and r3 is not mandatory if we choose a callee-saved
22734 	     register for padding.  */
22735 	  bool prefer_callee_reg_p = false;
22736 
22737 	  /* If it is safe to use r3, then do so.  This sometimes
22738 	     generates better code on Thumb-2 by avoiding the need to
22739 	     use 32-bit push/pop instructions.  */
22740           if (! any_sibcall_could_use_r3 ()
22741 	      && arm_size_return_regs () <= 12
22742 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
22743 	      && (TARGET_THUMB2
22744 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22745 	    {
22746 	      reg = 3;
22747 	      if (!TARGET_THUMB2)
22748 		prefer_callee_reg_p = true;
22749 	    }
22750 	  if (reg == -1
22751 	      || prefer_callee_reg_p)
22752 	    {
22753 	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22754 		{
22755 		  /* Avoid fixed registers; they may be changed at
22756 		     arbitrary times so it's unsafe to restore them
22757 		     during the epilogue.  */
22758 		  if (!fixed_regs[i]
22759 		      && (offsets->saved_regs_mask & (1 << i)) == 0)
22760 		    {
22761 		      reg = i;
22762 		      break;
22763 		    }
22764 		}
22765 	    }
22766 
22767 	  if (reg != -1)
22768 	    {
22769 	      offsets->saved_regs += 4;
22770 	      offsets->saved_regs_mask |= (1 << reg);
22771 	    }
22772 	}
22773     }
22774 
22775   offsets->locals_base = offsets->soft_frame + frame_size;
22776   offsets->outgoing_args = (offsets->locals_base
22777 			    + crtl->outgoing_args_size);
22778 
22779   if (ARM_DOUBLEWORD_ALIGN)
22780     {
22781       /* Ensure SP remains doubleword aligned.  */
22782       if (offsets->outgoing_args & 7)
22783 	offsets->outgoing_args += 4;
22784       gcc_assert (!(offsets->outgoing_args & 7));
22785     }
22786 }
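
/* Worked example (ignoring iWMMXt, VFP saves and the interworking slot):
   a function with no pretend args that saves {r4, r5, fp, lr} and has 24
   bytes of locals and no outgoing arguments gets saved_args = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 40 and
   outgoing_args = 40, which is already doubleword aligned.  */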
22787 
22788 
22789 /* Calculate the relative offsets for the different stack pointers.  Positive
22790    offsets are in the direction of stack growth.  */
22791 
22792 HOST_WIDE_INT
22793 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22794 {
22795   arm_stack_offsets *offsets;
22796 
22797   offsets = arm_get_frame_offsets ();
22798 
22799   /* OK, now we have enough information to compute the distances.
22800      There must be an entry in these switch tables for each pair
22801      of registers in ELIMINABLE_REGS, even if some of the entries
22802      seem to be redundant or useless.  */
22803   switch (from)
22804     {
22805     case ARG_POINTER_REGNUM:
22806       switch (to)
22807 	{
22808 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22809 	  return 0;
22810 
22811 	case FRAME_POINTER_REGNUM:
22812 	  /* This is the reverse of the soft frame pointer
22813 	     to hard frame pointer elimination below.  */
22814 	  return offsets->soft_frame - offsets->saved_args;
22815 
22816 	case ARM_HARD_FRAME_POINTER_REGNUM:
22817 	  /* This is only non-zero in the case where the static chain register
22818 	     is stored above the frame.  */
22819 	  return offsets->frame - offsets->saved_args - 4;
22820 
22821 	case STACK_POINTER_REGNUM:
22822 	  /* If nothing has been pushed on the stack at all
22823 	     then this will return -4.  This *is* correct!  */
22824 	  return offsets->outgoing_args - (offsets->saved_args + 4);
22825 
22826 	default:
22827 	  gcc_unreachable ();
22828 	}
22829       gcc_unreachable ();
22830 
22831     case FRAME_POINTER_REGNUM:
22832       switch (to)
22833 	{
22834 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22835 	  return 0;
22836 
22837 	case ARM_HARD_FRAME_POINTER_REGNUM:
22838 	  /* The hard frame pointer points to the top entry in the
22839 	     stack frame.  The soft frame pointer to the bottom entry
22840 	     in the stack frame.  If there is no stack frame at all,
22841 	     then they are identical.  */
22842 
22843 	  return offsets->frame - offsets->soft_frame;
22844 
22845 	case STACK_POINTER_REGNUM:
22846 	  return offsets->outgoing_args - offsets->soft_frame;
22847 
22848 	default:
22849 	  gcc_unreachable ();
22850 	}
22851       gcc_unreachable ();
22852 
22853     default:
22854       /* You cannot eliminate from the stack pointer.
22855 	 In theory you could eliminate from the hard frame
22856 	 pointer to the stack pointer, but this will never
22857 	 happen, since if a stack frame is not needed the
22858 	 hard frame pointer will never be used.  */
22859       gcc_unreachable ();
22860     }
22861 }
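
/* Continuing the worked layout example above: eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM gives outgoing_args - (saved_args + 4) = 36,
   while FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM gives
   outgoing_args - soft_frame = 24, i.e. just the local variables.  */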
22862 
22863 /* Given FROM and TO register numbers, say whether this elimination is
22864    allowed.  Frame pointer elimination is automatically handled.
22865 
22866    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
22867    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
22868    pointer, we must eliminate FRAME_POINTER_REGNUM into
22869    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22870    ARG_POINTER_REGNUM.  */
22871 
22872 bool
22873 arm_can_eliminate (const int from, const int to)
22874 {
22875   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22876           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22877           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22878           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22879            true);
22880 }
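
/* E.g. arm_can_eliminate (ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM) is
   always false, and eliminating into STACK_POINTER_REGNUM is refused
   whenever frame_pointer_needed is set, which forces the frame-pointer
   based elimination to be used instead.  */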
22881 
22882 /* Emit RTL to save coprocessor registers on function entry.  Returns the
22883    number of bytes pushed.  */
22884 
22885 static int
22886 arm_save_coproc_regs(void)
22887 {
22888   int saved_size = 0;
22889   unsigned reg;
22890   unsigned start_reg;
22891   rtx insn;
22892 
22893   if (TARGET_REALLY_IWMMXT)
22894   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22895     if (reg_needs_saving_p (reg))
22896       {
22897 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22898 	insn = gen_rtx_MEM (V2SImode, insn);
22899 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22900 	RTX_FRAME_RELATED_P (insn) = 1;
22901 	saved_size += 8;
22902       }
22903 
22904   if (TARGET_VFP_BASE)
22905     {
22906       start_reg = FIRST_VFP_REGNUM;
22907 
22908       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22909 	{
22910 	  if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
22911 	    {
22912 	      if (start_reg != reg)
22913 		saved_size += vfp_emit_fstmd (start_reg,
22914 					      (reg - start_reg) / 2);
22915 	      start_reg = reg + 2;
22916 	    }
22917 	}
22918       if (start_reg != reg)
22919 	saved_size += vfp_emit_fstmd (start_reg,
22920 				      (reg - start_reg) / 2);
22921     }
22922   return saved_size;
22923 }
22924 
22925 
22926 /* Set the Thumb frame pointer from the stack pointer.  */
22927 
22928 static void
22929 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22930 {
22931   HOST_WIDE_INT amount;
22932   rtx insn, dwarf;
22933 
22934   amount = offsets->outgoing_args - offsets->locals_base;
22935   if (amount < 1024)
22936     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22937 				  stack_pointer_rtx, GEN_INT (amount)));
22938   else
22939     {
22940       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22941       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
22942          expects the first two operands to be the same.  */
22943       if (TARGET_THUMB2)
22944 	{
22945 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22946 					stack_pointer_rtx,
22947 					hard_frame_pointer_rtx));
22948 	}
22949       else
22950 	{
22951 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22952 					hard_frame_pointer_rtx,
22953 					stack_pointer_rtx));
22954 	}
22955       dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22956 			   plus_constant (Pmode, stack_pointer_rtx, amount));
22957       RTX_FRAME_RELATED_P (dwarf) = 1;
22958       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22959     }
22960 
22961   RTX_FRAME_RELATED_P (insn) = 1;
22962 }
22963 
22964 struct scratch_reg {
22965   rtx reg;
22966   bool saved;
22967 };
22968 
22969 /* Return a short-lived scratch register for use as a 2nd scratch register on
22970    function entry after the registers are saved in the prologue.  This register
22971    must be released by means of release_scratch_register_on_entry.  IP is not
22972    considered since it is always used as the 1st scratch register if available.
22973 
22974    REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22975    mask of live registers.  */
22976 
22977 static void
22978 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22979 			       unsigned long live_regs)
22980 {
22981   int regno = -1;
22982 
22983   sr->saved = false;
22984 
22985   if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22986     regno = LR_REGNUM;
22987   else
22988     {
22989       unsigned int i;
22990 
22991       for (i = 4; i < 11; i++)
22992 	if (regno1 != i && (live_regs & (1 << i)) != 0)
22993 	  {
22994 	    regno = i;
22995 	    break;
22996 	  }
22997 
22998       if (regno < 0)
22999 	{
23000 	  /* If IP is used as the 1st scratch register for a nested function,
23001 	     then either r3 wasn't available or is used to preserve IP.  */
23002 	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23003 	    regno1 = 3;
23004 	  regno = (regno1 == 3 ? 2 : 3);
23005 	  sr->saved
23006 	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23007 			       regno);
23008 	}
23009     }
23010 
23011   sr->reg = gen_rtx_REG (SImode, regno);
23012   if (sr->saved)
23013     {
23014       rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23015       rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23016       rtx x = gen_rtx_SET (stack_pointer_rtx,
23017 		           plus_constant (Pmode, stack_pointer_rtx, -4));
23018       RTX_FRAME_RELATED_P (insn) = 1;
23019       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23020     }
23021 }
23022 
23023 /* Release a scratch register obtained from the preceding function.  */
23024 
23025 static void
23026 release_scratch_register_on_entry (struct scratch_reg *sr)
23027 {
23028   if (sr->saved)
23029     {
23030       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23031       rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23032       rtx x = gen_rtx_SET (stack_pointer_rtx,
23033 			   plus_constant (Pmode, stack_pointer_rtx, 4));
23034       RTX_FRAME_RELATED_P (insn) = 1;
23035       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23036     }
23037 }
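
/* The two helpers above are always used as a bracket: a caller such as
   arm_emit_probe_stack_range below obtains the register with
   get_scratch_register_on_entry, uses it while probing, and then hands it
   back with release_scratch_register_on_entry so that any spill made on
   entry is undone.  */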
23038 
23039 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23040 
23041 #if PROBE_INTERVAL > 4096
23042 #error Cannot use indexed addressing mode for stack probing
23043 #endif
23044 
23045 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23046    inclusive.  These are offsets from the current stack pointer.  REGNO1
23047    is the index number of the 1st scratch register and LIVE_REGS is the
23048    mask of live registers.  */
23049 
23050 static void
23051 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23052 			    unsigned int regno1, unsigned long live_regs)
23053 {
23054   rtx reg1 = gen_rtx_REG (Pmode, regno1);
23055 
23056   /* See if we have a constant small number of probes to generate.  If so,
23057      that's the easy case.  */
23058   if (size <= PROBE_INTERVAL)
23059     {
23060       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23061       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23062       emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23063     }
23064 
23065   /* The run-time loop is made up of 10 insns in the generic case while the
23066      compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
23067   else if (size <= 5 * PROBE_INTERVAL)
23068     {
23069       HOST_WIDE_INT i, rem;
23070 
23071       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23072       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23073       emit_stack_probe (reg1);
23074 
23075       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23076 	 it exceeds SIZE.  If only two probes are needed, this will not
23077 	 generate any code.  Then probe at FIRST + SIZE.  */
23078       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23079 	{
23080 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23081 	  emit_stack_probe (reg1);
23082 	}
23083 
23084       rem = size - (i - PROBE_INTERVAL);
23085       if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23086 	{
23087 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23088 	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23089 	}
23090       else
23091 	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23092     }
23093 
23094   /* Otherwise, do the same as above, but in a loop.  Note that we must be
23095      extra careful with variables wrapping around because we might be at
23096      the very top (or the very bottom) of the address space and we have
23097      to be able to handle this case properly; in particular, we use an
23098      equality test for the loop condition.  */
23099   else
23100     {
23101       HOST_WIDE_INT rounded_size;
23102       struct scratch_reg sr;
23103 
23104       get_scratch_register_on_entry (&sr, regno1, live_regs);
23105 
23106       emit_move_insn (reg1, GEN_INT (first));
23107 
23108 
23109       /* Step 1: round SIZE to the previous multiple of the interval.  */
23110 
23111       rounded_size = size & -PROBE_INTERVAL;
23112       emit_move_insn (sr.reg, GEN_INT (rounded_size));
23113 
23114 
23115       /* Step 2: compute initial and final value of the loop counter.  */
23116 
23117       /* TEST_ADDR = SP + FIRST.  */
23118       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23119 
23120       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
23121       emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23122 
23123 
23124       /* Step 3: the loop
23125 
23126 	 do
23127 	   {
23128 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23129 	     probe at TEST_ADDR
23130 	   }
23131 	 while (TEST_ADDR != LAST_ADDR)
23132 
23133 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23134 	 until it is equal to ROUNDED_SIZE.  */
23135 
23136       emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23137 
23138 
23139       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23140 	 that SIZE is equal to ROUNDED_SIZE.  */
23141 
23142       if (size != rounded_size)
23143 	{
23144 	  HOST_WIDE_INT rem = size - rounded_size;
23145 
23146 	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23147 	    {
23148 	      emit_set_insn (sr.reg,
23149 			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23150 	      emit_stack_probe (plus_constant (Pmode, sr.reg,
23151 					       PROBE_INTERVAL - rem));
23152 	    }
23153 	  else
23154 	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23155 	}
23156 
23157       release_scratch_register_on_entry (&sr);
23158     }
23159 
23160   /* Make sure nothing is scheduled before we are done.  */
23161   emit_insn (gen_blockage ());
23162 }
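
/* For instance, assuming the default PROBE_INTERVAL of 4096 bytes, a call
   with FIRST == 4096 and SIZE == 2048 takes the first branch and emits a
   single probe at SP - 6144; a SIZE of more than five probe intervals
   instead grabs a second scratch register with get_scratch_register_on_entry
   and drives the probe_stack_range loop emitted below.  */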
23163 
23164 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
23165    absolute addresses.  */
23166 
23167 const char *
23168 output_probe_stack_range (rtx reg1, rtx reg2)
23169 {
23170   static int labelno = 0;
23171   char loop_lab[32];
23172   rtx xops[2];
23173 
23174   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23175 
23176   /* Loop.  */
23177   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23178 
23179   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
23180   xops[0] = reg1;
23181   xops[1] = GEN_INT (PROBE_INTERVAL);
23182   output_asm_insn ("sub\t%0, %0, %1", xops);
23183 
23184   /* Probe at TEST_ADDR.  */
23185   output_asm_insn ("str\tr0, [%0, #0]", xops);
23186 
23187   /* Test if TEST_ADDR == LAST_ADDR.  */
23188   xops[1] = reg2;
23189   output_asm_insn ("cmp\t%0, %1", xops);
23190 
23191   /* Branch.  */
23192   fputs ("\tbne\t", asm_out_file);
23193   assemble_name_raw (asm_out_file, loop_lab);
23194   fputc ('\n', asm_out_file);
23195 
23196   return "";
23197 }
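
/* With the default 4096-byte probe interval, and assuming r4 and r5 were
   chosen as REG1 and REG2, the emitted loop looks along the lines of:

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/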
23198 
23199 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23200    function.  */
23201 void
23202 arm_expand_prologue (void)
23203 {
23204   rtx amount;
23205   rtx insn;
23206   rtx ip_rtx;
23207   unsigned long live_regs_mask;
23208   unsigned long func_type;
23209   int fp_offset = 0;
23210   int saved_pretend_args = 0;
23211   int saved_regs = 0;
23212   unsigned HOST_WIDE_INT args_to_push;
23213   HOST_WIDE_INT size;
23214   arm_stack_offsets *offsets;
23215   bool clobber_ip;
23216 
23217   func_type = arm_current_func_type ();
23218 
23219   /* Naked functions don't have prologues.  */
23220   if (IS_NAKED (func_type))
23221     {
23222       if (flag_stack_usage_info)
23223 	current_function_static_stack_size = 0;
23224       return;
23225     }
23226 
23227   /* Take a local copy of crtl->args.pretend_args_size, as we may modify it.  */
23228   args_to_push = crtl->args.pretend_args_size;
23229 
23230   /* Compute which registers we will have to save onto the stack.  */
23231   offsets = arm_get_frame_offsets ();
23232   live_regs_mask = offsets->saved_regs_mask;
23233 
23234   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23235 
23236   if (IS_STACKALIGN (func_type))
23237     {
23238       rtx r0, r1;
23239 
23240       /* Handle a word-aligned stack pointer.  We generate the following:
23241 
23242 	  mov r0, sp
23243 	  bic r1, r0, #7
23244 	  mov sp, r1
23245 	  <save and restore r0 in normal prologue/epilogue>
23246 	  mov sp, r0
23247 	  bx lr
23248 
23249 	 The unwinder doesn't need to know about the stack realignment.
23250 	 Just tell it we saved SP in r0.  */
23251       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23252 
23253       r0 = gen_rtx_REG (SImode, R0_REGNUM);
23254       r1 = gen_rtx_REG (SImode, R1_REGNUM);
23255 
23256       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23257       RTX_FRAME_RELATED_P (insn) = 1;
23258       add_reg_note (insn, REG_CFA_REGISTER, NULL);
23259 
23260       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23261 
23262       /* ??? The CFA changes here, which may cause GDB to conclude that it
23263 	 has entered a different function.  That said, the unwind info is
23264 	 correct, individually, before and after this instruction because
23265 	 we've described the save of SP, which will override the default
23266 	 handling of SP as restoring from the CFA.  */
23267       emit_insn (gen_movsi (stack_pointer_rtx, r1));
23268     }
23269 
23270   /* Let's compute the static_chain_stack_bytes required and store it.  Right
23271      now the value must be -1 as stored by arm_init_machine_status ().  */
23272   cfun->machine->static_chain_stack_bytes
23273     = arm_compute_static_chain_stack_bytes ();
23274 
23275   /* The static chain register is the same as the IP register.  If it is
23276      clobbered when creating the frame, we need to save and restore it.  */
23277   clobber_ip = IS_NESTED (func_type)
23278 	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23279 		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23280 			|| flag_stack_clash_protection)
23281 		       && !df_regs_ever_live_p (LR_REGNUM)
23282 		       && arm_r3_live_at_start_p ()));
23283 
23284   /* Find somewhere to store IP whilst the frame is being created.
23285      We try the following places in order:
23286 
23287        1. The last argument register r3 if it is available.
23288        2. A slot on the stack above the frame if there are no
23289 	  arguments to push onto the stack.
23290        3. Register r3 again, after pushing the argument registers
23291 	  onto the stack, if this is a varargs function.
23292        4. The last slot on the stack created for the arguments to
23293 	  push, if this isn't a varargs function.
23294 
23295      Note - we only need to tell the dwarf2 backend about the SP
23296      adjustment in the second variant; the static chain register
23297      doesn't need to be unwound, as it doesn't contain a value
23298      inherited from the caller.  */
23299   if (clobber_ip)
23300     {
23301       if (!arm_r3_live_at_start_p ())
23302 	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23303       else if (args_to_push == 0)
23304 	{
23305 	  rtx addr, dwarf;
23306 
23307 	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23308 	  saved_regs += 4;
23309 
23310 	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23311 	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23312 	  fp_offset = 4;
23313 
23314 	  /* Just tell the dwarf backend that we adjusted SP.  */
23315 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
23316 			       plus_constant (Pmode, stack_pointer_rtx,
23317 					      -fp_offset));
23318 	  RTX_FRAME_RELATED_P (insn) = 1;
23319 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23320 	}
23321       else
23322 	{
23323 	  /* Store the args on the stack.  */
23324 	  if (cfun->machine->uses_anonymous_args)
23325 	    {
23326 	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23327 					  (0xf0 >> (args_to_push / 4)) & 0xf);
23328 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23329 	      saved_pretend_args = 1;
23330 	    }
23331 	  else
23332 	    {
23333 	      rtx addr, dwarf;
23334 
23335 	      if (args_to_push == 4)
23336 		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23337 	      else
23338 		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23339 					   plus_constant (Pmode,
23340 							  stack_pointer_rtx,
23341 							  -args_to_push));
23342 
23343 	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23344 
23345 	      /* Just tell the dwarf backend that we adjusted SP.  */
23346 	      dwarf = gen_rtx_SET (stack_pointer_rtx,
23347 				   plus_constant (Pmode, stack_pointer_rtx,
23348 						  -args_to_push));
23349 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23350 	    }
23351 
23352 	  RTX_FRAME_RELATED_P (insn) = 1;
23353 	  fp_offset = args_to_push;
23354 	  args_to_push = 0;
23355 	}
23356     }
23357 
23358   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23359     {
23360       if (IS_INTERRUPT (func_type))
23361 	{
23362 	  /* Interrupt functions must not corrupt any registers.
23363 	     Creating a frame pointer however, corrupts the IP
23364 	     register, so we must push it first.  */
23365 	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23366 
23367 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
23368 	     The dwarf stack unwinding code only wants to see one
23369 	     stack decrement per function, and this is not it.  If
23370 	     this instruction is labeled as being part of the frame
23371 	     creation sequence then dwarf2out_frame_debug_expr will
23372 	     die when it encounters the assignment of IP to FP
23373 	     later on, since the use of SP here establishes SP as
23374 	     the CFA register and not IP.
23375 
23376 	     Anyway this instruction is not really part of the stack
23377 	     frame creation although it is part of the prologue.  */
23378 	}
23379 
23380       insn = emit_set_insn (ip_rtx,
23381 			    plus_constant (Pmode, stack_pointer_rtx,
23382 					   fp_offset));
23383       RTX_FRAME_RELATED_P (insn) = 1;
23384     }
23385 
23386   /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
23387   if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23388     {
23389       saved_regs += 4;
23390       insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23391 						GEN_INT (FPCXTNS_ENUM)));
23392       rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23393 			  plus_constant (Pmode, stack_pointer_rtx, -4));
23394       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23395       RTX_FRAME_RELATED_P (insn) = 1;
23396     }
23397 
23398   if (args_to_push)
23399     {
23400       /* Push the argument registers, or reserve space for them.  */
23401       if (cfun->machine->uses_anonymous_args)
23402 	insn = emit_multi_reg_push
23403 	  ((0xf0 >> (args_to_push / 4)) & 0xf,
23404 	   (0xf0 >> (args_to_push / 4)) & 0xf);
23405       else
23406 	insn = emit_insn
23407 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23408 		       GEN_INT (- args_to_push)));
23409       RTX_FRAME_RELATED_P (insn) = 1;
23410     }
23411 
23412   /* If this is an interrupt service routine, and the link register
23413      is going to be pushed, and we're not generating the extra push of IP
23414      (needed when a frame pointer is needed and the frame layout is APCS),
23415      then subtracting four from LR now will mean that the function return
23416      can be done with a single instruction.  */
23417   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23418       && (live_regs_mask & (1 << LR_REGNUM)) != 0
23419       && !(frame_pointer_needed && TARGET_APCS_FRAME)
23420       && TARGET_ARM)
23421     {
23422       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23423 
23424       emit_set_insn (lr, plus_constant (SImode, lr, -4));
23425     }
23426 
23427   if (live_regs_mask)
23428     {
23429       unsigned long dwarf_regs_mask = live_regs_mask;
23430 
23431       saved_regs += bit_count (live_regs_mask) * 4;
23432       if (optimize_size && !frame_pointer_needed
23433 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
23434 	{
23435 	  /* If no coprocessor registers are being pushed and we don't have
23436 	     to worry about a frame pointer then push extra registers to
23437 	     create the stack frame.  This is done in a way that does not
23438 	     alter the frame layout, so is independent of the epilogue.  */
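	  /* E.g. if the frame needs 8 more bytes and r0 and r1 are not
	     live, they are added to the push mask below so that the push
	     itself allocates the space, avoiding a separate
	     "sub sp, sp, #8".  */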
23439 	  int n;
23440 	  int frame;
23441 	  n = 0;
23442 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23443 	    n++;
23444 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23445 	  if (frame && n * 4 >= frame)
23446 	    {
23447 	      n = frame / 4;
23448 	      live_regs_mask |= (1 << n) - 1;
23449 	      saved_regs += frame;
23450 	    }
23451 	}
23452 
23453       if (TARGET_LDRD
23454 	  && current_tune->prefer_ldrd_strd
23455           && !optimize_function_for_size_p (cfun))
23456         {
23457 	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23458           if (TARGET_THUMB2)
23459 	    thumb2_emit_strd_push (live_regs_mask);
23460           else if (TARGET_ARM
23461                    && !TARGET_APCS_FRAME
23462                    && !IS_INTERRUPT (func_type))
23463 	    arm_emit_strd_push (live_regs_mask);
23464           else
23465             {
23466 	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23467               RTX_FRAME_RELATED_P (insn) = 1;
23468             }
23469         }
23470       else
23471         {
23472 	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23473           RTX_FRAME_RELATED_P (insn) = 1;
23474         }
23475     }
23476 
23477   if (! IS_VOLATILE (func_type))
23478     saved_regs += arm_save_coproc_regs ();
23479 
23480   if (frame_pointer_needed && TARGET_ARM)
23481     {
23482       /* Create the new frame pointer.  */
23483       if (TARGET_APCS_FRAME)
23484 	{
23485 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
23486 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23487 	  RTX_FRAME_RELATED_P (insn) = 1;
23488 	}
23489       else
23490 	{
23491 	  insn = GEN_INT (saved_regs - (4 + fp_offset));
23492 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23493 					stack_pointer_rtx, insn));
23494 	  RTX_FRAME_RELATED_P (insn) = 1;
23495 	}
23496     }
23497 
23498   size = offsets->outgoing_args - offsets->saved_args;
23499   if (flag_stack_usage_info)
23500     current_function_static_stack_size = size;
23501 
23502   /* If this isn't an interrupt service routine and we have a frame, then do
23503      stack checking.  We use IP as the first scratch register, except for the
23504      non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
23505   if (!IS_INTERRUPT (func_type)
23506       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23507 	  || flag_stack_clash_protection))
23508     {
23509       unsigned int regno;
23510 
23511       if (!IS_NESTED (func_type) || clobber_ip)
23512 	regno = IP_REGNUM;
23513       else if (df_regs_ever_live_p (LR_REGNUM))
23514 	regno = LR_REGNUM;
23515       else
23516 	regno = 3;
23517 
23518       if (crtl->is_leaf && !cfun->calls_alloca)
23519 	{
23520 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23521 	    arm_emit_probe_stack_range (get_stack_check_protect (),
23522 					size - get_stack_check_protect (),
23523 					regno, live_regs_mask);
23524 	}
23525       else if (size > 0)
23526 	arm_emit_probe_stack_range (get_stack_check_protect (), size,
23527 				    regno, live_regs_mask);
23528     }
23529 
23530   /* Recover the static chain register.  */
23531   if (clobber_ip)
23532     {
23533       if (!arm_r3_live_at_start_p () || saved_pretend_args)
23534 	insn = gen_rtx_REG (SImode, 3);
23535       else
23536 	{
23537 	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23538 	  insn = gen_frame_mem (SImode, insn);
23539 	}
23540       emit_set_insn (ip_rtx, insn);
23541       emit_insn (gen_force_register_use (ip_rtx));
23542     }
23543 
23544   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23545     {
23546       /* This add can produce multiple insns for a large constant, so we
23547 	 need to get tricky.  */
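      /* E.g. an adjustment of 4100 bytes is not a valid ARM immediate, so
	 gen_addsi3 expands it into more than one insn (4096 plus 4); the
	 loop below marks every insn emitted since LAST as frame-related.  */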
23548       rtx_insn *last = get_last_insn ();
23549 
23550       amount = GEN_INT (offsets->saved_args + saved_regs
23551 			- offsets->outgoing_args);
23552 
23553       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23554 				    amount));
23555       do
23556 	{
23557 	  last = last ? NEXT_INSN (last) : get_insns ();
23558 	  RTX_FRAME_RELATED_P (last) = 1;
23559 	}
23560       while (last != insn);
23561 
23562       /* If the frame pointer is needed, emit a special barrier that
23563 	 will prevent the scheduler from moving stores to the frame
23564 	 before the stack adjustment.  */
23565       if (frame_pointer_needed)
23566 	emit_insn (gen_stack_tie (stack_pointer_rtx,
23567 				  hard_frame_pointer_rtx));
23568     }
23569 
23570 
23571   if (frame_pointer_needed && TARGET_THUMB2)
23572     thumb_set_frame_pointer (offsets);
23573 
23574   if (flag_pic && arm_pic_register != INVALID_REGNUM)
23575     {
23576       unsigned long mask;
23577 
23578       mask = live_regs_mask;
23579       mask &= THUMB2_WORK_REGS;
23580       if (!IS_NESTED (func_type))
23581 	mask |= (1 << IP_REGNUM);
23582       arm_load_pic_register (mask, NULL_RTX);
23583     }
23584 
23585   /* If we are profiling, make sure no instructions are scheduled before
23586      the call to mcount.  Similarly if the user has requested no
23587      scheduling in the prolog.  Similarly if we want non-call exceptions
23588      using the EABI unwinder, to prevent faulting instructions from being
23589      swapped with a stack adjustment.  */
23590   if (crtl->profile || !TARGET_SCHED_PROLOG
23591       || (arm_except_unwind_info (&global_options) == UI_TARGET
23592 	  && cfun->can_throw_non_call_exceptions))
23593     emit_insn (gen_blockage ());
23594 
23595   /* If the link register is being kept alive, with the return address in it,
23596      then make sure that it does not get reused by the ce2 pass.  */
23597   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23598     cfun->machine->lr_save_eliminated = 1;
23599 }
23600 
23601 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
23602 static void
23603 arm_print_condition (FILE *stream)
23604 {
23605   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23606     {
23607       /* Branch conversion is not implemented for Thumb-2.  */
23608       if (TARGET_THUMB)
23609 	{
23610 	  output_operand_lossage ("predicated Thumb instruction");
23611 	  return;
23612 	}
23613       if (current_insn_predicate != NULL)
23614 	{
23615 	  output_operand_lossage
23616 	    ("predicated instruction in conditional sequence");
23617 	  return;
23618 	}
23619 
23620       fputs (arm_condition_codes[arm_current_cc], stream);
23621     }
23622   else if (current_insn_predicate)
23623     {
23624       enum arm_cond_code code;
23625 
23626       if (TARGET_THUMB1)
23627 	{
23628 	  output_operand_lossage ("predicated Thumb instruction");
23629 	  return;
23630 	}
23631 
23632       code = get_arm_condition_code (current_insn_predicate);
23633       fputs (arm_condition_codes[code], stream);
23634     }
23635 }
23636 
23637 
23638 /* Globally reserved letters: acln
23639    Punctuation letters currently used: @_|?().!#
23640    Lower case letters currently used: bcdefhimpqtvwxyz
23641    Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23642    Letters previously used, but now deprecated/obsolete: sVWXYZ.
23643 
23644    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23645 
23646    If CODE is 'd', then the X is a condition operand and the instruction
23647    should only be executed if the condition is true.
23648    If CODE is 'D', then the X is a condition operand and the instruction
23649    should only be executed if the condition is false: however, if the mode
23650    of the comparison is CCFPEmode, then always execute the instruction -- we
23651    do this because in these circumstances !GE does not necessarily imply LT;
23652    in these cases the instruction pattern will take care to make sure that
23653    an instruction containing %d will follow, thereby undoing the effects of
23654    doing this instruction unconditionally.
23655    If CODE is 'N' then X is a floating point operand that must be negated
23656    before output.
23657    If CODE is 'B' then output a bitwise inverted value of X (a const int).
23658    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
23659 static void
23660 arm_print_operand (FILE *stream, rtx x, int code)
23661 {
23662   switch (code)
23663     {
23664     case '@':
23665       fputs (ASM_COMMENT_START, stream);
23666       return;
23667 
23668     case '_':
23669       fputs (user_label_prefix, stream);
23670       return;
23671 
23672     case '|':
23673       fputs (REGISTER_PREFIX, stream);
23674       return;
23675 
23676     case '?':
23677       arm_print_condition (stream);
23678       return;
23679 
23680     case '.':
23681       /* The current condition code for a condition code setting instruction.
23682 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
23683       fputc('s', stream);
23684       arm_print_condition (stream);
23685       return;
23686 
23687     case '!':
23688       /* If the instruction is conditionally executed then print
23689 	 the current condition code, otherwise print 's'.  */
23690       gcc_assert (TARGET_THUMB2);
23691       if (current_insn_predicate)
23692 	arm_print_condition (stream);
23693       else
23694 	fputc('s', stream);
23695       break;
23696 
23697     /* %# is a "break" sequence. It doesn't output anything, but is used to
23698        separate e.g. operand numbers from following text, if that text consists
23699        of further digits which we don't want to be part of the operand
23700        number.  */
23701     case '#':
23702       return;
23703 
23704     case 'N':
23705       {
23706 	REAL_VALUE_TYPE r;
23707 	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23708 	fprintf (stream, "%s", fp_const_from_val (&r));
23709       }
23710       return;
23711 
23712     /* An integer or symbol address without a preceding # sign.  */
23713     case 'c':
23714       switch (GET_CODE (x))
23715 	{
23716 	case CONST_INT:
23717 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23718 	  break;
23719 
23720 	case SYMBOL_REF:
23721 	  output_addr_const (stream, x);
23722 	  break;
23723 
23724 	case CONST:
23725 	  if (GET_CODE (XEXP (x, 0)) == PLUS
23726 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23727 	    {
23728 	      output_addr_const (stream, x);
23729 	      break;
23730 	    }
23731 	  /* Fall through.  */
23732 
23733 	default:
23734 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23735 	}
23736       return;
23737 
23738     /* An integer that we want to print in HEX.  */
23739     case 'x':
23740       switch (GET_CODE (x))
23741 	{
23742 	case CONST_INT:
23743 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23744 	  break;
23745 
23746 	default:
23747 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23748 	}
23749       return;
23750 
23751     case 'B':
23752       if (CONST_INT_P (x))
23753 	{
23754 	  HOST_WIDE_INT val;
23755 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
23756 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23757 	}
23758       else
23759 	{
23760 	  putc ('~', stream);
23761 	  output_addr_const (stream, x);
23762 	}
23763       return;
23764 
23765     case 'b':
23766       /* Print the log2 of a CONST_INT.  */
23767       {
23768 	HOST_WIDE_INT val;
23769 
23770 	if (!CONST_INT_P (x)
23771 	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23772 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23773 	else
23774 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23775       }
23776       return;
23777 
23778     case 'L':
23779       /* The low 16 bits of an immediate constant.  */
23780       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23781       return;
23782 
23783     case 'i':
23784       fprintf (stream, "%s", arithmetic_instr (x, 1));
23785       return;
23786 
23787     case 'I':
23788       fprintf (stream, "%s", arithmetic_instr (x, 0));
23789       return;
23790 
23791     case 'S':
23792       {
23793 	HOST_WIDE_INT val;
23794 	const char *shift;
23795 
23796 	shift = shift_op (x, &val);
23797 
23798 	if (shift)
23799 	  {
23800 	    fprintf (stream, ", %s ", shift);
23801 	    if (val == -1)
23802 	      arm_print_operand (stream, XEXP (x, 1), 0);
23803 	    else
23804 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23805 	  }
23806       }
23807       return;
23808 
23809       /* An explanation of the 'Q', 'R' and 'H' register operands:
23810 
23811 	 In a pair of registers containing a DI or DF value the 'Q'
23812 	 operand returns the register number of the register containing
23813 	 the least significant part of the value.  The 'R' operand returns
23814 	 the register number of the register containing the most
23815 	 significant part of the value.
23816 
23817 	 The 'H' operand returns the higher of the two register numbers.
23818 	 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
23819 	 same as the 'Q' operand, since the most significant part of the
23820 	 value is held in the lower number register.  The reverse is true
23821 	 on systems where WORDS_BIG_ENDIAN is false.
23822 
23823 	 The purpose of these operands is to distinguish between cases
23824 	 where the endian-ness of the values is important (for example
23825 	 when they are added together), and cases where the endian-ness
23826 	 is irrelevant, but the order of register operations is important.
23827 	 For example when loading a value from memory into a register
23828 	 pair, the endian-ness does not matter.  Provided that the value
23829 	 from the lower memory address is put into the lower numbered
23830 	 register, and the value from the higher address is put into the
23831 	 higher numbered register, the load will work regardless of whether
23832 	 the value being loaded is big-wordian or little-wordian.  The
23833 	 order of the two register loads can matter however, if the address
23834 	 of the memory location is actually held in one of the registers
23835 	 being overwritten by the load.
23836 
23837 	 The 'Q' and 'R' operand codes are also available for 64-bit
23838 	 constants.  */
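      /* For illustration on a little-endian target: for a DImode value in
	 r0/r1, %Q prints r0 (the low word), %R prints r1 (the high word)
	 and %H prints r1.  When WORDS_BIG_ENDIAN is true, %Q and %R swap
	 while %H is still REGNO + 1.  */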
23839     case 'Q':
23840       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23841 	{
23842 	  rtx part = gen_lowpart (SImode, x);
23843 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23844 	  return;
23845 	}
23846 
23847       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23848 	{
23849 	  output_operand_lossage ("invalid operand for code '%c'", code);
23850 	  return;
23851 	}
23852 
23853       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23854       return;
23855 
23856     case 'R':
23857       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23858 	{
23859 	  machine_mode mode = GET_MODE (x);
23860 	  rtx part;
23861 
23862 	  if (mode == VOIDmode)
23863 	    mode = DImode;
23864 	  part = gen_highpart_mode (SImode, mode, x);
23865 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23866 	  return;
23867 	}
23868 
23869       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23870 	{
23871 	  output_operand_lossage ("invalid operand for code '%c'", code);
23872 	  return;
23873 	}
23874 
23875       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23876       return;
23877 
23878     case 'H':
23879       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23880 	{
23881 	  output_operand_lossage ("invalid operand for code '%c'", code);
23882 	  return;
23883 	}
23884 
23885       asm_fprintf (stream, "%r", REGNO (x) + 1);
23886       return;
23887 
23888     case 'J':
23889       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23890 	{
23891 	  output_operand_lossage ("invalid operand for code '%c'", code);
23892 	  return;
23893 	}
23894 
23895       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23896       return;
23897 
23898     case 'K':
23899       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23900 	{
23901 	  output_operand_lossage ("invalid operand for code '%c'", code);
23902 	  return;
23903 	}
23904 
23905       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23906       return;
23907 
23908     case 'm':
23909       asm_fprintf (stream, "%r",
23910 		   REG_P (XEXP (x, 0))
23911 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23912       return;
23913 
23914     case 'M':
23915       asm_fprintf (stream, "{%r-%r}",
23916 		   REGNO (x),
23917 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23918       return;
23919 
23920     /* Like 'M', but writing doubleword vector registers, for use by Neon
23921        insns.  */
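    /* E.g. %M on a DImode value in r4 prints "{r4-r5}", while %h on a
       32-byte value starting at d4 prints "{d4-d7}".  */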
23922     case 'h':
23923       {
23924         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23925         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23926         if (numregs == 1)
23927           asm_fprintf (stream, "{d%d}", regno);
23928         else
23929           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23930       }
23931       return;
23932 
23933     case 'd':
23934       /* CONST_TRUE_RTX means always -- that's the default.  */
23935       if (x == const_true_rtx)
23936 	return;
23937 
23938       if (!COMPARISON_P (x))
23939 	{
23940 	  output_operand_lossage ("invalid operand for code '%c'", code);
23941 	  return;
23942 	}
23943 
23944       fputs (arm_condition_codes[get_arm_condition_code (x)],
23945 	     stream);
23946       return;
23947 
23948     case 'D':
23949       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
23950 	 want to do that.  */
23951       if (x == const_true_rtx)
23952 	{
23953 	  output_operand_lossage ("instruction never executed");
23954 	  return;
23955 	}
23956       if (!COMPARISON_P (x))
23957 	{
23958 	  output_operand_lossage ("invalid operand for code '%c'", code);
23959 	  return;
23960 	}
23961 
23962       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23963 				 (get_arm_condition_code (x))],
23964 	     stream);
23965       return;
23966 
23967     case 's':
23968     case 'V':
23969     case 'W':
23970     case 'X':
23971     case 'Y':
23972     case 'Z':
23973       /* Former Maverick support, removed after GCC-4.7.  */
23974       output_operand_lossage ("obsolete Maverick format code '%c'", code);
23975       return;
23976 
23977     case 'U':
23978       if (!REG_P (x)
23979 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23980 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23981 	/* Bad value for wCG register number.  */
23982 	{
23983 	  output_operand_lossage ("invalid operand for code '%c'", code);
23984 	  return;
23985 	}
23986 
23987       else
23988 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23989       return;
23990 
23991       /* Print an iWMMXt control register name.  */
23992     case 'w':
23993       if (!CONST_INT_P (x)
23994 	  || INTVAL (x) < 0
23995 	  || INTVAL (x) >= 16)
23996 	/* Bad value for wC register number.  */
23997 	{
23998 	  output_operand_lossage ("invalid operand for code '%c'", code);
23999 	  return;
24000 	}
24001 
24002       else
24003 	{
24004 	  static const char * wc_reg_names [16] =
24005 	    {
24006 	      "wCID",  "wCon",  "wCSSF", "wCASF",
24007 	      "wC4",   "wC5",   "wC6",   "wC7",
24008 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24009 	      "wC12",  "wC13",  "wC14",  "wC15"
24010 	    };
24011 
24012 	  fputs (wc_reg_names [INTVAL (x)], stream);
24013 	}
24014       return;
24015 
24016     /* Print the high single-precision register of a VFP double-precision
24017        register.  */
24018     case 'p':
24019       {
24020         machine_mode mode = GET_MODE (x);
24021         int regno;
24022 
24023         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24024           {
24025 	    output_operand_lossage ("invalid operand for code '%c'", code);
24026 	    return;
24027           }
24028 
24029         regno = REGNO (x);
24030         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24031           {
24032 	    output_operand_lossage ("invalid operand for code '%c'", code);
24033 	    return;
24034           }
24035 
24036 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24037       }
24038       return;
24039 
24040     /* Print a VFP/Neon double precision or quad precision register name.  */
24041     case 'P':
24042     case 'q':
24043       {
24044 	machine_mode mode = GET_MODE (x);
24045 	int is_quad = (code == 'q');
24046 	int regno;
24047 
24048 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24049 	  {
24050 	    output_operand_lossage ("invalid operand for code '%c'", code);
24051 	    return;
24052 	  }
24053 
24054 	if (!REG_P (x)
24055 	    || !IS_VFP_REGNUM (REGNO (x)))
24056 	  {
24057 	    output_operand_lossage ("invalid operand for code '%c'", code);
24058 	    return;
24059 	  }
24060 
24061 	regno = REGNO (x);
24062 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24063             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24064 	  {
24065 	    output_operand_lossage ("invalid operand for code '%c'", code);
24066 	    return;
24067 	  }
24068 
24069 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24070 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24071       }
24072       return;
24073 
24074     /* These two codes print the low/high doubleword register of a Neon quad
24075        register, respectively.  For pair-structure types, can also print
24076        low/high quadword registers.  */
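    /* E.g. for a quad value in q1 (which overlaps d2 and d3), %e prints
       "d2" and %f prints "d3".  */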
24077     case 'e':
24078     case 'f':
24079       {
24080         machine_mode mode = GET_MODE (x);
24081         int regno;
24082 
24083         if ((GET_MODE_SIZE (mode) != 16
24084 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24085           {
24086 	    output_operand_lossage ("invalid operand for code '%c'", code);
24087 	    return;
24088           }
24089 
24090         regno = REGNO (x);
24091         if (!NEON_REGNO_OK_FOR_QUAD (regno))
24092           {
24093 	    output_operand_lossage ("invalid operand for code '%c'", code);
24094 	    return;
24095           }
24096 
24097         if (GET_MODE_SIZE (mode) == 16)
24098           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24099 				  + (code == 'f' ? 1 : 0));
24100         else
24101           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24102 				  + (code == 'f' ? 1 : 0));
24103       }
24104       return;
24105 
24106     /* Print a VFPv3 floating-point constant, represented as an integer
24107        index.  */
24108     case 'G':
24109       {
24110         int index = vfp3_const_double_index (x);
24111 	gcc_assert (index != -1);
24112 	fprintf (stream, "%d", index);
24113       }
24114       return;
24115 
24116     /* Print bits representing opcode features for Neon.
24117 
24118        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
24119        and polynomials as unsigned.
24120 
24121        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24122 
24123        Bit 2 is 1 for rounding functions, 0 otherwise.  */
24124 
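    /* For illustration: with bits == 5 (signed, rounding) %T prints 's'
       and %O prints 'r'; with bits == 2 (unsigned, polynomial) %T prints
       'p' and %O prints nothing.  */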
24125     /* Identify the type as 's', 'u', 'p' or 'f'.  */
24126     case 'T':
24127       {
24128         HOST_WIDE_INT bits = INTVAL (x);
24129         fputc ("uspf"[bits & 3], stream);
24130       }
24131       return;
24132 
24133     /* Likewise, but signed and unsigned integers are both 'i'.  */
24134     case 'F':
24135       {
24136         HOST_WIDE_INT bits = INTVAL (x);
24137         fputc ("iipf"[bits & 3], stream);
24138       }
24139       return;
24140 
24141     /* As for 'T', but emit 'u' instead of 'p'.  */
24142     case 't':
24143       {
24144         HOST_WIDE_INT bits = INTVAL (x);
24145         fputc ("usuf"[bits & 3], stream);
24146       }
24147       return;
24148 
24149     /* Bit 2: rounding (vs none).  */
24150     case 'O':
24151       {
24152         HOST_WIDE_INT bits = INTVAL (x);
24153         fputs ((bits & 4) != 0 ? "r" : "", stream);
24154       }
24155       return;
24156 
24157     /* Memory operand for vld1/vst1 instruction.  */
24158     case 'A':
24159       {
24160 	rtx addr;
24161 	bool postinc = FALSE;
24162 	rtx postinc_reg = NULL;
24163 	unsigned align, memsize, align_bits;
24164 
24165 	gcc_assert (MEM_P (x));
24166 	addr = XEXP (x, 0);
24167 	if (GET_CODE (addr) == POST_INC)
24168 	  {
24169 	    postinc = 1;
24170 	    addr = XEXP (addr, 0);
24171 	  }
24172 	if (GET_CODE (addr) == POST_MODIFY)
24173 	  {
24174 	    postinc_reg = XEXP( XEXP (addr, 1), 1);
24175 	    addr = XEXP (addr, 0);
24176 	  }
24177 	asm_fprintf (stream, "[%r", REGNO (addr));
24178 
24179 	/* We know the alignment of this access, so we can emit a hint in the
24180 	   instruction (for some alignments) as an aid to the memory subsystem
24181 	   of the target.  */
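	/* E.g. a 16-byte access whose MEM_ALIGN is 128 bits gets a 128-bit
	   alignment hint and is printed as "[r0:128]" (r0 here is just an
	   example base register); an access with no usable alignment gets
	   no hint.  */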
24182 	align = MEM_ALIGN (x) >> 3;
24183 	memsize = MEM_SIZE (x);
24184 
24185 	/* Only certain alignment specifiers are supported by the hardware.  */
24186 	if (memsize == 32 && (align % 32) == 0)
24187 	  align_bits = 256;
24188 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24189 	  align_bits = 128;
24190 	else if (memsize >= 8 && (align % 8) == 0)
24191 	  align_bits = 64;
24192 	else
24193 	  align_bits = 0;
24194 
24195 	if (align_bits != 0)
24196 	  asm_fprintf (stream, ":%d", align_bits);
24197 
24198 	asm_fprintf (stream, "]");
24199 
24200 	if (postinc)
24201 	  fputs("!", stream);
24202 	if (postinc_reg)
24203 	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24204       }
24205       return;
24206 
24207     /* To print the memory operand with "Ux" or "Uj" constraint.  Based on the
24208        rtx_code, the memory operand's output looks like one of the following:
24209        1. [Rn], #+/-<imm>
24210        2. [Rn, #+/-<imm>]!
24211        3. [Rn, #+/-<imm>]
24212        4. [Rn].  */
24213     case 'E':
24214       {
24215 	rtx addr;
24216 	rtx postinc_reg = NULL;
24217 	unsigned inc_val = 0;
24218 	enum rtx_code code;
24219 
24220 	gcc_assert (MEM_P (x));
24221 	addr = XEXP (x, 0);
24222 	code = GET_CODE (addr);
24223 	if (code == POST_INC || code == POST_DEC || code == PRE_INC
24224 	    || code  == PRE_DEC)
24225 	  {
24226 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24227 	    inc_val = GET_MODE_SIZE (GET_MODE (x));
24228 	    if (code == POST_INC || code == POST_DEC)
24229 	      asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24230 					      ? "": "-", inc_val);
24231 	    else
24232 	      asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24233 					       ? "": "-", inc_val);
24234 	  }
24235 	else if (code == POST_MODIFY || code == PRE_MODIFY)
24236 	  {
24237 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24238 	    postinc_reg = XEXP (XEXP (addr, 1), 1);
24239 	    if (postinc_reg && CONST_INT_P (postinc_reg))
24240 	      {
24241 		if (code == POST_MODIFY)
24242 		  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24243 		else
24244 		  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24245 	      }
24246 	  }
24247 	else if (code == PLUS)
24248 	  {
24249 	    rtx base = XEXP (addr, 0);
24250 	    rtx index = XEXP (addr, 1);
24251 
24252 	    gcc_assert (REG_P (base) && CONST_INT_P (index));
24253 
24254 	    HOST_WIDE_INT offset = INTVAL (index);
24255 	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24256 	  }
24257 	else
24258 	  {
24259 	    gcc_assert (REG_P (addr));
24260 	    asm_fprintf (stream, "[%r]",REGNO (addr));
24261 	  }
24262       }
24263       return;
24264 
24265     case 'C':
24266       {
24267 	rtx addr;
24268 
24269 	gcc_assert (MEM_P (x));
24270 	addr = XEXP (x, 0);
24271 	gcc_assert (REG_P (addr));
24272 	asm_fprintf (stream, "[%r]", REGNO (addr));
24273       }
24274       return;
24275 
24276     /* Translate an S register number into a D register number and element index.  */
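    /* E.g. an SFmode value in s5 is printed as "d2[1]", since s5 is the
       odd half of d2.  */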
24277     case 'y':
24278       {
24279         machine_mode mode = GET_MODE (x);
24280         int regno;
24281 
24282         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24283           {
24284 	    output_operand_lossage ("invalid operand for code '%c'", code);
24285 	    return;
24286           }
24287 
24288         regno = REGNO (x);
24289         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24290           {
24291 	    output_operand_lossage ("invalid operand for code '%c'", code);
24292 	    return;
24293           }
24294 
24295 	regno = regno - FIRST_VFP_REGNUM;
24296 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24297       }
24298       return;
24299 
24300     case 'v':
24301 	gcc_assert (CONST_DOUBLE_P (x));
24302 	int result;
24303 	result = vfp3_const_double_for_fract_bits (x);
24304 	if (result == 0)
24305 	  result = vfp3_const_double_for_bits (x);
24306 	fprintf (stream, "#%d", result);
24307 	return;
24308 
24309     /* Register specifier for vld1.16/vst1.16.  Translate the S register
24310        number into a D register number and element index.  */
24311     case 'z':
24312       {
24313         machine_mode mode = GET_MODE (x);
24314         int regno;
24315 
24316         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24317           {
24318 	    output_operand_lossage ("invalid operand for code '%c'", code);
24319 	    return;
24320           }
24321 
24322         regno = REGNO (x);
24323         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24324           {
24325 	    output_operand_lossage ("invalid operand for code '%c'", code);
24326 	    return;
24327           }
24328 
24329 	regno = regno - FIRST_VFP_REGNUM;
24330 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24331       }
24332       return;
24333 
24334     default:
24335       if (x == 0)
24336 	{
24337 	  output_operand_lossage ("missing operand");
24338 	  return;
24339 	}
24340 
24341       switch (GET_CODE (x))
24342 	{
24343 	case REG:
24344 	  asm_fprintf (stream, "%r", REGNO (x));
24345 	  break;
24346 
24347 	case MEM:
24348 	  output_address (GET_MODE (x), XEXP (x, 0));
24349 	  break;
24350 
24351 	case CONST_DOUBLE:
24352 	  {
24353             char fpstr[20];
24354             real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24355 			      sizeof (fpstr), 0, 1);
24356             fprintf (stream, "#%s", fpstr);
24357 	  }
24358 	  break;
24359 
24360 	default:
24361 	  gcc_assert (GET_CODE (x) != NEG);
24362 	  fputc ('#', stream);
24363 	  if (GET_CODE (x) == HIGH)
24364 	    {
24365 	      fputs (":lower16:", stream);
24366 	      x = XEXP (x, 0);
24367 	    }
24368 
24369 	  output_addr_const (stream, x);
24370 	  break;
24371 	}
24372     }
24373 }
24374 
24375 /* Target hook for printing a memory address.  */
24376 static void
24377 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24378 {
24379   if (TARGET_32BIT)
24380     {
24381       int is_minus = GET_CODE (x) == MINUS;
24382 
24383       if (REG_P (x))
24384 	asm_fprintf (stream, "[%r]", REGNO (x));
24385       else if (GET_CODE (x) == PLUS || is_minus)
24386 	{
24387 	  rtx base = XEXP (x, 0);
24388 	  rtx index = XEXP (x, 1);
24389 	  HOST_WIDE_INT offset = 0;
24390 	  if (!REG_P (base)
24391 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
24392 	    {
24393 	      /* Ensure that BASE is a register.  */
24394 	      /* (one of them must be).  */
24395 	      /* Also ensure the SP is not used as an index register.  */
24396 	      std::swap (base, index);
24397 	    }
24398 	  switch (GET_CODE (index))
24399 	    {
24400 	    case CONST_INT:
24401 	      offset = INTVAL (index);
24402 	      if (is_minus)
24403 		offset = -offset;
24404 	      asm_fprintf (stream, "[%r, #%wd]",
24405 			   REGNO (base), offset);
24406 	      break;
24407 
24408 	    case REG:
24409 	      asm_fprintf (stream, "[%r, %s%r]",
24410 			   REGNO (base), is_minus ? "-" : "",
24411 			   REGNO (index));
24412 	      break;
24413 
24414 	    case MULT:
24415 	    case ASHIFTRT:
24416 	    case LSHIFTRT:
24417 	    case ASHIFT:
24418 	    case ROTATERT:
24419 	      {
24420 		asm_fprintf (stream, "[%r, %s%r",
24421 			     REGNO (base), is_minus ? "-" : "",
24422 			     REGNO (XEXP (index, 0)));
24423 		arm_print_operand (stream, index, 'S');
24424 		fputs ("]", stream);
24425 		break;
24426 	      }
24427 
24428 	    default:
24429 	      gcc_unreachable ();
24430 	    }
24431 	}
24432       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24433 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24434 	{
24435 	  gcc_assert (REG_P (XEXP (x, 0)));
24436 
24437 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24438 	    asm_fprintf (stream, "[%r, #%s%d]!",
24439 			 REGNO (XEXP (x, 0)),
24440 			 GET_CODE (x) == PRE_DEC ? "-" : "",
24441 			 GET_MODE_SIZE (mode));
24442 	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24443 	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24444 	  else
24445 	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24446 			 GET_CODE (x) == POST_DEC ? "-" : "",
24447 			 GET_MODE_SIZE (mode));
24448 	}
24449       else if (GET_CODE (x) == PRE_MODIFY)
24450 	{
24451 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24452 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24453 	    asm_fprintf (stream, "#%wd]!",
24454 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24455 	  else
24456 	    asm_fprintf (stream, "%r]!",
24457 			 REGNO (XEXP (XEXP (x, 1), 1)));
24458 	}
24459       else if (GET_CODE (x) == POST_MODIFY)
24460 	{
24461 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24462 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24463 	    asm_fprintf (stream, "#%wd",
24464 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24465 	  else
24466 	    asm_fprintf (stream, "%r",
24467 			 REGNO (XEXP (XEXP (x, 1), 1)));
24468 	}
24469       else output_addr_const (stream, x);
24470     }
24471   else
24472     {
24473       if (REG_P (x))
24474 	asm_fprintf (stream, "[%r]", REGNO (x));
24475       else if (GET_CODE (x) == POST_INC)
24476 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24477       else if (GET_CODE (x) == PLUS)
24478 	{
24479 	  gcc_assert (REG_P (XEXP (x, 0)));
24480 	  if (CONST_INT_P (XEXP (x, 1)))
24481 	    asm_fprintf (stream, "[%r, #%wd]",
24482 			 REGNO (XEXP (x, 0)),
24483 			 INTVAL (XEXP (x, 1)));
24484 	  else
24485 	    asm_fprintf (stream, "[%r, %r]",
24486 			 REGNO (XEXP (x, 0)),
24487 			 REGNO (XEXP (x, 1)));
24488 	}
24489       else
24490 	output_addr_const (stream, x);
24491     }
24492 }
24493 
24494 /* Target hook for indicating whether a punctuation character for
24495    TARGET_PRINT_OPERAND is valid.  */
24496 static bool
24497 arm_print_operand_punct_valid_p (unsigned char code)
24498 {
24499   return (code == '@' || code == '|' || code == '.'
24500 	  || code == '(' || code == ')' || code == '#'
24501 	  || (TARGET_32BIT && (code == '?'))
24502 	  || (TARGET_THUMB2 && (code == '!'))
24503 	  || (TARGET_THUMB && (code == '_')));
24504 }
24505 
24506 /* Target hook for assembling integer objects.  The ARM version needs to
24507    handle word-sized values specially.  */
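/* For illustration: a word-sized PIC reference to a non-local symbol in
   the constant pool is emitted as, e.g., "\t.word\tfoo(GOT)" (the symbol
   name "foo" here is only an example).  */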
24508 static bool
24509 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24510 {
24511   machine_mode mode;
24512 
24513   if (size == UNITS_PER_WORD && aligned_p)
24514     {
24515       fputs ("\t.word\t", asm_out_file);
24516       output_addr_const (asm_out_file, x);
24517 
24518       /* Mark symbols as position independent.  We only do this in the
24519 	 .text segment, not in the .data segment.  */
24520       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24521 	  (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24522 	{
24523 	  /* See legitimize_pic_address for an explanation of the
24524 	     TARGET_VXWORKS_RTP check.  */
24525 	  /* References to weak symbols cannot be resolved locally:
24526 	     they may be overridden by a non-weak definition at link
24527 	     time.  */
24528 	  if (!arm_pic_data_is_text_relative
24529 	      || (SYMBOL_REF_P (x)
24530 		  && (!SYMBOL_REF_LOCAL_P (x)
24531 		      || (SYMBOL_REF_DECL (x)
24532 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24533 		      || (SYMBOL_REF_FUNCTION_P (x)
24534 			  && !arm_fdpic_local_funcdesc_p (x)))))
24535 	    {
24536 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24537 		fputs ("(GOTFUNCDESC)", asm_out_file);
24538 	      else
24539 		fputs ("(GOT)", asm_out_file);
24540 	    }
24541 	  else
24542 	    {
24543 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24544 		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24545 	      else
24546 		{
24547 		  bool is_readonly;
24548 
24549 		  if (!TARGET_FDPIC
24550 		      || arm_is_segment_info_known (x, &is_readonly))
24551 		    fputs ("(GOTOFF)", asm_out_file);
24552 		  else
24553 		    fputs ("(GOT)", asm_out_file);
24554 		}
24555 	    }
24556 	}
24557 
24558       /* For FDPIC we also have to mark symbol for .data section.  */
24559       if (TARGET_FDPIC
24560 	  && !making_const_table
24561 	  && SYMBOL_REF_P (x)
24562 	  && SYMBOL_REF_FUNCTION_P (x))
24563 	fputs ("(FUNCDESC)", asm_out_file);
24564 
24565       fputc ('\n', asm_out_file);
24566       return true;
24567     }
24568 
24569   mode = GET_MODE (x);
24570 
24571   if (arm_vector_mode_supported_p (mode))
24572     {
24573       int i, units;
24574 
24575       gcc_assert (GET_CODE (x) == CONST_VECTOR);
24576 
24577       units = CONST_VECTOR_NUNITS (x);
24578       size = GET_MODE_UNIT_SIZE (mode);
24579 
24580       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24581         for (i = 0; i < units; i++)
24582 	  {
24583 	    rtx elt = CONST_VECTOR_ELT (x, i);
24584 	    assemble_integer
24585 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24586 	  }
24587       else
24588         for (i = 0; i < units; i++)
24589           {
24590             rtx elt = CONST_VECTOR_ELT (x, i);
24591 	    assemble_real
24592 	      (*CONST_DOUBLE_REAL_VALUE (elt),
24593 	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24594 	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24595           }
24596 
24597       return true;
24598     }
24599 
24600   return default_assemble_integer (x, size, aligned_p);
24601 }
24602 
24603 static void
24604 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24605 {
24606   section *s;
24607 
24608   if (!TARGET_AAPCS_BASED)
24609     {
24610       (is_ctor ?
24611        default_named_section_asm_out_constructor
24612        : default_named_section_asm_out_destructor) (symbol, priority);
24613       return;
24614     }
24615 
24616   /* Put these in the .init_array section, using a special relocation.  */
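  /* For a non-default priority the section name built below is, e.g.,
     ".init_array.00100" for priority 100.  */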
24617   if (priority != DEFAULT_INIT_PRIORITY)
24618     {
24619       char buf[18];
24620       sprintf (buf, "%s.%.5u",
24621 	       is_ctor ? ".init_array" : ".fini_array",
24622 	       priority);
24623       s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24624     }
24625   else if (is_ctor)
24626     s = ctors_section;
24627   else
24628     s = dtors_section;
24629 
24630   switch_to_section (s);
24631   assemble_align (POINTER_SIZE);
24632   fputs ("\t.word\t", asm_out_file);
24633   output_addr_const (asm_out_file, symbol);
24634   fputs ("(target1)\n", asm_out_file);
24635 }
24636 
24637 /* Add a function to the list of static constructors.  */
24638 
24639 static void
24640 arm_elf_asm_constructor (rtx symbol, int priority)
24641 {
24642   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24643 }
24644 
24645 /* Add a function to the list of static destructors.  */
24646 
24647 static void
24648 arm_elf_asm_destructor (rtx symbol, int priority)
24649 {
24650   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24651 }
24652 
24653 /* A finite state machine takes care of noticing whether or not instructions
24654    can be conditionally executed, and thus decrease execution time and code
24655    size by deleting branch instructions.  The fsm is controlled by
24656    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
24657 
24658 /* The states of the fsm controlling condition codes are:
24659    0: normal, do nothing special
24660    1: make ASM_OUTPUT_OPCODE not output this instruction
24661    2: make ASM_OUTPUT_OPCODE not output this instruction
24662    3: make instructions conditional
24663    4: make instructions conditional
24664 
24665    State transitions (state->state by whom under condition):
24666    0 -> 1 final_prescan_insn if the `target' is a label
24667    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24668    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24669    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24670    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24671           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24672    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24673           (the target insn is arm_target_insn).
24674 
24675    If the jump clobbers the conditions then we use states 2 and 4.
24676 
24677    A similar thing can be done with conditional return insns.
24678 
24679    XXX In case the `target' is an unconditional branch, this conditionalising
24680    of the instructions always reduces code size, but not always execution
24681    time.  But then, I want to reduce the code size to somewhere near what
24682    /bin/cc produces.  */
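/* For illustration (ARM mode): given

	bne	.L1
	mov	r0, #0
   .L1:

   the fsm suppresses the branch and the mov is emitted as "moveq r0, #0",
   since the skipped insn must execute only when the branch would not have
   been taken.  */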
24683 
24684 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24685    instructions.  When a COND_EXEC instruction is seen the subsequent
24686    instructions are scanned so that multiple conditional instructions can be
24687    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
24688    specify the length and true/false mask for the IT block.  These will be
24689    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
24690 
24691 /* Returns the index of the ARM condition code string in
24692    `arm_condition_codes', or ARM_NV if the comparison is invalid.
24693    COMPARISON should be an rtx like `(eq (...) (...))'.  */
24694 
24695 enum arm_cond_code
24696 maybe_get_arm_condition_code (rtx comparison)
24697 {
24698   machine_mode mode = GET_MODE (XEXP (comparison, 0));
24699   enum arm_cond_code code;
24700   enum rtx_code comp_code = GET_CODE (comparison);
24701 
24702   if (GET_MODE_CLASS (mode) != MODE_CC)
24703     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24704 			   XEXP (comparison, 1));
24705 
24706   switch (mode)
24707     {
24708     case E_CC_DNEmode: code = ARM_NE; goto dominance;
24709     case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24710     case E_CC_DGEmode: code = ARM_GE; goto dominance;
24711     case E_CC_DGTmode: code = ARM_GT; goto dominance;
24712     case E_CC_DLEmode: code = ARM_LE; goto dominance;
24713     case E_CC_DLTmode: code = ARM_LT; goto dominance;
24714     case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24715     case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24716     case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24717     case E_CC_DLTUmode: code = ARM_CC;
24718 
24719     dominance:
24720       if (comp_code == EQ)
24721 	return ARM_INVERSE_CONDITION_CODE (code);
24722       if (comp_code == NE)
24723 	return code;
24724       return ARM_NV;
24725 
24726     case E_CC_NZmode:
24727       switch (comp_code)
24728 	{
24729 	case NE: return ARM_NE;
24730 	case EQ: return ARM_EQ;
24731 	case GE: return ARM_PL;
24732 	case LT: return ARM_MI;
24733 	default: return ARM_NV;
24734 	}
24735 
24736     case E_CC_Zmode:
24737       switch (comp_code)
24738 	{
24739 	case NE: return ARM_NE;
24740 	case EQ: return ARM_EQ;
24741 	default: return ARM_NV;
24742 	}
24743 
24744     case E_CC_Nmode:
24745       switch (comp_code)
24746 	{
24747 	case NE: return ARM_MI;
24748 	case EQ: return ARM_PL;
24749 	default: return ARM_NV;
24750 	}
24751 
24752     case E_CCFPEmode:
24753     case E_CCFPmode:
24754       /* We can handle all cases except UNEQ and LTGT.  */
24755       switch (comp_code)
24756 	{
24757 	case GE: return ARM_GE;
24758 	case GT: return ARM_GT;
24759 	case LE: return ARM_LS;
24760 	case LT: return ARM_MI;
24761 	case NE: return ARM_NE;
24762 	case EQ: return ARM_EQ;
24763 	case ORDERED: return ARM_VC;
24764 	case UNORDERED: return ARM_VS;
24765 	case UNLT: return ARM_LT;
24766 	case UNLE: return ARM_LE;
24767 	case UNGT: return ARM_HI;
24768 	case UNGE: return ARM_PL;
24769 	  /* UNEQ and LTGT do not have a representation.  */
24770 	case UNEQ: /* Fall through.  */
24771 	case LTGT: /* Fall through.  */
24772 	default: return ARM_NV;
24773 	}
24774 
24775     case E_CC_SWPmode:
24776       switch (comp_code)
24777 	{
24778 	case NE: return ARM_NE;
24779 	case EQ: return ARM_EQ;
24780 	case GE: return ARM_LE;
24781 	case GT: return ARM_LT;
24782 	case LE: return ARM_GE;
24783 	case LT: return ARM_GT;
24784 	case GEU: return ARM_LS;
24785 	case GTU: return ARM_CC;
24786 	case LEU: return ARM_CS;
24787 	case LTU: return ARM_HI;
24788 	default: return ARM_NV;
24789 	}
24790 
24791     case E_CC_Cmode:
24792       switch (comp_code)
24793 	{
24794 	case LTU: return ARM_CS;
24795 	case GEU: return ARM_CC;
24796 	default: return ARM_NV;
24797 	}
24798 
24799     case E_CC_NVmode:
24800       switch (comp_code)
24801 	{
24802 	case GE: return ARM_GE;
24803 	case LT: return ARM_LT;
24804 	default: return ARM_NV;
24805 	}
24806 
24807     case E_CC_Bmode:
24808       switch (comp_code)
24809 	{
24810 	case GEU: return ARM_CS;
24811 	case LTU: return ARM_CC;
24812 	default: return ARM_NV;
24813 	}
24814 
24815     case E_CC_Vmode:
24816       switch (comp_code)
24817 	{
24818 	case NE: return ARM_VS;
24819 	case EQ: return ARM_VC;
24820 	default: return ARM_NV;
24821 	}
24822 
24823     case E_CC_ADCmode:
24824       switch (comp_code)
24825 	{
24826 	case GEU: return ARM_CS;
24827 	case LTU: return ARM_CC;
24828 	default: return ARM_NV;
24829 	}
24830 
24831     case E_CCmode:
24832     case E_CC_RSBmode:
24833       switch (comp_code)
24834 	{
24835 	case NE: return ARM_NE;
24836 	case EQ: return ARM_EQ;
24837 	case GE: return ARM_GE;
24838 	case GT: return ARM_GT;
24839 	case LE: return ARM_LE;
24840 	case LT: return ARM_LT;
24841 	case GEU: return ARM_CS;
24842 	case GTU: return ARM_HI;
24843 	case LEU: return ARM_LS;
24844 	case LTU: return ARM_CC;
24845 	default: return ARM_NV;
24846 	}
24847 
24848     default: gcc_unreachable ();
24849     }
24850 }
24851 
24852 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
24853 static enum arm_cond_code
24854 get_arm_condition_code (rtx comparison)
24855 {
24856   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24857   gcc_assert (code != ARM_NV);
24858   return code;
24859 }
24860 
24861 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
24862    code registers when not targeting Thumb1.  The VFP condition register
24863    only exists when generating hard-float code.  */
24864 static bool
24865 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24866 {
24867   if (!TARGET_32BIT)
24868     return false;
24869 
24870   *p1 = CC_REGNUM;
24871   *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24872   return true;
24873 }
24874 
24875 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24876    instructions.  */
24877 void
24878 thumb2_final_prescan_insn (rtx_insn *insn)
24879 {
24880   rtx_insn *first_insn = insn;
24881   rtx body = PATTERN (insn);
24882   rtx predicate;
24883   enum arm_cond_code code;
24884   int n;
24885   int mask;
24886   int max;
24887 
24888   /* max_insns_skipped in the tune was already taken into account in the
24889      cost model of the ifcvt pass when generating COND_EXEC insns.  At this
24890      stage just emit IT blocks as large as we can; it does not make sense
24891      to split the IT blocks.  */
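  /* E.g. two consecutive COND_EXEC insns, the first predicated on EQ and
     the second on NE, end up under a single "ite eq" IT instruction.  */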
24892   max = MAX_INSN_PER_IT_BLOCK;
24893 
24894   /* Remove the previous insn from the count of insns to be output.  */
24895   if (arm_condexec_count)
24896       arm_condexec_count--;
24897 
24898   /* Nothing to do if we are already inside a conditional block.  */
24899   if (arm_condexec_count)
24900     return;
24901 
24902   if (GET_CODE (body) != COND_EXEC)
24903     return;
24904 
24905   /* Conditional jumps are implemented directly.  */
24906   if (JUMP_P (insn))
24907     return;
24908 
24909   predicate = COND_EXEC_TEST (body);
24910   arm_current_cc = get_arm_condition_code (predicate);
24911 
24912   n = get_attr_ce_count (insn);
24913   arm_condexec_count = 1;
24914   arm_condexec_mask = (1 << n) - 1;
24915   arm_condexec_masklen = n;
24916   /* See if subsequent instructions can be combined into the same block.  */
24917   for (;;)
24918     {
24919       insn = next_nonnote_insn (insn);
24920 
24921       /* Jumping into the middle of an IT block is illegal, so a label or
24922          barrier terminates the block.  */
24923       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24924 	break;
24925 
24926       body = PATTERN (insn);
24927       /* USE and CLOBBER aren't really insns, so just skip them.  */
24928       if (GET_CODE (body) == USE
24929 	  || GET_CODE (body) == CLOBBER)
24930 	continue;
24931 
24932       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
24933       if (GET_CODE (body) != COND_EXEC)
24934 	break;
24935       /* Maximum number of conditionally executed instructions in a block.  */
24936       n = get_attr_ce_count (insn);
24937       if (arm_condexec_masklen + n > max)
24938 	break;
24939 
24940       predicate = COND_EXEC_TEST (body);
24941       code = get_arm_condition_code (predicate);
24942       mask = (1 << n) - 1;
24943       if (arm_current_cc == code)
24944 	arm_condexec_mask |= (mask << arm_condexec_masklen);
24945       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24946 	break;
24947 
24948       arm_condexec_count++;
24949       arm_condexec_masklen += n;
24950 
24951       /* A jump must be the last instruction in a conditional block.  */
24952       if (JUMP_P (insn))
24953 	break;
24954     }
24955   /* Restore recog_data (getting the attributes of other insns can
24956      destroy this array, but final.c assumes that it remains intact
24957      across this call).  */
24958   extract_constrain_insn_cached (first_insn);
24959 }
24960 
24961 void
24962 arm_final_prescan_insn (rtx_insn *insn)
24963 {
24964   /* BODY will hold the body of INSN.  */
24965   rtx body = PATTERN (insn);
24966 
24967   /* This will be 1 if trying to repeat the trick, and things need to be
24968      reversed if it appears to fail.  */
24969   int reverse = 0;
24970 
24971   /* If we start with a return insn, we only succeed if we find another one.  */
24972   int seeking_return = 0;
24973   enum rtx_code return_code = UNKNOWN;
24974 
24975   /* START_INSN will hold the insn from where we start looking.  This is the
24976      first insn after the following code_label if REVERSE is true.  */
24977   rtx_insn *start_insn = insn;
24978 
24979   /* If in state 4, check if the target branch is reached, in order to
24980      change back to state 0.  */
24981   if (arm_ccfsm_state == 4)
24982     {
24983       if (insn == arm_target_insn)
24984 	{
24985 	  arm_target_insn = NULL;
24986 	  arm_ccfsm_state = 0;
24987 	}
24988       return;
24989     }
24990 
24991   /* If in state 3, it is possible to repeat the trick, if this insn is an
24992      unconditional branch to a label, and immediately following this branch
24993      is the previous target label which is only used once, and the label this
24994      branch jumps to is not too far off.  */
24995   if (arm_ccfsm_state == 3)
24996     {
24997       if (simplejump_p (insn))
24998 	{
24999 	  start_insn = next_nonnote_insn (start_insn);
25000 	  if (BARRIER_P (start_insn))
25001 	    {
25002 	      /* XXX Isn't this always a barrier?  */
25003 	      start_insn = next_nonnote_insn (start_insn);
25004 	    }
25005 	  if (LABEL_P (start_insn)
25006 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25007 	      && LABEL_NUSES (start_insn) == 1)
25008 	    reverse = TRUE;
25009 	  else
25010 	    return;
25011 	}
25012       else if (ANY_RETURN_P (body))
25013         {
25014 	  start_insn = next_nonnote_insn (start_insn);
25015 	  if (BARRIER_P (start_insn))
25016 	    start_insn = next_nonnote_insn (start_insn);
25017 	  if (LABEL_P (start_insn)
25018 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25019 	      && LABEL_NUSES (start_insn) == 1)
25020 	    {
25021 	      reverse = TRUE;
25022 	      seeking_return = 1;
25023 	      return_code = GET_CODE (body);
25024 	    }
25025 	  else
25026 	    return;
25027         }
25028       else
25029 	return;
25030     }
25031 
25032   gcc_assert (!arm_ccfsm_state || reverse);
25033   if (!JUMP_P (insn))
25034     return;
25035 
25036   /* This jump might be paralleled with a clobber of the condition codes;
25037      the jump should always come first.  */
25038   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25039     body = XVECEXP (body, 0, 0);
25040 
25041   if (reverse
25042       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25043 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25044     {
25045       int insns_skipped;
25046       int fail = FALSE, succeed = FALSE;
25047       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
25048       int then_not_else = TRUE;
25049       rtx_insn *this_insn = start_insn;
25050       rtx label = 0;
25051 
25052       /* Register the insn jumped to.  */
25053       if (reverse)
25054         {
25055 	  if (!seeking_return)
25056 	    label = XEXP (SET_SRC (body), 0);
25057         }
25058       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25059 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
25060       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25061 	{
25062 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
25063 	  then_not_else = FALSE;
25064 	}
25065       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25066 	{
25067 	  seeking_return = 1;
25068 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25069 	}
25070       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25071         {
25072 	  seeking_return = 1;
25073 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25074 	  then_not_else = FALSE;
25075         }
25076       else
25077 	gcc_unreachable ();
25078 
25079       /* See how many insns this branch skips, and what kind of insns.  If all
25080 	 insns are okay, and the label or unconditional branch to the same
25081 	 label is not too far away, succeed.  */
25082       for (insns_skipped = 0;
25083 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25084 	{
25085 	  rtx scanbody;
25086 
25087 	  this_insn = next_nonnote_insn (this_insn);
25088 	  if (!this_insn)
25089 	    break;
25090 
25091 	  switch (GET_CODE (this_insn))
25092 	    {
25093 	    case CODE_LABEL:
25094 	      /* Succeed if it is the target label, otherwise fail since
25095 		 control falls in from somewhere else.  */
25096 	      if (this_insn == label)
25097 		{
25098 		  arm_ccfsm_state = 1;
25099 		  succeed = TRUE;
25100 		}
25101 	      else
25102 		fail = TRUE;
25103 	      break;
25104 
25105 	    case BARRIER:
25106 	      /* Succeed if the following insn is the target label.
25107 		 Otherwise fail.
25108 		 If return insns are used then the last insn in a function
25109 		 will be a barrier.  */
25110 	      this_insn = next_nonnote_insn (this_insn);
25111 	      if (this_insn && this_insn == label)
25112 		{
25113 		  arm_ccfsm_state = 1;
25114 		  succeed = TRUE;
25115 		}
25116 	      else
25117 		fail = TRUE;
25118 	      break;
25119 
25120 	    case CALL_INSN:
25121 	      /* The AAPCS says that conditional calls should not be
25122 		 used since they make interworking inefficient (the
25123 		 linker can't transform BL<cond> into BLX).  That's
25124 		 only a problem if the machine has BLX.  */
25125 	      if (arm_arch5t)
25126 		{
25127 		  fail = TRUE;
25128 		  break;
25129 		}
25130 
25131 	      /* Succeed if the following insn is the target label, or
25132 		 if the following two insns are a barrier and the
25133 		 target label.  */
25134 	      this_insn = next_nonnote_insn (this_insn);
25135 	      if (this_insn && BARRIER_P (this_insn))
25136 		this_insn = next_nonnote_insn (this_insn);
25137 
25138 	      if (this_insn && this_insn == label
25139 		  && insns_skipped < max_insns_skipped)
25140 		{
25141 		  arm_ccfsm_state = 1;
25142 		  succeed = TRUE;
25143 		}
25144 	      else
25145 		fail = TRUE;
25146 	      break;
25147 
25148 	    case JUMP_INSN:
25149       	      /* If this is an unconditional branch to the same label, succeed.
25150 		 If it is to another label, do nothing.  If it is conditional,
25151 		 fail.  */
25152 	      /* XXX Probably, the tests for SET and the PC are
25153 		 unnecessary.  */
25154 
25155 	      scanbody = PATTERN (this_insn);
25156 	      if (GET_CODE (scanbody) == SET
25157 		  && GET_CODE (SET_DEST (scanbody)) == PC)
25158 		{
25159 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25160 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25161 		    {
25162 		      arm_ccfsm_state = 2;
25163 		      succeed = TRUE;
25164 		    }
25165 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25166 		    fail = TRUE;
25167 		}
25168 	      /* Fail if a conditional return is undesirable (e.g. on a
25169 		 StrongARM), but still allow this if optimizing for size.  */
25170 	      else if (GET_CODE (scanbody) == return_code
25171 		       && !use_return_insn (TRUE, NULL)
25172 		       && !optimize_size)
25173 		fail = TRUE;
25174 	      else if (GET_CODE (scanbody) == return_code)
25175 	        {
25176 		  arm_ccfsm_state = 2;
25177 		  succeed = TRUE;
25178 	        }
25179 	      else if (GET_CODE (scanbody) == PARALLEL)
25180 	        {
25181 		  switch (get_attr_conds (this_insn))
25182 		    {
25183 		    case CONDS_NOCOND:
25184 		      break;
25185 		    default:
25186 		      fail = TRUE;
25187 		      break;
25188 		    }
25189 		}
25190 	      else
25191 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
25192 
25193 	      break;
25194 
25195 	    case INSN:
25196 	      /* Instructions using or affecting the condition codes make it
25197 		 fail.  */
25198 	      scanbody = PATTERN (this_insn);
25199 	      if (!(GET_CODE (scanbody) == SET
25200 		    || GET_CODE (scanbody) == PARALLEL)
25201 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
25202 		fail = TRUE;
25203 	      break;
25204 
25205 	    default:
25206 	      break;
25207 	    }
25208 	}
25209       if (succeed)
25210 	{
25211 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25212 	    arm_target_label = CODE_LABEL_NUMBER (label);
25213 	  else
25214 	    {
25215 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
25216 
25217 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25218 	        {
25219 		  this_insn = next_nonnote_insn (this_insn);
25220 		  gcc_assert (!this_insn
25221 			      || (!BARRIER_P (this_insn)
25222 				  && !LABEL_P (this_insn)));
25223 	        }
25224 	      if (!this_insn)
25225 	        {
25226 		  /* Oh, dear!  We ran off the end; give up.  */
25227 		  extract_constrain_insn_cached (insn);
25228 		  arm_ccfsm_state = 0;
25229 		  arm_target_insn = NULL;
25230 		  return;
25231 	        }
25232 	      arm_target_insn = this_insn;
25233 	    }
25234 
25235 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25236 	     what it was.  */
25237 	  if (!reverse)
25238 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25239 
25240 	  if (reverse || then_not_else)
25241 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25242 	}
25243 
25244       /* Restore recog_data (getting the attributes of other insns can
25245 	 destroy this array, but final.c assumes that it remains intact
25246 	 across this call).  */
25247       extract_constrain_insn_cached (insn);
25248     }
25249 }
25250 
25251 /* Output IT instructions.  */
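/* For instance, with arm_condexec_masklen == 2 and only bit 0 of
   arm_condexec_mask set (second insn on the inverse condition), this emits
   "ite" followed by the current condition; the trailing "\n\t" leaves the
   first conditional opcode to be printed on the next assembler line.  */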
25252 void
25253 thumb2_asm_output_opcode (FILE * stream)
25254 {
25255   char buff[5];
25256   int n;
25257 
25258   if (arm_condexec_mask)
25259     {
25260       for (n = 0; n < arm_condexec_masklen; n++)
25261 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25262       buff[n] = 0;
25263       asm_fprintf(stream, "i%s\t%s\n\t", buff,
25264 		  arm_condition_codes[arm_current_cc]);
25265       arm_condexec_mask = 0;
25266     }
25267 }
25268 
25269 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
25270    UNITS_PER_WORD bytes wide.  */
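/* For example, ARM_NUM_REGS yields 2 for DImode, so a 64-bit value occupies
   a consecutive pair of core registers, whereas the condition-code and other
   special status registers handled in the function below always count as a
   single register.  */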
25271 static unsigned int
25272 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25273 {
25274   if (TARGET_32BIT
25275       && regno > PC_REGNUM
25276       && regno != FRAME_POINTER_REGNUM
25277       && regno != ARG_POINTER_REGNUM
25278       && !IS_VFP_REGNUM (regno))
25279     return 1;
25280 
25281   return ARM_NUM_REGS (mode);
25282 }
25283 
25284 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
25285 static bool
25286 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25287 {
25288   if (GET_MODE_CLASS (mode) == MODE_CC)
25289     return (regno == CC_REGNUM
25290 	    || (TARGET_VFP_BASE
25291 		&& regno == VFPCC_REGNUM));
25292 
25293   if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25294     return false;
25295 
25296   if (IS_VPR_REGNUM (regno))
25297     return mode == HImode;
25298 
25299   if (TARGET_THUMB1)
25300     /* For the Thumb we only allow values bigger than SImode in
25301        registers 0 - 6, so that there is always a second low
25302        register available to hold the upper part of the value.
25303        We probably ought to ensure that the register is the
25304        start of an even numbered register pair.  */
25305     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25306 
25307   if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25308     {
25309       if (mode == DFmode || mode == DImode)
25310 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
25311 
25312       if (mode == HFmode || mode == BFmode || mode == HImode
25313 	  || mode == SFmode || mode == SImode)
25314 	return VFP_REGNO_OK_FOR_SINGLE (regno);
25315 
25316       if (TARGET_NEON)
25317         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25318                || (VALID_NEON_QREG_MODE (mode)
25319                    && NEON_REGNO_OK_FOR_QUAD (regno))
25320 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25321 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25322 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25323 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25324 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25325      if (TARGET_HAVE_MVE)
25326        return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25327 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25328 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25329 
25330       return false;
25331     }
25332 
25333   if (TARGET_REALLY_IWMMXT)
25334     {
25335       if (IS_IWMMXT_GR_REGNUM (regno))
25336 	return mode == SImode;
25337 
25338       if (IS_IWMMXT_REGNUM (regno))
25339 	return VALID_IWMMXT_REG_MODE (mode);
25340     }
25341 
25342   /* We allow almost any value to be stored in the general registers.
25343      Restrict doubleword quantities to even register pairs in ARM state
25344      so that we can use ldrd. The same restriction applies for MVE
25345      in order to support Armv8.1-M Mainline instructions.
25346      Do not allow very large Neon structure opaque modes in general
25347      registers; they would use too many.  */
25348   if (regno <= LAST_ARM_REGNUM)
25349     {
25350       if (ARM_NUM_REGS (mode) > 4)
25351 	return false;
25352 
25353       if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25354 	return true;
25355 
25356       return !((TARGET_LDRD || TARGET_CDE)
25357 	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25358     }
25359 
25360   if (regno == FRAME_POINTER_REGNUM
25361       || regno == ARG_POINTER_REGNUM)
25362     /* We only allow integers in the fake hard registers.  */
25363     return GET_MODE_CLASS (mode) == MODE_INT;
25364 
25365   return false;
25366 }
25367 
25368 /* Implement TARGET_MODES_TIEABLE_P.  */
25369 
25370 static bool
25371 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25372 {
25373   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25374     return true;
25375 
25376   /* We specifically want to allow elements of "structure" modes to
25377      be tieable to the structure.  This more general condition allows
25378      other rarer situations too.  */
25379   if ((TARGET_NEON
25380        && (VALID_NEON_DREG_MODE (mode1)
25381 	   || VALID_NEON_QREG_MODE (mode1)
25382 	   || VALID_NEON_STRUCT_MODE (mode1))
25383        && (VALID_NEON_DREG_MODE (mode2)
25384 	   || VALID_NEON_QREG_MODE (mode2)
25385 	   || VALID_NEON_STRUCT_MODE (mode2)))
25386       || (TARGET_HAVE_MVE
25387 	  && (VALID_MVE_MODE (mode1)
25388 	      || VALID_MVE_STRUCT_MODE (mode1))
25389 	  && (VALID_MVE_MODE (mode2)
25390 	      || VALID_MVE_STRUCT_MODE (mode2))))
25391     return true;
25392 
25393   return false;
25394 }
25395 
25396 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25397    not used in arm mode.  */
25398 
25399 enum reg_class
25400 arm_regno_class (int regno)
25401 {
25402   if (regno == PC_REGNUM)
25403     return NO_REGS;
25404 
25405   if (IS_VPR_REGNUM (regno))
25406     return VPR_REG;
25407 
25408   if (TARGET_THUMB1)
25409     {
25410       if (regno == STACK_POINTER_REGNUM)
25411 	return STACK_REG;
25412       if (regno == CC_REGNUM)
25413 	return CC_REG;
25414       if (regno < 8)
25415 	return LO_REGS;
25416       return HI_REGS;
25417     }
25418 
25419   if (TARGET_THUMB2 && regno < 8)
25420     return LO_REGS;
25421 
25422   if (   regno <= LAST_ARM_REGNUM
25423       || regno == FRAME_POINTER_REGNUM
25424       || regno == ARG_POINTER_REGNUM)
25425     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25426 
25427   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25428     return TARGET_THUMB2 ? CC_REG : NO_REGS;
25429 
25430   if (IS_VFP_REGNUM (regno))
25431     {
25432       if (regno <= D7_VFP_REGNUM)
25433 	return VFP_D0_D7_REGS;
25434       else if (regno <= LAST_LO_VFP_REGNUM)
25435         return VFP_LO_REGS;
25436       else
25437         return VFP_HI_REGS;
25438     }
25439 
25440   if (IS_IWMMXT_REGNUM (regno))
25441     return IWMMXT_REGS;
25442 
25443   if (IS_IWMMXT_GR_REGNUM (regno))
25444     return IWMMXT_GR_REGS;
25445 
25446   return NO_REGS;
25447 }
25448 
25449 /* Handle a special case when computing the offset
25450    of an argument from the frame pointer.  */
25451 int
25452 arm_debugger_arg_offset (int value, rtx addr)
25453 {
25454   rtx_insn *insn;
25455 
25456   /* We are only interested if dbxout_parms() failed to compute the offset.  */
25457   if (value != 0)
25458     return 0;
25459 
25460   /* We can only cope with the case where the address is held in a register.  */
25461   if (!REG_P (addr))
25462     return 0;
25463 
25464   /* If we are using the frame pointer to point at the argument, then
25465      an offset of 0 is correct.  */
25466   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25467     return 0;
25468 
25469   /* If we are using the stack pointer to point at the
25470      argument, then an offset of 0 is correct.  */
25471   /* ??? Check this is consistent with thumb2 frame layout.  */
25472   if ((TARGET_THUMB || !frame_pointer_needed)
25473       && REGNO (addr) == SP_REGNUM)
25474     return 0;
25475 
25476   /* Oh dear.  The argument is pointed to by a register rather
25477      than being held in a register, or being stored at a known
25478      offset from the frame pointer.  Since GDB only understands
25479      those two kinds of argument we must translate the address
25480      held in the register into an offset from the frame pointer.
25481      We do this by searching through the insns for the function
25482      looking to see where this register gets its value.  If the
25483      register is initialized from the frame pointer plus an offset
25484      then we are in luck and we can continue, otherwise we give up.
25485 
25486      This code is exercised by producing debugging information
25487      for a function with arguments like this:
25488 
25489            double func (double a, double b, int c, double d) {return d;}
25490 
25491      Without this code the stab for parameter 'd' will be set to
25492      an offset of 0 from the frame pointer, rather than 8.  */
25493 
25494   /* The if() statement says:
25495 
25496      If the insn is a normal instruction
25497      and if the insn is setting the value in a register
25498      and if the register being set is the register holding the address of the argument
25499      and if the address is computed by an addition
25500      that involves adding to a register
25501      which is the frame pointer
25502      a constant integer
25503 
25504      then...  */
25505 
25506   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25507     {
25508       if (   NONJUMP_INSN_P (insn)
25509 	  && GET_CODE (PATTERN (insn)) == SET
25510 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25511 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25512 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25513 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25514 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25515 	     )
25516 	{
25517 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25518 
25519 	  break;
25520 	}
25521     }
25522 
25523   if (value == 0)
25524     {
25525       debug_rtx (addr);
25526       warning (0, "unable to compute real location of stacked parameter");
25527       value = 8; /* XXX magic hack */
25528     }
25529 
25530   return value;
25531 }
25532 
25533 /* Implement TARGET_PROMOTED_TYPE.  */
25534 
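/* Note: returning float_type_node here means that arithmetic on __fp16
   values (the 16-bit type registered as arm_fp16_type_node) is carried out
   in single precision; half precision is used only for storage.  */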
25535 static tree
25536 arm_promoted_type (const_tree t)
25537 {
25538   if (SCALAR_FLOAT_TYPE_P (t)
25539       && TYPE_PRECISION (t) == 16
25540       && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25541     return float_type_node;
25542   return NULL_TREE;
25543 }
25544 
25545 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25546    This simply adds HFmode as a supported mode; even though we don't
25547    implement arithmetic on this type directly, it's supported by
25548    optabs conversions, much the way the double-word arithmetic is
25549    special-cased in the default hook.  */
25550 
25551 static bool
25552 arm_scalar_mode_supported_p (scalar_mode mode)
25553 {
25554   if (mode == HFmode)
25555     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25556   else if (ALL_FIXED_POINT_MODE_P (mode))
25557     return true;
25558   else
25559     return default_scalar_mode_supported_p (mode);
25560 }
25561 
25562 /* Set the value of FLT_EVAL_METHOD.
25563    ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25564 
25565     0: evaluate all operations and constants, whose semantic type has at
25566        most the range and precision of type float, to the range and
25567        precision of float; evaluate all other operations and constants to
25568        the range and precision of the semantic type;
25569 
25570     N, where _FloatN is a supported interchange floating type
25571        evaluate all operations and constants, whose semantic type has at
25572        most the range and precision of _FloatN type, to the range and
25573        precision of the _FloatN type; evaluate all other operations and
25574        constants to the range and precision of the semantic type;
25575 
25576    If we have the ARMv8.2-A extensions then we support _Float16 in native
25577    precision, so we should set this to 16.  Otherwise, we support the type,
25578    but want to evaluate expressions in float precision, so set this to
25579    0.  */
25580 
25581 static enum flt_eval_method
25582 arm_excess_precision (enum excess_precision_type type)
25583 {
25584   switch (type)
25585     {
25586       case EXCESS_PRECISION_TYPE_FAST:
25587       case EXCESS_PRECISION_TYPE_STANDARD:
25588 	/* We can calculate either in 16-bit range and precision or
25589 	   32-bit range and precision.  Make that decision based on whether
25590 	   we have native support for the ARMv8.2-A 16-bit floating-point
25591 	   instructions or not.  */
25592 	return (TARGET_VFP_FP16INST
25593 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25594 		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25595       case EXCESS_PRECISION_TYPE_IMPLICIT:
25596 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25597       default:
25598 	gcc_unreachable ();
25599     }
25600   return FLT_EVAL_METHOD_UNPREDICTABLE;
25601 }
25602 
25603 
25604 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
25605    _Float16 if we are using anything other than ieee format for 16-bit
25606    floating point.  Otherwise, punt to the default implementation.  */
25607 static opt_scalar_float_mode
25608 arm_floatn_mode (int n, bool extended)
25609 {
25610   if (!extended && n == 16)
25611     {
25612       if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25613 	return HFmode;
25614       return opt_scalar_float_mode ();
25615     }
25616 
25617   return default_floatn_mode (n, extended);
25618 }
25619 
25620 
25621 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25622    not to early-clobber SRC registers in the process.
25623 
25624    We assume that the operands described by SRC and DEST represent a
25625    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
25626    number of components into which the copy has been decomposed.  */
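/* Concretely: when the destination registers are numbered below the source
   registers (or the two do not overlap), the component moves are emitted
   low-to-high; otherwise they are emitted high-to-low, so no source
   component is overwritten before it has been copied.  */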
25627 void
25628 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25629 {
25630   unsigned int i;
25631 
25632   if (!reg_overlap_mentioned_p (operands[0], operands[1])
25633       || REGNO (operands[0]) < REGNO (operands[1]))
25634     {
25635       for (i = 0; i < count; i++)
25636 	{
25637 	  operands[2 * i] = dest[i];
25638 	  operands[2 * i + 1] = src[i];
25639 	}
25640     }
25641   else
25642     {
25643       for (i = 0; i < count; i++)
25644 	{
25645 	  operands[2 * i] = dest[count - i - 1];
25646 	  operands[2 * i + 1] = src[count - i - 1];
25647 	}
25648     }
25649 }
25650 
25651 /* Split operands into moves from op[1] + op[2] into op[0].  */
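/* op[0] is a double-width register whose low half comes from op[1] and whose
   high half comes from op[2].  The split below emits at most two half-width
   moves, ordered so that an overlapping source is not clobbered before it is
   read, and uses a single two-SET parallel (a VSWP on Neon) when the halves
   only need exchanging.  */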
25652 
25653 void
25654 neon_split_vcombine (rtx operands[3])
25655 {
25656   unsigned int dest = REGNO (operands[0]);
25657   unsigned int src1 = REGNO (operands[1]);
25658   unsigned int src2 = REGNO (operands[2]);
25659   machine_mode halfmode = GET_MODE (operands[1]);
25660   unsigned int halfregs = REG_NREGS (operands[1]);
25661   rtx destlo, desthi;
25662 
25663   if (src1 == dest && src2 == dest + halfregs)
25664     {
25665       /* No-op move.  Can't split to nothing; emit something.  */
25666       emit_note (NOTE_INSN_DELETED);
25667       return;
25668     }
25669 
25670   /* Preserve register attributes for variable tracking.  */
25671   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25672   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25673 			       GET_MODE_SIZE (halfmode));
25674 
25675   /* Special case of reversed high/low parts.  Use VSWP.  */
25676   if (src2 == dest && src1 == dest + halfregs)
25677     {
25678       rtx x = gen_rtx_SET (destlo, operands[1]);
25679       rtx y = gen_rtx_SET (desthi, operands[2]);
25680       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25681       return;
25682     }
25683 
25684   if (!reg_overlap_mentioned_p (operands[2], destlo))
25685     {
25686       /* Try to avoid unnecessary moves if part of the result
25687 	 is in the right place already.  */
25688       if (src1 != dest)
25689 	emit_move_insn (destlo, operands[1]);
25690       if (src2 != dest + halfregs)
25691 	emit_move_insn (desthi, operands[2]);
25692     }
25693   else
25694     {
25695       if (src2 != dest + halfregs)
25696 	emit_move_insn (desthi, operands[2]);
25697       if (src1 != dest)
25698 	emit_move_insn (destlo, operands[1]);
25699     }
25700 }
25701 
25702 /* Return the number (counting from 0) of
25703    the least significant set bit in MASK.  */
25704 
25705 inline static int
25706 number_of_first_bit_set (unsigned mask)
25707 {
25708   return ctz_hwi (mask);
25709 }
25710 
25711 /* Like emit_multi_reg_push, but allowing for a different set of
25712    registers to be described as saved.  MASK is the set of registers
25713    to be saved; REAL_REGS is the set of registers to be described as
25714    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
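/* The emitted insn is a PARALLEL whose first element stores the pushed block
   at the pre-decremented stack pointer; the REG_FRAME_RELATED_EXPR note
   attached below re-describes the push for the unwinder as a stack-pointer
   adjustment plus individual SImode saves of REAL_REGS.  */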
25715 
25716 static rtx_insn *
25717 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25718 {
25719   unsigned long regno;
25720   rtx par[10], tmp, reg;
25721   rtx_insn *insn;
25722   int i, j;
25723 
25724   /* Build the parallel of the registers actually being stored.  */
25725   for (i = 0; mask; ++i, mask &= mask - 1)
25726     {
25727       regno = ctz_hwi (mask);
25728       reg = gen_rtx_REG (SImode, regno);
25729 
25730       if (i == 0)
25731 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25732       else
25733 	tmp = gen_rtx_USE (VOIDmode, reg);
25734 
25735       par[i] = tmp;
25736     }
25737 
25738   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25739   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25740   tmp = gen_frame_mem (BLKmode, tmp);
25741   tmp = gen_rtx_SET (tmp, par[0]);
25742   par[0] = tmp;
25743 
25744   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25745   insn = emit_insn (tmp);
25746 
25747   /* Always build the stack adjustment note for unwind info.  */
25748   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25749   tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25750   par[0] = tmp;
25751 
25752   /* Build the parallel of the registers recorded as saved for unwind.  */
25753   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25754     {
25755       regno = ctz_hwi (real_regs);
25756       reg = gen_rtx_REG (SImode, regno);
25757 
25758       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25759       tmp = gen_frame_mem (SImode, tmp);
25760       tmp = gen_rtx_SET (tmp, reg);
25761       RTX_FRAME_RELATED_P (tmp) = 1;
25762       par[j + 1] = tmp;
25763     }
25764 
25765   if (j == 0)
25766     tmp = par[0];
25767   else
25768     {
25769       RTX_FRAME_RELATED_P (par[0]) = 1;
25770       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25771     }
25772 
25773   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25774 
25775   return insn;
25776 }
25777 
25778 /* Emit code to push or pop registers to or from the stack.  F is the
25779    assembly file.  MASK is the registers to pop.  */
25780 static void
25781 thumb_pop (FILE *f, unsigned long mask)
25782 {
25783   int regno;
25784   int lo_mask = mask & 0xFF;
25785 
25786   gcc_assert (mask);
25787 
25788   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25789     {
25790       /* Special case.  Do not generate a POP PC statement here, do it in
25791 	 thumb_exit ().  */
25792       thumb_exit (f, -1);
25793       return;
25794     }
25795 
25796   fprintf (f, "\tpop\t{");
25797 
25798   /* Look at the low registers first.  */
25799   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25800     {
25801       if (lo_mask & 1)
25802 	{
25803 	  asm_fprintf (f, "%r", regno);
25804 
25805 	  if ((lo_mask & ~1) != 0)
25806 	    fprintf (f, ", ");
25807 	}
25808     }
25809 
25810   if (mask & (1 << PC_REGNUM))
25811     {
25812       /* Catch popping the PC.  */
25813       if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25814 	  || IS_CMSE_ENTRY (arm_current_func_type ()))
25815 	{
25816 	  /* The PC is never popped directly; instead
25817 	     it is popped into r3 and then BX is used.  */
25818 	  fprintf (f, "}\n");
25819 
25820 	  thumb_exit (f, -1);
25821 
25822 	  return;
25823 	}
25824       else
25825 	{
25826 	  if (mask & 0xFF)
25827 	    fprintf (f, ", ");
25828 
25829 	  asm_fprintf (f, "%r", PC_REGNUM);
25830 	}
25831     }
25832 
25833   fprintf (f, "}\n");
25834 }
25835 
25836 /* Generate code to return from a thumb function.
25837    If 'reg_containing_return_addr' is -1, then the return address is
25838    actually on the stack, at the stack pointer.
25839 
25840    Note: do not forget to update length attribute of corresponding insn pattern
25841    when changing assembly output (eg. length attribute of epilogue_insns when
25842    updating Armv8-M Baseline Security Extensions register clearing
25843    sequences).  */
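/* Roughly: work out which argument registers may be clobbered given the size
   of the return value, pop the stacked return address and (for backtraces)
   FP/SP values into them, move each popped value to its proper register, and
   finish with BX (or BXNS for a cmse_nonsecure_entry function).  */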
25844 static void
25845 thumb_exit (FILE *f, int reg_containing_return_addr)
25846 {
25847   unsigned regs_available_for_popping;
25848   unsigned regs_to_pop;
25849   int pops_needed;
25850   unsigned available;
25851   unsigned required;
25852   machine_mode mode;
25853   int size;
25854   int restore_a4 = FALSE;
25855 
25856   /* Compute the registers we need to pop.  */
25857   regs_to_pop = 0;
25858   pops_needed = 0;
25859 
25860   if (reg_containing_return_addr == -1)
25861     {
25862       regs_to_pop |= 1 << LR_REGNUM;
25863       ++pops_needed;
25864     }
25865 
25866   if (TARGET_BACKTRACE)
25867     {
25868       /* Restore the (ARM) frame pointer and stack pointer.  */
25869       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25870       pops_needed += 2;
25871     }
25872 
25873   /* If there is nothing to pop then just emit the BX instruction and
25874      return.  */
25875   if (pops_needed == 0)
25876     {
25877       if (crtl->calls_eh_return)
25878 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25879 
25880       if (IS_CMSE_ENTRY (arm_current_func_type ()))
25881 	{
25882 	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25883 	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
25884 	  if (!TARGET_HAVE_FPCXT_CMSE)
25885 	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25886 			 reg_containing_return_addr);
25887 	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25888 	}
25889       else
25890 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25891       return;
25892     }
25893   /* Otherwise if we are not supporting interworking and we have not created
25894      a backtrace structure and the function was not entered in ARM mode then
25895      just pop the return address straight into the PC.  */
25896   else if (!TARGET_INTERWORK
25897 	   && !TARGET_BACKTRACE
25898 	   && !is_called_in_ARM_mode (current_function_decl)
25899 	   && !crtl->calls_eh_return
25900 	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
25901     {
25902       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25903       return;
25904     }
25905 
25906   /* Find out how many of the (return) argument registers we can corrupt.  */
25907   regs_available_for_popping = 0;
25908 
25909   /* If returning via __builtin_eh_return, the bottom three registers
25910      all contain information needed for the return.  */
25911   if (crtl->calls_eh_return)
25912     size = 12;
25913   else
25914     {
25915       /* We can deduce the registers used from the function's
25916 	 return value.  This is more reliable than examining
25917 	 df_regs_ever_live_p () because that will be set if the register is
25918 	 ever used in the function, not just if the register is used
25919 	 to hold a return value.  */
25920 
25921       if (crtl->return_rtx != 0)
25922 	mode = GET_MODE (crtl->return_rtx);
25923       else
25924 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
25925 
25926       size = GET_MODE_SIZE (mode);
25927 
25928       if (size == 0)
25929 	{
25930 	  /* In a void function we can use any argument register.
25931 	     In a function that returns a structure on the stack
25932 	     we can use the second and third argument registers.  */
25933 	  if (mode == VOIDmode)
25934 	    regs_available_for_popping =
25935 	      (1 << ARG_REGISTER (1))
25936 	      | (1 << ARG_REGISTER (2))
25937 	      | (1 << ARG_REGISTER (3));
25938 	  else
25939 	    regs_available_for_popping =
25940 	      (1 << ARG_REGISTER (2))
25941 	      | (1 << ARG_REGISTER (3));
25942 	}
25943       else if (size <= 4)
25944 	regs_available_for_popping =
25945 	  (1 << ARG_REGISTER (2))
25946 	  | (1 << ARG_REGISTER (3));
25947       else if (size <= 8)
25948 	regs_available_for_popping =
25949 	  (1 << ARG_REGISTER (3));
25950     }
25951 
25952   /* Match registers to be popped with registers into which we pop them.  */
25953   for (available = regs_available_for_popping,
25954        required  = regs_to_pop;
25955        required != 0 && available != 0;
25956        available &= ~(available & - available),
25957        required  &= ~(required  & - required))
25958     -- pops_needed;
25959 
25960   /* If we have any popping registers left over, remove them.  */
25961   if (available > 0)
25962     regs_available_for_popping &= ~available;
25963 
25964   /* Otherwise if we need another popping register we can use
25965      the fourth argument register.  */
25966   else if (pops_needed)
25967     {
25968       /* If we have not found any free argument registers and
25969 	 reg a4 contains the return address, we must move it.  */
25970       if (regs_available_for_popping == 0
25971 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
25972 	{
25973 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25974 	  reg_containing_return_addr = LR_REGNUM;
25975 	}
25976       else if (size > 12)
25977 	{
25978 	  /* Register a4 is being used to hold part of the return value,
25979 	     but we have dire need of a free, low register.  */
25980 	  restore_a4 = TRUE;
25981 
25982 	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25983 	}
25984 
25985       if (reg_containing_return_addr != LAST_ARG_REGNUM)
25986 	{
25987 	  /* The fourth argument register is available.  */
25988 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25989 
25990 	  --pops_needed;
25991 	}
25992     }
25993 
25994   /* Pop as many registers as we can.  */
25995   thumb_pop (f, regs_available_for_popping);
25996 
25997   /* Process the registers we popped.  */
25998   if (reg_containing_return_addr == -1)
25999     {
26000       /* The return address was popped into the lowest numbered register.  */
26001       regs_to_pop &= ~(1 << LR_REGNUM);
26002 
26003       reg_containing_return_addr =
26004 	number_of_first_bit_set (regs_available_for_popping);
26005 
26006       /* Remove this register from the mask of available registers, so that
26007          the return address will not be corrupted by further pops.  */
26008       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26009     }
26010 
26011   /* If we popped other registers then handle them here.  */
26012   if (regs_available_for_popping)
26013     {
26014       int frame_pointer;
26015 
26016       /* Work out which register currently contains the frame pointer.  */
26017       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26018 
26019       /* Move it into the correct place.  */
26020       asm_fprintf (f, "\tmov\t%r, %r\n",
26021 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26022 
26023       /* (Temporarily) remove it from the mask of popped registers.  */
26024       regs_available_for_popping &= ~(1 << frame_pointer);
26025       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26026 
26027       if (regs_available_for_popping)
26028 	{
26029 	  int stack_pointer;
26030 
26031 	  /* We popped the stack pointer as well,
26032 	     find the register that contains it.  */
26033 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26034 
26035 	  /* Move it into the stack register.  */
26036 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26037 
26038 	  /* At this point we have popped all necessary registers, so
26039 	     do not worry about restoring regs_available_for_popping
26040 	     to its correct value:
26041 
26042 	     assert (pops_needed == 0)
26043 	     assert (regs_available_for_popping == (1 << frame_pointer))
26044 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
26045 	}
26046       else
26047 	{
26048 	  /* Since we have just moved the popped value into the frame
26049 	     pointer, the popping register is available for reuse, and
26050 	     we know that we still have the stack pointer left to pop.  */
26051 	  regs_available_for_popping |= (1 << frame_pointer);
26052 	}
26053     }
26054 
26055   /* If we still have registers left on the stack, but we no longer have
26056      any registers into which we can pop them, then we must move the return
26057      address into the link register and make available the register that
26058      contained it.  */
26059   if (regs_available_for_popping == 0 && pops_needed > 0)
26060     {
26061       regs_available_for_popping |= 1 << reg_containing_return_addr;
26062 
26063       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26064 		   reg_containing_return_addr);
26065 
26066       reg_containing_return_addr = LR_REGNUM;
26067     }
26068 
26069   /* If we have registers left on the stack then pop some more.
26070      We know that at most we will want to pop FP and SP.  */
26071   if (pops_needed > 0)
26072     {
26073       int  popped_into;
26074       int  move_to;
26075 
26076       thumb_pop (f, regs_available_for_popping);
26077 
26078       /* We have popped either FP or SP.
26079 	 Move whichever one it is into the correct register.  */
26080       popped_into = number_of_first_bit_set (regs_available_for_popping);
26081       move_to     = number_of_first_bit_set (regs_to_pop);
26082 
26083       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26084       --pops_needed;
26085     }
26086 
26087   /* If we still have not popped everything then we must have only
26088      had one register available to us and we are now popping the SP.  */
26089   if (pops_needed > 0)
26090     {
26091       int  popped_into;
26092 
26093       thumb_pop (f, regs_available_for_popping);
26094 
26095       popped_into = number_of_first_bit_set (regs_available_for_popping);
26096 
26097       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26098       /*
26099 	assert (regs_to_pop == (1 << STACK_POINTER))
26100 	assert (pops_needed == 1)
26101       */
26102     }
26103 
26104   /* If necessary restore the a4 register.  */
26105   if (restore_a4)
26106     {
26107       if (reg_containing_return_addr != LR_REGNUM)
26108 	{
26109 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26110 	  reg_containing_return_addr = LR_REGNUM;
26111 	}
26112 
26113       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26114     }
26115 
26116   if (crtl->calls_eh_return)
26117     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26118 
26119   /* Return to caller.  */
26120   if (IS_CMSE_ENTRY (arm_current_func_type ()))
26121     {
26122       /* This is for the cases where LR is not being used to contain the return
26123          address.  It may therefore contain information that we might not want
26124 	 to leak, hence it must be cleared.  The value in R0 will never be a
26125 	 secret at this point, so it is safe to use it, see the clearing code
26126 	 in cmse_nonsecure_entry_clear_before_return ().  */
26127       if (reg_containing_return_addr != LR_REGNUM)
26128 	asm_fprintf (f, "\tmov\tlr, r0\n");
26129 
26130       /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26131 	 by cmse_nonsecure_entry_clear_before_return ().  */
26132       if (!TARGET_HAVE_FPCXT_CMSE)
26133 	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26134       asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26135     }
26136   else
26137     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26138 }
26139 
26140 /* Scan INSN just before assembler is output for it.
26141    For Thumb-1, we track the status of the condition codes; this
26142    information is used in the cbranchsi4_insn pattern.  */
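/* In particular, when an insn sets the flags as a side effect (CONDS_SET) we
   record its destination and the implicit comparison operand, so that the
   cbranchsi4_insn output code can, in the common case, omit a redundant
   compare against zero when the flags already describe the value tested.  */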
26143 void
26144 thumb1_final_prescan_insn (rtx_insn *insn)
26145 {
26146   if (flag_print_asm_name)
26147     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26148 		 INSN_ADDRESSES (INSN_UID (insn)));
26149   /* Don't overwrite the previous setter when we get to a cbranch.  */
26150   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26151     {
26152       enum attr_conds conds;
26153 
26154       if (cfun->machine->thumb1_cc_insn)
26155 	{
26156 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26157 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26158 	    CC_STATUS_INIT;
26159 	}
26160       conds = get_attr_conds (insn);
26161       if (conds == CONDS_SET)
26162 	{
26163 	  rtx set = single_set (insn);
26164 	  cfun->machine->thumb1_cc_insn = insn;
26165 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26166 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
26167 	  cfun->machine->thumb1_cc_mode = CC_NZmode;
26168 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26169 	    {
26170 	      rtx src1 = XEXP (SET_SRC (set), 1);
26171 	      if (src1 == const0_rtx)
26172 		cfun->machine->thumb1_cc_mode = CCmode;
26173 	    }
26174 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26175 	    {
26176 	      /* Record the src register operand instead of dest because
26177 		 cprop_hardreg pass propagates src.  */
26178 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26179 	    }
26180 	}
26181       else if (conds != CONDS_NOCOND)
26182 	cfun->machine->thumb1_cc_insn = NULL_RTX;
26183     }
26184 
26185     /* Check if unexpected far jump is used.  */
26186     if (cfun->machine->lr_save_eliminated
26187         && get_attr_far_jump (insn) == FAR_JUMP_YES)
26188       internal_error ("Unexpected thumb1 far jump");
26189 }
26190 
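/* Return nonzero if VAL (viewed as a 32-bit constant) is an 8-bit value
   shifted left by some amount, i.e. a constant that can be materialized in
   Thumb-1 with a move of a byte immediate followed by a left shift.  */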
26191 int
26192 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26193 {
26194   unsigned HOST_WIDE_INT mask = 0xff;
26195   int i;
26196 
26197   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26198   if (val == 0) /* XXX */
26199     return 0;
26200 
26201   for (i = 0; i < 25; i++)
26202     if ((val & (mask << i)) == val)
26203       return 1;
26204 
26205   return 0;
26206 }
26207 
26208 /* Returns nonzero if the current function contains,
26209    or might contain a far jump.  */
26210 static int
26211 thumb_far_jump_used_p (void)
26212 {
26213   rtx_insn *insn;
26214   bool far_jump = false;
26215   unsigned int func_size = 0;
26216 
26217   /* If we have already decided that far jumps may be used,
26218      do not bother checking again, and always return true even if
26219      it turns out that they are not being used.  Once we have made
26220      the decision that far jumps are present (and that hence the link
26221      register will be pushed onto the stack) we cannot go back on it.  */
26222   if (cfun->machine->far_jump_used)
26223     return 1;
26224 
26225   /* If this function is not being called from the prologue/epilogue
26226      generation code then it must be being called from the
26227      INITIAL_ELIMINATION_OFFSET macro.  */
26228   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26229     {
26230       /* In this case we know that we are being asked about the elimination
26231 	 of the arg pointer register.  If that register is not being used,
26232 	 then there are no arguments on the stack, and we do not have to
26233 	 worry that a far jump might force the prologue to push the link
26234 	 register, changing the stack offsets.  In this case we can just
26235 	 return false, since the presence of far jumps in the function will
26236 	 not affect stack offsets.
26237 
26238 	 If the arg pointer is live (or if it was live, but has now been
26239 	 eliminated and so set to dead) then we do have to test to see if
26240 	 the function might contain a far jump.  This test can lead to some
26241 	 false negatives, since before reload is completed the length of
26242 	 branch instructions is not known, so gcc defaults to returning their
26243 	 longest length, which in turn sets the far jump attribute to true.
26244 
26245 	 A false negative will not result in bad code being generated, but it
26246 	 will result in a needless push and pop of the link register.  We
26247 	 hope that this does not occur too often.
26248 
26249 	 If we need doubleword stack alignment this could affect the other
26250 	 elimination offsets so we can't risk getting it wrong.  */
26251       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26252 	cfun->machine->arg_pointer_live = 1;
26253       else if (!cfun->machine->arg_pointer_live)
26254 	return 0;
26255     }
26256 
26257   /* We should not change far_jump_used during or after reload, as there is
26258      no chance to change stack frame layout.  */
26259   if (reload_in_progress || reload_completed)
26260     return 0;
26261 
26262   /* Check to see if the function contains a branch
26263      insn with the far jump attribute set.  */
26264   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26265     {
26266       if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26267 	{
26268 	  far_jump = true;
26269 	}
26270       func_size += get_attr_length (insn);
26271     }
26272 
26273   /* Attribute far_jump will always be true for thumb1 before
26274      shorten_branch pass.  So checking far_jump attribute before
26275      shorten_branch isn't very useful.
26276 
26277      Following heuristic tries to estimate more accurately if a far jump
26278      may finally be used.  The heuristic is very conservative as there is
26279      no chance to roll-back the decision of not to use far jump.
26280 
26281      Thumb1 long branch offset is -2048 to 2046.  The worst case is each
26282      2-byte insn is associated with a 4 byte constant pool.  Using
26283      function size 2048/3 as the threshold is conservative enough.  */
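  /* In other words, FUNC_SIZE only counts instruction bytes; with a 4-byte
     literal for every 2-byte insn the laid-out function could be up to three
     times larger, so only when 3 * FUNC_SIZE reaches 2048 do we commit to
     treating the branch as a far jump.  */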
26284   if (far_jump)
26285     {
26286       if ((func_size * 3) >= 2048)
26287         {
26288 	  /* Record the fact that we have decided that
26289 	     the function does use far jumps.  */
26290 	  cfun->machine->far_jump_used = 1;
26291 	  return 1;
26292 	}
26293     }
26294 
26295   return 0;
26296 }
26297 
26298 /* Return nonzero if FUNC must be entered in ARM mode.  */
26299 static bool
26300 is_called_in_ARM_mode (tree func)
26301 {
26302   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26303 
26304   /* Ignore the problem about functions whose address is taken.  */
26305   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26306     return true;
26307 
26308 #ifdef ARM_PE
26309   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26310 #else
26311   return false;
26312 #endif
26313 }
26314 
26315 /* Given the stack offsets and register mask in OFFSETS, decide how
26316    many additional registers to push instead of subtracting a constant
26317    from SP.  For epilogues the principle is the same except we use pop.
26318    FOR_PROLOGUE indicates which we're generating.  */
26319 static int
26320 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26321 {
26322   HOST_WIDE_INT amount;
26323   unsigned long live_regs_mask = offsets->saved_regs_mask;
26324   /* Extract a mask of the ones we can give to the Thumb's push/pop
26325      instruction.  */
26326   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26327   /* Then count how many other high registers will need to be pushed.  */
26328   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26329   int n_free, reg_base, size;
26330 
26331   if (!for_prologue && frame_pointer_needed)
26332     amount = offsets->locals_base - offsets->saved_regs;
26333   else
26334     amount = offsets->outgoing_args - offsets->saved_regs;
26335 
26336   /* If the stack frame size is 512 exactly, we can save one load
26337      instruction, which should make this a win even when optimizing
26338      for speed.  */
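  /* (Thumb-1 "sub sp, #imm" only encodes immediates up to 508, so a 512-byte
     frame would otherwise need the constant loaded into a register; pushing
     extra registers shrinks the adjustment to 508.)  */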
26339   if (!optimize_size && amount != 512)
26340     return 0;
26341 
26342   /* Can't do this if there are high registers to push.  */
26343   if (high_regs_pushed != 0)
26344     return 0;
26345 
26346   /* Shouldn't do it in the prologue if no registers would normally
26347      be pushed at all.  In the epilogue, also allow it if we'll have
26348      a pop insn for the PC.  */
26349   if  (l_mask == 0
26350        && (for_prologue
26351 	   || TARGET_BACKTRACE
26352 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
26353 	   || TARGET_INTERWORK
26354 	   || crtl->args.pretend_args_size != 0))
26355     return 0;
26356 
26357   /* Don't do this if thumb_expand_prologue wants to emit instructions
26358      between the push and the stack frame allocation.  */
26359   if (for_prologue
26360       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26361 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26362     return 0;
26363 
26364   reg_base = 0;
26365   n_free = 0;
26366   if (!for_prologue)
26367     {
26368       size = arm_size_return_regs ();
26369       reg_base = ARM_NUM_INTS (size);
26370       live_regs_mask >>= reg_base;
26371     }
26372 
26373   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26374 	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26375     {
26376       live_regs_mask >>= 1;
26377       n_free++;
26378     }
26379 
26380   if (n_free == 0)
26381     return 0;
26382   gcc_assert (amount / 4 * 4 == amount);
26383 
26384   if (amount >= 512 && (amount - n_free * 4) < 512)
26385     return (amount - 508) / 4;
26386   if (amount <= n_free * 4)
26387     return amount / 4;
26388   return 0;
26389 }
26390 
26391 /* The bits which aren't usefully expanded as rtl.  */
26392 const char *
26393 thumb1_unexpanded_epilogue (void)
26394 {
26395   arm_stack_offsets *offsets;
26396   int regno;
26397   unsigned long live_regs_mask = 0;
26398   int high_regs_pushed = 0;
26399   int extra_pop;
26400   int had_to_push_lr;
26401   int size;
26402 
26403   if (cfun->machine->return_used_this_function != 0)
26404     return "";
26405 
26406   if (IS_NAKED (arm_current_func_type ()))
26407     return "";
26408 
26409   offsets = arm_get_frame_offsets ();
26410   live_regs_mask = offsets->saved_regs_mask;
26411   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26412 
26413   /* We can deduce the registers used from the function's return value.
26414      This is more reliable than examining df_regs_ever_live_p () because that
26415      will be set if the register is ever used in the function, not just if
26416      the register is used to hold a return value.  */
26417   size = arm_size_return_regs ();
26418 
26419   extra_pop = thumb1_extra_regs_pushed (offsets, false);
26420   if (extra_pop > 0)
26421     {
26422       unsigned long extra_mask = (1 << extra_pop) - 1;
26423       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26424     }
26425 
26426   /* The prolog may have pushed some high registers to use as
26427      work registers.  e.g. the testsuite file:
26428      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26429      compiles to produce:
26430 	push	{r4, r5, r6, r7, lr}
26431 	mov	r7, r9
26432 	mov	r6, r8
26433 	push	{r6, r7}
26434      as part of the prolog.  We have to undo that pushing here.  */
26435 
26436   if (high_regs_pushed)
26437     {
26438       unsigned long mask = live_regs_mask & 0xff;
26439       int next_hi_reg;
26440 
26441       mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26442 
26443       if (mask == 0)
26444 	/* Oh dear!  We have no low registers into which we can pop
26445            high registers!  */
26446 	internal_error
26447 	  ("no low registers available for popping high registers");
26448 
26449       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26450 	if (live_regs_mask & (1 << next_hi_reg))
26451 	  break;
26452 
26453       while (high_regs_pushed)
26454 	{
26455 	  /* Find lo register(s) into which the high register(s) can
26456              be popped.  */
26457 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26458 	    {
26459 	      if (mask & (1 << regno))
26460 		high_regs_pushed--;
26461 	      if (high_regs_pushed == 0)
26462 		break;
26463 	    }
26464 
26465 	  if (high_regs_pushed == 0 && regno >= 0)
26466 	    mask &= ~((1 << regno) - 1);
26467 
26468 	  /* Pop the values into the low register(s).  */
26469 	  thumb_pop (asm_out_file, mask);
26470 
26471 	  /* Move the value(s) into the high registers.  */
26472 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26473 	    {
26474 	      if (mask & (1 << regno))
26475 		{
26476 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26477 			       regno);
26478 
26479 		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26480 		       next_hi_reg--)
26481 		    if (live_regs_mask & (1 << next_hi_reg))
26482 		      break;
26483 		}
26484 	    }
26485 	}
26486       live_regs_mask &= ~0x0f00;
26487     }
26488 
26489   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26490   live_regs_mask &= 0xff;
26491 
26492   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26493     {
26494       /* Pop the return address into the PC.  */
26495       if (had_to_push_lr)
26496 	live_regs_mask |= 1 << PC_REGNUM;
26497 
26498       /* Either no argument registers were pushed or a backtrace
26499 	 structure was created which includes an adjusted stack
26500 	 pointer, so just pop everything.  */
26501       if (live_regs_mask)
26502 	thumb_pop (asm_out_file, live_regs_mask);
26503 
26504       /* We have either just popped the return address into the
26505 	 PC or it was kept in LR for the entire function.
26506 	 Note that thumb_pop has already called thumb_exit if the
26507 	 PC was in the list.  */
26508       if (!had_to_push_lr)
26509 	thumb_exit (asm_out_file, LR_REGNUM);
26510     }
26511   else
26512     {
26513       /* Pop everything but the return address.  */
26514       if (live_regs_mask)
26515 	thumb_pop (asm_out_file, live_regs_mask);
26516 
26517       if (had_to_push_lr)
26518 	{
26519 	  if (size > 12)
26520 	    {
26521 	      /* We have no free low regs, so save one.  */
26522 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26523 			   LAST_ARG_REGNUM);
26524 	    }
26525 
26526 	  /* Get the return address into a temporary register.  */
26527 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26528 
26529 	  if (size > 12)
26530 	    {
26531 	      /* Move the return address to lr.  */
26532 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26533 			   LAST_ARG_REGNUM);
26534 	      /* Restore the low register.  */
26535 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26536 			   IP_REGNUM);
26537 	      regno = LR_REGNUM;
26538 	    }
26539 	  else
26540 	    regno = LAST_ARG_REGNUM;
26541 	}
26542       else
26543 	regno = LR_REGNUM;
26544 
26545       /* Remove the argument registers that were pushed onto the stack.  */
26546       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26547 		   SP_REGNUM, SP_REGNUM,
26548 		   crtl->args.pretend_args_size);
26549 
26550       thumb_exit (asm_out_file, regno);
26551     }
26552 
26553   return "";
26554 }
26555 
26556 /* Functions to save and restore machine-specific function data.  */
26557 static struct machine_function *
26558 arm_init_machine_status (void)
26559 {
26560   struct machine_function *machine;
26561   machine = ggc_cleared_alloc<machine_function> ();
26562 
26563 #if ARM_FT_UNKNOWN != 0
26564   machine->func_type = ARM_FT_UNKNOWN;
26565 #endif
26566   machine->static_chain_stack_bytes = -1;
26567   return machine;
26568 }
26569 
26570 /* Return an RTX indicating where the return address to the
26571    calling function can be found.  */
26572 rtx
26573 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26574 {
26575   if (count != 0)
26576     return NULL_RTX;
26577 
26578   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26579 }
26580 
26581 /* Do anything needed before RTL is emitted for each function.  */
26582 void
26583 arm_init_expanders (void)
26584 {
26585   /* Arrange to initialize and mark the machine per-function status.  */
26586   init_machine_status = arm_init_machine_status;
26587 
26588   /* This is to stop the combine pass optimizing away the alignment
26589      adjustment of va_arg.  */
26590   /* ??? It is claimed that this should not be necessary.  */
26591   if (cfun)
26592     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26593 }
26594 
26595 /* Check that FUNC is called with a different mode.  */
26596 
26597 bool
26598 arm_change_mode_p (tree func)
26599 {
26600   if (TREE_CODE (func) != FUNCTION_DECL)
26601     return false;
26602 
26603   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26604 
26605   if (!callee_tree)
26606     callee_tree = target_option_default_node;
26607 
26608   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26609   int flags = callee_opts->x_target_flags;
26610 
26611   return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26612 }
26613 
26614 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26615    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
26616    to point at the base of the local variables after static stack
26617    space for a function has been allocated.  */
26618 
26619 HOST_WIDE_INT
26620 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26621 {
26622   arm_stack_offsets *offsets;
26623 
26624   offsets = arm_get_frame_offsets ();
26625 
26626   switch (from)
26627     {
26628     case ARG_POINTER_REGNUM:
26629       switch (to)
26630 	{
26631 	case STACK_POINTER_REGNUM:
26632 	  return offsets->outgoing_args - offsets->saved_args;
26633 
26634 	case FRAME_POINTER_REGNUM:
26635 	  return offsets->soft_frame - offsets->saved_args;
26636 
26637 	case ARM_HARD_FRAME_POINTER_REGNUM:
26638 	  return offsets->saved_regs - offsets->saved_args;
26639 
26640 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26641 	  return offsets->locals_base - offsets->saved_args;
26642 
26643 	default:
26644 	  gcc_unreachable ();
26645 	}
26646       break;
26647 
26648     case FRAME_POINTER_REGNUM:
26649       switch (to)
26650 	{
26651 	case STACK_POINTER_REGNUM:
26652 	  return offsets->outgoing_args - offsets->soft_frame;
26653 
26654 	case ARM_HARD_FRAME_POINTER_REGNUM:
26655 	  return offsets->saved_regs - offsets->soft_frame;
26656 
26657 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26658 	  return offsets->locals_base - offsets->soft_frame;
26659 
26660 	default:
26661 	  gcc_unreachable ();
26662 	}
26663       break;
26664 
26665     default:
26666       gcc_unreachable ();
26667     }
26668 }
26669 
26670 /* Generate the function's prologue.  */
26671 
26672 void
26673 thumb1_expand_prologue (void)
26674 {
26675   rtx_insn *insn;
26676 
26677   HOST_WIDE_INT amount;
26678   HOST_WIDE_INT size;
26679   arm_stack_offsets *offsets;
26680   unsigned long func_type;
26681   int regno;
26682   unsigned long live_regs_mask;
26683   unsigned long l_mask;
26684   unsigned high_regs_pushed = 0;
26685   bool lr_needs_saving;
26686 
26687   func_type = arm_current_func_type ();
26688 
26689   /* Naked functions don't have prologues.  */
26690   if (IS_NAKED (func_type))
26691     {
26692       if (flag_stack_usage_info)
26693 	current_function_static_stack_size = 0;
26694       return;
26695     }
26696 
26697   if (IS_INTERRUPT (func_type))
26698     {
26699       error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26700       return;
26701     }
26702 
26703   if (is_called_in_ARM_mode (current_function_decl))
26704     emit_insn (gen_prologue_thumb1_interwork ());
26705 
26706   offsets = arm_get_frame_offsets ();
26707   live_regs_mask = offsets->saved_regs_mask;
26708   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26709 
26710   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
26711   l_mask = live_regs_mask & 0x40ff;
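  /* A note on the mask value: 0x40ff selects r0-r7 (bits 0-7) plus LR
     (bit 14), i.e. exactly the registers that a Thumb-1 PUSH instruction
     can store directly.  */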
26712   /* Then count how many other high registers will need to be pushed.  */
26713   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
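  /* 0x0f00 covers r8-r11, the high registers that a Thumb-1 PUSH cannot
     name directly; they must be copied into low registers before being
     pushed.  */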
26714 
26715   if (crtl->args.pretend_args_size)
26716     {
26717       rtx x = GEN_INT (-crtl->args.pretend_args_size);
26718 
26719       if (cfun->machine->uses_anonymous_args)
26720 	{
26721 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26722 	  unsigned long mask;
26723 
26724 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
26725 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
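	  /* For example, with 8 bytes of pretend args (num_pushes == 2) and
	     LAST_ARG_REGNUM == 3, this yields (1 << 4) - (1 << 2) == 0xc,
	     i.e. a push of {r2, r3}.  */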
26726 
26727 	  insn = thumb1_emit_multi_reg_push (mask, 0);
26728 	}
26729       else
26730 	{
26731 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26732 					stack_pointer_rtx, x));
26733 	}
26734       RTX_FRAME_RELATED_P (insn) = 1;
26735     }
26736 
26737   if (TARGET_BACKTRACE)
26738     {
26739       HOST_WIDE_INT offset = 0;
26740       unsigned work_register;
26741       rtx work_reg, x, arm_hfp_rtx;
26742 
26743       /* We have been asked to create a stack backtrace structure.
26744          The code looks like this:
26745 
26746 	 0   .align 2
26747 	 0   func:
26748          0     sub   SP, #16         Reserve space for 4 registers.
26749 	 2     push  {R7}            Push low registers.
26750          4     add   R7, SP, #20     Get the stack pointer before the push.
26751          6     str   R7, [SP, #8]    Store the stack pointer
26752 					(before reserving the space).
26753          8     mov   R7, PC          Get hold of the start of this code + 12.
26754         10     str   R7, [SP, #16]   Store it.
26755         12     mov   R7, FP          Get hold of the current frame pointer.
26756         14     str   R7, [SP, #4]    Store it.
26757         16     mov   R7, LR          Get hold of the current return address.
26758         18     str   R7, [SP, #12]   Store it.
26759         20     add   R7, SP, #16     Point at the start of the
26760 					backtrace structure.
26761         22     mov   FP, R7          Put this value into the frame pointer.  */
26762 
26763       work_register = thumb_find_work_register (live_regs_mask);
26764       work_reg = gen_rtx_REG (SImode, work_register);
26765       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26766 
26767       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26768 				    stack_pointer_rtx, GEN_INT (-16)));
26769       RTX_FRAME_RELATED_P (insn) = 1;
26770 
26771       if (l_mask)
26772 	{
26773 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26774 	  RTX_FRAME_RELATED_P (insn) = 1;
26775 	  lr_needs_saving = false;
26776 
26777 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
26778 	}
26779 
26780       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26781       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26782 
26783       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26784       x = gen_frame_mem (SImode, x);
26785       emit_move_insn (x, work_reg);
26786 
26787       /* Make sure that the instruction fetching the PC is in the right place
26788 	 to calculate "start of backtrace creation code + 12".  */
26789       /* ??? The stores using the common WORK_REG ought to be enough to
26790 	 prevent the scheduler from doing anything weird.  Failing that
26791 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
26792       if (l_mask)
26793 	{
26794 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26795 	  emit_move_insn (work_reg, x);
26796 
26797 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26798 	  x = gen_frame_mem (SImode, x);
26799 	  emit_move_insn (x, work_reg);
26800 
26801 	  emit_move_insn (work_reg, arm_hfp_rtx);
26802 
26803 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26804 	  x = gen_frame_mem (SImode, x);
26805 	  emit_move_insn (x, work_reg);
26806 	}
26807       else
26808 	{
26809 	  emit_move_insn (work_reg, arm_hfp_rtx);
26810 
26811 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26812 	  x = gen_frame_mem (SImode, x);
26813 	  emit_move_insn (x, work_reg);
26814 
26815 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26816 	  emit_move_insn (work_reg, x);
26817 
26818 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26819 	  x = gen_frame_mem (SImode, x);
26820 	  emit_move_insn (x, work_reg);
26821 	}
26822 
26823       x = gen_rtx_REG (SImode, LR_REGNUM);
26824       emit_move_insn (work_reg, x);
26825 
26826       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26827       x = gen_frame_mem (SImode, x);
26828       emit_move_insn (x, work_reg);
26829 
26830       x = GEN_INT (offset + 12);
26831       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26832 
26833       emit_move_insn (arm_hfp_rtx, work_reg);
26834     }
26835   /* Optimization:  If we are not pushing any low registers but we are going
26836      to push some high registers then delay our first push.  This will just
26837      be a push of LR and we can combine it with the push of the first high
26838      register.  */
26839   else if ((l_mask & 0xff) != 0
26840 	   || (high_regs_pushed == 0 && lr_needs_saving))
26841     {
26842       unsigned long mask = l_mask;
26843       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
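      /* (1 << n) - 1 sets bits 0..n-1, so the n extra low registers that
	 thumb1_extra_regs_pushed says can be pushed (folding part of the
	 later stack adjustment into this push) are added as r0..r(n-1).  */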
26844       insn = thumb1_emit_multi_reg_push (mask, mask);
26845       RTX_FRAME_RELATED_P (insn) = 1;
26846       lr_needs_saving = false;
26847     }
26848 
26849   if (high_regs_pushed)
26850     {
26851       unsigned pushable_regs;
26852       unsigned next_hi_reg;
26853       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26854 						 : crtl->args.info.nregs;
26855       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
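      /* E.g. two argument registers in use gives arg_regs_mask == 0x3
	 (r0 and r1), which are then excluded from the scratch set below.  */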
26856 
26857       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26858 	if (live_regs_mask & (1 << next_hi_reg))
26859 	  break;
26860 
26861       /* Here we need to mask out registers used for passing arguments
26862 	 even if they can be pushed.  This is to avoid using them to
26863 	 stash the high registers; such a stash could clobber arguments
26864 	 that are still needed.  */
26865       pushable_regs = l_mask & (~arg_regs_mask);
26866       pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26867 
26868       /* Normally, LR can be used as a scratch register once it has been
26869 	 saved; but if the function examines its own return address then
26870 	 the value is still live and we need to avoid using it.  */
26871       bool return_addr_live
26872 	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26873 			   LR_REGNUM);
26874 
26875       if (lr_needs_saving || return_addr_live)
26876 	pushable_regs &= ~(1 << LR_REGNUM);
26877 
26878       if (pushable_regs == 0)
26879 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26880 
26881       while (high_regs_pushed > 0)
26882 	{
26883 	  unsigned long real_regs_mask = 0;
26884 	  unsigned long push_mask = 0;
26885 
26886 	  for (regno = LR_REGNUM; regno >= 0; regno --)
26887 	    {
26888 	      if (pushable_regs & (1 << regno))
26889 		{
26890 		  emit_move_insn (gen_rtx_REG (SImode, regno),
26891 				  gen_rtx_REG (SImode, next_hi_reg));
26892 
26893 		  high_regs_pushed --;
26894 		  real_regs_mask |= (1 << next_hi_reg);
26895 		  push_mask |= (1 << regno);
26896 
26897 		  if (high_regs_pushed)
26898 		    {
26899 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26900 			   next_hi_reg --)
26901 			if (live_regs_mask & (1 << next_hi_reg))
26902 			  break;
26903 		    }
26904 		  else
26905 		    break;
26906 		}
26907 	    }
26908 
26909 	  /* If we had to find a work register and we have not yet
26910 	     saved the LR then add it to the list of regs to push.  */
26911 	  if (lr_needs_saving)
26912 	    {
26913 	      push_mask |= 1 << LR_REGNUM;
26914 	      real_regs_mask |= 1 << LR_REGNUM;
26915 	      lr_needs_saving = false;
26916 	      /* If the return address is not live at this point, we
26917 		 can add LR to the list of registers that we can use
26918 		 for pushes.  */
26919 	      if (!return_addr_live)
26920 		pushable_regs |= 1 << LR_REGNUM;
26921 	    }
26922 
26923 	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26924 	  RTX_FRAME_RELATED_P (insn) = 1;
26925 	}
26926     }
26927 
26928   /* Load the pic register before setting the frame pointer,
26929      so we can use r7 as a temporary work register.  */
26930   if (flag_pic && arm_pic_register != INVALID_REGNUM)
26931     arm_load_pic_register (live_regs_mask, NULL_RTX);
26932 
26933   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26934     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26935 		    stack_pointer_rtx);
26936 
26937   size = offsets->outgoing_args - offsets->saved_args;
26938   if (flag_stack_usage_info)
26939     current_function_static_stack_size = size;
26940 
26941   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
26942   if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26943        || flag_stack_clash_protection)
26944       && size)
26945     sorry ("%<-fstack-check=specific%> for Thumb-1");
26946 
26947   amount = offsets->outgoing_args - offsets->saved_regs;
26948   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26949   if (amount)
26950     {
26951       if (amount < 512)
26952 	{
26953 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26954 					GEN_INT (- amount)));
26955 	  RTX_FRAME_RELATED_P (insn) = 1;
26956 	}
26957       else
26958 	{
26959 	  rtx reg, dwarf;
26960 
26961 	  /* The stack decrement is too big for an immediate value in a single
26962 	     insn.  In theory we could issue multiple subtracts, but after
26963 	     three of them it becomes more space efficient to place the full
26964 	     value in the constant pool and load into a register.  (Also the
26965 	     ARM debugger really likes to see only one stack decrement per
26966 	     function).  So instead we look for a scratch register into which
26967 	     we can load the decrement, and then we subtract this from the
26968 	     stack pointer.  Unfortunately on the thumb the only available
26969 	     scratch registers are the argument registers, and we cannot use
26970 	     these as they may hold arguments to the function.  Instead we
26971 	     attempt to locate a call preserved register which is used by this
26972 	     function.  If we can find one, then we know that it will have
26973 	     been pushed at the start of the prologue and so we can corrupt
26974 	     it now.  */
26975 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26976 	    if (live_regs_mask & (1 << regno))
26977 	      break;
26978 
26979 	  gcc_assert(regno <= LAST_LO_REGNUM);
26980 
26981 	  reg = gen_rtx_REG (SImode, regno);
26982 
26983 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26984 
26985 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26986 					stack_pointer_rtx, reg));
26987 
26988 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
26989 			       plus_constant (Pmode, stack_pointer_rtx,
26990 					      -amount));
26991 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26992 	  RTX_FRAME_RELATED_P (insn) = 1;
26993 	}
26994     }
26995 
26996   if (frame_pointer_needed)
26997     thumb_set_frame_pointer (offsets);
26998 
26999   /* If we are profiling, make sure no instructions are scheduled before
27000      the call to mcount.  Similarly if the user has requested no
27001      scheduling in the prolog.  Similarly if we want non-call exceptions
27002      using the EABI unwinder, to prevent faulting instructions from being
27003      swapped with a stack adjustment.  */
27004   if (crtl->profile || !TARGET_SCHED_PROLOG
27005       || (arm_except_unwind_info (&global_options) == UI_TARGET
27006 	  && cfun->can_throw_non_call_exceptions))
27007     emit_insn (gen_blockage ());
27008 
27009   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27010   if (live_regs_mask & 0xff)
27011     cfun->machine->lr_save_eliminated = 0;
27012 }
27013 
27014 /* Clear caller saved registers not used to pass return values and leaked
27015    condition flags before exiting a cmse_nonsecure_entry function.  */
27016 
27017 void
27018 cmse_nonsecure_entry_clear_before_return (void)
27019 {
27020   bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27021   int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27022   uint32_t padding_bits_to_clear = 0;
27023   auto_sbitmap to_clear_bitmap (maxregno + 1);
27024   rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27025   tree result_type;
27026 
27027   bitmap_clear (to_clear_bitmap);
27028   bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27029   bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
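  /* Start with the AAPCS argument registers r0-r3 plus ip (r12); further
     caller-saved registers are added to the set below.  */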
27030 
27031   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27032      registers.  */
27033   if (clear_vfpregs)
27034     {
27035       int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
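      /* This range spans s0-s15 (d0-d7), the VFP registers that can carry
	 arguments or return values under the AAPCS.  */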
27036 
27037       bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27038 
27039       if (!TARGET_HAVE_FPCXT_CMSE)
27040 	{
27041 	  /* Make sure we don't clear the two scratch registers used to clear
27042 	     the relevant FPSCR bits in output_return_instruction.  */
27043 	  emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27044 	  bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27045 	  emit_use (gen_rtx_REG (SImode, 4));
27046 	  bitmap_clear_bit (to_clear_bitmap, 4);
27047 	}
27048     }
27049 
27050   /* If the user has defined registers to be caller saved, these are no longer
27051      restored by the function before returning and must thus be cleared for
27052      security purposes.  */
27053   for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27054     {
27055       /* We do not touch registers that can be used to pass arguments as per
27056 	 the AAPCS, since these should never be made callee-saved by user
27057 	 options.  */
27058       if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27059 	continue;
27060       if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27061 	continue;
27062       if (!callee_saved_reg_p (regno)
27063 	  && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27064 	      || TARGET_HARD_FLOAT))
27065 	bitmap_set_bit (to_clear_bitmap, regno);
27066     }
27067 
27068   /* Make sure we do not clear the registers used to return the result in.  */
27069   result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27070   if (!VOID_TYPE_P (result_type))
27071     {
27072       uint64_t to_clear_return_mask;
27073       result_rtl = arm_function_value (result_type, current_function_decl, 0);
27074 
27075       /* No need to check that we return in registers, because we don't
27076 	 support returning on stack yet.  */
27077       gcc_assert (REG_P (result_rtl));
27078       to_clear_return_mask
27079 	= compute_not_to_clear_mask (result_type, result_rtl, 0,
27080 				     &padding_bits_to_clear);
27081       if (to_clear_return_mask)
27082 	{
27083 	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27084 	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
27085 	    {
27086 	      if (to_clear_return_mask & (1ULL << regno))
27087 		bitmap_clear_bit (to_clear_bitmap, regno);
27088 	    }
27089 	}
27090     }
27091 
27092   if (padding_bits_to_clear != 0)
27093     {
27094       int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27095       auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27096 
27097       /* Padding_bits_to_clear is not 0 so we know we are dealing with
27098 	 returning a composite type, which only uses r0.  Let's make sure that
27099 	 r1-r3 is cleared too.  */
27100       bitmap_clear (to_clear_arg_regs_bitmap);
27101       bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27102       gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27103     }
27104 
27105   /* Clear full registers that leak before returning.  */
27106   clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27107   r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27108   cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27109 			clearing_reg);
27110 }
27111 
27112 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27113    single POP instruction can be generated.  LR should be replaced by PC.
27114    All the checks required are already done by USE_RETURN_INSN ().  Hence,
27115    all we really need to decide here is whether a single register or
27116    multiple registers are to be popped.  */
27117 void
27118 thumb2_expand_return (bool simple_return)
27119 {
27120   int i, num_regs;
27121   unsigned long saved_regs_mask;
27122   arm_stack_offsets *offsets;
27123 
27124   offsets = arm_get_frame_offsets ();
27125   saved_regs_mask = offsets->saved_regs_mask;
27126 
27127   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27128     if (saved_regs_mask & (1 << i))
27129       num_regs++;
27130 
27131   if (!simple_return && saved_regs_mask)
27132     {
27133       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27134 	 functions or adapt code to handle according to ACLE.  This path should
27135 	 not be reachable for cmse_nonsecure_entry functions though we prefer
27136 	 to assert it for now to ensure that future code changes do not silently
27137 	 change this behavior.  */
27138       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27139       if (num_regs == 1)
27140         {
27141           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27142           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27143           rtx addr = gen_rtx_MEM (SImode,
27144                                   gen_rtx_POST_INC (SImode,
27145                                                     stack_pointer_rtx));
27146           set_mem_alias_set (addr, get_frame_alias_set ());
27147           XVECEXP (par, 0, 0) = ret_rtx;
27148           XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27149           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27150           emit_jump_insn (par);
27151         }
27152       else
27153         {
27154           saved_regs_mask &= ~ (1 << LR_REGNUM);
27155           saved_regs_mask |=   (1 << PC_REGNUM);
27156           arm_emit_multi_reg_pop (saved_regs_mask);
27157         }
27158     }
27159   else
27160     {
27161       if (IS_CMSE_ENTRY (arm_current_func_type ()))
27162 	cmse_nonsecure_entry_clear_before_return ();
27163       emit_jump_insn (simple_return_rtx);
27164     }
27165 }
27166 
27167 void
27168 thumb1_expand_epilogue (void)
27169 {
27170   HOST_WIDE_INT amount;
27171   arm_stack_offsets *offsets;
27172   int regno;
27173 
27174   /* Naked functions don't have epilogues.  */
27175   if (IS_NAKED (arm_current_func_type ()))
27176     return;
27177 
27178   offsets = arm_get_frame_offsets ();
27179   amount = offsets->outgoing_args - offsets->saved_regs;
27180 
27181   if (frame_pointer_needed)
27182     {
27183       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27184       amount = offsets->locals_base - offsets->saved_regs;
27185     }
27186   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27187 
27188   gcc_assert (amount >= 0);
27189   if (amount)
27190     {
27191       emit_insn (gen_blockage ());
27192 
27193       if (amount < 512)
27194 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27195 			       GEN_INT (amount)));
27196       else
27197 	{
27198 	  /* r3 is always free in the epilogue.  */
27199 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27200 
27201 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
27202 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27203 	}
27204     }
27205 
27206   /* Emit a USE (stack_pointer_rtx), so that
27207      the stack adjustment will not be deleted.  */
27208   emit_insn (gen_force_register_use (stack_pointer_rtx));
27209 
27210   if (crtl->profile || !TARGET_SCHED_PROLOG)
27211     emit_insn (gen_blockage ());
27212 
27213   /* Emit a clobber for each insn that will be restored in the epilogue,
27214      so that flow2 will get register lifetimes correct.  */
27215   for (regno = 0; regno < 13; regno++)
27216     if (reg_needs_saving_p (regno))
27217       emit_clobber (gen_rtx_REG (SImode, regno));
27218 
27219   if (! df_regs_ever_live_p (LR_REGNUM))
27220     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27221 
27222   /* Clear all caller-saved regs that are not used to return.  */
27223   if (IS_CMSE_ENTRY (arm_current_func_type ()))
27224     cmse_nonsecure_entry_clear_before_return ();
27225 }
27226 
27227 /* Epilogue code for APCS frame.  */
27228 static void
27229 arm_expand_epilogue_apcs_frame (bool really_return)
27230 {
27231   unsigned long func_type;
27232   unsigned long saved_regs_mask;
27233   int num_regs = 0;
27234   int i;
27235   int floats_from_frame = 0;
27236   arm_stack_offsets *offsets;
27237 
27238   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27239   func_type = arm_current_func_type ();
27240 
27241   /* Get frame offsets for ARM.  */
27242   offsets = arm_get_frame_offsets ();
27243   saved_regs_mask = offsets->saved_regs_mask;
27244 
27245   /* Find the offset of the floating-point save area in the frame.  */
27246   floats_from_frame
27247     = (offsets->saved_args
27248        + arm_compute_static_chain_stack_bytes ()
27249        - offsets->frame);
27250 
27251   /* Compute how many core registers saved and how far away the floats are.  */
27252   for (i = 0; i <= LAST_ARM_REGNUM; i++)
27253     if (saved_regs_mask & (1 << i))
27254       {
27255         num_regs++;
27256         floats_from_frame += 4;
27257       }
27258 
27259   if (TARGET_VFP_BASE)
27260     {
27261       int start_reg;
27262       rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27263 
27264       /* The offset is from IP_REGNUM.  */
27265       int saved_size = arm_get_vfp_saved_size ();
27266       if (saved_size > 0)
27267         {
27268 	  rtx_insn *insn;
27269           floats_from_frame += saved_size;
27270           insn = emit_insn (gen_addsi3 (ip_rtx,
27271 					hard_frame_pointer_rtx,
27272 					GEN_INT (-floats_from_frame)));
27273 	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27274 				       ip_rtx, hard_frame_pointer_rtx);
27275         }
27276 
27277       /* Generate VFP register multi-pop.  */
27278       start_reg = FIRST_VFP_REGNUM;
27279 
27280       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27281         /* Look for a case where a reg does not need restoring.  */
27282 	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27283           {
27284             if (start_reg != i)
27285               arm_emit_vfp_multi_reg_pop (start_reg,
27286                                           (i - start_reg) / 2,
27287                                           gen_rtx_REG (SImode,
27288                                                        IP_REGNUM));
27289             start_reg = i + 2;
27290           }
27291 
27292       /* Restore the remaining regs that we have discovered (or possibly
27293          even all of them, if the conditional in the for loop never
27294          fired).  */
27295       if (start_reg != i)
27296         arm_emit_vfp_multi_reg_pop (start_reg,
27297                                     (i - start_reg) / 2,
27298                                     gen_rtx_REG (SImode, IP_REGNUM));
27299     }
27300 
27301   if (TARGET_IWMMXT)
27302     {
27303       /* The frame pointer is guaranteed to be non-double-word aligned, as
27304          it is set to double-word-aligned old_stack_pointer - 4.  */
27305       rtx_insn *insn;
27306       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27307 
27308       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27309 	if (reg_needs_saving_p (i))
27310           {
27311             rtx addr = gen_frame_mem (V2SImode,
27312                                  plus_constant (Pmode, hard_frame_pointer_rtx,
27313                                                 - lrm_count * 4));
27314             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27315             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27316                                                gen_rtx_REG (V2SImode, i),
27317                                                NULL_RTX);
27318             lrm_count += 2;
27319           }
27320     }
27321 
27322   /* saved_regs_mask should contain IP which contains old stack pointer
27323      at the time of activation creation.  Since SP and IP are adjacent registers,
27324      we can restore the value directly into SP.  */
27325   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27326   saved_regs_mask &= ~(1 << IP_REGNUM);
27327   saved_regs_mask |= (1 << SP_REGNUM);
27328 
27329   /* There are two registers left in saved_regs_mask - LR and PC.  We
27330      only need to restore LR (the return address), but to
27331      save time we can load it directly into PC, unless we need a
27332      special function exit sequence, or we are not really returning.  */
27333   if (really_return
27334       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27335       && !crtl->calls_eh_return)
27336     /* Delete LR from the register mask, so that LR on
27337        the stack is loaded into the PC in the register mask.  */
27338     saved_regs_mask &= ~(1 << LR_REGNUM);
27339   else
27340     saved_regs_mask &= ~(1 << PC_REGNUM);
27341 
27342   num_regs = bit_count (saved_regs_mask);
27343   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27344     {
27345       rtx_insn *insn;
27346       emit_insn (gen_blockage ());
27347       /* Unwind the stack to just below the saved registers.  */
27348       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27349 				    hard_frame_pointer_rtx,
27350 				    GEN_INT (- 4 * num_regs)));
27351 
27352       arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27353 				   stack_pointer_rtx, hard_frame_pointer_rtx);
27354     }
27355 
27356   arm_emit_multi_reg_pop (saved_regs_mask);
27357 
27358   if (IS_INTERRUPT (func_type))
27359     {
27360       /* Interrupt handlers will have pushed the
27361          IP onto the stack, so restore it now.  */
27362       rtx_insn *insn;
27363       rtx addr = gen_rtx_MEM (SImode,
27364                               gen_rtx_POST_INC (SImode,
27365                               stack_pointer_rtx));
27366       set_mem_alias_set (addr, get_frame_alias_set ());
27367       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27368       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27369                                          gen_rtx_REG (SImode, IP_REGNUM),
27370                                          NULL_RTX);
27371     }
27372 
27373   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27374     return;
27375 
27376   if (crtl->calls_eh_return)
27377     emit_insn (gen_addsi3 (stack_pointer_rtx,
27378 			   stack_pointer_rtx,
27379 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27380 
27381   if (IS_STACKALIGN (func_type))
27382     /* Restore the original stack pointer.  Before prologue, the stack was
27383        realigned and the original stack pointer saved in r0.  For details,
27384        see comment in arm_expand_prologue.  */
27385     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27386 
27387   emit_jump_insn (simple_return_rtx);
27388 }
27389 
27390 /* Generate RTL to represent ARM epilogue.  Really_return is true if the
27391    function is not a sibcall.  */
27392 void
27393 arm_expand_epilogue (bool really_return)
27394 {
27395   unsigned long func_type;
27396   unsigned long saved_regs_mask;
27397   int num_regs = 0;
27398   int i;
27399   int amount;
27400   arm_stack_offsets *offsets;
27401 
27402   func_type = arm_current_func_type ();
27403 
27404   /* Naked functions don't have epilogues.  Hence, generate a return pattern
27405      and let output_return_instruction take care of any instruction emission.  */
27406   if (IS_NAKED (func_type)
27407       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27408     {
27409       if (really_return)
27410         emit_jump_insn (simple_return_rtx);
27411       return;
27412     }
27413 
27414   /* If we are throwing an exception, then we really must be doing a
27415      return, so we can't tail-call.  */
27416   gcc_assert (!crtl->calls_eh_return || really_return);
27417 
27418   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27419     {
27420       arm_expand_epilogue_apcs_frame (really_return);
27421       return;
27422     }
27423 
27424   /* Get frame offsets for ARM.  */
27425   offsets = arm_get_frame_offsets ();
27426   saved_regs_mask = offsets->saved_regs_mask;
27427   num_regs = bit_count (saved_regs_mask);
27428 
27429   if (frame_pointer_needed)
27430     {
27431       rtx_insn *insn;
27432       /* Restore stack pointer if necessary.  */
27433       if (TARGET_ARM)
27434         {
27435           /* In ARM mode, frame pointer points to first saved register.
27436              Restore stack pointer to last saved register.  */
27437           amount = offsets->frame - offsets->saved_regs;
27438 
27439           /* Force out any pending memory operations that reference stacked data
27440              before stack de-allocation occurs.  */
27441           emit_insn (gen_blockage ());
27442 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27443 			    hard_frame_pointer_rtx,
27444 			    GEN_INT (amount)));
27445 	  arm_add_cfa_adjust_cfa_note (insn, amount,
27446 				       stack_pointer_rtx,
27447 				       hard_frame_pointer_rtx);
27448 
27449           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27450              deleted.  */
27451           emit_insn (gen_force_register_use (stack_pointer_rtx));
27452         }
27453       else
27454         {
27455           /* In Thumb-2 mode, the frame pointer points to the last saved
27456              register.  */
27457 	  amount = offsets->locals_base - offsets->saved_regs;
27458 	  if (amount)
27459 	    {
27460 	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27461 				hard_frame_pointer_rtx,
27462 				GEN_INT (amount)));
27463 	      arm_add_cfa_adjust_cfa_note (insn, amount,
27464 					   hard_frame_pointer_rtx,
27465 					   hard_frame_pointer_rtx);
27466 	    }
27467 
27468           /* Force out any pending memory operations that reference stacked data
27469              before stack de-allocation occurs.  */
27470           emit_insn (gen_blockage ());
27471 	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
27472 				       hard_frame_pointer_rtx));
27473 	  arm_add_cfa_adjust_cfa_note (insn, 0,
27474 				       stack_pointer_rtx,
27475 				       hard_frame_pointer_rtx);
27476           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27477              deleted.  */
27478           emit_insn (gen_force_register_use (stack_pointer_rtx));
27479         }
27480     }
27481   else
27482     {
27483       /* Pop off outgoing args and local frame to adjust stack pointer to
27484          last saved register.  */
27485       amount = offsets->outgoing_args - offsets->saved_regs;
27486       if (amount)
27487         {
27488 	  rtx_insn *tmp;
27489           /* Force out any pending memory operations that reference stacked data
27490              before stack de-allocation occurs.  */
27491           emit_insn (gen_blockage ());
27492 	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27493 				       stack_pointer_rtx,
27494 				       GEN_INT (amount)));
27495 	  arm_add_cfa_adjust_cfa_note (tmp, amount,
27496 				       stack_pointer_rtx, stack_pointer_rtx);
27497           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27498              not deleted.  */
27499           emit_insn (gen_force_register_use (stack_pointer_rtx));
27500         }
27501     }
27502 
27503   if (TARGET_VFP_BASE)
27504     {
27505       /* Generate VFP register multi-pop.  */
27506       int end_reg = LAST_VFP_REGNUM + 1;
27507 
27508       /* Scan the registers in reverse order.  We need to match
27509          any groupings made in the prologue and generate matching
27510          vldm operations.  The need to match groups is because,
27511          unlike pop, vldm can only do consecutive regs.  */
27512       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27513         /* Look for a case where a reg does not need restoring.  */
27514 	if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27515           {
27516             /* Restore the regs discovered so far (from reg+2 to
27517                end_reg).  */
27518             if (end_reg > i + 2)
27519               arm_emit_vfp_multi_reg_pop (i + 2,
27520                                           (end_reg - (i + 2)) / 2,
27521                                           stack_pointer_rtx);
27522             end_reg = i;
27523           }
27524 
27525       /* Restore the remaining regs that we have discovered (or possibly
27526          even all of them, if the conditional in the for loop never
27527          fired).  */
27528       if (end_reg > i + 2)
27529         arm_emit_vfp_multi_reg_pop (i + 2,
27530                                     (end_reg - (i + 2)) / 2,
27531                                     stack_pointer_rtx);
27532     }
27533 
27534   if (TARGET_IWMMXT)
27535     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27536       if (reg_needs_saving_p (i))
27537         {
27538           rtx_insn *insn;
27539           rtx addr = gen_rtx_MEM (V2SImode,
27540                                   gen_rtx_POST_INC (SImode,
27541                                                     stack_pointer_rtx));
27542           set_mem_alias_set (addr, get_frame_alias_set ());
27543           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27544           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27545                                              gen_rtx_REG (V2SImode, i),
27546                                              NULL_RTX);
27547 	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27548 				       stack_pointer_rtx, stack_pointer_rtx);
27549         }
27550 
27551   if (saved_regs_mask)
27552     {
27553       rtx insn;
27554       bool return_in_pc = false;
27555 
27556       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27557           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27558 	  && !IS_CMSE_ENTRY (func_type)
27559           && !IS_STACKALIGN (func_type)
27560           && really_return
27561           && crtl->args.pretend_args_size == 0
27562           && saved_regs_mask & (1 << LR_REGNUM)
27563           && !crtl->calls_eh_return)
27564         {
27565           saved_regs_mask &= ~(1 << LR_REGNUM);
27566           saved_regs_mask |= (1 << PC_REGNUM);
27567           return_in_pc = true;
27568         }
27569 
27570       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27571         {
27572           for (i = 0; i <= LAST_ARM_REGNUM; i++)
27573             if (saved_regs_mask & (1 << i))
27574               {
27575                 rtx addr = gen_rtx_MEM (SImode,
27576                                         gen_rtx_POST_INC (SImode,
27577                                                           stack_pointer_rtx));
27578                 set_mem_alias_set (addr, get_frame_alias_set ());
27579 
27580                 if (i == PC_REGNUM)
27581                   {
27582                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27583                     XVECEXP (insn, 0, 0) = ret_rtx;
27584                     XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27585                                                         addr);
27586                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27587                     insn = emit_jump_insn (insn);
27588                   }
27589                 else
27590                   {
27591                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27592                                                  addr));
27593                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27594                                                        gen_rtx_REG (SImode, i),
27595                                                        NULL_RTX);
27596 		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27597 						 stack_pointer_rtx,
27598 						 stack_pointer_rtx);
27599                   }
27600               }
27601         }
27602       else
27603         {
27604           if (TARGET_LDRD
27605 	      && current_tune->prefer_ldrd_strd
27606               && !optimize_function_for_size_p (cfun))
27607             {
27608               if (TARGET_THUMB2)
27609                 thumb2_emit_ldrd_pop (saved_regs_mask);
27610               else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27611                 arm_emit_ldrd_pop (saved_regs_mask);
27612               else
27613                 arm_emit_multi_reg_pop (saved_regs_mask);
27614             }
27615           else
27616             arm_emit_multi_reg_pop (saved_regs_mask);
27617         }
27618 
27619       if (return_in_pc)
27620         return;
27621     }
27622 
27623   amount
27624     = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27625   if (amount)
27626     {
27627       int i, j;
27628       rtx dwarf = NULL_RTX;
27629       rtx_insn *tmp =
27630 	emit_insn (gen_addsi3 (stack_pointer_rtx,
27631 			       stack_pointer_rtx,
27632 			       GEN_INT (amount)));
27633 
27634       RTX_FRAME_RELATED_P (tmp) = 1;
27635 
27636       if (cfun->machine->uses_anonymous_args)
27637 	{
27638 	  /* Restore pretend args.  Refer arm_expand_prologue on how to save
27639 	     pretend_args in stack.  */
27640 	  int num_regs = crtl->args.pretend_args_size / 4;
27641 	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
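	  /* E.g. 8 bytes of pretend args gives num_regs == 2 and a mask of
	     (0xf0 >> 2) & 0xf == 0xc, i.e. the r2/r3 slots that the
	     prologue pushed.  */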
27642 	  for (j = 0, i = 0; j < num_regs; i++)
27643 	    if (saved_regs_mask & (1 << i))
27644 	      {
27645 		rtx reg = gen_rtx_REG (SImode, i);
27646 		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27647 		j++;
27648 	      }
27649 	  REG_NOTES (tmp) = dwarf;
27650 	}
27651       arm_add_cfa_adjust_cfa_note (tmp, amount,
27652 				   stack_pointer_rtx, stack_pointer_rtx);
27653     }
27654 
27655   if (IS_CMSE_ENTRY (func_type))
27656     {
27657       /* CMSE_ENTRY always returns.  */
27658       gcc_assert (really_return);
27659       /* Clear all caller-saved regs that are not used to return.  */
27660       cmse_nonsecure_entry_clear_before_return ();
27661 
27662       /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27663 	 VLDR.  */
27664       if (TARGET_HAVE_FPCXT_CMSE)
27665 	{
27666 	  rtx_insn *insn;
27667 
27668 	  insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27669 						   GEN_INT (FPCXTNS_ENUM)));
27670 	  rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27671 				  plus_constant (Pmode, stack_pointer_rtx, 4));
27672 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27673 	  RTX_FRAME_RELATED_P (insn) = 1;
27674 	}
27675     }
27676 
27677   if (!really_return)
27678     return;
27679 
27680   if (crtl->calls_eh_return)
27681     emit_insn (gen_addsi3 (stack_pointer_rtx,
27682                            stack_pointer_rtx,
27683                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27684 
27685   if (IS_STACKALIGN (func_type))
27686     /* Restore the original stack pointer.  Before prologue, the stack was
27687        realigned and the original stack pointer saved in r0.  For details,
27688        see comment in arm_expand_prologue.  */
27689     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27690 
27691   emit_jump_insn (simple_return_rtx);
27692 }
27693 
27694 /* Implementation of insn prologue_thumb1_interwork.  This is the first
27695    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
27696 
27697 const char *
27698 thumb1_output_interwork (void)
27699 {
27700   const char * name;
27701   FILE *f = asm_out_file;
27702 
27703   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27704   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27705 	      == SYMBOL_REF);
27706   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27707 
27708   /* Generate code sequence to switch us into Thumb mode.  */
27709   /* The .code 32 directive has already been emitted by
27710      ASM_DECLARE_FUNCTION_NAME.  */
27711   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27712   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27713 
27714   /* Generate a label, so that the debugger will notice the
27715      change in instruction sets.  This label is also used by
27716      the assembler to bypass the ARM code when this function
27717      is called from a Thumb encoded function elsewhere in the
27718      same file.  Hence the definition of STUB_NAME here must
27719      agree with the definition in gas/config/tc-arm.c.  */
27720 
27721 #define STUB_NAME ".real_start_of"
27722 
27723   fprintf (f, "\t.code\t16\n");
27724 #ifdef ARM_PE
27725   if (arm_dllexport_name_p (name))
27726     name = arm_strip_name_encoding (name);
27727 #endif
27728   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27729   fprintf (f, "\t.thumb_func\n");
27730   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27731 
27732   return "";
27733 }
27734 
27735 /* Handle the case of a double word load into a low register from
27736    a computed memory address.  The computed address may involve a
27737    register which is overwritten by the load.  */
27738 const char *
27739 thumb_load_double_from_address (rtx *operands)
27740 {
27741   rtx addr;
27742   rtx base;
27743   rtx offset;
27744   rtx arg1;
27745   rtx arg2;
27746 
27747   gcc_assert (REG_P (operands[0]));
27748   gcc_assert (MEM_P (operands[1]));
27749 
27750   /* Get the memory address.  */
27751   addr = XEXP (operands[1], 0);
27752 
27753   /* Work out how the memory address is computed.  */
27754   switch (GET_CODE (addr))
27755     {
27756     case REG:
27757       operands[2] = adjust_address (operands[1], SImode, 4);
27758 
27759       if (REGNO (operands[0]) == REGNO (addr))
27760 	{
27761 	  output_asm_insn ("ldr\t%H0, %2", operands);
27762 	  output_asm_insn ("ldr\t%0, %1", operands);
27763 	}
27764       else
27765 	{
27766 	  output_asm_insn ("ldr\t%0, %1", operands);
27767 	  output_asm_insn ("ldr\t%H0, %2", operands);
27768 	}
27769       break;
27770 
27771     case CONST:
27772       /* Compute <address> + 4 for the high order load.  */
27773       operands[2] = adjust_address (operands[1], SImode, 4);
27774 
27775       output_asm_insn ("ldr\t%0, %1", operands);
27776       output_asm_insn ("ldr\t%H0, %2", operands);
27777       break;
27778 
27779     case PLUS:
27780       arg1   = XEXP (addr, 0);
27781       arg2   = XEXP (addr, 1);
27782 
27783       if (CONSTANT_P (arg1))
27784 	base = arg2, offset = arg1;
27785       else
27786 	base = arg1, offset = arg2;
27787 
27788       gcc_assert (REG_P (base));
27789 
27790       /* Catch the case of <address> = <reg> + <reg> */
27791       if (REG_P (offset))
27792 	{
27793 	  int reg_offset = REGNO (offset);
27794 	  int reg_base   = REGNO (base);
27795 	  int reg_dest   = REGNO (operands[0]);
27796 
27797 	  /* Add the base and offset registers together into the
27798              higher destination register.  */
27799 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27800 		       reg_dest + 1, reg_base, reg_offset);
27801 
27802 	  /* Load the lower destination register from the address in
27803              the higher destination register.  */
27804 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27805 		       reg_dest, reg_dest + 1);
27806 
27807 	  /* Load the higher destination register from its own address
27808              plus 4.  */
27809 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27810 		       reg_dest + 1, reg_dest + 1);
27811 	}
27812       else
27813 	{
27814 	  /* Compute <address> + 4 for the high order load.  */
27815 	  operands[2] = adjust_address (operands[1], SImode, 4);
27816 
27817 	  /* If the computed address is held in the low order register
27818 	     then load the high order register first, otherwise always
27819 	     load the low order register first.  */
27820 	  if (REGNO (operands[0]) == REGNO (base))
27821 	    {
27822 	      output_asm_insn ("ldr\t%H0, %2", operands);
27823 	      output_asm_insn ("ldr\t%0, %1", operands);
27824 	    }
27825 	  else
27826 	    {
27827 	      output_asm_insn ("ldr\t%0, %1", operands);
27828 	      output_asm_insn ("ldr\t%H0, %2", operands);
27829 	    }
27830 	}
27831       break;
27832 
27833     case LABEL_REF:
27834       /* With no registers to worry about we can just load the value
27835          directly.  */
27836       operands[2] = adjust_address (operands[1], SImode, 4);
27837 
27838       output_asm_insn ("ldr\t%H0, %2", operands);
27839       output_asm_insn ("ldr\t%0, %1", operands);
27840       break;
27841 
27842     default:
27843       gcc_unreachable ();
27844     }
27845 
27846   return "";
27847 }
27848 
27849 const char *
27850 thumb_output_move_mem_multiple (int n, rtx *operands)
27851 {
27852   switch (n)
27853     {
27854     case 2:
27855       if (REGNO (operands[4]) > REGNO (operands[5]))
27856 	std::swap (operands[4], operands[5]);
27857 
27858       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27859       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27860       break;
27861 
27862     case 3:
27863       if (REGNO (operands[4]) > REGNO (operands[5]))
27864         std::swap (operands[4], operands[5]);
27865       if (REGNO (operands[5]) > REGNO (operands[6]))
27866         std::swap (operands[5], operands[6]);
27867       if (REGNO (operands[4]) > REGNO (operands[5]))
27868         std::swap (operands[4], operands[5]);
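      /* The three conditional swaps above sort operands[4..6] into ascending
	 register order, so the ldmia/stmia register lists are emitted in the
	 order the assembler expects.  */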
27869 
27870       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27871       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27872       break;
27873 
27874     default:
27875       gcc_unreachable ();
27876     }
27877 
27878   return "";
27879 }
27880 
27881 /* Output a call-via instruction for thumb state.  */
27882 const char *
27883 thumb_call_via_reg (rtx reg)
27884 {
27885   int regno = REGNO (reg);
27886   rtx *labelp;
27887 
27888   gcc_assert (regno < LR_REGNUM);
27889 
27890   /* If we are in the normal text section we can use a single instance
27891      per compilation unit.  If we are doing function sections, then we need
27892      an entry per section, since we can't rely on reachability.  */
27893   if (in_section == text_section)
27894     {
27895       thumb_call_reg_needed = 1;
27896 
27897       if (thumb_call_via_label[regno] == NULL)
27898 	thumb_call_via_label[regno] = gen_label_rtx ();
27899       labelp = thumb_call_via_label + regno;
27900     }
27901   else
27902     {
27903       if (cfun->machine->call_via[regno] == NULL)
27904 	cfun->machine->call_via[regno] = gen_label_rtx ();
27905       labelp = cfun->machine->call_via + regno;
27906     }
27907 
27908   output_asm_insn ("bl\t%a0", labelp);
27909   return "";
27910 }
27911 
27912 /* Routines for generating rtl.  */
27913 void
27914 thumb_expand_cpymemqi (rtx *operands)
27915 {
27916   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27917   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27918   HOST_WIDE_INT len = INTVAL (operands[2]);
27919   HOST_WIDE_INT offset = 0;
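  /* Copy the bulk with the cpymem12b/cpymem8b patterns, which transfer 12-
     and 8-byte blocks and update the IN/OUT pointers as they go; the
     remaining tail is handled with explicit word, halfword and byte moves
     at increasing offsets.  */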
27920 
27921   while (len >= 12)
27922     {
27923       emit_insn (gen_cpymem12b (out, in, out, in));
27924       len -= 12;
27925     }
27926 
27927   if (len >= 8)
27928     {
27929       emit_insn (gen_cpymem8b (out, in, out, in));
27930       len -= 8;
27931     }
27932 
27933   if (len >= 4)
27934     {
27935       rtx reg = gen_reg_rtx (SImode);
27936       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27937       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27938       len -= 4;
27939       offset += 4;
27940     }
27941 
27942   if (len >= 2)
27943     {
27944       rtx reg = gen_reg_rtx (HImode);
27945       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27946 					      plus_constant (Pmode, in,
27947 							     offset))));
27948       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27949 								offset)),
27950 			    reg));
27951       len -= 2;
27952       offset += 2;
27953     }
27954 
27955   if (len)
27956     {
27957       rtx reg = gen_reg_rtx (QImode);
27958       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27959 					      plus_constant (Pmode, in,
27960 							     offset))));
27961       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27962 								offset)),
27963 			    reg));
27964     }
27965 }
27966 
27967 void
27968 thumb_reload_out_hi (rtx *operands)
27969 {
27970   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27971 }
27972 
27973 /* Return the length of a function name prefix
27974     that starts with the character 'c'.  */
27975 static int
27976 arm_get_strip_length (int c)
27977 {
27978   switch (c)
27979     {
27980     ARM_NAME_ENCODING_LENGTHS
27981       default: return 0;
27982     }
27983 }
27984 
27985 /* Return a pointer to a function's name with any
27986    and all prefix encodings stripped from it.  */
27987 const char *
27988 arm_strip_name_encoding (const char *name)
27989 {
27990   int skip;
27991 
27992   while ((skip = arm_get_strip_length (* name)))
27993     name += skip;
27994 
27995   return name;
27996 }
27997 
27998 /* If there is a '*' anywhere in the name's prefix, then
27999    emit the stripped name verbatim, otherwise prepend an
28000    underscore if leading underscores are being used.  */
28001 void
28002 arm_asm_output_labelref (FILE *stream, const char *name)
28003 {
28004   int skip;
28005   int verbatim = 0;
28006 
28007   while ((skip = arm_get_strip_length (* name)))
28008     {
28009       verbatim |= (*name == '*');
28010       name += skip;
28011     }
28012 
28013   if (verbatim)
28014     fputs (name, stream);
28015   else
28016     asm_fprintf (stream, "%U%s", name);
28017 }
28018 
28019 /* This function is used to emit an EABI tag and its associated value.
28020    We emit the numerical value of the tag in case the assembler does not
28021    support textual tags.  (E.g. gas prior to 2.20.)  If requested we include
28022    the tag name in a comment so that anyone reading the assembler output
28023    will know which tag is being set.
28024 
28025    This function is not static because arm-c.c needs it too.  */
28026 
28027 void
28028 arm_emit_eabi_attribute (const char *name, int num, int val)
28029 {
28030   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28031   if (flag_verbose_asm || flag_debug_asm)
28032     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28033   asm_fprintf (asm_out_file, "\n");
28034 }
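
/* As an illustration (hypothetical tag name and number), a call such as
   arm_emit_eabi_attribute ("Tag_example", 42, 1) emits
   ".eabi_attribute 42, 1", followed by "@ Tag_example" when -fverbose-asm
   or -dA is in effect.  */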
28035 
28036 /* This function is used to print CPU tuning information as comment
28037    in assembler file.  Pointers are not printed for now.  */
28038 
28039 void
28040 arm_print_tune_info (void)
28041 {
28042   asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28043   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28044 	       current_tune->constant_limit);
28045   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28046 	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28047   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28048 	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28049   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28050 	       "prefetch.l1_cache_size:\t%d\n",
28051 	       current_tune->prefetch.l1_cache_size);
28052   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28053 	       "prefetch.l1_cache_line_size:\t%d\n",
28054 	       current_tune->prefetch.l1_cache_line_size);
28055   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28056 	       "prefer_constant_pool:\t%d\n",
28057 	       (int) current_tune->prefer_constant_pool);
28058   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28059 	       "branch_cost:\t(s:speed, p:predictable)\n");
28060   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28061   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28062 	       current_tune->branch_cost (false, false));
28063   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28064 	       current_tune->branch_cost (false, true));
28065   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28066 	       current_tune->branch_cost (true, false));
28067   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28068 	       current_tune->branch_cost (true, true));
28069   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28070 	       "prefer_ldrd_strd:\t%d\n",
28071 	       (int) current_tune->prefer_ldrd_strd);
28072   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28073 	       "logical_op_non_short_circuit:\t[%d,%d]\n",
28074 	       (int) current_tune->logical_op_non_short_circuit_thumb,
28075 	       (int) current_tune->logical_op_non_short_circuit_arm);
28076   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28077 	       "disparage_flag_setting_t16_encodings:\t%d\n",
28078 	       (int) current_tune->disparage_flag_setting_t16_encodings);
28079   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28080 	       "string_ops_prefer_neon:\t%d\n",
28081 	       (int) current_tune->string_ops_prefer_neon);
28082   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28083 	       "max_insns_inline_memset:\t%d\n",
28084 	       current_tune->max_insns_inline_memset);
28085   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28086 	       current_tune->fusible_ops);
28087   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28088 	       (int) current_tune->sched_autopref);
28089 }
28090 
28091 /* Print .arch and .arch_extension directives corresponding to the
28092    current architecture configuration.  */
28093 static void
arm_print_asm_arch_directives ()
28095 {
28096   const arch_option *arch
28097     = arm_parse_arch_option_name (all_architectures, "-march",
28098 				  arm_active_target.arch_name);
28099   auto_sbitmap opt_bits (isa_num_bits);
28100 
28101   gcc_assert (arch);
28102 
28103   asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
28104   arm_last_printed_arch_string = arm_active_target.arch_name;
28105   if (!arch->common.extensions)
28106     return;
28107 
28108   for (const struct cpu_arch_extension *opt = arch->common.extensions;
28109        opt->name != NULL;
28110        opt++)
28111     {
28112       if (!opt->remove)
28113 	{
28114 	  arm_initialize_isa (opt_bits, opt->isa_bits);
28115 
	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
	     floating point instructions are disabled.  So the following check
	     restricts the printing of ".arch_extension mve" and
	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
	     this special behaviour because the feature bits "mve" and
	     "mve_float" are not part of the "fpu bits", so they are not
	     cleared when -mfloat-abi=soft (i.e. nofp) is used, but the macros
	     TARGET_HAVE_MVE and TARGET_HAVE_MVE_FLOAT are disabled.  */
28125 	  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28126 	      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28127 		  && !TARGET_HAVE_MVE_FLOAT))
28128 	    continue;
28129 
28130 	  /* If every feature bit of this option is set in the target
28131 	     ISA specification, print out the option name.  However,
28132 	     don't print anything if all the bits are part of the
28133 	     FPU specification.  */
28134 	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
28135 	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28136 	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
28137 	}
28138     }
28139 }
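
/* As a rough illustration of the directives printed above: for
   -march=armv8-a+crc one would expect output resembling

	.arch armv8-a
	.arch_extension crc

   with extensions that are purely FPU bits, or that are disabled by
   the current float ABI (the MVE case above), filtered out.  */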
28140 
28141 static void
arm_file_start (void)
28143 {
28144   int val;
28145 
28146   if (TARGET_BPABI)
28147     {
28148       /* We don't have a specified CPU.  Use the architecture to
28149 	 generate the tags.
28150 
28151 	 Note: it might be better to do this unconditionally, then the
28152 	 assembler would not need to know about all new CPU names as
28153 	 they are added.  */
28154       if (!arm_active_target.core_name)
28155 	{
28156 	  /* armv7ve doesn't support any extensions.  */
28157 	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
28158 	    {
	      /* Keep backward compatibility for assemblers
28160 		 which don't support armv7ve.  */
28161 	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28162 	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28163 	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28164 	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28165 	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28166 	      arm_last_printed_arch_string = "armv7ve";
28167 	    }
28168 	  else
28169 	    arm_print_asm_arch_directives ();
28170 	}
28171       else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
28172 	{
28173 	  asm_fprintf (asm_out_file, "\t.arch %s\n",
28174 		       arm_active_target.core_name + 8);
28175 	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
28176 	}
28177       else
28178 	{
28179 	  const char* truncated_name
28180 	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
28181 	  if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28182 	    asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28183 			 truncated_name);
28184 	  else
28185 	    asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28186 	}
28187 
28188       if (print_tune_info)
28189 	arm_print_tune_info ();
28190 
28191       if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28192 	arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28193 
28194       if (TARGET_HARD_FLOAT_ABI)
28195 	arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28196 
28197       /* Some of these attributes only apply when the corresponding features
28198 	 are used.  However we don't have any easy way of figuring this out.
28199 	 Conservatively record the setting that would have been used.  */
28200 
28201       if (flag_rounding_math)
28202 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28203 
28204       if (!flag_unsafe_math_optimizations)
28205 	{
28206 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28207 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28208 	}
28209       if (flag_signaling_nans)
28210 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28211 
28212       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28213 			   flag_finite_math_only ? 1 : 3);
28214 
28215       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28216       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28217       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28218 			       flag_short_enums ? 1 : 2);
28219 
28220       /* Tag_ABI_optimization_goals.  */
28221       if (optimize_size)
28222 	val = 4;
28223       else if (optimize >= 2)
28224 	val = 2;
28225       else if (optimize)
28226 	val = 1;
28227       else
28228 	val = 6;
28229       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28230 
28231       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28232 			       unaligned_access);
28233 
28234       if (arm_fp16_format)
28235 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28236 			     (int) arm_fp16_format);
28237 
28238       if (arm_lang_output_object_attributes_hook)
28239 	arm_lang_output_object_attributes_hook();
28240     }
28241 
28242   default_file_start ();
28243 }
28244 
28245 static void
arm_file_end (void)
28247 {
28248   int regno;
28249 
28250   if (NEED_INDICATE_EXEC_STACK)
28251     /* Add .note.GNU-stack.  */
28252     file_end_indicate_exec_stack ();
28253 
28254   if (! thumb_call_reg_needed)
28255     return;
28256 
28257   switch_to_section (text_section);
28258   asm_fprintf (asm_out_file, "\t.code 16\n");
28259   ASM_OUTPUT_ALIGN (asm_out_file, 1);
28260 
28261   for (regno = 0; regno < LR_REGNUM; regno++)
28262     {
28263       rtx label = thumb_call_via_label[regno];
28264 
28265       if (label != 0)
28266 	{
28267 	  targetm.asm_out.internal_label (asm_out_file, "L",
28268 					  CODE_LABEL_NUMBER (label));
28269 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28270 	}
28271     }
28272 }
28273 
28274 #ifndef ARM_PE
28275 /* Symbols in the text segment can be accessed without indirecting via the
28276    constant pool; it may take an extra binary operation, but this is still
28277    faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
28279    simplification.  */
28280 
28281 static void
arm_encode_section_info (tree decl, rtx rtl, int first)
28283 {
28284   if (optimize > 0 && TREE_CONSTANT (decl))
28285     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28286 
28287   default_encode_section_info (decl, rtl, first);
28288 }
28289 #endif /* !ARM_PE */
28290 
28291 static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28293 {
28294   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28295       && !strcmp (prefix, "L"))
28296     {
28297       arm_ccfsm_state = 0;
28298       arm_target_insn = NULL;
28299     }
28300   default_internal_label (stream, prefix, labelno);
28301 }
28302 
28303 /* Define classes to generate code as RTL or output asm to a file.
   Using templates then allows the same code to be used to output
   code sequences in the two formats.  */
28306 class thumb1_const_rtl
28307 {
28308  public:
  thumb1_const_rtl (rtx dst) : dst (dst) {}
28310 
  void mov (HOST_WIDE_INT val)
28312   {
28313     emit_set_insn (dst, GEN_INT (val));
28314   }
28315 
  void add (HOST_WIDE_INT val)
28317   {
28318     emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28319   }
28320 
  void ashift (HOST_WIDE_INT shift)
28322   {
28323     emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28324   }
28325 
  void neg ()
28327   {
28328     emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28329   }
28330 
28331  private:
28332   rtx dst;
28333 };
28334 
28335 class thumb1_const_print
28336 {
28337  public:
  thumb1_const_print (FILE *f, int regno)
28339   {
28340     t_file = f;
28341     dst_regname = reg_names[regno];
28342   }
28343 
  void mov (HOST_WIDE_INT val)
28345   {
28346     asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28347 		 dst_regname, val);
28348   }
28349 
  void add (HOST_WIDE_INT val)
28351   {
28352     asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28353 		 dst_regname, val);
28354   }
28355 
  void ashift (HOST_WIDE_INT shift)
28357   {
28358     asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28359 		 dst_regname, shift);
28360   }
28361 
  void neg ()
28363   {
28364     asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28365   }
28366 
28367  private:
28368   FILE *t_file;
28369   const char *dst_regname;
28370 };
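
/* A minimal usage sketch (purely illustrative): the template
   thumb1_gen_const_int_1 below can drive either class, so

     thumb1_const_rtl rtl_ops (dst);
     thumb1_gen_const_int_1 (rtl_ops, 0xff00ff);

   emits RTL insns that build the constant into DST, while

     thumb1_const_print asm_ops (asm_out_file, 3);
     thumb1_gen_const_int_1 (asm_ops, 0xff00ff);

   prints the equivalent movs/lsls/adds sequence for r3 straight to
   the assembly file.  The thumb1_gen_const_int_rtl and
   thumb1_gen_const_int_print proxies below wrap exactly these two
   patterns.  */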
28371 
28372 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28373    Avoid generating useless code when one of the bytes is zero.  */
28374 template <class T>
28375 void
thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28377 {
28378   bool mov_done_p = false;
28379   unsigned HOST_WIDE_INT val = op1;
28380   int shift = 0;
28381   int i;
28382 
28383   gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28384 
28385   if (val <= 255)
28386     {
28387       dst.mov (val);
28388       return;
28389     }
28390 
  /* For negative numbers with the first nine bits set, build the
     opposite of OP1, then negate it; this is generally shorter, and
     never longer.  */
28394   if ((val & 0xFF800000) == 0xFF800000)
28395     {
28396       thumb1_gen_const_int_1 (dst, -op1);
28397       dst.neg ();
28398       return;
28399     }
28400 
  /* In the general case, we need 7 instructions to build
     a 32-bit constant (1 movs, 3 lsls, 3 adds).  We can
     do better if VAL is small enough, or
     right-shiftable by a suitable amount.  If the
     right-shift lets us encode at least one byte fewer,
     it's worth it: we save an adds and an lsls at the
     expense of a final lsls.  */
28408   int final_shift = number_of_first_bit_set (val);
28409 
28410   int leading_zeroes = clz_hwi (val);
28411   int number_of_bytes_needed
28412     = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28413        / BITS_PER_UNIT) + 1;
28414   int number_of_bytes_needed2
28415     = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28416        / BITS_PER_UNIT) + 1;
28417 
28418   if (number_of_bytes_needed2 < number_of_bytes_needed)
28419     val >>= final_shift;
28420   else
28421     final_shift = 0;
28422 
28423   /* If we are in a very small range, we can use either a single movs
28424      or movs+adds.  */
28425   if (val <= 510)
28426     {
28427       if (val > 255)
28428 	{
28429 	  unsigned HOST_WIDE_INT high = val - 255;
28430 
28431 	  dst.mov (high);
28432 	  dst.add (255);
28433 	}
28434       else
28435 	dst.mov (val);
28436 
28437       if (final_shift > 0)
28438 	dst.ashift (final_shift);
28439     }
28440   else
28441     {
28442       /* General case, emit upper 3 bytes as needed.  */
28443       for (i = 0; i < 3; i++)
28444 	{
28445 	  unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28446 
28447 	  if (byte)
28448 	    {
	      /* We are about to emit new bits: stop accumulating the
		 shift amount, and left-shift only if we have already
		 emitted some upper bits.  */
28452 	      if (mov_done_p)
28453 		{
28454 		  dst.ashift (shift);
28455 		  dst.add (byte);
28456 		}
28457 	      else
28458 		dst.mov (byte);
28459 
28460 	      /* Stop accumulating shift amount since we've just
28461 		 emitted some bits.  */
28462 	      shift = 0;
28463 
28464 	      mov_done_p = true;
28465 	    }
28466 
28467 	  if (mov_done_p)
28468 	    shift += 8;
28469 	}
28470 
28471       /* Emit lower byte.  */
28472       if (!mov_done_p)
28473 	dst.mov (val & 0xff);
28474       else
28475 	{
28476 	  dst.ashift (shift);
28477 	  if (val & 0xff)
28478 	    dst.add (val & 0xff);
28479 	}
28480 
28481       if (final_shift > 0)
28482 	dst.ashift (final_shift);
28483     }
28484 }
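
/* Worked example (illustrative only): for OP1 == 0x00ff00ff the loop
   above skips the zero high byte, emits a "movs" of 255 for the byte
   in bits 16-23, accumulates a shift of 16 while crossing the zero
   byte, and then emits an "lsls" by 16 followed by an "adds" of 255,
   producing 0x00ff00ff in three instructions instead of the
   worst-case seven.  */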
28485 
28486 /* Proxies for thumb1.md, since the thumb1_const_print and
28487    thumb1_const_rtl classes are not exported.  */
28488 void
thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28490 {
28491   thumb1_const_rtl t (dst);
28492   thumb1_gen_const_int_1 (t, op1);
28493 }
28494 
28495 void
thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28497 {
28498   thumb1_const_print t (asm_out_file, REGNO (dst));
28499   thumb1_gen_const_int_1 (t, op1);
28500 }
28501 
28502 /* Output code to add DELTA to the first argument, and then jump
28503    to FUNCTION.  Used for C++ multiple inheritance.  */
28504 
28505 static void
arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28507 		     HOST_WIDE_INT, tree function)
28508 {
28509   static int thunk_label = 0;
28510   char label[256];
28511   char labelpc[256];
28512   int mi_delta = delta;
28513   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28514   int shift = 0;
28515   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28516                     ? 1 : 0);
28517   if (mi_delta < 0)
28518     mi_delta = - mi_delta;
28519 
28520   final_start_function (emit_barrier (), file, 1);
28521 
28522   if (TARGET_THUMB1)
28523     {
28524       int labelno = thunk_label++;
28525       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28526       /* Thunks are entered in arm mode when available.  */
28527       if (TARGET_THUMB1_ONLY)
28528 	{
28529 	  /* push r3 so we can use it as a temporary.  */
28530 	  /* TODO: Omit this save if r3 is not used.  */
28531 	  fputs ("\tpush {r3}\n", file);
28532 
28533 	  /* With -mpure-code, we cannot load the address from the
28534 	     constant pool: we build it explicitly.  */
28535 	  if (target_pure_code)
28536 	    {
28537 	      fputs ("\tmovs\tr3, #:upper8_15:#", file);
28538 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28539 	      fputc ('\n', file);
28540 	      fputs ("\tlsls r3, #8\n", file);
28541 	      fputs ("\tadds\tr3, #:upper0_7:#", file);
28542 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28543 	      fputc ('\n', file);
28544 	      fputs ("\tlsls r3, #8\n", file);
28545 	      fputs ("\tadds\tr3, #:lower8_15:#", file);
28546 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28547 	      fputc ('\n', file);
28548 	      fputs ("\tlsls r3, #8\n", file);
28549 	      fputs ("\tadds\tr3, #:lower0_7:#", file);
28550 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28551 	      fputc ('\n', file);
28552 	    }
28553 	  else
28554 	    fputs ("\tldr\tr3, ", file);
28555 	}
28556       else
28557 	{
28558 	  fputs ("\tldr\tr12, ", file);
28559 	}
28560 
28561       if (!target_pure_code)
28562 	{
28563 	  assemble_name (file, label);
28564 	  fputc ('\n', file);
28565 	}
28566 
28567       if (flag_pic)
28568 	{
28569 	  /* If we are generating PIC, the ldr instruction below loads
28570 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
28571 	     the address of the add + 8, so we have:
28572 
28573 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28574 	         = target + 1.
28575 
28576 	     Note that we have "+ 1" because some versions of GNU ld
28577 	     don't set the low bit of the result for R_ARM_REL32
28578 	     relocations against thumb function symbols.
28579 	     On ARMv6M this is +4, not +8.  */
28580 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28581 	  assemble_name (file, labelpc);
28582 	  fputs (":\n", file);
28583 	  if (TARGET_THUMB1_ONLY)
28584 	    {
28585 	      /* This is 2 insns after the start of the thunk, so we know it
28586 	         is 4-byte aligned.  */
28587 	      fputs ("\tadd\tr3, pc, r3\n", file);
28588 	      fputs ("\tmov r12, r3\n", file);
28589 	    }
28590 	  else
28591 	    fputs ("\tadd\tr12, pc, r12\n", file);
28592 	}
28593       else if (TARGET_THUMB1_ONLY)
28594 	fputs ("\tmov r12, r3\n", file);
28595     }
28596   if (TARGET_THUMB1_ONLY)
28597     {
28598       if (mi_delta > 255)
28599 	{
28600 	  /* With -mpure-code, we cannot load MI_DELTA from the
28601 	     constant pool: we build it explicitly.  */
28602 	  if (target_pure_code)
28603 	    {
28604 	      thumb1_const_print r3 (file, 3);
28605 	      thumb1_gen_const_int_1 (r3, mi_delta);
28606 	    }
28607 	  else
28608 	    {
28609 	      fputs ("\tldr\tr3, ", file);
28610 	      assemble_name (file, label);
28611 	      fputs ("+4\n", file);
28612 	    }
28613 	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28614 		       mi_op, this_regno, this_regno);
28615 	}
28616       else if (mi_delta != 0)
28617 	{
	  /* Thumb1 unified syntax requires an s suffix in the instruction
	     name when one of the operands is an immediate.  */
28620 	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28621 		       mi_op, this_regno, this_regno,
28622 		       mi_delta);
28623 	}
28624     }
28625   else
28626     {
28627       /* TODO: Use movw/movt for large constants when available.  */
28628       while (mi_delta != 0)
28629 	{
28630 	  if ((mi_delta & (3 << shift)) == 0)
28631 	    shift += 2;
28632 	  else
28633 	    {
28634 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28635 			   mi_op, this_regno, this_regno,
28636 			   mi_delta & (0xff << shift));
28637 	      mi_delta &= ~(0xff << shift);
28638 	      shift += 8;
28639 	    }
28640 	}
28641     }
28642   if (TARGET_THUMB1)
28643     {
28644       if (TARGET_THUMB1_ONLY)
28645 	fputs ("\tpop\t{r3}\n", file);
28646 
28647       fprintf (file, "\tbx\tr12\n");
28648 
28649       /* With -mpure-code, we don't need to emit literals for the
28650 	 function address and delta since we emitted code to build
28651 	 them.  */
28652       if (!target_pure_code)
28653 	{
28654 	  ASM_OUTPUT_ALIGN (file, 2);
28655 	  assemble_name (file, label);
28656 	  fputs (":\n", file);
28657 	  if (flag_pic)
28658 	    {
28659 	      /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
28660 	      rtx tem = XEXP (DECL_RTL (function), 0);
28661 	      /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28662 		 pipeline offset is four rather than eight.  Adjust the offset
28663 		 accordingly.  */
28664 	      tem = plus_constant (GET_MODE (tem), tem,
28665 				   TARGET_THUMB1_ONLY ? -3 : -7);
28666 	      tem = gen_rtx_MINUS (GET_MODE (tem),
28667 				   tem,
28668 				   gen_rtx_SYMBOL_REF (Pmode,
28669 						       ggc_strdup (labelpc)));
28670 	      assemble_integer (tem, 4, BITS_PER_WORD, 1);
28671 	    }
28672 	  else
28673 	    /* Output ".word .LTHUNKn".  */
28674 	    assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28675 
28676 	  if (TARGET_THUMB1_ONLY && mi_delta > 255)
28677 	    assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28678 	}
28679     }
28680   else
28681     {
28682       fputs ("\tb\t", file);
28683       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28684       if (NEED_PLT_RELOC)
28685         fputs ("(PLT)", file);
28686       fputc ('\n', file);
28687     }
28688 
28689   final_end_function ();
28690 }
28691 
28692 /* MI thunk handling for TARGET_32BIT.  */
28693 
28694 static void
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28696 		       HOST_WIDE_INT vcall_offset, tree function)
28697 {
28698   const bool long_call_p = arm_is_long_call_p (function);
28699 
28700   /* On ARM, this_regno is R0 or R1 depending on
28701      whether the function returns an aggregate or not.
28702   */
28703   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28704 				       function)
28705 		    ? R1_REGNUM : R0_REGNUM);
28706 
28707   rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28708   rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28709   reload_completed = 1;
28710   emit_note (NOTE_INSN_PROLOGUE_END);
28711 
28712   /* Add DELTA to THIS_RTX.  */
28713   if (delta != 0)
28714     arm_split_constant (PLUS, Pmode, NULL_RTX,
28715 			delta, this_rtx, this_rtx, false);
28716 
28717   /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
28718   if (vcall_offset != 0)
28719     {
28720       /* Load *THIS_RTX.  */
28721       emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28722       /* Compute *THIS_RTX + VCALL_OFFSET.  */
28723       arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28724 			  false);
28725       /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
28726       emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28727       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28728     }
28729 
28730   /* Generate a tail call to the target function.  */
28731   if (!TREE_USED (function))
28732     {
28733       assemble_external (function);
28734       TREE_USED (function) = 1;
28735     }
28736   rtx funexp = XEXP (DECL_RTL (function), 0);
28737   if (long_call_p)
28738     {
28739       emit_move_insn (temp, funexp);
28740       funexp = temp;
28741     }
28742   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28743   rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28744   SIBLING_CALL_P (insn) = 1;
28745   emit_barrier ();
28746 
28747   /* Indirect calls require a bit of fixup in PIC mode.  */
28748   if (long_call_p)
28749     {
28750       split_all_insns_noflow ();
28751       arm_reorg ();
28752     }
28753 
28754   insn = get_insns ();
28755   shorten_branches (insn);
28756   final_start_function (insn, file, 1);
28757   final (insn, file, 1);
28758   final_end_function ();
28759 
28760   /* Stop pretending this is a post-reload pass.  */
28761   reload_completed = 0;
28762 }
28763 
28764 /* Output code to add DELTA to the first argument, and then jump
28765    to FUNCTION.  Used for C++ multiple inheritance.  */
28766 
28767 static void
arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28769 		     HOST_WIDE_INT vcall_offset, tree function)
28770 {
28771   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28772 
28773   assemble_start_function (thunk, fnname);
28774   if (TARGET_32BIT)
28775     arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28776   else
28777     arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28778   assemble_end_function (thunk, fnname);
28779 }
28780 
28781 int
arm_emit_vector_const (FILE *file, rtx x)
28783 {
28784   int i;
28785   const char * pattern;
28786 
28787   gcc_assert (GET_CODE (x) == CONST_VECTOR);
28788 
28789   switch (GET_MODE (x))
28790     {
28791     case E_V2SImode: pattern = "%08x"; break;
28792     case E_V4HImode: pattern = "%04x"; break;
28793     case E_V8QImode: pattern = "%02x"; break;
28794     default:       gcc_unreachable ();
28795     }
28796 
28797   fprintf (file, "0x");
28798   for (i = CONST_VECTOR_NUNITS (x); i--;)
28799     {
28800       rtx element;
28801 
28802       element = CONST_VECTOR_ELT (x, i);
28803       fprintf (file, pattern, INTVAL (element));
28804     }
28805 
28806   return 1;
28807 }
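
/* For example (illustrative): a V4HImode vector holding {1, 2, 3, 4}
   is printed with the "%04x" pattern from the highest-numbered
   element down, giving "0x0004000300020001".  */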
28808 
/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
28810    HFmode constant pool entries are actually loaded with ldr.  */
28811 void
arm_emit_fp16_const (rtx c)
28813 {
28814   long bits;
28815 
28816   bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28817   if (WORDS_BIG_ENDIAN)
28818     assemble_zeros (2);
28819   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28820   if (!WORDS_BIG_ENDIAN)
28821     assemble_zeros (2);
28822 }
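
/* For example (illustrative, little-endian): the HFmode constant 1.0
   has the half-precision bit pattern 0x3c00, so the directives above
   emit a 16-bit value of 0x3c00 followed by two bytes of zero
   padding, filling the 4-byte constant pool slot.  */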
28823 
28824 const char *
arm_output_load_gr (rtx *operands)
28826 {
28827   rtx reg;
28828   rtx offset;
28829   rtx wcgr;
28830   rtx sum;
28831 
28832   if (!MEM_P (operands [1])
28833       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28834       || !REG_P (reg = XEXP (sum, 0))
28835       || !CONST_INT_P (offset = XEXP (sum, 1))
28836       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28837     return "wldrw%?\t%0, %1";
28838 
28839   /* Fix up an out-of-range load of a GR register.  */
28840   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28841   wcgr = operands[0];
28842   operands[0] = reg;
28843   output_asm_insn ("ldr%?\t%0, %1", operands);
28844 
28845   operands[0] = wcgr;
28846   operands[1] = reg;
28847   output_asm_insn ("tmcr%?\t%0, %1", operands);
28848   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28849 
28850   return "";
28851 }
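
/* For illustration, the out-of-range expansion above emits a sequence
   roughly like

	str	rX, [sp, #-4]!	@ Start of GR load expansion
	ldr	rX, <original memory operand>
	tmcr	wcgrN, rX
	ldr	rX, [sp], #4	@ End of GR load expansion

   where rX stands for the base register taken from the original
   address and wcgrN for the destination GR register.  */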
28852 
28853 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28854 
28855    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28856    named arg and all anonymous args onto the stack.
28857    XXX I know the prologue shouldn't be pushing registers, but it is faster
28858    that way.  */
28859 
28860 static void
arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28862 			    const function_arg_info &arg,
28863 			    int *pretend_size,
28864 			    int second_time ATTRIBUTE_UNUSED)
28865 {
28866   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28867   int nregs;
28868 
28869   cfun->machine->uses_anonymous_args = 1;
28870   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28871     {
28872       nregs = pcum->aapcs_ncrn;
28873       if (nregs & 1)
28874 	{
28875 	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
28876 	  if (res < 0 && warn_psabi)
28877 	    inform (input_location, "parameter passing for argument of "
28878 		    "type %qT changed in GCC 7.1", arg.type);
28879 	  else if (res > 0)
28880 	    {
28881 	      nregs++;
28882 	      if (res > 1 && warn_psabi)
28883 		inform (input_location,
28884 			"parameter passing for argument of type "
28885 			"%qT changed in GCC 9.1", arg.type);
28886 	    }
28887 	}
28888     }
28889   else
28890     nregs = pcum->nregs;
28891 
28892   if (nregs < NUM_ARG_REGS)
28893     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28894 }
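
/* As a small worked example (assuming the usual four ARM argument
   registers and 4-byte words): if three core registers are already
   taken by named arguments, NREGS is 3 and *PRETEND_SIZE becomes
   (4 - 3) * 4 = 4, so the prologue pushes just r3 for the anonymous
   arguments.  */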
28895 
28896 /* We can't rely on the caller doing the proper promotion when
28897    using APCS or ATPCS.  */
28898 
28899 static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28901 {
28902     return !TARGET_AAPCS_BASED;
28903 }
28904 
28905 static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28907                            machine_mode mode,
28908                            int *punsignedp ATTRIBUTE_UNUSED,
28909                            const_tree fntype ATTRIBUTE_UNUSED,
28910                            int for_return ATTRIBUTE_UNUSED)
28911 {
28912   if (GET_MODE_CLASS (mode) == MODE_INT
28913       && GET_MODE_SIZE (mode) < 4)
28914     return SImode;
28915 
28916   return mode;
28917 }
28918 
28919 
28920 static bool
arm_default_short_enums (void)
28922 {
28923   return ARM_DEFAULT_SHORT_ENUMS;
28924 }
28925 
28926 
28927 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
28928 
28929 static bool
arm_align_anon_bitfield (void)
28931 {
28932   return TARGET_AAPCS_BASED;
28933 }
28934 
28935 
28936 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
28937 
28938 static tree
arm_cxx_guard_type (void)
28940 {
28941   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28942 }
28943 
28944 
28945 /* The EABI says test the least significant bit of a guard variable.  */
28946 
28947 static bool
arm_cxx_guard_mask_bit (void)
28949 {
28950   return TARGET_AAPCS_BASED;
28951 }
28952 
28953 
28954 /* The EABI specifies that all array cookies are 8 bytes long.  */
28955 
28956 static tree
arm_get_cookie_size (tree type)
28958 {
28959   tree size;
28960 
28961   if (!TARGET_AAPCS_BASED)
28962     return default_cxx_get_cookie_size (type);
28963 
28964   size = build_int_cst (sizetype, 8);
28965   return size;
28966 }
28967 
28968 
28969 /* The EABI says that array cookies should also contain the element size.  */
28970 
28971 static bool
arm_cookie_has_size (void)
28973 {
28974   return TARGET_AAPCS_BASED;
28975 }
28976 
28977 
28978 /* The EABI says constructors and destructors should return a pointer to
28979    the object constructed/destroyed.  */
28980 
28981 static bool
arm_cxx_cdtor_returns_this (void)
28983 {
28984   return TARGET_AAPCS_BASED;
28985 }
28986 
28987 /* The EABI says that an inline function may never be the key
28988    method.  */
28989 
28990 static bool
arm_cxx_key_method_may_be_inline (void)
28992 {
28993   return !TARGET_AAPCS_BASED;
28994 }
28995 
28996 static void
arm_cxx_determine_class_data_visibility (tree decl)
28998 {
28999   if (!TARGET_AAPCS_BASED
29000       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29001     return;
29002 
29003   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29004      is exported.  However, on systems without dynamic vague linkage,
29005      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
29006   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29007     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29008   else
29009     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29010   DECL_VISIBILITY_SPECIFIED (decl) = 1;
29011 }
29012 
29013 static bool
arm_cxx_class_data_always_comdat (void)
29015 {
29016   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29017      vague linkage if the class has no key function.  */
29018   return !TARGET_AAPCS_BASED;
29019 }
29020 
29021 
29022 /* The EABI says __aeabi_atexit should be used to register static
29023    destructors.  */
29024 
29025 static bool
arm_cxx_use_aeabi_atexit (void)
29027 {
29028   return TARGET_AAPCS_BASED;
29029 }
29030 
29031 
29032 void
arm_set_return_address (rtx source, rtx scratch)
29034 {
29035   arm_stack_offsets *offsets;
29036   HOST_WIDE_INT delta;
29037   rtx addr, mem;
29038   unsigned long saved_regs;
29039 
29040   offsets = arm_get_frame_offsets ();
29041   saved_regs = offsets->saved_regs_mask;
29042 
29043   if ((saved_regs & (1 << LR_REGNUM)) == 0)
29044     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29045   else
29046     {
29047       if (frame_pointer_needed)
29048 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29049       else
29050 	{
29051 	  /* LR will be the first saved register.  */
29052 	  delta = offsets->outgoing_args - (offsets->frame + 4);
29053 
29054 
29055 	  if (delta >= 4096)
29056 	    {
29057 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29058 				     GEN_INT (delta & ~4095)));
29059 	      addr = scratch;
29060 	      delta &= 4095;
29061 	    }
29062 	  else
29063 	    addr = stack_pointer_rtx;
29064 
29065 	  addr = plus_constant (Pmode, addr, delta);
29066 	}
29067 
29068       /* The store needs to be marked to prevent DSE from deleting
29069 	 it as dead if it is based on fp.  */
29070       mem = gen_frame_mem (Pmode, addr);
29071       MEM_VOLATILE_P (mem) = true;
29072       emit_move_insn (mem, source);
29073     }
29074 }
29075 
29076 
29077 void
thumb_set_return_address (rtx source, rtx scratch)
29079 {
29080   arm_stack_offsets *offsets;
29081   HOST_WIDE_INT delta;
29082   HOST_WIDE_INT limit;
29083   int reg;
29084   rtx addr, mem;
29085   unsigned long mask;
29086 
29087   emit_use (source);
29088 
29089   offsets = arm_get_frame_offsets ();
29090   mask = offsets->saved_regs_mask;
29091   if (mask & (1 << LR_REGNUM))
29092     {
29093       limit = 1024;
29094       /* Find the saved regs.  */
29095       if (frame_pointer_needed)
29096 	{
29097 	  delta = offsets->soft_frame - offsets->saved_args;
29098 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29099 	  if (TARGET_THUMB1)
29100 	    limit = 128;
29101 	}
29102       else
29103 	{
29104 	  delta = offsets->outgoing_args - offsets->saved_args;
29105 	  reg = SP_REGNUM;
29106 	}
29107       /* Allow for the stack frame.  */
29108       if (TARGET_THUMB1 && TARGET_BACKTRACE)
29109 	delta -= 16;
29110       /* The link register is always the first saved register.  */
29111       delta -= 4;
29112 
29113       /* Construct the address.  */
29114       addr = gen_rtx_REG (SImode, reg);
29115       if (delta > limit)
29116 	{
29117 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29118 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29119 	  addr = scratch;
29120 	}
29121       else
29122 	addr = plus_constant (Pmode, addr, delta);
29123 
29124       /* The store needs to be marked to prevent DSE from deleting
29125 	 it as dead if it is based on fp.  */
29126       mem = gen_frame_mem (Pmode, addr);
29127       MEM_VOLATILE_P (mem) = true;
29128       emit_move_insn (mem, source);
29129     }
29130   else
29131     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29132 }
29133 
29134 /* Implements target hook vector_mode_supported_p.  */
29135 bool
arm_vector_mode_supported_p (machine_mode mode)
29137 {
29138   /* Neon also supports V2SImode, etc. listed in the clause below.  */
29139   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29140       || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29141       || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29142       || mode == V8BFmode))
29143     return true;
29144 
29145   if ((TARGET_NEON || TARGET_IWMMXT)
29146       && ((mode == V2SImode)
29147 	  || (mode == V4HImode)
29148 	  || (mode == V8QImode)))
29149     return true;
29150 
29151   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29152       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29153       || mode == V2HAmode))
29154     return true;
29155 
29156   if (TARGET_HAVE_MVE
29157       && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29158 	  || mode == V16QImode))
29159       return true;
29160 
29161   if (TARGET_HAVE_MVE_FLOAT
29162       && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29163       return true;
29164 
29165   return false;
29166 }
29167 
29168 /* Implements target hook array_mode_supported_p.  */
29169 
29170 static bool
arm_array_mode_supported_p (machine_mode mode,
29172 			    unsigned HOST_WIDE_INT nelems)
29173 {
29174   /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29175      for now, as the lane-swapping logic needs to be extended in the expanders.
29176      See PR target/82518.  */
29177   if (TARGET_NEON && !BYTES_BIG_ENDIAN
29178       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29179       && (nelems >= 2 && nelems <= 4))
29180     return true;
29181 
29182   if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29183       && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29184     return true;
29185 
29186   return false;
29187 }
29188 
/* Use the option -mvectorize-with-neon-double to override the use of quadword
29190    registers when autovectorizing for Neon, at least until multiple vector
29191    widths are supported properly by the middle-end.  */
29192 
29193 static machine_mode
arm_preferred_simd_mode (scalar_mode mode)
29195 {
29196   if (TARGET_NEON)
29197     switch (mode)
29198       {
29199       case E_HFmode:
29200 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29201       case E_SFmode:
29202 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29203       case E_SImode:
29204 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29205       case E_HImode:
29206 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29207       case E_QImode:
29208 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29209       case E_DImode:
29210 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
29211 	  return V2DImode;
29212 	break;
29213 
29214       default:;
29215       }
29216 
29217   if (TARGET_REALLY_IWMMXT)
29218     switch (mode)
29219       {
29220       case E_SImode:
29221 	return V2SImode;
29222       case E_HImode:
29223 	return V4HImode;
29224       case E_QImode:
29225 	return V8QImode;
29226 
29227       default:;
29228       }
29229 
29230   if (TARGET_HAVE_MVE)
29231     switch (mode)
29232       {
29233       case E_QImode:
29234 	return V16QImode;
29235       case E_HImode:
29236 	return V8HImode;
29237       case E_SImode:
29238 	return V4SImode;
29239 
29240       default:;
29241       }
29242 
29243   if (TARGET_HAVE_MVE_FLOAT)
29244     switch (mode)
29245       {
29246       case E_HFmode:
29247 	return V8HFmode;
29248       case E_SFmode:
29249 	return V4SFmode;
29250 
29251       default:;
29252       }
29253 
29254   return word_mode;
29255 }
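
/* For example (illustrative): when vectorizing for Neon with the
   default settings, SFmode data prefers V4SFmode (a quadword
   register), whereas with -mvectorize-with-neon-double it prefers
   V2SFmode; anything not handled above falls back to word_mode.  */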
29256 
29257 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29258 
29259    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
29260    using r0-r4 for function arguments, r7 for the stack frame and don't have
29261    enough left over to do doubleword arithmetic.  For Thumb-2 all the
29262    potentially problematic instructions accept high registers so this is not
29263    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
29264    that require many low registers.  */
29265 static bool
arm_class_likely_spilled_p (reg_class_t rclass)
29267 {
29268   if ((TARGET_THUMB1 && rclass == LO_REGS)
29269       || rclass  == CC_REG)
29270     return true;
29271 
29272   return false;
29273 }
29274 
29275 /* Implements target hook small_register_classes_for_mode_p.  */
29276 bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29278 {
29279   return TARGET_THUMB1;
29280 }
29281 
29282 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
29283    ARM insns and therefore guarantee that the shift count is modulo 256.
29284    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29285    guarantee no particular behavior for out-of-range counts.  */
29286 
29287 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
29289 {
29290   return mode == SImode ? 255 : 0;
29291 }
29292 
29293 
29294 /* Map internal gcc register numbers to DWARF2 register numbers.  */
29295 
29296 unsigned int
arm_dbx_register_number (unsigned int regno)
29298 {
29299   if (regno < 16)
29300     return regno;
29301 
29302   if (IS_VFP_REGNUM (regno))
29303     {
29304       /* See comment in arm_dwarf_register_span.  */
29305       if (VFP_REGNO_OK_FOR_SINGLE (regno))
29306 	return 64 + regno - FIRST_VFP_REGNUM;
29307       else
29308 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29309     }
29310 
29311   if (IS_IWMMXT_GR_REGNUM (regno))
29312     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29313 
29314   if (IS_IWMMXT_REGNUM (regno))
29315     return 112 + regno - FIRST_IWMMXT_REGNUM;
29316 
29317   return DWARF_FRAME_REGISTERS;
29318 }
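
/* Illustrative mapping (assuming the numbering used above): a
   single-precision register such as s5 maps to 64 + 5 = 69, while a
   double-only register such as d16 maps to 256 + 16 = 272; anything
   unrecognized is reported as DWARF_FRAME_REGISTERS.  */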
29319 
29320 /* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
29322    the DWARF generation code.  Other registers can use the default.  */
29323 static rtx
arm_dwarf_register_span (rtx rtl)
29325 {
29326   machine_mode mode;
29327   unsigned regno;
29328   rtx parts[16];
29329   int nregs;
29330   int i;
29331 
29332   regno = REGNO (rtl);
29333   if (!IS_VFP_REGNUM (regno))
29334     return NULL_RTX;
29335 
29336   /* XXX FIXME: The EABI defines two VFP register ranges:
29337 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29338 	256-287: D0-D31
29339      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29340      corresponding D register.  Until GDB supports this, we shall use the
29341      legacy encodings.  We also use these encodings for D0-D15 for
29342      compatibility with older debuggers.  */
29343   mode = GET_MODE (rtl);
29344   if (GET_MODE_SIZE (mode) < 8)
29345     return NULL_RTX;
29346 
29347   if (VFP_REGNO_OK_FOR_SINGLE (regno))
29348     {
29349       nregs = GET_MODE_SIZE (mode) / 4;
29350       for (i = 0; i < nregs; i += 2)
29351 	if (TARGET_BIG_END)
29352 	  {
29353 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29354 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29355 	  }
29356 	else
29357 	  {
29358 	    parts[i] = gen_rtx_REG (SImode, regno + i);
29359 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29360 	  }
29361     }
29362   else
29363     {
29364       nregs = GET_MODE_SIZE (mode) / 8;
29365       for (i = 0; i < nregs; i++)
29366 	parts[i] = gen_rtx_REG (DImode, regno + i);
29367     }
29368 
29369   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29370 }
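
/* Illustrative example: a DFmode value in d5 (within the
   single-precision-capable range) is described as a PARALLEL of the
   two SImode halves s10 and s11, swapped on big-endian targets, while
   a DFmode value in d16-d31 is described as a single DImode piece.  */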
29371 
29372 #if ARM_UNWIND_INFO
29373 /* Emit unwind directives for a store-multiple instruction or stack pointer
29374    push during alignment.
29375    These should only ever be generated by the function prologue code, so
29376    expect them to have a particular form.
   The store-multiple instruction sometimes pushes pc as the last register,
   although it should not be tracked in the unwind information; or, for -Os,
   it sometimes pushes some dummy registers before the first register that
   needs to be tracked in the unwind information.  Such dummy registers are
   there just to avoid a separate stack adjustment, and will not be restored
   in the epilogue.  */
29383 
29384 static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29386 {
29387   int i;
29388   HOST_WIDE_INT offset;
29389   HOST_WIDE_INT nregs;
29390   int reg_size;
29391   unsigned reg;
29392   unsigned lastreg;
29393   unsigned padfirst = 0, padlast = 0;
29394   rtx e;
29395 
29396   e = XVECEXP (p, 0, 0);
29397   gcc_assert (GET_CODE (e) == SET);
29398 
29399   /* First insn will adjust the stack pointer.  */
29400   gcc_assert (GET_CODE (e) == SET
29401 	      && REG_P (SET_DEST (e))
29402 	      && REGNO (SET_DEST (e)) == SP_REGNUM
29403 	      && GET_CODE (SET_SRC (e)) == PLUS);
29404 
29405   offset = -INTVAL (XEXP (SET_SRC (e), 1));
29406   nregs = XVECLEN (p, 0) - 1;
29407   gcc_assert (nregs);
29408 
29409   reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29410   if (reg < 16)
29411     {
29412       /* For -Os dummy registers can be pushed at the beginning to
29413 	 avoid separate stack pointer adjustment.  */
29414       e = XVECEXP (p, 0, 1);
29415       e = XEXP (SET_DEST (e), 0);
29416       if (GET_CODE (e) == PLUS)
29417 	padfirst = INTVAL (XEXP (e, 1));
29418       gcc_assert (padfirst == 0 || optimize_size);
29419       /* The function prologue may also push pc, but not annotate it as it is
29420 	 never restored.  We turn this into a stack pointer adjustment.  */
29421       e = XVECEXP (p, 0, nregs);
29422       e = XEXP (SET_DEST (e), 0);
29423       if (GET_CODE (e) == PLUS)
29424 	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29425       else
29426 	padlast = offset - 4;
29427       gcc_assert (padlast == 0 || padlast == 4);
29428       if (padlast == 4)
29429 	fprintf (asm_out_file, "\t.pad #4\n");
29430       reg_size = 4;
29431       fprintf (asm_out_file, "\t.save {");
29432     }
29433   else if (IS_VFP_REGNUM (reg))
29434     {
29435       reg_size = 8;
29436       fprintf (asm_out_file, "\t.vsave {");
29437     }
29438   else
29439     /* Unknown register type.  */
29440     gcc_unreachable ();
29441 
29442   /* If the stack increment doesn't match the size of the saved registers,
29443      something has gone horribly wrong.  */
29444   gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29445 
29446   offset = padfirst;
29447   lastreg = 0;
29448   /* The remaining insns will describe the stores.  */
29449   for (i = 1; i <= nregs; i++)
29450     {
29451       /* Expect (set (mem <addr>) (reg)).
29452          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
29453       e = XVECEXP (p, 0, i);
29454       gcc_assert (GET_CODE (e) == SET
29455 		  && MEM_P (SET_DEST (e))
29456 		  && REG_P (SET_SRC (e)));
29457 
29458       reg = REGNO (SET_SRC (e));
29459       gcc_assert (reg >= lastreg);
29460 
29461       if (i != 1)
29462 	fprintf (asm_out_file, ", ");
29463       /* We can't use %r for vfp because we need to use the
29464 	 double precision register names.  */
29465       if (IS_VFP_REGNUM (reg))
29466 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29467       else
29468 	asm_fprintf (asm_out_file, "%r", reg);
29469 
29470       if (flag_checking)
29471 	{
29472 	  /* Check that the addresses are consecutive.  */
29473 	  e = XEXP (SET_DEST (e), 0);
29474 	  if (GET_CODE (e) == PLUS)
29475 	    gcc_assert (REG_P (XEXP (e, 0))
29476 			&& REGNO (XEXP (e, 0)) == SP_REGNUM
29477 			&& CONST_INT_P (XEXP (e, 1))
29478 			&& offset == INTVAL (XEXP (e, 1)));
29479 	  else
29480 	    gcc_assert (i == 1
29481 			&& REG_P (e)
29482 			&& REGNO (e) == SP_REGNUM);
29483 	  offset += reg_size;
29484 	}
29485     }
29486   fprintf (asm_out_file, "}\n");
29487   if (padfirst)
29488     fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29489 }
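
/* For illustration: a prologue store-multiple pushing {r4, r5, lr}
   would be annotated as

	.save {r4, r5, lr}

   and, if the instruction also pushed pc purely as padding, an extra

	.pad #4

   would be emitted first, as handled above.  */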
29490 
29491 /*  Emit unwind directives for a SET.  */
29492 
29493 static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29495 {
29496   rtx e0;
29497   rtx e1;
29498   unsigned reg;
29499 
29500   e0 = XEXP (p, 0);
29501   e1 = XEXP (p, 1);
29502   switch (GET_CODE (e0))
29503     {
29504     case MEM:
29505       /* Pushing a single register.  */
29506       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29507 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
29508 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29509 	abort ();
29510 
29511       asm_fprintf (asm_out_file, "\t.save ");
29512       if (IS_VFP_REGNUM (REGNO (e1)))
29513 	asm_fprintf(asm_out_file, "{d%d}\n",
29514 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29515       else
29516 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29517       break;
29518 
29519     case REG:
29520       if (REGNO (e0) == SP_REGNUM)
29521 	{
29522 	  /* A stack increment.  */
29523 	  if (GET_CODE (e1) != PLUS
29524 	      || !REG_P (XEXP (e1, 0))
29525 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
29526 	      || !CONST_INT_P (XEXP (e1, 1)))
29527 	    abort ();
29528 
29529 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29530 		       -INTVAL (XEXP (e1, 1)));
29531 	}
29532       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29533 	{
29534 	  HOST_WIDE_INT offset;
29535 
29536 	  if (GET_CODE (e1) == PLUS)
29537 	    {
29538 	      if (!REG_P (XEXP (e1, 0))
29539 		  || !CONST_INT_P (XEXP (e1, 1)))
29540 		abort ();
29541 	      reg = REGNO (XEXP (e1, 0));
29542 	      offset = INTVAL (XEXP (e1, 1));
29543 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29544 			   HARD_FRAME_POINTER_REGNUM, reg,
29545 			   offset);
29546 	    }
29547 	  else if (REG_P (e1))
29548 	    {
29549 	      reg = REGNO (e1);
29550 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29551 			   HARD_FRAME_POINTER_REGNUM, reg);
29552 	    }
29553 	  else
29554 	    abort ();
29555 	}
29556       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29557 	{
29558 	  /* Move from sp to reg.  */
29559 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29560 	}
29561      else if (GET_CODE (e1) == PLUS
29562 	      && REG_P (XEXP (e1, 0))
29563 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
29564 	      && CONST_INT_P (XEXP (e1, 1)))
29565 	{
29566 	  /* Set reg to offset from sp.  */
29567 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29568 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29569 	}
29570       else
29571 	abort ();
29572       break;
29573 
29574     default:
29575       abort ();
29576     }
29577 }
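
/* Illustrative examples of the directives emitted above: a stack
   decrement such as (set sp (plus sp (const_int -16))) produces
   ".pad #16", pushing a single core register produces ".save {rN}",
   and establishing the frame pointer from sp plus an offset produces
   a ".setfp" directive naming the two registers and the offset.  */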
29578 
29579 
29580 /* Emit unwind directives for the given insn.  */
29581 
29582 static void
arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29584 {
29585   rtx note, pat;
29586   bool handled_one = false;
29587 
29588   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29589     return;
29590 
29591   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29592       && (TREE_NOTHROW (current_function_decl)
29593 	  || crtl->all_throwers_are_sibcalls))
29594     return;
29595 
29596   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29597     return;
29598 
29599   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29600     {
29601       switch (REG_NOTE_KIND (note))
29602 	{
29603 	case REG_FRAME_RELATED_EXPR:
29604 	  pat = XEXP (note, 0);
29605 	  goto found;
29606 
29607 	case REG_CFA_REGISTER:
29608 	  pat = XEXP (note, 0);
29609 	  if (pat == NULL)
29610 	    {
29611 	      pat = PATTERN (insn);
29612 	      if (GET_CODE (pat) == PARALLEL)
29613 		pat = XVECEXP (pat, 0, 0);
29614 	    }
29615 
29616 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
29617 	  {
29618 	    rtx dest, src;
29619 	    unsigned reg;
29620 
29621 	    src = SET_SRC (pat);
29622 	    dest = SET_DEST (pat);
29623 
29624 	    gcc_assert (src == stack_pointer_rtx);
29625 	    reg = REGNO (dest);
29626 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29627 			 reg + 0x90, reg);
29628 	  }
29629 	  handled_one = true;
29630 	  break;
29631 
	/* The INSN is generated in the epilogue.  It is set as
	   RTX_FRAME_RELATED_P to get correct dwarf information for
	   shrink-wrap.  We should not emit unwind information for it
	   because these are used either for pretend arguments or notes
	   to adjust sp and restore registers from the stack.  */
29637 	case REG_CFA_DEF_CFA:
29638 	case REG_CFA_ADJUST_CFA:
29639 	case REG_CFA_RESTORE:
29640 	  return;
29641 
29642 	case REG_CFA_EXPRESSION:
29643 	case REG_CFA_OFFSET:
29644 	  /* ??? Only handling here what we actually emit.  */
29645 	  gcc_unreachable ();
29646 
29647 	default:
29648 	  break;
29649 	}
29650     }
29651   if (handled_one)
29652     return;
29653   pat = PATTERN (insn);
29654  found:
29655 
29656   switch (GET_CODE (pat))
29657     {
29658     case SET:
29659       arm_unwind_emit_set (asm_out_file, pat);
29660       break;
29661 
29662     case SEQUENCE:
29663       /* Store multiple.  */
29664       arm_unwind_emit_sequence (asm_out_file, pat);
29665       break;
29666 
29667     default:
29668       abort();
29669     }
29670 }
29671 
29672 
29673 /* Output a reference from a function exception table to the type_info
29674    object X.  The EABI specifies that the symbol should be relocated by
29675    an R_ARM_TARGET2 relocation.  */
29676 
29677 static bool
arm_output_ttype (rtx x)
29679 {
29680   fputs ("\t.word\t", asm_out_file);
29681   output_addr_const (asm_out_file, x);
29682   /* Use special relocations for symbol references.  */
29683   if (!CONST_INT_P (x))
29684     fputs ("(TARGET2)", asm_out_file);
29685   fputc ('\n', asm_out_file);
29686 
29687   return TRUE;
29688 }
29689 
29690 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
29691 
29692 static void
arm_asm_emit_except_personality (rtx personality)
29694 {
29695   fputs ("\t.personality\t", asm_out_file);
29696   output_addr_const (asm_out_file, personality);
29697   fputc ('\n', asm_out_file);
29698 }
29699 #endif /* ARM_UNWIND_INFO */
29700 
29701 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
29702 
29703 static void
arm_asm_init_sections (void)
29705 {
29706 #if ARM_UNWIND_INFO
29707   exception_section = get_unnamed_section (0, output_section_asm_op,
29708 					   "\t.handlerdata");
29709 #endif /* ARM_UNWIND_INFO */
29710 
29711 #ifdef OBJECT_FORMAT_ELF
29712   if (target_pure_code)
29713     text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29714 #endif
29715 }
29716 
29717 /* Output unwind directives for the start/end of a function.  */
29718 
29719 void
arm_output_fn_unwind (FILE * f, bool prologue)
29721 {
29722   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29723     return;
29724 
29725   if (prologue)
29726     fputs ("\t.fnstart\n", f);
29727   else
29728     {
29729       /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
29731 	 the frame annotations.  */
29732       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29733 	  && (TREE_NOTHROW (current_function_decl)
29734 	      || crtl->all_throwers_are_sibcalls))
29735 	fputs("\t.cantunwind\n", f);
29736 
29737       fputs ("\t.fnend\n", f);
29738     }
29739 }
29740 
29741 static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
29743 {
29744   enum tls_reloc reloc;
29745   rtx val;
29746 
29747   val = XVECEXP (x, 0, 0);
29748   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29749 
29750   output_addr_const (fp, val);
29751 
29752   switch (reloc)
29753     {
29754     case TLS_GD32:
29755       fputs ("(tlsgd)", fp);
29756       break;
29757     case TLS_GD32_FDPIC:
29758       fputs ("(tlsgd_fdpic)", fp);
29759       break;
29760     case TLS_LDM32:
29761       fputs ("(tlsldm)", fp);
29762       break;
29763     case TLS_LDM32_FDPIC:
29764       fputs ("(tlsldm_fdpic)", fp);
29765       break;
29766     case TLS_LDO32:
29767       fputs ("(tlsldo)", fp);
29768       break;
29769     case TLS_IE32:
29770       fputs ("(gottpoff)", fp);
29771       break;
29772     case TLS_IE32_FDPIC:
29773       fputs ("(gottpoff_fdpic)", fp);
29774       break;
29775     case TLS_LE32:
29776       fputs ("(tpoff)", fp);
29777       break;
29778     case TLS_DESCSEQ:
29779       fputs ("(tlsdesc)", fp);
29780       break;
29781     default:
29782       gcc_unreachable ();
29783     }
29784 
29785   switch (reloc)
29786     {
29787     case TLS_GD32:
29788     case TLS_LDM32:
29789     case TLS_IE32:
29790     case TLS_DESCSEQ:
29791       fputs (" + (. - ", fp);
29792       output_addr_const (fp, XVECEXP (x, 0, 2));
29793       /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
29794       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29795       output_addr_const (fp, XVECEXP (x, 0, 3));
29796       fputc (')', fp);
29797       break;
29798     default:
29799       break;
29800     }
29801 
29802   return TRUE;
29803 }
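/* Illustrative example (the PIC label and constant are hypothetical): a
   TLS_GD32 reference to symbol `x' is printed as

	x(tlsgd) + (. - .LPIC0 - 8)

   i.e. the relocation operator followed by the PC-relative adjustment added
   by the second switch; DESCSEQ uses " + " for the final term.  */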
29804 
29805 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
29806 
29807 static void
29808 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29809 {
29810   gcc_assert (size == 4);
29811   fputs ("\t.word\t", file);
29812   output_addr_const (file, x);
29813   fputs ("(tlsldo)", file);
29814 }
29815 
29816 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
29817 
29818 static bool
29819 arm_output_addr_const_extra (FILE *fp, rtx x)
29820 {
29821   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29822     return arm_emit_tls_decoration (fp, x);
29823   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29824     {
29825       char label[256];
29826       int labelno = INTVAL (XVECEXP (x, 0, 0));
29827 
29828       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29829       assemble_name_raw (fp, label);
29830 
29831       return TRUE;
29832     }
29833   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29834     {
29835       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29836       if (GOT_PCREL)
29837 	fputs ("+.", fp);
29838       fputs ("-(", fp);
29839       output_addr_const (fp, XVECEXP (x, 0, 0));
29840       fputc (')', fp);
29841       return TRUE;
29842     }
29843   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29844     {
29845       output_addr_const (fp, XVECEXP (x, 0, 0));
29846       if (GOT_PCREL)
29847         fputs ("+.", fp);
29848       fputs ("-(", fp);
29849       output_addr_const (fp, XVECEXP (x, 0, 1));
29850       fputc (')', fp);
29851       return TRUE;
29852     }
29853   else if (GET_CODE (x) == CONST_VECTOR)
29854     return arm_emit_vector_const (fp, x);
29855 
29856   return FALSE;
29857 }
29858 
29859 /* Output assembly for a shift instruction.
29860    SET_FLAGS determines how the instruction modifies the condition codes.
29861    0 - Do not set condition codes.
29862    1 - Set condition codes.
29863    2 - Use smallest instruction.  */
29864 const char *
29865 arm_output_shift (rtx * operands, int set_flags)
29866 {
29867   char pattern[100];
29868   static const char flag_chars[3] = {'?', '.', '!'};
29869   const char *shift;
29870   HOST_WIDE_INT val;
29871   char c;
29872 
29873   c = flag_chars[set_flags];
29874   shift = shift_op(operands[3], &val);
29875   if (shift)
29876     {
29877       if (val != -1)
29878 	operands[2] = GEN_INT(val);
29879       sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29880     }
29881   else
29882     sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29883 
29884   output_asm_insn (pattern, operands);
29885   return "";
29886 }
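/* A sketch of the template this builds (mnemonic and operands depend on
   shift_op and the insn's operands): for a shift the pattern is
   "<shift>%<flag>\t%0, %1, %2", with operand 2 rewritten to the immediate
   amount when shift_op returns a constant, where <flag> is '?', '.' or '!'
   for SET_FLAGS 0, 1 or 2; when shift_op returns NULL a plain
   "mov%<flag>\t%0, %1" is emitted instead.  */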
29887 
29888 /* Output assembly for a WMMX immediate shift instruction.  */
29889 const char *
29890 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29891 {
29892   int shift = INTVAL (operands[2]);
29893   char templ[50];
29894   machine_mode opmode = GET_MODE (operands[0]);
29895 
29896   gcc_assert (shift >= 0);
29897 
29898   /* Handle a shift value that exceeds the maximum for the mode: > 63 for
29899      the D qualifier, > 31 for the W qualifier or > 15 for the H qualifier.  */
29900   if (((opmode == V4HImode) && (shift > 15))
29901 	|| ((opmode == V2SImode) && (shift > 31))
29902 	|| ((opmode == DImode) && (shift > 63)))
29903   {
29904     if (wror_or_wsra)
29905       {
29906         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29907         output_asm_insn (templ, operands);
29908         if (opmode == DImode)
29909           {
29910 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29911 	    output_asm_insn (templ, operands);
29912           }
29913       }
29914     else
29915       {
29916         /* The destination register will contain all zeros.  */
29917         sprintf (templ, "wzero\t%%0");
29918         output_asm_insn (templ, operands);
29919       }
29920     return "";
29921   }
29922 
29923   if ((opmode == DImode) && (shift > 32))
29924     {
29925       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29926       output_asm_insn (templ, operands);
29927       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29928       output_asm_insn (templ, operands);
29929     }
29930   else
29931     {
29932       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29933       output_asm_insn (templ, operands);
29934     }
29935   return "";
29936 }
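/* Illustrative example (register names are hypothetical, the insn name is
   whatever the caller passes): a DImode WSRAD by 40 is outside the
   single-instruction range, so the code above splits it into

	wsrad	wR0, wR1, #32
	wsrad	wR0, wR0, #8

   i.e. a shift by 32 followed by the remaining 8.  */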
29937 
29938 /* Output assembly for a WMMX tinsr instruction.  */
29939 const char *
29940 arm_output_iwmmxt_tinsr (rtx *operands)
29941 {
29942   int mask = INTVAL (operands[3]);
29943   int i;
29944   char templ[50];
29945   int units = mode_nunits[GET_MODE (operands[0])];
29946   gcc_assert ((mask & (mask - 1)) == 0);
29947   for (i = 0; i < units; ++i)
29948     {
29949       if ((mask & 0x01) == 1)
29950         {
29951           break;
29952         }
29953       mask >>= 1;
29954     }
29955   gcc_assert (i < units);
29956   {
29957     switch (GET_MODE (operands[0]))
29958       {
29959       case E_V8QImode:
29960 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29961 	break;
29962       case E_V4HImode:
29963 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29964 	break;
29965       case E_V2SImode:
29966 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29967 	break;
29968       default:
29969 	gcc_unreachable ();
29970 	break;
29971       }
29972     output_asm_insn (templ, operands);
29973   }
29974   return "";
29975 }
29976 
29977 /* Output a Thumb-1 casesi dispatch sequence.  */
29978 const char *
29979 thumb1_output_casesi (rtx *operands)
29980 {
29981   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29982 
29983   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29984 
29985   switch (GET_MODE(diff_vec))
29986     {
29987     case E_QImode:
29988       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29989 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29990     case E_HImode:
29991       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29992 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29993     case E_SImode:
29994       return "bl\t%___gnu_thumb1_case_si";
29995     default:
29996       gcc_unreachable ();
29997     }
29998 }
29999 
30000 /* Output a Thumb-2 casesi instruction.  */
30001 const char *
30002 thumb2_output_casesi (rtx *operands)
30003 {
30004   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30005 
30006   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30007 
30008   output_asm_insn ("cmp\t%0, %1", operands);
30009   output_asm_insn ("bhi\t%l3", operands);
30010   switch (GET_MODE(diff_vec))
30011     {
30012     case E_QImode:
30013       return "tbb\t[%|pc, %0]";
30014     case E_HImode:
30015       return "tbh\t[%|pc, %0, lsl #1]";
30016     case E_SImode:
30017       if (flag_pic)
30018 	{
30019 	  output_asm_insn ("adr\t%4, %l2", operands);
30020 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30021 	  output_asm_insn ("add\t%4, %4, %5", operands);
30022 	  return "bx\t%4";
30023 	}
30024       else
30025 	{
30026 	  output_asm_insn ("adr\t%4, %l2", operands);
30027 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
30028 	}
30029     default:
30030       gcc_unreachable ();
30031     }
30032 }
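/* A sketch of the emitted HImode dispatch (registers and label are
   illustrative): "cmp r0, r1" and "bhi .Ldefault" guard the index range,
   then "tbh [pc, r0, lsl #1]" branches via the halfword offset table that
   follows the insn; QImode uses TBB, and SImode falls back to an ADR/LDR
   sequence that either loads PC directly or, for PIC, adds the loaded
   offset to the table base before the BX.  */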
30033 
30034 /* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
30035    per-core tuning structs.  */
30036 static int
30037 arm_issue_rate (void)
30038 {
30039   return current_tune->issue_rate;
30040 }
30041 
30042 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
30043 static int
30044 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30045 {
30046   if (DEBUG_INSN_P (insn))
30047     return more;
30048 
30049   rtx_code code = GET_CODE (PATTERN (insn));
30050   if (code == USE || code == CLOBBER)
30051     return more;
30052 
30053   if (get_attr_type (insn) == TYPE_NO_INSN)
30054     return more;
30055 
30056   return more - 1;
30057 }
30058 
30059 /* Return how many instructions the scheduler should look ahead to choose
30060    the best one.  */
30061 static int
30062 arm_first_cycle_multipass_dfa_lookahead (void)
30063 {
30064   int issue_rate = arm_issue_rate ();
30065 
30066   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30067 }
30068 
30069 /* Enable modeling of L2 auto-prefetcher.  */
30070 static int
30071 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30072 {
30073   return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30074 }
30075 
30076 const char *
30077 arm_mangle_type (const_tree type)
30078 {
30079   /* The ARM ABI documents (10th October 2008) say that "__va_list"
30080      has to be mangled as if it is in the "std" namespace.  */
30081   if (TARGET_AAPCS_BASED
30082       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30083     return "St9__va_list";
30084 
30085   /* Half-precision floating point types.  */
30086   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30087     {
30088       if (TYPE_MODE (type) == BFmode)
30089 	return "u6__bf16";
30090       else
30091 	return "Dh";
30092     }
30093 
30094   /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30095      builtin type.  */
30096   if (TYPE_NAME (type) != NULL)
30097     return arm_mangle_builtin_type (type);
30098 
30099   /* Use the default mangling.  */
30100   return NULL;
30101 }
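/* For example, under AAPCS the builtin va_list mangles as "St9__va_list",
   a 16-bit REAL_TYPE such as __fp16 mangles as "Dh", __bf16 (BFmode)
   mangles as "u6__bf16", and other named builtin types fall through to the
   Neon builtin mangling or the default.  */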
30102 
30103 /* Order of allocation of core registers for Thumb: this allocation is
30104    written over the corresponding initial entries of the array
30105    initialized with REG_ALLOC_ORDER.  We allocate all low registers
30106    first.  Saving and restoring a low register is usually cheaper than
30107    using a call-clobbered high register.  */
30108 
30109 static const int thumb_core_reg_alloc_order[] =
30110 {
30111    3,  2,  1,  0,  4,  5,  6,  7,
30112   12, 14,  8,  9, 10, 11
30113 };
30114 
30115 /* Adjust register allocation order when compiling for Thumb.  */
30116 
30117 void
30118 arm_order_regs_for_local_alloc (void)
30119 {
30120   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30121   memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30122   if (TARGET_THUMB)
30123     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30124             sizeof (thumb_core_reg_alloc_order));
30125 }
30126 
30127 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
30128 
30129 bool
30130 arm_frame_pointer_required (void)
30131 {
30132   if (SUBTARGET_FRAME_POINTER_REQUIRED)
30133     return true;
30134 
30135   /* If the function receives nonlocal gotos, it needs to save the frame
30136      pointer in the nonlocal_goto_save_area object.  */
30137   if (cfun->has_nonlocal_label)
30138     return true;
30139 
30140   /* The frame pointer is required for non-leaf APCS frames.  */
30141   if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30142     return true;
30143 
30144   /* If we are probing the stack in the prologue, we will have a faulting
30145      instruction prior to the stack adjustment and this requires a frame
30146      pointer if we want to catch the exception using the EABI unwinder.  */
30147   if (!IS_INTERRUPT (arm_current_func_type ())
30148       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30149 	  || flag_stack_clash_protection)
30150       && arm_except_unwind_info (&global_options) == UI_TARGET
30151       && cfun->can_throw_non_call_exceptions)
30152     {
30153       HOST_WIDE_INT size = get_frame_size ();
30154 
30155       /* That's irrelevant if there is no stack adjustment.  */
30156       if (size <= 0)
30157 	return false;
30158 
30159       /* That's relevant only if there is a stack probe.  */
30160       if (crtl->is_leaf && !cfun->calls_alloca)
30161 	{
30162 	  /* We don't have the final size of the frame so adjust.  */
30163 	  size += 32 * UNITS_PER_WORD;
30164 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30165 	    return true;
30166 	}
30167       else
30168 	return true;
30169     }
30170 
30171   return false;
30172 }
30173 
30174 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30175    All modes except THUMB1 have conditional execution.
30176    If we have conditional arithmetic, return false before reload to
30177    enable some ifcvt transformations. */
30178 static bool
30179 arm_have_conditional_execution (void)
30180 {
30181   bool has_cond_exec, enable_ifcvt_trans;
30182 
30183   /* Only THUMB1 cannot support conditional execution. */
30184   has_cond_exec = !TARGET_THUMB1;
30185 
30186   /* Enable ifcvt transformations if we have conditional arithmetic, but only
30187      before reload. */
30188   enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30189 
30190   return has_cond_exec && !enable_ifcvt_trans;
30191 }
30192 
30193 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
30194 static HOST_WIDE_INT
30195 arm_vector_alignment (const_tree type)
30196 {
30197   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30198 
30199   if (TARGET_AAPCS_BASED)
30200     align = MIN (align, 64);
30201 
30202   return align;
30203 }
30204 
30205 static unsigned int
30206 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30207 {
30208   if (!TARGET_NEON_VECTORIZE_DOUBLE)
30209     {
30210       modes->safe_push (V16QImode);
30211       modes->safe_push (V8QImode);
30212     }
30213   return 0;
30214 }
30215 
30216 static bool
30217 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30218 {
30219   /* Vectors which aren't in packed structures will not be less aligned than
30220      the natural alignment of their element type, so this is safe.  */
30221   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30222     return !is_packed;
30223 
30224   return default_builtin_vector_alignment_reachable (type, is_packed);
30225 }
30226 
30227 static bool
30228 arm_builtin_support_vector_misalignment (machine_mode mode,
30229 					 const_tree type, int misalignment,
30230 					 bool is_packed)
30231 {
30232   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30233     {
30234       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30235 
30236       if (is_packed)
30237         return align == 1;
30238 
30239       /* If the misalignment is unknown, we should be able to handle the access
30240 	 so long as it is not to a member of a packed data structure.  */
30241       if (misalignment == -1)
30242         return true;
30243 
30244       /* Return true if the misalignment is a multiple of the natural alignment
30245          of the vector's element type.  This is probably always going to be
30246 	 true in practice, since we've already established that this isn't a
30247 	 packed access.  */
30248       return ((misalignment % align) == 0);
30249     }
30250 
30251   return default_builtin_support_vector_misalignment (mode, type, misalignment,
30252 						      is_packed);
30253 }
30254 
30255 static void
30256 arm_conditional_register_usage (void)
30257 {
30258   int regno;
30259 
30260   if (TARGET_THUMB1 && optimize_size)
30261     {
30262       /* When optimizing for size on Thumb-1, it's better not
30263         to use the HI regs, because of the overhead of
30264         stacking them.  */
30265       for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30266 	fixed_regs[regno] = call_used_regs[regno] = 1;
30267     }
30268 
30269   /* The link register can be clobbered by any branch insn,
30270      but we have no way to track that at present, so mark
30271      it as unavailable.  */
30272   if (TARGET_THUMB1)
30273     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30274 
30275   if (TARGET_32BIT && TARGET_VFP_BASE)
30276     {
30277       /* VFPv3 registers are disabled when earlier VFP
30278 	 versions are selected due to the definition of
30279 	 LAST_VFP_REGNUM.  */
30280       for (regno = FIRST_VFP_REGNUM;
30281 	   regno <= LAST_VFP_REGNUM; ++ regno)
30282 	{
30283 	  fixed_regs[regno] = 0;
30284 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30285 	    || regno >= FIRST_VFP_REGNUM + 32;
30286 	}
30287       if (TARGET_HAVE_MVE)
30288 	fixed_regs[VPR_REGNUM] = 0;
30289     }
30290 
30291   if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30292     {
30293       regno = FIRST_IWMMXT_GR_REGNUM;
30294       /* The 2002/10/09 revision of the XScale ABI has wCG0
30295          and wCG1 as call-preserved registers.  The 2002/11/21
30296          revision changed this so that all wCG registers are
30297          scratch registers.  */
30298       for (regno = FIRST_IWMMXT_GR_REGNUM;
30299 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30300 	fixed_regs[regno] = 0;
30301       /* The XScale ABI has wR0 - wR9 as scratch registers,
30302 	 the rest as call-preserved registers.  */
30303       for (regno = FIRST_IWMMXT_REGNUM;
30304 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
30305 	{
30306 	  fixed_regs[regno] = 0;
30307 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30308 	}
30309     }
30310 
30311   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30312     {
30313       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30314       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30315     }
30316   else if (TARGET_APCS_STACK)
30317     {
30318       fixed_regs[10]     = 1;
30319       call_used_regs[10] = 1;
30320     }
30321   /* -mcaller-super-interworking reserves r11 for calls to
30322      _interwork_r11_call_via_rN().  Making the register global
30323      is an easy way of ensuring that it remains valid for all
30324      calls.  */
30325   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30326       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30327     {
30328       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30329       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30330       if (TARGET_CALLER_INTERWORKING)
30331 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30332     }
30333 
30334   /* The Q and GE bits are only accessed via special ACLE patterns.  */
30335   CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30336   CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30337 
30338   SUBTARGET_CONDITIONAL_REGISTER_USAGE
30339 }
30340 
30341 static reg_class_t
30342 arm_preferred_rename_class (reg_class_t rclass)
30343 {
30344   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30345      using GENERAL_REGS.  Preferring LO_REGS during the register rename
30346      pass can therefore reduce code size.  */
30347   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30348     return LO_REGS;
30349   else
30350     return NO_REGS;
30351 }
30352 
30353 /* Compute the attribute "length" of insn "*push_multi".
30354    So this function MUST be kept in sync with that insn pattern.  */
30355 int
30356 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
30357 {
30358   int i, regno, hi_reg;
30359   int num_saves = XVECLEN (parallel_op, 0);
30360 
30361   /* ARM mode.  */
30362   if (TARGET_ARM)
30363     return 4;
30364   /* Thumb1 mode.  */
30365   if (TARGET_THUMB1)
30366     return 2;
30367 
30368   /* Thumb2 mode.  */
30369   regno = REGNO (first_op);
30370   /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
30371      list fits in 8 bits.  Normally this means all registers in the list must be
30372      LO_REGS, that is (R0 - R7).  If any HI_REGS are used, then we must use a
30373      32-bit encoding.  The one exception is PUSH, where LR (a HI_REG) can still
30374      be used with the 16-bit encoding.  */
30375   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30376   for (i = 1; i < num_saves && !hi_reg; i++)
30377     {
30378       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30379       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30380     }
30381 
30382   if (!hi_reg)
30383     return 2;
30384   return 4;
30385 }
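/* Two illustrative Thumb-2 cases: "push {r4, r5, lr}" uses only low
   registers plus LR, so the 16-bit encoding applies and the length is 2;
   "push {r4, r8}" includes a high register other than LR, so the length
   is 4.  */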
30386 
30387 /* Compute the attribute "length" of insn.  Currently, this function is used
30388    for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30389    "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
30390    rtx, RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
30391    true if OPERANDS contains an insn which explicitly updates the base register.  */
30392 
30393 int
30394 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30395 {
30396   /* ARM mode.  */
30397   if (TARGET_ARM)
30398     return 4;
30399   /* Thumb1 mode.  */
30400   if (TARGET_THUMB1)
30401     return 2;
30402 
30403   rtx parallel_op = operands[0];
30404   /* Start from the last element of the PARALLEL.  */
30405   unsigned indx = XVECLEN (parallel_op, 0) - 1;
30406   /* Initialize REGNO to the base register.  */
30407   unsigned regno = REGNO (operands[1]);
30408   /* Skip return and write back pattern.
30409      We only need register pop pattern for later analysis.  */
30410   unsigned first_indx = 0;
30411   first_indx += return_pc ? 1 : 0;
30412   first_indx += write_back_p ? 1 : 0;
30413 
30414   /* A pop operation can be done through LDM or POP.  If the base register is SP
30415      and write back is enabled, then an LDM is an alias of POP.  */
30416   bool pop_p = (regno == SP_REGNUM && write_back_p);
30417   bool ldm_p = !pop_p;
30418 
30419   /* Check base register for LDM.  */
30420   if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30421     return 4;
30422 
30423   /* Check each register in the list.  */
30424   for (; indx >= first_indx; indx--)
30425     {
30426       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30427       /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
30428 	 comment in arm_attr_length_push_multi.  */
30429       if (REGNO_REG_CLASS (regno) == HI_REGS
30430 	  && (regno != PC_REGNUM || ldm_p))
30431 	return 4;
30432     }
30433 
30434   return 2;
30435 }
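/* Illustrative Thumb-2 cases: "pop {r4, pc}" (SP base with write back)
   can use the 16-bit POP encoding, so the length is 2; an LDM whose base
   is a high register, or whose register list contains a high register
   other than PC, needs the 32-bit encoding and returns 4.  */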
30436 
30437 /* Compute the number of instructions emitted by output_move_double.  */
30438 int
30439 arm_count_output_move_double_insns (rtx *operands)
30440 {
30441   int count;
30442   rtx ops[2];
30443   /* output_move_double may modify the operands array, so call it
30444      here on a copy of the array.  */
30445   ops[0] = operands[0];
30446   ops[1] = operands[1];
30447   output_move_double (ops, false, &count);
30448   return count;
30449 }
30450 
30451 /* Same as above, but operands are a register/memory pair in SImode.
30452    Assumes operands has the base register in position 0 and memory in position
30453    2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
30454 int
30455 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30456 {
30457   int count;
30458   rtx ops[2];
30459   int regnum, memnum;
30460   if (load)
30461     regnum = 0, memnum = 1;
30462   else
30463     regnum = 1, memnum = 0;
30464   ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30465   ops[memnum] = adjust_address (operands[2], DImode, 0);
30466   output_move_double (ops, false, &count);
30467   return count;
30468 }
30469 
30470 
30471 int
30472 vfp3_const_double_for_fract_bits (rtx operand)
30473 {
30474   REAL_VALUE_TYPE r0;
30475 
30476   if (!CONST_DOUBLE_P (operand))
30477     return 0;
30478 
30479   r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30480   if (exact_real_inverse (DFmode, &r0)
30481       && !REAL_VALUE_NEGATIVE (r0))
30482     {
30483       if (exact_real_truncate (DFmode, &r0))
30484 	{
30485 	  HOST_WIDE_INT value = real_to_integer (&r0);
30486 	  value = value & 0xffffffff;
30487 	  if ((value != 0) && ( (value & (value - 1)) == 0))
30488 	    {
30489 	      int ret = exact_log2 (value);
30490 	      gcc_assert (IN_RANGE (ret, 0, 31));
30491 	      return ret;
30492 	    }
30493 	}
30494     }
30495   return 0;
30496 }
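/* Worked example: for the constant 0.0625 the exact inverse is 16.0, which
   truncates exactly to the power of two 16, so the function returns
   log2 (16) = 4 -- the #fbits operand for a VFP fixed-point conversion.
   A constant such as 0.3 has no exact power-of-two inverse and yields 0.  */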
30497 
30498 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30499    log2 is in [1, 32], return that log2.  Otherwise return -1.
30500    This is used in the patterns for vcvt.s32.f32 floating-point to
30501    fixed-point conversions.  */
30502 
30503 int
30504 vfp3_const_double_for_bits (rtx x)
30505 {
30506   const REAL_VALUE_TYPE *r;
30507 
30508   if (!CONST_DOUBLE_P (x))
30509     return -1;
30510 
30511   r = CONST_DOUBLE_REAL_VALUE (x);
30512 
30513   if (REAL_VALUE_NEGATIVE (*r)
30514       || REAL_VALUE_ISNAN (*r)
30515       || REAL_VALUE_ISINF (*r)
30516       || !real_isinteger (r, SFmode))
30517     return -1;
30518 
30519   HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30520 
30521   /* The exact_log2 above will have returned -1 if this is
30522      not an exact log2.  */
30523   if (!IN_RANGE (hwint, 1, 32))
30524     return -1;
30525 
30526   return hwint;
30527 }
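/* Worked example: 65536.0 is a positive integer power of two, so
   real_to_integer gives 65536 and exact_log2 returns 16, which lies in
   [1, 32] and is returned; values such as 3.0, or powers of two above
   2^32, fall outside the supported range and yield -1.  */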
30528 
30529 
30530 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
30531 
30532 static void
30533 arm_pre_atomic_barrier (enum memmodel model)
30534 {
30535   if (need_atomic_barrier_p (model, true))
30536     emit_insn (gen_memory_barrier ());
30537 }
30538 
30539 static void
30540 arm_post_atomic_barrier (enum memmodel model)
30541 {
30542   if (need_atomic_barrier_p (model, false))
30543     emit_insn (gen_memory_barrier ());
30544 }
30545 
30546 /* Emit the load-exclusive and store-exclusive instructions.
30547    Use acquire and release versions if necessary.  */
30548 
30549 static void
30550 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30551 {
30552   rtx (*gen) (rtx, rtx);
30553 
30554   if (acq)
30555     {
30556       switch (mode)
30557         {
30558         case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30559         case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30560         case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30561         case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30562         default:
30563           gcc_unreachable ();
30564         }
30565     }
30566   else
30567     {
30568       switch (mode)
30569         {
30570         case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30571         case E_HImode: gen = gen_arm_load_exclusivehi; break;
30572         case E_SImode: gen = gen_arm_load_exclusivesi; break;
30573         case E_DImode: gen = gen_arm_load_exclusivedi; break;
30574         default:
30575           gcc_unreachable ();
30576         }
30577     }
30578 
30579   emit_insn (gen (rval, mem));
30580 }
30581 
30582 static void
30583 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30584                           rtx mem, bool rel)
30585 {
30586   rtx (*gen) (rtx, rtx, rtx);
30587 
30588   if (rel)
30589     {
30590       switch (mode)
30591         {
30592         case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30593         case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30594         case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30595         case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30596         default:
30597           gcc_unreachable ();
30598         }
30599     }
30600   else
30601     {
30602       switch (mode)
30603         {
30604         case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30605         case E_HImode: gen = gen_arm_store_exclusivehi; break;
30606         case E_SImode: gen = gen_arm_store_exclusivesi; break;
30607         case E_DImode: gen = gen_arm_store_exclusivedi; break;
30608         default:
30609           gcc_unreachable ();
30610         }
30611     }
30612 
30613   emit_insn (gen (bval, rval, mem));
30614 }
30615 
30616 /* Mark the previous jump instruction as unlikely.  */
30617 
30618 static void
30619 emit_unlikely_jump (rtx insn)
30620 {
30621   rtx_insn *jump = emit_jump_insn (insn);
30622   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30623 }
30624 
30625 /* Expand a compare and swap pattern.  */
30626 
30627 void
30628 arm_expand_compare_and_swap (rtx operands[])
30629 {
30630   rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30631   machine_mode mode, cmp_mode;
30632 
30633   bval = operands[0];
30634   rval = operands[1];
30635   mem = operands[2];
30636   oldval = operands[3];
30637   newval = operands[4];
30638   is_weak = operands[5];
30639   mod_s = operands[6];
30640   mod_f = operands[7];
30641   mode = GET_MODE (mem);
30642 
30643   /* Normally the succ memory model must be stronger than fail, but in the
30644      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30645      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
30646 
30647   if (TARGET_HAVE_LDACQ
30648       && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30649       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30650     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30651 
30652   switch (mode)
30653     {
30654     case E_QImode:
30655     case E_HImode:
30656       /* For narrow modes, we're going to perform the comparison in SImode,
30657 	 so do the zero-extension now.  */
30658       rval = gen_reg_rtx (SImode);
30659       oldval = convert_modes (SImode, mode, oldval, true);
30660       /* FALLTHRU */
30661 
30662     case E_SImode:
30663       /* Force the value into a register if needed.  We waited until after
30664 	 the zero-extension above to do this properly.  */
30665       if (!arm_add_operand (oldval, SImode))
30666 	oldval = force_reg (SImode, oldval);
30667       break;
30668 
30669     case E_DImode:
30670       if (!cmpdi_operand (oldval, mode))
30671 	oldval = force_reg (mode, oldval);
30672       break;
30673 
30674     default:
30675       gcc_unreachable ();
30676     }
30677 
30678   if (TARGET_THUMB1)
30679     cmp_mode = E_SImode;
30680   else
30681     cmp_mode = CC_Zmode;
30682 
30683   bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30684   emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30685                                         oldval, newval, is_weak, mod_s, mod_f));
30686 
30687   if (mode == QImode || mode == HImode)
30688     emit_move_insn (operands[1], gen_lowpart (mode, rval));
30689 
30690   /* In all cases, we arrange for success to be signaled by Z set.
30691      This arrangement allows for the boolean result to be used directly
30692      in a subsequent branch, post optimization.  For Thumb-1 targets, the
30693      boolean negation of the result is also stored in bval because Thumb-1
30694      backend lacks dependency tracking for CC flag due to flag-setting not
30695      being represented at RTL level.  */
30696   if (TARGET_THUMB1)
30697       emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30698   else
30699     {
30700       x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30701       emit_insn (gen_rtx_SET (bval, x));
30702     }
30703 }
30704 
30705 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
30706    another memory store between the load-exclusive and store-exclusive can
30707    reset the monitor from Exclusive to Open state.  This means we must wait
30708    until after reload to split the pattern, lest we get a register spill in
30709    the middle of the atomic sequence.  Success of the compare and swap is
30710    indicated by the Z flag set for 32bit targets and by neg_bval being zero
30711    for Thumb-1 targets (ie. negation of the boolean value returned by
30712    atomic_compare_and_swapmode standard pattern in operand 0).  */
30713 
30714 void
30715 arm_split_compare_and_swap (rtx operands[])
30716 {
30717   rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30718   machine_mode mode;
30719   enum memmodel mod_s, mod_f;
30720   bool is_weak;
30721   rtx_code_label *label1, *label2;
30722   rtx x, cond;
30723 
30724   rval = operands[1];
30725   mem = operands[2];
30726   oldval = operands[3];
30727   newval = operands[4];
30728   is_weak = (operands[5] != const0_rtx);
30729   mod_s_rtx = operands[6];
30730   mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30731   mod_f = memmodel_from_int (INTVAL (operands[7]));
30732   neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30733   mode = GET_MODE (mem);
30734 
30735   bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30736 
30737   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30738   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30739 
30740   /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
30741      a full barrier is emitted after the store-release.  */
30742   if (is_armv8_sync)
30743     use_acquire = false;
30744 
30745   /* Checks whether a barrier is needed and emits one accordingly.  */
30746   if (!(use_acquire || use_release))
30747     arm_pre_atomic_barrier (mod_s);
30748 
30749   label1 = NULL;
30750   if (!is_weak)
30751     {
30752       label1 = gen_label_rtx ();
30753       emit_label (label1);
30754     }
30755   label2 = gen_label_rtx ();
30756 
30757   arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30758 
30759   /* Z is set to 0 for 32-bit targets (resp. NEG_BVAL set to 1 for Thumb-1)
30760      if oldval != rval, as required by arm_expand_compare_and_swap.  */
30761   if (TARGET_32BIT)
30762     {
30763       cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30764       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30765       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30766 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30767       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30768     }
30769   else
30770     {
30771       cond = gen_rtx_NE (VOIDmode, rval, oldval);
30772       if (thumb1_cmpneg_operand (oldval, SImode))
30773 	{
30774 	  rtx src = rval;
30775 	  if (!satisfies_constraint_L (oldval))
30776 	    {
30777 	      gcc_assert (satisfies_constraint_J (oldval));
30778 
30779 	      /* For such immediates, ADDS needs the source and destination regs
30780 		 to be the same.
30781 
30782 		 Normally this would be handled by RA, but this is all happening
30783 		 after RA.  */
30784 	      emit_move_insn (neg_bval, rval);
30785 	      src = neg_bval;
30786 	    }
30787 
30788 	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
30789 						       label2, cond));
30790 	}
30791       else
30792 	{
30793 	  emit_move_insn (neg_bval, const1_rtx);
30794 	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30795 	}
30796     }
30797 
30798   arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30799 
30800   /* Weak or strong, we want EQ to be true for success, so that we
30801      match the flags that we got from the compare above.  */
30802   if (TARGET_32BIT)
30803     {
30804       cond = gen_rtx_REG (CCmode, CC_REGNUM);
30805       x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30806       emit_insn (gen_rtx_SET (cond, x));
30807     }
30808 
30809   if (!is_weak)
30810     {
30811       /* Z is set to boolean value of !neg_bval, as required to communicate
30812 	 with arm_expand_compare_and_swap.  */
30813       x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30814       emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30815     }
30816 
30817   if (!is_mm_relaxed (mod_f))
30818     emit_label (label2);
30819 
30820   /* Checks whether a barrier is needed and emits one accordingly.  */
30821   if (is_armv8_sync
30822       || !(use_acquire || use_release))
30823     arm_post_atomic_barrier (mod_s);
30824 
30825   if (is_mm_relaxed (mod_f))
30826     emit_label (label2);
30827 }
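/* A rough sketch of the strong SImode sequence this produces on a 32-bit
   target (register assignment is illustrative only):

	.Lretry:
		ldrex	r0, [r2]	@ rval = *mem
		cmp	r0, r4		@ compare with oldval
		bne	.Ldone		@ mismatch: fail with Z clear
		strex	r1, r5, [r2]	@ try to store newval
		cmp	r1, #0		@ 0 means the store succeeded
		bne	.Lretry		@ lost the reservation: retry
	.Ldone:

   bracketed by the barriers or acquire/release accesses that MOD_S
   requires.  */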
30828 
30829 /* Split an atomic operation pattern.  Operation is given by CODE and is one
30830    of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
30831    operation).  Operation is performed on the content at MEM and on VALUE
30832    following the memory model MODEL_RTX.  The content at MEM before and after
30833    the operation is returned in OLD_OUT and NEW_OUT respectively while the
30834    success of the operation is returned in COND.  Using a scratch register or
30835    an operand register for these determines what result is returned for that
30836    pattern.  */
30837 
30838 void
30839 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30840 		     rtx value, rtx model_rtx, rtx cond)
30841 {
30842   enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30843   machine_mode mode = GET_MODE (mem);
30844   machine_mode wmode = (mode == DImode ? DImode : SImode);
30845   rtx_code_label *label;
30846   bool all_low_regs, bind_old_new;
30847   rtx x;
30848 
30849   bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30850 
30851   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30852   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30853 
30854   /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
30855      a full barrier is emitted after the store-release.  */
30856   if (is_armv8_sync)
30857     use_acquire = false;
30858 
30859   /* Checks whether a barrier is needed and emits one accordingly.  */
30860   if (!(use_acquire || use_release))
30861     arm_pre_atomic_barrier (model);
30862 
30863   label = gen_label_rtx ();
30864   emit_label (label);
30865 
30866   if (new_out)
30867     new_out = gen_lowpart (wmode, new_out);
30868   if (old_out)
30869     old_out = gen_lowpart (wmode, old_out);
30870   else
30871     old_out = new_out;
30872   value = simplify_gen_subreg (wmode, value, mode, 0);
30873 
30874   arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30875 
30876   /* Does the operation require destination and first operand to use the same
30877      register?  This is decided by register constraints of relevant insn
30878      patterns in thumb1.md.  */
30879   gcc_assert (!new_out || REG_P (new_out));
30880   all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30881 		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30882 		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30883   bind_old_new =
30884     (TARGET_THUMB1
30885      && code != SET
30886      && code != MINUS
30887      && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30888 
30889   /* We want to return the old value while putting the result of the operation
30890      in the same register as the old value so copy the old value over to the
30891      destination register and use that register for the operation.  */
30892   if (old_out && bind_old_new)
30893     {
30894       emit_move_insn (new_out, old_out);
30895       old_out = new_out;
30896     }
30897 
30898   switch (code)
30899     {
30900     case SET:
30901       new_out = value;
30902       break;
30903 
30904     case NOT:
30905       x = gen_rtx_AND (wmode, old_out, value);
30906       emit_insn (gen_rtx_SET (new_out, x));
30907       x = gen_rtx_NOT (wmode, new_out);
30908       emit_insn (gen_rtx_SET (new_out, x));
30909       break;
30910 
30911     case MINUS:
30912       if (CONST_INT_P (value))
30913 	{
30914 	  value = gen_int_mode (-INTVAL (value), wmode);
30915 	  code = PLUS;
30916 	}
30917       /* FALLTHRU */
30918 
30919     case PLUS:
30920       if (mode == DImode)
30921 	{
30922 	  /* DImode plus/minus need to clobber flags.  */
30923 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
30924 	     they require matching operands, even when we could easily support
30925 	     three operands.  Thankfully, this can be fixed up post-splitting,
30926 	     as the individual add+adc patterns do accept three operands and
30927 	     post-reload cprop can make these moves go away.  */
30928 	  emit_move_insn (new_out, old_out);
30929 	  if (code == PLUS)
30930 	    x = gen_adddi3 (new_out, new_out, value);
30931 	  else
30932 	    x = gen_subdi3 (new_out, new_out, value);
30933 	  emit_insn (x);
30934 	  break;
30935 	}
30936       /* FALLTHRU */
30937 
30938     default:
30939       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30940       emit_insn (gen_rtx_SET (new_out, x));
30941       break;
30942     }
30943 
30944   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30945                             use_release);
30946 
30947   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30948   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30949 
30950   /* Checks whether a barrier is needed and emits one accordingly.  */
30951   if (is_armv8_sync
30952       || !(use_acquire || use_release))
30953     arm_post_atomic_barrier (model);
30954 }
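/* A rough sketch of the loop this produces for an SImode atomic add
   (registers are illustrative):

	.Lloop:
		ldrex	r0, [r3]	@ old_out
		add	r1, r0, r4	@ new_out = old_out + value
		strex	r2, r1, [r3]	@ cond, 0 on success
		cmp	r2, #0
		bne	.Lloop

   bracketed by the barriers or acquire/release accesses implied by
   MODEL_RTX.  */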
30955 
30956 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
30957    If CAN_INVERT, store either the result or its inverse in TARGET
30958    and return true if TARGET contains the inverse.  If !CAN_INVERT,
30959    always store the result in TARGET, never its inverse.
30960 
30961    Note that the handling of floating-point comparisons is not
30962    IEEE compliant.  */
30963 
30964 bool
30965 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
30966 			   bool can_invert)
30967 {
30968   machine_mode cmp_result_mode = GET_MODE (target);
30969   machine_mode cmp_mode = GET_MODE (op0);
30970 
30971   bool inverted;
30972   switch (code)
30973     {
30974     /* For these we need to compute the inverse of the requested
30975        comparison.  */
30976     case UNORDERED:
30977     case UNLT:
30978     case UNLE:
30979     case UNGT:
30980     case UNGE:
30981     case UNEQ:
30982     case NE:
30983       code = reverse_condition_maybe_unordered (code);
30984       if (!can_invert)
30985 	{
30986 	  /* Recursively emit the inverted comparison into a temporary
30987 	     and then store its inverse in TARGET.  This avoids reusing
30988 	     TARGET (which for integer NE could be one of the inputs).  */
30989 	  rtx tmp = gen_reg_rtx (cmp_result_mode);
30990 	  if (arm_expand_vector_compare (tmp, code, op0, op1, true))
30991 	    gcc_unreachable ();
30992 	  emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
30993 	  return false;
30994 	}
30995       inverted = true;
30996       break;
30997 
30998     default:
30999       inverted = false;
31000       break;
31001     }
31002 
31003   switch (code)
31004     {
31005     /* These are natively supported for zero comparisons, but otherwise
31006        require the operands to be swapped.  */
31007     case LE:
31008     case LT:
31009       if (op1 != CONST0_RTX (cmp_mode))
31010 	{
31011 	  code = swap_condition (code);
31012 	  std::swap (op0, op1);
31013 	}
31014       /* Fall through.  */
31015 
31016     /* These are natively supported for both register and zero operands.  */
31017     case EQ:
31018     case GE:
31019     case GT:
31020       emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31021       return inverted;
31022 
31023     /* These are natively supported for register operands only.
31024        Comparisons with zero aren't useful and should be folded
31025        or canonicalized by target-independent code.  */
31026     case GEU:
31027     case GTU:
31028       emit_insn (gen_neon_vc (code, cmp_mode, target,
31029 			      op0, force_reg (cmp_mode, op1)));
31030       return inverted;
31031 
31032     /* These require the operands to be swapped and likewise do not
31033        support comparisons with zero.  */
31034     case LEU:
31035     case LTU:
31036       emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31037 			      target, force_reg (cmp_mode, op1), op0));
31038       return inverted;
31039 
31040     /* These need a combination of two comparisons.  */
31041     case LTGT:
31042     case ORDERED:
31043       {
31044 	/* Operands are LTGT iff (a > b || a < b).
31045 	   Operands are ORDERED iff (a > b || a <= b).  */
31046 	rtx gt_res = gen_reg_rtx (cmp_result_mode);
31047 	rtx alt_res = gen_reg_rtx (cmp_result_mode);
31048 	rtx_code alt_code = (code == LTGT ? LT : LE);
31049 	if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31050 	    || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31051 	  gcc_unreachable ();
31052 	emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31053 						     gt_res, alt_res)));
31054 	return inverted;
31055       }
31056 
31057     default:
31058       gcc_unreachable ();
31059     }
31060 }
31061 
31062 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31063    CMP_RESULT_MODE is the mode of the comparison result.  */
31064 
31065 void
31066 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31067 {
31068   rtx mask = gen_reg_rtx (cmp_result_mode);
31069   bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31070 					     operands[4], operands[5], true);
31071   if (inverted)
31072     std::swap (operands[1], operands[2]);
31073   emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31074 			    mask, operands[1], operands[2]));
31075 }
31076 
31077 #define MAX_VECT_LEN 16
31078 
31079 struct expand_vec_perm_d
31080 {
31081   rtx target, op0, op1;
31082   vec_perm_indices perm;
31083   machine_mode vmode;
31084   bool one_vector_p;
31085   bool testing_p;
31086 };
31087 
31088 /* Generate a variable permutation.  */
31089 
31090 static void
31091 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31092 {
31093   machine_mode vmode = GET_MODE (target);
31094   bool one_vector_p = rtx_equal_p (op0, op1);
31095 
31096   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31097   gcc_checking_assert (GET_MODE (op0) == vmode);
31098   gcc_checking_assert (GET_MODE (op1) == vmode);
31099   gcc_checking_assert (GET_MODE (sel) == vmode);
31100   gcc_checking_assert (TARGET_NEON);
31101 
31102   if (one_vector_p)
31103     {
31104       if (vmode == V8QImode)
31105 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31106       else
31107 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31108     }
31109   else
31110     {
31111       rtx pair;
31112 
31113       if (vmode == V8QImode)
31114 	{
31115 	  pair = gen_reg_rtx (V16QImode);
31116 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31117 	  pair = gen_lowpart (TImode, pair);
31118 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31119 	}
31120       else
31121 	{
31122 	  pair = gen_reg_rtx (OImode);
31123 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31124 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31125 	}
31126     }
31127 }
31128 
31129 void
31130 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31131 {
31132   machine_mode vmode = GET_MODE (target);
31133   unsigned int nelt = GET_MODE_NUNITS (vmode);
31134   bool one_vector_p = rtx_equal_p (op0, op1);
31135   rtx mask;
31136 
31137   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
31138      numbering of elements for big-endian, we must reverse the order.  */
31139   gcc_checking_assert (!BYTES_BIG_ENDIAN);
31140 
31141   /* The VTBL instruction does not use a modulo index, so we must take care
31142      of that ourselves.  */
31143   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31144   mask = gen_const_vec_duplicate (vmode, mask);
31145   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31146 
31147   arm_expand_vec_perm_1 (target, op0, op1, sel);
31148 }
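/* For example, with two distinct V8QImode inputs the mask is a vector of
   fifteens, so each selector byte is reduced modulo 16 before the VTBL2
   lookup into the 16-byte concatenation of OP0 and OP1; when OP0 == OP1
   the mask is sevens and a single-register VTBL1 is used instead.  */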
31149 
31150 /* Map lane ordering between architectural lane order, and GCC lane order,
31151    taking into account ABI.  See comment above output_move_neon for details.  */
31152 
31153 static int
31154 neon_endian_lane_map (machine_mode mode, int lane)
31155 {
31156   if (BYTES_BIG_ENDIAN)
31157   {
31158     int nelems = GET_MODE_NUNITS (mode);
31159     /* Reverse lane order.  */
31160     lane = (nelems - 1 - lane);
31161     /* Reverse D register order, to match ABI.  */
31162     if (GET_MODE_SIZE (mode) == 16)
31163       lane = lane ^ (nelems / 2);
31164   }
31165   return lane;
31166 }
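/* Example: for V4SImode on a big-endian target, lane 0 first maps to lane
   3 (reversed lane order) and, because the mode is 16 bytes wide, is then
   XORed with nelems / 2 = 2, giving architectural lane 1.  */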
31167 
31168 /* Some permutations index into pairs of vectors, this is a helper function
31169    to map indexes into those pairs of vectors.  */
31170 
31171 static int
31172 neon_pair_endian_lane_map (machine_mode mode, int lane)
31173 {
31174   int nelem = GET_MODE_NUNITS (mode);
31175   if (BYTES_BIG_ENDIAN)
31176     lane =
31177       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31178   return lane;
31179 }
31180 
31181 /* Generate or test for an insn that supports a constant permutation.  */
31182 
31183 /* Recognize patterns for the VUZP insns.  */
31184 
31185 static bool
31186 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31187 {
31188   unsigned int i, odd, mask, nelt = d->perm.length ();
31189   rtx out0, out1, in0, in1;
31190   int first_elem;
31191   int swap_nelt;
31192 
31193   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31194     return false;
31195 
31196   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31197      big endian pattern on 64 bit vectors, so we correct for that.  */
31198   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31199     && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31200 
31201   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31202 
31203   if (first_elem == neon_endian_lane_map (d->vmode, 0))
31204     odd = 0;
31205   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31206     odd = 1;
31207   else
31208     return false;
31209   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31210 
31211   for (i = 0; i < nelt; i++)
31212     {
31213       unsigned elt =
31214 	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31215       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31216 	return false;
31217     }
31218 
31219   /* Success!  */
31220   if (d->testing_p)
31221     return true;
31222 
31223   in0 = d->op0;
31224   in1 = d->op1;
31225   if (swap_nelt != 0)
31226     std::swap (in0, in1);
31227 
31228   out0 = d->target;
31229   out1 = gen_reg_rtx (d->vmode);
31230   if (odd)
31231     std::swap (out0, out1);
31232 
31233   emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31234   return true;
31235 }
31236 
31237 /* Recognize patterns for the VZIP insns.  */
31238 
31239 static bool
31240 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31241 {
31242   unsigned int i, high, mask, nelt = d->perm.length ();
31243   rtx out0, out1, in0, in1;
31244   int first_elem;
31245   bool is_swapped;
31246 
31247   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31248     return false;
31249 
31250   is_swapped = BYTES_BIG_ENDIAN;
31251 
31252   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31253 
31254   high = nelt / 2;
31255   if (first_elem == neon_endian_lane_map (d->vmode, high))
31256     ;
31257   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31258     high = 0;
31259   else
31260     return false;
31261   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31262 
31263   for (i = 0; i < nelt / 2; i++)
31264     {
31265       unsigned elt =
31266 	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31267       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31268 	  != elt)
31269 	return false;
31270       elt =
31271 	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31272       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31273 	  != elt)
31274 	return false;
31275     }
31276 
31277   /* Success!  */
31278   if (d->testing_p)
31279     return true;
31280 
31281   in0 = d->op0;
31282   in1 = d->op1;
31283   if (is_swapped)
31284     std::swap (in0, in1);
31285 
31286   out0 = d->target;
31287   out1 = gen_reg_rtx (d->vmode);
31288   if (high)
31289     std::swap (out0, out1);
31290 
31291   emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31292   return true;
31293 }
31294 
31295 /* Recognize patterns for the VREV insns.  */
31296 static bool
31297 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31298 {
31299   unsigned int i, j, diff, nelt = d->perm.length ();
31300   rtx (*gen) (machine_mode, rtx, rtx);
31301 
31302   if (!d->one_vector_p)
31303     return false;
31304 
31305   diff = d->perm[0];
31306   switch (diff)
31307     {
31308     case 7:
31309        switch (d->vmode)
31310         {
31311          case E_V16QImode:
31312          case E_V8QImode:
31313           gen = gen_neon_vrev64;
31314           break;
31315          default:
31316           return false;
31317         }
31318        break;
31319     case 3:
31320        switch (d->vmode)
31321         {
31322 	case E_V16QImode:
31323 	case E_V8QImode:
31324           gen = gen_neon_vrev32;
31325           break;
31326 	case E_V8HImode:
31327 	case E_V4HImode:
31328 	case E_V8HFmode:
31329 	case E_V4HFmode:
31330           gen = gen_neon_vrev64;
31331           break;
31332 	default:
31333 	  return false;
31334 	}
31335       break;
31336     case 1:
31337       switch (d->vmode)
31338 	{
31339 	case E_V16QImode:
31340 	case E_V8QImode:
31341           gen = gen_neon_vrev16;
31342           break;
31343 	case E_V8HImode:
31344 	case E_V4HImode:
31345           gen = gen_neon_vrev32;
31346           break;
31347 	case E_V4SImode:
31348 	case E_V2SImode:
31349 	case E_V4SFmode:
31350 	case E_V2SFmode:
31351           gen = gen_neon_vrev64;
31352 	  break;
31353         default:
31354 	  return false;
31355 	}
31356       break;
31357     default:
31358       return false;
31359     }
31360 
31361   for (i = 0; i < nelt ; i += diff + 1)
31362     for (j = 0; j <= diff; j += 1)
31363       {
31364 	/* This is guaranteed to be true, as DIFF is 7, 3 or 1 and
31365 	   we should have enough elements in the queue to generate
31366 	   this.  Getting a vector mask with a value of DIFF other
31367 	   than these implies that something has gone wrong by the
31368 	   time we get here.  */
31369 	gcc_assert (i + j < nelt);
31370 	if (d->perm[i + j] != i + diff - j)
31371 	  return false;
31372       }
31373 
31374   /* Success! */
31375   if (d->testing_p)
31376     return true;
31377 
31378   emit_insn (gen (d->vmode, d->target, d->op0));
31379   return true;
31380 }
31381 
31382 /* Recognize patterns for the VTRN insns.  */
31383 
31384 static bool
31385 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31386 {
31387   unsigned int i, odd, mask, nelt = d->perm.length ();
31388   rtx out0, out1, in0, in1;
31389 
31390   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31391     return false;
31392 
31393   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
31394   if (d->perm[0] == 0)
31395     odd = 0;
31396   else if (d->perm[0] == 1)
31397     odd = 1;
31398   else
31399     return false;
31400   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31401 
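  /* Check that even result lanes take element I + ODD from the first
     vector and odd result lanes take the corresponding element from the
     second vector, which is the pattern VTRN produces.  */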
31402   for (i = 0; i < nelt; i += 2)
31403     {
31404       if (d->perm[i] != i + odd)
31405 	return false;
31406       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31407 	return false;
31408     }
31409 
31410   /* Success!  */
31411   if (d->testing_p)
31412     return true;
31413 
31414   in0 = d->op0;
31415   in1 = d->op1;
31416   if (BYTES_BIG_ENDIAN)
31417     {
31418       std::swap (in0, in1);
31419       odd = !odd;
31420     }
31421 
31422   out0 = d->target;
31423   out1 = gen_reg_rtx (d->vmode);
31424   if (odd)
31425     std::swap (out0, out1);
31426 
31427   emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31428   return true;
31429 }
31430 
31431 /* Recognize patterns for the VEXT insns.  */
31432 
31433 static bool
31434 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31435 {
31436   unsigned int i, nelt = d->perm.length ();
31437   rtx offset;
31438 
31439   unsigned int location;
31440 
31441   unsigned int next  = d->perm[0] + 1;
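  /* VEXT extracts NELT consecutive elements, starting at d->perm[0], from
     the concatenation of the two input vectors, so the selector must
     simply count upwards from its first entry.  */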
31442 
31443   /* TODO: Handle GCC's numbering of elements for big-endian.  */
31444   if (BYTES_BIG_ENDIAN)
31445     return false;
31446 
31447   /* Check if the extracted indexes are increasing by one.  */
31448   for (i = 1; i < nelt; next++, i++)
31449     {
31450       /* If we hit the most significant element of the 2nd vector in
31451 	 the previous iteration, no need to test further.  */
31452       if (next == 2 * nelt)
31453 	return false;
31454 
31455       /* If we are operating on only one vector, it could be a
31456 	 rotation.  If there are only two elements of size < 64, let
31457 	 arm_evpc_neon_vrev catch it.  */
31458       if (d->one_vector_p && (next == nelt))
31459 	{
31460 	  if ((nelt == 2) && (d->vmode != V2DImode))
31461 	    return false;
31462 	  else
31463 	    next = 0;
31464 	}
31465 
31466       if (d->perm[i] != next)
31467 	return false;
31468     }
31469 
31470   location = d->perm[0];
31471 
31472   /* Success! */
31473   if (d->testing_p)
31474     return true;
31475 
31476   offset = GEN_INT (location);
31477 
31478   if (d->vmode == E_DImode)
31479     return false;
31480 
31481   emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31482   return true;
31483 }
31484 
31485 /* The NEON VTBL instruction is a fully variable permutation that's even
31486    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
31487    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
31488    can do slightly better by expanding this as a constant where we don't
31489    have to apply a mask.  */
31490 
31491 static bool
31492 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31493 {
31494   rtx rperm[MAX_VECT_LEN], sel;
31495   machine_mode vmode = d->vmode;
31496   unsigned int i, nelt = d->perm.length ();
31497 
31498   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
31499      numbering of elements for big-endian, we must reverse the order.  */
31500   if (BYTES_BIG_ENDIAN)
31501     return false;
31502 
31503   if (d->testing_p)
31504     return true;
31505 
31506   /* Generic code will try constant permutation twice.  Once with the
31507      original mode and again with the elements lowered to QImode.
31508      So wait and don't do the selector expansion ourselves.  */
31509   if (vmode != V8QImode && vmode != V16QImode)
31510     return false;
31511 
31512   for (i = 0; i < nelt; ++i)
31513     rperm[i] = GEN_INT (d->perm[i]);
31514   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31515   sel = force_reg (vmode, sel);
31516 
31517   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31518   return true;
31519 }
31520 
31521 static bool
31522 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31523 {
31524   /* Check if the input mask matches vext before reordering the
31525      operands.  */
31526   if (TARGET_NEON)
31527     if (arm_evpc_neon_vext (d))
31528       return true;
31529 
31530   /* The pattern matching functions above are written to look for a small
31531      number to begin the sequence (0, 1, N/2).  If we begin with an index
31532      from the second operand, we can swap the operands.  */
31533   unsigned int nelt = d->perm.length ();
31534   if (d->perm[0] >= nelt)
31535     {
31536       d->perm.rotate_inputs (1);
31537       std::swap (d->op0, d->op1);
31538     }
31539 
31540   if (TARGET_NEON)
31541     {
31542       if (arm_evpc_neon_vuzp (d))
31543 	return true;
31544       if (arm_evpc_neon_vzip (d))
31545 	return true;
31546       if (arm_evpc_neon_vrev (d))
31547 	return true;
31548       if (arm_evpc_neon_vtrn (d))
31549 	return true;
31550       return arm_evpc_neon_vtbl (d);
31551     }
31552   return false;
31553 }
31554 
31555 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
31556 
31557 static bool
31558 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31559 			      const vec_perm_indices &sel)
31560 {
31561   struct expand_vec_perm_d d;
31562   int i, nelt, which;
31563 
31564   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31565     return false;
31566 
31567   d.target = target;
31568   if (op0)
31569     {
31570       rtx nop0 = force_reg (vmode, op0);
31571       if (op0 == op1)
31572         op1 = nop0;
31573       op0 = nop0;
31574     }
31575   if (op1)
31576     op1 = force_reg (vmode, op1);
31577   d.op0 = op0;
31578   d.op1 = op1;
31579 
31580   d.vmode = vmode;
31581   gcc_assert (VECTOR_MODE_P (d.vmode));
31582   d.testing_p = !target;
31583 
31584   nelt = GET_MODE_NUNITS (d.vmode);
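  /* Work out which of the two input vectors the selector actually uses:
     bit 0 of WHICH is set if any index refers to OP0, bit 1 if any index
     refers to OP1.  */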
31585   for (i = which = 0; i < nelt; ++i)
31586     {
31587       int ei = sel[i] & (2 * nelt - 1);
31588       which |= (ei < nelt ? 1 : 2);
31589     }
31590 
31591   switch (which)
31592     {
31593     default:
31594       gcc_unreachable ();
31595 
31596     case 3:
31597       d.one_vector_p = false;
31598       if (d.testing_p || !rtx_equal_p (op0, op1))
31599 	break;
31600 
31601       /* The elements of PERM do not suggest that only the first operand
31602 	 is used, but both operands are identical.  Allow easier matching
31603 	 of the permutation by folding the permutation into the single
31604 	 input vector.  */
31605       /* FALLTHRU */
31606     case 2:
31607       d.op0 = op1;
31608       d.one_vector_p = true;
31609       break;
31610 
31611     case 1:
31612       d.op1 = op0;
31613       d.one_vector_p = true;
31614       break;
31615     }
31616 
31617   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31618 
31619   if (!d.testing_p)
31620     return arm_expand_vec_perm_const_1 (&d);
31621 
31622   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31623   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31624   if (!d.one_vector_p)
31625     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31626 
31627   start_sequence ();
31628   bool ret = arm_expand_vec_perm_const_1 (&d);
31629   end_sequence ();
31630 
31631   return ret;
31632 }
31633 
31634 bool
31635 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31636 {
31637   /* If we are soft float and either have ldrd or the mode fits in a
31638      single word, then all auto increment forms are ok.  */
31639   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31640     return true;
31641 
31642   switch (code)
31643     {
31644       /* Post increment and Pre Decrement are supported for all
31645 	 instruction forms except for vector forms.  */
31646     case ARM_POST_INC:
31647     case ARM_PRE_DEC:
31648       if (VECTOR_MODE_P (mode))
31649 	{
31650 	  if (code != ARM_PRE_DEC)
31651 	    return true;
31652 	  else
31653 	    return false;
31654 	}
31655 
31656       return true;
31657 
31658     case ARM_POST_DEC:
31659     case ARM_PRE_INC:
31660       /* Without LDRD and mode size greater than
31661 	 word size, there is no point in auto-incrementing
31662          because ldm and stm will not have these forms.  */
31663       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31664 	return false;
31665 
31666       /* Vector and floating point modes do not support
31667 	 these auto increment forms.  */
31668       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31669 	return false;
31670 
31671       return true;
31672 
31673     default:
31674       return false;
31675 
31676     }
31677 
31678   return false;
31679 }
31680 
31681 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31682    on ARM, since we know that shifts by negative amounts are no-ops.
31683    Additionally, the default expansion code is not available or suitable
31684    for post-reload insn splits (this can occur when the register allocator
31685    chooses not to do a shift in NEON).
31686 
31687    This function is used in both initial expand and post-reload splits, and
31688    handles all kinds of 64-bit shifts.
31689 
31690    Input requirements:
31691     - It is safe for the input and output to be the same register, but
31692       early-clobber rules apply for the shift amount and scratch registers.
31693     - Shift by register requires both scratch registers.  In all other cases
31694       the scratch registers may be NULL.
31695     - Ashiftrt by a register also clobbers the CC register.  */
31696 void
31697 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31698 			       rtx amount, rtx scratch1, rtx scratch2)
31699 {
31700   rtx out_high = gen_highpart (SImode, out);
31701   rtx out_low = gen_lowpart (SImode, out);
31702   rtx in_high = gen_highpart (SImode, in);
31703   rtx in_low = gen_lowpart (SImode, in);
31704 
31705   /* Terminology:
31706 	in = the register pair containing the input value.
31707 	out = the destination register pair.
31708 	up = the high- or low-part of each pair.
31709 	down = the opposite part to "up".
31710      In a shift, we can consider bits to shift from "up"-stream to
31711      "down"-stream, so in a left-shift "up" is the low-part and "down"
31712      is the high-part of each register pair.  */
31713 
31714   rtx out_up   = code == ASHIFT ? out_low : out_high;
31715   rtx out_down = code == ASHIFT ? out_high : out_low;
31716   rtx in_up   = code == ASHIFT ? in_low : in_high;
31717   rtx in_down = code == ASHIFT ? in_high : in_low;
31718 
31719   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31720   gcc_assert (out
31721 	      && (REG_P (out) || SUBREG_P (out))
31722 	      && GET_MODE (out) == DImode);
31723   gcc_assert (in
31724 	      && (REG_P (in) || SUBREG_P (in))
31725 	      && GET_MODE (in) == DImode);
31726   gcc_assert (amount
31727 	      && (((REG_P (amount) || SUBREG_P (amount))
31728 		   && GET_MODE (amount) == SImode)
31729 		  || CONST_INT_P (amount)));
31730   gcc_assert (scratch1 == NULL
31731 	      || (GET_CODE (scratch1) == SCRATCH)
31732 	      || (GET_MODE (scratch1) == SImode
31733 		  && REG_P (scratch1)));
31734   gcc_assert (scratch2 == NULL
31735 	      || (GET_CODE (scratch2) == SCRATCH)
31736 	      || (GET_MODE (scratch2) == SImode
31737 		  && REG_P (scratch2)));
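  /* When shifting by a register, the output register pair must not
     overlap the shift amount register.  */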
31738   gcc_assert (!REG_P (out) || !REG_P (amount)
31739 	      || !HARD_REGISTER_P (out)
31740 	      || (REGNO (out) != REGNO (amount)
31741 		  && REGNO (out) + 1 != REGNO (amount)));
31742 
31743   /* Macros to make following code more readable.  */
31744   #define SUB_32(DEST,SRC) \
31745 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31746   #define RSB_32(DEST,SRC) \
31747 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31748   #define SUB_S_32(DEST,SRC) \
31749 	    gen_addsi3_compare0 ((DEST), (SRC), \
31750 				 GEN_INT (-32))
31751   #define SET(DEST,SRC) \
31752 	    gen_rtx_SET ((DEST), (SRC))
31753   #define SHIFT(CODE,SRC,AMOUNT) \
31754 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31755   #define LSHIFT(CODE,SRC,AMOUNT) \
31756 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31757 			    SImode, (SRC), (AMOUNT))
31758   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31759 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31760 			    SImode, (SRC), (AMOUNT))
31761   #define ORR(A,B) \
31762 	    gen_rtx_IOR (SImode, (A), (B))
31763   #define BRANCH(COND,LABEL) \
31764 	    gen_arm_cond_branch ((LABEL), \
31765 				 gen_rtx_ ## COND (CCmode, cc_reg, \
31766 						   const0_rtx), \
31767 				 cc_reg)
31768 
31769   /* Shifts by register and shifts by constant are handled separately.  */
31770   if (CONST_INT_P (amount))
31771     {
31772       /* We have a shift-by-constant.  */
31773 
31774       /* First, handle out-of-range shift amounts.
31775 	 In both cases we try to match the result that an ARM instruction
31776 	 in a shift-by-register would give.  This helps reduce execution
31777 	 differences between optimization levels, but it won't stop other
31778 	 parts of the compiler doing different things.  This is "undefined
31779 	 behavior", in any case.  */
31780       if (INTVAL (amount) <= 0)
31781 	emit_insn (gen_movdi (out, in));
31782       else if (INTVAL (amount) >= 64)
31783 	{
31784 	  if (code == ASHIFTRT)
31785 	    {
31786 	      rtx const31_rtx = GEN_INT (31);
31787 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31788 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31789 	    }
31790 	  else
31791 	    emit_insn (gen_movdi (out, const0_rtx));
31792 	}
31793 
31794       /* Now handle valid shifts. */
31795       else if (INTVAL (amount) < 32)
31796 	{
31797 	  /* Shifts by a constant less than 32.  */
31798 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31799 
31800 	  /* Clearing the out register in DImode first avoids lots
31801 	     of spilling and results in less stack usage.
31802 	     Later this redundant insn is completely removed.
31803 	     Do that only if "in" and "out" are different registers.  */
31804 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31805 	    emit_insn (SET (out, const0_rtx));
31806 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31807 	  emit_insn (SET (out_down,
31808 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
31809 			       out_down)));
31810 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31811 	}
31812       else
31813 	{
31814 	  /* Shifts by a constant greater than 31.  */
31815 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31816 
31817 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31818 	    emit_insn (SET (out, const0_rtx));
31819 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31820 	  if (code == ASHIFTRT)
31821 	    emit_insn (gen_ashrsi3 (out_up, in_up,
31822 				    GEN_INT (31)));
31823 	  else
31824 	    emit_insn (SET (out_up, const0_rtx));
31825 	}
31826     }
31827   else
31828     {
31829       /* We have a shift-by-register.  */
31830       rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31831 
31832       /* This alternative requires the scratch registers.  */
31833       gcc_assert (scratch1 && REG_P (scratch1));
31834       gcc_assert (scratch2 && REG_P (scratch2));
31835 
31836       /* We will need the values "amount-32" and "32-amount" later.
31837          Swapping them around now allows the later code to be more general. */
31838       switch (code)
31839 	{
31840 	case ASHIFT:
31841 	  emit_insn (SUB_32 (scratch1, amount));
31842 	  emit_insn (RSB_32 (scratch2, amount));
31843 	  break;
31844 	case ASHIFTRT:
31845 	  emit_insn (RSB_32 (scratch1, amount));
31846 	  /* Also set CC = amount > 32.  */
31847 	  emit_insn (SUB_S_32 (scratch2, amount));
31848 	  break;
31849 	case LSHIFTRT:
31850 	  emit_insn (RSB_32 (scratch1, amount));
31851 	  emit_insn (SUB_32 (scratch2, amount));
31852 	  break;
31853 	default:
31854 	  gcc_unreachable ();
31855 	}
31856 
31857       /* Emit code like this:
31858 
31859 	 arithmetic-left:
31860 	    out_down = in_down << amount;
31861 	    out_down = (in_up << (amount - 32)) | out_down;
31862 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31863 	    out_up = in_up << amount;
31864 
31865 	 arithmetic-right:
31866 	    out_down = in_down >> amount;
31867 	    out_down = (in_up << (32 - amount)) | out_down;
31868 	    if (amount < 32)
31869 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
31870 	    out_up = in_up << amount;
31871 
31872 	 logical-right:
31873 	    out_down = in_down >> amount;
31874 	    out_down = (in_up << (32 - amount)) | out_down;
31875 	    if (amount < 32)
31876 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31877 	    out_up = in_up << amount;
31878 
31879 	  The ARM and Thumb2 variants are the same but implemented slightly
31880 	  differently.  If this were only called during expand we could just
31881 	  use the Thumb2 case and let combine do the right thing, but this
31882 	  can also be called from post-reload splitters.  */
31883 
31884       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31885 
31886       if (!TARGET_THUMB2)
31887 	{
31888 	  /* Emit code for ARM mode.  */
31889 	  emit_insn (SET (out_down,
31890 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31891 	  if (code == ASHIFTRT)
31892 	    {
31893 	      rtx_code_label *done_label = gen_label_rtx ();
31894 	      emit_jump_insn (BRANCH (LT, done_label));
31895 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31896 					     out_down)));
31897 	      emit_label (done_label);
31898 	    }
31899 	  else
31900 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31901 					   out_down)));
31902 	}
31903       else
31904 	{
31905 	  /* Emit code for Thumb2 mode.
31906 	     Thumb2 can't do shift and or in one insn.  */
31907 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31908 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31909 
31910 	  if (code == ASHIFTRT)
31911 	    {
31912 	      rtx_code_label *done_label = gen_label_rtx ();
31913 	      emit_jump_insn (BRANCH (LT, done_label));
31914 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31915 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
31916 	      emit_label (done_label);
31917 	    }
31918 	  else
31919 	    {
31920 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31921 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31922 	    }
31923 	}
31924 
31925       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31926     }
31927 
31928   #undef SUB_32
31929   #undef RSB_32
31930   #undef SUB_S_32
31931   #undef SET
31932   #undef SHIFT
31933   #undef LSHIFT
31934   #undef REV_LSHIFT
31935   #undef ORR
31936   #undef BRANCH
31937 }
31938 
31939 /* Returns true if the pattern is a valid symbolic address, which is either a
31940    symbol_ref or (symbol_ref + addend).
31941 
31942    According to the ARM ELF ABI, the initial addend of REL-type relocations
31943    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31944    literal field of the instruction as a 16-bit signed value in the range
31945    -32768 <= A < 32768.
31946 
31947    In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
31948    unsigned range of 0 <= A < 256 as described in the AAELF32
31949    relocation handling documentation: REL-type relocations are encoded
31950    as unsigned in this case.  */
31951 
31952 bool
31953 arm_valid_symbolic_address_p (rtx addr)
31954 {
31955   rtx xop0, xop1 = NULL_RTX;
31956   rtx tmp = addr;
31957 
31958   if (target_word_relocations)
31959     return false;
31960 
31961   if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
31962     return true;
31963 
31964   /* (const (plus: symbol_ref const_int))  */
31965   if (GET_CODE (addr) == CONST)
31966     tmp = XEXP (addr, 0);
31967 
31968   if (GET_CODE (tmp) == PLUS)
31969     {
31970       xop0 = XEXP (tmp, 0);
31971       xop1 = XEXP (tmp, 1);
31972 
31973       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31974 	{
31975 	  if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
31976 	    return IN_RANGE (INTVAL (xop1), 0, 0xff);
31977 	  else
31978 	    return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31979 	}
31980     }
31981 
31982   return false;
31983 }
31984 
31985 /* Return TRUE if *COMPARISON is a valid comparison operation, putting
31986    its operands into a form that is valid.  */
31987 bool
31988 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31989 {
31990   enum rtx_code code = GET_CODE (*comparison);
31991   int code_int;
31992   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31993     ? GET_MODE (*op2) : GET_MODE (*op1);
31994 
31995   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31996 
31997   if (code == UNEQ || code == LTGT)
31998     return false;
31999 
32000   code_int = (int)code;
32001   arm_canonicalize_comparison (&code_int, op1, op2, 0);
32002   PUT_CODE (*comparison, (enum rtx_code)code_int);
32003 
32004   switch (mode)
32005     {
32006     case E_SImode:
32007       if (!arm_add_operand (*op1, mode))
32008 	*op1 = force_reg (mode, *op1);
32009       if (!arm_add_operand (*op2, mode))
32010 	*op2 = force_reg (mode, *op2);
32011       return true;
32012 
32013     case E_DImode:
32014       /* gen_compare_reg() will sort out any invalid operands.  */
32015       return true;
32016 
32017     case E_HFmode:
32018       if (!TARGET_VFP_FP16INST)
32019 	break;
32020       /* FP16 comparisons are done in SF mode.  */
32021       mode = SFmode;
32022       *op1 = convert_to_mode (mode, *op1, 1);
32023       *op2 = convert_to_mode (mode, *op2, 1);
32024       /* Fall through.  */
32025     case E_SFmode:
32026     case E_DFmode:
32027       if (!vfp_compare_operand (*op1, mode))
32028 	*op1 = force_reg (mode, *op1);
32029       if (!vfp_compare_operand (*op2, mode))
32030 	*op2 = force_reg (mode, *op2);
32031       return true;
32032     default:
32033       break;
32034     }
32035 
32036   return false;
32037 
32038 }
32039 
32040 /* Maximum number of instructions to set block of memory.  */
32041 static int
32042 arm_block_set_max_insns (void)
32043 {
32044   if (optimize_function_for_size_p (cfun))
32045     return 4;
32046   else
32047     return current_tune->max_insns_inline_memset;
32048 }
32049 
32050 /* Return TRUE if it's profitable to set block of memory for
32051    non-vectorized case.  VAL is the value to set the memory
32052    with.  LENGTH is the number of bytes to set.  ALIGN is the
32053    alignment of the destination memory in bytes.  UNALIGNED_P
32054    is TRUE if we can only set the memory with instructions
32055    meeting alignment requirements.  USE_STRD_P is TRUE if we
32056    can use strd to set the memory.  */
32057 static bool
32058 arm_block_set_non_vect_profit_p (rtx val,
32059 				 unsigned HOST_WIDE_INT length,
32060 				 unsigned HOST_WIDE_INT align,
32061 				 bool unaligned_p, bool use_strd_p)
32062 {
32063   int num = 0;
32064   /* For a leftover of 0-7 bytes, this table gives the minimum number
32065      of strb/strh/str instructions needed to set it.  */
32066   const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32067 
32068   if (unaligned_p)
32069     {
32070       num = arm_const_inline_cost (SET, val);
32071       num += length / align + length % align;
32072     }
32073   else if (use_strd_p)
32074     {
32075       num = arm_const_double_inline_cost (val);
32076       num += (length >> 3) + leftover[length & 7];
32077     }
32078   else
32079     {
32080       num = arm_const_inline_cost (SET, val);
32081       num += (length >> 2) + leftover[length & 3];
32082     }
32083 
32084   /* We may be able to combine the last STRH/STRB pair into a single STR
32085      by shifting one byte back.  */
32086   if (unaligned_access && length > 3 && (length & 3) == 3)
32087     num--;
32088 
32089   return (num <= arm_block_set_max_insns ());
32090 }
32091 
32092 /* Return TRUE if it's profitable to set block of memory for
32093    vectorized case.  LENGTH is the number of bytes to set.
32094    ALIGN is the alignment of destination memory in bytes.
32095    MODE is the vector mode used to set the memory.  */
32096 static bool
32097 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32098 			     unsigned HOST_WIDE_INT align,
32099 			     machine_mode mode)
32100 {
32101   int num;
32102   bool unaligned_p = ((align & 3) != 0);
32103   unsigned int nelt = GET_MODE_NUNITS (mode);
32104 
32105   /* Instruction loading constant value.  */
32106   num = 1;
32107   /* Instructions storing the memory.  */
32108   num += (length + nelt - 1) / nelt;
32109   /* Instructions adjusting the address expression.  We only need to
32110      adjust the address expression if it's 4-byte aligned and the
32111      leftover bytes can only be stored by a misaligned store instruction.  */
32112   if (!unaligned_p && (length & 3) != 0)
32113     num++;
32114 
32115   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
32116   if (!unaligned_p && mode == V16QImode)
32117     num--;
32118 
32119   return (num <= arm_block_set_max_insns ());
32120 }
32121 
32122 /* Set a block of memory using vectorization instructions for the
32123    unaligned case.  We fill the first LENGTH bytes of the memory
32124    area starting from DSTBASE with byte constant VALUE.  ALIGN is
32125    the alignment requirement of memory.  Return TRUE if succeeded.  */
32126 static bool
32127 arm_block_set_unaligned_vect (rtx dstbase,
32128 			      unsigned HOST_WIDE_INT length,
32129 			      unsigned HOST_WIDE_INT value,
32130 			      unsigned HOST_WIDE_INT align)
32131 {
32132   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32133   rtx dst, mem;
32134   rtx val_vec, reg;
32135   rtx (*gen_func) (rtx, rtx);
32136   machine_mode mode;
32137   unsigned HOST_WIDE_INT v = value;
32138   unsigned int offset = 0;
32139   gcc_assert ((align & 0x3) != 0);
32140   nelt_v8 = GET_MODE_NUNITS (V8QImode);
32141   nelt_v16 = GET_MODE_NUNITS (V16QImode);
32142   if (length >= nelt_v16)
32143     {
32144       mode = V16QImode;
32145       gen_func = gen_movmisalignv16qi;
32146     }
32147   else
32148     {
32149       mode = V8QImode;
32150       gen_func = gen_movmisalignv8qi;
32151     }
32152   nelt_mode = GET_MODE_NUNITS (mode);
32153   gcc_assert (length >= nelt_mode);
32154   /* Skip if it isn't profitable.  */
32155   if (!arm_block_set_vect_profit_p (length, align, mode))
32156     return false;
32157 
32158   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32159   mem = adjust_automodify_address (dstbase, mode, dst, offset);
32160 
32161   v = sext_hwi (v, BITS_PER_WORD);
32162 
32163   reg = gen_reg_rtx (mode);
32164   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32165   /* Emit instruction loading the constant value.  */
32166   emit_move_insn (reg, val_vec);
32167 
32168   /* Handle nelt_mode bytes in a vector.  */
32169   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32170     {
32171       emit_insn ((*gen_func) (mem, reg));
32172       if (i + 2 * nelt_mode <= length)
32173 	{
32174 	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32175 	  offset += nelt_mode;
32176 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
32177 	}
32178     }
32179 
32180   /* If at least nelt_v8 bytes are left over, we must be in
32181      V16QImode.  */
32182   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32183 
32184   /* Handle (8, 16) bytes leftover.  */
32185   if (i + nelt_v8 < length)
32186     {
32187       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32188       offset += length - i;
32189       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32190 
32191       /* We are shifting bytes back, set the alignment accordingly.  */
32192       if ((length & 1) != 0 && align >= 2)
32193 	set_mem_align (mem, BITS_PER_UNIT);
32194 
32195       emit_insn (gen_movmisalignv16qi (mem, reg));
32196     }
32197   /* Handle (0, 8] bytes leftover.  */
32198   else if (i < length && i + nelt_v8 >= length)
32199     {
32200       if (mode == V16QImode)
32201 	reg = gen_lowpart (V8QImode, reg);
32202 
32203       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32204 					      + (nelt_mode - nelt_v8))));
32205       offset += (length - i) + (nelt_mode - nelt_v8);
32206       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32207 
32208       /* We are shifting bytes back, set the alignment accordingly.  */
32209       if ((length & 1) != 0 && align >= 2)
32210 	set_mem_align (mem, BITS_PER_UNIT);
32211 
32212       emit_insn (gen_movmisalignv8qi (mem, reg));
32213     }
32214 
32215   return true;
32216 }
32217 
32218 /* Set a block of memory using vectorization instructions for the
32219    aligned case.  We fill the first LENGTH bytes of the memory area
32220    starting from DSTBASE with byte constant VALUE.  ALIGN is the
32221    alignment requirement of memory.  Return TRUE if succeeded.  */
32222 static bool
32223 arm_block_set_aligned_vect (rtx dstbase,
32224 			    unsigned HOST_WIDE_INT length,
32225 			    unsigned HOST_WIDE_INT value,
32226 			    unsigned HOST_WIDE_INT align)
32227 {
32228   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32229   rtx dst, addr, mem;
32230   rtx val_vec, reg;
32231   machine_mode mode;
32232   unsigned int offset = 0;
32233 
32234   gcc_assert ((align & 0x3) == 0);
32235   nelt_v8 = GET_MODE_NUNITS (V8QImode);
32236   nelt_v16 = GET_MODE_NUNITS (V16QImode);
32237   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32238     mode = V16QImode;
32239   else
32240     mode = V8QImode;
32241 
32242   nelt_mode = GET_MODE_NUNITS (mode);
32243   gcc_assert (length >= nelt_mode);
32244   /* Skip if it isn't profitable.  */
32245   if (!arm_block_set_vect_profit_p (length, align, mode))
32246     return false;
32247 
32248   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32249 
32250   reg = gen_reg_rtx (mode);
32251   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32252   /* Emit instruction loading the constant value.  */
32253   emit_move_insn (reg, val_vec);
32254 
32255   i = 0;
32256   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
32257   if (mode == V16QImode)
32258     {
32259       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32260       emit_insn (gen_movmisalignv16qi (mem, reg));
32261       i += nelt_mode;
32262       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
32263       if (i + nelt_v8 < length && i + nelt_v16 > length)
32264 	{
32265 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32266 	  offset += length - nelt_mode;
32267 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
32268 	  /* We are shifting bytes back, set the alignment accordingly.  */
32269 	  if ((length & 0x3) == 0)
32270 	    set_mem_align (mem, BITS_PER_UNIT * 4);
32271 	  else if ((length & 0x1) == 0)
32272 	    set_mem_align (mem, BITS_PER_UNIT * 2);
32273 	  else
32274 	    set_mem_align (mem, BITS_PER_UNIT);
32275 
32276 	  emit_insn (gen_movmisalignv16qi (mem, reg));
32277 	  return true;
32278 	}
32279       /* Fall through for bytes leftover.  */
32280       mode = V8QImode;
32281       nelt_mode = GET_MODE_NUNITS (mode);
32282       reg = gen_lowpart (V8QImode, reg);
32283     }
32284 
32285   /* Handle 8 bytes in a vector.  */
32286   for (; (i + nelt_mode <= length); i += nelt_mode)
32287     {
32288       addr = plus_constant (Pmode, dst, i);
32289       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32290       if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32291 	emit_move_insn (mem, reg);
32292       else
32293 	emit_insn (gen_unaligned_storev8qi (mem, reg));
32294     }
32295 
32296   /* Handle single word leftover by shifting 4 bytes back.  We can
32297      use aligned access for this case.  */
32298   if (i + UNITS_PER_WORD == length)
32299     {
32300       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32301       offset += i - UNITS_PER_WORD;
32302       mem = adjust_automodify_address (dstbase, mode, addr, offset);
32303       /* We are shifting 4 bytes back, set the alignment accordingly.  */
32304       if (align > UNITS_PER_WORD)
32305 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32306 
32307       emit_insn (gen_unaligned_storev8qi (mem, reg));
32308     }
32309   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32310      We have to use unaligned access for this case.  */
32311   else if (i < length)
32312     {
32313       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32314       offset += length - nelt_mode;
32315       mem = adjust_automodify_address (dstbase, mode, dst, offset);
32316       /* We are shifting bytes back, set the alignment accordingly.  */
32317       if ((length & 1) == 0)
32318 	set_mem_align (mem, BITS_PER_UNIT * 2);
32319       else
32320 	set_mem_align (mem, BITS_PER_UNIT);
32321 
32322       emit_insn (gen_movmisalignv8qi (mem, reg));
32323     }
32324 
32325   return true;
32326 }
32327 
32328 /* Set a block of memory using plain strh/strb instructions, using
32329    only the instructions allowed by ALIGN on the processor.  We fill
32330    the first LENGTH bytes of the memory area starting from DSTBASE
32331    with byte constant VALUE.  ALIGN is the alignment requirement
32332    of the memory.  */
32333 static bool
32334 arm_block_set_unaligned_non_vect (rtx dstbase,
32335 				  unsigned HOST_WIDE_INT length,
32336 				  unsigned HOST_WIDE_INT value,
32337 				  unsigned HOST_WIDE_INT align)
32338 {
32339   unsigned int i;
32340   rtx dst, addr, mem;
32341   rtx val_exp, val_reg, reg;
32342   machine_mode mode;
32343   HOST_WIDE_INT v = value;
32344 
32345   gcc_assert (align == 1 || align == 2);
32346 
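  /* Duplicate the byte value into a halfword when halfword stores are
     allowed by the alignment.  */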
32347   if (align == 2)
32348     v |= (value << BITS_PER_UNIT);
32349 
32350   v = sext_hwi (v, BITS_PER_WORD);
32351   val_exp = GEN_INT (v);
32352   /* Skip if it isn't profitable.  */
32353   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32354 					align, true, false))
32355     return false;
32356 
32357   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32358   mode = (align == 2 ? HImode : QImode);
32359   val_reg = force_reg (SImode, val_exp);
32360   reg = gen_lowpart (mode, val_reg);
32361 
32362   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32363     {
32364       addr = plus_constant (Pmode, dst, i);
32365       mem = adjust_automodify_address (dstbase, mode, addr, i);
32366       emit_move_insn (mem, reg);
32367     }
32368 
32369   /* Handle single byte leftover.  */
32370   if (i + 1 == length)
32371     {
32372       reg = gen_lowpart (QImode, val_reg);
32373       addr = plus_constant (Pmode, dst, i);
32374       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32375       emit_move_insn (mem, reg);
32376       i++;
32377     }
32378 
32379   gcc_assert (i == length);
32380   return true;
32381 }
32382 
32383 /* Set a block of memory using plain strd/str/strh/strb instructions,
32384    to permit unaligned copies on processors which support unaligned
32385    semantics for those instructions.  We fill the first LENGTH bytes
32386    of the memory area starting from DSTBASE with byte constant VALUE.
32387    ALIGN is the alignment requirement of memory.  */
32388 static bool
32389 arm_block_set_aligned_non_vect (rtx dstbase,
32390 				unsigned HOST_WIDE_INT length,
32391 				unsigned HOST_WIDE_INT value,
32392 				unsigned HOST_WIDE_INT align)
32393 {
32394   unsigned int i;
32395   rtx dst, addr, mem;
32396   rtx val_exp, val_reg, reg;
32397   unsigned HOST_WIDE_INT v;
32398   bool use_strd_p;
32399 
32400   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32401 		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
32402 
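  /* Replicate the byte value across a full word.  */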
32403   v = (value | (value << 8) | (value << 16) | (value << 24));
32404   if (length < UNITS_PER_WORD)
32405     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32406 
32407   if (use_strd_p)
32408     v |= (v << BITS_PER_WORD);
32409   else
32410     v = sext_hwi (v, BITS_PER_WORD);
32411 
32412   val_exp = GEN_INT (v);
32413   /* Skip if it isn't profitable.  */
32414   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32415 					align, false, use_strd_p))
32416     {
32417       if (!use_strd_p)
32418 	return false;
32419 
32420       /* Try without strd.  */
32421       v = (v >> BITS_PER_WORD);
32422       v = sext_hwi (v, BITS_PER_WORD);
32423       val_exp = GEN_INT (v);
32424       use_strd_p = false;
32425       if (!arm_block_set_non_vect_profit_p (val_exp, length,
32426 					    align, false, use_strd_p))
32427 	return false;
32428     }
32429 
32430   i = 0;
32431   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32432   /* Handle double words using strd if possible.  */
32433   if (use_strd_p)
32434     {
32435       val_reg = force_reg (DImode, val_exp);
32436       reg = val_reg;
32437       for (; (i + 8 <= length); i += 8)
32438 	{
32439 	  addr = plus_constant (Pmode, dst, i);
32440 	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
32441 	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32442 	    emit_move_insn (mem, reg);
32443 	  else
32444 	    emit_insn (gen_unaligned_storedi (mem, reg));
32445 	}
32446     }
32447   else
32448     val_reg = force_reg (SImode, val_exp);
32449 
32450   /* Handle words.  */
32451   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32452   for (; (i + 4 <= length); i += 4)
32453     {
32454       addr = plus_constant (Pmode, dst, i);
32455       mem = adjust_automodify_address (dstbase, SImode, addr, i);
32456       if ((align & 3) == 0)
32457 	emit_move_insn (mem, reg);
32458       else
32459 	emit_insn (gen_unaligned_storesi (mem, reg));
32460     }
32461 
32462   /* Merge last pair of STRH and STRB into a STR if possible.  */
32463   if (unaligned_access && i > 0 && (i + 3) == length)
32464     {
32465       addr = plus_constant (Pmode, dst, i - 1);
32466       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32467       /* We are shifting one byte back, set the alignment accordingly.  */
32468       if ((align & 1) == 0)
32469 	set_mem_align (mem, BITS_PER_UNIT);
32470 
32471       /* Most likely this is an unaligned access, and we can't tell at
32472 	 compilation time.  */
32473       emit_insn (gen_unaligned_storesi (mem, reg));
32474       return true;
32475     }
32476 
32477   /* Handle half word leftover.  */
32478   if (i + 2 <= length)
32479     {
32480       reg = gen_lowpart (HImode, val_reg);
32481       addr = plus_constant (Pmode, dst, i);
32482       mem = adjust_automodify_address (dstbase, HImode, addr, i);
32483       if ((align & 1) == 0)
32484 	emit_move_insn (mem, reg);
32485       else
32486 	emit_insn (gen_unaligned_storehi (mem, reg));
32487 
32488       i += 2;
32489     }
32490 
32491   /* Handle single byte leftover.  */
32492   if (i + 1 == length)
32493     {
32494       reg = gen_lowpart (QImode, val_reg);
32495       addr = plus_constant (Pmode, dst, i);
32496       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32497       emit_move_insn (mem, reg);
32498     }
32499 
32500   return true;
32501 }
32502 
32503 /* Set a block of memory using vectorization instructions for both
32504    aligned and unaligned cases.  We fill the first LENGTH bytes of
32505    the memory area starting from DSTBASE with byte constant VALUE.
32506    ALIGN is the alignment requirement of memory.  */
32507 static bool
32508 arm_block_set_vect (rtx dstbase,
32509 		    unsigned HOST_WIDE_INT length,
32510 		    unsigned HOST_WIDE_INT value,
32511 		    unsigned HOST_WIDE_INT align)
32512 {
32513   /* Check whether we need to use unaligned store instruction.  */
32514   if (((align & 3) != 0 || (length & 3) != 0)
32515       /* Check whether unaligned store instruction is available.  */
32516       && (!unaligned_access || BYTES_BIG_ENDIAN))
32517     return false;
32518 
32519   if ((align & 3) == 0)
32520     return arm_block_set_aligned_vect (dstbase, length, value, align);
32521   else
32522     return arm_block_set_unaligned_vect (dstbase, length, value, align);
32523 }
32524 
32525 /* Expand a string store operation.  First we try to do that using
32526    vectorization instructions, then with ARM unaligned access and
32527    double-word stores if profitable.  OPERANDS[0] is the destination,
32528    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32529    initialize the memory, OPERANDS[3] is the known alignment of the
32530    destination.  */
32531 bool
32532 arm_gen_setmem (rtx *operands)
32533 {
32534   rtx dstbase = operands[0];
32535   unsigned HOST_WIDE_INT length;
32536   unsigned HOST_WIDE_INT value;
32537   unsigned HOST_WIDE_INT align;
32538 
32539   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32540     return false;
32541 
32542   length = UINTVAL (operands[1]);
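  /* Only expand small blocks inline; for anything larger, fall back to the
     generic expansion.  */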
32543   if (length > 64)
32544     return false;
32545 
32546   value = (UINTVAL (operands[2]) & 0xFF);
32547   align = UINTVAL (operands[3]);
32548   if (TARGET_NEON && length >= 8
32549       && current_tune->string_ops_prefer_neon
32550       && arm_block_set_vect (dstbase, length, value, align))
32551     return true;
32552 
32553   if (!unaligned_access && (align & 3) != 0)
32554     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32555 
32556   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32557 }
32558 
32559 
32560 static bool
32561 arm_macro_fusion_p (void)
32562 {
32563   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32564 }
32565 
32566 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32567    for MOVW / MOVT macro fusion.  */
32568 
32569 static bool
32570 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32571 {
32572   /* We are trying to fuse
32573      movw imm / movt imm
32574     instructions as a group that gets scheduled together.  */
32575 
32576   rtx set_dest = SET_DEST (curr_set);
32577 
32578   if (GET_MODE (set_dest) != SImode)
32579     return false;
32580 
32581   /* We are trying to match:
32582      prev (movw)  == (set (reg r0) (const_int imm16))
32583      curr (movt) == (set (zero_extract (reg r0)
32584 					(const_int 16)
32585 					(const_int 16))
32586 			  (const_int imm16_1))
32587      or
32588      prev (movw) == (set (reg r1)
32589 			  (high (symbol_ref ("SYM"))))
32590     curr (movt) == (set (reg r0)
32591 			(lo_sum (reg r1)
32592 				(symbol_ref ("SYM"))))  */
32593 
32594     if (GET_CODE (set_dest) == ZERO_EXTRACT)
32595       {
32596 	if (CONST_INT_P (SET_SRC (curr_set))
32597 	    && CONST_INT_P (SET_SRC (prev_set))
32598 	    && REG_P (XEXP (set_dest, 0))
32599 	    && REG_P (SET_DEST (prev_set))
32600 	    && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32601 	  return true;
32602 
32603       }
32604     else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32605 	     && REG_P (SET_DEST (curr_set))
32606 	     && REG_P (SET_DEST (prev_set))
32607 	     && GET_CODE (SET_SRC (prev_set)) == HIGH
32608 	     && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32609       return true;
32610 
32611   return false;
32612 }
32613 
32614 static bool
32615 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32616 {
32617   rtx prev_set = single_set (prev);
32618   rtx curr_set = single_set (curr);
32619 
32620   if (!prev_set
32621       || !curr_set)
32622     return false;
32623 
32624   if (any_condjump_p (curr))
32625     return false;
32626 
32627   if (!arm_macro_fusion_p ())
32628     return false;
32629 
32630   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32631       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32632     return true;
32633 
32634   return false;
32635 }
32636 
32637 /* Return true iff the instruction fusion described by OP is enabled.  */
32638 bool
32639 arm_fusion_enabled_p (tune_params::fuse_ops op)
32640 {
32641   return current_tune->fusible_ops & op;
32642 }
32643 
32644 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
32645    scheduled for speculative execution.  Reject the long-running division
32646    and square-root instructions.  */
32647 
32648 static bool
32649 arm_sched_can_speculate_insn (rtx_insn *insn)
32650 {
32651   switch (get_attr_type (insn))
32652     {
32653       case TYPE_SDIV:
32654       case TYPE_UDIV:
32655       case TYPE_FDIVS:
32656       case TYPE_FDIVD:
32657       case TYPE_FSQRTS:
32658       case TYPE_FSQRTD:
32659       case TYPE_NEON_FP_SQRT_S:
32660       case TYPE_NEON_FP_SQRT_D:
32661       case TYPE_NEON_FP_SQRT_S_Q:
32662       case TYPE_NEON_FP_SQRT_D_Q:
32663       case TYPE_NEON_FP_DIV_S:
32664       case TYPE_NEON_FP_DIV_D:
32665       case TYPE_NEON_FP_DIV_S_Q:
32666       case TYPE_NEON_FP_DIV_D_Q:
32667 	return false;
32668       default:
32669 	return true;
32670     }
32671 }
32672 
32673 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
32674 
32675 static unsigned HOST_WIDE_INT
32676 arm_asan_shadow_offset (void)
32677 {
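  /* ASan shadow memory on 32-bit ARM starts at 1 << 29 (0x20000000).  */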
32678   return HOST_WIDE_INT_1U << 29;
32679 }
32680 
32681 
32682 /* This is a temporary fix for PR60655.  Ideally we need
32683    to handle most of these cases in the generic part but
32684    currently we reject minus (..) (sym_ref).  We try to
32685    ameliorate the case with minus (sym_ref1) (sym_ref2)
32686    where they are in the same section.  */
32687 
32688 static bool
32689 arm_const_not_ok_for_debug_p (rtx p)
32690 {
32691   tree decl_op0 = NULL;
32692   tree decl_op1 = NULL;
32693 
32694   if (GET_CODE (p) == UNSPEC)
32695     return true;
32696   if (GET_CODE (p) == MINUS)
32697     {
32698       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32699 	{
32700 	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32701 	  if (decl_op1
32702 	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32703 	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32704 	    {
32705 	      if ((VAR_P (decl_op1)
32706 		   || TREE_CODE (decl_op1) == CONST_DECL)
32707 		  && (VAR_P (decl_op0)
32708 		      || TREE_CODE (decl_op0) == CONST_DECL))
32709 		return (get_variable_section (decl_op1, false)
32710 			!= get_variable_section (decl_op0, false));
32711 
32712 	      if (TREE_CODE (decl_op1) == LABEL_DECL
32713 		  && TREE_CODE (decl_op0) == LABEL_DECL)
32714 		return (DECL_CONTEXT (decl_op1)
32715 			!= DECL_CONTEXT (decl_op0));
32716 	    }
32717 
32718 	  return true;
32719 	}
32720     }
32721 
32722   return false;
32723 }
32724 
32725 /* Return TRUE if X is a reference to a value in a constant pool.  */
32726 extern bool
32727 arm_is_constant_pool_ref (rtx x)
32728 {
32729   return (MEM_P (x)
32730 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32731 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32732 }
32733 
32734 /* Remember the last target of arm_set_current_function.  */
32735 static GTY(()) tree arm_previous_fndecl;
32736 
32737 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
32738 
32739 void
32740 save_restore_target_globals (tree new_tree)
32741 {
32742   /* If we have a previous state, use it.  */
32743   if (TREE_TARGET_GLOBALS (new_tree))
32744     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32745   else if (new_tree == target_option_default_node)
32746     restore_target_globals (&default_target_globals);
32747   else
32748     {
32749       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
32750       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32751     }
32752 
32753   arm_option_params_internal ();
32754 }
32755 
32756 /* Invalidate arm_previous_fndecl.  */
32757 
32758 void
32759 arm_reset_previous_fndecl (void)
32760 {
32761   arm_previous_fndecl = NULL_TREE;
32762 }
32763 
32764 /* Establish appropriate back-end context for processing the function
32765    FNDECL.  The argument might be NULL to indicate processing at top
32766    level, outside of any function scope.  */
32767 
32768 static void
32769 arm_set_current_function (tree fndecl)
32770 {
32771   if (!fndecl || fndecl == arm_previous_fndecl)
32772     return;
32773 
32774   tree old_tree = (arm_previous_fndecl
32775 		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32776 		   : NULL_TREE);
32777 
32778   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32779 
32780   /* If current function has no attributes but previous one did,
32781      use the default node.  */
32782   if (! new_tree && old_tree)
32783     new_tree = target_option_default_node;
32784 
32785   /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC
32786      pop to the default have been handled by save_restore_target_globals
32787      from arm_pragma_target_parse.  */
32788   if (old_tree == new_tree)
32789     return;
32790 
32791   arm_previous_fndecl = fndecl;
32792 
32793   /* First set the target options.  */
32794   cl_target_option_restore (&global_options, &global_options_set,
32795 			    TREE_TARGET_OPTION (new_tree));
32796 
32797   save_restore_target_globals (new_tree);
32798 
32799   arm_override_options_after_change_1 (&global_options, &global_options_set);
32800 }
32801 
32802 /* Implement TARGET_OPTION_PRINT.  */
32803 
32804 static void
32805 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32806 {
32807   int flags = ptr->x_target_flags;
32808   const char *fpu_name;
32809 
32810   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32811 	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32812 
32813   fprintf (file, "%*sselected isa %s\n", indent, "",
32814 	   TARGET_THUMB2_P (flags) ? "thumb2" :
32815 	   TARGET_THUMB_P (flags) ? "thumb1" :
32816 	   "arm");
32817 
32818   if (ptr->x_arm_arch_string)
32819     fprintf (file, "%*sselected architecture %s\n", indent, "",
32820 	     ptr->x_arm_arch_string);
32821 
32822   if (ptr->x_arm_cpu_string)
32823     fprintf (file, "%*sselected CPU %s\n", indent, "",
32824 	     ptr->x_arm_cpu_string);
32825 
32826   if (ptr->x_arm_tune_string)
32827     fprintf (file, "%*sselected tune %s\n", indent, "",
32828 	     ptr->x_arm_tune_string);
32829 
32830   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32831 }
32832 
32833 /* Hook to determine if one function can safely inline another.  */
32834 
32835 static bool
32836 arm_can_inline_p (tree caller, tree callee)
32837 {
32838   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32839   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32840   bool can_inline = true;
32841 
32842   struct cl_target_option *caller_opts
32843 	= TREE_TARGET_OPTION (caller_tree ? caller_tree
32844 					   : target_option_default_node);
32845 
32846   struct cl_target_option *callee_opts
32847 	= TREE_TARGET_OPTION (callee_tree ? callee_tree
32848 					   : target_option_default_node);
32849 
32850   if (callee_opts == caller_opts)
32851     return true;
32852 
32853   /* Callee's ISA features should be a subset of the caller's.  */
32854   struct arm_build_target caller_target;
32855   struct arm_build_target callee_target;
32856   caller_target.isa = sbitmap_alloc (isa_num_bits);
32857   callee_target.isa = sbitmap_alloc (isa_num_bits);
32858 
32859   arm_configure_build_target (&caller_target, caller_opts, false);
32860   arm_configure_build_target (&callee_target, callee_opts, false);
32861   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32862     can_inline = false;
32863 
32864   sbitmap_free (caller_target.isa);
32865   sbitmap_free (callee_target.isa);
32866 
32867   /* OK to inline between different modes.
32868      Functions with mode-specific instructions, e.g. using asm,
32869      must be explicitly protected with noinline.  */
32870   return can_inline;
32871 }
32872 
32873 /* Hook to fix a function's alignment when it is affected by a target attribute.  */
32874 
32875 static void
32876 arm_relayout_function (tree fndecl)
32877 {
32878   if (DECL_USER_ALIGN (fndecl))
32879     return;
32880 
32881   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32882 
32883   if (!callee_tree)
32884     callee_tree = target_option_default_node;
32885 
32886   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32887   SET_DECL_ALIGN
32888     (fndecl,
32889      FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32890 }
32891 
32892 /* Inner function to process the attribute ((target (...))): take an
32893    argument and set the current options from it.  If we have a list,
32894    recursively process each element of the list.  */
32895 
32896 static bool
32897 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32898 {
32899   if (TREE_CODE (args) == TREE_LIST)
32900     {
32901       bool ret = true;
32902 
32903       for (; args; args = TREE_CHAIN (args))
32904 	if (TREE_VALUE (args)
32905 	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32906 	  ret = false;
32907       return ret;
32908     }
32909 
32910   else if (TREE_CODE (args) != STRING_CST)
32911     {
32912       error ("attribute %<target%> argument not a string");
32913       return false;
32914     }
32915 
32916   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32917   char *q;
32918 
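  /* Process each comma-separated token; strtok needs the buffer pointer
     only on the first call, hence ARGSTR is cleared inside the loop.  */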
32919   while ((q = strtok (argstr, ",")) != NULL)
32920     {
32921       argstr = NULL;
32922       if (!strcmp (q, "thumb"))
32923 	{
32924 	  opts->x_target_flags |= MASK_THUMB;
32925 	  if (TARGET_FDPIC && !arm_arch_thumb2)
32926 	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
32927 	}
32928 
32929       else if (!strcmp (q, "arm"))
32930 	opts->x_target_flags &= ~MASK_THUMB;
32931 
32932       else if (!strcmp (q, "general-regs-only"))
32933 	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32934 
32935       else if (!strncmp (q, "fpu=", 4))
32936 	{
32937 	  int fpu_index;
32938 	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32939 				       &fpu_index, CL_TARGET))
32940 	    {
32941 	      error ("invalid fpu for target attribute or pragma %qs", q);
32942 	      return false;
32943 	    }
32944 	  if (fpu_index == TARGET_FPU_auto)
32945 	    {
32946 	      /* This doesn't really make sense until we support
32947 		 general dynamic selection of the architecture and all
32948 		 sub-features.  */
32949 	      sorry ("auto fpu selection not currently permitted here");
32950 	      return false;
32951 	    }
32952 	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32953 	}
32954       else if (!strncmp (q, "arch=", 5))
32955 	{
32956 	  char *arch = q + 5;
32957 	  const arch_option *arm_selected_arch
32958 	     = arm_parse_arch_option_name (all_architectures, "arch", arch);
32959 
32960 	  if (!arm_selected_arch)
32961 	    {
32962 	      error ("invalid architecture for target attribute or pragma %qs",
32963 		     q);
32964 	      return false;
32965 	    }
32966 
32967 	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32968 	}
32969       else if (q[0] == '+')
32970 	{
32971 	  opts->x_arm_arch_string
32972 	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32973 	}
32974       else
32975 	{
32976 	  error ("unknown target attribute or pragma %qs", q);
32977 	  return false;
32978 	}
32979     }
32980 
32981   return true;
32982 }
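
/* Informally, the strings accepted by the walker above look like the
   following hypothetical uses (any given value must of course be supported
   by the target):

     __attribute__((target("thumb,fpu=neon-vfpv4")))
     __attribute__((target("arch=armv7-a+simd")))

   Each comma-separated token is consumed by one iteration of the strtok
   loop, and a token starting with '+' is appended to the architecture
   string collected so far.  */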
32983 
32984 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
32985 
32986 tree
32987 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32988 				 struct gcc_options *opts_set)
32989 {
32990   struct cl_target_option cl_opts;
32991 
32992   if (!arm_valid_target_attribute_rec (args, opts))
32993     return NULL_TREE;
32994 
32995   cl_target_option_save (&cl_opts, opts, opts_set);
32996   arm_configure_build_target (&arm_active_target, &cl_opts, false);
32997   arm_option_check_internal (opts);
32998   /* Do any overrides, such as global options arch=xxx.
32999      We do this since arm_active_target was overridden.  */
33000   arm_option_reconfigure_globals ();
33001   arm_options_perform_arch_sanity_checks ();
33002   arm_option_override_internal (opts, opts_set);
33003 
33004   return build_target_option_node (opts, opts_set);
33005 }
33006 
33007 static void
33008 add_attribute (const char * mode, tree *attributes)
33009 {
33010   size_t len = strlen (mode);
33011   tree value = build_string (len, mode);
33012 
33013   TREE_TYPE (value) = build_array_type (char_type_node,
33014 					build_index_type (size_int (len)));
33015 
33016   *attributes = tree_cons (get_identifier ("target"),
33017 			   build_tree_list (NULL_TREE, value),
33018 			   *attributes);
33019 }
33020 
33021 /* For testing.  Insert thumb or arm mode attributes on alternate functions.  */
33022 
33023 static void
33024 arm_insert_attributes (tree fndecl, tree * attributes)
33025 {
33026   const char *mode;
33027 
33028   if (! TARGET_FLIP_THUMB)
33029     return;
33030 
33031   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33032       || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33033    return;
33034 
33035   /* Nested definitions must inherit mode.  */
33036   if (current_function_decl)
33037    {
33038      mode = TARGET_THUMB ? "thumb" : "arm";
33039      add_attribute (mode, attributes);
33040      return;
33041    }
33042 
33043   /* If there is already a setting don't change it.  */
33044   if (lookup_attribute ("target", *attributes) != NULL)
33045     return;
33046 
33047   mode = thumb_flipper ? "thumb" : "arm";
33048   add_attribute (mode, attributes);
33049 
33050   thumb_flipper = !thumb_flipper;
33051 }
33052 
33053 /* Hook to validate attribute((target("string"))).  */
33054 
33055 static bool
33056 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33057 			      tree args, int ARG_UNUSED (flags))
33058 {
33059   bool ret = true;
33060   struct gcc_options func_options, func_options_set;
33061   tree cur_tree, new_optimize;
33062   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33063 
33064   /* Get the optimization options of the current function.  */
33065   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33066 
33067   /* If the function changed the optimization levels as well as setting target
33068      options, start with the optimizations specified.  */
33069   if (!func_optimize)
33070     func_optimize = optimization_default_node;
33071 
33072   /* Init func_options.  */
33073   memset (&func_options, 0, sizeof (func_options));
33074   init_options_struct (&func_options, NULL);
33075   lang_hooks.init_options_struct (&func_options);
33076   memset (&func_options_set, 0, sizeof (func_options_set));
33077 
33078   /* Initialize func_options to the defaults.  */
33079   cl_optimization_restore (&func_options, &func_options_set,
33080 			   TREE_OPTIMIZATION (func_optimize));
33081 
33082   cl_target_option_restore (&func_options, &func_options_set,
33083 			    TREE_TARGET_OPTION (target_option_default_node));
33084 
33085   /* Set func_options flags with new target mode.  */
33086   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33087 					      &func_options_set);
33088 
33089   if (cur_tree == NULL_TREE)
33090     ret = false;
33091 
33092   new_optimize = build_optimization_node (&func_options, &func_options_set);
33093 
33094   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33095 
33096   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33097 
33098   return ret;
33099 }
33100 
33101 /* Match an ISA feature bitmap to a named FPU.  We always use the
33102    first entry that exactly matches the feature set, so that we
33103    effectively canonicalize the FPU name for the assembler.  */
33104 static const char*
33105 arm_identify_fpu_from_isa (sbitmap isa)
33106 {
33107   auto_sbitmap fpubits (isa_num_bits);
33108   auto_sbitmap cand_fpubits (isa_num_bits);
33109 
33110   bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33111 
33112   /* If there are no ISA feature bits relating to the FPU, we must be
33113      doing soft-float.  */
33114   if (bitmap_empty_p (fpubits))
33115     return "softvfp";
33116 
33117   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33118     {
33119       arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33120       if (bitmap_equal_p (fpubits, cand_fpubits))
33121 	return all_fpus[i].name;
33122     }
33123   /* We must find an entry, or things have gone wrong.  */
33124   gcc_unreachable ();
33125 }
33126 
33127 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
33128    by the function fndecl.  */
33129 void
33130 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33131 {
33132   tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33133 
33134   struct cl_target_option *targ_options;
33135   if (target_parts)
33136     targ_options = TREE_TARGET_OPTION (target_parts);
33137   else
33138     targ_options = TREE_TARGET_OPTION (target_option_current_node);
33139   gcc_assert (targ_options);
33140 
33141   /* Only update the assembler .arch string if it is distinct from the last
33142      such string we printed.  arch_to_print is set conditionally in case
33143      targ_options->x_arm_arch_string is NULL, which can be the case
33144      when cc1 is invoked directly without passing the -march option.  */
33145   std::string arch_to_print;
33146   if (targ_options->x_arm_arch_string)
33147     arch_to_print = targ_options->x_arm_arch_string;
33148 
33149   if (arch_to_print != arm_last_printed_arch_string)
33150     {
33151       std::string arch_name
33152 	= arch_to_print.substr (0, arch_to_print.find ("+"));
33153       asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
33154       const arch_option *arch
33155 	= arm_parse_arch_option_name (all_architectures, "-march",
33156 				      targ_options->x_arm_arch_string);
33157       auto_sbitmap opt_bits (isa_num_bits);
33158 
33159       gcc_assert (arch);
33160       if (arch->common.extensions)
33161 	{
33162 	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
33163 	       opt->name != NULL;
33164 	       opt++)
33165 	    {
33166 	      if (!opt->remove)
33167 		{
33168 		  arm_initialize_isa (opt_bits, opt->isa_bits);
33169 		  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft"
33170 		     and "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and
33171 		     MVE with floating point instructions are disabled.  So the
33172 		     following check restricts the printing of ".arch_extension
33173 		     mve" and ".arch_extension fp" (for mve.fp) in the assembly
33174 		     file.  MVE needs this special behaviour because the
33175 		     feature bits "mve" and "mve_float" are not part of the
33176 		     "fpu bits", so they are not cleared when -mfloat-abi=soft
33177 		     (i.e. nofp), but the macros TARGET_HAVE_MVE and
33178 		     TARGET_HAVE_MVE_FLOAT are disabled.  */
33179 		  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
33180 		      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
33181 			  && !TARGET_HAVE_MVE_FLOAT))
33182 		    continue;
33183 		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
33184 		      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
33185 		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
33186 				 opt->name);
33187 		}
33188 	     }
33189 	}
33190 
33191       arm_last_printed_arch_string = arch_to_print;
33192     }
33193 
33194   fprintf (stream, "\t.syntax unified\n");
33195 
33196   if (TARGET_THUMB)
33197     {
33198       if (is_called_in_ARM_mode (decl)
33199 	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33200 	      && cfun->is_thunk))
33201 	fprintf (stream, "\t.code 32\n");
33202       else if (TARGET_THUMB1)
33203 	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33204       else
33205 	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33206     }
33207   else
33208     fprintf (stream, "\t.arm\n");
33209 
33210   std::string fpu_to_print
33211     = TARGET_SOFT_FLOAT
33212 	? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
33213 
33214   if (!(!strcmp (fpu_to_print.c_str (), "softvfp") && TARGET_VFP_BASE)
33215       && (fpu_to_print != arm_last_printed_fpu_string))
33216     {
33217       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
33218       arm_last_printed_fpu_string = fpu_to_print;
33219     }
33220 
33221   if (TARGET_POKE_FUNCTION_NAME)
33222     arm_poke_function_name (stream, (const char *) name);
33223 }
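
/* Purely as an illustration, the directives emitted above for a Thumb-2
   function on an Armv8-A target with NEON might look like this (the exact
   set depends on the active options and on what was printed last):

	.arch armv8-a
	.arch_extension crc
	.syntax unified
	.thumb
	.thumb_func
	.fpu neon-fp-armv8  */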
33224 
33225 /* If MEM is in the form of [base+offset], extract the two parts
33226    of the address and store them in BASE and OFFSET; otherwise return false
33227    after clearing BASE and OFFSET.  */
33228 
33229 static bool
33230 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33231 {
33232   rtx addr;
33233 
33234   gcc_assert (MEM_P (mem));
33235 
33236   addr = XEXP (mem, 0);
33237 
33238   /* Strip off const from addresses like (const (addr)).  */
33239   if (GET_CODE (addr) == CONST)
33240     addr = XEXP (addr, 0);
33241 
33242   if (REG_P (addr))
33243     {
33244       *base = addr;
33245       *offset = const0_rtx;
33246       return true;
33247     }
33248 
33249   if (GET_CODE (addr) == PLUS
33250       && GET_CODE (XEXP (addr, 0)) == REG
33251       && CONST_INT_P (XEXP (addr, 1)))
33252     {
33253       *base = XEXP (addr, 0);
33254       *offset = XEXP (addr, 1);
33255       return true;
33256     }
33257 
33258   *base = NULL_RTX;
33259   *offset = NULL_RTX;
33260 
33261   return false;
33262 }
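
/* For example, a MEM whose address is (plus (reg r1) (const_int 8)) yields
   *BASE = r1 and *OFFSET = 8, a MEM whose address is just (reg r1) yields
   *BASE = r1 and *OFFSET = 0, and any other form (e.g. a pre/post-modify
   address) makes the function return false.  */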
33263 
33264 /* If INSN is a load or store of an address in the form [base+offset],
33265    extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
33266    to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
33267    otherwise return FALSE.  */
33268 
33269 static bool
33270 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33271 {
33272   rtx x, dest, src;
33273 
33274   gcc_assert (INSN_P (insn));
33275   x = PATTERN (insn);
33276   if (GET_CODE (x) != SET)
33277     return false;
33278 
33279   src = SET_SRC (x);
33280   dest = SET_DEST (x);
33281   if (REG_P (src) && MEM_P (dest))
33282     {
33283       *is_load = false;
33284       extract_base_offset_in_addr (dest, base, offset);
33285     }
33286   else if (MEM_P (src) && REG_P (dest))
33287     {
33288       *is_load = true;
33289       extract_base_offset_in_addr (src, base, offset);
33290     }
33291   else
33292     return false;
33293 
33294   return (*base != NULL_RTX && *offset != NULL_RTX);
33295 }
33296 
33297 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33298 
33299    Currently we only support fusing ldr and str instructions, so FUSION_PRI
33300    and PRI are only calculated for these instructions.  For other instructions,
33301    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
33302    of instruction fusion can be supported by returning different priorities.
33303 
33304    It's important that irrelevant instructions get the largest FUSION_PRI.  */
33305 
33306 static void
33307 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33308 			   int *fusion_pri, int *pri)
33309 {
33310   int tmp, off_val;
33311   bool is_load;
33312   rtx base, offset;
33313 
33314   gcc_assert (INSN_P (insn));
33315 
33316   tmp = max_pri - 1;
33317   if (!fusion_load_store (insn, &base, &offset, &is_load))
33318     {
33319       *pri = tmp;
33320       *fusion_pri = tmp;
33321       return;
33322     }
33323 
33324   /* Load goes first.  */
33325   if (is_load)
33326     *fusion_pri = tmp - 1;
33327   else
33328     *fusion_pri = tmp - 2;
33329 
33330   tmp /= 2;
33331 
33332   /* INSN with smaller base register goes first.  */
33333   tmp -= ((REGNO (base) & 0xff) << 20);
33334 
33335   /* INSN with smaller offset goes first.  */
33336   off_val = (int)(INTVAL (offset));
33337   if (off_val >= 0)
33338     tmp -= (off_val & 0xfffff);
33339   else
33340     tmp += ((- off_val) & 0xfffff);
33341 
33342   *pri = tmp;
33343   return;
33344 }
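
/* A worked example of the priorities computed above: two loads from the
   same base register with offsets #4 and #8 both get FUSION_PRI equal to
   max_pri - 2 (stores would get max_pri - 3), and their PRI values differ
   only by the offset term, so the smaller-offset access is ordered first.
   Keeping such pairs adjacent lets a later pass combine them, e.g. into an
   LDRD.  */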
33345 
33346 
33347 /* Construct and return a PARALLEL RTX vector with elements numbering the
33348    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33349    the vector - from the perspective of the architecture.  This does not
33350    line up with GCC's perspective on lane numbers, so we end up with
33351    different masks depending on our target endian-ness.  The diagram
33352    different masks depending on our target endianness.  The diagram
33353    below may help.  We must draw the distinction when building masks
33354    which select one half of the vector.  An instruction selecting
33355    architectural low-lanes for a big-endian target must be described using
33356 
33357                  Big-Endian             Little-Endian
33358 
33359 GCC             0   1   2   3           3   2   1   0
33360               | x | x | x | x |       | x | x | x | x |
33361 Architecture    3   2   1   0           3   2   1   0
33362 
33363 Low Mask:         { 2, 3 }                { 0, 1 }
33364 High Mask:        { 0, 1 }                { 2, 3 }
33365 */
33366 
33367 rtx
33368 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33369 {
33370   int nunits = GET_MODE_NUNITS (mode);
33371   rtvec v = rtvec_alloc (nunits / 2);
33372   int high_base = nunits / 2;
33373   int low_base = 0;
33374   int base;
33375   rtx t1;
33376   int i;
33377 
33378   if (BYTES_BIG_ENDIAN)
33379     base = high ? low_base : high_base;
33380   else
33381     base = high ? high_base : low_base;
33382 
33383   for (i = 0; i < nunits / 2; i++)
33384     RTVEC_ELT (v, i) = GEN_INT (base + i);
33385 
33386   t1 = gen_rtx_PARALLEL (mode, v);
33387   return t1;
33388 }
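
/* For example, for V4SImode with HIGH == true this yields
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target and
   (parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching
   the mask table in the comment above.  */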
33389 
33390 /* Check OP for validity as a PARALLEL RTX vector with elements
33391    numbering the lanes of either the high (HIGH == TRUE) or low half,
33392    from the perspective of the architecture.  See the diagram above
33393    arm_simd_vect_par_cnst_half for more details.  */
33394 
33395 bool
33396 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33397 				       bool high)
33398 {
33399   rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33400   HOST_WIDE_INT count_op = XVECLEN (op, 0);
33401   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33402   int i = 0;
33403 
33404   if (!VECTOR_MODE_P (mode))
33405     return false;
33406 
33407   if (count_op != count_ideal)
33408     return false;
33409 
33410   for (i = 0; i < count_ideal; i++)
33411     {
33412       rtx elt_op = XVECEXP (op, 0, i);
33413       rtx elt_ideal = XVECEXP (ideal, 0, i);
33414 
33415       if (!CONST_INT_P (elt_op)
33416 	  || INTVAL (elt_ideal) != INTVAL (elt_op))
33417 	return false;
33418     }
33419   return true;
33420 }
33421 
33422 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33423    in Thumb1.  */
33424 static bool
33425 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33426 			 const_tree)
33427 {
33428   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
33429   if (vcall_offset && TARGET_THUMB1)
33430     return false;
33431 
33432   /* Otherwise ok.  */
33433   return true;
33434 }
33435 
33436 /* Generate RTL for a conditional branch with rtx comparison CODE in
33437    mode CC_MODE. The destination of the unlikely conditional branch
33438    is LABEL_REF.  */
33439 
33440 void
33441 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33442 			  rtx label_ref)
33443 {
33444   rtx x;
33445   x = gen_rtx_fmt_ee (code, VOIDmode,
33446 		      gen_rtx_REG (cc_mode, CC_REGNUM),
33447 		      const0_rtx);
33448 
33449   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33450 			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
33451 			    pc_rtx);
33452   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33453 }
33454 
33455 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33456 
33457    For pure-code sections there is no letter code for this attribute, so
33458    output all the section flags numerically when this is needed.  */
33459 
33460 static bool
33461 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33462 {
33463 
33464   if (flags & SECTION_ARM_PURECODE)
33465     {
33466       *num = 0x20000000;
33467 
33468       if (!(flags & SECTION_DEBUG))
33469 	*num |= 0x2;
33470       if (flags & SECTION_EXCLUDE)
33471 	*num |= 0x80000000;
33472       if (flags & SECTION_WRITE)
33473 	*num |= 0x1;
33474       if (flags & SECTION_CODE)
33475 	*num |= 0x4;
33476       if (flags & SECTION_MERGE)
33477 	*num |= 0x10;
33478       if (flags & SECTION_STRINGS)
33479 	*num |= 0x20;
33480       if (flags & SECTION_TLS)
33481 	*num |= 0x400;
33482       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33483 	*num |= 0x200;
33484 
33485       return true;
33486     }
33487 
33488   return false;
33489 }
33490 
33491 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33492 
33493    If pure-code is passed as an option, make sure all functions are in
33494    sections that have the SHF_ARM_PURECODE attribute.  */
33495 
33496 static section *
33497 arm_function_section (tree decl, enum node_frequency freq,
33498 		      bool startup, bool exit)
33499 {
33500   const char * section_name;
33501   section * sec;
33502 
33503   if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33504     return default_function_section (decl, freq, startup, exit);
33505 
33506   if (!target_pure_code)
33507     return default_function_section (decl, freq, startup, exit);
33508 
33509 
33510   section_name = DECL_SECTION_NAME (decl);
33511 
33512   /* If a function is not in a named section then it falls under the 'default'
33513      text section, also known as '.text'.  We can preserve previous behavior as
33514      the default text section already has the SHF_ARM_PURECODE section
33515      attribute.  */
33516   if (!section_name)
33517     {
33518       section *default_sec = default_function_section (decl, freq, startup,
33519 						       exit);
33520 
33521       /* If default_sec is not null, then it must be a special section like for
33522 	 example .text.startup.  We set the pure-code attribute and return the
33523 	 same section to preserve existing behavior.  */
33524       if (default_sec)
33525 	  default_sec->common.flags |= SECTION_ARM_PURECODE;
33526       return default_sec;
33527     }
33528 
33529   /* Otherwise look whether a section has already been created with
33530      'section_name'.  */
33531   sec = get_named_section (decl, section_name, 0);
33532   if (!sec)
33533     /* If that is not the case, passing NULL as the section's name to
33534        'get_named_section' will create a section with the declaration's
33535        section name.  */
33536     sec = get_named_section (decl, NULL, 0);
33537 
33538   /* Set the SHF_ARM_PURECODE attribute.  */
33539   sec->common.flags |= SECTION_ARM_PURECODE;
33540 
33541   return sec;
33542 }
33543 
33544 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33545 
33546    If DECL is a function declaration and pure-code is passed as an option
33547    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
33548    section's name and RELOC indicates whether the declaration's initializer may
33549    contain runtime relocations.  */
33550 
33551 static unsigned int
33552 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33553 {
33554   unsigned int flags = default_section_type_flags (decl, name, reloc);
33555 
33556   if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33557     flags |= SECTION_ARM_PURECODE;
33558 
33559   return flags;
33560 }
33561 
33562 /* Generate call to __aeabi_[mode]divmod (op0, op1).  */
33563 
33564 static void
33565 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33566 			   rtx op0, rtx op1,
33567 			   rtx *quot_p, rtx *rem_p)
33568 {
33569   if (mode == SImode)
33570     gcc_assert (!TARGET_IDIV);
33571 
33572   scalar_int_mode libval_mode
33573     = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33574 
33575   rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33576 					libval_mode, op0, mode, op1, mode);
33577 
33578   rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33579   rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33580 				       GET_MODE_SIZE (mode));
33581 
33582   gcc_assert (quotient);
33583   gcc_assert (remainder);
33584 
33585   *quot_p = quotient;
33586   *rem_p = remainder;
33587 }
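
/* As background for the splitting above: for SImode the AEABI helpers
   (__aeabi_idivmod / __aeabi_uidivmod) return the quotient in r0 and the
   remainder in r1, so the call is modelled as returning one value of twice
   the width of MODE and the two halves are then extracted with
   simplify_gen_subreg.  */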
33588 
33589 /* This function checks for the availability of the coprocessor builtin passed
33590    in BUILTIN for the current target.  Returns true if it is available and
33591    false otherwise.  If a BUILTIN is passed for which this function has not
33592    been implemented it will cause an internal compiler error.  */
33593 
33594 bool
33595 arm_coproc_builtin_available (enum unspecv builtin)
33596 {
33597   /* None of these builtins are available in Thumb mode if the target only
33598      supports Thumb-1.  */
33599   if (TARGET_THUMB1)
33600     return false;
33601 
33602   switch (builtin)
33603     {
33604       case VUNSPEC_CDP:
33605       case VUNSPEC_LDC:
33606       case VUNSPEC_LDCL:
33607       case VUNSPEC_STC:
33608       case VUNSPEC_STCL:
33609       case VUNSPEC_MCR:
33610       case VUNSPEC_MRC:
33611 	if (arm_arch4)
33612 	  return true;
33613 	break;
33614       case VUNSPEC_CDP2:
33615       case VUNSPEC_LDC2:
33616       case VUNSPEC_LDC2L:
33617       case VUNSPEC_STC2:
33618       case VUNSPEC_STC2L:
33619       case VUNSPEC_MCR2:
33620       case VUNSPEC_MRC2:
33621 	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33622 	   ARMv8-{A,M}.  */
33623 	if (arm_arch5t)
33624 	  return true;
33625 	break;
33626       case VUNSPEC_MCRR:
33627       case VUNSPEC_MRRC:
33628 	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33629 	   ARMv8-{A,M}.  */
33630 	if (arm_arch6 || arm_arch5te)
33631 	  return true;
33632 	break;
33633       case VUNSPEC_MCRR2:
33634       case VUNSPEC_MRRC2:
33635 	if (arm_arch6)
33636 	  return true;
33637 	break;
33638       default:
33639 	gcc_unreachable ();
33640     }
33641   return false;
33642 }
33643 
33644 /* This function returns true if OP is a valid memory operand for the ldc and
33645    stc coprocessor instructions and false otherwise.  */
33646 
33647 bool
33648 arm_coproc_ldc_stc_legitimate_address (rtx op)
33649 {
33650   HOST_WIDE_INT range;
33651   /* Has to be a memory operand.  */
33652   if (!MEM_P (op))
33653     return false;
33654 
33655   op = XEXP (op, 0);
33656 
33657   /* We accept registers.  */
33658   if (REG_P (op))
33659     return true;
33660 
33661   switch (GET_CODE (op))
33662     {
33663       case PLUS:
33664 	{
33665 	  /* Or registers with an offset.  */
33666 	  if (!REG_P (XEXP (op, 0)))
33667 	    return false;
33668 
33669 	  op = XEXP (op, 1);
33670 
33671 	  /* The offset must be an immediate though.  */
33672 	  if (!CONST_INT_P (op))
33673 	    return false;
33674 
33675 	  range = INTVAL (op);
33676 
33677 	  /* Within the range of [-1020,1020].  */
33678 	  if (!IN_RANGE (range, -1020, 1020))
33679 	    return false;
33680 
33681 	  /* And a multiple of 4.  */
33682 	  return (range % 4) == 0;
33683 	}
33684       case PRE_INC:
33685       case POST_INC:
33686       case PRE_DEC:
33687       case POST_DEC:
33688 	return REG_P (XEXP (op, 0));
33689       default:
33690 	gcc_unreachable ();
33691     }
33692   return false;
33693 }
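
/* A few illustrative operands for the check above: [r0], [r2, #-1020] and
   [r3, #512] are accepted, while [r2, #2] (not a multiple of 4) and
   [r1, #1024] (outside [-1020, 1020]) are rejected; pre/post-increment and
   pre/post-decrement of a plain register are also accepted.  */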
33694 
33695 /* Return the diagnostic message string if conversion from FROMTYPE to
33696    TOTYPE is not allowed, NULL otherwise.  */
33697 
33698 static const char *
33699 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33700 {
33701   if (element_mode (fromtype) != element_mode (totype))
33702     {
33703       /* Do not allow conversions to/from BFmode scalar types.  */
33704       if (TYPE_MODE (fromtype) == BFmode)
33705 	return N_("invalid conversion from type %<bfloat16_t%>");
33706       if (TYPE_MODE (totype) == BFmode)
33707 	return N_("invalid conversion to type %<bfloat16_t%>");
33708     }
33709 
33710   /* Conversion allowed.  */
33711   return NULL;
33712 }
33713 
33714 /* Return the diagnostic message string if the unary operation OP is
33715    not permitted on TYPE, NULL otherwise.  */
33716 
33717 static const char *
33718 arm_invalid_unary_op (int op, const_tree type)
33719 {
33720   /* Reject all single-operand operations on BFmode except for &.  */
33721   if (element_mode (type) == BFmode && op != ADDR_EXPR)
33722     return N_("operation not permitted on type %<bfloat16_t%>");
33723 
33724   /* Operation allowed.  */
33725   return NULL;
33726 }
33727 
33728 /* Return the diagnostic message string if the binary operation OP is
33729    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
33730 
33731 static const char *
33732 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33733 			   const_tree type2)
33734 {
33735   /* Reject all 2-operand operations on BFmode.  */
33736   if (element_mode (type1) == BFmode
33737       || element_mode (type2) == BFmode)
33738     return N_("operation not permitted on type %<bfloat16_t%>");
33739 
33740   /* Operation allowed.  */
33741   return NULL;
33742 }
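
/* For example, given bfloat16_t values A and B, the expression A + B is
   rejected by the hook above (and -A by arm_invalid_unary_op), while &A is
   still permitted.  */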
33743 
33744 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33745 
33746    In VFPv1, VFP registers could only be accessed in the mode they were
33747    set, so subregs would be invalid there.  However, we don't support
33748    VFPv1 at the moment, and the restriction was lifted in VFPv2.
33749 
33750    In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33751    VFP registers in little-endian order.  We can't describe that accurately to
33752    GCC, so avoid taking subregs of such values.
33753 
33754    The only exception is going from a 128-bit to a 64-bit type.  In that
33755    case the data layout happens to be consistent for big-endian, so we
33756    explicitly allow that case.  */
33757 
33758 static bool
33759 arm_can_change_mode_class (machine_mode from, machine_mode to,
33760 			   reg_class_t rclass)
33761 {
33762   if (TARGET_BIG_END
33763       && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33764       && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33765 	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33766       && reg_classes_intersect_p (VFP_REGS, rclass))
33767     return false;
33768   return true;
33769 }
33770 
33771 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
33772    strcpy from constants will be faster.  */
33773 
33774 static HOST_WIDE_INT
33775 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33776 {
33777   unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33778   if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33779     return MAX (align, BITS_PER_WORD * factor);
33780   return align;
33781 }
33782 
33783 /* Emit a speculation barrier on target architectures that do not have
33784    DSB/ISB directly.  Such systems probably don't need a barrier
33785    themselves, but if the code is ever run on a later architecture, it
33786    might become a problem.  */
33787 void
33788 arm_emit_speculation_barrier_function ()
33789 {
33790   emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33791 }
33792 
33793 /* Have we recorded an explicit access to the Q bit of APSR?  */
33794 bool
33795 arm_q_bit_access (void)
33796 {
33797   if (cfun && cfun->decl)
33798     return lookup_attribute ("acle qbit",
33799 			     DECL_ATTRIBUTES (cfun->decl));
33800   return true;
33801 }
33802 
33803 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
33804 bool
33805 arm_ge_bits_access (void)
33806 {
33807   if (cfun && cfun->decl)
33808     return lookup_attribute ("acle gebits",
33809 			     DECL_ATTRIBUTES (cfun->decl));
33810   return true;
33811 }
33812 
33813 /* Return NULL if insn INSN is valid within a low-overhead loop.
33814    Otherwise return why doloop cannot be applied.  */
33815 
33816 static const char *
33817 arm_invalid_within_doloop (const rtx_insn *insn)
33818 {
33819   if (!TARGET_HAVE_LOB)
33820     return default_invalid_within_doloop (insn);
33821 
33822   if (CALL_P (insn))
33823     return "Function call in the loop.";
33824 
33825   if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
33826     return "LR is used inside loop.";
33827 
33828   return NULL;
33829 }
33830 
33831 bool
33832 arm_target_insn_ok_for_lob (rtx insn)
33833 {
33834   basic_block bb = BLOCK_FOR_INSN (insn);
33835   /* Make sure the basic block of the target insn is a simple latch
33836      whose single predecessor and successor are the body of the loop
33837      itself.  Only simple loops with a single basic block as the body are
33838      supported for 'low-overhead loops', making sure that the LE target is
33839      above LE itself in the generated code.  */
33840 
33841   return single_succ_p (bb)
33842     && single_pred_p (bb)
33843     && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
33844     && contains_no_active_insn_p (bb);
33845 }
33846 
33847 #if CHECKING_P
33848 namespace selftest {
33849 
33850 /* Scan the static data tables generated by parsecpu.awk looking for
33851    potential issues with the data.  We primarily check for
33852    inconsistencies in the option extensions at present (extensions
33853    that duplicate others but aren't marked as aliases).  Furthermore,
33854    for correct canonicalization later options must never be a subset
33855    of an earlier option.  Any extension should also only specify other
33856    feature bits and never an architecture bit.  The architecture is inferred
33857    from the declaration of the extension.  */
33858 static void
33859 arm_test_cpu_arch_data (void)
33860 {
33861   const arch_option *arch;
33862   const cpu_option *cpu;
33863   auto_sbitmap target_isa (isa_num_bits);
33864   auto_sbitmap isa1 (isa_num_bits);
33865   auto_sbitmap isa2 (isa_num_bits);
33866 
33867   for (arch = all_architectures; arch->common.name != NULL; ++arch)
33868     {
33869       const cpu_arch_extension *ext1, *ext2;
33870 
33871       if (arch->common.extensions == NULL)
33872 	continue;
33873 
33874       arm_initialize_isa (target_isa, arch->common.isa_bits);
33875 
33876       for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33877 	{
33878 	  if (ext1->alias)
33879 	    continue;
33880 
33881 	  arm_initialize_isa (isa1, ext1->isa_bits);
33882 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33883 	    {
33884 	      if (ext2->alias || ext1->remove != ext2->remove)
33885 		continue;
33886 
33887 	      arm_initialize_isa (isa2, ext2->isa_bits);
33888 	      /* If the option is a subset of the parent option, it doesn't
33889 		 add anything and so isn't useful.  */
33890 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33891 
33892 	      /* If the extension specifies any architectural bits then
33893 		 disallow it.  Extensions should only specify feature bits.  */
33894 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33895 	    }
33896 	}
33897     }
33898 
33899   for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33900     {
33901       const cpu_arch_extension *ext1, *ext2;
33902 
33903       if (cpu->common.extensions == NULL)
33904 	continue;
33905 
33906       arm_initialize_isa (target_isa, cpu->common.isa_bits);
33907 
33908       for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33909 	{
33910 	  if (ext1->alias)
33911 	    continue;
33912 
33913 	  arm_initialize_isa (isa1, ext1->isa_bits);
33914 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33915 	    {
33916 	      if (ext2->alias || ext1->remove != ext2->remove)
33917 		continue;
33918 
33919 	      arm_initialize_isa (isa2, ext2->isa_bits);
33920 	      /* If the option is a subset of the parent option, it doesn't
33921 		 add anything and so isn't useful.  */
33922 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33923 
33924 	      /* If the extension specifies any architectural bits then
33925 		 disallow it.  Extensions should only specify feature bits.  */
33926 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33927 	    }
33928 	}
33929     }
33930 }
33931 
33932 /* Scan the static data tables generated by parsecpu.awk looking for
33933    potential issues with the data.  Here we check for consistency between the
33934    fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33935    a feature bit that is not defined by any FPU flag.  */
33936 static void
33937 arm_test_fpu_data (void)
33938 {
33939   auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33940   auto_sbitmap fpubits (isa_num_bits);
33941   auto_sbitmap tmpset (isa_num_bits);
33942 
33943   static const enum isa_feature fpu_bitlist_internal[]
33944     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33945   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33946 
33947   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33948   {
33949     arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33950     bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33951     bitmap_clear (isa_all_fpubits_internal);
33952     bitmap_copy (isa_all_fpubits_internal, tmpset);
33953   }
33954 
33955   if (!bitmap_empty_p (isa_all_fpubits_internal))
33956     {
33957 	fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33958 			 " group that are not defined by any FPU.\n"
33959 			 "       Check your arm-cpus.in.\n");
33960 	ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33961     }
33962 }
33963 
33964 static void
33965 arm_run_selftests (void)
33966 {
33967   arm_test_cpu_arch_data ();
33968   arm_test_fpu_data ();
33969 }
33970 } /* Namespace selftest.  */
33971 
33972 #undef TARGET_RUN_TARGET_SELFTESTS
33973 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33974 #endif /* CHECKING_P */
33975 
33976 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33977    Unlike the arm version, we do NOT implement asm flag outputs.  */
33978 
33979 rtx_insn *
33980 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
33981 		      vec<machine_mode> & /*input_modes*/,
33982 		      vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
33983 		      HARD_REG_SET & /*clobbered_regs*/)
33984 {
33985   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33986     if (strncmp (constraints[i], "=@cc", 4) == 0)
33987       {
33988 	sorry ("asm flags not supported in thumb1 mode");
33989 	break;
33990       }
33991   return NULL;
33992 }
33993 
33994 /* Generate code to enable conditional branches in functions over 1 MiB.
33995    Parameters are:
33996      operands: the operands list of the asm insn (see arm_cond_branch or
33997        arm_cond_branch_reversed).
33998      pos_label: an index into the operands array where operands[pos_label] is
33999        the asm label of the final jump destination.
34000      dest: a string used to generate the asm label of the intermediate
34001        destination.
34002      branch_format: a string denoting the intermediate branch format, e.g.
34003        "beq", "bne", etc.  */
34004 
34005 const char *
34006 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34007 		    const char * branch_format)
34008 {
34009   rtx_code_label * tmp_label = gen_label_rtx ();
34010   char label_buf[256];
34011   char buffer[128];
34012   ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34013 			       CODE_LABEL_NUMBER (tmp_label));
34014   const char *label_ptr = arm_strip_name_encoding (label_buf);
34015   rtx dest_label = operands[pos_label];
34016   operands[pos_label] = tmp_label;
34017 
34018   snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
34019   output_asm_insn (buffer, operands);
34020 
34021   snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34022   operands[pos_label] = dest_label;
34023   output_asm_insn (buffer, operands);
34024   return "";
34025 }
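
/* Illustrative output of the routine above (label names are invented): with
   branch_format "bcs\t" it emits

	bcs	.Lbcond12
	b	.Lfar_dest
   .Lbcond12:

   i.e. a short conditional branch around an unconditional branch that can
   reach the distant destination; the caller supplies the condition, normally
   the reverse of the original one, so overall control flow is preserved.  */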
34026 
34027 /* If the given mode matches, load from memory to LO_REGS
34028    (i.e. [Rn], Rn <= LO_REGS).  */
34029 enum reg_class
34030 arm_mode_base_reg_class (machine_mode mode)
34031 {
34032   if (TARGET_HAVE_MVE
34033       && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34034     return LO_REGS;
34035 
34036   return MODE_BASE_REG_REG_CLASS (mode);
34037 }
34038 
34039 struct gcc_target targetm = TARGET_INITIALIZER;
34040 
34041 #include "gt-arm.h"
34042