1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2020 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72 
73 /* This file should be included last.  */
74 #include "target-def.h"
75 
76 /* Forward definitions of types.  */
77 typedef struct minipool_node    Mnode;
78 typedef struct minipool_fixup   Mfix;
79 
80 void (*arm_lang_output_object_attributes_hook)(void);
81 
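/* Holds the (up to four) immediate values of a constant-building
   sequence, as returned by optimal_immediate_sequence below.  */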
82 struct four_ints
83 {
84   int i[4];
85 };
86 
87 /* Forward function declarations.  */
88 static bool arm_const_not_ok_for_debug_p (rtx);
89 static int arm_needs_doubleword_align (machine_mode, const_tree);
90 static int arm_compute_static_chain_stack_bytes (void);
91 static arm_stack_offsets *arm_get_frame_offsets (void);
92 static void arm_compute_frame_layout (void);
93 static void arm_add_gc_roots (void);
94 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
95 			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
96 static unsigned bit_count (unsigned long);
97 static unsigned bitmap_popcount (const sbitmap);
98 static int arm_address_register_rtx_p (rtx, int);
99 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
100 static bool is_called_in_ARM_mode (tree);
101 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
102 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
103 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
104 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
105 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
106 inline static int thumb1_index_register_rtx_p (rtx, int);
107 static int thumb_far_jump_used_p (void);
108 static bool thumb_force_lr_save (void);
109 static unsigned arm_size_return_regs (void);
110 static bool arm_assemble_integer (rtx, unsigned int, int);
111 static void arm_print_operand (FILE *, rtx, int);
112 static void arm_print_operand_address (FILE *, machine_mode, rtx);
113 static bool arm_print_operand_punct_valid_p (unsigned char code);
114 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
115 static arm_cc get_arm_condition_code (rtx);
116 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
117 static const char *output_multi_immediate (rtx *, const char *, const char *,
118 					   int, HOST_WIDE_INT);
119 static const char *shift_op (rtx, HOST_WIDE_INT *);
120 static struct machine_function *arm_init_machine_status (void);
121 static void thumb_exit (FILE *, int);
122 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
123 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_forward_ref (Mfix *);
125 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
126 static Mnode *add_minipool_backward_ref (Mfix *);
127 static void assign_minipool_offsets (Mfix *);
128 static void arm_print_value (FILE *, rtx);
129 static void dump_minipool (rtx_insn *);
130 static int arm_barrier_cost (rtx_insn *);
131 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
132 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
133 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
134 			       machine_mode, rtx);
135 static void arm_reorg (void);
136 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
137 static unsigned long arm_compute_save_reg0_reg12_mask (void);
138 static unsigned long arm_compute_save_core_reg_mask (void);
139 static unsigned long arm_isr_value (tree);
140 static unsigned long arm_compute_func_type (void);
141 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
142 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
143 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
144 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
145 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
146 #endif
147 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
148 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
149 static void arm_output_function_epilogue (FILE *);
150 static void arm_output_function_prologue (FILE *);
151 static int arm_comp_type_attributes (const_tree, const_tree);
152 static void arm_set_default_type_attributes (tree);
153 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
154 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
155 static int optimal_immediate_sequence (enum rtx_code code,
156 				       unsigned HOST_WIDE_INT val,
157 				       struct four_ints *return_sequence);
158 static int optimal_immediate_sequence_1 (enum rtx_code code,
159 					 unsigned HOST_WIDE_INT val,
160 					 struct four_ints *return_sequence,
161 					 int i);
162 static int arm_get_strip_length (int);
163 static bool arm_function_ok_for_sibcall (tree, tree);
164 static machine_mode arm_promote_function_mode (const_tree,
165 						    machine_mode, int *,
166 						    const_tree, int);
167 static bool arm_return_in_memory (const_tree, const_tree);
168 static rtx arm_function_value (const_tree, const_tree, bool);
169 static rtx arm_libcall_value_1 (machine_mode);
170 static rtx arm_libcall_value (machine_mode, const_rtx);
171 static bool arm_function_value_regno_p (const unsigned int);
172 static void arm_internal_label (FILE *, const char *, unsigned long);
173 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
174 				 tree);
175 static bool arm_have_conditional_execution (void);
176 static bool arm_cannot_force_const_mem (machine_mode, rtx);
177 static bool arm_legitimate_constant_p (machine_mode, rtx);
178 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
179 static int arm_insn_cost (rtx_insn *, bool);
180 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
181 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
182 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx_insn *emit_set_insn (rtx, rtx);
185 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
186 static rtx emit_multi_reg_push (unsigned long, unsigned long);
187 static void arm_emit_multi_reg_pop (unsigned long);
188 static int vfp_emit_fstmd (int, int);
189 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
190 static int arm_arg_partial_bytes (cumulative_args_t,
191 				  const function_arg_info &);
192 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
193 static void arm_function_arg_advance (cumulative_args_t,
194 				      const function_arg_info &);
195 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
196 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
197 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
198 				      const_tree);
199 static rtx aapcs_libcall_value (machine_mode);
200 static int aapcs_select_return_coproc (const_tree, const_tree);
201 
202 #ifdef OBJECT_FORMAT_ELF
203 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
204 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
205 #endif
206 #ifndef ARM_PE
207 static void arm_encode_section_info (tree, rtx, int);
208 #endif
209 
210 static void arm_file_end (void);
211 static void arm_file_start (void);
212 static void arm_insert_attributes (tree, tree *);
213 
214 static void arm_setup_incoming_varargs (cumulative_args_t,
215 					const function_arg_info &, int *, int);
216 static bool arm_pass_by_reference (cumulative_args_t,
217 				   const function_arg_info &);
218 static bool arm_promote_prototypes (const_tree);
219 static bool arm_default_short_enums (void);
220 static bool arm_align_anon_bitfield (void);
221 static bool arm_return_in_msb (const_tree);
222 static bool arm_must_pass_in_stack (const function_arg_info &);
223 static bool arm_return_in_memory (const_tree, const_tree);
224 #if ARM_UNWIND_INFO
225 static void arm_unwind_emit (FILE *, rtx_insn *);
226 static bool arm_output_ttype (rtx);
227 static void arm_asm_emit_except_personality (rtx);
228 #endif
229 static void arm_asm_init_sections (void);
230 static rtx arm_dwarf_register_span (rtx);
231 
232 static tree arm_cxx_guard_type (void);
233 static bool arm_cxx_guard_mask_bit (void);
234 static tree arm_get_cookie_size (tree);
235 static bool arm_cookie_has_size (void);
236 static bool arm_cxx_cdtor_returns_this (void);
237 static bool arm_cxx_key_method_may_be_inline (void);
238 static void arm_cxx_determine_class_data_visibility (tree);
239 static bool arm_cxx_class_data_always_comdat (void);
240 static bool arm_cxx_use_aeabi_atexit (void);
241 static void arm_init_libfuncs (void);
242 static tree arm_build_builtin_va_list (void);
243 static void arm_expand_builtin_va_start (tree, rtx);
244 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
245 static void arm_option_override (void);
246 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
247 static void arm_option_restore (struct gcc_options *,
248 				struct cl_target_option *);
249 static void arm_override_options_after_change (void);
250 static void arm_option_print (FILE *, int, struct cl_target_option *);
251 static void arm_set_current_function (tree);
252 static bool arm_can_inline_p (tree, tree);
253 static void arm_relayout_function (tree);
254 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
255 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
256 static bool arm_sched_can_speculate_insn (rtx_insn *);
257 static bool arm_macro_fusion_p (void);
258 static bool arm_cannot_copy_insn_p (rtx_insn *);
259 static int arm_issue_rate (void);
260 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 					unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 						     const_tree type,
286 						     int misalignment,
287 						     bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296 
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 					  const vec_perm_indices &);
299 
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301 
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 					   tree vectype,
304 					   int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 				   enum vect_cost_for_stmt kind,
307 				   struct _stmt_vec_info *stmt_info,
308 				   int misalign,
309 				   enum vect_cost_model_location where);
310 
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 					 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 				     const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 						int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 					vec<const char *> &, vec<rtx> &,
330 					HARD_REG_SET &);
331 static const char *arm_identify_fpu_from_isa (sbitmap);
332 
333 /* Table of machine attributes.  */
334 static const struct attribute_spec arm_attribute_table[] =
335 {
336   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337        affects_type_identity, handler, exclude } */
338   /* Function calls made to this symbol must be done indirectly, because
339      it may lie outside of the 26 bit addressing range of a normal function
340      call.  */
341   { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
342   /* Whereas these functions are always known to reside within the 26 bit
343      addressing range.  */
344   { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
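  /* (For illustration: user code requests these with the GNU attribute
     syntax, e.g. "void far_func (void) __attribute__ ((long_call));".)  */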
345   /* Specify the procedure call conventions for a function.  */
346   { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
347     NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
349   { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
350     NULL },
351   { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
352     NULL },
353   { "naked",        0, 0, true,  false, false, false,
354     arm_handle_fndecl_attribute, NULL },
355 #ifdef ARM_PE
356   /* ARM/PE has three new attributes:
357      interfacearm - ?
358      dllexport - for exporting a function/variable that will live in a dll
359      dllimport - for importing a function/variable from a dll
360 
361      Microsoft allows multiple declspecs in one __declspec, separating
362      them with spaces.  We do NOT support this.  Instead, use __declspec
363      multiple times.
364   */
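  /* For example (illustrative only), to combine two of these one would
     write "__declspec (dllexport) __declspec (naked)" rather than the
     Microsoft-style "__declspec (dllexport naked)".  */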
365   { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
366   { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
367   { "interfacearm", 0, 0, true,  false, false, false,
368     arm_handle_fndecl_attribute, NULL },
369 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
370   { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
371     NULL },
372   { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
373     NULL },
374   { "notshared",    0, 0, false, true, false, false,
375     arm_handle_notshared_attribute, NULL },
376 #endif
377   /* ARMv8-M Security Extensions support.  */
378   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
379     arm_handle_cmse_nonsecure_entry, NULL },
380   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
381     arm_handle_cmse_nonsecure_call, NULL },
382   { "Advanced SIMD type", 0, 0, false, true, false, true, NULL, NULL },
383   { NULL, 0, 0, false, false, false, false, NULL, NULL }
384 };
385 
386 /* Initialize the GCC target structure.  */
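/* The pattern throughout this block: #undef any default supplied by
   target-def.h (included above) and #define the TARGET_* hook macro to
   the ARM implementation; TARGET_INITIALIZER later collects the
   definitions into the targetm structure.  */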
387 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
388 #undef  TARGET_MERGE_DECL_ATTRIBUTES
389 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #endif
391 
392 #undef TARGET_CHECK_BUILTIN_CALL
393 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
394 
395 #undef TARGET_LEGITIMIZE_ADDRESS
396 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
397 
398 #undef  TARGET_ATTRIBUTE_TABLE
399 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
400 
401 #undef  TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
403 
404 #undef TARGET_ASM_FILE_START
405 #define TARGET_ASM_FILE_START arm_file_start
406 #undef TARGET_ASM_FILE_END
407 #define TARGET_ASM_FILE_END arm_file_end
408 
409 #undef  TARGET_ASM_ALIGNED_SI_OP
410 #define TARGET_ASM_ALIGNED_SI_OP NULL
411 #undef  TARGET_ASM_INTEGER
412 #define TARGET_ASM_INTEGER arm_assemble_integer
413 
414 #undef TARGET_PRINT_OPERAND
415 #define TARGET_PRINT_OPERAND arm_print_operand
416 #undef TARGET_PRINT_OPERAND_ADDRESS
417 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
418 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
419 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
420 
421 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
422 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
423 
424 #undef  TARGET_ASM_FUNCTION_PROLOGUE
425 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
426 
427 #undef  TARGET_ASM_FUNCTION_EPILOGUE
428 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
429 
430 #undef TARGET_CAN_INLINE_P
431 #define TARGET_CAN_INLINE_P arm_can_inline_p
432 
433 #undef TARGET_RELAYOUT_FUNCTION
434 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
435 
436 #undef  TARGET_OPTION_OVERRIDE
437 #define TARGET_OPTION_OVERRIDE arm_option_override
438 
439 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
440 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
441 
442 #undef TARGET_OPTION_SAVE
443 #define TARGET_OPTION_SAVE arm_option_save
444 
445 #undef TARGET_OPTION_RESTORE
446 #define TARGET_OPTION_RESTORE arm_option_restore
447 
448 #undef TARGET_OPTION_PRINT
449 #define TARGET_OPTION_PRINT arm_option_print
450 
451 #undef  TARGET_COMP_TYPE_ATTRIBUTES
452 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
453 
454 #undef TARGET_SCHED_CAN_SPECULATE_INSN
455 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
456 
457 #undef TARGET_SCHED_MACRO_FUSION_P
458 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
459 
460 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
461 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
462 
463 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
464 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
465 
466 #undef  TARGET_SCHED_ADJUST_COST
467 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
468 
469 #undef TARGET_SET_CURRENT_FUNCTION
470 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
471 
472 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
473 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
474 
475 #undef TARGET_SCHED_REORDER
476 #define TARGET_SCHED_REORDER arm_sched_reorder
477 
478 #undef TARGET_REGISTER_MOVE_COST
479 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
480 
481 #undef TARGET_MEMORY_MOVE_COST
482 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
483 
484 #undef TARGET_ENCODE_SECTION_INFO
485 #ifdef ARM_PE
486 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
487 #else
488 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
489 #endif
490 
491 #undef  TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
493 
494 #undef  TARGET_ASM_INTERNAL_LABEL
495 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
496 
497 #undef TARGET_FLOATN_MODE
498 #define TARGET_FLOATN_MODE arm_floatn_mode
499 
500 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
501 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
502 
503 #undef  TARGET_FUNCTION_VALUE
504 #define TARGET_FUNCTION_VALUE arm_function_value
505 
506 #undef  TARGET_LIBCALL_VALUE
507 #define TARGET_LIBCALL_VALUE arm_libcall_value
508 
509 #undef TARGET_FUNCTION_VALUE_REGNO_P
510 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
511 
512 #undef  TARGET_ASM_OUTPUT_MI_THUNK
513 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
514 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
515 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
516 
517 #undef  TARGET_RTX_COSTS
518 #define TARGET_RTX_COSTS arm_rtx_costs
519 #undef  TARGET_ADDRESS_COST
520 #define TARGET_ADDRESS_COST arm_address_cost
521 #undef TARGET_INSN_COST
522 #define TARGET_INSN_COST arm_insn_cost
523 
524 #undef TARGET_SHIFT_TRUNCATION_MASK
525 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
527 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
528 #undef TARGET_ARRAY_MODE_SUPPORTED_P
529 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
530 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
531 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
532 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
533 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
534   arm_autovectorize_vector_modes
535 
536 #undef  TARGET_MACHINE_DEPENDENT_REORG
537 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
538 
539 #undef  TARGET_INIT_BUILTINS
540 #define TARGET_INIT_BUILTINS  arm_init_builtins
541 #undef  TARGET_EXPAND_BUILTIN
542 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
543 #undef  TARGET_BUILTIN_DECL
544 #define TARGET_BUILTIN_DECL arm_builtin_decl
545 
546 #undef TARGET_INIT_LIBFUNCS
547 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
548 
549 #undef TARGET_PROMOTE_FUNCTION_MODE
550 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
553 #undef TARGET_PASS_BY_REFERENCE
554 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
555 #undef TARGET_ARG_PARTIAL_BYTES
556 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
557 #undef TARGET_FUNCTION_ARG
558 #define TARGET_FUNCTION_ARG arm_function_arg
559 #undef TARGET_FUNCTION_ARG_ADVANCE
560 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
561 #undef TARGET_FUNCTION_ARG_PADDING
562 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
563 #undef TARGET_FUNCTION_ARG_BOUNDARY
564 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
565 
566 #undef  TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
568 
569 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
570 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
571 
572 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
573 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
574 #undef TARGET_TRAMPOLINE_INIT
575 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
576 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
577 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
578 
579 #undef TARGET_WARN_FUNC_RETURN
580 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
581 
582 #undef TARGET_DEFAULT_SHORT_ENUMS
583 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
584 
585 #undef TARGET_ALIGN_ANON_BITFIELD
586 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
587 
588 #undef TARGET_NARROW_VOLATILE_BITFIELD
589 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
590 
591 #undef TARGET_CXX_GUARD_TYPE
592 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
593 
594 #undef TARGET_CXX_GUARD_MASK_BIT
595 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
596 
597 #undef TARGET_CXX_GET_COOKIE_SIZE
598 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
599 
600 #undef TARGET_CXX_COOKIE_HAS_SIZE
601 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
602 
603 #undef TARGET_CXX_CDTOR_RETURNS_THIS
604 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
605 
606 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
607 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
608 
609 #undef TARGET_CXX_USE_AEABI_ATEXIT
610 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
611 
612 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
613 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
614   arm_cxx_determine_class_data_visibility
615 
616 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
617 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
618 
619 #undef TARGET_RETURN_IN_MSB
620 #define TARGET_RETURN_IN_MSB arm_return_in_msb
621 
622 #undef TARGET_RETURN_IN_MEMORY
623 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
624 
625 #undef TARGET_MUST_PASS_IN_STACK
626 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
627 
628 #if ARM_UNWIND_INFO
629 #undef TARGET_ASM_UNWIND_EMIT
630 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
631 
632 /* EABI unwinding tables use a different format for the typeinfo tables.  */
633 #undef TARGET_ASM_TTYPE
634 #define TARGET_ASM_TTYPE arm_output_ttype
635 
636 #undef TARGET_ARM_EABI_UNWINDER
637 #define TARGET_ARM_EABI_UNWINDER true
638 
639 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
640 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
641 
642 #endif /* ARM_UNWIND_INFO */
643 
644 #undef TARGET_ASM_INIT_SECTIONS
645 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
646 
647 #undef TARGET_DWARF_REGISTER_SPAN
648 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
649 
650 #undef  TARGET_CANNOT_COPY_INSN_P
651 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
652 
653 #ifdef HAVE_AS_TLS
654 #undef TARGET_HAVE_TLS
655 #define TARGET_HAVE_TLS true
656 #endif
657 
658 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
659 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
660 
661 #undef TARGET_LEGITIMATE_CONSTANT_P
662 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663 
664 #undef TARGET_CANNOT_FORCE_CONST_MEM
665 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666 
667 #undef TARGET_MAX_ANCHOR_OFFSET
668 #define TARGET_MAX_ANCHOR_OFFSET 4095
669 
/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
673 #undef TARGET_MIN_ANCHOR_OFFSET
674 #define TARGET_MIN_ANCHOR_OFFSET -4088
675 
676 #undef TARGET_SCHED_ISSUE_RATE
677 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678 
679 #undef TARGET_SCHED_VARIABLE_ISSUE
680 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681 
682 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
683 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
684   arm_first_cycle_multipass_dfa_lookahead
685 
686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
688   arm_first_cycle_multipass_dfa_lookahead_guard
689 
690 #undef TARGET_MANGLE_TYPE
691 #define TARGET_MANGLE_TYPE arm_mangle_type
692 
693 #undef TARGET_INVALID_CONVERSION
694 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695 
696 #undef TARGET_INVALID_UNARY_OP
697 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698 
699 #undef TARGET_INVALID_BINARY_OP
700 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701 
702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704 
705 #undef TARGET_BUILD_BUILTIN_VA_LIST
706 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
707 #undef TARGET_EXPAND_BUILTIN_VA_START
708 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
709 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
710 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711 
712 #ifdef HAVE_AS_TLS
713 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
714 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
715 #endif
716 
717 #undef TARGET_LEGITIMATE_ADDRESS_P
718 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
719 
720 #undef TARGET_PREFERRED_RELOAD_CLASS
721 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722 
723 #undef TARGET_PROMOTED_TYPE
724 #define TARGET_PROMOTED_TYPE arm_promoted_type
725 
726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
727 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728 
729 #undef TARGET_COMPUTE_FRAME_LAYOUT
730 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731 
732 #undef TARGET_FRAME_POINTER_REQUIRED
733 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734 
735 #undef TARGET_CAN_ELIMINATE
736 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737 
738 #undef TARGET_CONDITIONAL_REGISTER_USAGE
739 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740 
741 #undef TARGET_CLASS_LIKELY_SPILLED_P
742 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743 
744 #undef TARGET_VECTORIZE_BUILTINS
745 #define TARGET_VECTORIZE_BUILTINS
746 
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
749   arm_builtin_vectorized_function
750 
751 #undef TARGET_VECTOR_ALIGNMENT
752 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
753 
754 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
755 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
756   arm_vector_alignment_reachable
757 
758 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
759 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
760   arm_builtin_support_vector_misalignment
761 
762 #undef TARGET_PREFERRED_RENAME_CLASS
763 #define TARGET_PREFERRED_RENAME_CLASS \
764   arm_preferred_rename_class
765 
766 #undef TARGET_VECTORIZE_VEC_PERM_CONST
767 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
768 
769 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
770 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
771   arm_builtin_vectorization_cost
772 #undef TARGET_VECTORIZE_ADD_STMT_COST
773 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
774 
775 #undef TARGET_CANONICALIZE_COMPARISON
776 #define TARGET_CANONICALIZE_COMPARISON \
777   arm_canonicalize_comparison
778 
779 #undef TARGET_ASAN_SHADOW_OFFSET
780 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
781 
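/* Maximum number of insns that may be placed in a single IT block.
   The architectural limit is four; with -mrestrict-it (arm_restrict_it)
   only one insn per IT block is generated.  */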
782 #undef MAX_INSN_PER_IT_BLOCK
783 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
784 
785 #undef TARGET_CAN_USE_DOLOOP_P
786 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
787 
788 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
789 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
790 
791 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
792 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
793 
794 #undef TARGET_SCHED_FUSION_PRIORITY
795 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
796 
797 #undef  TARGET_ASM_FUNCTION_SECTION
798 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
799 
800 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
801 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
802 
803 #undef TARGET_SECTION_TYPE_FLAGS
804 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
805 
806 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
807 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
808 
809 #undef TARGET_C_EXCESS_PRECISION
810 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
811 
812 /* Although the architecture reserves bits 0 and 1, only the former is
813    used for ARM/Thumb ISA selection in v7 and earlier versions.  */
814 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
815 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
816 
817 #undef TARGET_FIXED_CONDITION_CODE_REGS
818 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
819 
820 #undef TARGET_HARD_REGNO_NREGS
821 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
822 #undef TARGET_HARD_REGNO_MODE_OK
823 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
824 
825 #undef TARGET_MODES_TIEABLE_P
826 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
827 
828 #undef TARGET_CAN_CHANGE_MODE_CLASS
829 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
830 
831 #undef TARGET_CONSTANT_ALIGNMENT
832 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
833 
834 #undef TARGET_MD_ASM_ADJUST
835 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
836 
837 /* Obstack for minipool constant handling.  */
838 static struct obstack minipool_obstack;
839 static char *         minipool_startobj;
840 
841 /* The maximum number of insns skipped which
842    will be conditionalised if possible.  */
843 static int max_insns_skipped = 5;
844 
845 extern FILE * asm_out_file;
846 
847 /* True if we are currently building a constant table.  */
848 int making_const_table;
849 
850 /* The processor for which instructions should be scheduled.  */
851 enum processor_type arm_tune = TARGET_CPU_arm_none;
852 
853 /* The current tuning set.  */
854 const struct tune_params *current_tune;
855 
856 /* Which floating point hardware to schedule for.  */
857 int arm_fpu_attr;
858 
859 /* Used for Thumb call_via trampolines.  */
860 rtx thumb_call_via_label[14];
861 static int thumb_call_reg_needed;
862 
863 /* The bits in this mask specify which instruction scheduling options should
864    be used.  */
865 unsigned int tune_flags = 0;
866 
867 /* The highest ARM architecture version supported by the
868    target.  */
869 enum base_architecture arm_base_arch = BASE_ARCH_0;
870 
871 /* Active target architecture and tuning.  */
872 
873 struct arm_build_target arm_active_target;
874 
875 /* The following are used in the arm.md file as equivalents to bits
876    in the above two flag variables.  */
877 
878 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
879 int arm_arch4 = 0;
880 
881 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
882 int arm_arch4t = 0;
883 
884 /* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
885 int arm_arch5t = 0;
886 
887 /* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
888 int arm_arch5te = 0;
889 
890 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
891 int arm_arch6 = 0;
892 
893 /* Nonzero if this chip supports the ARM 6K extensions.  */
894 int arm_arch6k = 0;
895 
896 /* Nonzero if this chip supports the ARM 6KZ extensions.  */
897 int arm_arch6kz = 0;
898 
899 /* Nonzero if instructions present in ARMv6-M can be used.  */
900 int arm_arch6m = 0;
901 
902 /* Nonzero if this chip supports the ARM 7 extensions.  */
903 int arm_arch7 = 0;
904 
905 /* Nonzero if this chip supports the Large Physical Address Extension.  */
906 int arm_arch_lpae = 0;
907 
908 /* Nonzero if instructions not present in the 'M' profile can be used.  */
909 int arm_arch_notm = 0;
910 
911 /* Nonzero if instructions present in ARMv7E-M can be used.  */
912 int arm_arch7em = 0;
913 
914 /* Nonzero if instructions present in ARMv8 can be used.  */
915 int arm_arch8 = 0;
916 
917 /* Nonzero if this chip supports the ARMv8.1 extensions.  */
918 int arm_arch8_1 = 0;
919 
920 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
921 int arm_arch8_2 = 0;
922 
923 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
924 int arm_arch8_3 = 0;
925 
926 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
927 int arm_arch8_4 = 0;
928 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
929    extensions.  */
930 int arm_arch8_1m_main = 0;
931 
932 /* Nonzero if this chip supports the FP16 instructions extension of ARM
933    Architecture 8.2.  */
934 int arm_fp16_inst = 0;
935 
936 /* Nonzero if this chip can benefit from load scheduling.  */
937 int arm_ld_sched = 0;
938 
939 /* Nonzero if this chip is a StrongARM.  */
940 int arm_tune_strongarm = 0;
941 
942 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
943 int arm_arch_iwmmxt = 0;
944 
945 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
946 int arm_arch_iwmmxt2 = 0;
947 
948 /* Nonzero if this chip is an XScale.  */
949 int arm_arch_xscale = 0;
950 
/* Nonzero if tuning for XScale.  */
952 int arm_tune_xscale = 0;
953 
954 /* Nonzero if we want to tune for stores that access the write-buffer.
955    This typically means an ARM6 or ARM7 with MMU or MPU.  */
956 int arm_tune_wbuf = 0;
957 
958 /* Nonzero if tuning for Cortex-A9.  */
959 int arm_tune_cortex_a9 = 0;
960 
961 /* Nonzero if we should define __THUMB_INTERWORK__ in the
962    preprocessor.
963    XXX This is a bit of a hack, it's intended to help work around
964    problems in GLD which doesn't understand that armv5t code is
965    interworking clean.  */
966 int arm_cpp_interwork = 0;
967 
968 /* Nonzero if chip supports Thumb 1.  */
969 int arm_arch_thumb1;
970 
971 /* Nonzero if chip supports Thumb 2.  */
972 int arm_arch_thumb2;
973 
974 /* Nonzero if chip supports integer division instruction.  */
975 int arm_arch_arm_hwdiv;
976 int arm_arch_thumb_hwdiv;
977 
978 /* Nonzero if chip disallows volatile memory access in IT block.  */
979 int arm_arch_no_volatile_ce;
980 
981 /* Nonzero if we shouldn't use literal pools.  */
982 bool arm_disable_literal_pool = false;
983 
984 /* The register number to be used for the PIC offset register.  */
985 unsigned arm_pic_register = INVALID_REGNUM;
986 
987 enum arm_pcs arm_pcs_default;
988 
989 /* For an explanation of these variables, see final_prescan_insn below.  */
990 int arm_ccfsm_state;
991 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
992 enum arm_cond_code arm_current_cc;
993 
994 rtx arm_target_insn;
995 int arm_target_label;
996 /* The number of conditionally executed insns, including the current insn.  */
997 int arm_condexec_count = 0;
998 /* A bitmask specifying the patterns for the IT block.
999    Zero means do not output an IT block before this insn. */
1000 int arm_condexec_mask = 0;
1001 /* The number of bits used in arm_condexec_mask.  */
1002 int arm_condexec_masklen = 0;
1003 
1004 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
1005 int arm_arch_crc = 0;
1006 
1007 /* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
1008 int arm_arch_dotprod = 0;
1009 
1010 /* Nonzero if chip supports the ARMv8-M security extensions.  */
1011 int arm_arch_cmse = 0;
1012 
/* Nonzero if the core has a very small, high-latency multiply unit.  */
1014 int arm_m_profile_small_mul = 0;
1015 
1016 /* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
1017 int arm_arch_i8mm = 0;
1018 
1019 /* Nonzero if chip supports the BFloat16 instructions.  */
1020 int arm_arch_bf16 = 0;
1021 
1022 /* Nonzero if chip supports the Custom Datapath Extension.  */
1023 int arm_arch_cde = 0;
1024 int arm_arch_cde_coproc = 0;
1025 const int arm_arch_cde_coproc_bits[] = {
1026   0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1027 };
1028 
1029 /* The condition codes of the ARM, and the inverse function.  */
1030 static const char * const arm_condition_codes[] =
1031 {
1032   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1033   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1034 };
1035 
1036 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
1037 int arm_regs_in_sequence[] =
1038 {
1039   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1040 };
1041 
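/* Build the table of FP system register names from the FP_SYSREGS
   X-macro list (defined in the target headers): DEF_FP_SYSREG is
   temporarily defined to stringize its argument, so an entry such as
   DEF_FP_SYSREG (FPSCR) contributes the string "FPSCR".  */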
1042 #define DEF_FP_SYSREG(reg) #reg,
1043 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1044   FP_SYSREGS
1045 };
1046 #undef DEF_FP_SYSREG
1047 
1048 #define ARM_LSL_NAME "lsl"
1049 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1050 
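/* Mask of the low registers (r0-r7) usable as work registers in
   Thumb-2, with the Thumb hard frame pointer and the PIC offset table
   register removed (SP and PC are masked out too, although they lie
   outside the low-register range anyway).  */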
1051 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1052 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1053 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
1054 
1055 /* Initialization code.  */
1056 
1057 struct cpu_tune
1058 {
1059   enum processor_type scheduler;
1060   unsigned int tune_flags;
1061   const struct tune_params *tune;
1062 };
1063 
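/* Convenience initializers for the prefetch tuning fields used in the
   tune_params tables below: {num_slots, l1_size, l1_line_size}, where
   ARM_PREFETCH_NOT_BENEFICIAL uses -1 to leave the cache parameters
   unspecified.  */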
1064 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1066   {								\
1067     num_slots,							\
1068     l1_size,							\
1069     l1_line_size						\
1070   }
1071 
1072 /* arm generic vectorizer costs.  */
1073 static const
1074 struct cpu_vec_costs arm_default_vec_cost = {
1075   1,					/* scalar_stmt_cost.  */
1076   1,					/* scalar load_cost.  */
1077   1,					/* scalar_store_cost.  */
1078   1,					/* vec_stmt_cost.  */
1079   1,					/* vec_to_scalar_cost.  */
1080   1,					/* scalar_to_vec_cost.  */
1081   1,					/* vec_align_load_cost.  */
1082   1,					/* vec_unalign_load_cost.  */
1083   1,					/* vec_unalign_store_cost.  */
1084   1,					/* vec_store_cost.  */
1085   3,					/* cond_taken_branch_cost.  */
1086   1,					/* cond_not_taken_branch_cost.  */
1087 };
1088 
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
1090 #include "aarch-cost-tables.h"
1091 
1092 
1093 
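/* Per-core cost tables.  Entries are expressed with COSTS_N_INSNS
   (units of one instruction) and, as the *_extra_costs names suggest,
   give the cost of an operation over and above the baseline; a zero
   entry means no extra cost.  */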
1094 const struct cpu_cost_table cortexa9_extra_costs =
1095 {
1096   /* ALU */
1097   {
1098     0,			/* arith.  */
1099     0,			/* logical.  */
1100     0,			/* shift.  */
1101     COSTS_N_INSNS (1),	/* shift_reg.  */
1102     COSTS_N_INSNS (1),	/* arith_shift.  */
1103     COSTS_N_INSNS (2),	/* arith_shift_reg.  */
1104     0,			/* log_shift.  */
1105     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1106     COSTS_N_INSNS (1),	/* extend.  */
1107     COSTS_N_INSNS (2),	/* extend_arith.  */
1108     COSTS_N_INSNS (1),	/* bfi.  */
1109     COSTS_N_INSNS (1),	/* bfx.  */
1110     0,			/* clz.  */
1111     0,			/* rev.  */
1112     0,			/* non_exec.  */
1113     true		/* non_exec_costs_exec.  */
1114   },
1115   {
1116     /* MULT SImode */
1117     {
1118       COSTS_N_INSNS (3),	/* simple.  */
1119       COSTS_N_INSNS (3),	/* flag_setting.  */
1120       COSTS_N_INSNS (2),	/* extend.  */
1121       COSTS_N_INSNS (3),	/* add.  */
1122       COSTS_N_INSNS (2),	/* extend_add.  */
1123       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1124     },
1125     /* MULT DImode */
1126     {
1127       0,			/* simple (N/A).  */
1128       0,			/* flag_setting (N/A).  */
1129       COSTS_N_INSNS (4),	/* extend.  */
1130       0,			/* add (N/A).  */
1131       COSTS_N_INSNS (4),	/* extend_add.  */
1132       0				/* idiv (N/A).  */
1133     }
1134   },
1135   /* LD/ST */
1136   {
1137     COSTS_N_INSNS (2),	/* load.  */
1138     COSTS_N_INSNS (2),	/* load_sign_extend.  */
1139     COSTS_N_INSNS (2),	/* ldrd.  */
1140     COSTS_N_INSNS (2),	/* ldm_1st.  */
1141     1,			/* ldm_regs_per_insn_1st.  */
1142     2,			/* ldm_regs_per_insn_subsequent.  */
1143     COSTS_N_INSNS (5),	/* loadf.  */
1144     COSTS_N_INSNS (5),	/* loadd.  */
1145     COSTS_N_INSNS (1),  /* load_unaligned.  */
1146     COSTS_N_INSNS (2),	/* store.  */
1147     COSTS_N_INSNS (2),	/* strd.  */
1148     COSTS_N_INSNS (2),	/* stm_1st.  */
1149     1,			/* stm_regs_per_insn_1st.  */
1150     2,			/* stm_regs_per_insn_subsequent.  */
1151     COSTS_N_INSNS (1),	/* storef.  */
1152     COSTS_N_INSNS (1),	/* stored.  */
1153     COSTS_N_INSNS (1),	/* store_unaligned.  */
1154     COSTS_N_INSNS (1),	/* loadv.  */
1155     COSTS_N_INSNS (1)	/* storev.  */
1156   },
1157   {
1158     /* FP SFmode */
1159     {
1160       COSTS_N_INSNS (14),	/* div.  */
1161       COSTS_N_INSNS (4),	/* mult.  */
1162       COSTS_N_INSNS (7),	/* mult_addsub. */
1163       COSTS_N_INSNS (30),	/* fma.  */
1164       COSTS_N_INSNS (3),	/* addsub.  */
1165       COSTS_N_INSNS (1),	/* fpconst.  */
1166       COSTS_N_INSNS (1),	/* neg.  */
1167       COSTS_N_INSNS (3),	/* compare.  */
1168       COSTS_N_INSNS (3),	/* widen.  */
1169       COSTS_N_INSNS (3),	/* narrow.  */
1170       COSTS_N_INSNS (3),	/* toint.  */
1171       COSTS_N_INSNS (3),	/* fromint.  */
1172       COSTS_N_INSNS (3)		/* roundint.  */
1173     },
1174     /* FP DFmode */
1175     {
1176       COSTS_N_INSNS (24),	/* div.  */
1177       COSTS_N_INSNS (5),	/* mult.  */
1178       COSTS_N_INSNS (8),	/* mult_addsub.  */
1179       COSTS_N_INSNS (30),	/* fma.  */
1180       COSTS_N_INSNS (3),	/* addsub.  */
1181       COSTS_N_INSNS (1),	/* fpconst.  */
1182       COSTS_N_INSNS (1),	/* neg.  */
1183       COSTS_N_INSNS (3),	/* compare.  */
1184       COSTS_N_INSNS (3),	/* widen.  */
1185       COSTS_N_INSNS (3),	/* narrow.  */
1186       COSTS_N_INSNS (3),	/* toint.  */
1187       COSTS_N_INSNS (3),	/* fromint.  */
1188       COSTS_N_INSNS (3)		/* roundint.  */
1189     }
1190   },
1191   /* Vector */
1192   {
1193     COSTS_N_INSNS (1)	/* alu.  */
1194   }
1195 };
1196 
1197 const struct cpu_cost_table cortexa8_extra_costs =
1198 {
1199   /* ALU */
1200   {
1201     0,			/* arith.  */
1202     0,			/* logical.  */
1203     COSTS_N_INSNS (1),	/* shift.  */
1204     0,			/* shift_reg.  */
1205     COSTS_N_INSNS (1),	/* arith_shift.  */
1206     0,			/* arith_shift_reg.  */
1207     COSTS_N_INSNS (1),	/* log_shift.  */
1208     0,			/* log_shift_reg.  */
1209     0,			/* extend.  */
1210     0,			/* extend_arith.  */
1211     0,			/* bfi.  */
1212     0,			/* bfx.  */
1213     0,			/* clz.  */
1214     0,			/* rev.  */
1215     0,			/* non_exec.  */
1216     true		/* non_exec_costs_exec.  */
1217   },
1218   {
1219     /* MULT SImode */
1220     {
1221       COSTS_N_INSNS (1),	/* simple.  */
1222       COSTS_N_INSNS (1),	/* flag_setting.  */
1223       COSTS_N_INSNS (1),	/* extend.  */
1224       COSTS_N_INSNS (1),	/* add.  */
1225       COSTS_N_INSNS (1),	/* extend_add.  */
1226       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1227     },
1228     /* MULT DImode */
1229     {
1230       0,			/* simple (N/A).  */
1231       0,			/* flag_setting (N/A).  */
1232       COSTS_N_INSNS (2),	/* extend.  */
1233       0,			/* add (N/A).  */
1234       COSTS_N_INSNS (2),	/* extend_add.  */
1235       0				/* idiv (N/A).  */
1236     }
1237   },
1238   /* LD/ST */
1239   {
1240     COSTS_N_INSNS (1),	/* load.  */
1241     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1242     COSTS_N_INSNS (1),	/* ldrd.  */
1243     COSTS_N_INSNS (1),	/* ldm_1st.  */
1244     1,			/* ldm_regs_per_insn_1st.  */
1245     2,			/* ldm_regs_per_insn_subsequent.  */
1246     COSTS_N_INSNS (1),	/* loadf.  */
1247     COSTS_N_INSNS (1),	/* loadd.  */
1248     COSTS_N_INSNS (1),  /* load_unaligned.  */
1249     COSTS_N_INSNS (1),	/* store.  */
1250     COSTS_N_INSNS (1),	/* strd.  */
1251     COSTS_N_INSNS (1),	/* stm_1st.  */
1252     1,			/* stm_regs_per_insn_1st.  */
1253     2,			/* stm_regs_per_insn_subsequent.  */
1254     COSTS_N_INSNS (1),	/* storef.  */
1255     COSTS_N_INSNS (1),	/* stored.  */
1256     COSTS_N_INSNS (1),	/* store_unaligned.  */
1257     COSTS_N_INSNS (1),	/* loadv.  */
1258     COSTS_N_INSNS (1)	/* storev.  */
1259   },
1260   {
1261     /* FP SFmode */
1262     {
1263       COSTS_N_INSNS (36),	/* div.  */
1264       COSTS_N_INSNS (11),	/* mult.  */
1265       COSTS_N_INSNS (20),	/* mult_addsub. */
1266       COSTS_N_INSNS (30),	/* fma.  */
1267       COSTS_N_INSNS (9),	/* addsub.  */
1268       COSTS_N_INSNS (3),	/* fpconst.  */
1269       COSTS_N_INSNS (3),	/* neg.  */
1270       COSTS_N_INSNS (6),	/* compare.  */
1271       COSTS_N_INSNS (4),	/* widen.  */
1272       COSTS_N_INSNS (4),	/* narrow.  */
1273       COSTS_N_INSNS (8),	/* toint.  */
1274       COSTS_N_INSNS (8),	/* fromint.  */
1275       COSTS_N_INSNS (8)		/* roundint.  */
1276     },
1277     /* FP DFmode */
1278     {
1279       COSTS_N_INSNS (64),	/* div.  */
1280       COSTS_N_INSNS (16),	/* mult.  */
1281       COSTS_N_INSNS (25),	/* mult_addsub.  */
1282       COSTS_N_INSNS (30),	/* fma.  */
1283       COSTS_N_INSNS (9),	/* addsub.  */
1284       COSTS_N_INSNS (3),	/* fpconst.  */
1285       COSTS_N_INSNS (3),	/* neg.  */
1286       COSTS_N_INSNS (6),	/* compare.  */
1287       COSTS_N_INSNS (6),	/* widen.  */
1288       COSTS_N_INSNS (6),	/* narrow.  */
1289       COSTS_N_INSNS (8),	/* toint.  */
1290       COSTS_N_INSNS (8),	/* fromint.  */
1291       COSTS_N_INSNS (8)		/* roundint.  */
1292     }
1293   },
1294   /* Vector */
1295   {
1296     COSTS_N_INSNS (1)	/* alu.  */
1297   }
1298 };
1299 
1300 const struct cpu_cost_table cortexa5_extra_costs =
1301 {
1302   /* ALU */
1303   {
1304     0,			/* arith.  */
1305     0,			/* logical.  */
1306     COSTS_N_INSNS (1),	/* shift.  */
1307     COSTS_N_INSNS (1),	/* shift_reg.  */
1308     COSTS_N_INSNS (1),	/* arith_shift.  */
1309     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1310     COSTS_N_INSNS (1),	/* log_shift.  */
1311     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1312     COSTS_N_INSNS (1),	/* extend.  */
1313     COSTS_N_INSNS (1),	/* extend_arith.  */
1314     COSTS_N_INSNS (1),	/* bfi.  */
1315     COSTS_N_INSNS (1),	/* bfx.  */
1316     COSTS_N_INSNS (1),	/* clz.  */
1317     COSTS_N_INSNS (1),	/* rev.  */
1318     0,			/* non_exec.  */
1319     true		/* non_exec_costs_exec.  */
1320   },
1321 
1322   {
1323     /* MULT SImode */
1324     {
1325       0,			/* simple.  */
1326       COSTS_N_INSNS (1),	/* flag_setting.  */
1327       COSTS_N_INSNS (1),	/* extend.  */
1328       COSTS_N_INSNS (1),	/* add.  */
1329       COSTS_N_INSNS (1),	/* extend_add.  */
1330       COSTS_N_INSNS (7)		/* idiv.  */
1331     },
1332     /* MULT DImode */
1333     {
1334       0,			/* simple (N/A).  */
1335       0,			/* flag_setting (N/A).  */
1336       COSTS_N_INSNS (1),	/* extend.  */
1337       0,			/* add.  */
1338       COSTS_N_INSNS (2),	/* extend_add.  */
1339       0				/* idiv (N/A).  */
1340     }
1341   },
1342   /* LD/ST */
1343   {
1344     COSTS_N_INSNS (1),	/* load.  */
1345     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1346     COSTS_N_INSNS (6),	/* ldrd.  */
1347     COSTS_N_INSNS (1),	/* ldm_1st.  */
1348     1,			/* ldm_regs_per_insn_1st.  */
1349     2,			/* ldm_regs_per_insn_subsequent.  */
1350     COSTS_N_INSNS (2),	/* loadf.  */
1351     COSTS_N_INSNS (4),	/* loadd.  */
1352     COSTS_N_INSNS (1),	/* load_unaligned.  */
1353     COSTS_N_INSNS (1),	/* store.  */
1354     COSTS_N_INSNS (3),	/* strd.  */
1355     COSTS_N_INSNS (1),	/* stm_1st.  */
1356     1,			/* stm_regs_per_insn_1st.  */
1357     2,			/* stm_regs_per_insn_subsequent.  */
1358     COSTS_N_INSNS (2),	/* storef.  */
1359     COSTS_N_INSNS (2),	/* stored.  */
1360     COSTS_N_INSNS (1),	/* store_unaligned.  */
1361     COSTS_N_INSNS (1),	/* loadv.  */
1362     COSTS_N_INSNS (1)	/* storev.  */
1363   },
1364   {
1365     /* FP SFmode */
1366     {
1367       COSTS_N_INSNS (15),	/* div.  */
1368       COSTS_N_INSNS (3),	/* mult.  */
1369       COSTS_N_INSNS (7),	/* mult_addsub. */
1370       COSTS_N_INSNS (7),	/* fma.  */
1371       COSTS_N_INSNS (3),	/* addsub.  */
1372       COSTS_N_INSNS (3),	/* fpconst.  */
1373       COSTS_N_INSNS (3),	/* neg.  */
1374       COSTS_N_INSNS (3),	/* compare.  */
1375       COSTS_N_INSNS (3),	/* widen.  */
1376       COSTS_N_INSNS (3),	/* narrow.  */
1377       COSTS_N_INSNS (3),	/* toint.  */
1378       COSTS_N_INSNS (3),	/* fromint.  */
1379       COSTS_N_INSNS (3)		/* roundint.  */
1380     },
1381     /* FP DFmode */
1382     {
1383       COSTS_N_INSNS (30),	/* div.  */
1384       COSTS_N_INSNS (6),	/* mult.  */
1385       COSTS_N_INSNS (10),	/* mult_addsub.  */
1386       COSTS_N_INSNS (7),	/* fma.  */
1387       COSTS_N_INSNS (3),	/* addsub.  */
1388       COSTS_N_INSNS (3),	/* fpconst.  */
1389       COSTS_N_INSNS (3),	/* neg.  */
1390       COSTS_N_INSNS (3),	/* compare.  */
1391       COSTS_N_INSNS (3),	/* widen.  */
1392       COSTS_N_INSNS (3),	/* narrow.  */
1393       COSTS_N_INSNS (3),	/* toint.  */
1394       COSTS_N_INSNS (3),	/* fromint.  */
1395       COSTS_N_INSNS (3)		/* roundint.  */
1396     }
1397   },
1398   /* Vector */
1399   {
1400     COSTS_N_INSNS (1)	/* alu.  */
1401   }
1402 };
1403 
1404 
1405 const struct cpu_cost_table cortexa7_extra_costs =
1406 {
1407   /* ALU */
1408   {
1409     0,			/* arith.  */
1410     0,			/* logical.  */
1411     COSTS_N_INSNS (1),	/* shift.  */
1412     COSTS_N_INSNS (1),	/* shift_reg.  */
1413     COSTS_N_INSNS (1),	/* arith_shift.  */
1414     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1415     COSTS_N_INSNS (1),	/* log_shift.  */
1416     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1417     COSTS_N_INSNS (1),	/* extend.  */
1418     COSTS_N_INSNS (1),	/* extend_arith.  */
1419     COSTS_N_INSNS (1),	/* bfi.  */
1420     COSTS_N_INSNS (1),	/* bfx.  */
1421     COSTS_N_INSNS (1),	/* clz.  */
1422     COSTS_N_INSNS (1),	/* rev.  */
1423     0,			/* non_exec.  */
1424     true		/* non_exec_costs_exec.  */
1425   },
1426 
1427   {
1428     /* MULT SImode */
1429     {
1430       0,			/* simple.  */
1431       COSTS_N_INSNS (1),	/* flag_setting.  */
1432       COSTS_N_INSNS (1),	/* extend.  */
1433       COSTS_N_INSNS (1),	/* add.  */
1434       COSTS_N_INSNS (1),	/* extend_add.  */
1435       COSTS_N_INSNS (7)		/* idiv.  */
1436     },
1437     /* MULT DImode */
1438     {
1439       0,			/* simple (N/A).  */
1440       0,			/* flag_setting (N/A).  */
1441       COSTS_N_INSNS (1),	/* extend.  */
1442       0,			/* add.  */
1443       COSTS_N_INSNS (2),	/* extend_add.  */
1444       0				/* idiv (N/A).  */
1445     }
1446   },
1447   /* LD/ST */
1448   {
1449     COSTS_N_INSNS (1),	/* load.  */
1450     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1451     COSTS_N_INSNS (3),	/* ldrd.  */
1452     COSTS_N_INSNS (1),	/* ldm_1st.  */
1453     1,			/* ldm_regs_per_insn_1st.  */
1454     2,			/* ldm_regs_per_insn_subsequent.  */
1455     COSTS_N_INSNS (2),	/* loadf.  */
1456     COSTS_N_INSNS (2),	/* loadd.  */
1457     COSTS_N_INSNS (1),	/* load_unaligned.  */
1458     COSTS_N_INSNS (1),	/* store.  */
1459     COSTS_N_INSNS (3),	/* strd.  */
1460     COSTS_N_INSNS (1),	/* stm_1st.  */
1461     1,			/* stm_regs_per_insn_1st.  */
1462     2,			/* stm_regs_per_insn_subsequent.  */
1463     COSTS_N_INSNS (2),	/* storef.  */
1464     COSTS_N_INSNS (2),	/* stored.  */
1465     COSTS_N_INSNS (1),	/* store_unaligned.  */
1466     COSTS_N_INSNS (1),	/* loadv.  */
1467     COSTS_N_INSNS (1)	/* storev.  */
1468   },
1469   {
1470     /* FP SFmode */
1471     {
1472       COSTS_N_INSNS (15),	/* div.  */
1473       COSTS_N_INSNS (3),	/* mult.  */
1474       COSTS_N_INSNS (7),	/* mult_addsub. */
1475       COSTS_N_INSNS (7),	/* fma.  */
1476       COSTS_N_INSNS (3),	/* addsub.  */
1477       COSTS_N_INSNS (3),	/* fpconst.  */
1478       COSTS_N_INSNS (3),	/* neg.  */
1479       COSTS_N_INSNS (3),	/* compare.  */
1480       COSTS_N_INSNS (3),	/* widen.  */
1481       COSTS_N_INSNS (3),	/* narrow.  */
1482       COSTS_N_INSNS (3),	/* toint.  */
1483       COSTS_N_INSNS (3),	/* fromint.  */
1484       COSTS_N_INSNS (3)		/* roundint.  */
1485     },
1486     /* FP DFmode */
1487     {
1488       COSTS_N_INSNS (30),	/* div.  */
1489       COSTS_N_INSNS (6),	/* mult.  */
1490       COSTS_N_INSNS (10),	/* mult_addsub.  */
1491       COSTS_N_INSNS (7),	/* fma.  */
1492       COSTS_N_INSNS (3),	/* addsub.  */
1493       COSTS_N_INSNS (3),	/* fpconst.  */
1494       COSTS_N_INSNS (3),	/* neg.  */
1495       COSTS_N_INSNS (3),	/* compare.  */
1496       COSTS_N_INSNS (3),	/* widen.  */
1497       COSTS_N_INSNS (3),	/* narrow.  */
1498       COSTS_N_INSNS (3),	/* toint.  */
1499       COSTS_N_INSNS (3),	/* fromint.  */
1500       COSTS_N_INSNS (3)		/* roundint.  */
1501     }
1502   },
1503   /* Vector */
1504   {
1505     COSTS_N_INSNS (1)	/* alu.  */
1506   }
1507 };
1508 
1509 const struct cpu_cost_table cortexa12_extra_costs =
1510 {
1511   /* ALU */
1512   {
1513     0,			/* arith.  */
1514     0,			/* logical.  */
1515     0,			/* shift.  */
1516     COSTS_N_INSNS (1),	/* shift_reg.  */
1517     COSTS_N_INSNS (1),	/* arith_shift.  */
1518     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1519     COSTS_N_INSNS (1),	/* log_shift.  */
1520     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1521     0,			/* extend.  */
1522     COSTS_N_INSNS (1),	/* extend_arith.  */
1523     0,			/* bfi.  */
1524     COSTS_N_INSNS (1),	/* bfx.  */
1525     COSTS_N_INSNS (1),	/* clz.  */
1526     COSTS_N_INSNS (1),	/* rev.  */
1527     0,			/* non_exec.  */
1528     true		/* non_exec_costs_exec.  */
1529   },
1530   /* MULT SImode */
1531   {
1532     {
1533       COSTS_N_INSNS (2),	/* simple.  */
1534       COSTS_N_INSNS (3),	/* flag_setting.  */
1535       COSTS_N_INSNS (2),	/* extend.  */
1536       COSTS_N_INSNS (3),	/* add.  */
1537       COSTS_N_INSNS (2),	/* extend_add.  */
1538       COSTS_N_INSNS (18)	/* idiv.  */
1539     },
1540     /* MULT DImode */
1541     {
1542       0,			/* simple (N/A).  */
1543       0,			/* flag_setting (N/A).  */
1544       COSTS_N_INSNS (3),	/* extend.  */
1545       0,			/* add (N/A).  */
1546       COSTS_N_INSNS (3),	/* extend_add.  */
1547       0				/* idiv (N/A).  */
1548     }
1549   },
1550   /* LD/ST */
1551   {
1552     COSTS_N_INSNS (3),	/* load.  */
1553     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1554     COSTS_N_INSNS (3),	/* ldrd.  */
1555     COSTS_N_INSNS (3),	/* ldm_1st.  */
1556     1,			/* ldm_regs_per_insn_1st.  */
1557     2,			/* ldm_regs_per_insn_subsequent.  */
1558     COSTS_N_INSNS (3),	/* loadf.  */
1559     COSTS_N_INSNS (3),	/* loadd.  */
1560     0,			/* load_unaligned.  */
1561     0,			/* store.  */
1562     0,			/* strd.  */
1563     0,			/* stm_1st.  */
1564     1,			/* stm_regs_per_insn_1st.  */
1565     2,			/* stm_regs_per_insn_subsequent.  */
1566     COSTS_N_INSNS (2),	/* storef.  */
1567     COSTS_N_INSNS (2),	/* stored.  */
1568     0,			/* store_unaligned.  */
1569     COSTS_N_INSNS (1),	/* loadv.  */
1570     COSTS_N_INSNS (1)	/* storev.  */
1571   },
1572   {
1573     /* FP SFmode */
1574     {
1575       COSTS_N_INSNS (17),	/* div.  */
1576       COSTS_N_INSNS (4),	/* mult.  */
1577       COSTS_N_INSNS (8),	/* mult_addsub. */
1578       COSTS_N_INSNS (8),	/* fma.  */
1579       COSTS_N_INSNS (4),	/* addsub.  */
1580       COSTS_N_INSNS (2),	/* fpconst. */
1581       COSTS_N_INSNS (2),	/* neg.  */
1582       COSTS_N_INSNS (2),	/* compare.  */
1583       COSTS_N_INSNS (4),	/* widen.  */
1584       COSTS_N_INSNS (4),	/* narrow.  */
1585       COSTS_N_INSNS (4),	/* toint.  */
1586       COSTS_N_INSNS (4),	/* fromint.  */
1587       COSTS_N_INSNS (4)		/* roundint.  */
1588     },
1589     /* FP DFmode */
1590     {
1591       COSTS_N_INSNS (31),	/* div.  */
1592       COSTS_N_INSNS (4),	/* mult.  */
1593       COSTS_N_INSNS (8),	/* mult_addsub.  */
1594       COSTS_N_INSNS (8),	/* fma.  */
1595       COSTS_N_INSNS (4),	/* addsub.  */
1596       COSTS_N_INSNS (2),	/* fpconst.  */
1597       COSTS_N_INSNS (2),	/* neg.  */
1598       COSTS_N_INSNS (2),	/* compare.  */
1599       COSTS_N_INSNS (4),	/* widen.  */
1600       COSTS_N_INSNS (4),	/* narrow.  */
1601       COSTS_N_INSNS (4),	/* toint.  */
1602       COSTS_N_INSNS (4),	/* fromint.  */
1603       COSTS_N_INSNS (4)		/* roundint.  */
1604     }
1605   },
1606   /* Vector */
1607   {
1608     COSTS_N_INSNS (1)	/* alu.  */
1609   }
1610 };
1611 
1612 const struct cpu_cost_table cortexa15_extra_costs =
1613 {
1614   /* ALU */
1615   {
1616     0,			/* arith.  */
1617     0,			/* logical.  */
1618     0,			/* shift.  */
1619     0,			/* shift_reg.  */
1620     COSTS_N_INSNS (1),	/* arith_shift.  */
1621     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1622     COSTS_N_INSNS (1),	/* log_shift.  */
1623     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1624     0,			/* extend.  */
1625     COSTS_N_INSNS (1),	/* extend_arith.  */
1626     COSTS_N_INSNS (1),	/* bfi.  */
1627     0,			/* bfx.  */
1628     0,			/* clz.  */
1629     0,			/* rev.  */
1630     0,			/* non_exec.  */
1631     true		/* non_exec_costs_exec.  */
1632   },
1633   /* MULT SImode */
1634   {
1635     {
1636       COSTS_N_INSNS (2),	/* simple.  */
1637       COSTS_N_INSNS (3),	/* flag_setting.  */
1638       COSTS_N_INSNS (2),	/* extend.  */
1639       COSTS_N_INSNS (2),	/* add.  */
1640       COSTS_N_INSNS (2),	/* extend_add.  */
1641       COSTS_N_INSNS (18)	/* idiv.  */
1642     },
1643     /* MULT DImode */
1644     {
1645       0,			/* simple (N/A).  */
1646       0,			/* flag_setting (N/A).  */
1647       COSTS_N_INSNS (3),	/* extend.  */
1648       0,			/* add (N/A).  */
1649       COSTS_N_INSNS (3),	/* extend_add.  */
1650       0				/* idiv (N/A).  */
1651     }
1652   },
1653   /* LD/ST */
1654   {
1655     COSTS_N_INSNS (3),	/* load.  */
1656     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1657     COSTS_N_INSNS (3),	/* ldrd.  */
1658     COSTS_N_INSNS (4),	/* ldm_1st.  */
1659     1,			/* ldm_regs_per_insn_1st.  */
1660     2,			/* ldm_regs_per_insn_subsequent.  */
1661     COSTS_N_INSNS (4),	/* loadf.  */
1662     COSTS_N_INSNS (4),	/* loadd.  */
1663     0,			/* load_unaligned.  */
1664     0,			/* store.  */
1665     0,			/* strd.  */
1666     COSTS_N_INSNS (1),	/* stm_1st.  */
1667     1,			/* stm_regs_per_insn_1st.  */
1668     2,			/* stm_regs_per_insn_subsequent.  */
1669     0,			/* storef.  */
1670     0,			/* stored.  */
1671     0,			/* store_unaligned.  */
1672     COSTS_N_INSNS (1),	/* loadv.  */
1673     COSTS_N_INSNS (1)	/* storev.  */
1674   },
1675   {
1676     /* FP SFmode */
1677     {
1678       COSTS_N_INSNS (17),	/* div.  */
1679       COSTS_N_INSNS (4),	/* mult.  */
1680       COSTS_N_INSNS (8),	/* mult_addsub. */
1681       COSTS_N_INSNS (8),	/* fma.  */
1682       COSTS_N_INSNS (4),	/* addsub.  */
1683       COSTS_N_INSNS (2),	/* fpconst. */
1684       COSTS_N_INSNS (2),	/* neg.  */
1685       COSTS_N_INSNS (5),	/* compare.  */
1686       COSTS_N_INSNS (4),	/* widen.  */
1687       COSTS_N_INSNS (4),	/* narrow.  */
1688       COSTS_N_INSNS (4),	/* toint.  */
1689       COSTS_N_INSNS (4),	/* fromint.  */
1690       COSTS_N_INSNS (4)		/* roundint.  */
1691     },
1692     /* FP DFmode */
1693     {
1694       COSTS_N_INSNS (31),	/* div.  */
1695       COSTS_N_INSNS (4),	/* mult.  */
1696       COSTS_N_INSNS (8),	/* mult_addsub.  */
1697       COSTS_N_INSNS (8),	/* fma.  */
1698       COSTS_N_INSNS (4),	/* addsub.  */
1699       COSTS_N_INSNS (2),	/* fpconst.  */
1700       COSTS_N_INSNS (2),	/* neg.  */
1701       COSTS_N_INSNS (2),	/* compare.  */
1702       COSTS_N_INSNS (4),	/* widen.  */
1703       COSTS_N_INSNS (4),	/* narrow.  */
1704       COSTS_N_INSNS (4),	/* toint.  */
1705       COSTS_N_INSNS (4),	/* fromint.  */
1706       COSTS_N_INSNS (4)		/* roundint.  */
1707     }
1708   },
1709   /* Vector */
1710   {
1711     COSTS_N_INSNS (1)	/* alu.  */
1712   }
1713 };
1714 
1715 const struct cpu_cost_table v7m_extra_costs =
1716 {
1717   /* ALU */
1718   {
1719     0,			/* arith.  */
1720     0,			/* logical.  */
1721     0,			/* shift.  */
1722     0,			/* shift_reg.  */
1723     0,			/* arith_shift.  */
1724     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1725     0,			/* log_shift.  */
1726     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1727     0,			/* extend.  */
1728     COSTS_N_INSNS (1),	/* extend_arith.  */
1729     0,			/* bfi.  */
1730     0,			/* bfx.  */
1731     0,			/* clz.  */
1732     0,			/* rev.  */
1733     COSTS_N_INSNS (1),	/* non_exec.  */
1734     false		/* non_exec_costs_exec.  */
1735   },
1736   {
1737     /* MULT SImode */
1738     {
1739       COSTS_N_INSNS (1),	/* simple.  */
1740       COSTS_N_INSNS (1),	/* flag_setting.  */
1741       COSTS_N_INSNS (2),	/* extend.  */
1742       COSTS_N_INSNS (1),	/* add.  */
1743       COSTS_N_INSNS (3),	/* extend_add.  */
1744       COSTS_N_INSNS (8)		/* idiv.  */
1745     },
1746     /* MULT DImode */
1747     {
1748       0,			/* simple (N/A).  */
1749       0,			/* flag_setting (N/A).  */
1750       COSTS_N_INSNS (2),	/* extend.  */
1751       0,			/* add (N/A).  */
1752       COSTS_N_INSNS (3),	/* extend_add.  */
1753       0				/* idiv (N/A).  */
1754     }
1755   },
1756   /* LD/ST */
1757   {
1758     COSTS_N_INSNS (2),	/* load.  */
1759     0,			/* load_sign_extend.  */
1760     COSTS_N_INSNS (3),	/* ldrd.  */
1761     COSTS_N_INSNS (2),	/* ldm_1st.  */
1762     1,			/* ldm_regs_per_insn_1st.  */
1763     1,			/* ldm_regs_per_insn_subsequent.  */
1764     COSTS_N_INSNS (2),	/* loadf.  */
1765     COSTS_N_INSNS (3),	/* loadd.  */
1766     COSTS_N_INSNS (1),  /* load_unaligned.  */
1767     COSTS_N_INSNS (2),	/* store.  */
1768     COSTS_N_INSNS (3),	/* strd.  */
1769     COSTS_N_INSNS (2),	/* stm_1st.  */
1770     1,			/* stm_regs_per_insn_1st.  */
1771     1,			/* stm_regs_per_insn_subsequent.  */
1772     COSTS_N_INSNS (2),	/* storef.  */
1773     COSTS_N_INSNS (3),	/* stored.  */
1774     COSTS_N_INSNS (1),	/* store_unaligned.  */
1775     COSTS_N_INSNS (1),	/* loadv.  */
1776     COSTS_N_INSNS (1)	/* storev.  */
1777   },
1778   {
1779     /* FP SFmode */
1780     {
1781       COSTS_N_INSNS (7),	/* div.  */
1782       COSTS_N_INSNS (2),	/* mult.  */
1783       COSTS_N_INSNS (5),	/* mult_addsub.  */
1784       COSTS_N_INSNS (3),	/* fma.  */
1785       COSTS_N_INSNS (1),	/* addsub.  */
1786       0,			/* fpconst.  */
1787       0,			/* neg.  */
1788       0,			/* compare.  */
1789       0,			/* widen.  */
1790       0,			/* narrow.  */
1791       0,			/* toint.  */
1792       0,			/* fromint.  */
1793       0				/* roundint.  */
1794     },
1795     /* FP DFmode */
1796     {
1797       COSTS_N_INSNS (15),	/* div.  */
1798       COSTS_N_INSNS (5),	/* mult.  */
1799       COSTS_N_INSNS (7),	/* mult_addsub.  */
1800       COSTS_N_INSNS (7),	/* fma.  */
1801       COSTS_N_INSNS (3),	/* addsub.  */
1802       0,			/* fpconst.  */
1803       0,			/* neg.  */
1804       0,			/* compare.  */
1805       0,			/* widen.  */
1806       0,			/* narrow.  */
1807       0,			/* toint.  */
1808       0,			/* fromint.  */
1809       0				/* roundint.  */
1810     }
1811   },
1812   /* Vector */
1813   {
1814     COSTS_N_INSNS (1)	/* alu.  */
1815   }
1816 };
1817 
1818 const struct addr_mode_cost_table generic_addr_mode_costs =
1819 {
1820   /* int.  */
1821   {
1822     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1823     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1824     COSTS_N_INSNS (0)	/* AMO_WB.  */
1825   },
1826   /* float.  */
1827   {
1828     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1829     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1830     COSTS_N_INSNS (0)	/* AMO_WB.  */
1831   },
1832   /* vector.  */
1833   {
1834     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1835     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1836     COSTS_N_INSNS (0)	/* AMO_WB.  */
1837   }
1838 };
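/* Note that every entry above is zero: the generic model charges no extra
   cost for any addressing mode, with or without write-back.  A tuning that
   wants different behaviour can point its tune_params entry at its own
   addr_mode_cost_table instead.  */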
1839 
1840 const struct tune_params arm_slowmul_tune =
1841 {
1842   &generic_extra_costs,			/* Insn extra costs.  */
1843   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1844   NULL,					/* Sched adj cost.  */
1845   arm_default_branch_cost,
1846   &arm_default_vec_cost,
1847   3,						/* Constant limit.  */
1848   5,						/* Max cond insns.  */
1849   8,						/* Memset max inline.  */
1850   1,						/* Issue rate.  */
1851   ARM_PREFETCH_NOT_BENEFICIAL,
1852   tune_params::PREF_CONST_POOL_TRUE,
1853   tune_params::PREF_LDRD_FALSE,
1854   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1855   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1856   tune_params::DISPARAGE_FLAGS_NEITHER,
1857   tune_params::PREF_NEON_STRINGOPS_FALSE,
1858   tune_params::FUSE_NOTHING,
1859   tune_params::SCHED_AUTOPREF_OFF
1860 };
1861 
1862 const struct tune_params arm_fastmul_tune =
1863 {
1864   &generic_extra_costs,			/* Insn extra costs.  */
1865   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1866   NULL,					/* Sched adj cost.  */
1867   arm_default_branch_cost,
1868   &arm_default_vec_cost,
1869   1,						/* Constant limit.  */
1870   5,						/* Max cond insns.  */
1871   8,						/* Memset max inline.  */
1872   1,						/* Issue rate.  */
1873   ARM_PREFETCH_NOT_BENEFICIAL,
1874   tune_params::PREF_CONST_POOL_TRUE,
1875   tune_params::PREF_LDRD_FALSE,
1876   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1877   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1878   tune_params::DISPARAGE_FLAGS_NEITHER,
1879   tune_params::PREF_NEON_STRINGOPS_FALSE,
1880   tune_params::FUSE_NOTHING,
1881   tune_params::SCHED_AUTOPREF_OFF
1882 };
1883 
1884 /* StrongARM has early execution of branches, so a sequence that is worth
1885    skipping is shorter.  Set max_insns_skipped to a lower value.  */
1886 
1887 const struct tune_params arm_strongarm_tune =
1888 {
1889   &generic_extra_costs,			/* Insn extra costs.  */
1890   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1891   NULL,					/* Sched adj cost.  */
1892   arm_default_branch_cost,
1893   &arm_default_vec_cost,
1894   1,						/* Constant limit.  */
1895   3,						/* Max cond insns.  */
1896   8,						/* Memset max inline.  */
1897   1,						/* Issue rate.  */
1898   ARM_PREFETCH_NOT_BENEFICIAL,
1899   tune_params::PREF_CONST_POOL_TRUE,
1900   tune_params::PREF_LDRD_FALSE,
1901   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1902   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1903   tune_params::DISPARAGE_FLAGS_NEITHER,
1904   tune_params::PREF_NEON_STRINGOPS_FALSE,
1905   tune_params::FUSE_NOTHING,
1906   tune_params::SCHED_AUTOPREF_OFF
1907 };
1908 
1909 const struct tune_params arm_xscale_tune =
1910 {
1911   &generic_extra_costs,			/* Insn extra costs.  */
1912   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1913   xscale_sched_adjust_cost,
1914   arm_default_branch_cost,
1915   &arm_default_vec_cost,
1916   2,						/* Constant limit.  */
1917   3,						/* Max cond insns.  */
1918   8,						/* Memset max inline.  */
1919   1,						/* Issue rate.  */
1920   ARM_PREFETCH_NOT_BENEFICIAL,
1921   tune_params::PREF_CONST_POOL_TRUE,
1922   tune_params::PREF_LDRD_FALSE,
1923   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1924   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1925   tune_params::DISPARAGE_FLAGS_NEITHER,
1926   tune_params::PREF_NEON_STRINGOPS_FALSE,
1927   tune_params::FUSE_NOTHING,
1928   tune_params::SCHED_AUTOPREF_OFF
1929 };
1930 
1931 const struct tune_params arm_9e_tune =
1932 {
1933   &generic_extra_costs,			/* Insn extra costs.  */
1934   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1935   NULL,					/* Sched adj cost.  */
1936   arm_default_branch_cost,
1937   &arm_default_vec_cost,
1938   1,						/* Constant limit.  */
1939   5,						/* Max cond insns.  */
1940   8,						/* Memset max inline.  */
1941   1,						/* Issue rate.  */
1942   ARM_PREFETCH_NOT_BENEFICIAL,
1943   tune_params::PREF_CONST_POOL_TRUE,
1944   tune_params::PREF_LDRD_FALSE,
1945   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1946   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1947   tune_params::DISPARAGE_FLAGS_NEITHER,
1948   tune_params::PREF_NEON_STRINGOPS_FALSE,
1949   tune_params::FUSE_NOTHING,
1950   tune_params::SCHED_AUTOPREF_OFF
1951 };
1952 
1953 const struct tune_params arm_marvell_pj4_tune =
1954 {
1955   &generic_extra_costs,			/* Insn extra costs.  */
1956   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1957   NULL,					/* Sched adj cost.  */
1958   arm_default_branch_cost,
1959   &arm_default_vec_cost,
1960   1,						/* Constant limit.  */
1961   5,						/* Max cond insns.  */
1962   8,						/* Memset max inline.  */
1963   2,						/* Issue rate.  */
1964   ARM_PREFETCH_NOT_BENEFICIAL,
1965   tune_params::PREF_CONST_POOL_TRUE,
1966   tune_params::PREF_LDRD_FALSE,
1967   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1968   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1969   tune_params::DISPARAGE_FLAGS_NEITHER,
1970   tune_params::PREF_NEON_STRINGOPS_FALSE,
1971   tune_params::FUSE_NOTHING,
1972   tune_params::SCHED_AUTOPREF_OFF
1973 };
1974 
1975 const struct tune_params arm_v6t2_tune =
1976 {
1977   &generic_extra_costs,			/* Insn extra costs.  */
1978   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1979   NULL,					/* Sched adj cost.  */
1980   arm_default_branch_cost,
1981   &arm_default_vec_cost,
1982   1,						/* Constant limit.  */
1983   5,						/* Max cond insns.  */
1984   8,						/* Memset max inline.  */
1985   1,						/* Issue rate.  */
1986   ARM_PREFETCH_NOT_BENEFICIAL,
1987   tune_params::PREF_CONST_POOL_FALSE,
1988   tune_params::PREF_LDRD_FALSE,
1989   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1990   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1991   tune_params::DISPARAGE_FLAGS_NEITHER,
1992   tune_params::PREF_NEON_STRINGOPS_FALSE,
1993   tune_params::FUSE_NOTHING,
1994   tune_params::SCHED_AUTOPREF_OFF
1995 };
1996 
1997 
1998 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1999 const struct tune_params arm_cortex_tune =
2000 {
2001   &generic_extra_costs,
2002   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2003   NULL,					/* Sched adj cost.  */
2004   arm_default_branch_cost,
2005   &arm_default_vec_cost,
2006   1,						/* Constant limit.  */
2007   5,						/* Max cond insns.  */
2008   8,						/* Memset max inline.  */
2009   2,						/* Issue rate.  */
2010   ARM_PREFETCH_NOT_BENEFICIAL,
2011   tune_params::PREF_CONST_POOL_FALSE,
2012   tune_params::PREF_LDRD_FALSE,
2013   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2014   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2015   tune_params::DISPARAGE_FLAGS_NEITHER,
2016   tune_params::PREF_NEON_STRINGOPS_FALSE,
2017   tune_params::FUSE_NOTHING,
2018   tune_params::SCHED_AUTOPREF_OFF
2019 };
2020 
2021 const struct tune_params arm_cortex_a8_tune =
2022 {
2023   &cortexa8_extra_costs,
2024   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2025   NULL,					/* Sched adj cost.  */
2026   arm_default_branch_cost,
2027   &arm_default_vec_cost,
2028   1,						/* Constant limit.  */
2029   5,						/* Max cond insns.  */
2030   8,						/* Memset max inline.  */
2031   2,						/* Issue rate.  */
2032   ARM_PREFETCH_NOT_BENEFICIAL,
2033   tune_params::PREF_CONST_POOL_FALSE,
2034   tune_params::PREF_LDRD_FALSE,
2035   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2036   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2037   tune_params::DISPARAGE_FLAGS_NEITHER,
2038   tune_params::PREF_NEON_STRINGOPS_TRUE,
2039   tune_params::FUSE_NOTHING,
2040   tune_params::SCHED_AUTOPREF_OFF
2041 };
2042 
2043 const struct tune_params arm_cortex_a7_tune =
2044 {
2045   &cortexa7_extra_costs,
2046   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2047   NULL,					/* Sched adj cost.  */
2048   arm_default_branch_cost,
2049   &arm_default_vec_cost,
2050   1,						/* Constant limit.  */
2051   5,						/* Max cond insns.  */
2052   8,						/* Memset max inline.  */
2053   2,						/* Issue rate.  */
2054   ARM_PREFETCH_NOT_BENEFICIAL,
2055   tune_params::PREF_CONST_POOL_FALSE,
2056   tune_params::PREF_LDRD_FALSE,
2057   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2058   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2059   tune_params::DISPARAGE_FLAGS_NEITHER,
2060   tune_params::PREF_NEON_STRINGOPS_TRUE,
2061   tune_params::FUSE_NOTHING,
2062   tune_params::SCHED_AUTOPREF_OFF
2063 };
2064 
2065 const struct tune_params arm_cortex_a15_tune =
2066 {
2067   &cortexa15_extra_costs,
2068   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2069   NULL,					/* Sched adj cost.  */
2070   arm_default_branch_cost,
2071   &arm_default_vec_cost,
2072   1,						/* Constant limit.  */
2073   2,						/* Max cond insns.  */
2074   8,						/* Memset max inline.  */
2075   3,						/* Issue rate.  */
2076   ARM_PREFETCH_NOT_BENEFICIAL,
2077   tune_params::PREF_CONST_POOL_FALSE,
2078   tune_params::PREF_LDRD_TRUE,
2079   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2080   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2081   tune_params::DISPARAGE_FLAGS_ALL,
2082   tune_params::PREF_NEON_STRINGOPS_TRUE,
2083   tune_params::FUSE_NOTHING,
2084   tune_params::SCHED_AUTOPREF_FULL
2085 };
2086 
2087 const struct tune_params arm_cortex_a35_tune =
2088 {
2089   &cortexa53_extra_costs,
2090   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2091   NULL,					/* Sched adj cost.  */
2092   arm_default_branch_cost,
2093   &arm_default_vec_cost,
2094   1,						/* Constant limit.  */
2095   5,						/* Max cond insns.  */
2096   8,						/* Memset max inline.  */
2097   1,						/* Issue rate.  */
2098   ARM_PREFETCH_NOT_BENEFICIAL,
2099   tune_params::PREF_CONST_POOL_FALSE,
2100   tune_params::PREF_LDRD_FALSE,
2101   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2102   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2103   tune_params::DISPARAGE_FLAGS_NEITHER,
2104   tune_params::PREF_NEON_STRINGOPS_TRUE,
2105   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2106   tune_params::SCHED_AUTOPREF_OFF
2107 };
2108 
2109 const struct tune_params arm_cortex_a53_tune =
2110 {
2111   &cortexa53_extra_costs,
2112   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2113   NULL,					/* Sched adj cost.  */
2114   arm_default_branch_cost,
2115   &arm_default_vec_cost,
2116   1,						/* Constant limit.  */
2117   5,						/* Max cond insns.  */
2118   8,						/* Memset max inline.  */
2119   2,						/* Issue rate.  */
2120   ARM_PREFETCH_NOT_BENEFICIAL,
2121   tune_params::PREF_CONST_POOL_FALSE,
2122   tune_params::PREF_LDRD_FALSE,
2123   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2124   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2125   tune_params::DISPARAGE_FLAGS_NEITHER,
2126   tune_params::PREF_NEON_STRINGOPS_TRUE,
2127   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2128   tune_params::SCHED_AUTOPREF_OFF
2129 };
2130 
2131 const struct tune_params arm_cortex_a57_tune =
2132 {
2133   &cortexa57_extra_costs,
2134   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2135   NULL,					/* Sched adj cost.  */
2136   arm_default_branch_cost,
2137   &arm_default_vec_cost,
2138   1,						/* Constant limit.  */
2139   2,						/* Max cond insns.  */
2140   8,						/* Memset max inline.  */
2141   3,						/* Issue rate.  */
2142   ARM_PREFETCH_NOT_BENEFICIAL,
2143   tune_params::PREF_CONST_POOL_FALSE,
2144   tune_params::PREF_LDRD_TRUE,
2145   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2146   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2147   tune_params::DISPARAGE_FLAGS_ALL,
2148   tune_params::PREF_NEON_STRINGOPS_TRUE,
2149   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2150   tune_params::SCHED_AUTOPREF_FULL
2151 };
2152 
2153 const struct tune_params arm_exynosm1_tune =
2154 {
2155   &exynosm1_extra_costs,
2156   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2157   NULL,						/* Sched adj cost.  */
2158   arm_default_branch_cost,
2159   &arm_default_vec_cost,
2160   1,						/* Constant limit.  */
2161   2,						/* Max cond insns.  */
2162   8,						/* Memset max inline.  */
2163   3,						/* Issue rate.  */
2164   ARM_PREFETCH_NOT_BENEFICIAL,
2165   tune_params::PREF_CONST_POOL_FALSE,
2166   tune_params::PREF_LDRD_TRUE,
2167   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
2168   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
2169   tune_params::DISPARAGE_FLAGS_ALL,
2170   tune_params::PREF_NEON_STRINGOPS_TRUE,
2171   tune_params::FUSE_NOTHING,
2172   tune_params::SCHED_AUTOPREF_OFF
2173 };
2174 
2175 const struct tune_params arm_xgene1_tune =
2176 {
2177   &xgene1_extra_costs,
2178   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2179   NULL,					/* Sched adj cost.  */
2180   arm_default_branch_cost,
2181   &arm_default_vec_cost,
2182   1,						/* Constant limit.  */
2183   2,						/* Max cond insns.  */
2184   32,						/* Memset max inline.  */
2185   4,						/* Issue rate.  */
2186   ARM_PREFETCH_NOT_BENEFICIAL,
2187   tune_params::PREF_CONST_POOL_FALSE,
2188   tune_params::PREF_LDRD_TRUE,
2189   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2190   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2191   tune_params::DISPARAGE_FLAGS_ALL,
2192   tune_params::PREF_NEON_STRINGOPS_FALSE,
2193   tune_params::FUSE_NOTHING,
2194   tune_params::SCHED_AUTOPREF_OFF
2195 };
2196 
2197 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2198    less appealing.  Set max_insns_skipped to a low value.  */
2199 
2200 const struct tune_params arm_cortex_a5_tune =
2201 {
2202   &cortexa5_extra_costs,
2203   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2204   NULL,					/* Sched adj cost.  */
2205   arm_cortex_a5_branch_cost,
2206   &arm_default_vec_cost,
2207   1,						/* Constant limit.  */
2208   1,						/* Max cond insns.  */
2209   8,						/* Memset max inline.  */
2210   2,						/* Issue rate.  */
2211   ARM_PREFETCH_NOT_BENEFICIAL,
2212   tune_params::PREF_CONST_POOL_FALSE,
2213   tune_params::PREF_LDRD_FALSE,
2214   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2215   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2216   tune_params::DISPARAGE_FLAGS_NEITHER,
2217   tune_params::PREF_NEON_STRINGOPS_TRUE,
2218   tune_params::FUSE_NOTHING,
2219   tune_params::SCHED_AUTOPREF_OFF
2220 };
2221 
2222 const struct tune_params arm_cortex_a9_tune =
2223 {
2224   &cortexa9_extra_costs,
2225   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2226   cortex_a9_sched_adjust_cost,
2227   arm_default_branch_cost,
2228   &arm_default_vec_cost,
2229   1,						/* Constant limit.  */
2230   5,						/* Max cond insns.  */
2231   8,						/* Memset max inline.  */
2232   2,						/* Issue rate.  */
2233   ARM_PREFETCH_BENEFICIAL(4,32,32),
2234   tune_params::PREF_CONST_POOL_FALSE,
2235   tune_params::PREF_LDRD_FALSE,
2236   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2237   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2238   tune_params::DISPARAGE_FLAGS_NEITHER,
2239   tune_params::PREF_NEON_STRINGOPS_FALSE,
2240   tune_params::FUSE_NOTHING,
2241   tune_params::SCHED_AUTOPREF_OFF
2242 };
2243 
2244 const struct tune_params arm_cortex_a12_tune =
2245 {
2246   &cortexa12_extra_costs,
2247   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2248   NULL,					/* Sched adj cost.  */
2249   arm_default_branch_cost,
2250   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2251   1,						/* Constant limit.  */
2252   2,						/* Max cond insns.  */
2253   8,						/* Memset max inline.  */
2254   2,						/* Issue rate.  */
2255   ARM_PREFETCH_NOT_BENEFICIAL,
2256   tune_params::PREF_CONST_POOL_FALSE,
2257   tune_params::PREF_LDRD_TRUE,
2258   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2259   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2260   tune_params::DISPARAGE_FLAGS_ALL,
2261   tune_params::PREF_NEON_STRINGOPS_TRUE,
2262   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2263   tune_params::SCHED_AUTOPREF_OFF
2264 };
2265 
2266 const struct tune_params arm_cortex_a73_tune =
2267 {
2268   &cortexa57_extra_costs,
2269   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2270   NULL,						/* Sched adj cost.  */
2271   arm_default_branch_cost,
2272   &arm_default_vec_cost,			/* Vectorizer costs.  */
2273   1,						/* Constant limit.  */
2274   2,						/* Max cond insns.  */
2275   8,						/* Memset max inline.  */
2276   2,						/* Issue rate.  */
2277   ARM_PREFETCH_NOT_BENEFICIAL,
2278   tune_params::PREF_CONST_POOL_FALSE,
2279   tune_params::PREF_LDRD_TRUE,
2280   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2281   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2282   tune_params::DISPARAGE_FLAGS_ALL,
2283   tune_params::PREF_NEON_STRINGOPS_TRUE,
2284   FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2285   tune_params::SCHED_AUTOPREF_FULL
2286 };
2287 
2288 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take
2289    a single cycle, so materialising a 32-bit constant with the pair costs two
2290    cycles.  An LDR from the constant pool also takes two cycles to execute,
2291    but mildly increases pipelining opportunity (consecutive loads/stores can
2292    be pipelined together, saving one cycle), and may also improve icache
2293    utilisation.  Hence we prefer the constant pool for such processors.  */
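/* Illustrative sketch only (derived from the reasoning above, not from any
   ABI document): to materialise 0x12345678 we can emit either

	movw	r0, #0x5678
	movt	r0, #0x1234

   (two single-cycle instructions), or a single literal-pool load

	ldr	r0, =0x12345678

   which also costs two cycles but can pipeline with neighbouring memory
   accesses, hence PREF_CONST_POOL_TRUE below.  */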
2294 
2295 const struct tune_params arm_v7m_tune =
2296 {
2297   &v7m_extra_costs,
2298   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2299   NULL,					/* Sched adj cost.  */
2300   arm_cortex_m_branch_cost,
2301   &arm_default_vec_cost,
2302   1,						/* Constant limit.  */
2303   2,						/* Max cond insns.  */
2304   8,						/* Memset max inline.  */
2305   1,						/* Issue rate.  */
2306   ARM_PREFETCH_NOT_BENEFICIAL,
2307   tune_params::PREF_CONST_POOL_TRUE,
2308   tune_params::PREF_LDRD_FALSE,
2309   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2310   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2311   tune_params::DISPARAGE_FLAGS_NEITHER,
2312   tune_params::PREF_NEON_STRINGOPS_FALSE,
2313   tune_params::FUSE_NOTHING,
2314   tune_params::SCHED_AUTOPREF_OFF
2315 };
2316 
2317 /* Cortex-M7 tuning.  */
2318 
2319 const struct tune_params arm_cortex_m7_tune =
2320 {
2321   &v7m_extra_costs,
2322   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2323   NULL,					/* Sched adj cost.  */
2324   arm_cortex_m7_branch_cost,
2325   &arm_default_vec_cost,
2326   0,						/* Constant limit.  */
2327   1,						/* Max cond insns.  */
2328   8,						/* Memset max inline.  */
2329   2,						/* Issue rate.  */
2330   ARM_PREFETCH_NOT_BENEFICIAL,
2331   tune_params::PREF_CONST_POOL_TRUE,
2332   tune_params::PREF_LDRD_FALSE,
2333   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2334   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2335   tune_params::DISPARAGE_FLAGS_NEITHER,
2336   tune_params::PREF_NEON_STRINGOPS_FALSE,
2337   tune_params::FUSE_NOTHING,
2338   tune_params::SCHED_AUTOPREF_OFF
2339 };
2340 
2341 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2342    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
2343    cortex-m23.  */
2344 const struct tune_params arm_v6m_tune =
2345 {
2346   &generic_extra_costs,			/* Insn extra costs.  */
2347   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2348   NULL,					/* Sched adj cost.  */
2349   arm_default_branch_cost,
2350   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2351   1,						/* Constant limit.  */
2352   5,						/* Max cond insns.  */
2353   8,						/* Memset max inline.  */
2354   1,						/* Issue rate.  */
2355   ARM_PREFETCH_NOT_BENEFICIAL,
2356   tune_params::PREF_CONST_POOL_FALSE,
2357   tune_params::PREF_LDRD_FALSE,
2358   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2359   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2360   tune_params::DISPARAGE_FLAGS_NEITHER,
2361   tune_params::PREF_NEON_STRINGOPS_FALSE,
2362   tune_params::FUSE_NOTHING,
2363   tune_params::SCHED_AUTOPREF_OFF
2364 };
2365 
2366 const struct tune_params arm_fa726te_tune =
2367 {
2368   &generic_extra_costs,				/* Insn extra costs.  */
2369   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2370   fa726te_sched_adjust_cost,
2371   arm_default_branch_cost,
2372   &arm_default_vec_cost,
2373   1,						/* Constant limit.  */
2374   5,						/* Max cond insns.  */
2375   8,						/* Memset max inline.  */
2376   2,						/* Issue rate.  */
2377   ARM_PREFETCH_NOT_BENEFICIAL,
2378   tune_params::PREF_CONST_POOL_TRUE,
2379   tune_params::PREF_LDRD_FALSE,
2380   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2381   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2382   tune_params::DISPARAGE_FLAGS_NEITHER,
2383   tune_params::PREF_NEON_STRINGOPS_FALSE,
2384   tune_params::FUSE_NOTHING,
2385   tune_params::SCHED_AUTOPREF_OFF
2386 };
2387 
2388 /* Auto-generated CPU, FPU and architecture tables.  */
2389 #include "arm-cpu-data.h"
2390 
2391 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2392    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2393    is thus chosen to be big enough to hold the longest architecture name.  */
2394 
2395 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2396 
2397 /* Supported TLS relocations.  */
2398 
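/* (For reference: GD = global dynamic, LDM/LDO = local dynamic, IE =
   initial exec, LE = local exec; the _FDPIC variants are the forms used on
   FDPIC targets, and TLS_DESCSEQ is the GNU TLS-descriptor sequence.)  */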
2399 enum tls_reloc {
2400   TLS_GD32,
2401   TLS_GD32_FDPIC,
2402   TLS_LDM32,
2403   TLS_LDM32_FDPIC,
2404   TLS_LDO32,
2405   TLS_IE32,
2406   TLS_IE32_FDPIC,
2407   TLS_LE32,
2408   TLS_DESCSEQ	/* GNU scheme */
2409 };
2410 
2411 /* The maximum number of insns to be used when loading a constant.  */
2412 inline static int
2413 arm_constant_limit (bool size_p)
2414 {
2415   return size_p ? 1 : current_tune->constant_limit;
2416 }
2417 
2418 /* Emit an insn that's a simple single-set.  Both the operands must be known
2419    to be valid.  */
2420 inline static rtx_insn *
2421 emit_set_insn (rtx x, rtx y)
2422 {
2423   return emit_insn (gen_rtx_SET (x, y));
2424 }
2425 
2426 /* Return the number of bits set in VALUE.  */
2427 static unsigned
2428 bit_count (unsigned long value)
2429 {
2430   unsigned long count = 0;
2431 
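  /* Kernighan's method: each iteration clears exactly one set bit, so the
     loop body executes once per bit set in VALUE.  */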
2432   while (value)
2433     {
2434       count++;
2435       value &= value - 1;  /* Clear the least-significant set bit.  */
2436     }
2437 
2438   return count;
2439 }
2440 
2441 /* Return the number of bits set in BMAP.  */
2442 static unsigned
2443 bitmap_popcount (const sbitmap bmap)
2444 {
2445   unsigned int count = 0;
2446   unsigned int n = 0;
2447   sbitmap_iterator sbi;
2448 
2449   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2450     count++;
2451   return count;
2452 }
2453 
2454 typedef struct
2455 {
2456   machine_mode mode;
2457   const char *name;
2458 } arm_fixed_mode_set;
2459 
2460 /* A small helper for setting the libfuncs for fixed-point operations.  */
2461 
2462 static void
2463 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2464 			     const char *funcname, const char *modename,
2465 			     int num_suffix)
2466 {
2467   char buffer[50];
2468 
2469   if (num_suffix == 0)
2470     sprintf (buffer, "__gnu_%s%s", funcname, modename);
2471   else
2472     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2473 
2474   set_optab_libfunc (optable, mode, buffer);
2475 }
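/* As a sketch of the naming only: arm_set_fixed_optab_libfunc (add_optab,
   E_QQmode, "add", "qq", 3) registers "__gnu_addqq3", whereas a NUM_SUFFIX
   of 0 would simply drop the trailing digit.  */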
2476 
2477 static void
2478 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2479 			    machine_mode from, const char *funcname,
2480 			    const char *toname, const char *fromname)
2481 {
2482   char buffer[50];
2483   const char *maybe_suffix_2 = "";
2484 
2485   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2486   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2487       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2488       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2489     maybe_suffix_2 = "2";
2490 
2491   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2492 	   maybe_suffix_2);
2493 
2494   set_conv_libfunc (optable, to, from, buffer);
2495 }
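/* As a sketch of the naming only: arm_set_fixed_conv_libfunc (fract_optab,
   E_HQmode, E_QQmode, "fract", "hq", "qq") registers "__gnu_fractqqhq2";
   the "2" suffix is added because both modes are signed fractional modes.  */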
2496 
2497 static GTY(()) rtx speculation_barrier_libfunc;
2498 
2499 /* Record that we have no arithmetic or comparison libfuncs for
2500    machine mode MODE.  */
2501 
2502 static void
2503 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2504 {
2505   /* Arithmetic.  */
2506   set_optab_libfunc (add_optab, mode, NULL);
2507   set_optab_libfunc (sdiv_optab, mode, NULL);
2508   set_optab_libfunc (smul_optab, mode, NULL);
2509   set_optab_libfunc (neg_optab, mode, NULL);
2510   set_optab_libfunc (sub_optab, mode, NULL);
2511 
2512   /* Comparisons.  */
2513   set_optab_libfunc (eq_optab, mode, NULL);
2514   set_optab_libfunc (ne_optab, mode, NULL);
2515   set_optab_libfunc (lt_optab, mode, NULL);
2516   set_optab_libfunc (le_optab, mode, NULL);
2517   set_optab_libfunc (ge_optab, mode, NULL);
2518   set_optab_libfunc (gt_optab, mode, NULL);
2519   set_optab_libfunc (unord_optab, mode, NULL);
2520 }
2521 
2522 /* Set up library functions unique to ARM.  */
2523 static void
2524 arm_init_libfuncs (void)
2525 {
2526   machine_mode mode_iter;
2527 
2528   /* For Linux, we have access to kernel support for atomic operations.  */
2529   if (arm_abi == ARM_ABI_AAPCS_LINUX)
2530     init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2531 
2532   /* There are no special library functions unless we are using the
2533      ARM BPABI.  */
2534   if (!TARGET_BPABI)
2535     return;
2536 
2537   /* The functions below are described in Section 4 of the "Run-Time
2538      ABI for the ARM architecture", Version 1.0.  */
2539 
2540   /* Double-precision floating-point arithmetic.  Table 2.  */
2541   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2542   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2543   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2544   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2545   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2546 
2547   /* Double-precision comparisons.  Table 3.  */
2548   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2549   set_optab_libfunc (ne_optab, DFmode, NULL);
2550   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2551   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2552   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2553   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2554   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2555 
2556   /* Single-precision floating-point arithmetic.  Table 4.  */
2557   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2558   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2559   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2560   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2561   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2562 
2563   /* Single-precision comparisons.  Table 5.  */
2564   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2565   set_optab_libfunc (ne_optab, SFmode, NULL);
2566   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2567   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2568   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2569   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2570   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2571 
2572   /* Floating-point to integer conversions.  Table 6.  */
2573   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2574   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2575   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2576   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2577   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2578   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2579   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2580   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2581 
2582   /* Conversions between floating types.  Table 7.  */
2583   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2584   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2585 
2586   /* Integer to floating-point conversions.  Table 8.  */
2587   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2588   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2589   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2590   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2591   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2592   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2593   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2594   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2595 
2596   /* Long long.  Table 9.  */
2597   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2598   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2599   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2600   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2601   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2602   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2603   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2604   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2605 
2606   /* Integer (32/32->32) division.  \S 4.3.1.  */
2607   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2608   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2609 
2610   /* The divmod functions are designed so that they can be used for
2611      plain division, even though they return both the quotient and the
2612      remainder.  The quotient is returned in the usual location (i.e.,
2613      r0 for SImode, {r0, r1} for DImode), just as would be expected
2614      for an ordinary division routine.  Because the AAPCS calling
2615      conventions specify that all of { r0, r1, r2, r3 } are
2616      call-clobbered registers, there is no need to tell the compiler
2617      explicitly that those registers are clobbered by these
2618      routines.  */
2619   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2620   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
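  /* Worked example: a plain DImode division therefore calls __aeabi_ldivmod
     and uses only the quotient in {r0, r1}; the remainder left in {r2, r3}
     is ignored, which is safe because those registers are call-clobbered
     anyway.  */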
2621 
2622   /* For SImode division the ABI provides div-without-mod routines,
2623      which are faster.  */
2624   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2625   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2626 
2627   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2628      divmod libcalls instead.  */
2629   set_optab_libfunc (smod_optab, DImode, NULL);
2630   set_optab_libfunc (umod_optab, DImode, NULL);
2631   set_optab_libfunc (smod_optab, SImode, NULL);
2632   set_optab_libfunc (umod_optab, SImode, NULL);
2633 
2634   /* Half-precision float operations.  The compiler handles all operations
2635      with NULL libfuncs by converting to SFmode.  */
2636   switch (arm_fp16_format)
2637     {
2638     case ARM_FP16_FORMAT_IEEE:
2639     case ARM_FP16_FORMAT_ALTERNATIVE:
2640 
2641       /* Conversions.  */
2642       set_conv_libfunc (trunc_optab, HFmode, SFmode,
2643 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2644 			 ? "__gnu_f2h_ieee"
2645 			 : "__gnu_f2h_alternative"));
2646       set_conv_libfunc (sext_optab, SFmode, HFmode,
2647 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2648 			 ? "__gnu_h2f_ieee"
2649 			 : "__gnu_h2f_alternative"));
2650 
2651       set_conv_libfunc (trunc_optab, HFmode, DFmode,
2652 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2653 			 ? "__gnu_d2h_ieee"
2654 			 : "__gnu_d2h_alternative"));
2655 
2656       arm_block_arith_comp_libfuncs_for_mode (HFmode);
2657       break;
2658 
2659     default:
2660       break;
2661     }
2662 
2663   /* For all possible libcalls in BFmode, record NULL.  */
2664   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2665     {
2666       set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2667       set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2668       set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2669       set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2670     }
2671   arm_block_arith_comp_libfuncs_for_mode (BFmode);
2672 
2673   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2674   {
2675     const arm_fixed_mode_set fixed_arith_modes[] =
2676       {
2677 	{ E_QQmode, "qq" },
2678 	{ E_UQQmode, "uqq" },
2679 	{ E_HQmode, "hq" },
2680 	{ E_UHQmode, "uhq" },
2681 	{ E_SQmode, "sq" },
2682 	{ E_USQmode, "usq" },
2683 	{ E_DQmode, "dq" },
2684 	{ E_UDQmode, "udq" },
2685 	{ E_TQmode, "tq" },
2686 	{ E_UTQmode, "utq" },
2687 	{ E_HAmode, "ha" },
2688 	{ E_UHAmode, "uha" },
2689 	{ E_SAmode, "sa" },
2690 	{ E_USAmode, "usa" },
2691 	{ E_DAmode, "da" },
2692 	{ E_UDAmode, "uda" },
2693 	{ E_TAmode, "ta" },
2694 	{ E_UTAmode, "uta" }
2695       };
2696     const arm_fixed_mode_set fixed_conv_modes[] =
2697       {
2698 	{ E_QQmode, "qq" },
2699 	{ E_UQQmode, "uqq" },
2700 	{ E_HQmode, "hq" },
2701 	{ E_UHQmode, "uhq" },
2702 	{ E_SQmode, "sq" },
2703 	{ E_USQmode, "usq" },
2704 	{ E_DQmode, "dq" },
2705 	{ E_UDQmode, "udq" },
2706 	{ E_TQmode, "tq" },
2707 	{ E_UTQmode, "utq" },
2708 	{ E_HAmode, "ha" },
2709 	{ E_UHAmode, "uha" },
2710 	{ E_SAmode, "sa" },
2711 	{ E_USAmode, "usa" },
2712 	{ E_DAmode, "da" },
2713 	{ E_UDAmode, "uda" },
2714 	{ E_TAmode, "ta" },
2715 	{ E_UTAmode, "uta" },
2716 	{ E_QImode, "qi" },
2717 	{ E_HImode, "hi" },
2718 	{ E_SImode, "si" },
2719 	{ E_DImode, "di" },
2720 	{ E_TImode, "ti" },
2721 	{ E_SFmode, "sf" },
2722 	{ E_DFmode, "df" }
2723       };
2724     unsigned int i, j;
2725 
2726     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2727       {
2728 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2729 				     "add", fixed_arith_modes[i].name, 3);
2730 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2731 				     "ssadd", fixed_arith_modes[i].name, 3);
2732 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2733 				     "usadd", fixed_arith_modes[i].name, 3);
2734 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2735 				     "sub", fixed_arith_modes[i].name, 3);
2736 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2737 				     "sssub", fixed_arith_modes[i].name, 3);
2738 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2739 				     "ussub", fixed_arith_modes[i].name, 3);
2740 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2741 				     "mul", fixed_arith_modes[i].name, 3);
2742 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2743 				     "ssmul", fixed_arith_modes[i].name, 3);
2744 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2745 				     "usmul", fixed_arith_modes[i].name, 3);
2746 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2747 				     "div", fixed_arith_modes[i].name, 3);
2748 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2749 				     "udiv", fixed_arith_modes[i].name, 3);
2750 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2751 				     "ssdiv", fixed_arith_modes[i].name, 3);
2752 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2753 				     "usdiv", fixed_arith_modes[i].name, 3);
2754 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2755 				     "neg", fixed_arith_modes[i].name, 2);
2756 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2757 				     "ssneg", fixed_arith_modes[i].name, 2);
2758 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2759 				     "usneg", fixed_arith_modes[i].name, 2);
2760 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2761 				     "ashl", fixed_arith_modes[i].name, 3);
2762 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2763 				     "ashr", fixed_arith_modes[i].name, 3);
2764 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2765 				     "lshr", fixed_arith_modes[i].name, 3);
2766 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2767 				     "ssashl", fixed_arith_modes[i].name, 3);
2768 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2769 				     "usashl", fixed_arith_modes[i].name, 3);
2770 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2771 				     "cmp", fixed_arith_modes[i].name, 2);
2772       }
2773 
2774     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2775       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2776 	{
2777 	  if (i == j
2778 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2779 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2780 	    continue;
2781 
2782 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2783 				      fixed_conv_modes[j].mode, "fract",
2784 				      fixed_conv_modes[i].name,
2785 				      fixed_conv_modes[j].name);
2786 	  arm_set_fixed_conv_libfunc (satfract_optab,
2787 				      fixed_conv_modes[i].mode,
2788 				      fixed_conv_modes[j].mode, "satfract",
2789 				      fixed_conv_modes[i].name,
2790 				      fixed_conv_modes[j].name);
2791 	  arm_set_fixed_conv_libfunc (fractuns_optab,
2792 				      fixed_conv_modes[i].mode,
2793 				      fixed_conv_modes[j].mode, "fractuns",
2794 				      fixed_conv_modes[i].name,
2795 				      fixed_conv_modes[j].name);
2796 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2797 				      fixed_conv_modes[i].mode,
2798 				      fixed_conv_modes[j].mode, "satfractuns",
2799 				      fixed_conv_modes[i].name,
2800 				      fixed_conv_modes[j].name);
2801 	}
2802   }
2803 
2804   if (TARGET_AAPCS_BASED)
2805     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2806 
2807   speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2808 }
2809 
2810 /* On AAPCS systems, this is the "struct __va_list".  */
2811 static GTY(()) tree va_list_type;
2812 
2813 /* Return the type to use as __builtin_va_list.  */
2814 static tree
2815 arm_build_builtin_va_list (void)
2816 {
2817   tree va_list_name;
2818   tree ap_field;
2819 
2820   if (!TARGET_AAPCS_BASED)
2821     return std_build_builtin_va_list ();
2822 
2823   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2824      defined as:
2825 
2826        struct __va_list
2827        {
2828 	 void *__ap;
2829        };
2830 
2831      The C Library ABI further reinforces this definition in \S
2832      4.1.
2833 
2834      We must follow this definition exactly.  The structure tag
2835      name is visible in C++ mangled names, and thus forms a part
2836      of the ABI.  The field name may be used by people who
2837      #include <stdarg.h>.  */
2838   /* Create the type.  */
2839   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2840   /* Give it the required name.  */
2841   va_list_name = build_decl (BUILTINS_LOCATION,
2842 			     TYPE_DECL,
2843 			     get_identifier ("__va_list"),
2844 			     va_list_type);
2845   DECL_ARTIFICIAL (va_list_name) = 1;
2846   TYPE_NAME (va_list_type) = va_list_name;
2847   TYPE_STUB_DECL (va_list_type) = va_list_name;
2848   /* Create the __ap field.  */
2849   ap_field = build_decl (BUILTINS_LOCATION,
2850 			 FIELD_DECL,
2851 			 get_identifier ("__ap"),
2852 			 ptr_type_node);
2853   DECL_ARTIFICIAL (ap_field) = 1;
2854   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2855   TYPE_FIELDS (va_list_type) = ap_field;
2856   /* Compute its layout.  */
2857   layout_type (va_list_type);
2858 
2859   return va_list_type;
2860 }
2861 
2862 /* Return an expression of type "void *" pointing to the next
2863    available argument in a variable-argument list.  VALIST is the
2864    user-level va_list object, of type __builtin_va_list.  */
2865 static tree
2866 arm_extract_valist_ptr (tree valist)
2867 {
2868   if (TREE_TYPE (valist) == error_mark_node)
2869     return error_mark_node;
2870 
2871   /* On an AAPCS target, the pointer is stored within "struct
2872      va_list".  */
2873   if (TARGET_AAPCS_BASED)
2874     {
2875       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2876       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2877 		       valist, ap_field, NULL_TREE);
2878     }
2879 
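  /* At this point, on AAPCS targets VALIST denotes the field reference
     "valist.__ap"; on other targets VALIST is returned unchanged.  */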
2880   return valist;
2881 }
2882 
2883 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2884 static void
2885 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2886 {
2887   valist = arm_extract_valist_ptr (valist);
2888   std_expand_builtin_va_start (valist, nextarg);
2889 }
2890 
2891 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2892 static tree
2893 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2894 			  gimple_seq *post_p)
2895 {
2896   valist = arm_extract_valist_ptr (valist);
2897   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2898 }
2899 
2900 /* Check any incompatible options that the user has specified.  */
2901 static void
2902 arm_option_check_internal (struct gcc_options *opts)
2903 {
2904   int flags = opts->x_target_flags;
2905 
2906   /* iWMMXt and NEON are incompatible.  */
2907   if (TARGET_IWMMXT
2908       && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2909     error ("iWMMXt and NEON are incompatible");
2910 
2911   /* Make sure that the processor choice does not conflict with any of the
2912      other command line choices.  */
2913   if (TARGET_ARM_P (flags)
2914       && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2915     error ("target CPU does not support ARM mode");
2916 
2917   /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2918   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2919     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2920 
2921   if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2922     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2923 
2924   /* If this target is normally configured to use APCS frames, warn if they
2925      are turned off and debugging is turned on.  */
2926   if (TARGET_ARM_P (flags)
2927       && write_symbols != NO_DEBUG
2928       && !TARGET_APCS_FRAME
2929       && (TARGET_DEFAULT & MASK_APCS_FRAME))
2930     warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2931 	     "debugging");
2932 
2933   /* iWMMXt unsupported under Thumb mode.  */
2934   if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2935     error ("iWMMXt unsupported under Thumb mode");
2936 
2937   if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2938     error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2939 
2940   if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2941     {
2942       error ("RTP PIC is incompatible with Thumb");
2943       flag_pic = 0;
2944     }
2945 
2946   if (target_pure_code || target_slow_flash_data)
2947     {
2948       const char *flag = (target_pure_code ? "-mpure-code" :
2949 					     "-mslow-flash-data");
2950       bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2951 
2952       /* We only support -mslow-flash-data on M-profile targets with
2953 	 MOVT.  */
2954       if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2955 	error ("%s only supports non-pic code on M-profile targets with the "
2956 	       "MOVT instruction", flag);
2957 
2958       /* We only support -mpure-code on M-profile targets.  */
2959       if (target_pure_code && common_unsupported_modes)
2960 	error ("%s only supports non-pic code on M-profile targets", flag);
2961 
2962       /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2963 	 -mword-relocations forbids relocation of MOVT/MOVW.  */
2964       if (target_word_relocations)
2965 	error ("%s incompatible with %<-mword-relocations%>", flag);
2966     }
2967 }
2968 
2969 /* Recompute the global settings depending on target attribute options.  */
2970 
2971 static void
2972 arm_option_params_internal (void)
2973 {
2974   /* If we are not using the default (ARM mode) section anchor offset
2975      ranges, then set the correct ranges now.  */
2976   if (TARGET_THUMB1)
2977     {
2978       /* Thumb-1 LDR instructions cannot have negative offsets.
2979          Permissible positive offset ranges are 5-bit (for byte loads),
2980          6-bit (for halfword loads), or 7-bit (for word loads).
2981          Empirical results suggest a 7-bit anchor range gives the best
2982          overall code size.  */
2983       targetm.min_anchor_offset = 0;
2984       targetm.max_anchor_offset = 127;
2985     }
2986   else if (TARGET_THUMB2)
2987     {
2988       /* The minimum is set such that the total size of the block
2989          for a particular anchor is 248 + 1 + 4095 bytes, which is
2990          divisible by eight, ensuring natural spacing of anchors.  */
2991       targetm.min_anchor_offset = -248;
2992       targetm.max_anchor_offset = 4095;
2993     }
2994   else
2995     {
2996       targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2997       targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2998     }
2999 
3000   /* Increase the number of conditional instructions with -Os.  */
3001   max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3002 
3003   /* For THUMB2, we limit the conditional sequence to one IT block.  */
3004   if (TARGET_THUMB2)
3005     max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
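  /* Informal note: a Thumb-2 IT block can predicate at most four following
     instructions (only one when -mrestrict-it is in effect), which is what
     MAX_INSN_PER_IT_BLOCK reflects.  */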
3006 
3007   if (TARGET_THUMB1)
3008     targetm.md_asm_adjust = thumb1_md_asm_adjust;
3009   else
3010     targetm.md_asm_adjust = arm_md_asm_adjust;
3011 }
3012 
3013 /* True if -mflip-thumb should next add an attribute for the default
3014    mode, false if it should next add an attribute for the opposite mode.  */
3015 static GTY(()) bool thumb_flipper;
3016 
3017 /* Options after initial target override.  */
3018 static GTY(()) tree init_optimize;
3019 
3020 static void
3021 arm_override_options_after_change_1 (struct gcc_options *opts)
3022 {
3023   /* -falign-functions without argument: supply one.  */
3024   if (opts->x_flag_align_functions && !opts->x_str_align_functions)
3025     opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3026       && opts->x_optimize_size ? "2" : "4";
3027 }
3028 
3029 /* Implement targetm.override_options_after_change.  */
3030 
3031 static void
3032 arm_override_options_after_change (void)
3033 {
3034   arm_configure_build_target (&arm_active_target,
3035 			      TREE_TARGET_OPTION (target_option_default_node),
3036 			      false);
3037 
3038   arm_override_options_after_change_1 (&global_options);
3039 }
3040 
3041 /* Implement TARGET_OPTION_SAVE.  */
3042 static void
3043 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
3044 {
3045   ptr->x_arm_arch_string = opts->x_arm_arch_string;
3046   ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
3047   ptr->x_arm_tune_string = opts->x_arm_tune_string;
3048 }
3049 
3050 /* Implement TARGET_OPTION_RESTORE.  */
3051 static void
3052 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
3053 {
3054   opts->x_arm_arch_string = ptr->x_arm_arch_string;
3055   opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
3056   opts->x_arm_tune_string = ptr->x_arm_tune_string;
3057   arm_configure_build_target (&arm_active_target, ptr, false);
3058   arm_option_reconfigure_globals ();
3059 }
3060 
3061 /* Reset options between modes that the user has specified.  */
3062 static void
3063 arm_option_override_internal (struct gcc_options *opts,
3064 			      struct gcc_options *opts_set)
3065 {
3066   arm_override_options_after_change_1 (opts);
3067 
3068   if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3069     {
3070       /* The default is to enable interworking, so this warning message would
3071 	 be confusing to users who have just compiled with
3072 	 eg, -march=armv4.  */
3073       /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3074       opts->x_target_flags &= ~MASK_INTERWORK;
3075     }
3076 
3077   if (TARGET_THUMB_P (opts->x_target_flags)
3078       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3079     {
3080       warning (0, "target CPU does not support THUMB instructions");
3081       opts->x_target_flags &= ~MASK_THUMB;
3082     }
3083 
3084   if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3085     {
3086       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3087       opts->x_target_flags &= ~MASK_APCS_FRAME;
3088     }
3089 
3090   /* Callee super interworking implies thumb interworking.  Adding
3091      this to the flags here simplifies the logic elsewhere.  */
3092   if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3093     opts->x_target_flags |= MASK_INTERWORK;
3094 
3095   /* Need to remember initial values so combinations of options like
3096      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3097   cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3098 
3099   if (! opts_set->x_arm_restrict_it)
3100     opts->x_arm_restrict_it = arm_arch8;
3101 
3102   /* ARM execution state has no IT blocks and M-profile does not restrict them, so -mrestrict-it does not apply to either.  */
3103   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3104     opts->x_arm_restrict_it = 0;
3105 
3106   /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
3107   if (!opts_set->x_arm_restrict_it
3108       && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3109     opts->x_arm_restrict_it = 0;
3110 
3111   /* Enable -munaligned-access by default for
3112      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3113      i.e. Thumb2 and ARM state only.
3114      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3115      - ARMv8 architecture-based processors.
3116 
3117      Disable -munaligned-access by default for
3118      - all pre-ARMv6 architecture-based processors
3119      - ARMv6-M architecture-based processors
3120      - ARMv8-M Baseline processors.  */
3121 
3122   if (! opts_set->x_unaligned_access)
3123     {
3124       opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3125 			  && arm_arch6 && (arm_arch_notm || arm_arch7));
3126     }
3127   else if (opts->x_unaligned_access == 1
3128 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3129     {
3130       warning (0, "target CPU does not support unaligned accesses");
3131       opts->x_unaligned_access = 0;
3132     }
3133 
3134   /* Don't warn since it's on by default in -O2.  */
3135   if (TARGET_THUMB1_P (opts->x_target_flags))
3136     opts->x_flag_schedule_insns = 0;
3137   else
3138     opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3139 
3140   /* Disable shrink-wrap when optimizing function for size, since it tends to
3141      generate additional returns.  */
3142   if (optimize_function_for_size_p (cfun)
3143       && TARGET_THUMB2_P (opts->x_target_flags))
3144     opts->x_flag_shrink_wrap = false;
3145   else
3146     opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3147 
3148   /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3149      - epilogue_insns - does not accurately model the corresponding insns
3150      emitted in the asm file.  In particular, see the comment in thumb_exit
3151      'Find out how many of the (return) argument registers we can corrupt'.
3152      As a consequence, the epilogue may clobber registers without fipa-ra
3153      finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3154      TODO: Accurately model clobbers for epilogue_insns and reenable
3155      fipa-ra.  */
3156   if (TARGET_THUMB1_P (opts->x_target_flags))
3157     opts->x_flag_ipa_ra = 0;
3158   else
3159     opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3160 
3161   /* Thumb2 inline assembly code should always use unified syntax.
3162      This will apply to ARM and Thumb1 eventually.  */
3163   if (TARGET_THUMB2_P (opts->x_target_flags))
3164     opts->x_inline_asm_unified = true;
3165 
3166 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3167   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3168 #endif
3169 }
3170 
3171 static sbitmap isa_all_fpubits_internal;
3172 static sbitmap isa_all_fpbits;
3173 static sbitmap isa_quirkbits;
3174 
3175 /* Configure a build target TARGET from the user-specified options OPTS and
3176    OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3177    architecture have been specified, but the two are not identical.  */
3178 void
3179 arm_configure_build_target (struct arm_build_target *target,
3180 			    struct cl_target_option *opts,
3181 			    bool warn_compatible)
3182 {
3183   const cpu_option *arm_selected_tune = NULL;
3184   const arch_option *arm_selected_arch = NULL;
3185   const cpu_option *arm_selected_cpu = NULL;
3186   const arm_fpu_desc *arm_selected_fpu = NULL;
3187   const char *tune_opts = NULL;
3188   const char *arch_opts = NULL;
3189   const char *cpu_opts = NULL;
3190 
3191   bitmap_clear (target->isa);
3192   target->core_name = NULL;
3193   target->arch_name = NULL;
3194 
3195   if (opts->x_arm_arch_string)
3196     {
3197       arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3198 						      "-march",
3199 						      opts->x_arm_arch_string);
3200       arch_opts = strchr (opts->x_arm_arch_string, '+');
3201     }
3202 
3203   if (opts->x_arm_cpu_string)
3204     {
3205       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3206 						    opts->x_arm_cpu_string);
3207       cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3208       arm_selected_tune = arm_selected_cpu;
3209       /* If taking the tuning from -mcpu, we don't need to rescan the
3210 	 options for tuning.  */
3211     }
3212 
3213   if (opts->x_arm_tune_string)
3214     {
3215       arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3216 						     opts->x_arm_tune_string);
3217       tune_opts = strchr (opts->x_arm_tune_string, '+');
3218     }
3219 
3220   if (arm_selected_arch)
3221     {
3222       arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3223       arm_parse_option_features (target->isa, &arm_selected_arch->common,
3224 				 arch_opts);
3225 
3226       if (arm_selected_cpu)
3227 	{
3228 	  auto_sbitmap cpu_isa (isa_num_bits);
3229 	  auto_sbitmap isa_delta (isa_num_bits);
3230 
3231 	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3232 	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3233 				     cpu_opts);
3234 	  bitmap_xor (isa_delta, cpu_isa, target->isa);
3235 	  /* Ignore any bits that are quirk bits.  */
3236 	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3237 	  /* Ignore (for now) any bits that might be set by -mfpu.  */
3238 	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);
3239 
3240 	  /* And if the target ISA lacks floating point, ignore any
3241 	     extensions that depend on that.  */
3242 	  if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
3243 	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3244 
3245 	  if (!bitmap_empty_p (isa_delta))
3246 	    {
3247 	      if (warn_compatible)
3248 		warning (0, "switch %<-mcpu=%s%> conflicts "
3249 			 "with %<-march=%s%> switch",
3250 			 arm_selected_cpu->common.name,
3251 			 arm_selected_arch->common.name);
3252 	      /* -march wins for code generation.
3253 		 -mcpu wins for default tuning.  */
3254 	      if (!arm_selected_tune)
3255 		arm_selected_tune = arm_selected_cpu;
3256 
3257 	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3258 	      target->arch_name = arm_selected_arch->common.name;
3259 	    }
3260 	  else
3261 	    {
3262 	      /* Architecture and CPU are essentially the same.
3263 		 Prefer the CPU setting.  */
3264 	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3265 	      target->core_name = arm_selected_cpu->common.name;
3266 	      /* Copy the CPU's capabilities, so that we inherit the
3267 		 appropriate extensions and quirks.  */
3268 	      bitmap_copy (target->isa, cpu_isa);
3269 	    }
3270 	}
3271       else
3272 	{
3273 	  /* Pick a CPU based on the architecture.  */
3274 	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3275 	  target->arch_name = arm_selected_arch->common.name;
3276 	  /* Note: target->core_name is left unset in this path.  */
3277 	}
3278     }
3279   else if (arm_selected_cpu)
3280     {
3281       target->core_name = arm_selected_cpu->common.name;
3282       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3283       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3284 				 cpu_opts);
3285       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3286     }
3287   /* If the user did not specify a processor or architecture, choose
3288      one for them.  */
3289   else
3290     {
3291       const cpu_option *sel;
3292       auto_sbitmap sought_isa (isa_num_bits);
3293       bitmap_clear (sought_isa);
3294       auto_sbitmap default_isa (isa_num_bits);
3295 
3296       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3297 						    TARGET_CPU_DEFAULT);
3298       cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3299       gcc_assert (arm_selected_cpu->common.name);
3300 
3301       /* RWE: All of the selection logic below (to the end of this
3302 	 'if' clause) looks somewhat suspect.  It appears to be mostly
3303 	 there to support forcing thumb support when the default CPU
3304 	 does not have thumb (somewhat dubious in terms of what the
3305 	 user might be expecting).  I think it should be removed once
3306 	 support for the pre-thumb era cores is removed.  */
3307       sel = arm_selected_cpu;
3308       arm_initialize_isa (default_isa, sel->common.isa_bits);
3309       arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3310 				 cpu_opts);
3311 
3312       /* Now check to see if the user has specified any command line
3313 	 switches that require certain abilities from the cpu.  */
3314 
3315       if (TARGET_INTERWORK || TARGET_THUMB)
3316 	bitmap_set_bit (sought_isa, isa_bit_thumb);
3317 
3318       /* If there are such requirements and the default CPU does not
3319 	 satisfy them, we need to run over the complete list of
3320 	 cores looking for one that is satisfactory.  */
3321       if (!bitmap_empty_p (sought_isa)
3322 	  && !bitmap_subset_p (sought_isa, default_isa))
3323 	{
3324 	  auto_sbitmap candidate_isa (isa_num_bits);
3325 	  /* We're only interested in a CPU with at least the
3326 	     capabilities of the default CPU and the required
3327 	     additional features.  */
3328 	  bitmap_ior (default_isa, default_isa, sought_isa);
3329 
3330 	  /* Try to locate a CPU type that supports all of the abilities
3331 	     of the default CPU, plus the extra abilities requested by
3332 	     the user.  */
3333 	  for (sel = all_cores; sel->common.name != NULL; sel++)
3334 	    {
3335 	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3336 	      /* An exact match?  */
3337 	      if (bitmap_equal_p (default_isa, candidate_isa))
3338 		break;
3339 	    }
3340 
3341 	  if (sel->common.name == NULL)
3342 	    {
3343 	      unsigned current_bit_count = isa_num_bits;
3344 	      const cpu_option *best_fit = NULL;
3345 
3346 	      /* Ideally we would like to issue an error message here
3347 		 saying that it was not possible to find a CPU compatible
3348 		 with the default CPU, but which also supports the command
3349 		 line options specified by the programmer, and so they
3350 		 ought to use the -mcpu=<name> command line option to
3351 		 override the default CPU type.
3352 
3353 		 If we cannot find a CPU that has exactly the
3354 		 characteristics of the default CPU and the given
3355 		 command line options we scan the array again looking
3356 		 for a best match.  The best match must have at least
3357 		 the capabilities of the perfect match.  */
3358 	      for (sel = all_cores; sel->common.name != NULL; sel++)
3359 		{
3360 		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3361 
3362 		  if (bitmap_subset_p (default_isa, candidate_isa))
3363 		    {
3364 		      unsigned count;
3365 
3366 		      bitmap_and_compl (candidate_isa, candidate_isa,
3367 					default_isa);
3368 		      count = bitmap_popcount (candidate_isa);
3369 
3370 		      if (count < current_bit_count)
3371 			{
3372 			  best_fit = sel;
3373 			  current_bit_count = count;
3374 			}
3375 		    }
3376 
3377 		  gcc_assert (best_fit);
3378 		  sel = best_fit;
3379 		}
3380 	    }
3381 	  arm_selected_cpu = sel;
3382 	}
3383 
3384       /* Now we know the CPU, we can finally initialize the target
3385 	 structure.  */
3386       target->core_name = arm_selected_cpu->common.name;
3387       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3388       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3389 				 cpu_opts);
3390       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3391     }
3392 
3393   gcc_assert (arm_selected_cpu);
3394   gcc_assert (arm_selected_arch);
3395 
3396   if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3397     {
3398       arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3399       auto_sbitmap fpu_bits (isa_num_bits);
3400 
3401       arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3402       bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3403       bitmap_ior (target->isa, target->isa, fpu_bits);
3404     }
3405 
3406   /* If we have the soft-float ABI, clear any feature bits relating to use of
3407      floating-point operations.  They'll just confuse things later on.  */
3408   if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3409     bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3410 
3411   /* There may be implied bits which we still need to enable. These are
3412      non-named features which are needed to complete other sets of features,
3413      but cannot be enabled from arm-cpus.in due to being shared between
3414      multiple fgroups. Each entry in all_implied_fbits is of the form
3415      ante -> cons, meaning that if the feature "ante" is enabled, we should
3416      implicitly enable "cons".  */
3417   const struct fbit_implication *impl = all_implied_fbits;
3418   while (impl->ante)
3419     {
3420       if (bitmap_bit_p (target->isa, impl->ante))
3421 	bitmap_set_bit (target->isa, impl->cons);
3422       impl++;
3423     }
3424 
3425   if (!arm_selected_tune)
3426     arm_selected_tune = arm_selected_cpu;
3427   else /* Validate the features passed to -mtune.  */
3428     arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3429 
3430   const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3431 
3432   /* Finish initializing the target structure.  */
3433   if (!target->arch_name)
3434     target->arch_name = arm_selected_arch->common.name;
3435   target->arch_pp_name = arm_selected_arch->arch;
3436   target->base_arch = arm_selected_arch->base_arch;
3437   target->profile = arm_selected_arch->profile;
3438 
3439   target->tune_flags = tune_data->tune_flags;
3440   target->tune = tune_data->tune;
3441   target->tune_core = tune_data->scheduler;
3442 }
3443 
3444 /* Fix up any incompatible options that the user has specified.  */
3445 static void
3446 arm_option_override (void)
3447 {
3448   static const enum isa_feature fpu_bitlist_internal[]
3449     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3450   /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main.  */
3451   static const enum isa_feature fp_bitlist[]
3452     = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3453   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3454   cl_target_option opts;
3455 
3456   isa_quirkbits = sbitmap_alloc (isa_num_bits);
3457   arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3458 
3459   isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3460   isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3461   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3462   arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3463 
3464   arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3465 
3466   if (!global_options_set.x_arm_fpu_index)
3467     {
3468       bool ok;
3469       int fpu_index;
3470 
3471       ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3472 				  CL_TARGET);
3473       gcc_assert (ok);
3474       arm_fpu_index = (enum fpu_type) fpu_index;
3475     }
3476 
3477   cl_target_option_save (&opts, &global_options);
3478   arm_configure_build_target (&arm_active_target, &opts, true);
3479 
3480 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3481   SUBTARGET_OVERRIDE_OPTIONS;
3482 #endif
3483 
3484   /* Initialize boolean versions of the architectural flags, for use
3485      in the arm.md file and for enabling feature flags.  */
3486   arm_option_reconfigure_globals ();
3487 
3488   arm_tune = arm_active_target.tune_core;
3489   tune_flags = arm_active_target.tune_flags;
3490   current_tune = arm_active_target.tune;
3491 
3492   /* TBD: Dwarf info for apcs frame is not handled yet.  */
3493   if (TARGET_APCS_FRAME)
3494     flag_shrink_wrap = false;
3495 
3496   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3497     {
3498       warning (0, "%<-mapcs-stack-check%> incompatible with "
3499 	       "%<-mno-apcs-frame%>");
3500       target_flags |= MASK_APCS_FRAME;
3501     }
3502 
3503   if (TARGET_POKE_FUNCTION_NAME)
3504     target_flags |= MASK_APCS_FRAME;
3505 
3506   if (TARGET_APCS_REENT && flag_pic)
3507     error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3508 
3509   if (TARGET_APCS_REENT)
3510     warning (0, "APCS reentrant code not supported.  Ignored");
3511 
3512   /* Set up some tuning parameters.  */
3513   arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3514   arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3515   arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3516   arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3517   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3518   arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3519 
3520   /* For arm2/3 there is no need to do any scheduling if we are doing
3521      software floating-point.  */
3522   if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3523     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3524 
3525   /* Override the default structure alignment for AAPCS ABI.  */
3526   if (!global_options_set.x_arm_structure_size_boundary)
3527     {
3528       if (TARGET_AAPCS_BASED)
3529 	arm_structure_size_boundary = 8;
3530     }
3531   else
3532     {
3533       warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3534 
3535       if (arm_structure_size_boundary != 8
3536 	  && arm_structure_size_boundary != 32
3537 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3538 	{
3539 	  if (ARM_DOUBLEWORD_ALIGN)
3540 	    warning (0,
3541 		     "structure size boundary can only be set to 8, 32 or 64");
3542 	  else
3543 	    warning (0, "structure size boundary can only be set to 8 or 32");
3544 	  arm_structure_size_boundary
3545 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3546 	}
3547     }
3548 
3549   if (TARGET_VXWORKS_RTP)
3550     {
3551       if (!global_options_set.x_arm_pic_data_is_text_relative)
3552 	arm_pic_data_is_text_relative = 0;
3553     }
3554   else if (flag_pic
3555 	   && !arm_pic_data_is_text_relative
3556 	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3557     /* When text & data segments don't have a fixed displacement, the
3558        intended use is with a single, read only, pic base register.
3559        Unless the user explicitly requested not to do that, set
3560        it.  */
3561     target_flags |= MASK_SINGLE_PIC_BASE;
3562 
3563   /* If stack checking is disabled, we can use r10 as the PIC register,
3564      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3565   if (flag_pic && TARGET_SINGLE_PIC_BASE)
3566     {
3567       if (TARGET_VXWORKS_RTP)
3568 	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3569       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3570     }
3571 
3572   if (flag_pic && TARGET_VXWORKS_RTP)
3573     arm_pic_register = 9;
3574 
3575   /* If in FDPIC mode then force arm_pic_register to be r9.  */
3576   if (TARGET_FDPIC)
3577     {
3578       arm_pic_register = FDPIC_REGNUM;
3579       if (TARGET_THUMB1)
3580 	sorry ("FDPIC mode is not supported in Thumb-1 mode");
3581     }
3582 
3583   if (arm_pic_register_string != NULL)
3584     {
3585       int pic_register = decode_reg_name (arm_pic_register_string);
3586 
3587       if (!flag_pic)
3588 	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3589 
3590       /* Prevent the user from choosing an obviously stupid PIC register.  */
3591       else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3592 	       || pic_register == HARD_FRAME_POINTER_REGNUM
3593 	       || pic_register == STACK_POINTER_REGNUM
3594 	       || pic_register >= PC_REGNUM
3595 	       || (TARGET_VXWORKS_RTP
3596 		   && (unsigned int) pic_register != arm_pic_register))
3597 	error ("unable to use %qs for PIC register", arm_pic_register_string);
3598       else
3599 	arm_pic_register = pic_register;
3600     }
3601 
3602   if (flag_pic)
3603     target_word_relocations = 1;
3604 
3605   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3606   if (fix_cm3_ldrd == 2)
3607     {
3608       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3609 	fix_cm3_ldrd = 1;
3610       else
3611 	fix_cm3_ldrd = 0;
3612     }
3613 
3614   /* Enable fix_vlldm by default if required.  */
3615   if (fix_vlldm == 2)
3616     {
3617       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3618 	fix_vlldm = 1;
3619       else
3620 	fix_vlldm = 0;
3621     }
3622 
3623   /* Hot/Cold partitioning is not currently supported, since we can't
3624      handle literal pool placement in that case.  */
3625   if (flag_reorder_blocks_and_partition)
3626     {
3627       inform (input_location,
3628 	      "%<-freorder-blocks-and-partition%> not supported "
3629 	      "on this architecture");
3630       flag_reorder_blocks_and_partition = 0;
3631       flag_reorder_blocks = 1;
3632     }
3633 
3634   if (flag_pic)
3635     /* Hoisting PIC address calculations more aggressively provides a small,
3636        but measurable, size reduction for PIC code.  Therefore, we decrease
3637        the bar for unrestricted expression hoisting to the cost of PIC address
3638        calculation, which is 2 instructions.  */
3639     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3640 			 param_gcse_unrestricted_cost, 2);
3641 
3642   /* ARM EABI defaults to strict volatile bitfields.  */
3643   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3644       && abi_version_at_least(2))
3645     flag_strict_volatile_bitfields = 1;
3646 
3647   /* Enable software prefetching at -O3 for CPUs that have prefetch and
3648      where we have deemed it beneficial (signified by setting
3649      prefetch.num_slots to 1 or more).  */
3650   if (flag_prefetch_loop_arrays < 0
3651       && HAVE_prefetch
3652       && optimize >= 3
3653       && current_tune->prefetch.num_slots > 0)
3654     flag_prefetch_loop_arrays = 1;
3655 
3656   /* Set up parameters to be used in prefetching algorithm.  Do not
3657      override the defaults unless we are tuning for a core we have
3658      researched values for.  */
3659   if (current_tune->prefetch.num_slots > 0)
3660     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3661 			 param_simultaneous_prefetches,
3662 			 current_tune->prefetch.num_slots);
3663   if (current_tune->prefetch.l1_cache_line_size >= 0)
3664     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3665 			 param_l1_cache_line_size,
3666 			 current_tune->prefetch.l1_cache_line_size);
3667   if (current_tune->prefetch.l1_cache_size >= 0)
3668     SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3669 			 param_l1_cache_size,
3670 			 current_tune->prefetch.l1_cache_size);
3671 
3672   /* Look through ready list and all of queue for instructions
3673      relevant for L2 auto-prefetcher.  */
3674   int sched_autopref_queue_depth;
3675 
3676   switch (current_tune->sched_autopref)
3677     {
3678     case tune_params::SCHED_AUTOPREF_OFF:
3679       sched_autopref_queue_depth = -1;
3680       break;
3681 
3682     case tune_params::SCHED_AUTOPREF_RANK:
3683       sched_autopref_queue_depth = 0;
3684       break;
3685 
3686     case tune_params::SCHED_AUTOPREF_FULL:
3687       sched_autopref_queue_depth = max_insn_queue_index + 1;
3688       break;
3689 
3690     default:
3691       gcc_unreachable ();
3692     }
3693 
3694   SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3695 		       param_sched_autopref_queue_depth,
3696 		       sched_autopref_queue_depth);
3697 
3698   /* Currently, for slow flash data, we just disable literal pools.  We also
3699      disable them for pure-code.  */
3700   if (target_slow_flash_data || target_pure_code)
3701     arm_disable_literal_pool = true;
3702 
3703   /* Disable scheduling fusion by default if it's not armv7 processor
3704      or doesn't prefer ldrd/strd.  */
3705   if (flag_schedule_fusion == 2
3706       && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3707     flag_schedule_fusion = 0;
3708 
3709   /* Need to remember initial options before they are overridden.  */
3710   init_optimize = build_optimization_node (&global_options);
3711 
3712   arm_options_perform_arch_sanity_checks ();
3713   arm_option_override_internal (&global_options, &global_options_set);
3714   arm_option_check_internal (&global_options);
3715   arm_option_params_internal ();
3716 
3717   /* Create the default target_options structure.  */
3718   target_option_default_node = target_option_current_node
3719     = build_target_option_node (&global_options);
3720 
3721   /* Register global variables with the garbage collector.  */
3722   arm_add_gc_roots ();
3723 
3724   /* Init initial mode for testing.  */
3725   thumb_flipper = TARGET_THUMB;
3726 }
3727 
3728 
3729 /* Reconfigure global status flags from the active_target.isa.  */
3730 void
3731 arm_option_reconfigure_globals (void)
3732 {
3733   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3734   arm_base_arch = arm_active_target.base_arch;
3735 
3736   /* Initialize boolean versions of the architectural flags, for use
3737      in the arm.md file.  */
3738   arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3739   arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3740   arm_arch5t =  bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3741   arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3742   arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3743   arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3744   arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3745   arm_arch6m = arm_arch6 && !arm_arch_notm;
3746   arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3747   arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3748   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3749   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3750   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3751   arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3752   arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3753   arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3754 				    isa_bit_armv8_1m_main);
3755   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3756   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3757   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3758   arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3759   arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3760   arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3761   arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3762   arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3763   arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3764   arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3765   arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3766   arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3767 
3768   arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3769   if (arm_fp16_inst)
3770     {
3771       if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3772 	error ("selected fp16 options are incompatible");
3773       arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3774     }
3775 
3776   arm_arch_cde = 0;
3777   arm_arch_cde_coproc = 0;
3778   int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3779 		    isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3780 		    isa_bit_cdecp6, isa_bit_cdecp7};
3781   for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3782     {
3783       int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3784       if (cde_bit)
3785 	{
3786 	  arm_arch_cde |= cde_bit;
3787 	  arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3788 	}
3789     }
3790 
3791   /* And finally, set up some quirks.  */
3792   arm_arch_no_volatile_ce
3793     = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3794   arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3795 					    isa_bit_quirk_armv6kz);
3796 
3797   /* Use the cp15 method if it is available.  */
3798   if (target_thread_pointer == TP_AUTO)
3799     {
3800       if (arm_arch6k && !TARGET_THUMB1)
3801 	target_thread_pointer = TP_CP15;
3802       else
3803 	target_thread_pointer = TP_SOFT;
3804     }
3805 }
3806 
3807 /* Perform some validation between the desired architecture and the rest of the
3808    options.  */
3809 void
3810 arm_options_perform_arch_sanity_checks (void)
3811 {
3812   /* V5T code we generate is completely interworking capable, so we turn off
3813      TARGET_INTERWORK here to avoid many tests later on.  */
3814 
3815   /* XXX However, we must pass the right pre-processor defines to CPP
3816      or GLD can get confused.  This is a hack.  */
3817   if (TARGET_INTERWORK)
3818     arm_cpp_interwork = 1;
3819 
3820   if (arm_arch5t)
3821     target_flags &= ~MASK_INTERWORK;
3822 
3823   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3824     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3825 
3826   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3827     error ("iwmmxt abi requires an iwmmxt capable cpu");
3828 
3829   /* BPABI targets use linker tricks to allow interworking on cores
3830      without thumb support.  */
3831   if (TARGET_INTERWORK
3832       && !TARGET_BPABI
3833       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3834     {
3835       warning (0, "target CPU does not support interworking" );
3836       target_flags &= ~MASK_INTERWORK;
3837     }
3838 
3839   /* If soft-float is specified then don't use FPU.  */
3840   if (TARGET_SOFT_FLOAT)
3841     arm_fpu_attr = FPU_NONE;
3842   else
3843     arm_fpu_attr = FPU_VFP;
3844 
3845   if (TARGET_AAPCS_BASED)
3846     {
3847       if (TARGET_CALLER_INTERWORKING)
3848 	error ("AAPCS does not support %<-mcaller-super-interworking%>");
3849       else
3850 	if (TARGET_CALLEE_INTERWORKING)
3851 	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
3852     }
3853 
3854   /* __fp16 support currently assumes the core has ldrh.  */
3855   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3856     sorry ("__fp16 and no ldrh");
3857 
3858   if (use_cmse && !arm_arch_cmse)
3859     error ("target CPU does not support ARMv8-M Security Extensions");
3860 
3861   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3862      and ARMv8-M Baseline and Mainline do not allow such configuration.  */
3863   if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3864     error ("ARMv8-M Security Extensions incompatible with selected FPU");
3865 
3866 
3867   if (TARGET_AAPCS_BASED)
3868     {
3869       if (arm_abi == ARM_ABI_IWMMXT)
3870 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3871       else if (TARGET_HARD_FLOAT_ABI)
3872 	{
3873 	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
3874 	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3875 	      && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3876 	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3877 	}
3878       else
3879 	arm_pcs_default = ARM_PCS_AAPCS;
3880     }
3881   else
3882     {
3883       if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3884 	sorry ("%<-mfloat-abi=hard%> and VFP");
3885 
3886       if (arm_abi == ARM_ABI_APCS)
3887 	arm_pcs_default = ARM_PCS_APCS;
3888       else
3889 	arm_pcs_default = ARM_PCS_ATPCS;
3890     }
3891 }
3892 
3893 /* Test whether a local function descriptor is canonical, i.e.,
3894    whether we can use GOTOFFFUNCDESC to compute the address of the
3895    function.  */
3896 static bool
3897 arm_fdpic_local_funcdesc_p (rtx fnx)
3898 {
3899   tree fn;
3900   enum symbol_visibility vis;
3901   bool ret;
3902 
3903   if (!TARGET_FDPIC)
3904     return true;
3905 
3906   if (! SYMBOL_REF_LOCAL_P (fnx))
3907     return false;
3908 
3909   fn = SYMBOL_REF_DECL (fnx);
3910 
3911   if (! fn)
3912     return false;
3913 
3914   vis = DECL_VISIBILITY (fn);
3915 
3916   if (vis == VISIBILITY_PROTECTED)
3917     /* Private function descriptors for protected functions are not
3918        canonical.  Temporarily change the visibility to global so that
3919        we can ensure uniqueness of funcdesc pointers.  */
3920     DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3921 
3922   ret = default_binds_local_p_1 (fn, flag_pic);
3923 
3924   DECL_VISIBILITY (fn) = vis;
3925 
3926   return ret;
3927 }
3928 
3929 static void
3930 arm_add_gc_roots (void)
3931 {
3932   gcc_obstack_init(&minipool_obstack);
3933   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3934 }
3935 
3936 /* A table of known ARM exception types.
3937    For use with the interrupt function attribute.  */
3938 
3939 typedef struct
3940 {
3941   const char *const arg;
3942   const unsigned long return_value;
3943 }
3944 isr_attribute_arg;
3945 
3946 static const isr_attribute_arg isr_attribute_args [] =
3947 {
3948   { "IRQ",   ARM_FT_ISR },
3949   { "irq",   ARM_FT_ISR },
3950   { "FIQ",   ARM_FT_FIQ },
3951   { "fiq",   ARM_FT_FIQ },
3952   { "ABORT", ARM_FT_ISR },
3953   { "abort", ARM_FT_ISR },
3954   { "UNDEF", ARM_FT_EXCEPTION },
3955   { "undef", ARM_FT_EXCEPTION },
3956   { "SWI",   ARM_FT_EXCEPTION },
3957   { "swi",   ARM_FT_EXCEPTION },
3958   { NULL,    ARM_FT_NORMAL }
3959 };
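/* Usage sketch (user code, not part of this file): a handler declared as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   passes the string "FIQ" as the attribute argument, which arm_isr_value
   below maps to ARM_FT_FIQ via the table above.  */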
3960 
3961 /* Returns the (interrupt) function type of the current
3962    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3963 
3964 static unsigned long
3965 arm_isr_value (tree argument)
3966 {
3967   const isr_attribute_arg * ptr;
3968   const char *              arg;
3969 
3970   if (!arm_arch_notm)
3971     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3972 
3973   /* No argument - default to IRQ.  */
3974   if (argument == NULL_TREE)
3975     return ARM_FT_ISR;
3976 
3977   /* Get the value of the argument.  */
3978   if (TREE_VALUE (argument) == NULL_TREE
3979       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3980     return ARM_FT_UNKNOWN;
3981 
3982   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3983 
3984   /* Check it against the list of known arguments.  */
3985   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3986     if (streq (arg, ptr->arg))
3987       return ptr->return_value;
3988 
3989   /* An unrecognized interrupt type.  */
3990   return ARM_FT_UNKNOWN;
3991 }
3992 
3993 /* Computes the type of the current function.  */
3994 
3995 static unsigned long
3996 arm_compute_func_type (void)
3997 {
3998   unsigned long type = ARM_FT_UNKNOWN;
3999   tree a;
4000   tree attr;
4001 
4002   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4003 
4004   /* Decide if the current function is volatile.  Such functions
4005      never return, and many memory cycles can be saved by not storing
4006      register values that will never be needed again.  This optimization
4007      was added to speed up context switching in a kernel application.  */
4008   if (optimize > 0
4009       && (TREE_NOTHROW (current_function_decl)
4010           || !(flag_unwind_tables
4011                || (flag_exceptions
4012 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4013       && TREE_THIS_VOLATILE (current_function_decl))
4014     type |= ARM_FT_VOLATILE;
4015 
4016   if (cfun->static_chain_decl != NULL)
4017     type |= ARM_FT_NESTED;
4018 
4019   attr = DECL_ATTRIBUTES (current_function_decl);
4020 
4021   a = lookup_attribute ("naked", attr);
4022   if (a != NULL_TREE)
4023     type |= ARM_FT_NAKED;
4024 
4025   a = lookup_attribute ("isr", attr);
4026   if (a == NULL_TREE)
4027     a = lookup_attribute ("interrupt", attr);
4028 
4029   if (a == NULL_TREE)
4030     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4031   else
4032     type |= arm_isr_value (TREE_VALUE (a));
4033 
4034   if (lookup_attribute ("cmse_nonsecure_entry", attr))
4035     type |= ARM_FT_CMSE_ENTRY;
4036 
4037   return type;
4038 }
4039 
4040 /* Returns the type of the current function.  */
4041 
4042 unsigned long
4043 arm_current_func_type (void)
4044 {
4045   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4046     cfun->machine->func_type = arm_compute_func_type ();
4047 
4048   return cfun->machine->func_type;
4049 }
4050 
4051 bool
4052 arm_allocate_stack_slots_for_args (void)
4053 {
4054   /* Naked functions should not allocate stack slots for arguments.  */
4055   return !IS_NAKED (arm_current_func_type ());
4056 }
4057 
4058 static bool
4059 arm_warn_func_return (tree decl)
4060 {
4061   /* Naked functions are implemented entirely in assembly, including the
4062      return sequence, so suppress warnings about this.  */
4063   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4064 }
4065 
4066 
4067 /* Output assembler code for a block containing the constant parts
4068    of a trampoline, leaving space for the variable parts.
4069 
4070    On the ARM, (if r8 is the static chain regnum, and remembering that
4071    referencing pc adds an offset of 8) the trampoline looks like:
4072 	   ldr 		r8, [pc, #0]
4073 	   ldr		pc, [pc]
4074 	   .word	static chain value
4075 	   .word	function's address
4076    XXX FIXME: When the trampoline returns, r8 will be clobbered.
4077 
4078    In FDPIC mode, the trampoline looks like:
4079 	   .word	trampoline address
4080 	   .word	trampoline GOT address
4081 	   ldr 		r12, [pc, #8] ; #4 for Arm mode
4082 	   ldr 		r9,  [pc, #8] ; #4 for Arm mode
4083 	   ldr		pc,  [pc, #8] ; #4 for Arm mode
4084 	   .word	static chain value
4085 	   .word	GOT address
4086 	   .word	function's address
4087 */
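/* Note: the offsets used by arm_trampoline_init below (8 and 12 in the
   32-bit non-FDPIC case, 12 and 16 for Thumb-1, and 20/24/28 for FDPIC)
   correspond to the .word slots in the layouts sketched above.  */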
4088 
4089 static void
4090 arm_asm_trampoline_template (FILE *f)
4091 {
4092   fprintf (f, "\t.syntax unified\n");
4093 
4094   if (TARGET_FDPIC)
4095     {
4096       /* The first two words are a function descriptor pointing to the
4097 	 trampoline code just below.  */
4098       if (TARGET_ARM)
4099 	fprintf (f, "\t.arm\n");
4100       else if (TARGET_THUMB2)
4101 	fprintf (f, "\t.thumb\n");
4102       else
4103 	/* Only ARM and Thumb-2 are supported.  */
4104 	gcc_unreachable ();
4105 
4106       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4107       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4108       /* Trampoline code which sets the static chain register but also
4109 	 PIC register before jumping into real code.  */
4110       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4111 		   STATIC_CHAIN_REGNUM, PC_REGNUM,
4112 		   TARGET_THUMB2 ? 8 : 4);
4113       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4114 		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4115 		   TARGET_THUMB2 ? 8 : 4);
4116       asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4117 		   PC_REGNUM, PC_REGNUM,
4118 		   TARGET_THUMB2 ? 8 : 4);
4119       assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4120     }
4121   else if (TARGET_ARM)
4122     {
4123       fprintf (f, "\t.arm\n");
4124       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4125       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4126     }
4127   else if (TARGET_THUMB2)
4128     {
4129       fprintf (f, "\t.thumb\n");
4130       /* The Thumb-2 trampoline is similar to the arm implementation.
4131 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
4132       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4133 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
4134       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4135     }
4136   else
4137     {
4138       ASM_OUTPUT_ALIGN (f, 2);
4139       fprintf (f, "\t.code\t16\n");
4140       fprintf (f, ".Ltrampoline_start:\n");
4141       asm_fprintf (f, "\tpush\t{r0, r1}\n");
4142       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4143       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4144       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4145       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4146       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4147     }
4148   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4149   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4150 }
4151 
4152 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
4153 
4154 static void
4155 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4156 {
4157   rtx fnaddr, mem, a_tramp;
4158 
4159   emit_block_move (m_tramp, assemble_trampoline_template (),
4160 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4161 
4162   if (TARGET_FDPIC)
4163     {
4164       rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4165       rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4166       rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4167       /* The function start address is at offset 8, but in Thumb mode
4168 	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4169 	 below.  */
4170       rtx trampoline_code_start
4171 	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4172 
4173       /* Write initial funcdesc which points to the trampoline.  */
4174       mem = adjust_address (m_tramp, SImode, 0);
4175       emit_move_insn (mem, trampoline_code_start);
4176       mem = adjust_address (m_tramp, SImode, 4);
4177       emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4178       /* Setup static chain.  */
4179       mem = adjust_address (m_tramp, SImode, 20);
4180       emit_move_insn (mem, chain_value);
4181       /* GOT + real function entry point.  */
4182       mem = adjust_address (m_tramp, SImode, 24);
4183       emit_move_insn (mem, gotaddr);
4184       mem = adjust_address (m_tramp, SImode, 28);
4185       emit_move_insn (mem, fnaddr);
4186     }
4187   else
4188     {
4189       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4190       emit_move_insn (mem, chain_value);
4191 
4192       mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4193       fnaddr = XEXP (DECL_RTL (fndecl), 0);
4194       emit_move_insn (mem, fnaddr);
4195     }
4196 
4197   a_tramp = XEXP (m_tramp, 0);
4198   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4199 		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
4200 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4201 }
4202 
4203 /* Thumb trampolines should be entered in thumb mode, so set
4204    the bottom bit of the address.  */
4205 
4206 static rtx
4207 arm_trampoline_adjust_address (rtx addr)
4208 {
4209   /* For FDPIC don't fix trampoline address since it's a function
4210      descriptor and not a function address.  */
4211   if (TARGET_THUMB && !TARGET_FDPIC)
4212     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4213 				NULL, 0, OPTAB_LIB_WIDEN);
4214   return addr;
4215 }
4216 
4217 /* Return 1 if it is possible to return using a single instruction.
4218    If SIBLING is non-null, this is a test for a return before a sibling
4219    call.  SIBLING is the call insn, so we can examine its register usage.  */
4220 
4221 int
4222 use_return_insn (int iscond, rtx sibling)
4223 {
4224   int regno;
4225   unsigned int func_type;
4226   unsigned long saved_int_regs;
4227   unsigned HOST_WIDE_INT stack_adjust;
4228   arm_stack_offsets *offsets;
4229 
4230   /* Never use a return instruction before reload has run.  */
4231   if (!reload_completed)
4232     return 0;
4233 
4234   func_type = arm_current_func_type ();
4235 
4236   /* Naked, volatile and stack alignment functions need special
4237      consideration.  */
4238   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4239     return 0;
4240 
4241   /* So do interrupt functions that use the frame pointer and Thumb
4242      interrupt functions.  */
4243   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4244     return 0;
4245 
4246   if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4247       && !optimize_function_for_size_p (cfun))
4248     return 0;
4249 
4250   offsets = arm_get_frame_offsets ();
4251   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4252 
4253   /* As do variadic functions.  */
4254   if (crtl->args.pretend_args_size
4255       || cfun->machine->uses_anonymous_args
4256       /* Or if the function calls __builtin_eh_return () */
4257       || crtl->calls_eh_return
4258       /* Or if the function calls alloca */
4259       || cfun->calls_alloca
4260       /* Or if there is a stack adjustment.  However, if the stack pointer
4261 	 is saved on the stack, we can use a pre-incrementing stack load.  */
4262       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4263 				 && stack_adjust == 4))
4264       /* Or if the static chain register was saved above the frame, under the
4265 	 assumption that the stack pointer isn't saved on the stack.  */
4266       || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4267           && arm_compute_static_chain_stack_bytes() != 0))
4268     return 0;
4269 
4270   saved_int_regs = offsets->saved_regs_mask;
4271 
4272   /* Unfortunately, the insn
4273 
4274        ldmib sp, {..., sp, ...}
4275 
4276      triggers a bug on most SA-110 based devices, such that the stack
4277      pointer won't be correctly restored if the instruction takes a
4278      page fault.  We work around this problem by popping r3 along with
4279      the other registers, since that is never slower than executing
4280      another instruction.
4281 
4282      We test for !arm_arch5t here, because code for any architecture
4283      less than this could potentially be run on one of the buggy
4284      chips.  */
4285   if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4286     {
4287       /* Validate that r3 is a call-clobbered register (always true in
4288 	 the default abi) ...  */
4289       if (!call_used_or_fixed_reg_p (3))
4290 	return 0;
4291 
4292       /* ... that it isn't being used for a return value ... */
4293       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4294 	return 0;
4295 
4296       /* ... or for a tail-call argument ...  */
4297       if (sibling)
4298 	{
4299 	  gcc_assert (CALL_P (sibling));
4300 
4301 	  if (find_regno_fusage (sibling, USE, 3))
4302 	    return 0;
4303 	}
4304 
4305       /* ... and that there are no call-saved registers in r0-r2
4306 	 (always true in the default ABI).  */
4307       if (saved_int_regs & 0x7)
4308 	return 0;
4309     }
4310 
4311   /* Can't be done if interworking with Thumb, and any registers have been
4312      stacked.  */
4313   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4314     return 0;
4315 
4316   /* On StrongARM, conditional returns are expensive if they aren't
4317      taken and multiple registers have been stacked.  */
4318   if (iscond && arm_tune_strongarm)
4319     {
4320       /* Conditional return when just the LR is stored is a simple
4321 	 conditional-load instruction, that's not expensive.  */
4322       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4323 	return 0;
4324 
4325       if (flag_pic
4326 	  && arm_pic_register != INVALID_REGNUM
4327 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4328 	return 0;
4329     }
4330 
4331   /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4332      several instructions if anything needs to be popped.  Armv8.1-M Mainline
4333      also needs several instructions to save and restore FP context.  */
4334   if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4335     return 0;
4336 
4337   /* If there are saved registers but the LR isn't saved, then we need
4338      two instructions for the return.  */
4339   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4340     return 0;
4341 
4342   /* Can't be done if any of the VFP regs are pushed,
4343      since this also requires an insn.  */
4344   if (TARGET_VFP_BASE)
4345     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4346       if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
4347 	return 0;
4348 
4349   if (TARGET_REALLY_IWMMXT)
4350     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4351       if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
4352 	return 0;
4353 
4354   return 1;
4355 }
4356 
4357 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4358    shrink-wrapping if possible.  This is the case if we need to emit a
4359    prologue, which we can test by looking at the offsets.  */
4360 bool
4361 use_simple_return_p (void)
4362 {
4363   arm_stack_offsets *offsets;
4364 
4365   /* Note this function can be called before or after reload.  */
4366   if (!reload_completed)
4367     arm_compute_frame_layout ();
4368 
4369   offsets = arm_get_frame_offsets ();
4370   return offsets->outgoing_args != 0;
4371 }
4372 
4373 /* Return TRUE if int I is a valid immediate ARM constant.  */
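/* An ARM-mode immediate is an 8-bit value rotated right by an even
   number of bits, so, for example, 0xff, 0xff00, 0xf000000f and
   0x0003fc00 (0xff << 10) are all representable, while 0x101 and
   0x0001fe00 (0xff << 9, an odd rotation) are not.  */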
4374 
4375 int
4376 const_ok_for_arm (HOST_WIDE_INT i)
4377 {
4378   int lowbit;
4379 
4380   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4381      be all zero, or all one.  */
4382   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4383       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4384 	  != ((~(unsigned HOST_WIDE_INT) 0)
4385 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4386     return FALSE;
4387 
4388   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4389 
4390   /* Fast return for 0 and small values.  We must do this for zero, since
4391      the code below can't handle that one case.  */
4392   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4393     return TRUE;
4394 
4395   /* Get the number of trailing zeros.  */
4396   lowbit = ffs((int) i) - 1;
4397 
4398   /* Only even shifts are allowed in ARM mode so round down to the
4399      nearest even number.  */
4400   if (TARGET_ARM)
4401     lowbit &= ~1;
4402 
4403   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4404     return TRUE;
4405 
4406   if (TARGET_ARM)
4407     {
4408       /* Allow rotated constants in ARM mode.  */
4409       if (lowbit <= 4
4410 	   && ((i & ~0xc000003f) == 0
4411 	       || (i & ~0xf000000f) == 0
4412 	       || (i & ~0xfc000003) == 0))
4413 	return TRUE;
4414     }
4415   else if (TARGET_THUMB2)
4416     {
4417       HOST_WIDE_INT v;
4418 
4419       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
4420       v = i & 0xff;
4421       v |= v << 16;
4422       if (i == v || i == (v | (v << 8)))
4423 	return TRUE;
4424 
4425       /* Allow repeated pattern 0xXY00XY00.  */
4426       v = i & 0xff00;
4427       v |= v << 16;
4428       if (i == v)
4429 	return TRUE;
4430     }
4431   else if (TARGET_HAVE_MOVT)
4432     {
4433       /* Thumb-1 Targets with MOVT.  */
4434       if (i > 0xffff)
4435 	return FALSE;
4436       else
4437 	return TRUE;
4438     }
4439 
4440   return FALSE;
4441 }
4442 
4443 /* Return true if I is a valid constant for the operation CODE.  */
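/* For example, const_ok_for_op (-256, PLUS) is true even though -256 is
   not itself a valid immediate, because the addition can instead be
   emitted as a subtraction of 256.  */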
4444 int
4445 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4446 {
4447   if (const_ok_for_arm (i))
4448     return 1;
4449 
4450   switch (code)
4451     {
4452     case SET:
4453       /* See if we can use movw.  */
4454       if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4455 	return 1;
4456       else
4457 	/* Otherwise, try mvn.  */
4458 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4459 
4460     case PLUS:
4461       /* See if we can use addw or subw.  */
4462       if (TARGET_THUMB2
4463 	  && ((i & 0xfffff000) == 0
4464 	      || ((-i) & 0xfffff000) == 0))
4465 	return 1;
4466       /* Fall through.  */
4467     case COMPARE:
4468     case EQ:
4469     case NE:
4470     case GT:
4471     case LE:
4472     case LT:
4473     case GE:
4474     case GEU:
4475     case LTU:
4476     case GTU:
4477     case LEU:
4478     case UNORDERED:
4479     case ORDERED:
4480     case UNEQ:
4481     case UNGE:
4482     case UNLT:
4483     case UNGT:
4484     case UNLE:
4485       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4486 
4487     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
4488     case XOR:
4489       return 0;
4490 
4491     case IOR:
4492       if (TARGET_THUMB2)
4493 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4494       return 0;
4495 
4496     case AND:
4497       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4498 
4499     default:
4500       gcc_unreachable ();
4501     }
4502 }
4503 
4504 /* Return true if I is a valid DImode constant for the operation CODE.  */
4505 int
4506 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4507 {
4508   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4509   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4510   rtx hi = GEN_INT (hi_val);
4511   rtx lo = GEN_INT (lo_val);
4512 
4513   if (TARGET_THUMB1)
4514     return 0;
4515 
4516   switch (code)
4517     {
4518     case AND:
4519     case IOR:
4520     case XOR:
4521       return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4522 	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4523     case PLUS:
4524       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4525 
4526     default:
4527       return 0;
4528     }
4529 }
4530 
4531 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
4532    Avoid generating useless code when one of the bytes is zero.  */
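/* For example, for OP1 == 0x00012345 this emits a sequence equivalent to
     movs  op0, #0x01
     lsls  op0, op0, #8
     adds  op0, op0, #0x23
     lsls  op0, op0, #8
     adds  op0, op0, #0x45
   with the leading zero byte skipped entirely.  */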
4533 void
4534 thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
4535 {
4536   bool mov_done_p = false;
4537   int i;
4538 
4539   /* Emit upper 3 bytes if needed.  */
4540   for (i = 0; i < 3; i++)
4541     {
4542       int byte = (op1 >> (8 * (3 - i))) & 0xff;
4543 
4544       if (byte)
4545 	{
4546 	  emit_set_insn (op0, mov_done_p
4547 			 ? gen_rtx_PLUS (SImode, op0, GEN_INT (byte))
4548 			 : GEN_INT (byte));
4549 	  mov_done_p = true;
4550 	}
4551 
4552       if (mov_done_p)
4553 	emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
4554     }
4555 
4556   /* Emit lower byte if needed.  */
4557   if (!mov_done_p)
4558     emit_set_insn (op0, GEN_INT (op1 & 0xff));
4559   else if (op1 & 0xff)
4560     emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
4561 }
4562 
4563 /* Emit a sequence of insns to handle a large constant.
4564    CODE is the code of the operation required, it can be any of SET, PLUS,
4565    IOR, AND, XOR, MINUS;
4566    MODE is the mode in which the operation is being performed;
4567    VAL is the integer to operate on;
4568    SOURCE is the other operand (a register, or a null-pointer for SET);
4569    SUBTARGETS means it is safe to create scratch registers if that will
4570    either produce a simpler sequence, or we will want to cse the values.
4571    Return value is the number of insns emitted.  */
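/* For instance, a SET of 0x0000ff01 (not a valid immediate) is typically
   synthesized in two instructions when MOVW is unavailable, e.g. a MOV of
   #0xff00 followed by an ADD or ORR of #1.  */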
4572 
4573 /* ??? Tweak this for thumb2.  */
4574 int
4575 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4576 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4577 {
4578   rtx cond;
4579 
4580   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4581     cond = COND_EXEC_TEST (PATTERN (insn));
4582   else
4583     cond = NULL_RTX;
4584 
4585   if (subtargets || code == SET
4586       || (REG_P (target) && REG_P (source)
4587 	  && REGNO (target) != REGNO (source)))
4588     {
4589       /* After arm_reorg has been called, we can't fix up expensive
4590 	 constants by pushing them into memory so we must synthesize
4591 	 them in-line, regardless of the cost.  This is only likely to
4592 	 be more costly on chips that have load delay slots and we are
4593 	 compiling without running the scheduler (so no splitting
4594 	 occurred before the final instruction emission).
4595 
4596 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4597       */
4598       if (!cfun->machine->after_arm_reorg
4599 	  && !cond
4600 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4601 				1, 0)
4602 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
4603 		 + (code != SET))))
4604 	{
4605 	  if (code == SET)
4606 	    {
4607 	      /* Currently SET is the only monadic value for CODE, all
4608 		 the rest are dyadic.  */
4609 	      if (TARGET_USE_MOVT)
4610 		arm_emit_movpair (target, GEN_INT (val));
4611 	      else
4612 		emit_set_insn (target, GEN_INT (val));
4613 
4614 	      return 1;
4615 	    }
4616 	  else
4617 	    {
4618 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4619 
4620 	      if (TARGET_USE_MOVT)
4621 		arm_emit_movpair (temp, GEN_INT (val));
4622 	      else
4623 		emit_set_insn (temp, GEN_INT (val));
4624 
4625 	      /* For MINUS, the value is subtracted from, since we never
4626 		 have subtraction of a constant.  */
4627 	      if (code == MINUS)
4628 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4629 	      else
4630 		emit_set_insn (target,
4631 			       gen_rtx_fmt_ee (code, mode, source, temp));
4632 	      return 2;
4633 	    }
4634 	}
4635     }
4636 
4637   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4638 			   1);
4639 }
4640 
4641 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4642    ARM/THUMB2 immediates, and add up to VAL.
4643    The function return value gives the number of insns required.  */
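/* For example, VAL == 0x01f001e0 can be covered by the two immediates
   0x01f00000 and 0x000001e0, so the returned sequence has two elements
   and the function returns 2.  */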
4644 static int
4645 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4646 			    struct four_ints *return_sequence)
4647 {
4648   int best_consecutive_zeros = 0;
4649   int i;
4650   int best_start = 0;
4651   int insns1, insns2;
4652   struct four_ints tmp_sequence;
4653 
4654   /* If we aren't targeting ARM, the best place to start is always at
4655      the bottom, otherwise look more closely.  */
4656   if (TARGET_ARM)
4657     {
4658       for (i = 0; i < 32; i += 2)
4659 	{
4660 	  int consecutive_zeros = 0;
4661 
4662 	  if (!(val & (3 << i)))
4663 	    {
4664 	      while ((i < 32) && !(val & (3 << i)))
4665 		{
4666 		  consecutive_zeros += 2;
4667 		  i += 2;
4668 		}
4669 	      if (consecutive_zeros > best_consecutive_zeros)
4670 		{
4671 		  best_consecutive_zeros = consecutive_zeros;
4672 		  best_start = i - consecutive_zeros;
4673 		}
4674 	      i -= 2;
4675 	    }
4676 	}
4677     }
4678 
4679   /* So long as it won't require any more insns to do so, it's
4680      desirable to emit a small constant (in bits 0...9) in the last
4681      insn.  This way there is more chance that it can be combined with
4682      a later addressing insn to form a pre-indexed load or store
4683      operation.  Consider:
4684 
4685 	   *((volatile int *)0xe0000100) = 1;
4686 	   *((volatile int *)0xe0000110) = 2;
4687 
4688      We want this to wind up as:
4689 
4690 	    mov rA, #0xe0000000
4691 	    mov rB, #1
4692 	    str rB, [rA, #0x100]
4693 	    mov rB, #2
4694 	    str rB, [rA, #0x110]
4695 
4696      rather than having to synthesize both large constants from scratch.
4697 
4698      Therefore, we calculate how many insns would be required to emit
4699      the constant starting from `best_start', and also starting from
4700      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
4701      yield a shorter sequence, we may as well use zero.  */
4702   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4703   if (best_start != 0
4704       && ((HOST_WIDE_INT_1U << best_start) < val))
4705     {
4706       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4707       if (insns2 <= insns1)
4708 	{
4709 	  *return_sequence = tmp_sequence;
4710 	  insns1 = insns2;
4711 	}
4712     }
4713 
4714   return insns1;
4715 }
4716 
4717 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
4718 static int
4719 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4720 			     struct four_ints *return_sequence, int i)
4721 {
4722   int remainder = val & 0xffffffff;
4723   int insns = 0;
4724 
4725   /* Try and find a way of doing the job in either two or three
4726      instructions.
4727 
4728      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4729      location.  We start at position I.  This may be the MSB, or
4730      optimal_immediate_sequence may have positioned it at the largest block
4731      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4732      wrapping around to the top of the word when we drop off the bottom.
4733      In the worst case this code should produce no more than four insns.
4734 
4735      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4736      constants, shifted to any arbitrary location.  We should always start
4737      at the MSB.  */
4738   do
4739     {
4740       int end;
4741       unsigned int b1, b2, b3, b4;
4742       unsigned HOST_WIDE_INT result;
4743       int loc;
4744 
4745       gcc_assert (insns < 4);
4746 
4747       if (i <= 0)
4748 	i += 32;
4749 
4750       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
4751       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4752 	{
4753 	  loc = i;
4754 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4755 	    /* We can use addw/subw for the last 12 bits.  */
4756 	    result = remainder;
4757 	  else
4758 	    {
4759 	      /* Use an 8-bit shifted/rotated immediate.  */
4760 	      end = i - 8;
4761 	      if (end < 0)
4762 		end += 32;
4763 	      result = remainder & ((0x0ff << end)
4764 				   | ((i < end) ? (0xff >> (32 - end))
4765 						: 0));
4766 	      i -= 8;
4767 	    }
4768 	}
4769       else
4770 	{
4771 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
4772 	     arbitrary shifts.  */
4773 	  i -= TARGET_ARM ? 2 : 1;
4774 	  continue;
4775 	}
4776 
4777       /* Next, see if we can do a better job with a thumb2 replicated
4778 	 constant.
4779 
4780          We do it this way around to catch the cases like 0x01F001E0 where
4781 	 two 8-bit immediates would work, but a replicated constant would
4782 	 make it worse.
4783 
4784          TODO: 16-bit constants that don't clear all the bits, but still win.
4785          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
4786       if (TARGET_THUMB2)
4787 	{
4788 	  b1 = (remainder & 0xff000000) >> 24;
4789 	  b2 = (remainder & 0x00ff0000) >> 16;
4790 	  b3 = (remainder & 0x0000ff00) >> 8;
4791 	  b4 = remainder & 0xff;
4792 
4793 	  if (loc > 24)
4794 	    {
4795 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
4796 		 but must leave b3 and b4 alone.  */
4797 
4798 	      /* First try to find a 32-bit replicated constant that clears
4799 		 almost everything.  We can assume that we can't do it in one,
4800 		 or else we wouldn't be here.  */
4801 	      unsigned int tmp = b1 & b2 & b3 & b4;
4802 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4803 				  + (tmp << 24);
4804 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4805 					    + (tmp == b3) + (tmp == b4);
4806 	      if (tmp
4807 		  && (matching_bytes >= 3
4808 		      || (matching_bytes == 2
4809 			  && const_ok_for_op (remainder & ~tmp2, code))))
4810 		{
4811 		  /* At least 3 of the bytes match, and the fourth has at
4812 		     least as many bits set, or two of the bytes match
4813 		     and it will only require one more insn to finish.  */
4814 		  result = tmp2;
4815 		  i = tmp != b1 ? 32
4816 		      : tmp != b2 ? 24
4817 		      : tmp != b3 ? 16
4818 		      : 8;
4819 		}
4820 
4821 	      /* Second, try to find a 16-bit replicated constant that can
4822 		 leave three of the bytes clear.  If b2 or b4 is already
4823 		 zero, then we can.  If the 8-bit from above would not
4824 		 clear b2 anyway, then we still win.  */
4825 	      else if (b1 == b3 && (!b2 || !b4
4826 			       || (remainder & 0x00ff0000 & ~result)))
4827 		{
4828 		  result = remainder & 0xff00ff00;
4829 		  i = 24;
4830 		}
4831 	    }
4832 	  else if (loc > 16)
4833 	    {
4834 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
4835 		 and we don't get here unless b1 is already clear, but it will
4836 		 leave b4 unchanged.  */
4837 
4838 	      /* If we can clear b2 and b4 at once, then we win, since the
4839 		 8-bits couldn't possibly reach that far.  */
4840 	      if (b2 == b4)
4841 		{
4842 		  result = remainder & 0x00ff00ff;
4843 		  i = 16;
4844 		}
4845 	    }
4846 	}
4847 
4848       return_sequence->i[insns++] = result;
4849       remainder &= ~result;
4850 
4851       if (code == SET || code == MINUS)
4852 	code = PLUS;
4853     }
4854   while (remainder);
4855 
4856   return insns;
4857 }
4858 
4859 /* Emit an instruction with the indicated PATTERN.  If COND is
4860    non-NULL, conditionalize the execution of the instruction on COND
4861    being true.  */
4862 
4863 static void
4864 emit_constant_insn (rtx cond, rtx pattern)
4865 {
4866   if (cond)
4867     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4868   emit_insn (pattern);
4869 }
4870 
4871 /* As above, but extra parameter GENERATE which, if clear, suppresses
4872    RTL generation.  */
4873 
4874 static int
4875 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4876 		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
4877 		  int subtargets, int generate)
4878 {
4879   int can_invert = 0;
4880   int can_negate = 0;
4881   int final_invert = 0;
4882   int i;
4883   int set_sign_bit_copies = 0;
4884   int clear_sign_bit_copies = 0;
4885   int clear_zero_bit_copies = 0;
4886   int set_zero_bit_copies = 0;
4887   int insns = 0, neg_insns, inv_insns;
4888   unsigned HOST_WIDE_INT temp1, temp2;
4889   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4890   struct four_ints *immediates;
4891   struct four_ints pos_immediates, neg_immediates, inv_immediates;
4892 
4893   /* Find out which operations are safe for a given CODE.  Also do a quick
4894      check for degenerate cases; these can occur when DImode operations
4895      are split.  */
4896   switch (code)
4897     {
4898     case SET:
4899       can_invert = 1;
4900       break;
4901 
4902     case PLUS:
4903       can_negate = 1;
4904       break;
4905 
4906     case IOR:
4907       if (remainder == 0xffffffff)
4908 	{
4909 	  if (generate)
4910 	    emit_constant_insn (cond,
4911 				gen_rtx_SET (target,
4912 					     GEN_INT (ARM_SIGN_EXTEND (val))));
4913 	  return 1;
4914 	}
4915 
4916       if (remainder == 0)
4917 	{
4918 	  if (reload_completed && rtx_equal_p (target, source))
4919 	    return 0;
4920 
4921 	  if (generate)
4922 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4923 	  return 1;
4924 	}
4925       break;
4926 
4927     case AND:
4928       if (remainder == 0)
4929 	{
4930 	  if (generate)
4931 	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4932 	  return 1;
4933 	}
4934       if (remainder == 0xffffffff)
4935 	{
4936 	  if (reload_completed && rtx_equal_p (target, source))
4937 	    return 0;
4938 	  if (generate)
4939 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4940 	  return 1;
4941 	}
4942       can_invert = 1;
4943       break;
4944 
4945     case XOR:
4946       if (remainder == 0)
4947 	{
4948 	  if (reload_completed && rtx_equal_p (target, source))
4949 	    return 0;
4950 	  if (generate)
4951 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4952 	  return 1;
4953 	}
4954 
4955       if (remainder == 0xffffffff)
4956 	{
4957 	  if (generate)
4958 	    emit_constant_insn (cond,
4959 				gen_rtx_SET (target,
4960 					     gen_rtx_NOT (mode, source)));
4961 	  return 1;
4962 	}
4963       final_invert = 1;
4964       break;
4965 
4966     case MINUS:
4967       /* We treat MINUS as (val - source), since (source - val) is always
4968 	 passed as (source + (-val)).  */
4969       if (remainder == 0)
4970 	{
4971 	  if (generate)
4972 	    emit_constant_insn (cond,
4973 				gen_rtx_SET (target,
4974 					     gen_rtx_NEG (mode, source)));
4975 	  return 1;
4976 	}
4977       if (const_ok_for_arm (val))
4978 	{
4979 	  if (generate)
4980 	    emit_constant_insn (cond,
4981 				gen_rtx_SET (target,
4982 					     gen_rtx_MINUS (mode, GEN_INT (val),
4983 							    source)));
4984 	  return 1;
4985 	}
4986 
4987       break;
4988 
4989     default:
4990       gcc_unreachable ();
4991     }
4992 
4993   /* If we can do it in one insn get out quickly.  */
4994   if (const_ok_for_op (val, code))
4995     {
4996       if (generate)
4997 	emit_constant_insn (cond,
4998 			    gen_rtx_SET (target,
4999 					 (source
5000 					  ? gen_rtx_fmt_ee (code, mode, source,
5001 							    GEN_INT (val))
5002 					  : GEN_INT (val))));
5003       return 1;
5004     }
5005 
5006   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5007      insn.  */
5008   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5009       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5010     {
5011       if (generate)
5012 	{
5013 	  if (mode == SImode && i == 16)
5014 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5015 	       smaller insn.  */
5016 	    emit_constant_insn (cond,
5017 				gen_zero_extendhisi2
5018 				(target, gen_lowpart (HImode, source)));
5019 	  else
5020 	    /* Extzv only supports SImode, but we can coerce the operands
5021 	       into that mode.  */
5022 	    emit_constant_insn (cond,
5023 				gen_extzv_t2 (gen_lowpart (SImode, target),
5024 					      gen_lowpart (SImode, source),
5025 					      GEN_INT (i), const0_rtx));
5026 	}
5027 
5028       return 1;
5029     }
5030 
5031   /* Calculate a few attributes that may be useful for specific
5032      optimizations.  */
5033   /* Count number of leading zeros.  */
5034   for (i = 31; i >= 0; i--)
5035     {
5036       if ((remainder & (1 << i)) == 0)
5037 	clear_sign_bit_copies++;
5038       else
5039 	break;
5040     }
5041 
5042   /* Count number of leading 1's.  */
5043   for (i = 31; i >= 0; i--)
5044     {
5045       if ((remainder & (1 << i)) != 0)
5046 	set_sign_bit_copies++;
5047       else
5048 	break;
5049     }
5050 
5051   /* Count number of trailing zeros.  */
5052   for (i = 0; i <= 31; i++)
5053     {
5054       if ((remainder & (1 << i)) == 0)
5055 	clear_zero_bit_copies++;
5056       else
5057 	break;
5058     }
5059 
5060   /* Count number of trailing 1's.  */
5061   for (i = 0; i <= 31; i++)
5062     {
5063       if ((remainder & (1 << i)) != 0)
5064 	set_zero_bit_copies++;
5065       else
5066 	break;
5067     }
5068 
5069   switch (code)
5070     {
5071     case SET:
5072       /* See if we can do this by sign_extending a constant that is known
5073 	 to be negative.  This is a good way of doing it, since the shift
5074 	 may well merge into a subsequent insn.  */
5075       if (set_sign_bit_copies > 1)
5076 	{
5077 	  if (const_ok_for_arm
5078 	      (temp1 = ARM_SIGN_EXTEND (remainder
5079 					<< (set_sign_bit_copies - 1))))
5080 	    {
5081 	      if (generate)
5082 		{
5083 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5084 		  emit_constant_insn (cond,
5085 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5086 		  emit_constant_insn (cond,
5087 				      gen_ashrsi3 (target, new_src,
5088 						   GEN_INT (set_sign_bit_copies - 1)));
5089 		}
5090 	      return 2;
5091 	    }
5092 	  /* For an inverted constant, we will need to set the low bits,
5093 	     these will be shifted out of harm's way.  */
5094 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5095 	  if (const_ok_for_arm (~temp1))
5096 	    {
5097 	      if (generate)
5098 		{
5099 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5100 		  emit_constant_insn (cond,
5101 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5102 		  emit_constant_insn (cond,
5103 				      gen_ashrsi3 (target, new_src,
5104 						   GEN_INT (set_sign_bit_copies - 1)));
5105 		}
5106 	      return 2;
5107 	    }
5108 	}
5109 
5110       /* See if we can calculate the value as the difference between two
5111 	 valid immediates.  */
5112       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5113 	{
5114 	  int topshift = clear_sign_bit_copies & ~1;
5115 
5116 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5117 				   & (0xff000000 >> topshift));
5118 
5119 	  /* If temp1 is zero, then that means the 9 most significant
5120 	     bits of remainder were 1 and we've caused it to overflow.
5121 	     When topshift is 0 we don't need to do anything since we
5122 	     can borrow from 'bit 32'.  */
5123 	  if (temp1 == 0 && topshift != 0)
5124 	    temp1 = 0x80000000 >> (topshift - 1);
5125 
5126 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5127 
5128 	  if (const_ok_for_arm (temp2))
5129 	    {
5130 	      if (generate)
5131 		{
5132 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5133 		  emit_constant_insn (cond,
5134 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
5135 		  emit_constant_insn (cond,
5136 				      gen_addsi3 (target, new_src,
5137 						  GEN_INT (-temp2)));
5138 		}
5139 
5140 	      return 2;
5141 	    }
5142 	}
5143 
5144       /* See if we can generate this by setting the bottom (or the top)
5145 	 16 bits, and then shifting these into the other half of the
5146 	 word.  We only look for the simplest cases, to do more would cost
5147 	 too much.  Be careful, however, not to generate this when the
5148 	 alternative would take fewer insns.  */
5149       if (val & 0xffff0000)
5150 	{
5151 	  temp1 = remainder & 0xffff0000;
5152 	  temp2 = remainder & 0x0000ffff;
5153 
5154 	  /* Overlaps outside this range are best done using other methods.  */
5155 	  for (i = 9; i < 24; i++)
5156 	    {
5157 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5158 		  && !const_ok_for_arm (temp2))
5159 		{
5160 		  rtx new_src = (subtargets
5161 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5162 				 : target);
5163 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5164 					    source, subtargets, generate);
5165 		  source = new_src;
5166 		  if (generate)
5167 		    emit_constant_insn
5168 		      (cond,
5169 		       gen_rtx_SET
5170 		       (target,
5171 			gen_rtx_IOR (mode,
5172 				     gen_rtx_ASHIFT (mode, source,
5173 						     GEN_INT (i)),
5174 				     source)));
5175 		  return insns + 1;
5176 		}
5177 	    }
5178 
5179 	  /* Don't duplicate cases already considered.  */
5180 	  for (i = 17; i < 24; i++)
5181 	    {
5182 	      if (((temp1 | (temp1 >> i)) == remainder)
5183 		  && !const_ok_for_arm (temp1))
5184 		{
5185 		  rtx new_src = (subtargets
5186 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5187 				 : target);
5188 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5189 					    source, subtargets, generate);
5190 		  source = new_src;
5191 		  if (generate)
5192 		    emit_constant_insn
5193 		      (cond,
5194 		       gen_rtx_SET (target,
5195 				    gen_rtx_IOR
5196 				    (mode,
5197 				     gen_rtx_LSHIFTRT (mode, source,
5198 						       GEN_INT (i)),
5199 				     source)));
5200 		  return insns + 1;
5201 		}
5202 	    }
5203 	}
5204       break;
5205 
5206     case IOR:
5207     case XOR:
5208       /* If we have IOR or XOR, and the constant can be loaded in a
5209 	 single instruction, and we can find a temporary to put it in,
5210 	 then this can be done in two instructions instead of 3-4.  */
5211       if (subtargets
5212 	  /* TARGET can't be NULL if SUBTARGETS is 0 */
5213 	  || (reload_completed && !reg_mentioned_p (target, source)))
5214 	{
5215 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5216 	    {
5217 	      if (generate)
5218 		{
5219 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5220 
5221 		  emit_constant_insn (cond,
5222 				      gen_rtx_SET (sub, GEN_INT (val)));
5223 		  emit_constant_insn (cond,
5224 				      gen_rtx_SET (target,
5225 						   gen_rtx_fmt_ee (code, mode,
5226 								   source, sub)));
5227 		}
5228 	      return 2;
5229 	    }
5230 	}
5231 
5232       if (code == XOR)
5233 	break;
5234 
5235       /*  Convert.
5236 	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
5237 	                    followed by 0s, e.g. 0xfff00000)
5238 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5239 
5240 	  This can be done in 2 instructions by using shifts with mov or mvn.
5241 	  e.g. for
5242 	  x = x | 0xfff00000;
5243 	  we generate.
5244 	  mvn	r0, r0, asl #12
5245 	  mvn	r0, r0, lsr #12  */
5246       if (set_sign_bit_copies > 8
5247 	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5248 	{
5249 	  if (generate)
5250 	    {
5251 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5252 	      rtx shift = GEN_INT (set_sign_bit_copies);
5253 
5254 	      emit_constant_insn
5255 		(cond,
5256 		 gen_rtx_SET (sub,
5257 			      gen_rtx_NOT (mode,
5258 					   gen_rtx_ASHIFT (mode,
5259 							   source,
5260 							   shift))));
5261 	      emit_constant_insn
5262 		(cond,
5263 		 gen_rtx_SET (target,
5264 			      gen_rtx_NOT (mode,
5265 					   gen_rtx_LSHIFTRT (mode, sub,
5266 							     shift))));
5267 	    }
5268 	  return 2;
5269 	}
5270 
5271       /* Convert
5272 	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
5273 	   to
5274 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5275 
5276 	  For example, r0 = r0 | 0xfff
5277 	       mvn	r0, r0, lsr #12
5278 	       mvn	r0, r0, asl #12
5279 
5280       */
5281       if (set_zero_bit_copies > 8
5282 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5283 	{
5284 	  if (generate)
5285 	    {
5286 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5287 	      rtx shift = GEN_INT (set_zero_bit_copies);
5288 
5289 	      emit_constant_insn
5290 		(cond,
5291 		 gen_rtx_SET (sub,
5292 			      gen_rtx_NOT (mode,
5293 					   gen_rtx_LSHIFTRT (mode,
5294 							     source,
5295 							     shift))));
5296 	      emit_constant_insn
5297 		(cond,
5298 		 gen_rtx_SET (target,
5299 			      gen_rtx_NOT (mode,
5300 					   gen_rtx_ASHIFT (mode, sub,
5301 							   shift))));
5302 	    }
5303 	  return 2;
5304 	}
5305 
5306       /* This will never be reached for Thumb2 because orn is a valid
5307 	 instruction. This is for Thumb1 and the ARM 32 bit cases.
5308 
5309 	 x = y | constant (such that ~constant is a valid constant)
5310 	 Transform this to
5311 	 x = ~(~y & ~constant).
5312       */
5313       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5314 	{
5315 	  if (generate)
5316 	    {
5317 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5318 	      emit_constant_insn (cond,
5319 				  gen_rtx_SET (sub,
5320 					       gen_rtx_NOT (mode, source)));
5321 	      source = sub;
5322 	      if (subtargets)
5323 		sub = gen_reg_rtx (mode);
5324 	      emit_constant_insn (cond,
5325 				  gen_rtx_SET (sub,
5326 					       gen_rtx_AND (mode, source,
5327 							    GEN_INT (temp1))));
5328 	      emit_constant_insn (cond,
5329 				  gen_rtx_SET (target,
5330 					       gen_rtx_NOT (mode, sub)));
5331 	    }
5332 	  return 3;
5333 	}
5334       break;
5335 
5336     case AND:
5337       /* See if two shifts will do 2 or more insn's worth of work.  */
5338       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5339 	{
5340 	  HOST_WIDE_INT shift_mask = ((0xffffffff
5341 				       << (32 - clear_sign_bit_copies))
5342 				      & 0xffffffff);
5343 
5344 	  if ((remainder | shift_mask) != 0xffffffff)
5345 	    {
5346 	      HOST_WIDE_INT new_val
5347 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5348 
5349 	      if (generate)
5350 		{
5351 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5352 		  insns = arm_gen_constant (AND, SImode, cond, new_val,
5353 					    new_src, source, subtargets, 1);
5354 		  source = new_src;
5355 		}
5356 	      else
5357 		{
5358 		  rtx targ = subtargets ? NULL_RTX : target;
5359 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5360 					    targ, source, subtargets, 0);
5361 		}
5362 	    }
5363 
5364 	  if (generate)
5365 	    {
5366 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5367 	      rtx shift = GEN_INT (clear_sign_bit_copies);
5368 
5369 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
5370 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
5371 	    }
5372 
5373 	  return insns + 2;
5374 	}
5375 
5376       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5377 	{
5378 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5379 
5380 	  if ((remainder | shift_mask) != 0xffffffff)
5381 	    {
5382 	      HOST_WIDE_INT new_val
5383 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5384 	      if (generate)
5385 		{
5386 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5387 
5388 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5389 					    new_src, source, subtargets, 1);
5390 		  source = new_src;
5391 		}
5392 	      else
5393 		{
5394 		  rtx targ = subtargets ? NULL_RTX : target;
5395 
5396 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5397 					    targ, source, subtargets, 0);
5398 		}
5399 	    }
5400 
5401 	  if (generate)
5402 	    {
5403 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5404 	      rtx shift = GEN_INT (clear_zero_bit_copies);
5405 
5406 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
5407 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
5408 	    }
5409 
5410 	  return insns + 2;
5411 	}
5412 
5413       break;
5414 
5415     default:
5416       break;
5417     }
5418 
5419   /* Calculate what the instruction sequences would be if we generated it
5420      normally, negated, or inverted.  */
5421   if (code == AND)
5422     /* AND cannot be split into multiple insns, so invert and use BIC.  */
5423     insns = 99;
5424   else
5425     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5426 
5427   if (can_negate)
5428     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5429 					    &neg_immediates);
5430   else
5431     neg_insns = 99;
5432 
5433   if (can_invert || final_invert)
5434     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5435 					    &inv_immediates);
5436   else
5437     inv_insns = 99;
5438 
5439   immediates = &pos_immediates;
5440 
5441   /* Is the negated immediate sequence more efficient?  */
5442   if (neg_insns < insns && neg_insns <= inv_insns)
5443     {
5444       insns = neg_insns;
5445       immediates = &neg_immediates;
5446     }
5447   else
5448     can_negate = 0;
5449 
5450   /* Is the inverted immediate sequence more efficient?
5451      We must allow for an extra NOT instruction for XOR operations, although
5452      there is some chance that the final 'mvn' will get optimized later.  */
5453   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5454     {
5455       insns = inv_insns;
5456       immediates = &inv_immediates;
5457     }
5458   else
5459     {
5460       can_invert = 0;
5461       final_invert = 0;
5462     }
5463 
5464   /* Now output the chosen sequence as instructions.  */
5465   if (generate)
5466     {
5467       for (i = 0; i < insns; i++)
5468 	{
5469 	  rtx new_src, temp1_rtx;
5470 
5471 	  temp1 = immediates->i[i];
5472 
5473 	  if (code == SET || code == MINUS)
5474 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
5475 	  else if ((final_invert || i < (insns - 1)) && subtargets)
5476 	    new_src = gen_reg_rtx (mode);
5477 	  else
5478 	    new_src = target;
5479 
5480 	  if (can_invert)
5481 	    temp1 = ~temp1;
5482 	  else if (can_negate)
5483 	    temp1 = -temp1;
5484 
5485 	  temp1 = trunc_int_for_mode (temp1, mode);
5486 	  temp1_rtx = GEN_INT (temp1);
5487 
5488 	  if (code == SET)
5489 	    ;
5490 	  else if (code == MINUS)
5491 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5492 	  else
5493 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5494 
5495 	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5496 	  source = new_src;
5497 
5498 	  if (code == SET)
5499 	    {
5500 	      can_negate = can_invert;
5501 	      can_invert = 0;
5502 	      code = PLUS;
5503 	    }
5504 	  else if (code == MINUS)
5505 	    code = PLUS;
5506 	}
5507     }
5508 
5509   if (final_invert)
5510     {
5511       if (generate)
5512 	emit_constant_insn (cond, gen_rtx_SET (target,
5513 					       gen_rtx_NOT (mode, source)));
5514       insns++;
5515     }
5516 
5517   return insns;
5518 }
5519 
5520 /* Return TRUE if op is a constant where both the low and top words are
5521    suitable for RSB/RSC instructions.  This is never true for Thumb, since
5522    we do not have RSC in that case.  */
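/* For example, the DImode constant 0x0000000100000001 satisfies this test,
   since both 32-bit halves equal 1, which is a valid immediate.  */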
5523 static bool
5524 arm_const_double_prefer_rsbs_rsc (rtx op)
5525 {
5526   /* Thumb lacks RSC, so we never prefer that sequence.  */
5527   if (TARGET_THUMB || !CONST_INT_P (op))
5528     return false;
5529   HOST_WIDE_INT hi, lo;
5530   lo = UINTVAL (op) & 0xffffffffULL;
5531   hi = UINTVAL (op) >> 32;
5532   return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5533 }
5534 
5535 /* Canonicalize a comparison so that we are more likely to recognize it.
5536    This can be done for a few constant compares, where we can make the
5537    immediate value easier to load.  */
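/* For example, a comparison (x <= 0x3ff) is rewritten as (x < 0x400),
   since 0x400 is a valid immediate while 0x3ff is not.  */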
5538 
5539 static void
5540 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5541 			     bool op0_preserve_value)
5542 {
5543   machine_mode mode;
5544   unsigned HOST_WIDE_INT i, maxval;
5545 
5546   mode = GET_MODE (*op0);
5547   if (mode == VOIDmode)
5548     mode = GET_MODE (*op1);
5549 
5550   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5551 
5552   /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
5553      ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
5554      either reversed or (for constant OP1) adjusted to GE/LT.
5555      Similarly for GTU/LEU in Thumb mode.  */
5556   if (mode == DImode)
5557     {
5558 
5559       if (*code == GT || *code == LE
5560 	  || *code == GTU || *code == LEU)
5561 	{
5562 	  /* Missing comparison.  First try to use an available
5563 	     comparison.  */
5564 	  if (CONST_INT_P (*op1))
5565 	    {
5566 	      i = INTVAL (*op1);
5567 	      switch (*code)
5568 		{
5569 		case GT:
5570 		case LE:
5571 		  if (i != maxval)
5572 		    {
5573 		      /* Try to convert to GE/LT, unless that would be more
5574 			 expensive.  */
5575 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5576 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5577 			return;
5578 		      *op1 = GEN_INT (i + 1);
5579 		      *code = *code == GT ? GE : LT;
5580 		    }
5581 		  else
5582 		    {
5583 		      /* GT maxval is always false, LE maxval is always true.
5584 			 We can't fold that away here as we must make a
5585 			 comparison, but we can fold them to comparisons
5586 			 with the same result that can be handled:
5587 			   op0 GT maxval -> op0 LT minval
5588 			   op0 LE maxval -> op0 GE minval
5589 			 where minval = (-maxval - 1).  */
5590 		      *op1 = GEN_INT (-maxval - 1);
5591 		      *code = *code == GT ? LT : GE;
5592 		    }
5593 		  return;
5594 
5595 		case GTU:
5596 		case LEU:
5597 		  if (i != ~((unsigned HOST_WIDE_INT) 0))
5598 		    {
5599 		      /* Try to convert to GEU/LTU, unless that would
5600 			 be more expensive.  */
5601 		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5602 			  && arm_const_double_prefer_rsbs_rsc (*op1))
5603 			return;
5604 		      *op1 = GEN_INT (i + 1);
5605 		      *code = *code == GTU ? GEU : LTU;
5606 		    }
5607 		  else
5608 		    {
5609 		      /* GTU ~0 is always false, LEU ~0 is always true.
5610 			 We can't fold that away here as we must make a
5611 			 comparison, but we can fold them to comparisons
5612 			 with the same result that can be handled:
5613 			   op0 GTU ~0 -> op0 LTU 0
5614 			   op0 LEU ~0 -> op0 GEU 0.  */
5615 		      *op1 = const0_rtx;
5616 		      *code = *code == GTU ? LTU : GEU;
5617 		    }
5618 		  return;
5619 
5620 		default:
5621 		  gcc_unreachable ();
5622 		}
5623 	    }
5624 
5625 	  if (!op0_preserve_value)
5626 	    {
5627 	      std::swap (*op0, *op1);
5628 	      *code = (int)swap_condition ((enum rtx_code)*code);
5629 	    }
5630 	}
5631       return;
5632     }
5633 
5634   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5635      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5636      to facilitate possible combining with a cmp into 'ands'.  */
5637   if (mode == SImode
5638       && GET_CODE (*op0) == ZERO_EXTEND
5639       && GET_CODE (XEXP (*op0, 0)) == SUBREG
5640       && GET_MODE (XEXP (*op0, 0)) == QImode
5641       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5642       && subreg_lowpart_p (XEXP (*op0, 0))
5643       && *op1 == const0_rtx)
5644     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5645 			GEN_INT (255));
5646 
5647   /* Comparisons smaller than DImode.  Only adjust comparisons against
5648      an out-of-range constant.  */
5649   if (!CONST_INT_P (*op1)
5650       || const_ok_for_arm (INTVAL (*op1))
5651       || const_ok_for_arm (- INTVAL (*op1)))
5652     return;
5653 
5654   i = INTVAL (*op1);
5655 
5656   switch (*code)
5657     {
5658     case EQ:
5659     case NE:
5660       return;
5661 
5662     case GT:
5663     case LE:
5664       if (i != maxval
5665 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5666 	{
5667 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5668 	  *code = *code == GT ? GE : LT;
5669 	  return;
5670 	}
5671       break;
5672 
5673     case GE:
5674     case LT:
5675       if (i != ~maxval
5676 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5677 	{
5678 	  *op1 = GEN_INT (i - 1);
5679 	  *code = *code == GE ? GT : LE;
5680 	  return;
5681 	}
5682       break;
5683 
5684     case GTU:
5685     case LEU:
5686       if (i != ~((unsigned HOST_WIDE_INT) 0)
5687 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5688 	{
5689 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5690 	  *code = *code == GTU ? GEU : LTU;
5691 	  return;
5692 	}
5693       break;
5694 
5695     case GEU:
5696     case LTU:
5697       if (i != 0
5698 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5699 	{
5700 	  *op1 = GEN_INT (i - 1);
5701 	  *code = *code == GEU ? GTU : LEU;
5702 	  return;
5703 	}
5704       break;
5705 
5706     default:
5707       gcc_unreachable ();
5708     }
5709 }
5710 
5711 
5712 /* Define how to find the value returned by a function.  */
5713 
5714 static rtx
5715 arm_function_value (const_tree type, const_tree func,
5716 		   bool outgoing ATTRIBUTE_UNUSED)
5717 {
5718   machine_mode mode;
5719   int unsignedp ATTRIBUTE_UNUSED;
5720   rtx r ATTRIBUTE_UNUSED;
5721 
5722   mode = TYPE_MODE (type);
5723 
5724   if (TARGET_AAPCS_BASED)
5725     return aapcs_allocate_return_reg (mode, type, func);
5726 
5727   /* Promote integer types.  */
5728   if (INTEGRAL_TYPE_P (type))
5729     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5730 
5731   /* Promote small structs returned in a register to full-word size
5732      for big-endian AAPCS.  */
5733   if (arm_return_in_msb (type))
5734     {
5735       HOST_WIDE_INT size = int_size_in_bytes (type);
5736       if (size % UNITS_PER_WORD != 0)
5737 	{
5738 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5739 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5740 	}
5741     }
5742 
5743   return arm_libcall_value_1 (mode);
5744 }
5745 
5746 /* libcall hashtable helpers.  */
5747 
5748 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5749 {
5750   static inline hashval_t hash (const rtx_def *);
5751   static inline bool equal (const rtx_def *, const rtx_def *);
5752   static inline void remove (rtx_def *);
5753 };
5754 
5755 inline bool
5756 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5757 {
5758   return rtx_equal_p (p1, p2);
5759 }
5760 
5761 inline hashval_t
5762 libcall_hasher::hash (const rtx_def *p1)
5763 {
5764   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5765 }
5766 
5767 typedef hash_table<libcall_hasher> libcall_table_type;
5768 
5769 static void
5770 add_libcall (libcall_table_type *htab, rtx libcall)
5771 {
5772   *htab->find_slot (libcall, INSERT) = libcall;
5773 }
5774 
5775 static bool
5776 arm_libcall_uses_aapcs_base (const_rtx libcall)
5777 {
5778   static bool init_done = false;
5779   static libcall_table_type *libcall_htab = NULL;
5780 
5781   if (!init_done)
5782     {
5783       init_done = true;
5784 
5785       libcall_htab = new libcall_table_type (31);
5786       add_libcall (libcall_htab,
5787 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5788       add_libcall (libcall_htab,
5789 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5790       add_libcall (libcall_htab,
5791 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5792       add_libcall (libcall_htab,
5793 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5794 
5795       add_libcall (libcall_htab,
5796 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5797       add_libcall (libcall_htab,
5798 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5799       add_libcall (libcall_htab,
5800 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5801       add_libcall (libcall_htab,
5802 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5803 
5804       add_libcall (libcall_htab,
5805 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
5806       add_libcall (libcall_htab,
5807 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5808       add_libcall (libcall_htab,
5809 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
5810       add_libcall (libcall_htab,
5811 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
5812       add_libcall (libcall_htab,
5813 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
5814       add_libcall (libcall_htab,
5815 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
5816       add_libcall (libcall_htab,
5817 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
5818       add_libcall (libcall_htab,
5819 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
5820       add_libcall (libcall_htab,
5821 		   convert_optab_libfunc (sfix_optab, SImode, SFmode));
5822       add_libcall (libcall_htab,
5823 		   convert_optab_libfunc (ufix_optab, SImode, SFmode));
5824 
5825       /* Values from double-precision helper functions are returned in core
5826 	 registers if the selected core only supports single-precision
5827 	 arithmetic, even if we are using the hard-float ABI.  The same is
5828 	 true for single-precision helpers except in case of MVE, because in
5829 	 MVE we will be using the hard-float ABI on a CPU which doesn't support
5830 	 single-precision operations in hardware.  In MVE the following check
5831 	 enables use of emulation for the single-precision arithmetic
5832 	 operations.  */
5833       if (TARGET_HAVE_MVE)
5834 	{
5835 	  add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5836 	  add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5837 	  add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5838 	  add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5839 	  add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5840 	  add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5841 	  add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5842 	  add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5843 	  add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5844 	  add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5845 	  add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5846 	}
5847       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5848       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5849       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5850       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5851       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5852       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5853       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5854       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5855       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5856       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5857       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5858       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5859 							SFmode));
5860       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5861 							DFmode));
5862       add_libcall (libcall_htab,
5863 		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5864     }
5865 
5866   return libcall && libcall_htab->find (libcall) != NULL;
5867 }
5868 
5869 static rtx
5870 arm_libcall_value_1 (machine_mode mode)
5871 {
5872   if (TARGET_AAPCS_BASED)
5873     return aapcs_libcall_value (mode);
5874   else if (TARGET_IWMMXT_ABI
5875 	   && arm_vector_mode_supported_p (mode))
5876     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5877   else
5878     return gen_rtx_REG (mode, ARG_REGISTER (1));
5879 }
5880 
5881 /* Define how to find the value returned by a library function
5882    assuming the value has mode MODE.  */
5883 
5884 static rtx
5885 arm_libcall_value (machine_mode mode, const_rtx libcall)
5886 {
5887   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5888       && GET_MODE_CLASS (mode) == MODE_FLOAT)
5889     {
5890       /* The following libcalls return their result in integer registers,
5891 	 even though they return a floating point value.  */
5892       if (arm_libcall_uses_aapcs_base (libcall))
5893 	return gen_rtx_REG (mode, ARG_REGISTER(1));
5894 
5895     }
5896 
5897   return arm_libcall_value_1 (mode);
5898 }
5899 
5900 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5901 
5902 static bool
5903 arm_function_value_regno_p (const unsigned int regno)
5904 {
5905   if (regno == ARG_REGISTER (1)
5906       || (TARGET_32BIT
5907 	  && TARGET_AAPCS_BASED
5908 	  && TARGET_HARD_FLOAT
5909 	  && regno == FIRST_VFP_REGNUM)
5910       || (TARGET_IWMMXT_ABI
5911 	  && regno == FIRST_IWMMXT_REGNUM))
5912     return true;
5913 
5914   return false;
5915 }
5916 
5917 /* Determine the amount of memory needed to store the possible return
5918    registers of an untyped call.  */
5919 int
5920 arm_apply_result_size (void)
5921 {
5922   int size = 16;
5923 
5924   if (TARGET_32BIT)
5925     {
5926       if (TARGET_HARD_FLOAT_ABI)
5927 	size += 32;
5928       if (TARGET_IWMMXT_ABI)
5929 	size += 8;
5930     }
5931 
5932   return size;
5933 }
5934 
5935 /* Decide whether TYPE should be returned in memory (true)
5936    or in a register (false).  FNTYPE is the type of the function making
5937    the call.  */
5938 static bool
5939 arm_return_in_memory (const_tree type, const_tree fntype)
5940 {
5941   HOST_WIDE_INT size;
5942 
5943   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5944 
5945   if (TARGET_AAPCS_BASED)
5946     {
5947       /* Simple, non-aggregate types (i.e. not including vectors and
5948 	 complex) are always returned in a register (or registers).
5949 	 We don't care about which register here, so we can short-cut
5950 	 some of the detail.  */
5951       if (!AGGREGATE_TYPE_P (type)
5952 	  && TREE_CODE (type) != VECTOR_TYPE
5953 	  && TREE_CODE (type) != COMPLEX_TYPE)
5954 	return false;
5955 
5956       /* Any return value that is no larger than one word can be
5957 	 returned in r0.  */
5958       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5959 	return false;
5960 
5961       /* Check any available co-processors to see if they accept the
5962 	 type as a register candidate (VFP, for example, can return
5963 	 some aggregates in consecutive registers).  These aren't
5964 	 available if the call is variadic.  */
5965       if (aapcs_select_return_coproc (type, fntype) >= 0)
5966 	return false;
5967 
5968       /* Vector values should be returned using ARM registers, not
5969 	 memory (unless they're over 16 bytes, which will break since
5970 	 we only have four call-clobbered registers to play with).  */
5971       if (TREE_CODE (type) == VECTOR_TYPE)
5972 	return (size < 0 || size > (4 * UNITS_PER_WORD));
5973 
5974       /* The rest go in memory.  */
5975       return true;
5976     }
5977 
5978   if (TREE_CODE (type) == VECTOR_TYPE)
5979     return (size < 0 || size > (4 * UNITS_PER_WORD));
5980 
5981   if (!AGGREGATE_TYPE_P (type) &&
5982       (TREE_CODE (type) != VECTOR_TYPE))
5983     /* All simple types are returned in registers.  */
5984     return false;
5985 
5986   if (arm_abi != ARM_ABI_APCS)
5987     {
5988       /* ATPCS and later return aggregate types in memory only if they are
5989 	 larger than a word (or are variable size).  */
5990       return (size < 0 || size > UNITS_PER_WORD);
5991     }
5992 
5993   /* For the arm-wince targets we choose to be compatible with Microsoft's
5994      ARM and Thumb compilers, which always return aggregates in memory.  */
5995 #ifndef ARM_WINCE
5996   /* All structures/unions bigger than one word are returned in memory.
5997      Also catch the case where int_size_in_bytes returns -1.  In this case
5998      the aggregate is either huge or of variable size, and in either case
5999      we will want to return it via memory and not in a register.  */
6000   if (size < 0 || size > UNITS_PER_WORD)
6001     return true;
6002 
6003   if (TREE_CODE (type) == RECORD_TYPE)
6004     {
6005       tree field;
6006 
6007       /* For a struct the APCS says that we only return in a register
6008 	 if the type is 'integer like' and every addressable element
6009 	 has an offset of zero.  For practical purposes this means
6010 	 that the structure can have at most one non bit-field element
6011 	 and that this element must be the first one in the structure.  */
6012 
6013       /* Find the first field, ignoring non FIELD_DECL things which will
6014 	 have been created by C++.  */
6015       /* NOTE: This code is deprecated and has not been updated to handle
6016 	 DECL_FIELD_ABI_IGNORED.  */
6017       for (field = TYPE_FIELDS (type);
6018 	   field && TREE_CODE (field) != FIELD_DECL;
6019 	   field = DECL_CHAIN (field))
6020 	continue;
6021 
6022       if (field == NULL)
6023 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
6024 
6025       /* Check that the first field is valid for returning in a register.  */
6026 
6027       /* ... Floats are not allowed */
6028       if (FLOAT_TYPE_P (TREE_TYPE (field)))
6029 	return true;
6030 
6031       /* ... Aggregates that are not themselves valid for returning in
6032 	 a register are not allowed.  */
6033       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6034 	return true;
6035 
6036       /* Now check the remaining fields, if any.  Only bitfields are allowed,
6037 	 since they are not addressable.  */
6038       for (field = DECL_CHAIN (field);
6039 	   field;
6040 	   field = DECL_CHAIN (field))
6041 	{
6042 	  if (TREE_CODE (field) != FIELD_DECL)
6043 	    continue;
6044 
6045 	  if (!DECL_BIT_FIELD_TYPE (field))
6046 	    return true;
6047 	}
6048 
6049       return false;
6050     }
6051 
6052   if (TREE_CODE (type) == UNION_TYPE)
6053     {
6054       tree field;
6055 
6056       /* Unions can be returned in registers if every element is
6057 	 integral, or can be returned in an integer register.  */
6058       for (field = TYPE_FIELDS (type);
6059 	   field;
6060 	   field = DECL_CHAIN (field))
6061 	{
6062 	  if (TREE_CODE (field) != FIELD_DECL)
6063 	    continue;
6064 
6065 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
6066 	    return true;
6067 
6068 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6069 	    return true;
6070 	}
6071 
6072       return false;
6073     }
6074 #endif /* not ARM_WINCE */
6075 
6076   /* Return all other types in memory.  */
6077   return true;
6078 }
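
/* A minimal, non-authoritative sketch of how the legacy (APCS/ATPCS)
   rules above play out:

     struct s1 { int x; };       -> one word, returned in a register (r0)
     struct s2 { int x, y; };    -> larger than a word, returned in memory
     struct s3 { float f; };     -> APCS only: the first field is a float,
                                    so the struct is not 'integer like'
                                    and is returned in memory

   The exact behaviour depends on arm_abi and the options in force.  */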
6079 
6080 const struct pcs_attribute_arg
6081 {
6082   const char *arg;
6083   enum arm_pcs value;
6084 } pcs_attribute_args[] =
6085   {
6086     {"aapcs", ARM_PCS_AAPCS},
6087     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6088 #if 0
6089     /* We could recognize these, but changes would be needed elsewhere
6090      * to implement them.  */
6091     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6092     {"atpcs", ARM_PCS_ATPCS},
6093     {"apcs", ARM_PCS_APCS},
6094 #endif
6095     {NULL, ARM_PCS_UNKNOWN}
6096   };
6097 
6098 static enum arm_pcs
6099 arm_pcs_from_attribute (tree attr)
6100 {
6101   const struct pcs_attribute_arg *ptr;
6102   const char *arg;
6103 
6104   /* Get the value of the argument.  */
6105   if (TREE_VALUE (attr) == NULL_TREE
6106       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6107     return ARM_PCS_UNKNOWN;
6108 
6109   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6110 
6111   /* Check it against the list of known arguments.  */
6112   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6113     if (streq (arg, ptr->arg))
6114       return ptr->value;
6115 
6116   /* An unrecognized PCS name.  */
6117   return ARM_PCS_UNKNOWN;
6118 }
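
/* For reference, the attribute parsed here is normally written as, e.g.:

     double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   Only the strings listed in pcs_attribute_args are recognized; anything
   else comes back as ARM_PCS_UNKNOWN and is warned about by
   arm_handle_pcs_attribute below.  */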
6119 
6120 /* Get the PCS variant to use for this call.  TYPE is the function's type
6121    specification, DECL is the specific declaration.  DECL may be null if
6122    the call could be indirect or if this is a library call.  */
6123 static enum arm_pcs
6124 arm_get_pcs_model (const_tree type, const_tree decl)
6125 {
6126   bool user_convention = false;
6127   enum arm_pcs user_pcs = arm_pcs_default;
6128   tree attr;
6129 
6130   gcc_assert (type);
6131 
6132   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6133   if (attr)
6134     {
6135       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6136       user_convention = true;
6137     }
6138 
6139   if (TARGET_AAPCS_BASED)
6140     {
6141       /* Detect varargs functions.  These always use the base rules
6142 	 (no argument is ever a candidate for a co-processor
6143 	 register).  */
6144       bool base_rules = stdarg_p (type);
6145 
6146       if (user_convention)
6147 	{
6148 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6149 	    sorry ("non-AAPCS derived PCS variant");
6150 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6151 	    error ("variadic functions must use the base AAPCS variant");
6152 	}
6153 
6154       if (base_rules)
6155 	return ARM_PCS_AAPCS;
6156       else if (user_convention)
6157 	return user_pcs;
6158       else if (decl && flag_unit_at_a_time)
6159 	{
6160 	  /* Local functions never leak outside this compilation unit,
6161 	     so we are free to use whatever conventions are
6162 	     appropriate.  */
6163 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
6164 	  cgraph_node *local_info_node
6165 	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6166 	  if (local_info_node && local_info_node->local)
6167 	    return ARM_PCS_AAPCS_LOCAL;
6168 	}
6169     }
6170   else if (user_convention && user_pcs != arm_pcs_default)
6171     sorry ("PCS variant");
6172 
6173   /* For everything else we use the target's default.  */
6174   return arm_pcs_default;
6175 }
6176 
6177 
6178 static void
6179 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6180 		    const_tree fntype ATTRIBUTE_UNUSED,
6181 		    rtx libcall ATTRIBUTE_UNUSED,
6182 		    const_tree fndecl ATTRIBUTE_UNUSED)
6183 {
6184   /* Record the unallocated VFP registers.  */
6185   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6186   pcum->aapcs_vfp_reg_alloc = 0;
6187 }
6188 
6189 /* Bitmasks that indicate whether earlier versions of GCC would have
6190    taken a different path through the ABI logic.  This should result in
6191    a -Wpsabi warning if the earlier path led to a different ABI decision.
6192 
6193    WARN_PSABI_EMPTY_CXX17_BASE
6194       Indicates that the type includes an artificial empty C++17 base field
6195       that, prior to GCC 10.1, would prevent the type from being treated as
6196       a HFA or HVA.  See PR94711 for details.
6197 
6198    WARN_PSABI_NO_UNIQUE_ADDRESS
6199       Indicates that the type includes an empty [[no_unique_address]] field
6200       that, prior to GCC 10.1, would prevent the type from being treated as
6201       a HFA or HVA.  */
6202 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6203 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
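
/* A hedged illustration of the two cases (C++ source):

     struct empty {};
     struct hfa1 : empty { float f0, f1; };
        - under C++17 the empty base becomes an artificial field; before
          GCC 10.1 it stopped hfa1 being treated as an HFA
          (WARN_PSABI_EMPTY_CXX17_BASE).

     struct hfa2 { [[no_unique_address]] empty e; float f0, f1; };
        - the empty member used to have the same effect
          (WARN_PSABI_NO_UNIQUE_ADDRESS).  */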
6204 
6205 /* Walk down the type tree of TYPE counting consecutive base elements.
6206    If *MODEP is VOIDmode, then set it to the first valid floating point
6207    type.  If a non-floating point type is found, or if a floating point
6208    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6209    otherwise return the count in the sub-tree.
6210 
6211    The WARN_PSABI_FLAGS argument allows the caller to check whether this
6212    function has changed its behavior relative to earlier versions of GCC.
6213    Normally the argument should be nonnull and point to a zero-initialized
6214    variable.  The function then records whether the ABI decision might
6215    be affected by a known fix to the ABI logic, setting the associated
6216    WARN_PSABI_* bits if so.
6217 
6218    When the argument is instead a null pointer, the function tries to
6219    simulate the behavior of GCC before all such ABI fixes were made.
6220    This is useful to check whether the function returns something
6221    different after the ABI fixes.  */
6222 static int
6223 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6224 			 unsigned int *warn_psabi_flags)
6225 {
6226   machine_mode mode;
6227   HOST_WIDE_INT size;
6228 
6229   switch (TREE_CODE (type))
6230     {
6231     case REAL_TYPE:
6232       mode = TYPE_MODE (type);
6233       if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6234 	return -1;
6235 
6236       if (*modep == VOIDmode)
6237 	*modep = mode;
6238 
6239       if (*modep == mode)
6240 	return 1;
6241 
6242       break;
6243 
6244     case COMPLEX_TYPE:
6245       mode = TYPE_MODE (TREE_TYPE (type));
6246       if (mode != DFmode && mode != SFmode)
6247 	return -1;
6248 
6249       if (*modep == VOIDmode)
6250 	*modep = mode;
6251 
6252       if (*modep == mode)
6253 	return 2;
6254 
6255       break;
6256 
6257     case VECTOR_TYPE:
6258       /* Use V2SImode and V4SImode as representatives of all 64-bit
6259 	 and 128-bit vector types, whether or not those modes are
6260 	 supported with the present options.  */
6261       size = int_size_in_bytes (type);
6262       switch (size)
6263 	{
6264 	case 8:
6265 	  mode = V2SImode;
6266 	  break;
6267 	case 16:
6268 	  mode = V4SImode;
6269 	  break;
6270 	default:
6271 	  return -1;
6272 	}
6273 
6274       if (*modep == VOIDmode)
6275 	*modep = mode;
6276 
6277       /* Vector modes are considered to be opaque: two vectors are
6278 	 equivalent for the purposes of being homogeneous aggregates
6279 	 if they are the same size.  */
6280       if (*modep == mode)
6281 	return 1;
6282 
6283       break;
6284 
6285     case ARRAY_TYPE:
6286       {
6287 	int count;
6288 	tree index = TYPE_DOMAIN (type);
6289 
6290 	/* Can't handle incomplete types nor sizes that are not
6291 	   fixed.  */
6292 	if (!COMPLETE_TYPE_P (type)
6293 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6294 	  return -1;
6295 
6296 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6297 					 warn_psabi_flags);
6298 	if (count == -1
6299 	    || !index
6300 	    || !TYPE_MAX_VALUE (index)
6301 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6302 	    || !TYPE_MIN_VALUE (index)
6303 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6304 	    || count < 0)
6305 	  return -1;
6306 
6307 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6308 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6309 
6310 	/* There must be no padding.  */
6311 	if (wi::to_wide (TYPE_SIZE (type))
6312 	    != count * GET_MODE_BITSIZE (*modep))
6313 	  return -1;
6314 
6315 	return count;
6316       }
6317 
6318     case RECORD_TYPE:
6319       {
6320 	int count = 0;
6321 	int sub_count;
6322 	tree field;
6323 
6324 	/* Can't handle incomplete types nor sizes that are not
6325 	   fixed.  */
6326 	if (!COMPLETE_TYPE_P (type)
6327 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6328 	  return -1;
6329 
6330 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6331 	  {
6332 	    if (TREE_CODE (field) != FIELD_DECL)
6333 	      continue;
6334 
6335 	    if (DECL_FIELD_ABI_IGNORED (field))
6336 	      {
6337 		/* See whether this is something that earlier versions of
6338 		   GCC failed to ignore.  */
6339 		unsigned int flag;
6340 		if (lookup_attribute ("no_unique_address",
6341 				      DECL_ATTRIBUTES (field)))
6342 		  flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6343 		else if (cxx17_empty_base_field_p (field))
6344 		  flag = WARN_PSABI_EMPTY_CXX17_BASE;
6345 		else
6346 		  /* No compatibility problem.  */
6347 		  continue;
6348 
6349 		/* Simulate the old behavior when WARN_PSABI_FLAGS is null.  */
6350 		if (warn_psabi_flags)
6351 		  {
6352 		    *warn_psabi_flags |= flag;
6353 		    continue;
6354 		  }
6355 	      }
6356 
6357 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6358 						 warn_psabi_flags);
6359 	    if (sub_count < 0)
6360 	      return -1;
6361 	    count += sub_count;
6362 	  }
6363 
6364 	/* There must be no padding.  */
6365 	if (wi::to_wide (TYPE_SIZE (type))
6366 	    != count * GET_MODE_BITSIZE (*modep))
6367 	  return -1;
6368 
6369 	return count;
6370       }
6371 
6372     case UNION_TYPE:
6373     case QUAL_UNION_TYPE:
6374       {
6375 	/* These aren't very interesting except in a degenerate case.  */
6376 	int count = 0;
6377 	int sub_count;
6378 	tree field;
6379 
6380 	/* Can't handle incomplete types nor sizes that are not
6381 	   fixed.  */
6382 	if (!COMPLETE_TYPE_P (type)
6383 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6384 	  return -1;
6385 
6386 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6387 	  {
6388 	    if (TREE_CODE (field) != FIELD_DECL)
6389 	      continue;
6390 
6391 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6392 						 warn_psabi_flags);
6393 	    if (sub_count < 0)
6394 	      return -1;
6395 	    count = count > sub_count ? count : sub_count;
6396 	  }
6397 
6398 	/* There must be no padding.  */
6399 	if (wi::to_wide (TYPE_SIZE (type))
6400 	    != count * GET_MODE_BITSIZE (*modep))
6401 	  return -1;
6402 
6403 	return count;
6404       }
6405 
6406     default:
6407       break;
6408     }
6409 
6410   return -1;
6411 }
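
/* Informal examples of the counting above (assuming SFmode floats and
   DFmode doubles; not an authoritative statement of the AAPCS):

     struct { double d0, d1; }      -> count 2, *modep == DFmode (an HFA)
     struct { float f[4]; }         -> count 4, *modep == SFmode (an HFA)
     struct { float f; double d; }  -> -1 (mixed base types)
     struct { float f; int i; }     -> -1 (non floating-point member)

   Only counts between 1 and 4 are usable, which is checked by
   aapcs_vfp_is_call_or_return_candidate below.  */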
6412 
6413 /* Return true if PCS_VARIANT should use VFP registers.  */
6414 static bool
6415 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6416 {
6417   if (pcs_variant == ARM_PCS_AAPCS_VFP)
6418     {
6419       static bool seen_thumb1_vfp = false;
6420 
6421       if (TARGET_THUMB1 && !seen_thumb1_vfp)
6422 	{
6423 	  sorry ("Thumb-1 hard-float VFP ABI");
6424 	  /* sorry() is not immediately fatal, so only display this once.  */
6425 	  seen_thumb1_vfp = true;
6426 	}
6427 
6428       return true;
6429     }
6430 
6431   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6432     return false;
6433 
6434   return (TARGET_32BIT && TARGET_HARD_FLOAT
6435 	  && (TARGET_VFP_DOUBLE || !is_double));
6436 }
6437 
6438 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6439    suitable for passing or returning in VFP registers for the PCS
6440    variant selected.  If it is, then *BASE_MODE is updated to contain
6441    a machine mode describing each element of the argument's type and
6442    *COUNT to hold the number of such elements.  */
6443 static bool
6444 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6445 				       machine_mode mode, const_tree type,
6446 				       machine_mode *base_mode, int *count)
6447 {
6448   machine_mode new_mode = VOIDmode;
6449 
6450   /* If we have the type information, prefer that to working things
6451      out from the mode.  */
6452   if (type)
6453     {
6454       unsigned int warn_psabi_flags = 0;
6455       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6456 					      &warn_psabi_flags);
6457       if (ag_count > 0 && ag_count <= 4)
6458 	{
6459 	  static unsigned last_reported_type_uid;
6460 	  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6461 	  int alt;
6462 	  if (warn_psabi
6463 	      && warn_psabi_flags
6464 	      && uid != last_reported_type_uid
6465 	      && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6466 		  != ag_count))
6467 	    {
6468 	      const char *url
6469 		= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6470 	      gcc_assert (alt == -1);
6471 	      last_reported_type_uid = uid;
6472 	      /* Use TYPE_MAIN_VARIANT to strip any redundant const
6473 		 qualification.  */
6474 	      if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6475 		inform (input_location, "parameter passing for argument of "
6476 			"type %qT with %<[[no_unique_address]]%> members "
6477 			"changed %{in GCC 10.1%}",
6478 			TYPE_MAIN_VARIANT (type), url);
6479 	      else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6480 		inform (input_location, "parameter passing for argument of "
6481 			"type %qT when C++17 is enabled changed to match "
6482 			"C++14 %{in GCC 10.1%}",
6483 			TYPE_MAIN_VARIANT (type), url);
6484 	    }
6485 	  *count = ag_count;
6486 	}
6487       else
6488 	return false;
6489     }
6490   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6491 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6492 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6493     {
6494       *count = 1;
6495       new_mode = mode;
6496     }
6497   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6498     {
6499       *count = 2;
6500       new_mode = (mode == DCmode ? DFmode : SFmode);
6501     }
6502   else
6503     return false;
6504 
6505 
6506   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6507     return false;
6508 
6509   *base_mode = new_mode;
6510 
6511   if (TARGET_GENERAL_REGS_ONLY)
6512     error ("argument of type %qT not permitted with -mgeneral-regs-only",
6513 	   type);
6514 
6515   return true;
6516 }
6517 
6518 static bool
6519 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6520 			       machine_mode mode, const_tree type)
6521 {
6522   int count ATTRIBUTE_UNUSED;
6523   machine_mode ag_mode ATTRIBUTE_UNUSED;
6524 
6525   if (!use_vfp_abi (pcs_variant, false))
6526     return false;
6527   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6528 						&ag_mode, &count);
6529 }
6530 
6531 static bool
6532 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6533 			     const_tree type)
6534 {
6535   if (!use_vfp_abi (pcum->pcs_variant, false))
6536     return false;
6537 
6538   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6539 						&pcum->aapcs_vfp_rmode,
6540 						&pcum->aapcs_vfp_rcount);
6541 }
6542 
6543 /* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
6544    for the behaviour of this function.  */
6545 
6546 static bool
6547 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6548 		    const_tree type  ATTRIBUTE_UNUSED)
6549 {
6550   int rmode_size
6551     = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6552   int shift = rmode_size / GET_MODE_SIZE (SFmode);
6553   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6554   int regno;
6555 
6556   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6557     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6558       {
6559 	pcum->aapcs_vfp_reg_alloc = mask << regno;
6560 	if (mode == BLKmode
6561 	    || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6562 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6563 	  {
6564 	    int i;
6565 	    int rcount = pcum->aapcs_vfp_rcount;
6566 	    int rshift = shift;
6567 	    machine_mode rmode = pcum->aapcs_vfp_rmode;
6568 	    rtx par;
6569 	    if (!(TARGET_NEON || TARGET_HAVE_MVE))
6570 	      {
6571 		/* Avoid using unsupported vector modes.  */
6572 		if (rmode == V2SImode)
6573 		  rmode = DImode;
6574 		else if (rmode == V4SImode)
6575 		  {
6576 		    rmode = DImode;
6577 		    rcount *= 2;
6578 		    rshift /= 2;
6579 		  }
6580 	      }
6581 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6582 	    for (i = 0; i < rcount; i++)
6583 	      {
6584 		rtx tmp = gen_rtx_REG (rmode,
6585 				       FIRST_VFP_REGNUM + regno + i * rshift);
6586 		tmp = gen_rtx_EXPR_LIST
6587 		  (VOIDmode, tmp,
6588 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
6589 		XVECEXP (par, 0, i) = tmp;
6590 	      }
6591 
6592 	    pcum->aapcs_reg = par;
6593 	  }
6594 	else
6595 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6596 	return true;
6597       }
6598   return false;
6599 }
6600 
6601 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
6602    comment there for the behaviour of this function.  */
6603 
6604 static rtx
6605 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6606 			       machine_mode mode,
6607 			       const_tree type ATTRIBUTE_UNUSED)
6608 {
6609   if (!use_vfp_abi (pcs_variant, false))
6610     return NULL;
6611 
6612   if (mode == BLKmode
6613       || (GET_MODE_CLASS (mode) == MODE_INT
6614 	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6615 	  && !(TARGET_NEON || TARGET_HAVE_MVE)))
6616     {
6617       int count;
6618       machine_mode ag_mode;
6619       int i;
6620       rtx par;
6621       int shift;
6622 
6623       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6624 					     &ag_mode, &count);
6625 
6626       if (!(TARGET_NEON || TARGET_HAVE_MVE))
6627 	{
6628 	  if (ag_mode == V2SImode)
6629 	    ag_mode = DImode;
6630 	  else if (ag_mode == V4SImode)
6631 	    {
6632 	      ag_mode = DImode;
6633 	      count *= 2;
6634 	    }
6635 	}
6636       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6637       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6638       for (i = 0; i < count; i++)
6639 	{
6640 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6641 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6642 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6643 	  XVECEXP (par, 0, i) = tmp;
6644 	}
6645 
6646       return par;
6647     }
6648 
6649   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6650 }
6651 
6652 static void
6653 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6654 		   machine_mode mode  ATTRIBUTE_UNUSED,
6655 		   const_tree type  ATTRIBUTE_UNUSED)
6656 {
6657   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6658   pcum->aapcs_vfp_reg_alloc = 0;
6659   return;
6660 }
6661 
6662 #define AAPCS_CP(X)				\
6663   {						\
6664     aapcs_ ## X ## _cum_init,			\
6665     aapcs_ ## X ## _is_call_candidate,		\
6666     aapcs_ ## X ## _allocate,			\
6667     aapcs_ ## X ## _is_return_candidate,	\
6668     aapcs_ ## X ## _allocate_return_reg,	\
6669     aapcs_ ## X ## _advance			\
6670   }
6671 
6672 /* Table of co-processors that can be used to pass arguments in
6673    registers.  Ideally no argument should be a candidate for more than
6674    one co-processor table entry, but the table is processed in order
6675    and stops after the first match.  If that entry then fails to put
6676    the argument into a co-processor register, the argument will go on
6677    the stack.  */
6678 static struct
6679 {
6680   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
6681   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6682 
6683   /* Return true if an argument of mode MODE (or type TYPE if MODE is
6684      BLKmode) is a candidate for this co-processor's registers; this
6685      function should ignore any position-dependent state in
6686      CUMULATIVE_ARGS and only use call-type dependent information.  */
6687   bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6688 
6689   /* Return true if the argument does get a co-processor register; it
6690      should set aapcs_reg to an RTX of the register allocated as is
6691      required for a return from FUNCTION_ARG.  */
6692   bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6693 
6694   /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6695      be returned in this co-processor's registers.  */
6696   bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6697 
6698   /* Allocate and return an RTX element to hold the return type of a call.  This
6699      routine must not fail and will only be called if is_return_candidate
6700      returned true with the same parameters.  */
6701   rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6702 
6703   /* Finish processing this argument and prepare to start processing
6704      the next one.  */
6705   void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6706 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6707   {
6708     AAPCS_CP(vfp)
6709   };
6710 
6711 #undef AAPCS_CP
6712 
6713 static int
6714 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6715 			  const_tree type)
6716 {
6717   int i;
6718 
6719   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6720     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6721       return i;
6722 
6723   return -1;
6724 }
6725 
6726 static int
6727 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6728 {
6729   /* We aren't passed a decl, so we can't check that a call is local.
6730      However, it isn't clear that that would be a win anyway, since it
6731      might limit some tail-calling opportunities.  */
6732   enum arm_pcs pcs_variant;
6733 
6734   if (fntype)
6735     {
6736       const_tree fndecl = NULL_TREE;
6737 
6738       if (TREE_CODE (fntype) == FUNCTION_DECL)
6739 	{
6740 	  fndecl = fntype;
6741 	  fntype = TREE_TYPE (fntype);
6742 	}
6743 
6744       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6745     }
6746   else
6747     pcs_variant = arm_pcs_default;
6748 
6749   if (pcs_variant != ARM_PCS_AAPCS)
6750     {
6751       int i;
6752 
6753       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6754 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6755 							TYPE_MODE (type),
6756 							type))
6757 	  return i;
6758     }
6759   return -1;
6760 }
6761 
6762 static rtx
6763 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6764 			   const_tree fntype)
6765 {
6766   /* We aren't passed a decl, so we can't check that a call is local.
6767      However, it isn't clear that that would be a win anyway, since it
6768      might limit some tail-calling opportunities.  */
6769   enum arm_pcs pcs_variant;
6770   int unsignedp ATTRIBUTE_UNUSED;
6771 
6772   if (fntype)
6773     {
6774       const_tree fndecl = NULL_TREE;
6775 
6776       if (TREE_CODE (fntype) == FUNCTION_DECL)
6777 	{
6778 	  fndecl = fntype;
6779 	  fntype = TREE_TYPE (fntype);
6780 	}
6781 
6782       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6783     }
6784   else
6785     pcs_variant = arm_pcs_default;
6786 
6787   /* Promote integer types.  */
6788   if (type && INTEGRAL_TYPE_P (type))
6789     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6790 
6791   if (pcs_variant != ARM_PCS_AAPCS)
6792     {
6793       int i;
6794 
6795       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6796 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6797 							type))
6798 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6799 							     mode, type);
6800     }
6801 
6802   /* Promote small structs returned in a register to full-word size
6803      for big-endian AAPCS.  */
6804   if (type && arm_return_in_msb (type))
6805     {
6806       HOST_WIDE_INT size = int_size_in_bytes (type);
6807       if (size % UNITS_PER_WORD != 0)
6808 	{
6809 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6810 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6811 	}
6812     }
6813 
6814   return gen_rtx_REG (mode, R0_REGNUM);
6815 }
6816 
6817 static rtx
6818 aapcs_libcall_value (machine_mode mode)
6819 {
6820   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6821       && GET_MODE_SIZE (mode) <= 4)
6822     mode = SImode;
6823 
6824   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6825 }
6826 
6827 /* Lay out a function argument using the AAPCS rules.  The rule
6828    numbers referred to here are those in the AAPCS.  */
6829 static void
6830 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6831 		  const_tree type, bool named)
6832 {
6833   int nregs, nregs2;
6834   int ncrn;
6835 
6836   /* We only need to do this once per argument.  */
6837   if (pcum->aapcs_arg_processed)
6838     return;
6839 
6840   pcum->aapcs_arg_processed = true;
6841 
6842   /* Special case: if named is false then we are handling an incoming
6843      anonymous argument which is on the stack.  */
6844   if (!named)
6845     return;
6846 
6847   /* Is this a potential co-processor register candidate?  */
6848   if (pcum->pcs_variant != ARM_PCS_AAPCS)
6849     {
6850       int slot = aapcs_select_call_coproc (pcum, mode, type);
6851       pcum->aapcs_cprc_slot = slot;
6852 
6853       /* We don't have to apply any of the rules from part B of the
6854 	 preparation phase, these are handled elsewhere in the
6855 	 compiler.  */
6856 
6857       if (slot >= 0)
6858 	{
6859 	  /* A Co-processor register candidate goes either in its own
6860 	     class of registers or on the stack.  */
6861 	  if (!pcum->aapcs_cprc_failed[slot])
6862 	    {
6863 	      /* C1.cp - Try to allocate the argument to co-processor
6864 		 registers.  */
6865 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6866 		return;
6867 
6868 	      /* C2.cp - Put the argument on the stack and note that we
6869 		 can't assign any more candidates in this slot.  We also
6870 		 need to note that we have allocated stack space, so that
6871 		 we won't later try to split a non-cprc candidate between
6872 		 core registers and the stack.  */
6873 	      pcum->aapcs_cprc_failed[slot] = true;
6874 	      pcum->can_split = false;
6875 	    }
6876 
6877 	  /* We didn't get a register, so this argument goes on the
6878 	     stack.  */
6879 	  gcc_assert (pcum->can_split == false);
6880 	  return;
6881 	}
6882     }
6883 
6884   /* C3 - For double-word aligned arguments, round the NCRN up to the
6885      next even number.  */
6886   ncrn = pcum->aapcs_ncrn;
6887   if (ncrn & 1)
6888     {
6889       int res = arm_needs_doubleword_align (mode, type);
6890       /* Only warn during RTL expansion of call stmts, otherwise we would
6891 	 warn e.g. during gimplification even on functions that will be
6892 	 always inlined, and we'd warn multiple times.  Don't warn when
6893 	 called in expand_function_start either, as we warn instead in
6894 	 arm_function_arg_boundary in that case.  */
6895       if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6896 	inform (input_location, "parameter passing for argument of type "
6897 		"%qT changed in GCC 7.1", type);
6898       else if (res > 0)
6899 	ncrn++;
6900     }
6901 
6902   nregs = ARM_NUM_REGS2 (mode, type);
6903 
6904   /* Sigh, this test should really assert that nregs > 0, but a GCC
6905      extension allows empty structs and then gives them zero size; it
6906      then allows such a structure to be passed by value.  For some of
6907      the code below we have to pretend that such an argument has
6908      non-zero size so that we 'locate' it correctly either in
6909      registers or on the stack.  */
6910   gcc_assert (nregs >= 0);
6911 
6912   nregs2 = nregs ? nregs : 1;
6913 
6914   /* C4 - Argument fits entirely in core registers.  */
6915   if (ncrn + nregs2 <= NUM_ARG_REGS)
6916     {
6917       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6918       pcum->aapcs_next_ncrn = ncrn + nregs;
6919       return;
6920     }
6921 
6922   /* C5 - Some core registers left and there are no arguments already
6923      on the stack: split this argument between the remaining core
6924      registers and the stack.  */
6925   if (ncrn < NUM_ARG_REGS && pcum->can_split)
6926     {
6927       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6928       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6929       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6930       return;
6931     }
6932 
6933   /* C6 - NCRN is set to 4.  */
6934   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6935 
6936   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
6937   return;
6938 }
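
/* A small worked example of rules C3-C5 above (base AAPCS, r0-r3 as the
   core argument registers; illustrative only):

     f (int a, long long b, int c)
        a -> r0
        b -> r2/r3   (C3 rounds the NCRN up to an even number)
        c -> stack   (no core registers left)

   Had 'b' been a plain int, 'c' would have been passed in r2 instead.  */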
6939 
6940 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6941    for a call to a function whose data type is FNTYPE.
6942    For a library call, FNTYPE is NULL.  */
6943 void
6944 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6945 			  rtx libname,
6946 			  tree fndecl ATTRIBUTE_UNUSED)
6947 {
6948   /* Work out which procedure-call standard (PCS) variant is in use.  */
6949   if (fntype)
6950     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6951   else
6952     pcum->pcs_variant = arm_pcs_default;
6953 
6954   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6955     {
6956       if (arm_libcall_uses_aapcs_base (libname))
6957 	pcum->pcs_variant = ARM_PCS_AAPCS;
6958 
6959       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6960       pcum->aapcs_reg = NULL_RTX;
6961       pcum->aapcs_partial = 0;
6962       pcum->aapcs_arg_processed = false;
6963       pcum->aapcs_cprc_slot = -1;
6964       pcum->can_split = true;
6965 
6966       if (pcum->pcs_variant != ARM_PCS_AAPCS)
6967 	{
6968 	  int i;
6969 
6970 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6971 	    {
6972 	      pcum->aapcs_cprc_failed[i] = false;
6973 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6974 	    }
6975 	}
6976       return;
6977     }
6978 
6979   /* Legacy ABIs */
6980 
6981   /* On the ARM, the offset starts at 0.  */
6982   pcum->nregs = 0;
6983   pcum->iwmmxt_nregs = 0;
6984   pcum->can_split = true;
6985 
6986   /* Varargs vectors are treated the same as long long.
6987      named_count avoids having to change the way arm handles 'named' */
6988   pcum->named_count = 0;
6989   pcum->nargs = 0;
6990 
6991   if (TARGET_REALLY_IWMMXT && fntype)
6992     {
6993       tree fn_arg;
6994 
6995       for (fn_arg = TYPE_ARG_TYPES (fntype);
6996 	   fn_arg;
6997 	   fn_arg = TREE_CHAIN (fn_arg))
6998 	pcum->named_count += 1;
6999 
7000       if (! pcum->named_count)
7001 	pcum->named_count = INT_MAX;
7002     }
7003 }
7004 
7005 /* Return 2 if double word alignment is required for argument passing,
7006    but wasn't required before the fix for PR88469.
7007    Return 1 if double word alignment is required for argument passing.
7008    Return -1 if double word alignment used to be required for argument
7009    passing before PR77728 ABI fix, but is not required anymore.
7010    Return 0 if double word alignment is not required and wasn't required
7011    before either.  */
7012 static int
7013 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7014 {
7015   if (!type)
7016     return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7017 
7018   /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
7019   if (!AGGREGATE_TYPE_P (type))
7020     return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7021 
7022   /* Array types: Use member alignment of element type.  */
7023   if (TREE_CODE (type) == ARRAY_TYPE)
7024     return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7025 
7026   int ret = 0;
7027   int ret2 = 0;
7028   /* Record/aggregate types: Use greatest member alignment of any member.
7029 
7030      Note that we explicitly consider zero-sized fields here, even though
7031      they don't map to AAPCS machine types.  For example, in:
7032 
7033 	 struct __attribute__((aligned(8))) empty {};
7034 
7035 	 struct s {
7036 	   [[no_unique_address]] empty e;
7037 	   int x;
7038 	 };
7039 
7040      "s" contains only one Fundamental Data Type (the int field)
7041      but gains 8-byte alignment and size thanks to "e".  */
7042   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7043     if (DECL_ALIGN (field) > PARM_BOUNDARY)
7044       {
7045 	if (TREE_CODE (field) == FIELD_DECL)
7046 	  return 1;
7047 	else
7048 	  /* Before PR77728 fix, we were incorrectly considering also
7049 	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7050 	     Make sure we can warn about that with -Wpsabi.  */
7051 	  ret = -1;
7052       }
7053     else if (TREE_CODE (field) == FIELD_DECL
7054 	     && DECL_BIT_FIELD_TYPE (field)
7055 	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7056       ret2 = 1;
7057 
7058   if (ret2)
7059     return 2;
7060 
7061   return ret;
7062 }
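
/* Illustrative examples, assuming PARM_BOUNDARY == 32:

     long long or double             -> 1 (64-bit natural alignment)
     struct { long long x; }         -> 1 (member alignment)
     struct { long long x : 8; }     -> may yield 2 (the PR88469 case: the
                                        alignment comes only from the
                                        declared bit-field type)

   The -1 result arises for aggregates whose only over-aligned members
   are not FIELD_DECLs, i.e. the pre-PR77728 behaviour noted above.  */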
7063 
7064 
7065 /* Determine where to put an argument to a function.
7066    Value is zero to push the argument on the stack,
7067    or a hard register in which to store the argument.
7068 
7069    CUM is a variable of type CUMULATIVE_ARGS which gives info about
7070     the preceding args and about the function being called.
7071    ARG is a description of the argument.
7072 
7073    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7074    other arguments are passed on the stack.  If (NAMED == 0) (which happens
7075    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7076    defined), say it is passed on the stack (function_prologue will
7077    indeed make it be passed on the stack if necessary).  */
7078 
7079 static rtx
7080 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7081 {
7082   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7083   int nregs;
7084 
7085   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
7086      a call insn (op3 of a call_value insn).  */
7087   if (arg.end_marker_p ())
7088     return const0_rtx;
7089 
7090   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7091     {
7092       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7093       return pcum->aapcs_reg;
7094     }
7095 
7096   /* Varargs vectors are treated the same as long long.
7097      named_count avoids having to change the way arm handles 'named' */
7098   if (TARGET_IWMMXT_ABI
7099       && arm_vector_mode_supported_p (arg.mode)
7100       && pcum->named_count > pcum->nargs + 1)
7101     {
7102       if (pcum->iwmmxt_nregs <= 9)
7103 	return gen_rtx_REG (arg.mode,
7104 			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7105       else
7106 	{
7107 	  pcum->can_split = false;
7108 	  return NULL_RTX;
7109 	}
7110     }
7111 
7112   /* Put doubleword aligned quantities in even register pairs.  */
7113   if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7114     {
7115       int res = arm_needs_doubleword_align (arg.mode, arg.type);
7116       if (res < 0 && warn_psabi)
7117 	inform (input_location, "parameter passing for argument of type "
7118 		"%qT changed in GCC 7.1", arg.type);
7119       else if (res > 0)
7120 	{
7121 	  pcum->nregs++;
7122 	  if (res > 1 && warn_psabi)
7123 	    inform (input_location, "parameter passing for argument of type "
7124 		    "%qT changed in GCC 9.1", arg.type);
7125 	}
7126     }
7127 
7128   /* Only allow splitting an arg between regs and memory if all preceding
7129      args were allocated to regs.  For args passed by reference we only count
7130      the reference pointer.  */
7131   if (pcum->can_split)
7132     nregs = 1;
7133   else
7134     nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7135 
7136   if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7137     return NULL_RTX;
7138 
7139   return gen_rtx_REG (arg.mode, pcum->nregs);
7140 }
7141 
7142 static unsigned int
7143 arm_function_arg_boundary (machine_mode mode, const_tree type)
7144 {
7145   if (!ARM_DOUBLEWORD_ALIGN)
7146     return PARM_BOUNDARY;
7147 
7148   int res = arm_needs_doubleword_align (mode, type);
7149   if (res < 0 && warn_psabi)
7150     inform (input_location, "parameter passing for argument of type %qT "
7151 	    "changed in GCC 7.1", type);
7152   if (res > 1 && warn_psabi)
7153     inform (input_location, "parameter passing for argument of type "
7154 	    "%qT changed in GCC 9.1", type);
7155 
7156   return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7157 }
7158 
7159 static int
7160 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7161 {
7162   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7163   int nregs = pcum->nregs;
7164 
7165   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7166     {
7167       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7168       return pcum->aapcs_partial;
7169     }
7170 
7171   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7172     return 0;
7173 
7174   if (NUM_ARG_REGS > nregs
7175       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7176       && pcum->can_split)
7177     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7178 
7179   return 0;
7180 }
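
/* Example of a split argument under the AAPCS rules (NUM_ARG_REGS == 4,
   4-byte words; illustrative only):

     struct two_words { int x, y; };
     f (int a, int b, int c, struct two_words d)

   'd' starts in r3 but needs two words, so rule C5 puts its first word
   in r3 and the second on the stack; this function then reports 4
   partial bytes.  A doubleword-aligned type such as long long would
   instead be rounded up by C3 and passed entirely on the stack.  */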
7181 
7182 /* Update the data in PCUM to advance over argument ARG.  */
7183 
7184 static void
7185 arm_function_arg_advance (cumulative_args_t pcum_v,
7186 			  const function_arg_info &arg)
7187 {
7188   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7189 
7190   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7191     {
7192       aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7193 
7194       if (pcum->aapcs_cprc_slot >= 0)
7195 	{
7196 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7197 							      arg.type);
7198 	  pcum->aapcs_cprc_slot = -1;
7199 	}
7200 
7201       /* Generic stuff.  */
7202       pcum->aapcs_arg_processed = false;
7203       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7204       pcum->aapcs_reg = NULL_RTX;
7205       pcum->aapcs_partial = 0;
7206     }
7207   else
7208     {
7209       pcum->nargs += 1;
7210       if (arm_vector_mode_supported_p (arg.mode)
7211 	  && pcum->named_count > pcum->nargs
7212 	  && TARGET_IWMMXT_ABI)
7213 	pcum->iwmmxt_nregs += 1;
7214       else
7215 	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7216     }
7217 }
7218 
7219 /* Variable sized types are passed by reference.  This is a GCC
7220    extension to the ARM ABI.  */
7221 
7222 static bool
7223 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7224 {
7225   return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7226 }
7227 
7228 /* Encode the current state of the #pragma [no_]long_calls.  */
7229 typedef enum
7230 {
7231   OFF,		/* No #pragma [no_]long_calls is in effect.  */
7232   LONG,		/* #pragma long_calls is in effect.  */
7233   SHORT		/* #pragma no_long_calls is in effect.  */
7234 } arm_pragma_enum;
7235 
7236 static arm_pragma_enum arm_pragma_long_calls = OFF;
7237 
7238 void
7239 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7240 {
7241   arm_pragma_long_calls = LONG;
7242 }
7243 
7244 void
7245 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7246 {
7247   arm_pragma_long_calls = SHORT;
7248 }
7249 
7250 void
7251 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7252 {
7253   arm_pragma_long_calls = OFF;
7254 }
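
/* These handlers implement the source-level pragmas, e.g.:

     #pragma long_calls
     void far_away (void);     -- declared within the pragma's scope
     #pragma long_calls_off

   The recorded state is applied to newly created function types by
   arm_set_default_type_attributes further down.  */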
7255 
7256 /* Handle an attribute requiring a FUNCTION_DECL;
7257    arguments as in struct attribute_spec.handler.  */
7258 static tree
7259 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7260 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7261 {
7262   if (TREE_CODE (*node) != FUNCTION_DECL)
7263     {
7264       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7265 	       name);
7266       *no_add_attrs = true;
7267     }
7268 
7269   return NULL_TREE;
7270 }
7271 
7272 /* Handle an "interrupt" or "isr" attribute;
7273    arguments as in struct attribute_spec.handler.  */
7274 static tree
7275 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7276 			  bool *no_add_attrs)
7277 {
7278   if (DECL_P (*node))
7279     {
7280       if (TREE_CODE (*node) != FUNCTION_DECL)
7281 	{
7282 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
7283 		   name);
7284 	  *no_add_attrs = true;
7285 	}
7286       /* FIXME: the argument if any is checked for type attributes;
7287 	 should it be checked for decl ones?  */
7288     }
7289   else
7290     {
7291       if (TREE_CODE (*node) == FUNCTION_TYPE
7292 	  || TREE_CODE (*node) == METHOD_TYPE)
7293 	{
7294 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7295 	    {
7296 	      warning (OPT_Wattributes, "%qE attribute ignored",
7297 		       name);
7298 	      *no_add_attrs = true;
7299 	    }
7300 	}
7301       else if (TREE_CODE (*node) == POINTER_TYPE
7302 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7303 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7304 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
7305 	{
7306 	  *node = build_variant_type_copy (*node);
7307 	  TREE_TYPE (*node) = build_type_attribute_variant
7308 	    (TREE_TYPE (*node),
7309 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7310 	  *no_add_attrs = true;
7311 	}
7312       else
7313 	{
7314 	  /* Possibly pass this attribute on from the type to a decl.  */
7315 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
7316 		       | (int) ATTR_FLAG_FUNCTION_NEXT
7317 		       | (int) ATTR_FLAG_ARRAY_NEXT))
7318 	    {
7319 	      *no_add_attrs = true;
7320 	      return tree_cons (name, args, NULL_TREE);
7321 	    }
7322 	  else
7323 	    {
7324 	      warning (OPT_Wattributes, "%qE attribute ignored",
7325 		       name);
7326 	    }
7327 	}
7328     }
7329 
7330   return NULL_TREE;
7331 }
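
/* Typical uses of the attributes handled above:

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fiq_handler (void) __attribute__ ((isr ("FIQ")));

   The string argument is decoded by arm_isr_value; an unrecognized
   value causes the attribute to be ignored with a warning, as above.  */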
7332 
7333 /* Handle a "pcs" attribute; arguments as in struct
7334    attribute_spec.handler.  */
7335 static tree
7336 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7337 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7338 {
7339   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7340     {
7341       warning (OPT_Wattributes, "%qE attribute ignored", name);
7342       *no_add_attrs = true;
7343     }
7344   return NULL_TREE;
7345 }
7346 
7347 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7348 /* Handle the "notshared" attribute.  This attribute is another way of
7349    requesting hidden visibility.  ARM's compiler supports
7350    "__declspec(notshared)"; we support the same thing via an
7351    attribute.  */
7352 
7353 static tree
7354 arm_handle_notshared_attribute (tree *node,
7355 				tree name ATTRIBUTE_UNUSED,
7356 				tree args ATTRIBUTE_UNUSED,
7357 				int flags ATTRIBUTE_UNUSED,
7358 				bool *no_add_attrs)
7359 {
7360   tree decl = TYPE_NAME (*node);
7361 
7362   if (decl)
7363     {
7364       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7365       DECL_VISIBILITY_SPECIFIED (decl) = 1;
7366       *no_add_attrs = false;
7367     }
7368   return NULL_TREE;
7369 }
7370 #endif
7371 
7372 /* This function returns true if a function with declaration FNDECL and type
7373    FNTYPE uses the stack to pass arguments or return variables and false
7374    otherwise.  This is used for functions with the attributes
7375    'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7376    diagnostic messages if the stack is used.  NAME is the name of the attribute
7377    used.  */
7378 
7379 static bool
7380 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7381 {
7382   function_args_iterator args_iter;
7383   CUMULATIVE_ARGS args_so_far_v;
7384   cumulative_args_t args_so_far;
7385   bool first_param = true;
7386   tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7387 
7388   /* Error out if any argument is passed on the stack.  */
7389   arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7390   args_so_far = pack_cumulative_args (&args_so_far_v);
7391   FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7392     {
7393       rtx arg_rtx;
7394 
7395       prev_arg_type = arg_type;
7396       if (VOID_TYPE_P (arg_type))
7397 	continue;
7398 
7399       function_arg_info arg (arg_type, /*named=*/true);
7400       if (!first_param)
7401 	/* ??? We should advance after processing the argument and pass
7402 	   the argument we're advancing past.  */
7403 	arm_function_arg_advance (args_so_far, arg);
7404       arg_rtx = arm_function_arg (args_so_far, arg);
7405       if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7406 	{
7407 	  error ("%qE attribute not available to functions with arguments "
7408 		 "passed on the stack", name);
7409 	  return true;
7410 	}
7411       first_param = false;
7412     }
7413 
7414   /* Error out for variadic functions since we cannot control how many
7415      arguments will be passed and thus stack could be used.  stdarg_p () is not
7416      used for the checking to avoid browsing arguments twice.  */
7417   if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7418     {
7419       error ("%qE attribute not available to functions with variable number "
7420 	     "of arguments", name);
7421       return true;
7422     }
7423 
7424   /* Error out if return value is passed on the stack.  */
7425   ret_type = TREE_TYPE (fntype);
7426   if (arm_return_in_memory (ret_type, fntype))
7427     {
7428       error ("%qE attribute not available to functions that return value on "
7429 	     "the stack", name);
7430       return true;
7431     }
7432   return false;
7433 }
7434 
7435 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7436    function will check whether the attribute is allowed here and will add the
7437    attribute to the function declaration tree or otherwise issue a warning.  */
7438 
7439 static tree
7440 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7441 				 tree /* args */,
7442 				 int /* flags */,
7443 				 bool *no_add_attrs)
7444 {
7445   tree fndecl;
7446 
7447   if (!use_cmse)
7448     {
7449       *no_add_attrs = true;
7450       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7451 	       "option.", name);
7452       return NULL_TREE;
7453     }
7454 
7455   /* Ignore attribute for function types.  */
7456   if (TREE_CODE (*node) != FUNCTION_DECL)
7457     {
7458       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7459 	       name);
7460       *no_add_attrs = true;
7461       return NULL_TREE;
7462     }
7463 
7464   fndecl = *node;
7465 
7466   /* Warn for static linkage functions.  */
7467   if (!TREE_PUBLIC (fndecl))
7468     {
7469       warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7470 	       "with static linkage", name);
7471       *no_add_attrs = true;
7472       return NULL_TREE;
7473     }
7474 
7475   *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7476 						TREE_TYPE (fndecl));
7477   return NULL_TREE;
7478 }
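
/* Example of a valid use of the attribute checked above, when compiling
   with -mcmse (illustrative only):

     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x)
     {
       return x + 1;
     }

   The function must have external linkage and must not need the stack
   for its arguments or return value.  */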
7479 
7480 
7481 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7482    function will check whether the attribute is allowed here and will add the
7483    attribute to the function type tree or otherwise issue a diagnostic.  The
7484    reason we check this at declaration time is to only allow the use of the
7485    attribute with declarations of function pointers and not function
7486    declarations.  This function checks NODE is of the expected type and issues
7487    diagnostics otherwise using NAME.  If it is not of the expected type
7488    *NO_ADD_ATTRS will be set to true.  */
7489 
7490 static tree
7491 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7492 				 tree /* args */,
7493 				 int /* flags */,
7494 				 bool *no_add_attrs)
7495 {
7496   tree decl = NULL_TREE, fntype = NULL_TREE;
7497   tree type;
7498 
7499   if (!use_cmse)
7500     {
7501       *no_add_attrs = true;
7502       warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7503 	       "option.", name);
7504       return NULL_TREE;
7505     }
7506 
7507   if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7508     {
7509       decl = *node;
7510       fntype = TREE_TYPE (decl);
7511     }
7512 
7513   while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7514     fntype = TREE_TYPE (fntype);
7515 
7516   if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7517     {
7518 	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7519 		 "function pointer", name);
7520 	*no_add_attrs = true;
7521 	return NULL_TREE;
7522     }
7523 
7524   *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7525 
7526   if (*no_add_attrs)
7527     return NULL_TREE;
7528 
7529   /* Prevent trees being shared among function types with and without
7530      cmse_nonsecure_call attribute.  */
7531   type = TREE_TYPE (decl);
7532 
7533   type = build_distinct_type_copy (type);
7534   TREE_TYPE (decl) = type;
7535   fntype = type;
7536 
7537   while (TREE_CODE (fntype) != FUNCTION_TYPE)
7538     {
7539       type = fntype;
7540       fntype = TREE_TYPE (fntype);
7541       fntype = build_distinct_type_copy (fntype);
7542       TREE_TYPE (type) = fntype;
7543     }
7544 
7545   /* Construct a type attribute and add it to the function type.  */
7546   tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7547 			  TYPE_ATTRIBUTES (fntype));
7548   TYPE_ATTRIBUTES (fntype) = attrs;
7549   return NULL_TREE;
7550 }
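
/* The cmse_nonsecure_call attribute belongs on function-pointer
   declarations rather than on function declarations, e.g. (illustrative,
   with -mcmse):

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

   Applying it to anything other than (a pointer to) a function type is
   rejected above.  */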
7551 
7552 /* Return 0 if the attributes for two types are incompatible, 1 if they
7553    are compatible, and 2 if they are nearly compatible (which causes a
7554    warning to be generated).  */
7555 static int
7556 arm_comp_type_attributes (const_tree type1, const_tree type2)
7557 {
7558   int l1, l2, s1, s2;
7559 
7560   /* Check for mismatch of non-default calling convention.  */
7561   if (TREE_CODE (type1) != FUNCTION_TYPE)
7562     return 1;
7563 
7564   /* Check for mismatched call attributes.  */
7565   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7566   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7567   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7568   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7569 
7570   /* Only bother to check if an attribute is defined.  */
7571   if (l1 | l2 | s1 | s2)
7572     {
7573       /* If one type has an attribute, the other must have the same attribute.  */
7574       if ((l1 != l2) || (s1 != s2))
7575 	return 0;
7576 
7577       /* Disallow mixed attributes.  */
7578       if ((l1 & s2) || (l2 & s1))
7579 	return 0;
7580     }
7581 
7582   /* Check for mismatched ISR attribute.  */
7583   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7584   if (! l1)
7585     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7586   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7587   if (! l2)
7588     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7589   if (l1 != l2)
7590     return 0;
7591 
7592   l1 = lookup_attribute ("cmse_nonsecure_call",
7593 			 TYPE_ATTRIBUTES (type1)) != NULL;
7594   l2 = lookup_attribute ("cmse_nonsecure_call",
7595 			 TYPE_ATTRIBUTES (type2)) != NULL;
7596 
7597   if (l1 != l2)
7598     return 0;
7599 
7600   return 1;
7601 }
7602 
7603 /* Assign default attributes to a newly defined type.  This is used to
7604    set the short_call/long_call attributes for function types of
7605    functions defined inside the corresponding #pragma scopes.  */
7606 static void
7607 arm_set_default_type_attributes (tree type)
7608 {
7609   /* Add __attribute__ ((long_call)) to all functions when inside
7610      #pragma long_calls, or __attribute__ ((short_call)) when inside
7611      #pragma no_long_calls.  */
7612   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7613     {
7614       tree type_attr_list, attr_name;
7615       type_attr_list = TYPE_ATTRIBUTES (type);
7616 
7617       if (arm_pragma_long_calls == LONG)
7618  	attr_name = get_identifier ("long_call");
7619       else if (arm_pragma_long_calls == SHORT)
7620  	attr_name = get_identifier ("short_call");
7621       else
7622  	return;
7623 
7624       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7625       TYPE_ATTRIBUTES (type) = type_attr_list;
7626     }
7627 }
7628 
7629 /* Return true if DECL is known to be linked into section SECTION.  */
7630 
7631 static bool
7632 arm_function_in_section_p (tree decl, section *section)
7633 {
7634   /* We can only be certain about the prevailing symbol definition.  */
7635   if (!decl_binds_to_current_def_p (decl))
7636     return false;
7637 
7638   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
7639   if (!DECL_SECTION_NAME (decl))
7640     {
7641       /* Make sure that we will not create a unique section for DECL.  */
7642       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7643 	return false;
7644     }
7645 
7646   return function_section (decl) == section;
7647 }
7648 
7649 /* Return nonzero if a 32-bit "long_call" should be generated for
7650    a call from the current function to DECL.  We generate a long_call
7651    if the function:
7652 
7653         a.  has an __attribute__ ((long_call))
7654      or b.  is within the scope of a #pragma long_calls
7655      or c.  the -mlong-calls command line switch has been specified
7656 
7657    However we do not generate a long call if the function:
7658 
7659         d.  has an __attribute__ ((short_call))
7660      or e.  is inside the scope of a #pragma no_long_calls
7661      or f.  is defined in the same section as the current function.  */
7662 
7663 bool
7664 arm_is_long_call_p (tree decl)
7665 {
7666   tree attrs;
7667 
7668   if (!decl)
7669     return TARGET_LONG_CALLS;
7670 
7671   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7672   if (lookup_attribute ("short_call", attrs))
7673     return false;
7674 
7675   /* For "f", be conservative, and only cater for cases in which the
7676      whole of the current function is placed in the same section.  */
7677   if (!flag_reorder_blocks_and_partition
7678       && TREE_CODE (decl) == FUNCTION_DECL
7679       && arm_function_in_section_p (decl, current_function_section ()))
7680     return false;
7681 
7682   if (lookup_attribute ("long_call", attrs))
7683     return true;
7684 
7685   return TARGET_LONG_CALLS;
7686 }
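
/* Illustrative sketch (not part of GCC itself): the rules above correspond
   to declarations and options such as the following; the names are
   hypothetical.  */
#if 0
extern void far_func (void) __attribute__ ((long_call));	/* case a */
extern void near_func (void) __attribute__ ((short_call));	/* case d */
/* Building with -mlong-calls makes long calls the default (case c) unless
   overridden by a short_call attribute, a #pragma no_long_calls scope, or
   the same-section check (case f).  */
#endif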
7687 
7688 /* Return nonzero if it is ok to make a tail-call to DECL.  */
7689 static bool
7690 arm_function_ok_for_sibcall (tree decl, tree exp)
7691 {
7692   unsigned long func_type;
7693 
7694   if (cfun->machine->sibcall_blocked)
7695     return false;
7696 
7697   if (TARGET_FDPIC)
7698     {
7699       /* In FDPIC, never tailcall something for which we have no decl:
7700 	 the target function could be in a different module, requiring
7701 	 a different FDPIC register value.  */
7702       if (decl == NULL)
7703 	return false;
7704     }
7705 
7706   /* Never tailcall something if we are generating code for Thumb-1.  */
7707   if (TARGET_THUMB1)
7708     return false;
7709 
7710   /* The PIC register is live on entry to VxWorks PLT entries, so we
7711      must make the call before restoring the PIC register.  */
7712   if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7713     return false;
7714 
7715   /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7716      may be used both as target of the call and base register for restoring
7717      the VFP registers.  */
7718   if (TARGET_APCS_FRAME && TARGET_ARM
7719       && TARGET_HARD_FLOAT
7720       && decl && arm_is_long_call_p (decl))
7721     return false;
7722 
7723   /* If we are interworking and the function is not declared static
7724      then we can't tail-call it unless we know that it exists in this
7725      compilation unit (since it might be a Thumb routine).  */
7726   if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7727       && !TREE_ASM_WRITTEN (decl))
7728     return false;
7729 
7730   func_type = arm_current_func_type ();
7731   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7732   if (IS_INTERRUPT (func_type))
7733     return false;
7734 
7735   /* ARMv8-M non-secure entry functions need to return with bxns which is only
7736      generated for entry functions themselves.  */
7737   if (IS_CMSE_ENTRY (arm_current_func_type ()))
7738     return false;
7739 
7740   /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7741      doing so would complicate matters for later code generation.  */
7742   if (TREE_CODE (exp) == CALL_EXPR)
7743     {
7744       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7745       if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7746 	return false;
7747     }
7748 
7749   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7750     {
7751       /* Check that the return value locations are the same.  For
7752 	 example that we aren't returning a value from the sibling in
7753 	 a VFP register but then need to transfer it to a core
7754 	 register.  */
7755       rtx a, b;
7756       tree decl_or_type = decl;
7757 
7758       /* If it is an indirect function pointer, get the function type.  */
7759       if (!decl)
7760 	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7761 
7762       a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7763       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7764 			      cfun->decl, false);
7765       if (!rtx_equal_p (a, b))
7766 	return false;
7767     }
7768 
7769   /* Never tailcall if function may be called with a misaligned SP.  */
7770   if (IS_STACKALIGN (func_type))
7771     return false;
7772 
7773   /* The AAPCS says that, on bare-metal, calls to unresolved weak
7774      references should become a NOP.  Don't convert such calls into
7775      sibling calls.  */
7776   if (TARGET_AAPCS_BASED
7777       && arm_abi == ARM_ABI_AAPCS
7778       && decl
7779       && DECL_WEAK (decl))
7780     return false;
7781 
7782   /* We cannot do a tailcall for an indirect call by descriptor if all the
7783      argument registers are used because the only register left to load the
7784      address is IP and it will already contain the static chain.  */
7785   if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7786     {
7787       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7788       CUMULATIVE_ARGS cum;
7789       cumulative_args_t cum_v;
7790 
7791       arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7792       cum_v = pack_cumulative_args (&cum);
7793 
7794       for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7795 	{
7796 	  tree type = TREE_VALUE (t);
7797 	  if (!VOID_TYPE_P (type))
7798 	    {
7799 	      function_arg_info arg (type, /*named=*/true);
7800 	      arm_function_arg_advance (cum_v, arg);
7801 	    }
7802 	}
7803 
7804       function_arg_info arg (integer_type_node, /*named=*/true);
7805       if (!arm_function_arg (cum_v, arg))
7806 	return false;
7807     }
7808 
7809   /* Everything else is ok.  */
7810   return true;
7811 }
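
/* Illustrative sketch (not part of GCC itself): a call in tail position,
   such as the one below, is the kind of call the checks above may convert
   into a sibling call; the names are hypothetical.  */
#if 0
extern int worker (int);

int
wrapper (int x)
{
  return worker (x + 1);	/* Candidate for a direct branch ("b")
				   rather than a branch-and-link ("bl").  */
}
#endif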
7812 
7813 
7814 /* Addressing mode support functions.  */
7815 
7816 /* Return nonzero if X is a legitimate immediate operand when compiling
7817    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
7818 int
7819 legitimate_pic_operand_p (rtx x)
7820 {
7821   if (GET_CODE (x) == SYMBOL_REF
7822       || (GET_CODE (x) == CONST
7823 	  && GET_CODE (XEXP (x, 0)) == PLUS
7824 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7825     return 0;
7826 
7827   return 1;
7828 }
7829 
7830 /* Record that the current function needs a PIC register.  If PIC_REG is null,
7831    a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
7832    both cases cfun->machine->pic_reg is initialized if we have not already done
7833    so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
7834    the PIC register is reloaded at the current position in the instruction stream
7835    regardless of whether it was loaded before.  Otherwise, it is only loaded
7836    if it has not already been loaded (crtl->uses_pic_offset_table is null).  Note that
7837    nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7838    is only supported iff COMPUTE_NOW is false.  */
7839 
7840 static void
7841 require_pic_register (rtx pic_reg, bool compute_now)
7842 {
7843   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7844 
7845   /* A lot of the logic here is made obscure by the fact that this
7846      routine gets called as part of the rtx cost estimation process.
7847      We don't want those calls to affect any assumptions about the real
7848      function; and further, we can't call entry_of_function() until we
7849      start the real expansion process.  */
7850   if (!crtl->uses_pic_offset_table || compute_now)
7851     {
7852       gcc_assert (can_create_pseudo_p ()
7853 		  || (pic_reg != NULL_RTX
7854 		      && REG_P (pic_reg)
7855 		      && GET_MODE (pic_reg) == Pmode));
7856       if (arm_pic_register != INVALID_REGNUM
7857 	  && !compute_now
7858 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7859 	{
7860 	  if (!cfun->machine->pic_reg)
7861 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7862 
7863 	  /* Play games to avoid marking the function as needing pic
7864 	     if we are being called as part of the cost-estimation
7865 	     process.  */
7866 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7867 	    crtl->uses_pic_offset_table = 1;
7868 	}
7869       else
7870 	{
7871 	  rtx_insn *seq, *insn;
7872 
7873 	  if (pic_reg == NULL_RTX)
7874 	    pic_reg = gen_reg_rtx (Pmode);
7875 	  if (!cfun->machine->pic_reg)
7876 	    cfun->machine->pic_reg = pic_reg;
7877 
7878 	  /* Play games to avoid marking the function as needing pic
7879 	     if we are being called as part of the cost-estimation
7880 	     process.  */
7881 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7882 	    {
7883 	      crtl->uses_pic_offset_table = 1;
7884 	      start_sequence ();
7885 
7886 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7887 		  && arm_pic_register > LAST_LO_REGNUM
7888 		  && !compute_now)
7889 		emit_move_insn (cfun->machine->pic_reg,
7890 				gen_rtx_REG (Pmode, arm_pic_register));
7891 	      else
7892 		arm_load_pic_register (0UL, pic_reg);
7893 
7894 	      seq = get_insns ();
7895 	      end_sequence ();
7896 
7897 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
7898 		if (INSN_P (insn))
7899 		  INSN_LOCATION (insn) = prologue_location;
7900 
7901 	      /* We can be called during expansion of PHI nodes, where
7902 	         we can't yet emit instructions directly in the final
7903 		 insn stream.  Queue the insns on the entry edge, they will
7904 		 be committed after everything else is expanded.  */
7905 	      if (currently_expanding_to_rtl)
7906 		insert_insn_on_edge (seq,
7907 				     single_succ_edge
7908 				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7909 	      else
7910 		emit_insn (seq);
7911 	    }
7912 	}
7913     }
7914 }
7915 
7916 /* Generate insns to calculate the address of ORIG in pic mode.  */
7917 static rtx_insn *
7918 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7919 {
7920   rtx pat;
7921   rtx mem;
7922 
7923   pat = gen_calculate_pic_address (reg, pic_reg, orig);
7924 
7925   /* Make the MEM as close to a constant as possible.  */
7926   mem = SET_SRC (pat);
7927   gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7928   MEM_READONLY_P (mem) = 1;
7929   MEM_NOTRAP_P (mem) = 1;
7930 
7931   return emit_insn (pat);
7932 }
7933 
7934 /* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
7935    created to hold the result of the load.  If not NULL, PIC_REG indicates
7936    which register to use as PIC register, otherwise it is decided by register
7937    allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
7938    location in the instruction stream, regardless of whether it was loaded
7939    previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7940    true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7941 
7942    Returns the register REG into which the PIC load is performed.  */
7943 
7944 rtx
7945 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7946 			bool compute_now)
7947 {
7948   gcc_assert (compute_now == (pic_reg != NULL_RTX));
7949 
7950   if (GET_CODE (orig) == SYMBOL_REF
7951       || GET_CODE (orig) == LABEL_REF)
7952     {
7953       if (reg == 0)
7954 	{
7955 	  gcc_assert (can_create_pseudo_p ());
7956 	  reg = gen_reg_rtx (Pmode);
7957 	}
7958 
7959       /* VxWorks does not impose a fixed gap between segments; the run-time
7960 	 gap can be different from the object-file gap.  We therefore can't
7961 	 use GOTOFF unless we are absolutely sure that the symbol is in the
7962 	 same segment as the GOT.  Unfortunately, the flexibility of linker
7963 	 scripts means that we can't be sure of that in general, so assume
7964 	 that GOTOFF is never valid on VxWorks.  */
7965       /* References to weak symbols cannot be resolved locally: they
7966 	 may be overridden by a non-weak definition at link time.  */
7967       rtx_insn *insn;
7968       if ((GET_CODE (orig) == LABEL_REF
7969 	   || (GET_CODE (orig) == SYMBOL_REF
7970 	       && SYMBOL_REF_LOCAL_P (orig)
7971 	       && (SYMBOL_REF_DECL (orig)
7972 		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7973 	       && (!SYMBOL_REF_FUNCTION_P (orig)
7974 		   || arm_fdpic_local_funcdesc_p (orig))))
7975 	  && NEED_GOT_RELOC
7976 	  && arm_pic_data_is_text_relative)
7977 	insn = arm_pic_static_addr (orig, reg);
7978       else
7979 	{
7980 	  /* If this function doesn't have a pic register, create one now.  */
7981 	  require_pic_register (pic_reg, compute_now);
7982 
7983 	  if (pic_reg == NULL_RTX)
7984 	    pic_reg = cfun->machine->pic_reg;
7985 
7986 	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
7987 	}
7988 
7989       /* Put a REG_EQUAL note on this insn, so that it can be optimized
7990 	 by loop.  */
7991       set_unique_reg_note (insn, REG_EQUAL, orig);
7992 
7993       return reg;
7994     }
7995   else if (GET_CODE (orig) == CONST)
7996     {
7997       rtx base, offset;
7998 
7999       if (GET_CODE (XEXP (orig, 0)) == PLUS
8000 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8001 	return orig;
8002 
8003       /* Handle the case where we have: const (UNSPEC_TLS).  */
8004       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8005 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8006 	return orig;
8007 
8008       /* Handle the case where we have:
8009          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
8010          CONST_INT.  */
8011       if (GET_CODE (XEXP (orig, 0)) == PLUS
8012           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8013           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8014         {
8015 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8016 	  return orig;
8017 	}
8018 
8019       if (reg == 0)
8020 	{
8021 	  gcc_assert (can_create_pseudo_p ());
8022 	  reg = gen_reg_rtx (Pmode);
8023 	}
8024 
8025       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8026 
8027       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8028 				     pic_reg, compute_now);
8029       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8030 				       base == reg ? 0 : reg, pic_reg,
8031 				       compute_now);
8032 
8033       if (CONST_INT_P (offset))
8034 	{
8035 	  /* The base register doesn't really matter, we only want to
8036 	     test the index for the appropriate mode.  */
8037 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
8038 	    {
8039 	      gcc_assert (can_create_pseudo_p ());
8040 	      offset = force_reg (Pmode, offset);
8041 	    }
8042 
8043 	  if (CONST_INT_P (offset))
8044 	    return plus_constant (Pmode, base, INTVAL (offset));
8045 	}
8046 
8047       if (GET_MODE_SIZE (mode) > 4
8048 	  && (GET_MODE_CLASS (mode) == MODE_INT
8049 	      || TARGET_SOFT_FLOAT))
8050 	{
8051 	  emit_insn (gen_addsi3 (reg, base, offset));
8052 	  return reg;
8053 	}
8054 
8055       return gen_rtx_PLUS (Pmode, base, offset);
8056     }
8057 
8058   return orig;
8059 }
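
/* Illustrative sketch (not part of GCC itself): under -fPIC the code above
   decides how address constants like the ones below are formed; the names
   are hypothetical.  */
#if 0
extern int shared_counter;	/* Preemptible: address loaded from the GOT.  */
static int file_local;		/* Local: may use a GOT-relative or PC-relative
				   offset instead of a GOT entry.  */

int *get_counter (void) { return &shared_counter; }
int *get_local (void) { return &file_local; }
#endif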
8060 
8061 
8062 /* Whether a register is callee saved or not.  This is necessary because high
8063    registers are marked as caller saved when optimizing for size on Thumb-1
8064    targets, despite actually being callee saved, in order to avoid using them.  */
8065 #define callee_saved_reg_p(reg) \
8066   (!call_used_or_fixed_reg_p (reg) \
8067    || (TARGET_THUMB1 && optimize_size \
8068        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8069 
8070 /* Return a mask for the call-clobbered low registers that are unused
8071    at the end of the prologue.  */
8072 static unsigned long
8073 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8074 {
8075   unsigned long mask = 0;
8076   bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8077 
8078   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8079     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8080       mask |= 1 << (reg - FIRST_LO_REGNUM);
8081   return mask;
8082 }
8083 
8084 /* Similarly for the start of the epilogue.  */
8085 static unsigned long
8086 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8087 {
8088   unsigned long mask = 0;
8089   bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8090 
8091   for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8092     if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8093       mask |= 1 << (reg - FIRST_LO_REGNUM);
8094   return mask;
8095 }
8096 
8097 /* Find a spare register to use during the prolog of a function.  */
8098 
8099 static int
8100 thumb_find_work_register (unsigned long pushed_regs_mask)
8101 {
8102   int reg;
8103 
8104   unsigned long unused_regs
8105     = thumb1_prologue_unused_call_clobbered_lo_regs ();
8106 
8107   /* Check the argument registers first as these are call-used.  The
8108      register allocation order means that sometimes r3 might be used
8109      but earlier argument registers might not, so check them all.  */
8110   for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8111     if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8112       return reg;
8113 
8114   /* Otherwise look for a call-saved register that is going to be pushed.  */
8115   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8116     if (pushed_regs_mask & (1 << reg))
8117       return reg;
8118 
8119   if (TARGET_THUMB2)
8120     {
8121       /* Thumb-2 can use high regs.  */
8122       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8123 	if (pushed_regs_mask & (1 << reg))
8124 	  return reg;
8125     }
8126   /* Something went wrong - thumb_compute_save_reg_mask()
8127      should have arranged for a suitable register to be pushed.  */
8128   gcc_unreachable ();
8129 }
8130 
8131 static GTY(()) int pic_labelno;
8132 
8133 /* Generate code to load the PIC register.  In Thumb mode SAVED_REGS gives
8134    the set of pushed registers from which a low work register can be taken.  */
8135 
8136 void
8137 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8138 {
8139   rtx l1, labelno, pic_tmp, pic_rtx;
8140 
8141   if (crtl->uses_pic_offset_table == 0
8142       || TARGET_SINGLE_PIC_BASE
8143       || TARGET_FDPIC)
8144     return;
8145 
8146   gcc_assert (flag_pic);
8147 
8148   if (pic_reg == NULL_RTX)
8149     pic_reg = cfun->machine->pic_reg;
8150   if (TARGET_VXWORKS_RTP)
8151     {
8152       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8153       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8154       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8155 
8156       emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8157 
8158       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8159       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8160     }
8161   else
8162     {
8163       /* We use an UNSPEC rather than a LABEL_REF because this label
8164 	 never appears in the code stream.  */
8165 
8166       labelno = GEN_INT (pic_labelno++);
8167       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8168       l1 = gen_rtx_CONST (VOIDmode, l1);
8169 
8170       /* On the ARM the PC register contains 'dot + 8' at the time of the
8171 	 addition, on the Thumb it is 'dot + 4'.  */
8172       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8173       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8174 				UNSPEC_GOTSYM_OFF);
8175       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8176 
8177       if (TARGET_32BIT)
8178 	{
8179 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8180 	}
8181       else /* TARGET_THUMB1 */
8182 	{
8183 	  if (arm_pic_register != INVALID_REGNUM
8184 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
8185 	    {
8186 	      /* We will have pushed the pic register, so we should always be
8187 		 able to find a work register.  */
8188 	      pic_tmp = gen_rtx_REG (SImode,
8189 				     thumb_find_work_register (saved_regs));
8190 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8191 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8192 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8193 	    }
8194 	  else if (arm_pic_register != INVALID_REGNUM
8195 		   && arm_pic_register > LAST_LO_REGNUM
8196 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
8197 	    {
8198 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8199 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8200 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8201 	    }
8202 	  else
8203 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8204 	}
8205     }
8206 
8207   /* Need to emit this whether or not we obey regdecls,
8208      since setjmp/longjmp can cause life info to screw up.  */
8209   emit_use (pic_reg);
8210 }
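
/* Illustrative note (not authoritative): the "dot + 8"/"dot + 4" constants
   above reflect that reading PC yields the address of the current
   instruction plus 8 in ARM state and plus 4 in Thumb state, so a sequence
   roughly like

	ldr	rN, .LOFF	@ .LOFF: _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8 or 4)
     .LPICn:
	add	rN, pc, rN	@ rN now holds the GOT base

   reconstructs the GOT base; the exact labels and literals vary.  */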
8211 
8212 /* Try to determine whether an object, referenced via ORIG, will be
8213    placed in the text or data segment.  This is used in FDPIC mode, to
8214    decide which relocations to use when accessing ORIG.  *IS_READONLY
8215    is set to true if ORIG is a read-only location, false otherwise.
8216    Return true if we could determine the location of ORIG, false
8217    otherwise.  *IS_READONLY is valid only when we return true.  */
8218 static bool
8219 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8220 {
8221   *is_readonly = false;
8222 
8223   if (GET_CODE (orig) == LABEL_REF)
8224     {
8225       *is_readonly = true;
8226       return true;
8227     }
8228 
8229   if (SYMBOL_REF_P (orig))
8230     {
8231       if (CONSTANT_POOL_ADDRESS_P (orig))
8232 	{
8233 	  *is_readonly = true;
8234 	  return true;
8235 	}
8236       if (SYMBOL_REF_LOCAL_P (orig)
8237 	  && !SYMBOL_REF_EXTERNAL_P (orig)
8238 	  && SYMBOL_REF_DECL (orig)
8239 	  && (!DECL_P (SYMBOL_REF_DECL (orig))
8240 	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8241 	{
8242 	  tree decl = SYMBOL_REF_DECL (orig);
8243 	  tree init = (TREE_CODE (decl) == VAR_DECL)
8244 	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8245 	    ? decl : 0;
8246 	  int reloc = 0;
8247 	  bool named_section, readonly;
8248 
8249 	  if (init && init != error_mark_node)
8250 	    reloc = compute_reloc_for_constant (init);
8251 
8252 	  named_section = TREE_CODE (decl) == VAR_DECL
8253 	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8254 	  readonly = decl_readonly_section (decl, reloc);
8255 
8256 	  /* We don't know where the link script will put a named
8257 	     section, so return false in such a case.  */
8258 	  if (named_section)
8259 	    return false;
8260 
8261 	  *is_readonly = readonly;
8262 	  return true;
8263 	}
8264 
8265       /* We don't know.  */
8266       return false;
8267     }
8268 
8269   gcc_unreachable ();
8270 }
8271 
8272 /* Generate code to load the address of a static var when flag_pic is set.  */
8273 static rtx_insn *
8274 arm_pic_static_addr (rtx orig, rtx reg)
8275 {
8276   rtx l1, labelno, offset_rtx;
8277   rtx_insn *insn;
8278 
8279   gcc_assert (flag_pic);
8280 
8281   bool is_readonly = false;
8282   bool info_known = false;
8283 
8284   if (TARGET_FDPIC
8285       && SYMBOL_REF_P (orig)
8286       && !SYMBOL_REF_FUNCTION_P (orig))
8287     info_known = arm_is_segment_info_known (orig, &is_readonly);
8288 
8289   if (TARGET_FDPIC
8290       && SYMBOL_REF_P (orig)
8291       && !SYMBOL_REF_FUNCTION_P (orig)
8292       && !info_known)
8293     {
8294       /* We don't know where orig is stored, so we have to be
8295 	 pessimistic and use a GOT relocation.  */
8296       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8297 
8298       insn = calculate_pic_address_constant (reg, pic_reg, orig);
8299     }
8300   else if (TARGET_FDPIC
8301 	   && SYMBOL_REF_P (orig)
8302 	   && (SYMBOL_REF_FUNCTION_P (orig)
8303 	       || !is_readonly))
8304     {
8305       /* We use the GOTOFF relocation.  */
8306       rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8307 
8308       rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8309       emit_insn (gen_movsi (reg, l1));
8310       insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8311     }
8312   else
8313     {
8314       /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8315 	 PC-relative access.  */
8316       /* We use an UNSPEC rather than a LABEL_REF because this label
8317 	 never appears in the code stream.  */
8318       labelno = GEN_INT (pic_labelno++);
8319       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8320       l1 = gen_rtx_CONST (VOIDmode, l1);
8321 
8322       /* On the ARM the PC register contains 'dot + 8' at the time of the
8323 	 addition, on the Thumb it is 'dot + 4'.  */
8324       offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8325       offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8326 				   UNSPEC_SYMBOL_OFFSET);
8327       offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8328 
8329       insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8330 						   labelno));
8331     }
8332 
8333   return insn;
8334 }
8335 
8336 /* Return nonzero if X is valid as an ARM state addressing register.  */
8337 static int
8338 arm_address_register_rtx_p (rtx x, int strict_p)
8339 {
8340   int regno;
8341 
8342   if (!REG_P (x))
8343     return 0;
8344 
8345   regno = REGNO (x);
8346 
8347   if (strict_p)
8348     return ARM_REGNO_OK_FOR_BASE_P (regno);
8349 
8350   return (regno <= LAST_ARM_REGNUM
8351 	  || regno >= FIRST_PSEUDO_REGISTER
8352 	  || regno == FRAME_POINTER_REGNUM
8353 	  || regno == ARG_POINTER_REGNUM);
8354 }
8355 
8356 /* Return TRUE if this rtx is the difference of a symbol and a label,
8357    and will reduce to a PC-relative relocation in the object file.
8358    Expressions like this can be left alone when generating PIC, rather
8359    than forced through the GOT.  */
8360 static int
8361 pcrel_constant_p (rtx x)
8362 {
8363   if (GET_CODE (x) == MINUS)
8364     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8365 
8366   return FALSE;
8367 }
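
/* Illustrative note (not authoritative): an rtx such as
   (minus (symbol_ref "sym") (label_ref ".Llab")) corresponds to the
   assembly expression "sym - .Llab", which can be resolved as a
   PC-relative relocation and therefore never needs a GOT entry.  */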
8368 
8369 /* Return true if X will surely end up in an index register after next
8370    splitting pass.  */
8371 static bool
8372 will_be_in_index_register (const_rtx x)
8373 {
8374   /* arm.md: calculate_pic_address will split this into a register.  */
8375   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8376 }
8377 
8378 /* Return nonzero if X is a valid ARM state address operand.  */
8379 int
8380 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8381 			        int strict_p)
8382 {
8383   bool use_ldrd;
8384   enum rtx_code code = GET_CODE (x);
8385 
8386   if (arm_address_register_rtx_p (x, strict_p))
8387     return 1;
8388 
8389   use_ldrd = (TARGET_LDRD
8390 	      && (mode == DImode || mode == DFmode));
8391 
8392   if (code == POST_INC || code == PRE_DEC
8393       || ((code == PRE_INC || code == POST_DEC)
8394 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8395     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8396 
8397   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8398 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8399 	   && GET_CODE (XEXP (x, 1)) == PLUS
8400 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8401     {
8402       rtx addend = XEXP (XEXP (x, 1), 1);
8403 
8404       /* Don't allow ldrd post-increment by register because it's hard
8405 	 to fix up invalid register choices.  */
8406       if (use_ldrd
8407 	  && GET_CODE (x) == POST_MODIFY
8408 	  && REG_P (addend))
8409 	return 0;
8410 
8411       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8412 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
8413     }
8414 
8415   /* After reload constants split into minipools will have addresses
8416      from a LABEL_REF.  */
8417   else if (reload_completed
8418 	   && (code == LABEL_REF
8419 	       || (code == CONST
8420 		   && GET_CODE (XEXP (x, 0)) == PLUS
8421 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8422 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8423     return 1;
8424 
8425   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8426     return 0;
8427 
8428   else if (code == PLUS)
8429     {
8430       rtx xop0 = XEXP (x, 0);
8431       rtx xop1 = XEXP (x, 1);
8432 
8433       return ((arm_address_register_rtx_p (xop0, strict_p)
8434 	       && ((CONST_INT_P (xop1)
8435 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8436 		   || (!strict_p && will_be_in_index_register (xop1))))
8437 	      || (arm_address_register_rtx_p (xop1, strict_p)
8438 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8439     }
8440 
8441 #if 0
8442   /* Reload currently can't handle MINUS, so disable this for now */
8443   else if (GET_CODE (x) == MINUS)
8444     {
8445       rtx xop0 = XEXP (x, 0);
8446       rtx xop1 = XEXP (x, 1);
8447 
8448       return (arm_address_register_rtx_p (xop0, strict_p)
8449 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8450     }
8451 #endif
8452 
8453   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8454 	   && code == SYMBOL_REF
8455 	   && CONSTANT_POOL_ADDRESS_P (x)
8456 	   && ! (flag_pic
8457 		 && symbol_mentioned_p (get_pool_constant (x))
8458 		 && ! pcrel_constant_p (get_pool_constant (x))))
8459     return 1;
8460 
8461   return 0;
8462 }
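
/* Illustrative note (not authoritative): for a word-sized access the
   function above accepts ARM addresses roughly like

	[r0]			plain base register
	[r0, #4092]		base plus in-range immediate
	[r1, r2]		base plus register index
	[r1, r2, lsl #2]	base plus scaled register index
	[r3], #4		post-increment

   while an out-of-range form such as [r0, #8192] must be legitimized
   first.  */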
8463 
8464 /* Return true if we can avoid creating a constant pool entry for x.  */
8465 static bool
8466 can_avoid_literal_pool_for_label_p (rtx x)
8467 {
8468   /* Normally we can assign constant values to target registers without
8469      the help of the constant pool.  But there are cases where we have to use
8470      the constant pool, for example:
8471      1) assigning a label to a register.
8472      2) sign-extending an 8-bit value to 32 bits, then assigning it to a register.
8473 
8474      Constant pool access in format:
8475      (set (reg r0) (mem (symbol_ref (".LC0"))))
8476      will cause the use of literal pool (later in function arm_reorg).
8477      So here we mark such format as an invalid format, then the compiler
8478      will adjust it into:
8479      (set (reg r0) (symbol_ref (".LC0")))
8480      (set (reg r0) (mem (reg r0))).
8481      No extra register is required, and (mem (reg r0)) won't cause the use
8482      of literal pools.  */
8483   if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
8484       && CONSTANT_POOL_ADDRESS_P (x))
8485     return 1;
8486   return 0;
8487 }
8488 
8489 
8490 /* Return nonzero if X is a valid Thumb-2 address operand.  */
8491 static int
8492 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8493 {
8494   bool use_ldrd;
8495   enum rtx_code code = GET_CODE (x);
8496 
8497   if (TARGET_HAVE_MVE
8498       && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8499     return mve_vector_mem_operand (mode, x, strict_p);
8500 
8501   if (arm_address_register_rtx_p (x, strict_p))
8502     return 1;
8503 
8504   use_ldrd = (TARGET_LDRD
8505 	      && (mode == DImode || mode == DFmode));
8506 
8507   if (code == POST_INC || code == PRE_DEC
8508       || ((code == PRE_INC || code == POST_DEC)
8509 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8510     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8511 
8512   else if ((code == POST_MODIFY || code == PRE_MODIFY)
8513 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8514 	   && GET_CODE (XEXP (x, 1)) == PLUS
8515 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8516     {
8517       /* Thumb-2 only has autoincrement by constant.  */
8518       rtx addend = XEXP (XEXP (x, 1), 1);
8519       HOST_WIDE_INT offset;
8520 
8521       if (!CONST_INT_P (addend))
8522 	return 0;
8523 
8524       offset = INTVAL(addend);
8525       if (GET_MODE_SIZE (mode) <= 4)
8526 	return (offset > -256 && offset < 256);
8527 
8528       return (use_ldrd && offset > -1024 && offset < 1024
8529 	      && (offset & 3) == 0);
8530     }
8531 
8532   /* After reload constants split into minipools will have addresses
8533      from a LABEL_REF.  */
8534   else if (reload_completed
8535 	   && (code == LABEL_REF
8536 	       || (code == CONST
8537 		   && GET_CODE (XEXP (x, 0)) == PLUS
8538 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8539 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8540     return 1;
8541 
8542   else if (mode == TImode
8543 	   || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8544 	   || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8545     return 0;
8546 
8547   else if (code == PLUS)
8548     {
8549       rtx xop0 = XEXP (x, 0);
8550       rtx xop1 = XEXP (x, 1);
8551 
8552       return ((arm_address_register_rtx_p (xop0, strict_p)
8553 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8554 		   || (!strict_p && will_be_in_index_register (xop1))))
8555 	      || (arm_address_register_rtx_p (xop1, strict_p)
8556 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8557     }
8558 
8559   else if (can_avoid_literal_pool_for_label_p (x))
8560     return 0;
8561 
8562   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8563 	   && code == SYMBOL_REF
8564 	   && CONSTANT_POOL_ADDRESS_P (x)
8565 	   && ! (flag_pic
8566 		 && symbol_mentioned_p (get_pool_constant (x))
8567 		 && ! pcrel_constant_p (get_pool_constant (x))))
8568     return 1;
8569 
8570   return 0;
8571 }
8572 
8573 /* Return nonzero if INDEX is valid for an address index operand in
8574    ARM state.  */
8575 static int
8576 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8577 			int strict_p)
8578 {
8579   HOST_WIDE_INT range;
8580   enum rtx_code code = GET_CODE (index);
8581 
8582   /* Standard coprocessor addressing modes.  */
8583   if (TARGET_HARD_FLOAT
8584       && (mode == SFmode || mode == DFmode))
8585     return (code == CONST_INT && INTVAL (index) < 1024
8586 	    && INTVAL (index) > -1024
8587 	    && (INTVAL (index) & 3) == 0);
8588 
8589   /* For quad modes, we restrict the constant offset to be slightly less
8590      than what the instruction format permits.  We do this because for
8591      quad mode moves, we will actually decompose them into two separate
8592      double-mode reads or writes.  INDEX must therefore be a valid
8593      (double-mode) offset and so should INDEX+8.  */
8594   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8595     return (code == CONST_INT
8596 	    && INTVAL (index) < 1016
8597 	    && INTVAL (index) > -1024
8598 	    && (INTVAL (index) & 3) == 0);
8599 
8600   /* We have no such constraint on double mode offsets, so we permit the
8601      full range of the instruction format.  */
8602   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8603     return (code == CONST_INT
8604 	    && INTVAL (index) < 1024
8605 	    && INTVAL (index) > -1024
8606 	    && (INTVAL (index) & 3) == 0);
8607 
8608   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8609     return (code == CONST_INT
8610 	    && INTVAL (index) < 1024
8611 	    && INTVAL (index) > -1024
8612 	    && (INTVAL (index) & 3) == 0);
8613 
8614   if (arm_address_register_rtx_p (index, strict_p)
8615       && (GET_MODE_SIZE (mode) <= 4))
8616     return 1;
8617 
8618   if (mode == DImode || mode == DFmode)
8619     {
8620       if (code == CONST_INT)
8621 	{
8622 	  HOST_WIDE_INT val = INTVAL (index);
8623 
8624 	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8625 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8626 	  if (TARGET_LDRD)
8627 	    return val > -256 && val < 256;
8628 	  else
8629 	    return val > -4096 && val < 4092;
8630 	}
8631 
8632       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8633     }
8634 
8635   if (GET_MODE_SIZE (mode) <= 4
8636       && ! (arm_arch4
8637 	    && (mode == HImode
8638 		|| mode == HFmode
8639 		|| (mode == QImode && outer == SIGN_EXTEND))))
8640     {
8641       if (code == MULT)
8642 	{
8643 	  rtx xiop0 = XEXP (index, 0);
8644 	  rtx xiop1 = XEXP (index, 1);
8645 
8646 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
8647 		   && power_of_two_operand (xiop1, SImode))
8648 		  || (arm_address_register_rtx_p (xiop1, strict_p)
8649 		      && power_of_two_operand (xiop0, SImode)));
8650 	}
8651       else if (code == LSHIFTRT || code == ASHIFTRT
8652 	       || code == ASHIFT || code == ROTATERT)
8653 	{
8654 	  rtx op = XEXP (index, 1);
8655 
8656 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8657 		  && CONST_INT_P (op)
8658 		  && INTVAL (op) > 0
8659 		  && INTVAL (op) <= 31);
8660 	}
8661     }
8662 
8663   /* For ARM v4 we may be doing a sign-extend operation during the
8664      load.  */
8665   if (arm_arch4)
8666     {
8667       if (mode == HImode
8668 	  || mode == HFmode
8669 	  || (outer == SIGN_EXTEND && mode == QImode))
8670 	range = 256;
8671       else
8672 	range = 4096;
8673     }
8674   else
8675     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8676 
8677   return (code == CONST_INT
8678 	  && INTVAL (index) < range
8679 	  && INTVAL (index) > -range);
8680 }
8681 
8682 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8683    index operand.  i.e. 1, 2, 4 or 8.  */
8684 static bool
8685 thumb2_index_mul_operand (rtx op)
8686 {
8687   HOST_WIDE_INT val;
8688 
8689   if (!CONST_INT_P (op))
8690     return false;
8691 
8692   val = INTVAL(op);
8693   return (val == 1 || val == 2 || val == 4 || val == 8);
8694 }
8695 
8696 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
8697 static int
8698 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8699 {
8700   enum rtx_code code = GET_CODE (index);
8701 
8702   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
8703   /* Standard coprocessor addressing modes.  */
8704   if (TARGET_VFP_BASE
8705       && (mode == SFmode || mode == DFmode))
8706     return (code == CONST_INT && INTVAL (index) < 1024
8707 	    /* Thumb-2 allows only > -256 index range for its core register
8708 	       load/stores. Since we allow SF/DF in core registers, we have
8709 	       to use the intersection between -256~4096 (core) and -1024~1024
8710 	       (coprocessor).  */
8711 	    && INTVAL (index) > -256
8712 	    && (INTVAL (index) & 3) == 0);
8713 
8714   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8715     {
8716       /* For DImode assume values will usually live in core regs
8717 	 and only allow LDRD addressing modes.  */
8718       if (!TARGET_LDRD || mode != DImode)
8719 	return (code == CONST_INT
8720 		&& INTVAL (index) < 1024
8721 		&& INTVAL (index) > -1024
8722 		&& (INTVAL (index) & 3) == 0);
8723     }
8724 
8725   /* For quad modes, we restrict the constant offset to be slightly less
8726      than what the instruction format permits.  We do this because for
8727      quad mode moves, we will actually decompose them into two separate
8728      double-mode reads or writes.  INDEX must therefore be a valid
8729      (double-mode) offset and so should INDEX+8.  */
8730   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8731     return (code == CONST_INT
8732 	    && INTVAL (index) < 1016
8733 	    && INTVAL (index) > -1024
8734 	    && (INTVAL (index) & 3) == 0);
8735 
8736   /* We have no such constraint on double mode offsets, so we permit the
8737      full range of the instruction format.  */
8738   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8739     return (code == CONST_INT
8740 	    && INTVAL (index) < 1024
8741 	    && INTVAL (index) > -1024
8742 	    && (INTVAL (index) & 3) == 0);
8743 
8744   if (arm_address_register_rtx_p (index, strict_p)
8745       && (GET_MODE_SIZE (mode) <= 4))
8746     return 1;
8747 
8748   if (mode == DImode || mode == DFmode)
8749     {
8750       if (code == CONST_INT)
8751 	{
8752 	  HOST_WIDE_INT val = INTVAL (index);
8753 	  /* Thumb-2 ldrd only has reg+const addressing modes.
8754 	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8755 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8756 	  if (TARGET_LDRD)
8757 	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8758 	  else
8759 	    return IN_RANGE (val, -255, 4095 - 4);
8760 	}
8761       else
8762 	return 0;
8763     }
8764 
8765   if (code == MULT)
8766     {
8767       rtx xiop0 = XEXP (index, 0);
8768       rtx xiop1 = XEXP (index, 1);
8769 
8770       return ((arm_address_register_rtx_p (xiop0, strict_p)
8771 	       && thumb2_index_mul_operand (xiop1))
8772 	      || (arm_address_register_rtx_p (xiop1, strict_p)
8773 		  && thumb2_index_mul_operand (xiop0)));
8774     }
8775   else if (code == ASHIFT)
8776     {
8777       rtx op = XEXP (index, 1);
8778 
8779       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8780 	      && CONST_INT_P (op)
8781 	      && INTVAL (op) > 0
8782 	      && INTVAL (op) <= 3);
8783     }
8784 
8785   return (code == CONST_INT
8786 	  && INTVAL (index) < 4096
8787 	  && INTVAL (index) > -256);
8788 }
8789 
8790 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
8791 static int
8792 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8793 {
8794   int regno;
8795 
8796   if (!REG_P (x))
8797     return 0;
8798 
8799   regno = REGNO (x);
8800 
8801   if (strict_p)
8802     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8803 
8804   return (regno <= LAST_LO_REGNUM
8805 	  || regno > LAST_VIRTUAL_REGISTER
8806 	  || regno == FRAME_POINTER_REGNUM
8807 	  || (GET_MODE_SIZE (mode) >= 4
8808 	      && (regno == STACK_POINTER_REGNUM
8809 		  || regno >= FIRST_PSEUDO_REGISTER
8810 		  || x == hard_frame_pointer_rtx
8811 		  || x == arg_pointer_rtx)));
8812 }
8813 
8814 /* Return nonzero if x is a legitimate index register.  This is the case
8815    for any base register that can access a QImode object.  */
8816 inline static int
8817 thumb1_index_register_rtx_p (rtx x, int strict_p)
8818 {
8819   return thumb1_base_register_rtx_p (x, QImode, strict_p);
8820 }
8821 
8822 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8823 
8824    The AP may be eliminated to either the SP or the FP, so we use the
8825    least common denominator, e.g. SImode, and offsets from 0 to 64.
8826 
8827    ??? Verify whether the above is the right approach.
8828 
8829    ??? Also, the FP may be eliminated to the SP, so perhaps that
8830    needs special handling also.
8831 
8832    ??? Look at how the mips16 port solves this problem.  It probably uses
8833    better ways to solve some of these problems.
8834 
8835    Although it is not incorrect, we don't accept QImode and HImode
8836    addresses based on the frame pointer or arg pointer until the
8837    reload pass starts.  This is so that eliminating such addresses
8838    into stack based ones won't produce impossible code.  */
8839 int
8840 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8841 {
8842   if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8843     return 0;
8844 
8845   /* ??? Not clear if this is right.  Experiment.  */
8846   if (GET_MODE_SIZE (mode) < 4
8847       && !(reload_in_progress || reload_completed)
8848       && (reg_mentioned_p (frame_pointer_rtx, x)
8849 	  || reg_mentioned_p (arg_pointer_rtx, x)
8850 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
8851 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8852 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8853 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8854     return 0;
8855 
8856   /* Accept any base register.  SP only in SImode or larger.  */
8857   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8858     return 1;
8859 
8860   /* This is PC relative data before arm_reorg runs.  */
8861   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8862 	   && GET_CODE (x) == SYMBOL_REF
8863 	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8864 	   && !arm_disable_literal_pool)
8865     return 1;
8866 
8867   /* This is PC relative data after arm_reorg runs.  */
8868   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8869 	   && reload_completed
8870 	   && (GET_CODE (x) == LABEL_REF
8871 	       || (GET_CODE (x) == CONST
8872 		   && GET_CODE (XEXP (x, 0)) == PLUS
8873 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8874 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8875     return 1;
8876 
8877   /* Post-inc indexing only supported for SImode and larger.  */
8878   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8879 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8880     return 1;
8881 
8882   else if (GET_CODE (x) == PLUS)
8883     {
8884       /* REG+REG address can be any two index registers.  */
8885       /* We disallow FRAME+REG addressing since we know that FRAME
8886 	 will be replaced with STACK, and SP relative addressing only
8887 	 permits SP+OFFSET.  */
8888       if (GET_MODE_SIZE (mode) <= 4
8889 	  && XEXP (x, 0) != frame_pointer_rtx
8890 	  && XEXP (x, 1) != frame_pointer_rtx
8891 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8892 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8893 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8894 	return 1;
8895 
8896       /* REG+const has 5-7 bit offset for non-SP registers.  */
8897       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8898 		|| XEXP (x, 0) == arg_pointer_rtx)
8899 	       && CONST_INT_P (XEXP (x, 1))
8900 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8901 	return 1;
8902 
8903       /* REG+const has 10-bit offset for SP, but only SImode and
8904 	 larger is supported.  */
8905       /* ??? Should probably check for DI/DFmode overflow here
8906 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
8907       else if (REG_P (XEXP (x, 0))
8908 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8909 	       && GET_MODE_SIZE (mode) >= 4
8910 	       && CONST_INT_P (XEXP (x, 1))
8911 	       && INTVAL (XEXP (x, 1)) >= 0
8912 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8913 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8914 	return 1;
8915 
8916       else if (REG_P (XEXP (x, 0))
8917 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8918 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8919 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8920 		       && REGNO (XEXP (x, 0))
8921 			  <= LAST_VIRTUAL_POINTER_REGISTER))
8922 	       && GET_MODE_SIZE (mode) >= 4
8923 	       && CONST_INT_P (XEXP (x, 1))
8924 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8925 	return 1;
8926     }
8927 
8928   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8929 	   && GET_MODE_SIZE (mode) == 4
8930 	   && GET_CODE (x) == SYMBOL_REF
8931 	   && CONSTANT_POOL_ADDRESS_P (x)
8932 	   && !arm_disable_literal_pool
8933 	   && ! (flag_pic
8934 		 && symbol_mentioned_p (get_pool_constant (x))
8935 		 && ! pcrel_constant_p (get_pool_constant (x))))
8936     return 1;
8937 
8938   return 0;
8939 }
8940 
8941 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8942    instruction of mode MODE.  */
8943 int
8944 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8945 {
8946   switch (GET_MODE_SIZE (mode))
8947     {
8948     case 1:
8949       return val >= 0 && val < 32;
8950 
8951     case 2:
8952       return val >= 0 && val < 64 && (val & 1) == 0;
8953 
8954     default:
8955       return (val >= 0
8956 	      && (val + GET_MODE_SIZE (mode)) <= 128
8957 	      && (val & 3) == 0);
8958     }
8959 }
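
/* Illustrative note (not authoritative): these limits mirror the Thumb-1
   immediate-offset load/store forms, e.g.

	ldrb	r0, [r1, #31]	@ byte:     offsets 0..31
	ldrh	r0, [r1, #62]	@ halfword: even offsets 0..62
	ldr	r0, [r1, #124]	@ word:     word-aligned offsets 0..124

   larger offsets have to be built in a register first.  */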
8960 
8961 bool
8962 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8963 {
8964   if (TARGET_ARM)
8965     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8966   else if (TARGET_THUMB2)
8967     return thumb2_legitimate_address_p (mode, x, strict_p);
8968   else /* if (TARGET_THUMB1) */
8969     return thumb1_legitimate_address_p (mode, x, strict_p);
8970 }
8971 
8972 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8973 
8974    Given an rtx X being reloaded into a reg required to be
8975    in class CLASS, return the class of reg to actually use.
8976    In general this is just CLASS, but for the Thumb core registers and
8977    immediate constants we prefer a LO_REGS class or a subset.  */
8978 
8979 static reg_class_t
8980 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8981 {
8982   if (TARGET_32BIT)
8983     return rclass;
8984   else
8985     {
8986       if (rclass == GENERAL_REGS)
8987 	return LO_REGS;
8988       else
8989 	return rclass;
8990     }
8991 }
8992 
8993 /* Build the SYMBOL_REF for __tls_get_addr.  */
8994 
8995 static GTY(()) rtx tls_get_addr_libfunc;
8996 
8997 static rtx
8998 get_tls_get_addr (void)
8999 {
9000   if (!tls_get_addr_libfunc)
9001     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9002   return tls_get_addr_libfunc;
9003 }
9004 
9005 rtx
9006 arm_load_tp (rtx target)
9007 {
9008   if (!target)
9009     target = gen_reg_rtx (SImode);
9010 
9011   if (TARGET_HARD_TP)
9012     {
9013       /* Can return in any reg.  */
9014       emit_insn (gen_load_tp_hard (target));
9015     }
9016   else
9017     {
9018       /* Always returned in r0.  Immediately copy the result into a pseudo,
9019 	 otherwise other uses of r0 (e.g. setting up function arguments) may
9020 	 clobber the value.  */
9021 
9022       rtx tmp;
9023 
9024       if (TARGET_FDPIC)
9025 	{
9026 	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9027 	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9028 
9029 	  emit_insn (gen_load_tp_soft_fdpic ());
9030 
9031 	  /* Restore r9.  */
9032 	  emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
9033 	}
9034       else
9035 	emit_insn (gen_load_tp_soft ());
9036 
9037       tmp = gen_rtx_REG (SImode, R0_REGNUM);
9038       emit_move_insn (target, tmp);
9039     }
9040   return target;
9041 }
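
/* Illustrative note (not authoritative): with TARGET_HARD_TP the thread
   pointer is typically read from the CP15 TPIDRURO register, while the
   soft variant calls the helper __aeabi_read_tp, which returns the thread
   pointer in r0 as assumed above.  */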
9042 
9043 static rtx
9044 load_tls_operand (rtx x, rtx reg)
9045 {
9046   rtx tmp;
9047 
9048   if (reg == NULL_RTX)
9049     reg = gen_reg_rtx (SImode);
9050 
9051   tmp = gen_rtx_CONST (SImode, x);
9052 
9053   emit_move_insn (reg, tmp);
9054 
9055   return reg;
9056 }
9057 
9058 static rtx_insn *
9059 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9060 {
9061   rtx label, labelno = NULL_RTX, sum;
9062 
9063   gcc_assert (reloc != TLS_DESCSEQ);
9064   start_sequence ();
9065 
9066   if (TARGET_FDPIC)
9067     {
9068       sum = gen_rtx_UNSPEC (Pmode,
9069 			    gen_rtvec (2, x, GEN_INT (reloc)),
9070 			    UNSPEC_TLS);
9071     }
9072   else
9073     {
9074       labelno = GEN_INT (pic_labelno++);
9075       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9076       label = gen_rtx_CONST (VOIDmode, label);
9077 
9078       sum = gen_rtx_UNSPEC (Pmode,
9079 			    gen_rtvec (4, x, GEN_INT (reloc), label,
9080 				       GEN_INT (TARGET_ARM ? 8 : 4)),
9081 			    UNSPEC_TLS);
9082     }
9083   reg = load_tls_operand (sum, reg);
9084 
9085   if (TARGET_FDPIC)
9086       emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9087   else if (TARGET_ARM)
9088     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9089   else
9090     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9091 
9092   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9093 				     LCT_PURE, /* LCT_CONST?  */
9094 				     Pmode, reg, Pmode);
9095 
9096   rtx_insn *insns = get_insns ();
9097   end_sequence ();
9098 
9099   return insns;
9100 }
9101 
9102 static rtx
9103 arm_tls_descseq_addr (rtx x, rtx reg)
9104 {
9105   rtx labelno = GEN_INT (pic_labelno++);
9106   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9107   rtx sum = gen_rtx_UNSPEC (Pmode,
9108 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9109 				       gen_rtx_CONST (VOIDmode, label),
9110 				       GEN_INT (!TARGET_ARM)),
9111 			    UNSPEC_TLS);
9112   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9113 
9114   emit_insn (gen_tlscall (x, labelno));
9115   if (!reg)
9116     reg = gen_reg_rtx (SImode);
9117   else
9118     gcc_assert (REGNO (reg) != R0_REGNUM);
9119 
9120   emit_move_insn (reg, reg0);
9121 
9122   return reg;
9123 }
9124 
9125 
9126 rtx
9127 legitimize_tls_address (rtx x, rtx reg)
9128 {
9129   rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9130   rtx_insn *insns;
9131   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9132 
9133   switch (model)
9134     {
9135     case TLS_MODEL_GLOBAL_DYNAMIC:
9136       if (TARGET_GNU2_TLS)
9137 	{
9138 	  gcc_assert (!TARGET_FDPIC);
9139 
9140 	  reg = arm_tls_descseq_addr (x, reg);
9141 
9142 	  tp = arm_load_tp (NULL_RTX);
9143 
9144 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9145 	}
9146       else
9147 	{
9148 	  /* Original scheme */
9149 	  if (TARGET_FDPIC)
9150 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9151 	  else
9152 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9153 	  dest = gen_reg_rtx (Pmode);
9154 	  emit_libcall_block (insns, dest, ret, x);
9155 	}
9156       return dest;
9157 
9158     case TLS_MODEL_LOCAL_DYNAMIC:
9159       if (TARGET_GNU2_TLS)
9160 	{
9161 	  gcc_assert (!TARGET_FDPIC);
9162 
9163 	  reg = arm_tls_descseq_addr (x, reg);
9164 
9165 	  tp = arm_load_tp (NULL_RTX);
9166 
9167 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
9168 	}
9169       else
9170 	{
9171 	  if (TARGET_FDPIC)
9172 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9173 	  else
9174 	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9175 
9176 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9177 	     share the LDM result with other LD model accesses.  */
9178 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9179 				UNSPEC_TLS);
9180 	  dest = gen_reg_rtx (Pmode);
9181 	  emit_libcall_block (insns, dest, ret, eqv);
9182 
9183 	  /* Load the addend.  */
9184 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9185 						     GEN_INT (TLS_LDO32)),
9186 				   UNSPEC_TLS);
9187 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9188 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
9189 	}
9190       return dest;
9191 
9192     case TLS_MODEL_INITIAL_EXEC:
9193       if (TARGET_FDPIC)
9194 	{
9195 	  sum = gen_rtx_UNSPEC (Pmode,
9196 				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9197 				UNSPEC_TLS);
9198 	  reg = load_tls_operand (sum, reg);
9199 	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9200 	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9201 	}
9202       else
9203 	{
9204 	  labelno = GEN_INT (pic_labelno++);
9205 	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9206 	  label = gen_rtx_CONST (VOIDmode, label);
9207 	  sum = gen_rtx_UNSPEC (Pmode,
9208 				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9209 					   GEN_INT (TARGET_ARM ? 8 : 4)),
9210 				UNSPEC_TLS);
9211 	  reg = load_tls_operand (sum, reg);
9212 
9213 	  if (TARGET_ARM)
9214 	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9215 	  else if (TARGET_THUMB2)
9216 	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9217 	  else
9218 	    {
9219 	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9220 	      emit_move_insn (reg, gen_const_mem (SImode, reg));
9221 	    }
9222 	}
9223 
9224       tp = arm_load_tp (NULL_RTX);
9225 
9226       return gen_rtx_PLUS (Pmode, tp, reg);
9227 
9228     case TLS_MODEL_LOCAL_EXEC:
9229       tp = arm_load_tp (NULL_RTX);
9230 
9231       reg = gen_rtx_UNSPEC (Pmode,
9232 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9233 			    UNSPEC_TLS);
9234       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9235 
9236       return gen_rtx_PLUS (Pmode, tp, reg);
9237 
9238     default:
9239       abort ();
9240     }
9241 }
9242 
9243 /* Try machine-dependent ways of modifying an illegitimate address
9244    to be legitimate.  If we find one, return the new, valid address.  */
9245 rtx
9246 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9247 {
9248   if (arm_tls_referenced_p (x))
9249     {
9250       rtx addend = NULL;
9251 
9252       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9253 	{
9254 	  addend = XEXP (XEXP (x, 0), 1);
9255 	  x = XEXP (XEXP (x, 0), 0);
9256 	}
9257 
9258       if (GET_CODE (x) != SYMBOL_REF)
9259 	return x;
9260 
9261       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9262 
9263       x = legitimize_tls_address (x, NULL_RTX);
9264 
9265       if (addend)
9266 	{
9267 	  x = gen_rtx_PLUS (SImode, x, addend);
9268 	  orig_x = x;
9269 	}
9270       else
9271 	return x;
9272     }
9273 
9274   if (TARGET_THUMB1)
9275     return thumb_legitimize_address (x, orig_x, mode);
9276 
9277   if (GET_CODE (x) == PLUS)
9278     {
9279       rtx xop0 = XEXP (x, 0);
9280       rtx xop1 = XEXP (x, 1);
9281 
9282       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9283 	xop0 = force_reg (SImode, xop0);
9284 
9285       if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9286 	  && !symbol_mentioned_p (xop1))
9287 	xop1 = force_reg (SImode, xop1);
9288 
9289       if (ARM_BASE_REGISTER_RTX_P (xop0)
9290 	  && CONST_INT_P (xop1))
9291 	{
9292 	  HOST_WIDE_INT n, low_n;
9293 	  rtx base_reg, val;
9294 	  n = INTVAL (xop1);
9295 
9296 	  /* VFP addressing modes actually allow greater offsets, but for
9297 	     now we just stick with the lowest common denominator.  */
9298 	  if (mode == DImode || mode == DFmode)
9299 	    {
9300 	      low_n = n & 0x0f;
9301 	      n &= ~0x0f;
9302 	      if (low_n > 4)
9303 		{
9304 		  n += 16;
9305 		  low_n -= 16;
9306 		}
9307 	    }
9308 	  else
9309 	    {
9310 	      low_n = ((mode) == TImode ? 0
9311 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9312 	      n -= low_n;
9313 	    }
9314 
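	  /* For example: with mode == SImode and xop1 == 0x12345, the split
	     above leaves low_n == 0x345 and n == 0x12000, so the address is
	     rebuilt as (base_reg = xop0 + 0x12000) plus the in-range
	     offset 0x345.  */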
9315 	  base_reg = gen_reg_rtx (SImode);
9316 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9317 	  emit_move_insn (base_reg, val);
9318 	  x = plus_constant (Pmode, base_reg, low_n);
9319 	}
9320       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9321 	x = gen_rtx_PLUS (SImode, xop0, xop1);
9322     }
9323 
9324   /* XXX We don't allow MINUS any more -- see comment in
9325      arm_legitimate_address_outer_p ().  */
9326   else if (GET_CODE (x) == MINUS)
9327     {
9328       rtx xop0 = XEXP (x, 0);
9329       rtx xop1 = XEXP (x, 1);
9330 
9331       if (CONSTANT_P (xop0))
9332 	xop0 = force_reg (SImode, xop0);
9333 
9334       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9335 	xop1 = force_reg (SImode, xop1);
9336 
9337       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9338 	x = gen_rtx_MINUS (SImode, xop0, xop1);
9339     }
9340 
9341   /* Make sure to take full advantage of the pre-indexed addressing mode
9342      with absolute addresses, which often allows the base register to be
9343      factorized across multiple adjacent memory references, and might
9344      even allow the minipool to be avoided entirely.  */
9345   else if (CONST_INT_P (x) && optimize > 0)
9346     {
9347       unsigned int bits;
9348       HOST_WIDE_INT mask, base, index;
9349       rtx base_reg;
9350 
9351       /* LDR and LDRB can use a 12-bit index, LDRSB and the rest can
9352 	 only use an 8-bit index.  So let's use a 12-bit index for
9353 	 SImode only and hope that arm_gen_constant will enable LDRB
9354 	 to use more bits.  */
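      /* For example: with mode == SImode and x == 0xfffe4, bits is 12, so
	 base becomes 0xff000 and index 0xfe4; base is materialized in a
	 register and the access then uses the 12-bit immediate offset.  */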
9355       bits = (mode == SImode) ? 12 : 8;
9356       mask = (1 << bits) - 1;
9357       base = INTVAL (x) & ~mask;
9358       index = INTVAL (x) & mask;
9359       if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9360 	{
9361 	  /* It'll most probably be more efficient to generate the
9362 	     base with more bits set and use a negative index instead.
9363 	     Don't do this for Thumb as negative offsets are much more
9364 	     limited.  */
9365 	  base |= mask;
9366 	  index -= mask;
9367 	}
9368       base_reg = force_reg (SImode, GEN_INT (base));
9369       x = plus_constant (Pmode, base_reg, index);
9370     }
9371 
9372   if (flag_pic)
9373     {
9374       /* We need to find and carefully transform any SYMBOL and LABEL
9375 	 references; so go back to the original address expression.  */
9376       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9377 					  false /*compute_now*/);
9378 
9379       if (new_x != orig_x)
9380 	x = new_x;
9381     }
9382 
9383   return x;
9384 }
9385 
9386 
9387 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9388    to be legitimate.  If we find one, return the new, valid address.  */
9389 rtx
9390 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9391 {
9392   if (GET_CODE (x) == PLUS
9393       && CONST_INT_P (XEXP (x, 1))
9394       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9395 	  || INTVAL (XEXP (x, 1)) < 0))
9396     {
9397       rtx xop0 = XEXP (x, 0);
9398       rtx xop1 = XEXP (x, 1);
9399       HOST_WIDE_INT offset = INTVAL (xop1);
9400 
9401       /* Try and fold the offset into a biasing of the base register and
9402 	 then offsetting that.  Don't do this when optimizing for space
9403 	 since it can cause too many CSEs.  */
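      /* For example: a QImode access at base + 280 gives delta == 25, so
	 the base register is biased by 255 and the final dereference then
	 uses the in-range offset 25.  */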
9404       if (optimize_size && offset >= 0
9405 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
9406 	{
9407 	  HOST_WIDE_INT delta;
9408 
9409 	  if (offset >= 256)
9410 	    delta = offset - (256 - GET_MODE_SIZE (mode));
9411 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9412 	    delta = 31 * GET_MODE_SIZE (mode);
9413 	  else
9414 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
9415 
9416 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9417 				NULL_RTX);
9418 	  x = plus_constant (Pmode, xop0, delta);
9419 	}
9420       else if (offset < 0 && offset > -256)
9421 	/* Small negative offsets are best done with a subtract before the
9422 	   dereference, since forcing these into a register normally takes
9423 	   two instructions.  */
9424 	x = force_operand (x, NULL_RTX);
9425       else
9426 	{
9427 	  /* For the remaining cases, force the constant into a register.  */
9428 	  xop1 = force_reg (SImode, xop1);
9429 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
9430 	}
9431     }
9432   else if (GET_CODE (x) == PLUS
9433 	   && s_register_operand (XEXP (x, 1), SImode)
9434 	   && !s_register_operand (XEXP (x, 0), SImode))
9435     {
9436       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9437 
9438       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9439     }
9440 
9441   if (flag_pic)
9442     {
9443       /* We need to find and carefully transform any SYMBOL and LABEL
9444 	 references; so go back to the original address expression.  */
9445       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9446 					  false /*compute_now*/);
9447 
9448       if (new_x != orig_x)
9449 	x = new_x;
9450     }
9451 
9452   return x;
9453 }
9454 
9455 /* Return TRUE if X contains any TLS symbol references.  */
9456 
9457 bool
9458 arm_tls_referenced_p (rtx x)
9459 {
9460   if (! TARGET_HAVE_TLS)
9461     return false;
9462 
9463   subrtx_iterator::array_type array;
9464   FOR_EACH_SUBRTX (iter, array, x, ALL)
9465     {
9466       const_rtx x = *iter;
9467       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
9468 	{
9469 	  /* ARM currently does not provide relocations to encode TLS variables
9470 	     into AArch32 instructions, only data, so there is currently no
9471 	     way to implement these if a literal pool is disabled.  */
9472 	  if (arm_disable_literal_pool)
9473 	    sorry ("accessing thread-local storage is not currently supported "
9474 		   "with %<-mpure-code%> or %<-mslow-flash-data%>");
9475 
9476 	  return true;
9477 	}
9478 
9479       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9480 	 TLS offsets, not real symbol references.  */
9481       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9482 	iter.skip_subrtxes ();
9483     }
9484   return false;
9485 }
9486 
9487 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9488 
9489    On the ARM, allow any integer (invalid ones are removed later by insn
9490    patterns), nice doubles and symbol_refs which refer to the function's
9491    constant pool XXX.
9492 
9493    When generating pic allow anything.  */
9494 
9495 static bool
9496 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9497 {
9498   if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9499     return false;
9500 
9501   return flag_pic || !label_mentioned_p (x);
9502 }
9503 
9504 static bool
9505 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9506 {
9507   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9508      RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
9509      for ARMv8-M Baseline or later the result is valid.  */
9510   if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9511     x = XEXP (x, 0);
9512 
9513   return (CONST_INT_P (x)
9514 	  || CONST_DOUBLE_P (x)
9515 	  || CONSTANT_ADDRESS_P (x)
9516 	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
9517 	  || flag_pic);
9518 }
9519 
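/* Top-level legitimate-constant check: reject anything that cannot be
   forced into the constant pool, then defer to the 32-bit or Thumb-1
   variant above as appropriate.  */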
9520 static bool
9521 arm_legitimate_constant_p (machine_mode mode, rtx x)
9522 {
9523   return (!arm_cannot_force_const_mem (mode, x)
9524 	  && (TARGET_32BIT
9525 	      ? arm_legitimate_constant_p_1 (mode, x)
9526 	      : thumb_legitimate_constant_p (mode, x)));
9527 }
9528 
9529 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
9530 
9531 static bool
9532 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9533 {
9534   rtx base, offset;
9535   split_const (x, &base, &offset);
9536 
9537   if (SYMBOL_REF_P (base))
9538     {
9539       /* Function symbols cannot have an offset due to the Thumb bit.  */
9540       if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9541 	  && INTVAL (offset) != 0)
9542 	return true;
9543 
9544       if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9545 	  && !offset_within_block_p (base, INTVAL (offset)))
9546 	return true;
9547     }
9548   return arm_tls_referenced_p (x);
9549 }
9550 
9551 #define REG_OR_SUBREG_REG(X)						\
9552   (REG_P (X)							\
9553    || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
9554 
9555 #define REG_OR_SUBREG_RTX(X)			\
9556    (REG_P (X) ? (X) : SUBREG_REG (X))
9557 
9558 static inline int
9559 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9560 {
9561   machine_mode mode = GET_MODE (x);
9562   int total, words;
9563 
9564   switch (code)
9565     {
9566     case ASHIFT:
9567     case ASHIFTRT:
9568     case LSHIFTRT:
9569     case ROTATERT:
9570       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9571 
9572     case PLUS:
9573     case MINUS:
9574     case COMPARE:
9575     case NEG:
9576     case NOT:
9577       return COSTS_N_INSNS (1);
9578 
9579     case MULT:
9580       if (arm_arch6m && arm_m_profile_small_mul)
9581 	return COSTS_N_INSNS (32);
9582 
9583       if (CONST_INT_P (XEXP (x, 1)))
9584 	{
9585 	  int cycles = 0;
9586 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9587 
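	  /* The loop counts one cycle for every two bits of the constant
	     multiplier; e.g. i == 0x65 takes four iterations, giving a
	     final cost of COSTS_N_INSNS (2) + 4.  */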
9588 	  while (i)
9589 	    {
9590 	      i >>= 2;
9591 	      cycles++;
9592 	    }
9593 	  return COSTS_N_INSNS (2) + cycles;
9594 	}
9595       return COSTS_N_INSNS (1) + 16;
9596 
9597     case SET:
9598       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9599 	 the mode.  */
9600       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9601       return (COSTS_N_INSNS (words)
9602 	      + 4 * ((MEM_P (SET_SRC (x)))
9603 		     + MEM_P (SET_DEST (x))));
9604 
9605     case CONST_INT:
9606       if (outer == SET)
9607 	{
9608 	  if (UINTVAL (x) < 256
9609 	      /* 16-bit constant.  */
9610 	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9611 	    return 0;
9612 	  if (thumb_shiftable_const (INTVAL (x)))
9613 	    return COSTS_N_INSNS (2);
9614 	  return arm_disable_literal_pool
9615 	    ? COSTS_N_INSNS (8)
9616 	    : COSTS_N_INSNS (3);
9617 	}
9618       else if ((outer == PLUS || outer == COMPARE)
9619 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
9620 	return 0;
9621       else if ((outer == IOR || outer == XOR || outer == AND)
9622 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
9623 	return COSTS_N_INSNS (1);
9624       else if (outer == AND)
9625 	{
9626 	  int i;
9627 	  /* This duplicates the tests in the andsi3 expander.  */
9628 	  for (i = 9; i <= 31; i++)
9629 	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9630 		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9631 	      return COSTS_N_INSNS (2);
9632 	}
9633       else if (outer == ASHIFT || outer == ASHIFTRT
9634 	       || outer == LSHIFTRT)
9635 	return 0;
9636       return COSTS_N_INSNS (2);
9637 
9638     case CONST:
9639     case CONST_DOUBLE:
9640     case LABEL_REF:
9641     case SYMBOL_REF:
9642       return COSTS_N_INSNS (3);
9643 
9644     case UDIV:
9645     case UMOD:
9646     case DIV:
9647     case MOD:
9648       return 100;
9649 
9650     case TRUNCATE:
9651       return 99;
9652 
9653     case AND:
9654     case XOR:
9655     case IOR:
9656       /* XXX guess.  */
9657       return 8;
9658 
9659     case MEM:
9660       /* XXX another guess.  */
9661       /* Memory costs quite a lot for the first word, but subsequent words
9662 	 load at the equivalent of a single insn each.  */
9663       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9664 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9665 		 ? 4 : 0));
9666 
9667     case IF_THEN_ELSE:
9668       /* XXX a guess.  */
9669       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9670 	return 14;
9671       return 2;
9672 
9673     case SIGN_EXTEND:
9674     case ZERO_EXTEND:
9675       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9676       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9677 
9678       if (mode == SImode)
9679 	return total;
9680 
9681       if (arm_arch6)
9682 	return total + COSTS_N_INSNS (1);
9683 
9684       /* Assume a two-shift sequence.  Increase the cost slightly so
9685 	 we prefer actual shifts over an extend operation.  */
9686       return total + 1 + COSTS_N_INSNS (2);
9687 
9688     default:
9689       return 99;
9690     }
9691 }
9692 
9693 /* Estimates the size cost of Thumb-1 instructions.
9694    For now most of the code is copied from thumb1_rtx_costs.  We need more
9695    fine-grained tuning when we have more related test cases.  */
9696 static inline int
9697 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9698 {
9699   machine_mode mode = GET_MODE (x);
9700   int words, cost;
9701 
9702   switch (code)
9703     {
9704     case ASHIFT:
9705     case ASHIFTRT:
9706     case LSHIFTRT:
9707     case ROTATERT:
9708       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9709 
9710     case PLUS:
9711     case MINUS:
9712       /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9713 	 defined by RTL expansion, especially for the expansion of
9714 	 multiplication.  */
9715       if ((GET_CODE (XEXP (x, 0)) == MULT
9716 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9717 	  || (GET_CODE (XEXP (x, 1)) == MULT
9718 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9719 	return COSTS_N_INSNS (2);
9720       /* Fall through.  */
9721     case COMPARE:
9722     case NEG:
9723     case NOT:
9724       return COSTS_N_INSNS (1);
9725 
9726     case MULT:
9727       if (CONST_INT_P (XEXP (x, 1)))
9728         {
9729           /* The Thumb-1 mul instruction can't operate on a constant; we
9730              must load it into a register first.  */
9731           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9732 	  /* For the targets which have a very small and high-latency multiply
9733 	     unit, we prefer to synthesize the mult with up to 5 instructions,
9734 	     giving a good balance between size and performance.  */
9735 	  if (arm_arch6m && arm_m_profile_small_mul)
9736 	    return COSTS_N_INSNS (5);
9737 	  else
9738 	    return COSTS_N_INSNS (1) + const_size;
9739         }
9740       return COSTS_N_INSNS (1);
9741 
9742     case SET:
9743       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9744 	 the mode.  */
9745       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9746       cost = COSTS_N_INSNS (words);
9747       if (satisfies_constraint_J (SET_SRC (x))
9748 	  || satisfies_constraint_K (SET_SRC (x))
9749 	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
9750 	  || (CONST_INT_P (SET_SRC (x))
9751 	      && UINTVAL (SET_SRC (x)) >= 256
9752 	      && TARGET_HAVE_MOVT
9753 	      && satisfies_constraint_j (SET_SRC (x)))
9754 	     /* thumb1_movdi_insn.  */
9755 	  || ((words > 1) && MEM_P (SET_SRC (x))))
9756 	cost += COSTS_N_INSNS (1);
9757       return cost;
9758 
9759     case CONST_INT:
9760       if (outer == SET)
9761         {
9762           if (UINTVAL (x) < 256)
9763             return COSTS_N_INSNS (1);
9764 	  /* movw is 4 bytes long.  */
9765 	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9766 	    return COSTS_N_INSNS (2);
9767 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9768 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9769             return COSTS_N_INSNS (2);
9770 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9771           if (thumb_shiftable_const (INTVAL (x)))
9772             return COSTS_N_INSNS (2);
9773 	  return arm_disable_literal_pool
9774 	    ? COSTS_N_INSNS (8)
9775 	    : COSTS_N_INSNS (3);
9776         }
9777       else if ((outer == PLUS || outer == COMPARE)
9778                && INTVAL (x) < 256 && INTVAL (x) > -256)
9779         return 0;
9780       else if ((outer == IOR || outer == XOR || outer == AND)
9781                && INTVAL (x) < 256 && INTVAL (x) >= -256)
9782         return COSTS_N_INSNS (1);
9783       else if (outer == AND)
9784         {
9785           int i;
9786           /* This duplicates the tests in the andsi3 expander.  */
9787           for (i = 9; i <= 31; i++)
9788             if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9789                 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9790               return COSTS_N_INSNS (2);
9791         }
9792       else if (outer == ASHIFT || outer == ASHIFTRT
9793                || outer == LSHIFTRT)
9794         return 0;
9795       return COSTS_N_INSNS (2);
9796 
9797     case CONST:
9798     case CONST_DOUBLE:
9799     case LABEL_REF:
9800     case SYMBOL_REF:
9801       return COSTS_N_INSNS (3);
9802 
9803     case UDIV:
9804     case UMOD:
9805     case DIV:
9806     case MOD:
9807       return 100;
9808 
9809     case TRUNCATE:
9810       return 99;
9811 
9812     case AND:
9813     case XOR:
9814     case IOR:
9815       return COSTS_N_INSNS (1);
9816 
9817     case MEM:
9818       return (COSTS_N_INSNS (1)
9819 	      + COSTS_N_INSNS (1)
9820 		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9821               + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9822                  ? COSTS_N_INSNS (1) : 0));
9823 
9824     case IF_THEN_ELSE:
9825       /* XXX a guess.  */
9826       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9827         return 14;
9828       return 2;
9829 
9830     case ZERO_EXTEND:
9831       /* XXX still guessing.  */
9832       switch (GET_MODE (XEXP (x, 0)))
9833         {
9834           case E_QImode:
9835             return (1 + (mode == DImode ? 4 : 0)
9836                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9837 
9838           case E_HImode:
9839             return (4 + (mode == DImode ? 4 : 0)
9840                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9841 
9842           case E_SImode:
9843             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9844 
9845           default:
9846             return 99;
9847         }
9848 
9849     default:
9850       return 99;
9851     }
9852 }
9853 
9854 /* Helper function for arm_rtx_costs.  OP is a PLUS; if one of its
9855    operands adds in the carry flag, return the other operand.  If
9856    neither operand is a carry, return OP unchanged.  */
9857 static rtx
9858 strip_carry_operation (rtx op)
9859 {
9860   gcc_assert (GET_CODE (op) == PLUS);
9861   if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9862     return XEXP (op, 1);
9863   else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9864     return XEXP (op, 0);
9865   return op;
9866 }
9867 
9868 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9869    operand, then return the operand that is being shifted.  If the shift
9870    is not by a constant, then set SHIFT_REG to point to the operand.
9871    Return NULL if OP is not a shifter operand.  */
9872 static rtx
9873 shifter_op_p (rtx op, rtx *shift_reg)
9874 {
9875   enum rtx_code code = GET_CODE (op);
9876 
9877   if (code == MULT && CONST_INT_P (XEXP (op, 1))
9878       && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9879     return XEXP (op, 0);
9880   else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9881     return XEXP (op, 0);
9882   else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9883 	   || code == ASHIFTRT)
9884     {
9885       if (!CONST_INT_P (XEXP (op, 1)))
9886 	*shift_reg = XEXP (op, 1);
9887       return XEXP (op, 0);
9888     }
9889 
9890   return NULL;
9891 }
9892 
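/* Helper for the RTX cost functions: adjust *COST for the UNSPEC or
   UNSPEC_VOLATILE expression X.  Extra per-operation costs are only added
   when SPEED_P is true.  Returns true so that the caller does not recurse
   into the operands.  */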
9893 static bool
9894 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9895 {
9896   const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9897   rtx_code code = GET_CODE (x);
9898   gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9899 
9900   switch (XINT (x, 1))
9901     {
9902     case UNSPEC_UNALIGNED_LOAD:
9903       /* We can only do unaligned loads into the integer unit, and we can't
9904 	 use LDM or LDRD.  */
9905       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9906       if (speed_p)
9907 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9908 		  + extra_cost->ldst.load_unaligned);
9909 
9910 #ifdef NOT_YET
9911       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9912 				 ADDR_SPACE_GENERIC, speed_p);
9913 #endif
9914       return true;
9915 
9916     case UNSPEC_UNALIGNED_STORE:
9917       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9918       if (speed_p)
9919 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9920 		  + extra_cost->ldst.store_unaligned);
9921 
9922       *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9923 #ifdef NOT_YET
9924       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9925 				 ADDR_SPACE_GENERIC, speed_p);
9926 #endif
9927       return true;
9928 
9929     case UNSPEC_VRINTZ:
9930     case UNSPEC_VRINTP:
9931     case UNSPEC_VRINTM:
9932     case UNSPEC_VRINTR:
9933     case UNSPEC_VRINTX:
9934     case UNSPEC_VRINTA:
9935       if (speed_p)
9936         *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9937 
9938       return true;
9939     default:
9940       *cost = COSTS_N_INSNS (2);
9941       break;
9942     }
9943   return true;
9944 }
9945 
9946 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9947    call (one insn for -Os) and then one for processing the result.  */
9948 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
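/* For example: LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when SPEED_P
   is true and to COSTS_N_INSNS (4) when costing for size.  */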
9949 
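/* Helper macro for the narrow-mode (QImode/HImode) PLUS and MINUS cases
   below: if operand IDX of X is a suitable left-shift operand, add the cost
   of an arith-with-shift operation and of both operands to *COST, then
   return from the enclosing cost function.  */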
9950 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9951 	do								\
9952 	  {								\
9953 	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9954 	    if (shift_op != NULL					\
9955 	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9956 	      {								\
9957 	        if (shift_reg)						\
9958 		  {							\
9959 		    if (speed_p)					\
9960 		      *cost += extra_cost->alu.arith_shift_reg;		\
9961 		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
9962 				       ASHIFT, 1, speed_p);		\
9963 		  }							\
9964 	        else if (speed_p)					\
9965 		  *cost += extra_cost->alu.arith_shift;			\
9966 									\
9967 		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
9968 				    ASHIFT, 0, speed_p)			\
9969 			  + rtx_cost (XEXP (x, 1 - IDX),		\
9970 				      GET_MODE (shift_op),		\
9971 			              OP, 1, speed_p));			\
9972 	        return true;						\
9973 	      }								\
9974 	  }								\
9975 	while (0)
9976 
9977 /* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
9978    considering the costs of the addressing mode and memory access
9979    separately.  */
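/* For example, when costing for speed: an SImode load whose address is a
   PRE_INC is costed as COSTS_N_INSNS (1)
   + current_tune->addr_mode_costs->integer[AMO_WB] + extra_cost->ldst.load.  */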
9980 static bool
9981 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9982 	       int *cost, bool speed_p)
9983 {
9984   machine_mode mode = GET_MODE (x);
9985 
9986   *cost = COSTS_N_INSNS (1);
9987 
9988   if (flag_pic
9989       && GET_CODE (XEXP (x, 0)) == PLUS
9990       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9991     /* This will be split into two instructions.  Add the cost of the
9992        additional instruction here.  The cost of the memory access is computed
9993        below.  See arm.md:calculate_pic_address.  */
9994     *cost += COSTS_N_INSNS (1);
9995 
9996   /* Calculate cost of the addressing mode.  */
9997   if (speed_p)
9998     {
9999       arm_addr_mode_op op_type;
10000       switch (GET_CODE (XEXP (x, 0)))
10001 	{
10002 	default:
10003 	case REG:
10004 	  op_type = AMO_DEFAULT;
10005 	  break;
10006 	case MINUS:
10007 	  /* MINUS does not appear in RTL, but the architecture supports it,
10008 	     so handle this case defensively.  */
10009 	  /* fall through */
10010 	case PLUS:
10011 	  op_type = AMO_NO_WB;
10012 	  break;
10013 	case PRE_INC:
10014 	case PRE_DEC:
10015 	case POST_INC:
10016 	case POST_DEC:
10017 	case PRE_MODIFY:
10018 	case POST_MODIFY:
10019 	  op_type = AMO_WB;
10020 	  break;
10021 	}
10022 
10023       if (VECTOR_MODE_P (mode))
10024 	  *cost += current_tune->addr_mode_costs->vector[op_type];
10025       else if (FLOAT_MODE_P (mode))
10026 	  *cost += current_tune->addr_mode_costs->fp[op_type];
10027       else
10028 	  *cost += current_tune->addr_mode_costs->integer[op_type];
10029     }
10030 
10031   /* Calculate cost of memory access.  */
10032   if (speed_p)
10033     {
10034       if (FLOAT_MODE_P (mode))
10035 	{
10036 	  if (GET_MODE_SIZE (mode) == 8)
10037 	    *cost += extra_cost->ldst.loadd;
10038 	  else
10039 	    *cost += extra_cost->ldst.loadf;
10040 	}
10041       else if (VECTOR_MODE_P (mode))
10042 	*cost += extra_cost->ldst.loadv;
10043       else
10044 	{
10045 	  /* Integer modes */
10046 	  if (GET_MODE_SIZE (mode) == 8)
10047 	    *cost += extra_cost->ldst.ldrd;
10048 	  else
10049 	    *cost += extra_cost->ldst.load;
10050 	}
10051     }
10052 
10053   return true;
10054 }
10055 
10056 /* RTX costs.  Make an estimate of the cost of executing the operation
10057    X, which is contained within an operation with code OUTER_CODE.
10058    SPEED_P indicates whether the cost desired is the performance cost,
10059    or the size cost.  The estimate is stored in COST and the return
10060    value is TRUE if the cost calculation is final, or FALSE if the
10061    caller should recurse through the operands of X to add additional
10062    costs.
10063 
10064    We currently make no attempt to model the size savings of Thumb-2
10065    16-bit instructions.  At the normal points in compilation where
10066    this code is called we have no measure of whether the condition
10067    flags are live or not, and thus no realistic way to determine what
10068    the size will eventually be.  */
10069 static bool
10070 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10071 		   const struct cpu_cost_table *extra_cost,
10072 		   int *cost, bool speed_p)
10073 {
10074   machine_mode mode = GET_MODE (x);
10075 
10076   *cost = COSTS_N_INSNS (1);
10077 
10078   if (TARGET_THUMB1)
10079     {
10080       if (speed_p)
10081 	*cost = thumb1_rtx_costs (x, code, outer_code);
10082       else
10083 	*cost = thumb1_size_rtx_costs (x, code, outer_code);
10084       return true;
10085     }
10086 
10087   switch (code)
10088     {
10089     case SET:
10090       *cost = 0;
10091       /* SET RTXs don't have a mode so we get it from the destination.  */
10092       mode = GET_MODE (SET_DEST (x));
10093 
10094       if (REG_P (SET_SRC (x))
10095 	  && REG_P (SET_DEST (x)))
10096 	{
10097 	  /* Assume that most copies can be done with a single insn,
10098 	     unless we don't have HW FP, in which case everything
10099 	     larger than word mode will require two insns.  */
10100 	  *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10101 				   && GET_MODE_SIZE (mode) > 4)
10102 				  || mode == DImode)
10103 				 ? 2 : 1);
10104 	  /* Conditional register moves can be encoded
10105 	     in 16 bits in Thumb mode.  */
10106 	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10107 	    *cost >>= 1;
10108 
10109 	  return true;
10110 	}
10111 
10112       if (CONST_INT_P (SET_SRC (x)))
10113 	{
10114 	  /* Handle CONST_INT here, since the value doesn't have a mode
10115 	     and we would otherwise be unable to work out the true cost.  */
10116 	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10117 			    0, speed_p);
10118 	  outer_code = SET;
10119 	  /* Slightly lower the cost of setting a core reg to a constant.
10120 	     This helps break up chains and allows for better scheduling.  */
10121 	  if (REG_P (SET_DEST (x))
10122 	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
10123 	    *cost -= 1;
10124 	  x = SET_SRC (x);
10125 	  /* Immediate moves with an immediate in the range [0, 255] can be
10126 	     encoded in 16 bits in Thumb mode.  */
10127 	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10128 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10129 	    *cost >>= 1;
10130 	  goto const_int_cost;
10131 	}
10132 
10133       return false;
10134 
10135     case MEM:
10136       return arm_mem_costs (x, extra_cost, cost, speed_p);
10137 
10138     case PARALLEL:
10139     {
10140       /* Calculations of LDM costs are complex.  We assume an initial cost
10141 	 (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
10142 	 registers; each additional group of ldm_regs_per_insn_subsequent
10143 	 registers then costs one more insn.  The formula for N regs is
10144 	 thus:
10145 
10146 	 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10147 				   + ldm_regs_per_insn_subsequent - 1)
10148 				  / ldm_regs_per_insn_subsequent).
10149 
10150 	 Additional costs may also be added for addressing.  A similar
10151 	 formula is used for STM.  */
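      /* For example: with ldm_regs_per_insn_1st == 3 and
	 ldm_regs_per_insn_subsequent == 2, an 8-register LDM is costed as
	 ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (3).  */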
10152 
10153       bool is_ldm = load_multiple_operation (x, SImode);
10154       bool is_stm = store_multiple_operation (x, SImode);
10155 
10156       if (is_ldm || is_stm)
10157         {
10158 	  if (speed_p)
10159 	    {
10160 	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
10161 	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
10162 	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
10163 	                              : extra_cost->ldst.stm_regs_per_insn_1st;
10164 	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
10165 	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10166 	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
10167 
10168 	      *cost += regs_per_insn_1st
10169 	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10170 					    + regs_per_insn_sub - 1)
10171 					  / regs_per_insn_sub);
10172 	      return true;
10173 	    }
10174 
10175         }
10176       return false;
10177     }
10178     case DIV:
10179     case UDIV:
10180       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10181 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10182 	*cost += COSTS_N_INSNS (speed_p
10183 			       ? extra_cost->fp[mode != SFmode].div : 0);
10184       else if (mode == SImode && TARGET_IDIV)
10185 	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10186       else
10187 	*cost = LIBCALL_COST (2);
10188 
10189       /* Make the cost of sdiv more expensive so that when both sdiv and
10190 	 udiv are possible, udiv is preferred.  */
10191       *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10192       return false;	/* All arguments must be in registers.  */
10193 
10194     case MOD:
10195       /* MOD by a power of 2 can be expanded as:
10196 	 rsbs    r1, r0, #0
10197 	 and     r0, r0, #(n - 1)
10198 	 and     r1, r1, #(n - 1)
10199 	 rsbpl   r0, r1, #0.  */
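      /* The starting cost of one insn plus the three added below accounts
	 for that four-instruction sequence.  */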
10200       if (CONST_INT_P (XEXP (x, 1))
10201 	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10202 	  && mode == SImode)
10203 	{
10204 	  *cost += COSTS_N_INSNS (3);
10205 
10206 	  if (speed_p)
10207 	    *cost += 2 * extra_cost->alu.logical
10208 		     + extra_cost->alu.arith;
10209 	  return true;
10210 	}
10211 
10212     /* Fall-through.  */
10213     case UMOD:
10214       /* Make the cost of sdiv more expensive so that when both sdiv and
10215 	 udiv are possible, udiv is preferred.  */
10216       *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10217       return false;	/* All arguments must be in registers.  */
10218 
10219     case ROTATE:
10220       if (mode == SImode && REG_P (XEXP (x, 1)))
10221 	{
10222 	  *cost += (COSTS_N_INSNS (1)
10223 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10224 	  if (speed_p)
10225 	    *cost += extra_cost->alu.shift_reg;
10226 	  return true;
10227 	}
10228       /* Fall through */
10229     case ROTATERT:
10230     case ASHIFT:
10231     case LSHIFTRT:
10232     case ASHIFTRT:
10233       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10234 	{
10235 	  *cost += (COSTS_N_INSNS (2)
10236 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10237 	  if (speed_p)
10238 	    *cost += 2 * extra_cost->alu.shift;
10239 	  /* Slightly disparage left shift by 1 so we prefer adddi3.  */
10240 	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10241 	    *cost += 1;
10242 	  return true;
10243 	}
10244       else if (mode == SImode)
10245 	{
10246 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10247 	  /* Slightly disparage register shifts at -Os, but not by much.  */
10248 	  if (!CONST_INT_P (XEXP (x, 1)))
10249 	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10250 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10251 	  return true;
10252 	}
10253       else if (GET_MODE_CLASS (mode) == MODE_INT
10254 	       && GET_MODE_SIZE (mode) < 4)
10255 	{
10256 	  if (code == ASHIFT)
10257 	    {
10258 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10259 	      /* Slightly disparage register shifts at -Os, but not by
10260 	         much.  */
10261 	      if (!CONST_INT_P (XEXP (x, 1)))
10262 		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
10263 			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10264 	    }
10265 	  else if (code == LSHIFTRT || code == ASHIFTRT)
10266 	    {
10267 	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10268 		{
10269 		  /* Can use SBFX/UBFX.  */
10270 		  if (speed_p)
10271 		    *cost += extra_cost->alu.bfx;
10272 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10273 		}
10274 	      else
10275 		{
10276 		  *cost += COSTS_N_INSNS (1);
10277 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10278 		  if (speed_p)
10279 		    {
10280 		      if (CONST_INT_P (XEXP (x, 1)))
10281 			*cost += 2 * extra_cost->alu.shift;
10282 		      else
10283 			*cost += (extra_cost->alu.shift
10284 				  + extra_cost->alu.shift_reg);
10285 		    }
10286 		  else
10287 		    /* Slightly disparage register shifts.  */
10288 		    *cost += !CONST_INT_P (XEXP (x, 1));
10289 		}
10290 	    }
10291 	  else /* Rotates.  */
10292 	    {
10293 	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10294 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10295 	      if (speed_p)
10296 		{
10297 		  if (CONST_INT_P (XEXP (x, 1)))
10298 		    *cost += (2 * extra_cost->alu.shift
10299 			      + extra_cost->alu.log_shift);
10300 		  else
10301 		    *cost += (extra_cost->alu.shift
10302 			      + extra_cost->alu.shift_reg
10303 			      + extra_cost->alu.log_shift_reg);
10304 		}
10305 	    }
10306 	  return true;
10307 	}
10308 
10309       *cost = LIBCALL_COST (2);
10310       return false;
10311 
10312     case BSWAP:
10313       if (arm_arch6)
10314         {
10315           if (mode == SImode)
10316             {
10317               if (speed_p)
10318                 *cost += extra_cost->alu.rev;
10319 
10320               return false;
10321             }
10322         }
10323       else
10324         {
10325         /* No rev instruction available.  Look at arm_legacy_rev
10326            and thumb_legacy_rev for the form of RTL used then.  */
10327           if (TARGET_THUMB)
10328             {
10329               *cost += COSTS_N_INSNS (9);
10330 
10331               if (speed_p)
10332                 {
10333                   *cost += 6 * extra_cost->alu.shift;
10334                   *cost += 3 * extra_cost->alu.logical;
10335                 }
10336             }
10337           else
10338             {
10339               *cost += COSTS_N_INSNS (4);
10340 
10341               if (speed_p)
10342                 {
10343                   *cost += 2 * extra_cost->alu.shift;
10344                   *cost += extra_cost->alu.arith_shift;
10345                   *cost += 2 * extra_cost->alu.logical;
10346                 }
10347             }
10348           return true;
10349         }
10350       return false;
10351 
10352     case MINUS:
10353       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10354 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10355 	{
10356 	  if (GET_CODE (XEXP (x, 0)) == MULT
10357 	      || GET_CODE (XEXP (x, 1)) == MULT)
10358 	    {
10359 	      rtx mul_op0, mul_op1, sub_op;
10360 
10361 	      if (speed_p)
10362 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10363 
10364 	      if (GET_CODE (XEXP (x, 0)) == MULT)
10365 		{
10366 		  mul_op0 = XEXP (XEXP (x, 0), 0);
10367 		  mul_op1 = XEXP (XEXP (x, 0), 1);
10368 		  sub_op = XEXP (x, 1);
10369 		}
10370 	      else
10371 		{
10372 		  mul_op0 = XEXP (XEXP (x, 1), 0);
10373 		  mul_op1 = XEXP (XEXP (x, 1), 1);
10374 		  sub_op = XEXP (x, 0);
10375 		}
10376 
10377 	      /* The first operand of the multiply may be optionally
10378 		 negated.  */
10379 	      if (GET_CODE (mul_op0) == NEG)
10380 		mul_op0 = XEXP (mul_op0, 0);
10381 
10382 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10383 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10384 			+ rtx_cost (sub_op, mode, code, 0, speed_p));
10385 
10386 	      return true;
10387 	    }
10388 
10389 	  if (speed_p)
10390 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10391 	  return false;
10392 	}
10393 
10394       if (mode == SImode)
10395 	{
10396 	  rtx shift_by_reg = NULL;
10397 	  rtx shift_op;
10398 	  rtx non_shift_op;
10399 	  rtx op0 = XEXP (x, 0);
10400 	  rtx op1 = XEXP (x, 1);
10401 
10402 	  /* Factor out any borrow operation.  There's more than one way
10403 	     of expressing this; try to recognize them all.  */
10404 	  if (GET_CODE (op0) == MINUS)
10405 	    {
10406 	      if (arm_borrow_operation (op1, SImode))
10407 		{
10408 		  op1 = XEXP (op0, 1);
10409 		  op0 = XEXP (op0, 0);
10410 		}
10411 	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10412 		op0 = XEXP (op0, 0);
10413 	    }
10414 	  else if (GET_CODE (op1) == PLUS
10415 		   && arm_borrow_operation (XEXP (op1, 0), SImode))
10416 	    op1 = XEXP (op1, 0);
10417 	  else if (GET_CODE (op0) == NEG
10418 		   && arm_borrow_operation (op1, SImode))
10419 	    {
10420 	      /* Negate with carry-in.  For Thumb2 this is done with
10421 		 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10422 		 RSC instruction that exists in Arm mode.  */
10423 	      if (speed_p)
10424 		*cost += (TARGET_THUMB2
10425 			  ? extra_cost->alu.arith_shift
10426 			  : extra_cost->alu.arith);
10427 	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10428 	      return true;
10429 	    }
10430 	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10431 	     Note we do mean ~borrow here.  */
10432 	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10433 	    {
10434 	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
10435 	      return true;
10436 	    }
10437 
10438 	  shift_op = shifter_op_p (op0, &shift_by_reg);
10439 	  if (shift_op == NULL)
10440 	    {
10441 	      shift_op = shifter_op_p (op1, &shift_by_reg);
10442 	      non_shift_op = op0;
10443 	    }
10444 	  else
10445 	    non_shift_op = op1;
10446 
10447 	  if (shift_op != NULL)
10448 	    {
10449 	      if (shift_by_reg != NULL)
10450 		{
10451 		  if (speed_p)
10452 		    *cost += extra_cost->alu.arith_shift_reg;
10453 		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10454 		}
10455 	      else if (speed_p)
10456 		*cost += extra_cost->alu.arith_shift;
10457 
10458 	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10459 	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10460 	      return true;
10461 	    }
10462 
10463 	  if (arm_arch_thumb2
10464 	      && GET_CODE (XEXP (x, 1)) == MULT)
10465 	    {
10466 	      /* MLS.  */
10467 	      if (speed_p)
10468 		*cost += extra_cost->mult[0].add;
10469 	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10470 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10471 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10472 	      return true;
10473 	    }
10474 
10475 	  if (CONST_INT_P (op0))
10476 	    {
10477 	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10478 					    INTVAL (op0), NULL_RTX,
10479 					    NULL_RTX, 1, 0);
10480 	      *cost = COSTS_N_INSNS (insns);
10481 	      if (speed_p)
10482 		*cost += insns * extra_cost->alu.arith;
10483 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10484 	      return true;
10485 	    }
10486 	  else if (speed_p)
10487 	    *cost += extra_cost->alu.arith;
10488 
10489 	  /* Don't recurse as we don't want to cost any borrow that
10490 	     we've stripped.  */
10491 	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10492 	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10493 	  return true;
10494 	}
10495 
10496       if (GET_MODE_CLASS (mode) == MODE_INT
10497 	  && GET_MODE_SIZE (mode) < 4)
10498 	{
10499 	  rtx shift_op, shift_reg;
10500 	  shift_reg = NULL;
10501 
10502 	  /* We check both sides of the MINUS for shifter operands since,
10503 	     unlike PLUS, it's not commutative.  */
10504 
10505 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10506 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10507 
10508 	  /* Slightly disparage, as we might need to widen the result.  */
10509 	  *cost += 1;
10510 	  if (speed_p)
10511 	    *cost += extra_cost->alu.arith;
10512 
10513 	  if (CONST_INT_P (XEXP (x, 0)))
10514 	    {
10515 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10516 	      return true;
10517 	    }
10518 
10519 	  return false;
10520 	}
10521 
10522       if (mode == DImode)
10523 	{
10524 	  *cost += COSTS_N_INSNS (1);
10525 
10526 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10527 	    {
10528 	      rtx op1 = XEXP (x, 1);
10529 
10530 	      if (speed_p)
10531 		*cost += 2 * extra_cost->alu.arith;
10532 
10533 	      if (GET_CODE (op1) == ZERO_EXTEND)
10534 		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10535 				   0, speed_p);
10536 	      else
10537 		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10538 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10539 				 0, speed_p);
10540 	      return true;
10541 	    }
10542 	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10543 	    {
10544 	      if (speed_p)
10545 		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10546 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10547 				  0, speed_p)
10548 			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10549 	      return true;
10550 	    }
10551 	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10552 		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10553 	    {
10554 	      if (speed_p)
10555 		*cost += (extra_cost->alu.arith
10556 			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10557 			     ? extra_cost->alu.arith
10558 			     : extra_cost->alu.arith_shift));
10559 	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10560 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10561 				    GET_CODE (XEXP (x, 1)), 0, speed_p));
10562 	      return true;
10563 	    }
10564 
10565 	  if (speed_p)
10566 	    *cost += 2 * extra_cost->alu.arith;
10567 	  return false;
10568 	}
10569 
10570       /* Vector mode?  */
10571 
10572       *cost = LIBCALL_COST (2);
10573       return false;
10574 
10575     case PLUS:
10576       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10577 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10578 	{
10579 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10580 	    {
10581 	      rtx mul_op0, mul_op1, add_op;
10582 
10583 	      if (speed_p)
10584 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
10585 
10586 	      mul_op0 = XEXP (XEXP (x, 0), 0);
10587 	      mul_op1 = XEXP (XEXP (x, 0), 1);
10588 	      add_op = XEXP (x, 1);
10589 
10590 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10591 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
10592 			+ rtx_cost (add_op, mode, code, 0, speed_p));
10593 
10594 	      return true;
10595 	    }
10596 
10597 	  if (speed_p)
10598 	    *cost += extra_cost->fp[mode != SFmode].addsub;
10599 	  return false;
10600 	}
10601       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10602 	{
10603 	  *cost = LIBCALL_COST (2);
10604 	  return false;
10605 	}
10606 
10607       /* Narrow modes can be synthesized in SImode, but the range
10608 	 of useful sub-operations is limited.  Check for shift operations
10609 	 on one of the operands.  Only left shifts can be used in the
10610 	 narrow modes.  */
10611       if (GET_MODE_CLASS (mode) == MODE_INT
10612 	  && GET_MODE_SIZE (mode) < 4)
10613 	{
10614 	  rtx shift_op, shift_reg;
10615 	  shift_reg = NULL;
10616 
10617 	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10618 
10619 	  if (CONST_INT_P (XEXP (x, 1)))
10620 	    {
10621 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10622 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10623 					    NULL_RTX, 1, 0);
10624 	      *cost = COSTS_N_INSNS (insns);
10625 	      if (speed_p)
10626 		*cost += insns * extra_cost->alu.arith;
10627 	      /* Slightly penalize a narrow operation as the result may
10628 		 need widening.  */
10629 	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10630 	      return true;
10631 	    }
10632 
10633 	  /* Slightly penalize a narrow operation as the result may
10634 	     need widening.  */
10635 	  *cost += 1;
10636 	  if (speed_p)
10637 	    *cost += extra_cost->alu.arith;
10638 
10639 	  return false;
10640 	}
10641 
10642       if (mode == SImode)
10643 	{
10644 	  rtx shift_op, shift_reg;
10645 
10646 	  if (TARGET_INT_SIMD
10647 	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10648 		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10649 	    {
10650 	      /* UXTA[BH] or SXTA[BH].  */
10651 	      if (speed_p)
10652 		*cost += extra_cost->alu.extend_arith;
10653 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10654 				  0, speed_p)
10655 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10656 	      return true;
10657 	    }
10658 
10659 	  rtx op0 = XEXP (x, 0);
10660 	  rtx op1 = XEXP (x, 1);
10661 
10662 	  /* Handle a side effect of adding in the carry to an addition.  */
10663 	  if (GET_CODE (op0) == PLUS
10664 	      && arm_carry_operation (op1, mode))
10665 	    {
10666 	      op1 = XEXP (op0, 1);
10667 	      op0 = XEXP (op0, 0);
10668 	    }
10669 	  else if (GET_CODE (op1) == PLUS
10670 		   && arm_carry_operation (op0, mode))
10671 	    {
10672 	      op0 = XEXP (op1, 0);
10673 	      op1 = XEXP (op1, 1);
10674 	    }
10675 	  else if (GET_CODE (op0) == PLUS)
10676 	    {
10677 	      op0 = strip_carry_operation (op0);
10678 	      if (swap_commutative_operands_p (op0, op1))
10679 		std::swap (op0, op1);
10680 	    }
10681 
10682 	  if (arm_carry_operation (op0, mode))
10683 	    {
10684 	      /* Adding the carry to a register is a canonicalization of
10685 		 adding 0 to the register plus the carry.  */
10686 	      if (speed_p)
10687 		*cost += extra_cost->alu.arith;
10688 	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10689 	      return true;
10690 	    }
10691 
10692 	  shift_reg = NULL;
10693 	  shift_op = shifter_op_p (op0, &shift_reg);
10694 	  if (shift_op != NULL)
10695 	    {
10696 	      if (shift_reg)
10697 		{
10698 		  if (speed_p)
10699 		    *cost += extra_cost->alu.arith_shift_reg;
10700 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10701 		}
10702 	      else if (speed_p)
10703 		*cost += extra_cost->alu.arith_shift;
10704 
10705 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10706 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10707 	      return true;
10708 	    }
10709 
10710 	  if (GET_CODE (op0) == MULT)
10711 	    {
10712 	      rtx mul_op = op0;
10713 
10714 	      if (TARGET_DSP_MULTIPLY
10715 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10716 		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10717 			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10718 			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10719 			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10720 		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10721 			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10722 			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10723 			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10724 			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10725 				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10726 				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10727 				      == 16))))))
10728 		{
10729 		  /* SMLA[BT][BT].  */
10730 		  if (speed_p)
10731 		    *cost += extra_cost->mult[0].extend_add;
10732 		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10733 				      SIGN_EXTEND, 0, speed_p)
10734 			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10735 					SIGN_EXTEND, 0, speed_p)
10736 			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
10737 		  return true;
10738 		}
10739 
10740 	      if (speed_p)
10741 		*cost += extra_cost->mult[0].add;
10742 	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10743 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10744 			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
10745 	      return true;
10746 	    }
10747 
10748 	  if (CONST_INT_P (op1))
10749 	    {
10750 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10751 					    INTVAL (op1), NULL_RTX,
10752 					    NULL_RTX, 1, 0);
10753 	      *cost = COSTS_N_INSNS (insns);
10754 	      if (speed_p)
10755 		*cost += insns * extra_cost->alu.arith;
10756 	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10757 	      return true;
10758 	    }
10759 
10760 	  if (speed_p)
10761 	    *cost += extra_cost->alu.arith;
10762 
10763 	  /* Don't recurse here because we want to test the operands
10764 	     without any carry operation.  */
10765 	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10766 	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10767 	  return true;
10768 	}
10769 
10770       if (mode == DImode)
10771 	{
10772 	  if (GET_CODE (XEXP (x, 0)) == MULT
10773 	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10774 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10775 		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10776 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10777 	    {
10778 	      if (speed_p)
10779 		*cost += extra_cost->mult[1].extend_add;
10780 	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10781 				  ZERO_EXTEND, 0, speed_p)
10782 			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10783 				    ZERO_EXTEND, 0, speed_p)
10784 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10785 	      return true;
10786 	    }
10787 
10788 	  *cost += COSTS_N_INSNS (1);
10789 
10790 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10791 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10792 	    {
10793 	      if (speed_p)
10794 		*cost += (extra_cost->alu.arith
10795 			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10796 			     ? extra_cost->alu.arith
10797 			     : extra_cost->alu.arith_shift));
10798 
10799 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10800 				  0, speed_p)
10801 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10802 	      return true;
10803 	    }
10804 
10805 	  if (speed_p)
10806 	    *cost += 2 * extra_cost->alu.arith;
10807 	  return false;
10808 	}
10809 
10810       /* Vector mode?  */
10811       *cost = LIBCALL_COST (2);
10812       return false;
10813     case IOR:
10814       if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10815         {
10816           if (speed_p)
10817             *cost += extra_cost->alu.rev;
10818 
10819           return true;
10820         }
10821     /* Fall through.  */
10822     case AND: case XOR:
10823       if (mode == SImode)
10824 	{
10825 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10826 	  rtx op0 = XEXP (x, 0);
10827 	  rtx shift_op, shift_reg;
10828 
10829 	  if (subcode == NOT
10830 	      && (code == AND
10831 		  || (code == IOR && TARGET_THUMB2)))
10832 	    op0 = XEXP (op0, 0);
10833 
10834 	  shift_reg = NULL;
10835 	  shift_op = shifter_op_p (op0, &shift_reg);
10836 	  if (shift_op != NULL)
10837 	    {
10838 	      if (shift_reg)
10839 		{
10840 		  if (speed_p)
10841 		    *cost += extra_cost->alu.log_shift_reg;
10842 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10843 		}
10844 	      else if (speed_p)
10845 		*cost += extra_cost->alu.log_shift;
10846 
10847 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10848 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10849 	      return true;
10850 	    }
10851 
10852 	  if (CONST_INT_P (XEXP (x, 1)))
10853 	    {
10854 	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10855 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10856 					    NULL_RTX, 1, 0);
10857 
10858 	      *cost = COSTS_N_INSNS (insns);
10859 	      if (speed_p)
10860 		*cost += insns * extra_cost->alu.logical;
10861 	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
10862 	      return true;
10863 	    }
10864 
10865 	  if (speed_p)
10866 	    *cost += extra_cost->alu.logical;
10867 	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10868 		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10869 	  return true;
10870 	}
10871 
10872       if (mode == DImode)
10873 	{
10874 	  rtx op0 = XEXP (x, 0);
10875 	  enum rtx_code subcode = GET_CODE (op0);
10876 
10877 	  *cost += COSTS_N_INSNS (1);
10878 
10879 	  if (subcode == NOT
10880 	      && (code == AND
10881 		  || (code == IOR && TARGET_THUMB2)))
10882 	    op0 = XEXP (op0, 0);
10883 
10884 	  if (GET_CODE (op0) == ZERO_EXTEND)
10885 	    {
10886 	      if (speed_p)
10887 		*cost += 2 * extra_cost->alu.logical;
10888 
10889 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10890 				  0, speed_p)
10891 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10892 	      return true;
10893 	    }
10894 	  else if (GET_CODE (op0) == SIGN_EXTEND)
10895 	    {
10896 	      if (speed_p)
10897 		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10898 
10899 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10900 				  0, speed_p)
10901 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10902 	      return true;
10903 	    }
10904 
10905 	  if (speed_p)
10906 	    *cost += 2 * extra_cost->alu.logical;
10907 
10908 	  return true;
10909 	}
10910       /* Vector mode?  */
10911 
10912       *cost = LIBCALL_COST (2);
10913       return false;
10914 
10915     case MULT:
10916       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10917 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10918 	{
10919 	  rtx op0 = XEXP (x, 0);
10920 
10921 	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10922 	    op0 = XEXP (op0, 0);
10923 
10924 	  if (speed_p)
10925 	    *cost += extra_cost->fp[mode != SFmode].mult;
10926 
10927 	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10928 		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10929 	  return true;
10930 	}
10931       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10932 	{
10933 	  *cost = LIBCALL_COST (2);
10934 	  return false;
10935 	}
10936 
10937       if (mode == SImode)
10938 	{
10939 	  if (TARGET_DSP_MULTIPLY
10940 	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10941 		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10942 		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10943 			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10944 			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10945 		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10946 		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10947 		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10948 		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10949 			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10950 			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10951 			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10952 				  == 16))))))
10953 	    {
10954 	      /* SMUL[TB][TB].  */
10955 	      if (speed_p)
10956 		*cost += extra_cost->mult[0].extend;
10957 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10958 				 SIGN_EXTEND, 0, speed_p);
10959 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10960 				 SIGN_EXTEND, 1, speed_p);
10961 	      return true;
10962 	    }
10963 	  if (speed_p)
10964 	    *cost += extra_cost->mult[0].simple;
10965 	  return false;
10966 	}
10967 
10968       if (mode == DImode)
10969 	{
10970 	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10971 		&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10972 	       || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10973 		   && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10974 	    {
10975 	      if (speed_p)
10976 		*cost += extra_cost->mult[1].extend;
10977 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10978 				  ZERO_EXTEND, 0, speed_p)
10979 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10980 				    ZERO_EXTEND, 0, speed_p));
10981 	      return true;
10982 	    }
10983 
10984 	  *cost = LIBCALL_COST (2);
10985 	  return false;
10986 	}
10987 
10988       /* Vector mode?  */
10989       *cost = LIBCALL_COST (2);
10990       return false;
10991 
10992     case NEG:
10993       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10994 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10995 	{
10996 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10997 	    {
10998 	      /* VNMUL.  */
10999 	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11000 	      return true;
11001 	    }
11002 
11003 	  if (speed_p)
11004 	    *cost += extra_cost->fp[mode != SFmode].neg;
11005 
11006 	  return false;
11007 	}
11008       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11009 	{
11010 	  *cost = LIBCALL_COST (1);
11011 	  return false;
11012 	}
11013 
11014       if (mode == SImode)
11015 	{
11016 	  if (GET_CODE (XEXP (x, 0)) == ABS)
11017 	    {
11018 	      *cost += COSTS_N_INSNS (1);
11019 	      /* Assume the non-flag-changing variant.  */
11020 	      if (speed_p)
11021 		*cost += (extra_cost->alu.log_shift
11022 			  + extra_cost->alu.arith_shift);
11023 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11024 	      return true;
11025 	    }
11026 
11027 	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11028 	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11029 	    {
11030 	      *cost += COSTS_N_INSNS (1);
11031 	      /* No extra cost for MOV imm and MVN imm.  */
11032 	      /* If the comparison op is using the flags, there's no further
11033 		 cost, otherwise we need to add the cost of the comparison.  */
11034 	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
11035 		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11036 		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
11037 		{
11038 		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11039 		  *cost += (COSTS_N_INSNS (1)
11040 			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11041 					0, speed_p)
11042 			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11043 					1, speed_p));
11044 		  if (speed_p)
11045 		    *cost += extra_cost->alu.arith;
11046 		}
11047 	      return true;
11048 	    }
11049 
11050 	  if (speed_p)
11051 	    *cost += extra_cost->alu.arith;
11052 	  return false;
11053 	}
11054 
11055       if (GET_MODE_CLASS (mode) == MODE_INT
11056 	  && GET_MODE_SIZE (mode) < 4)
11057 	{
11058 	  /* Slightly disparage, as we might need an extend operation.  */
11059 	  *cost += 1;
11060 	  if (speed_p)
11061 	    *cost += extra_cost->alu.arith;
11062 	  return false;
11063 	}
11064 
11065       if (mode == DImode)
11066 	{
11067 	  *cost += COSTS_N_INSNS (1);
11068 	  if (speed_p)
11069 	    *cost += 2 * extra_cost->alu.arith;
11070 	  return false;
11071 	}
11072 
11073       /* Vector mode?  */
11074       *cost = LIBCALL_COST (1);
11075       return false;
11076 
11077     case NOT:
11078       if (mode == SImode)
11079 	{
11080 	  rtx shift_op;
11081 	  rtx shift_reg = NULL;
11082 
11083 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11084 
11085 	  if (shift_op)
11086 	    {
11087 	      if (shift_reg != NULL)
11088 		{
11089 		  if (speed_p)
11090 		    *cost += extra_cost->alu.log_shift_reg;
11091 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11092 		}
11093 	      else if (speed_p)
11094 		*cost += extra_cost->alu.log_shift;
11095 	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11096 	      return true;
11097 	    }
11098 
11099 	  if (speed_p)
11100 	    *cost += extra_cost->alu.logical;
11101 	  return false;
11102 	}
11103       if (mode == DImode)
11104 	{
11105 	  *cost += COSTS_N_INSNS (1);
11106 	  return false;
11107 	}
11108 
11109       /* Vector mode?  */
11110 
11111       *cost += LIBCALL_COST (1);
11112       return false;
11113 
11114     case IF_THEN_ELSE:
11115       {
11116         if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11117 	  {
11118 	    *cost += COSTS_N_INSNS (3);
11119 	    return true;
11120 	  }
11121 	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11122 	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11123 
11124 	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11125 	/* Assume that if one arm of the if_then_else is a register,
11126 	   that it will be tied with the result and eliminate the
11127 	   conditional insn.  */
11128 	if (REG_P (XEXP (x, 1)))
11129 	  *cost += op2cost;
11130 	else if (REG_P (XEXP (x, 2)))
11131 	  *cost += op1cost;
11132 	else
11133 	  {
11134 	    if (speed_p)
11135 	      {
11136 		if (extra_cost->alu.non_exec_costs_exec)
11137 		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11138 		else
11139 		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11140 	      }
11141 	    else
11142 	      *cost += op1cost + op2cost;
11143 	  }
11144       }
11145       return true;
11146 
11147     case COMPARE:
11148       if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11149 	*cost = 0;
11150       else
11151 	{
11152 	  machine_mode op0mode;
11153 	  /* We'll mostly assume that the cost of a compare is the cost of the
11154 	     LHS.  However, there are some notable exceptions.  */
11155 
11156 	  /* Floating point compares are never done as side-effects.  */
11157 	  op0mode = GET_MODE (XEXP (x, 0));
11158 	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11159 	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11160 	    {
11161 	      if (speed_p)
11162 		*cost += extra_cost->fp[op0mode != SFmode].compare;
11163 
11164 	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
11165 		{
11166 		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11167 		  return true;
11168 		}
11169 
11170 	      return false;
11171 	    }
11172 	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11173 	    {
11174 	      *cost = LIBCALL_COST (2);
11175 	      return false;
11176 	    }
11177 
11178 	  /* DImode compares normally take two insns.  */
11179 	  if (op0mode == DImode)
11180 	    {
11181 	      *cost += COSTS_N_INSNS (1);
11182 	      if (speed_p)
11183 		*cost += 2 * extra_cost->alu.arith;
11184 	      return false;
11185 	    }
11186 
11187 	  if (op0mode == SImode)
11188 	    {
11189 	      rtx shift_op;
11190 	      rtx shift_reg;
11191 
11192 	      if (XEXP (x, 1) == const0_rtx
11193 		  && !(REG_P (XEXP (x, 0))
11194 		       || (GET_CODE (XEXP (x, 0)) == SUBREG
11195 			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
11196 		{
11197 		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11198 
11199 		  /* Multiply operations that set the flags are often
11200 		     significantly more expensive.  */
11201 		  if (speed_p
11202 		      && GET_CODE (XEXP (x, 0)) == MULT
11203 		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11204 		    *cost += extra_cost->mult[0].flag_setting;
11205 
11206 		  if (speed_p
11207 		      && GET_CODE (XEXP (x, 0)) == PLUS
11208 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11209 		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11210 							    0), 1), mode))
11211 		    *cost += extra_cost->mult[0].flag_setting;
11212 		  return true;
11213 		}
11214 
11215 	      shift_reg = NULL;
11216 	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11217 	      if (shift_op != NULL)
11218 		{
11219 		  if (shift_reg != NULL)
11220 		    {
11221 		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11222 					 1, speed_p);
11223 		      if (speed_p)
11224 			*cost += extra_cost->alu.arith_shift_reg;
11225 		    }
11226 		  else if (speed_p)
11227 		    *cost += extra_cost->alu.arith_shift;
11228 		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11229 		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11230 		  return true;
11231 		}
11232 
11233 	      if (speed_p)
11234 		*cost += extra_cost->alu.arith;
11235 	      if (CONST_INT_P (XEXP (x, 1))
11236 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11237 		{
11238 		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11239 		  return true;
11240 		}
11241 	      return false;
11242 	    }
11243 
11244 	  /* Vector mode?  */
11245 
11246 	  *cost = LIBCALL_COST (2);
11247 	  return false;
11248 	}
11249       return true;
11250 
11251     case EQ:
11252     case NE:
11253     case LT:
11254     case LE:
11255     case GT:
11256     case GE:
11257     case LTU:
11258     case LEU:
11259     case GEU:
11260     case GTU:
11261     case ORDERED:
11262     case UNORDERED:
11263     case UNEQ:
11264     case UNLE:
11265     case UNLT:
11266     case UNGE:
11267     case UNGT:
11268     case LTGT:
11269       if (outer_code == SET)
11270 	{
11271 	  /* Is it a store-flag operation?  */
11272 	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11273 	      && XEXP (x, 1) == const0_rtx)
11274 	    {
11275 	      /* Thumb also needs an IT insn.  */
11276 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11277 	      return true;
11278 	    }
11279 	  if (XEXP (x, 1) == const0_rtx)
11280 	    {
11281 	      switch (code)
11282 		{
11283 		case LT:
11284 		  /* LSR Rd, Rn, #31.  */
11285 		  if (speed_p)
11286 		    *cost += extra_cost->alu.shift;
11287 		  break;
11288 
11289 		case EQ:
11290 		  /* RSBS T1, Rn, #0
11291 		     ADC  Rd, Rn, T1.  */
11292 
11293 		case NE:
11294 		  /* SUBS T1, Rn, #1
11295 		     SBC  Rd, Rn, T1.  */
11296 		  *cost += COSTS_N_INSNS (1);
11297 		  break;
11298 
11299 		case LE:
11300 		  /* RSBS T1, Rn, Rn, LSR #31
11301 		     ADC  Rd, Rn, T1. */
11302 		  *cost += COSTS_N_INSNS (1);
11303 		  if (speed_p)
11304 		    *cost += extra_cost->alu.arith_shift;
11305 		  break;
11306 
11307 		case GT:
11308 		  /* RSB  Rd, Rn, Rn, ASR #1
11309 		     LSR  Rd, Rd, #31.  */
11310 		  *cost += COSTS_N_INSNS (1);
11311 		  if (speed_p)
11312 		    *cost += (extra_cost->alu.arith_shift
11313 			      + extra_cost->alu.shift);
11314 		  break;
11315 
11316 		case GE:
11317 		  /* ASR  Rd, Rn, #31
11318 		     ADD  Rd, Rn, #1.  */
11319 		  *cost += COSTS_N_INSNS (1);
11320 		  if (speed_p)
11321 		    *cost += extra_cost->alu.shift;
11322 		  break;
11323 
11324 		default:
11325 		  /* Remaining cases are either meaningless or would take
11326 		     three insns anyway.  */
11327 		  *cost = COSTS_N_INSNS (3);
11328 		  break;
11329 		}
11330 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11331 	      return true;
11332 	    }
11333 	  else
11334 	    {
11335 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11336 	      if (CONST_INT_P (XEXP (x, 1))
11337 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11338 		{
11339 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11340 		  return true;
11341 		}
11342 
11343 	      return false;
11344 	    }
11345 	}
11346       /* Not directly inside a set.  If it involves the condition code
11347 	 register it must be the condition for a branch, cond_exec or
11348 	 I_T_E operation.  Since the comparison is performed elsewhere
11349 	 this is just the control part which has no additional
11350 	 cost.  */
11351       else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11352 	       && XEXP (x, 1) == const0_rtx)
11353 	{
11354 	  *cost = 0;
11355 	  return true;
11356 	}
11357       return false;
11358 
11359     case ABS:
11360       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11361 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11362 	{
11363 	  if (speed_p)
11364 	    *cost += extra_cost->fp[mode != SFmode].neg;
11365 
11366 	  return false;
11367 	}
11368       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11369 	{
11370 	  *cost = LIBCALL_COST (1);
11371 	  return false;
11372 	}
11373 
11374       if (mode == SImode)
11375 	{
11376 	  if (speed_p)
11377 	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11378 	  return false;
11379 	}
11380       /* Vector mode?  */
11381       *cost = LIBCALL_COST (1);
11382       return false;
11383 
11384     case SIGN_EXTEND:
11385       if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11386 	  && MEM_P (XEXP (x, 0)))
11387 	{
11388 	  if (mode == DImode)
11389 	    *cost += COSTS_N_INSNS (1);
11390 
11391 	  if (!speed_p)
11392 	    return true;
11393 
11394 	  if (GET_MODE (XEXP (x, 0)) == SImode)
11395 	    *cost += extra_cost->ldst.load;
11396 	  else
11397 	    *cost += extra_cost->ldst.load_sign_extend;
11398 
11399 	  if (mode == DImode)
11400 	    *cost += extra_cost->alu.shift;
11401 
11402 	  return true;
11403 	}
11404 
11405       /* Widening from less than 32 bits requires an extend operation.  */
11406       if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11407 	{
11408 	  /* We have SXTB/SXTH.  */
11409 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11410 	  if (speed_p)
11411 	    *cost += extra_cost->alu.extend;
11412 	}
11413       else if (GET_MODE (XEXP (x, 0)) != SImode)
11414 	{
11415 	  /* Needs two shifts.  */
11416 	  *cost += COSTS_N_INSNS (1);
11417 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11418 	  if (speed_p)
11419 	    *cost += 2 * extra_cost->alu.shift;
11420 	}
11421 
11422       /* Widening beyond 32 bits requires one more insn.  */
11423       if (mode == DImode)
11424 	{
11425 	  *cost += COSTS_N_INSNS (1);
11426 	  if (speed_p)
11427 	    *cost += extra_cost->alu.shift;
11428 	}
11429 
11430       return true;
11431 
11432     case ZERO_EXTEND:
11433       if ((arm_arch4
11434 	   || GET_MODE (XEXP (x, 0)) == SImode
11435 	   || GET_MODE (XEXP (x, 0)) == QImode)
11436 	  && MEM_P (XEXP (x, 0)))
11437 	{
11438 	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11439 
11440 	  if (mode == DImode)
11441 	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
11442 
11443 	  return true;
11444 	}
11445 
11446       /* Widening from less than 32 bits requires an extend operation.  */
11447       if (GET_MODE (XEXP (x, 0)) == QImode)
11448 	{
11449 	  /* UXTB can be a shorter instruction in Thumb2, but it might
11450 	     be slower than the AND Rd, Rn, #255 alternative.  When
11451 	     optimizing for speed it should never be slower to use
11452 	     AND, and we don't really model 16-bit vs 32-bit insns
11453 	     here.  */
11454 	  if (speed_p)
11455 	    *cost += extra_cost->alu.logical;
11456 	}
11457       else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11458 	{
11459 	  /* We have UXTB/UXTH.  */
11460 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11461 	  if (speed_p)
11462 	    *cost += extra_cost->alu.extend;
11463 	}
11464       else if (GET_MODE (XEXP (x, 0)) != SImode)
11465 	{
11466 	  /* Needs two shifts.  It's marginally preferable to use
11467 	     shifts rather than two BIC instructions as the second
11468 	     shift may merge with a subsequent insn as a shifter
11469 	     op.  */
11470 	  *cost = COSTS_N_INSNS (2);
11471 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11472 	  if (speed_p)
11473 	    *cost += 2 * extra_cost->alu.shift;
11474 	}
11475 
11476       /* Widening beyond 32 bits requires one more insn.  */
11477       if (mode == DImode)
11478 	{
11479 	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
11480 	}
11481 
11482       return true;
11483 
11484     case CONST_INT:
11485       *cost = 0;
11486       /* CONST_INT has no mode, so we cannot tell for sure how many
11487 	 insns are really going to be needed.  The best we can do is
11488 	 look at the value passed.  If it fits in SImode, then assume
11489 	 that's the mode it will be used for.  Otherwise assume it
11490 	 will be used in DImode.  */
11491       if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11492 	mode = SImode;
11493       else
11494 	mode = DImode;
11495 
11496       /* Avoid blowing up in arm_gen_constant ().  */
11497       if (!(outer_code == PLUS
11498 	    || outer_code == AND
11499 	    || outer_code == IOR
11500 	    || outer_code == XOR
11501 	    || outer_code == MINUS))
11502 	outer_code = SET;
11503 
11504     const_int_cost:
11505       if (mode == SImode)
11506 	{
11507 	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11508 						    INTVAL (x), NULL, NULL,
11509 						    0, 0));
11510 	  /* Extra costs?  */
11511 	}
11512       else
11513 	{
11514 	  *cost += COSTS_N_INSNS (arm_gen_constant
11515 				  (outer_code, SImode, NULL,
11516 				   trunc_int_for_mode (INTVAL (x), SImode),
11517 				   NULL, NULL, 0, 0)
11518 				  + arm_gen_constant (outer_code, SImode, NULL,
11519 						      INTVAL (x) >> 32, NULL,
11520 						      NULL, 0, 0));
11521 	  /* Extra costs?  */
11522 	}
11523 
11524       return true;
11525 
11526     case CONST:
11527     case LABEL_REF:
11528     case SYMBOL_REF:
11529       if (speed_p)
11530 	{
11531 	  if (arm_arch_thumb2 && !flag_pic)
11532 	    *cost += COSTS_N_INSNS (1);
11533 	  else
11534 	    *cost += extra_cost->ldst.load;
11535 	}
11536       else
11537 	*cost += COSTS_N_INSNS (1);
11538 
11539       if (flag_pic)
11540 	{
11541 	  *cost += COSTS_N_INSNS (1);
11542 	  if (speed_p)
11543 	    *cost += extra_cost->alu.arith;
11544 	}
11545 
11546       return true;
11547 
11548     case CONST_FIXED:
11549       *cost = COSTS_N_INSNS (4);
11550       /* Fixme.  */
11551       return true;
11552 
11553     case CONST_DOUBLE:
11554       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11555 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
11556 	{
11557 	  if (vfp3_const_double_rtx (x))
11558 	    {
11559 	      if (speed_p)
11560 		*cost += extra_cost->fp[mode == DFmode].fpconst;
11561 	      return true;
11562 	    }
11563 
11564 	  if (speed_p)
11565 	    {
11566 	      if (mode == DFmode)
11567 		*cost += extra_cost->ldst.loadd;
11568 	      else
11569 		*cost += extra_cost->ldst.loadf;
11570 	    }
11571 	  else
11572 	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11573 
11574 	  return true;
11575 	}
11576       *cost = COSTS_N_INSNS (4);
11577       return true;
11578 
11579     case CONST_VECTOR:
11580       /* Fixme.  */
11581       if (((TARGET_NEON && TARGET_HARD_FLOAT
11582 	    && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11583 	   || TARGET_HAVE_MVE)
11584 	  && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11585 	*cost = COSTS_N_INSNS (1);
11586       else
11587 	*cost = COSTS_N_INSNS (4);
11588       return true;
11589 
11590     case HIGH:
11591     case LO_SUM:
11592       /* When optimizing for size, we prefer constant pool entries to
11593 	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
11594       if (!speed_p)
11595 	*cost += 1;
11596       return true;
11597 
11598     case CLZ:
11599       if (speed_p)
11600 	*cost += extra_cost->alu.clz;
11601       return false;
11602 
11603     case SMIN:
11604       if (XEXP (x, 1) == const0_rtx)
11605 	{
11606 	  if (speed_p)
11607 	    *cost += extra_cost->alu.log_shift;
11608 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11609 	  return true;
11610 	}
11611       /* Fall through.  */
11612     case SMAX:
11613     case UMIN:
11614     case UMAX:
11615       *cost += COSTS_N_INSNS (1);
11616       return false;
11617 
11618     case TRUNCATE:
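      /* Look for the high half of a widening multiply: a shift right by 32 of
	 a MULT whose operands are both SIGN_EXTEND or both ZERO_EXTEND, as
	 checked below.  */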
11619       if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11620 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11621 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11622 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11623 	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11624 	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11625 	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11626 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11627 		      == ZERO_EXTEND))))
11628 	{
11629 	  if (speed_p)
11630 	    *cost += extra_cost->mult[1].extend;
11631 	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11632 			      ZERO_EXTEND, 0, speed_p)
11633 		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11634 				ZERO_EXTEND, 0, speed_p));
11635 	  return true;
11636 	}
11637       *cost = LIBCALL_COST (1);
11638       return false;
11639 
11640     case UNSPEC_VOLATILE:
11641     case UNSPEC:
11642       return arm_unspec_cost (x, outer_code, speed_p, cost);
11643 
11644     case PC:
11645       /* Reading the PC is like reading any other register.  Writing it
11646 	 is more expensive, but we take that into account elsewhere.  */
11647       *cost = 0;
11648       return true;
11649 
11650     case ZERO_EXTRACT:
11651       /* TODO: Simple zero_extract of bottom bits using AND.  */
11652       /* Fall through.  */
11653     case SIGN_EXTRACT:
11654       if (arm_arch6
11655 	  && mode == SImode
11656 	  && CONST_INT_P (XEXP (x, 1))
11657 	  && CONST_INT_P (XEXP (x, 2)))
11658 	{
11659 	  if (speed_p)
11660 	    *cost += extra_cost->alu.bfx;
11661 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11662 	  return true;
11663 	}
11664       /* Without UBFX/SBFX, need to resort to shift operations.  */
11665       *cost += COSTS_N_INSNS (1);
11666       if (speed_p)
11667 	*cost += 2 * extra_cost->alu.shift;
11668       *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11669       return true;
11670 
11671     case FLOAT_EXTEND:
11672       if (TARGET_HARD_FLOAT)
11673 	{
11674 	  if (speed_p)
11675 	    *cost += extra_cost->fp[mode == DFmode].widen;
11676 	  if (!TARGET_VFP5
11677 	      && GET_MODE (XEXP (x, 0)) == HFmode)
11678 	    {
11679 	      /* Pre v8, widening HF->DF is a two-step process, first
11680 	         widening to SFmode.  */
11681 	      *cost += COSTS_N_INSNS (1);
11682 	      if (speed_p)
11683 		*cost += extra_cost->fp[0].widen;
11684 	    }
11685 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11686 	  return true;
11687 	}
11688 
11689       *cost = LIBCALL_COST (1);
11690       return false;
11691 
11692     case FLOAT_TRUNCATE:
11693       if (TARGET_HARD_FLOAT)
11694 	{
11695 	  if (speed_p)
11696 	    *cost += extra_cost->fp[mode == DFmode].narrow;
11697 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11698 	  return true;
11699 	  /* Vector modes?  */
11700 	}
11701       *cost = LIBCALL_COST (1);
11702       return false;
11703 
11704     case FMA:
11705       if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11706         {
11707           rtx op0 = XEXP (x, 0);
11708           rtx op1 = XEXP (x, 1);
11709           rtx op2 = XEXP (x, 2);
11710 
11711 
11712           /* vfms or vfnma.  */
11713           if (GET_CODE (op0) == NEG)
11714             op0 = XEXP (op0, 0);
11715 
11716           /* vfnms or vfnma.  */
11717           if (GET_CODE (op2) == NEG)
11718             op2 = XEXP (op2, 0);
11719 
11720           *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11721           *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11722           *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11723 
11724           if (speed_p)
11725             *cost += extra_cost->fp[mode == DFmode].fma;
11726 
11727           return true;
11728         }
11729 
11730       *cost = LIBCALL_COST (3);
11731       return false;
11732 
11733     case FIX:
11734     case UNSIGNED_FIX:
11735       if (TARGET_HARD_FLOAT)
11736 	{
11737 	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
11738 	     a vcvt fixed-point conversion.  */
11739 	  if (code == FIX && mode == SImode
11740 	      && GET_CODE (XEXP (x, 0)) == FIX
11741 	      && GET_MODE (XEXP (x, 0)) == SFmode
11742 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11743 	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11744 		 > 0)
11745 	    {
11746 	      if (speed_p)
11747 		*cost += extra_cost->fp[0].toint;
11748 
11749 	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11750 				 code, 0, speed_p);
11751 	      return true;
11752 	    }
11753 
11754 	  if (GET_MODE_CLASS (mode) == MODE_INT)
11755 	    {
11756 	      mode = GET_MODE (XEXP (x, 0));
11757 	      if (speed_p)
11758 		*cost += extra_cost->fp[mode == DFmode].toint;
11759 	      /* Strip off the 'cost' of rounding towards zero.  */
11760 	      if (GET_CODE (XEXP (x, 0)) == FIX)
11761 		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11762 				   0, speed_p);
11763 	      else
11764 		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11765 	      /* ??? Increase the cost to deal with transferring from
11766 		 FP -> CORE registers?  */
11767 	      return true;
11768 	    }
11769 	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11770 		   && TARGET_VFP5)
11771 	    {
11772 	      if (speed_p)
11773 		*cost += extra_cost->fp[mode == DFmode].roundint;
11774 	      return false;
11775 	    }
11776 	  /* Vector costs? */
11777 	}
11778       *cost = LIBCALL_COST (1);
11779       return false;
11780 
11781     case FLOAT:
11782     case UNSIGNED_FLOAT:
11783       if (TARGET_HARD_FLOAT)
11784 	{
11785 	  /* ??? Increase the cost to deal with transferring from CORE
11786 	     -> FP registers?  */
11787 	  if (speed_p)
11788 	    *cost += extra_cost->fp[mode == DFmode].fromint;
11789 	  return false;
11790 	}
11791       *cost = LIBCALL_COST (1);
11792       return false;
11793 
11794     case CALL:
11795       return true;
11796 
11797     case ASM_OPERANDS:
11798       {
11799         /* Just a guess: the number of instructions in the asm template,
11800            plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
11801            though (see PR60663).  */
11802         int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11803         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11804 
11805         *cost = COSTS_N_INSNS (asm_length + num_operands);
11806         return true;
11807       }
11808     default:
11809       if (mode != VOIDmode)
11810 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11811       else
11812 	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11813       return false;
11814     }
11815 }
11816 
11817 #undef HANDLE_NARROW_SHIFT_ARITH
11818 
11819 /* RTX costs entry point.  */
11820 
11821 static bool
11822 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11823 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11824 {
11825   bool result;
11826   int code = GET_CODE (x);
11827   gcc_assert (current_tune->insn_extra_cost);
11828 
11829   result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11830 				(enum rtx_code) outer_code,
11831 				current_tune->insn_extra_cost,
11832 				total, speed);
11833 
11834   if (dump_file && arm_verbose_cost)
11835     {
11836       print_rtl_single (dump_file, x);
11837       fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11838 	       *total, result ? "final" : "partial");
11839     }
11840   return result;
11841 }
11842 
11843 static int
11844 arm_insn_cost (rtx_insn *insn, bool speed)
11845 {
11846   int cost;
11847 
11848   /* Don't cost a simple reg-reg move at a full insn cost: such moves
11849      will likely disappear during register allocation.  */
11850   if (!reload_completed
11851       && GET_CODE (PATTERN (insn)) == SET
11852       && REG_P (SET_DEST (PATTERN (insn)))
11853       && REG_P (SET_SRC (PATTERN (insn))))
11854     return 2;
11855   cost = pattern_cost (PATTERN (insn), speed);
11856   /* If the cost is zero, then it's likely a complex insn.  We don't want the
11857      cost of these to be less than something we know about.  */
11858   return cost ? cost : COSTS_N_INSNS (2);
11859 }
11860 
11861 /* All address computations that can be done are free, but rtx cost returns
11862    the same for practically all of them.  So we weight the different types
11863    of address here in the order (most pref first):
11864    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
11865 static inline int
11866 arm_arm_address_cost (rtx x)
11867 {
11868   enum rtx_code c  = GET_CODE (x);
11869 
11870   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11871     return 0;
11872   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11873     return 10;
11874 
11875   if (c == PLUS)
11876     {
11877       if (CONST_INT_P (XEXP (x, 1)))
11878 	return 2;
11879 
11880       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11881 	return 3;
11882 
11883       return 4;
11884     }
11885 
11886   return 6;
11887 }
11888 
11889 static inline int
11890 arm_thumb_address_cost (rtx x)
11891 {
11892   enum rtx_code c  = GET_CODE (x);
11893 
11894   if (c == REG)
11895     return 1;
11896   if (c == PLUS
11897       && REG_P (XEXP (x, 0))
11898       && CONST_INT_P (XEXP (x, 1)))
11899     return 1;
11900 
11901   return 2;
11902 }
11903 
11904 static int
11905 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11906 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11907 {
11908   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11909 }
11910 
11911 /* Adjust cost hook for XScale.  */
11912 static bool
11913 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11914 			  int * cost)
11915 {
11916   /* Some true dependencies can have a higher cost depending
11917      on precisely how certain input operands are used.  */
11918   if (dep_type == 0
11919       && recog_memoized (insn) >= 0
11920       && recog_memoized (dep) >= 0)
11921     {
11922       int shift_opnum = get_attr_shift (insn);
11923       enum attr_type attr_type = get_attr_type (dep);
11924 
11925       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11926 	 operand for INSN.  If we have a shifted input operand and the
11927 	 instruction we depend on is another ALU instruction, then we may
11928 	 have to account for an additional stall.  */
11929       if (shift_opnum != 0
11930 	  && (attr_type == TYPE_ALU_SHIFT_IMM
11931 	      || attr_type == TYPE_ALUS_SHIFT_IMM
11932 	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11933 	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11934 	      || attr_type == TYPE_ALU_SHIFT_REG
11935 	      || attr_type == TYPE_ALUS_SHIFT_REG
11936 	      || attr_type == TYPE_LOGIC_SHIFT_REG
11937 	      || attr_type == TYPE_LOGICS_SHIFT_REG
11938 	      || attr_type == TYPE_MOV_SHIFT
11939 	      || attr_type == TYPE_MVN_SHIFT
11940 	      || attr_type == TYPE_MOV_SHIFT_REG
11941 	      || attr_type == TYPE_MVN_SHIFT_REG))
11942 	{
11943 	  rtx shifted_operand;
11944 	  int opno;
11945 
11946 	  /* Get the shifted operand.  */
11947 	  extract_insn (insn);
11948 	  shifted_operand = recog_data.operand[shift_opnum];
11949 
11950 	  /* Iterate over all the operands in DEP.  If we write an operand
11951 	     that overlaps with SHIFTED_OPERAND, then we must increase the
11952 	     cost of this dependency.  */
11953 	  extract_insn (dep);
11954 	  preprocess_constraints (dep);
11955 	  for (opno = 0; opno < recog_data.n_operands; opno++)
11956 	    {
11957 	      /* We can ignore strict inputs.  */
11958 	      if (recog_data.operand_type[opno] == OP_IN)
11959 		continue;
11960 
11961 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11962 					   shifted_operand))
11963 		{
11964 		  *cost = 2;
11965 		  return false;
11966 		}
11967 	    }
11968 	}
11969     }
11970   return true;
11971 }
11972 
11973 /* Adjust cost hook for Cortex A9.  */
11974 static bool
11975 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11976 			     int * cost)
11977 {
11978   switch (dep_type)
11979     {
11980     case REG_DEP_ANTI:
11981       *cost = 0;
11982       return false;
11983 
11984     case REG_DEP_TRUE:
11985     case REG_DEP_OUTPUT:
11986 	if (recog_memoized (insn) >= 0
11987 	    && recog_memoized (dep) >= 0)
11988 	  {
11989 	    if (GET_CODE (PATTERN (insn)) == SET)
11990 	      {
11991 		if (GET_MODE_CLASS
11992 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11993 		  || GET_MODE_CLASS
11994 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11995 		  {
11996 		    enum attr_type attr_type_insn = get_attr_type (insn);
11997 		    enum attr_type attr_type_dep = get_attr_type (dep);
11998 
11999 		    /* By default all dependencies of the form
12000 		       s0 = s0 <op> s1
12001 		       s0 = s0 <op> s2
12002 		       have an extra latency of 1 cycle because
12003 		       of the input and output dependency in this
12004 		       case.  However this gets modeled as a true
12005 		       dependency and hence all these checks.  */
12006 		    if (REG_P (SET_DEST (PATTERN (insn)))
12007 			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
12008 		      {
12009 			/* FMACS is a special case where the dependent
12010 			   instruction can be issued 3 cycles before
12011 			   the normal latency in case of an output
12012 			   dependency.  */
12013 			if ((attr_type_insn == TYPE_FMACS
12014 			     || attr_type_insn == TYPE_FMACD)
12015 			    && (attr_type_dep == TYPE_FMACS
12016 				|| attr_type_dep == TYPE_FMACD))
12017 			  {
12018 			    if (dep_type == REG_DEP_OUTPUT)
12019 			      *cost = insn_default_latency (dep) - 3;
12020 			    else
12021 			      *cost = insn_default_latency (dep);
12022 			    return false;
12023 			  }
12024 			else
12025 			  {
12026 			    if (dep_type == REG_DEP_OUTPUT)
12027 			      *cost = insn_default_latency (dep) + 1;
12028 			    else
12029 			      *cost = insn_default_latency (dep);
12030 			  }
12031 			return false;
12032 		      }
12033 		  }
12034 	      }
12035 	  }
12036 	break;
12037 
12038     default:
12039       gcc_unreachable ();
12040     }
12041 
12042   return true;
12043 }
12044 
12045 /* Adjust cost hook for FA726TE.  */
12046 static bool
12047 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12048 			   int * cost)
12049 {
12050   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
12051      followed by a predicated one) has a penalty of 3.  */
12052   if (dep_type == REG_DEP_TRUE
12053       && recog_memoized (insn) >= 0
12054       && recog_memoized (dep) >= 0
12055       && get_attr_conds (dep) == CONDS_SET)
12056     {
12057       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
12058       if (get_attr_conds (insn) == CONDS_USE
12059           && get_attr_type (insn) != TYPE_BRANCH)
12060         {
12061           *cost = 3;
12062           return false;
12063         }
12064 
12065       if (GET_CODE (PATTERN (insn)) == COND_EXEC
12066           || get_attr_conds (insn) == CONDS_USE)
12067         {
12068           *cost = 0;
12069           return false;
12070         }
12071     }
12072 
12073   return true;
12074 }
12075 
12076 /* Implement TARGET_REGISTER_MOVE_COST.
12077 
12078    Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12079    it is typically more expensive than a single memory access.  We set
12080    the cost to less than two memory accesses so that floating
12081    point to integer conversion does not go through memory.  */
12082 
12083 int
12084 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12085 			reg_class_t from, reg_class_t to)
12086 {
12087   if (TARGET_32BIT)
12088     {
12089       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12090 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12091 	return 15;
12092       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12093 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12094 	return 4;
12095       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12096 	return 20;
12097       else
12098 	return 2;
12099     }
12100   else
12101     {
12102       if (from == HI_REGS || to == HI_REGS)
12103 	return 4;
12104       else
12105 	return 2;
12106     }
12107 }
12108 
12109 /* Implement TARGET_MEMORY_MOVE_COST.  */
12110 
12111 int
12112 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12113 		      bool in ATTRIBUTE_UNUSED)
12114 {
12115   if (TARGET_32BIT)
12116     return 10;
12117   else
12118     {
12119       if (GET_MODE_SIZE (mode) < 4)
12120 	return 8;
12121       else
12122 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12123     }
12124 }
12125 
12126 /* Vectorizer cost model implementation.  */
12127 
12128 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
12129 static int
12130 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12131 				tree vectype,
12132 				int misalign ATTRIBUTE_UNUSED)
12133 {
12134   unsigned elements;
12135 
12136   switch (type_of_cost)
12137     {
12138       case scalar_stmt:
12139         return current_tune->vec_costs->scalar_stmt_cost;
12140 
12141       case scalar_load:
12142         return current_tune->vec_costs->scalar_load_cost;
12143 
12144       case scalar_store:
12145         return current_tune->vec_costs->scalar_store_cost;
12146 
12147       case vector_stmt:
12148         return current_tune->vec_costs->vec_stmt_cost;
12149 
12150       case vector_load:
12151         return current_tune->vec_costs->vec_align_load_cost;
12152 
12153       case vector_store:
12154         return current_tune->vec_costs->vec_store_cost;
12155 
12156       case vec_to_scalar:
12157         return current_tune->vec_costs->vec_to_scalar_cost;
12158 
12159       case scalar_to_vec:
12160         return current_tune->vec_costs->scalar_to_vec_cost;
12161 
12162       case unaligned_load:
12163       case vector_gather_load:
12164         return current_tune->vec_costs->vec_unalign_load_cost;
12165 
12166       case unaligned_store:
12167       case vector_scatter_store:
12168         return current_tune->vec_costs->vec_unalign_store_cost;
12169 
12170       case cond_branch_taken:
12171         return current_tune->vec_costs->cond_taken_branch_cost;
12172 
12173       case cond_branch_not_taken:
12174         return current_tune->vec_costs->cond_not_taken_branch_cost;
12175 
12176       case vec_perm:
12177       case vec_promote_demote:
12178         return current_tune->vec_costs->vec_stmt_cost;
12179 
12180       case vec_construct:
12181 	elements = TYPE_VECTOR_SUBPARTS (vectype);
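	/* For example, constructing a 4-element vector is costed as
	   4/2 + 1 = 3 statements.  */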
12182 	return elements / 2 + 1;
12183 
12184       default:
12185         gcc_unreachable ();
12186     }
12187 }
12188 
12189 /* Implement targetm.vectorize.add_stmt_cost.  */
12190 
12191 static unsigned
12192 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
12193 		   struct _stmt_vec_info *stmt_info, int misalign,
12194 		   enum vect_cost_model_location where)
12195 {
12196   unsigned *cost = (unsigned *) data;
12197   unsigned retval = 0;
12198 
12199   if (flag_vect_cost_model)
12200     {
12201       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
12202       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12203 
12204       /* Statements in an inner loop relative to the loop being
12205 	 vectorized are weighted more heavily.  The value here is
12206 	 arbitrary and could potentially be improved with analysis.  */
12207       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
12208 	count *= 50;  /* FIXME.  */
12209 
12210       retval = (unsigned) (count * stmt_cost);
12211       cost[where] += retval;
12212     }
12213 
12214   return retval;
12215 }
12216 
12217 /* Return true if and only if this insn can dual-issue only as older.  */
12218 static bool
12219 cortexa7_older_only (rtx_insn *insn)
12220 {
12221   if (recog_memoized (insn) < 0)
12222     return false;
12223 
12224   switch (get_attr_type (insn))
12225     {
12226     case TYPE_ALU_DSP_REG:
12227     case TYPE_ALU_SREG:
12228     case TYPE_ALUS_SREG:
12229     case TYPE_LOGIC_REG:
12230     case TYPE_LOGICS_REG:
12231     case TYPE_ADC_REG:
12232     case TYPE_ADCS_REG:
12233     case TYPE_ADR:
12234     case TYPE_BFM:
12235     case TYPE_REV:
12236     case TYPE_MVN_REG:
12237     case TYPE_SHIFT_IMM:
12238     case TYPE_SHIFT_REG:
12239     case TYPE_LOAD_BYTE:
12240     case TYPE_LOAD_4:
12241     case TYPE_STORE_4:
12242     case TYPE_FFARITHS:
12243     case TYPE_FADDS:
12244     case TYPE_FFARITHD:
12245     case TYPE_FADDD:
12246     case TYPE_FMOV:
12247     case TYPE_F_CVT:
12248     case TYPE_FCMPS:
12249     case TYPE_FCMPD:
12250     case TYPE_FCONSTS:
12251     case TYPE_FCONSTD:
12252     case TYPE_FMULS:
12253     case TYPE_FMACS:
12254     case TYPE_FMULD:
12255     case TYPE_FMACD:
12256     case TYPE_FDIVS:
12257     case TYPE_FDIVD:
12258     case TYPE_F_MRC:
12259     case TYPE_F_MRRC:
12260     case TYPE_F_FLAG:
12261     case TYPE_F_LOADS:
12262     case TYPE_F_STORES:
12263       return true;
12264     default:
12265       return false;
12266     }
12267 }
12268 
12269 /* Return true if and only if this insn can dual-issue as younger.  */
12270 static bool
12271 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12272 {
12273   if (recog_memoized (insn) < 0)
12274     {
12275       if (verbose > 5)
12276         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12277       return false;
12278     }
12279 
12280   switch (get_attr_type (insn))
12281     {
12282     case TYPE_ALU_IMM:
12283     case TYPE_ALUS_IMM:
12284     case TYPE_LOGIC_IMM:
12285     case TYPE_LOGICS_IMM:
12286     case TYPE_EXTEND:
12287     case TYPE_MVN_IMM:
12288     case TYPE_MOV_IMM:
12289     case TYPE_MOV_REG:
12290     case TYPE_MOV_SHIFT:
12291     case TYPE_MOV_SHIFT_REG:
12292     case TYPE_BRANCH:
12293     case TYPE_CALL:
12294       return true;
12295     default:
12296       return false;
12297     }
12298 }
12299 
12300 
12301 /* Look for an instruction that can dual issue only as an older
12302    instruction, and move it in front of any instructions that can
12303    dual-issue as younger, while preserving the relative order of all
12304    other instructions in the ready list.  This is a heuristic to help
12305    dual-issue in later cycles, by postponing issue of more flexible
12306    instructions.  This heuristic may affect dual issue opportunities
12307    in the current cycle.  */
12308 static void
12309 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12310 			int *n_readyp, int clock)
12311 {
12312   int i;
12313   int first_older_only = -1, first_younger = -1;
12314 
12315   if (verbose > 5)
12316     fprintf (file,
12317              ";; sched_reorder for cycle %d with %d insns in ready list\n",
12318              clock,
12319              *n_readyp);
12320 
12321   /* Traverse the ready list from the head (the instruction to issue
12322      first), looking for the first instruction that can issue as
12323      younger and the first instruction that can dual-issue only as
12324      older.  */
12325   for (i = *n_readyp - 1; i >= 0; i--)
12326     {
12327       rtx_insn *insn = ready[i];
12328       if (cortexa7_older_only (insn))
12329         {
12330           first_older_only = i;
12331           if (verbose > 5)
12332             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12333           break;
12334         }
12335       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12336         first_younger = i;
12337     }
12338 
12339   /* Nothing to reorder because either no younger insn found or insn
12340      that can dual-issue only as older appears before any insn that
12341      can dual-issue as younger.  */
12342   if (first_younger == -1)
12343     {
12344       if (verbose > 5)
12345         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12346       return;
12347     }
12348 
12349   /* Nothing to reorder because no older-only insn in the ready list.  */
12350   if (first_older_only == -1)
12351     {
12352       if (verbose > 5)
12353         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12354       return;
12355     }
12356 
12357   /* Move first_older_only insn before first_younger.  */
12358   if (verbose > 5)
12359     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12360              INSN_UID (ready[first_older_only]),
12361              INSN_UID (ready[first_younger]));
12362   rtx_insn *first_older_only_insn = ready[first_older_only];
12363   for (i = first_older_only; i < first_younger; i++)
12364     {
12365       ready[i] = ready[i+1];
12366     }
12367 
12368   ready[i] = first_older_only_insn;
12369   return;
12370 }
12371 
12372 /* Implement TARGET_SCHED_REORDER. */
12373 static int
12374 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12375                    int clock)
12376 {
12377   switch (arm_tune)
12378     {
12379     case TARGET_CPU_cortexa7:
12380       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12381       break;
12382     default:
12383       /* Do nothing for other cores.  */
12384       break;
12385     }
12386 
12387   return arm_issue_rate ();
12388 }
12389 
12390 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12391    It corrects the value of COST based on the relationship between
12392    INSN and DEP through the dependence LINK.  It returns the new
12393    value. There is a per-core adjust_cost hook to adjust scheduler costs
12394    and the per-core hook can choose to completely override the generic
12395    adjust_cost function. Only put bits of code into arm_adjust_cost that
12396    are common across all cores.  */
12397 static int
12398 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12399 		 unsigned int)
12400 {
12401   rtx i_pat, d_pat;
12402 
12403  /* When generating Thumb-1 code, we want to place flag-setting operations
12404     close to a conditional branch which depends on them, so that we can
12405     omit the comparison. */
12406   if (TARGET_THUMB1
12407       && dep_type == 0
12408       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12409       && recog_memoized (dep) >= 0
12410       && get_attr_conds (dep) == CONDS_SET)
12411     return 0;
12412 
12413   if (current_tune->sched_adjust_cost != NULL)
12414     {
12415       if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12416 	return cost;
12417     }
12418 
12419   /* XXX Is this strictly true?  */
12420   if (dep_type == REG_DEP_ANTI
12421       || dep_type == REG_DEP_OUTPUT)
12422     return 0;
12423 
12424   /* Call insns don't incur a stall, even if they follow a load.  */
12425   if (dep_type == 0
12426       && CALL_P (insn))
12427     return 1;
12428 
12429   if ((i_pat = single_set (insn)) != NULL
12430       && MEM_P (SET_SRC (i_pat))
12431       && (d_pat = single_set (dep)) != NULL
12432       && MEM_P (SET_DEST (d_pat)))
12433     {
12434       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12435       /* This is a load after a store, there is no conflict if the load reads
12436 	 from a cached area.  Assume that loads from the stack, and from the
12437 	 constant pool are cached, and that others will miss.  This is a
12438 	 hack.  */
12439 
12440       if ((GET_CODE (src_mem) == SYMBOL_REF
12441 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
12442 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
12443 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
12444 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12445 	return 1;
12446     }
12447 
12448   return cost;
12449 }
12450 
12451 int
12452 arm_max_conditional_execute (void)
12453 {
12454   return max_insns_skipped;
12455 }
12456 
12457 static int
12458 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12459 {
12460   if (TARGET_32BIT)
12461     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12462   else
12463     return (optimize > 0) ? 2 : 0;
12464 }
12465 
12466 static int
12467 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12468 {
12469   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12470 }
12471 
12472 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12473    on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12474    sequences of non-executed instructions in IT blocks probably take the same
12475    amount of time as executed instructions (and the IT instruction itself takes
12476    space in icache).  This function was experimentally determined to give good
12477    results on a popular embedded benchmark.  */
12478 
12479 static int
12480 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12481 {
12482   return (TARGET_32BIT && speed_p) ? 1
12483          : arm_default_branch_cost (speed_p, predictable_p);
12484 }
12485 
12486 static int
12487 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12488 {
12489   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12490 }
12491 
12492 static bool fp_consts_inited = false;
12493 
12494 static REAL_VALUE_TYPE value_fp0;
12495 
12496 static void
12497 init_fp_table (void)
12498 {
12499   REAL_VALUE_TYPE r;
12500 
12501   r = REAL_VALUE_ATOF ("0", DFmode);
12502   value_fp0 = r;
12503   fp_consts_inited = true;
12504 }
12505 
12506 /* Return TRUE if rtx X is a valid immediate FP constant.  */
12507 int
12508 arm_const_double_rtx (rtx x)
12509 {
12510   const REAL_VALUE_TYPE *r;
12511 
12512   if (!fp_consts_inited)
12513     init_fp_table ();
12514 
12515   r = CONST_DOUBLE_REAL_VALUE (x);
12516   if (REAL_VALUE_MINUS_ZERO (*r))
12517     return 0;
12518 
12519   if (real_equal (r, &value_fp0))
12520     return 1;
12521 
12522   return 0;
12523 }
12524 
12525 /* VFPv3 has a fairly wide range of representable immediates, formed from
12526    "quarter-precision" floating-point values. These can be evaluated using this
12527    formula (with ^ for exponentiation):
12528 
12529      (-1)^s * n * 2^-r
12530 
12531    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12532    16 <= n <= 31 and 0 <= r <= 7.
12533 
12534    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12535 
12536      - A (most-significant) is the sign bit.
12537      - BCD are the exponent (encoded as r XOR 3).
12538      - EFGH are the mantissa (encoded as n - 16).
12539 */
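
/* A few worked instances of the encoding above, for illustration:
     1.0   = 16 * 2^-4  -> s=0, n=16, r=4 -> ABCDEFGH = 0 111 0000 = 0x70
     31.0  = 31 * 2^0   -> s=0, n=31, r=0 -> ABCDEFGH = 0 011 1111 = 0x3f
     0.125 = 16 * 2^-7  -> s=0, n=16, r=7 -> ABCDEFGH = 0 100 0000 = 0x40
   so the representable magnitudes run from 0.125 up to 31.0.  */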
12540 
12541 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12542    fconst[sd] instruction, or -1 if X isn't suitable.  */
12543 static int
12544 vfp3_const_double_index (rtx x)
12545 {
12546   REAL_VALUE_TYPE r, m;
12547   int sign, exponent;
12548   unsigned HOST_WIDE_INT mantissa, mant_hi;
12549   unsigned HOST_WIDE_INT mask;
12550   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12551   bool fail;
12552 
12553   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12554     return -1;
12555 
12556   r = *CONST_DOUBLE_REAL_VALUE (x);
12557 
12558   /* We can't represent these things, so detect them first.  */
12559   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12560     return -1;
12561 
12562   /* Extract sign, exponent and mantissa.  */
12563   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12564   r = real_value_abs (&r);
12565   exponent = REAL_EXP (&r);
12566   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12567      highest (sign) bit, with a fixed binary point at bit point_pos.
12568      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12569      bits for the mantissa, this may fail (low bits would be lost).  */
12570   real_ldexp (&m, &r, point_pos - exponent);
12571   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12572   mantissa = w.elt (0);
12573   mant_hi = w.elt (1);
12574 
12575   /* If there are bits set in the low part of the mantissa, we can't
12576      represent this value.  */
12577   if (mantissa != 0)
12578     return -1;
12579 
12580   /* Now make it so that mantissa contains the most-significant bits, and move
12581      the point_pos to indicate that the least-significant bits have been
12582      discarded.  */
12583   point_pos -= HOST_BITS_PER_WIDE_INT;
12584   mantissa = mant_hi;
12585 
12586   /* We can permit four significant bits of mantissa only, plus a high bit
12587      which is always 1.  */
12588   mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12589   if ((mantissa & mask) != 0)
12590     return -1;
12591 
12592   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
12593   mantissa >>= point_pos - 5;
12594 
12595   /* The mantissa may be zero. Disallow that case. (It's possible to load the
12596      floating-point immediate zero with Neon using an integer-zero load, but
12597      that case is handled elsewhere.)  */
12598   if (mantissa == 0)
12599     return -1;
12600 
12601   gcc_assert (mantissa >= 16 && mantissa <= 31);
12602 
12603   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12604      normalized significands are in the range [1, 2). (Our mantissa is shifted
12605      left 4 places at this point relative to normalized IEEE754 values).  GCC
12606      internally uses [0.5, 1) (see real.c), so the exponent returned from
12607      REAL_EXP must be altered.  */
12608   exponent = 5 - exponent;
12609 
12610   if (exponent < 0 || exponent > 7)
12611     return -1;
12612 
12613   /* Sign, mantissa and exponent are now in the correct form to plug into the
12614      formula described in the comment above.  */
12615   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12616 }
12617 
12618 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
12619 int
12620 vfp3_const_double_rtx (rtx x)
12621 {
12622   if (!TARGET_VFP3)
12623     return 0;
12624 
12625   return vfp3_const_double_index (x) != -1;
12626 }
12627 
12628 /* Recognize immediates which can be used in various Neon and MVE instructions.
12629    Legal immediates are described by the following table (for VMVN variants, the
12630    bitwise inverse of the constant shown is recognized. In either case, VMOV
12631    is output and the correct instruction to use for a given constant is chosen
12632    by the assembler). The constant shown is replicated across all elements of
12633    the destination vector.
12634 
12635    insn elems variant constant (binary)
12636    ---- ----- ------- -----------------
12637    vmov  i32     0    00000000 00000000 00000000 abcdefgh
12638    vmov  i32     1    00000000 00000000 abcdefgh 00000000
12639    vmov  i32     2    00000000 abcdefgh 00000000 00000000
12640    vmov  i32     3    abcdefgh 00000000 00000000 00000000
12641    vmov  i16     4    00000000 abcdefgh
12642    vmov  i16     5    abcdefgh 00000000
12643    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
12644    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
12645    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
12646    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
12647    vmvn  i16    10    00000000 abcdefgh
12648    vmvn  i16    11    abcdefgh 00000000
12649    vmov  i32    12    00000000 00000000 abcdefgh 11111111
12650    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
12651    vmov  i32    14    00000000 abcdefgh 11111111 11111111
12652    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
12653    vmov   i8    16    abcdefgh
12654    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
12655                       eeeeeeee ffffffff gggggggg hhhhhhhh
12656    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
12657    vmov  f32    19    00000000 00000000 00000000 00000000
12658 
12659    For case 18, B = !b. Representable values are exactly those accepted by
12660    vfp3_const_double_index, but are output as floating-point numbers rather
12661    than indices.
12662 
12663    For case 19, we will change it to vmov.i32 when assembling.
12664 
12665    Variants 0-5 (inclusive) may also be used as immediates for the second
12666    operand of VORR/VBIC instructions.
12667 
12668    The INVERSE argument causes the bitwise inverse of the given operand to be
12669    recognized instead (used for recognizing legal immediates for the VAND/VORN
12670    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12671    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12672    output, rather than the real insns vbic/vorr).
12673 
12674    INVERSE makes no difference to the recognition of float vectors.
12675 
12676    The return value is the variant of immediate as shown in the above table, or
12677    -1 if the given value doesn't match any of the listed patterns.
12678 */
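/* Worked example: the V4SImode constant { 0x5500, 0x5500, 0x5500, 0x5500 }
   expands to the little-endian byte pattern 00 55 00 00 repeated four times,
   which matches variant 1 above; the function returns 1 with *ELEMENTWIDTH
   set to 32 and *MODCONST set to 0x5500, corresponding to a vmov.i32.  */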
12679 static int
12680 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12681 		      rtx *modconst, int *elementwidth)
12682 {
12683 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
12684   matches = 1;					\
12685   for (i = 0; i < idx; i += (STRIDE))		\
12686     if (!(TEST))				\
12687       matches = 0;				\
12688   if (matches)					\
12689     {						\
12690       immtype = (CLASS);			\
12691       elsize = (ELSIZE);			\
12692       break;					\
12693     }
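  /* The CHECK macro above, used in the do/while block below, tests whether
     every STRIDE-th group of bytes satisfies TEST; if so it records CLASS as
     the variant number and ELSIZE as the element width and leaves the block.  */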
12694 
12695   unsigned int i, elsize = 0, idx = 0, n_elts;
12696   unsigned int innersize;
12697   unsigned char bytes[16] = {};
12698   int immtype = -1, matches;
12699   unsigned int invmask = inverse ? 0xff : 0;
12700   bool vector = GET_CODE (op) == CONST_VECTOR;
12701 
12702   if (vector)
12703     n_elts = CONST_VECTOR_NUNITS (op);
12704   else
12705     {
12706       n_elts = 1;
12707       gcc_assert (mode != VOIDmode);
12708     }
12709 
12710   innersize = GET_MODE_UNIT_SIZE (mode);
12711 
12712   /* Only support 128-bit vectors for MVE.  */
12713   if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12714     return -1;
12715 
12716   /* Vectors of float constants.  */
12717   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12718     {
12719       rtx el0 = CONST_VECTOR_ELT (op, 0);
12720 
12721       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12722         return -1;
12723 
12724       /* FP16 vectors cannot be represented.  */
12725       if (GET_MODE_INNER (mode) == HFmode)
12726 	return -1;
12727 
12728       /* All elements in the vector must be the same.  Note that 0.0 and -0.0
12729 	 are distinct in this context.  */
12730       if (!const_vec_duplicate_p (op))
12731 	return -1;
12732 
12733       if (modconst)
12734         *modconst = CONST_VECTOR_ELT (op, 0);
12735 
12736       if (elementwidth)
12737         *elementwidth = 0;
12738 
12739       if (el0 == CONST0_RTX (GET_MODE (el0)))
12740 	return 19;
12741       else
12742 	return 18;
12743     }
12744 
12745   /* The tricks done in the code below apply for little-endian vector layout.
12746      For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12747      FIXME: Implement logic for big-endian vectors.  */
12748   if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12749     return -1;
12750 
12751   /* Splat vector constant out into a byte vector.  */
12752   for (i = 0; i < n_elts; i++)
12753     {
12754       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12755       unsigned HOST_WIDE_INT elpart;
12756 
12757       gcc_assert (CONST_INT_P (el));
12758       elpart = INTVAL (el);
12759 
12760       for (unsigned int byte = 0; byte < innersize; byte++)
12761 	{
12762 	  bytes[idx++] = (elpart & 0xff) ^ invmask;
12763 	  elpart >>= BITS_PER_UNIT;
12764 	}
12765     }
12766 
12767   /* Sanity check.  */
12768   gcc_assert (idx == GET_MODE_SIZE (mode));
12769 
12770   do
12771     {
12772       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12773 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12774 
12775       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12776 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12777 
12778       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12779 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12780 
12781       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12782 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12783 
12784       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12785 
12786       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12787 
12788       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12789 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12790 
12791       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12792 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12793 
12794       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12795 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12796 
12797       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12798 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12799 
12800       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12801 
12802       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12803 
12804       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12805 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12806 
12807       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12808 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12809 
12810       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12811 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12812 
12813       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12814 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12815 
12816       CHECK (1, 8, 16, bytes[i] == bytes[0]);
12817 
12818       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12819 			&& bytes[i] == bytes[(i + 8) % idx]);
12820     }
12821   while (0);
12822 
12823   if (immtype == -1)
12824     return -1;
12825 
12826   if (elementwidth)
12827     *elementwidth = elsize;
12828 
12829   if (modconst)
12830     {
12831       unsigned HOST_WIDE_INT imm = 0;
12832 
12833       /* Un-invert bytes of recognized vector, if necessary.  */
12834       if (invmask != 0)
12835         for (i = 0; i < idx; i++)
12836           bytes[i] ^= invmask;
12837 
12838       if (immtype == 17)
12839         {
12840           /* FIXME: Broken on 32-bit H_W_I hosts.  */
12841           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12842 
12843           for (i = 0; i < 8; i++)
12844             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12845                    << (i * BITS_PER_UNIT);
12846 
12847           *modconst = GEN_INT (imm);
12848         }
12849       else
12850         {
12851           unsigned HOST_WIDE_INT imm = 0;
12852 
12853           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12854             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12855 
12856           *modconst = GEN_INT (imm);
12857         }
12858     }
12859 
12860   return immtype;
12861 #undef CHECK
12862 }
12863 
12864 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12865    implicitly, VMVN) immediate.  Write back width per element to *ELEMENTWIDTH
12866    (or zero for float elements), and a modified constant (whatever should be
12867    output for a VMOV) in *MODCONST.  This function was renamed from
12868    "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
12869    it is now used by both Neon and MVE.  */
12870 int
12871 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12872 			       rtx *modconst, int *elementwidth)
12873 {
12874   rtx tmpconst;
12875   int tmpwidth;
12876   int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12877 
12878   if (retval == -1)
12879     return 0;
12880 
12881   if (modconst)
12882     *modconst = tmpconst;
12883 
12884   if (elementwidth)
12885     *elementwidth = tmpwidth;
12886 
12887   return 1;
12888 }
12889 
12890 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
12891    the immediate is valid, write a constant suitable for using as an operand
12892    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12893    *ELEMENTWIDTH.  See simd_valid_immediate for description of INVERSE.  */
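/* For example, a V4SImode constant with every element 0x5500 matches variant
   1 and is accepted here, whereas one with every element 0xffffff12 matches
   variant 6 (a VMVN form) and is therefore rejected for VORR/VBIC even though
   it is a valid VMOV immediate.  */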
12894 
12895 int
12896 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12897 				rtx *modconst, int *elementwidth)
12898 {
12899   rtx tmpconst;
12900   int tmpwidth;
12901   int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12902 
12903   if (retval < 0 || retval > 5)
12904     return 0;
12905 
12906   if (modconst)
12907     *modconst = tmpconst;
12908 
12909   if (elementwidth)
12910     *elementwidth = tmpwidth;
12911 
12912   return 1;
12913 }
12914 
12915 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12916    the immediate is valid, write a constant suitable for using as an operand
12917    to VSHR/VSHL to *MODCONST and the corresponding element width to
12918    *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts, which
12919    have different immediate ranges.  */
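/* For example, with V8HImode operands the element size is 16 bits, so a
   valid VSHL immediate lies in 0..15 while a valid VSHR immediate lies in
   1..16.  */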
12920 
12921 int
12922 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12923 				rtx *modconst, int *elementwidth,
12924 				bool isleftshift)
12925 {
12926   unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12927   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12928   unsigned HOST_WIDE_INT last_elt = 0;
12929   unsigned HOST_WIDE_INT maxshift;
12930 
12931   /* All elements of the vector constant must be identical.  */
12932   for (i = 0; i < n_elts; i++)
12933     {
12934       rtx el = CONST_VECTOR_ELT (op, i);
12935       unsigned HOST_WIDE_INT elpart;
12936 
12937       if (CONST_INT_P (el))
12938         elpart = INTVAL (el);
12939       else if (CONST_DOUBLE_P (el))
12940         return 0;
12941       else
12942         gcc_unreachable ();
12943 
12944       if (i != 0 && elpart != last_elt)
12945         return 0;
12946 
12947       last_elt = elpart;
12948     }
12949 
12950   /* Shift less than element size.  */
12951   maxshift = innersize * 8;
12952 
12953   if (isleftshift)
12954     {
12955       /* Left shift immediate value can be from 0 to <size>-1.  */
12956       if (last_elt >= maxshift)
12957         return 0;
12958     }
12959   else
12960     {
12961       /* Right shift immediate value can be from 1 to <size>.  */
12962       if (last_elt == 0 || last_elt > maxshift)
12963 	return 0;
12964     }
12965 
12966   if (elementwidth)
12967     *elementwidth = innersize * 8;
12968 
12969   if (modconst)
12970     *modconst = CONST_VECTOR_ELT (op, 0);
12971 
12972   return 1;
12973 }
12974 
12975 /* Return a string suitable for output of Neon immediate logic operation
12976    MNEM.  */
12977 
12978 char *
12979 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12980 			     int inverse, int quad)
12981 {
12982   int width, is_valid;
12983   static char templ[40];
12984 
12985   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12986 
12987   gcc_assert (is_valid != 0);
12988 
12989   if (quad)
12990     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12991   else
12992     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12993 
12994   return templ;
12995 }
12996 
12997 /* Return a string suitable for output of Neon immediate shift operation
12998    (VSHR or VSHL) MNEM.  */
12999 
13000 char *
13001 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13002 			     machine_mode mode, int quad,
13003 			     bool isleftshift)
13004 {
13005   int width, is_valid;
13006   static char templ[40];
13007 
13008   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13009   gcc_assert (is_valid != 0);
13010 
13011   if (quad)
13012     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13013   else
13014     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13015 
13016   return templ;
13017 }
13018 
13019 /* Output a sequence of pairwise operations to implement a reduction.
13020    NOTE: We do "too much work" here, because pairwise operations work on two
13021    registers-worth of operands in one go. Unfortunately we can't exploit those
13022    extra calculations to do the full operation in fewer steps, I don't think.
13023    Although all vector elements of the result but the first are ignored, we
13024    actually calculate the same result in each of the elements. An alternative
13025    such as initially loading a vector with zero to use as each of the second
13026    operands would use up an additional register and take an extra instruction,
13027    for no particular gain.  */
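/* For example, with a V8QImode operand and an 8-bit pairwise-add REDUC
   callback, the loop below emits three pairwise operations (i = 4, 2, 1),
   the last of which writes the final result into OP0.  */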
13028 
13029 void
13030 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13031 		      rtx (*reduc) (rtx, rtx, rtx))
13032 {
13033   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13034   rtx tmpsum = op1;
13035 
13036   for (i = parts / 2; i >= 1; i /= 2)
13037     {
13038       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13039       emit_insn (reduc (dest, tmpsum, tmpsum));
13040       tmpsum = dest;
13041     }
13042 }
13043 
13044 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13045    loaded into a register using VDUP.
13046 
13047    If this is the case, and GENERATE is set, we also generate
13048    instructions to do this and return an RTX to assign to the register.  */
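/* For example, the V4HImode constant { 0x1234, 0x1234, 0x1234, 0x1234 } is
   not a valid VMOV immediate, but since all elements are equal it can be
   materialized by moving 0x1234 into a core register (e.g. with movw) and
   issuing a vdup.16.  */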
13049 
13050 static rtx
13051 neon_vdup_constant (rtx vals, bool generate)
13052 {
13053   machine_mode mode = GET_MODE (vals);
13054   machine_mode inner_mode = GET_MODE_INNER (mode);
13055   rtx x;
13056 
13057   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13058     return NULL_RTX;
13059 
13060   if (!const_vec_duplicate_p (vals, &x))
13061     /* The elements are not all the same.  We could handle repeating
13062        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13063        {0, C, 0, C, 0, C, 0, C} which can be loaded using
13064        vdup.i16).  */
13065     return NULL_RTX;
13066 
13067   if (!generate)
13068     return x;
13069 
13070   /* We can load this constant by using VDUP and a constant in a
13071      single ARM register.  This will be cheaper than a vector
13072      load.  */
13073 
13074   x = copy_to_mode_reg (inner_mode, x);
13075   return gen_vec_duplicate (mode, x);
13076 }
13077 
13078 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13079    constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13080    into a register.
13081 
13082    If this is the case, and GENERATE is set, we also generate code to do
13083    this and return an RTX to copy into the register.  */
13084 
13085 rtx
13086 neon_make_constant (rtx vals, bool generate)
13087 {
13088   machine_mode mode = GET_MODE (vals);
13089   rtx target;
13090   rtx const_vec = NULL_RTX;
13091   int n_elts = GET_MODE_NUNITS (mode);
13092   int n_const = 0;
13093   int i;
13094 
13095   if (GET_CODE (vals) == CONST_VECTOR)
13096     const_vec = vals;
13097   else if (GET_CODE (vals) == PARALLEL)
13098     {
13099       /* A CONST_VECTOR must contain only CONST_INTs and
13100 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13101 	 Only store valid constants in a CONST_VECTOR.  */
13102       for (i = 0; i < n_elts; ++i)
13103 	{
13104 	  rtx x = XVECEXP (vals, 0, i);
13105 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13106 	    n_const++;
13107 	}
13108       if (n_const == n_elts)
13109 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13110     }
13111   else
13112     gcc_unreachable ();
13113 
13114   if (const_vec != NULL
13115       && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13116     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
13117     return const_vec;
13118   else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13119     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
13120        pipeline cycle; creating the constant takes one or two ARM
13121        pipeline cycles.  */
13122     return target;
13123   else if (const_vec != NULL_RTX)
13124     /* Load from constant pool.  On Cortex-A8 this takes two cycles
13125        (for either double or quad vectors).  We cannot take advantage
13126        of single-cycle VLD1 because we need a PC-relative addressing
13127        mode.  */
13128     return arm_disable_literal_pool ? NULL_RTX : const_vec;
13129   else
13130     /* A PARALLEL containing something not valid inside CONST_VECTOR.
13131        We cannot construct an initializer.  */
13132     return NULL_RTX;
13133 }
13134 
13135 /* Initialize vector TARGET to VALS.  */
13136 
13137 void
13138 neon_expand_vector_init (rtx target, rtx vals)
13139 {
13140   machine_mode mode = GET_MODE (target);
13141   machine_mode inner_mode = GET_MODE_INNER (mode);
13142   int n_elts = GET_MODE_NUNITS (mode);
13143   int n_var = 0, one_var = -1;
13144   bool all_same = true;
13145   rtx x, mem;
13146   int i;
13147 
13148   for (i = 0; i < n_elts; ++i)
13149     {
13150       x = XVECEXP (vals, 0, i);
13151       if (!CONSTANT_P (x))
13152 	++n_var, one_var = i;
13153 
13154       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13155 	all_same = false;
13156     }
13157 
13158   if (n_var == 0)
13159     {
13160       rtx constant = neon_make_constant (vals);
13161       if (constant != NULL_RTX)
13162 	{
13163 	  emit_move_insn (target, constant);
13164 	  return;
13165 	}
13166     }
13167 
13168   /* Splat a single non-constant element if we can.  */
13169   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13170     {
13171       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13172       emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13173       return;
13174     }
13175 
13176   /* One field is non-constant.  Load constant then overwrite varying
13177      field.  This is more efficient than using the stack.  */
13178   if (n_var == 1)
13179     {
13180       rtx copy = copy_rtx (vals);
13181       rtx merge_mask = GEN_INT (1 << one_var);
13182 
13183       /* Load constant part of vector, substitute neighboring value for
13184 	 varying element.  */
13185       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13186       neon_expand_vector_init (target, copy);
13187 
13188       /* Insert variable.  */
13189       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13190       emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13191       return;
13192     }
13193 
13194   /* Construct the vector in memory one field at a time
13195      and load the whole vector.  */
13196   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13197   for (i = 0; i < n_elts; i++)
13198     emit_move_insn (adjust_address_nv (mem, inner_mode,
13199 				    i * GET_MODE_SIZE (inner_mode)),
13200 		    XVECEXP (vals, 0, i));
13201   emit_move_insn (target, mem);
13202 }
13203 
13204 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
13205    an error if it doesn't.  EXP indicates the source location, which includes the
13206    inlining history for intrinsics.  */
13207 
13208 static void
13209 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13210 	      const_tree exp, const char *desc)
13211 {
13212   HOST_WIDE_INT lane;
13213 
13214   gcc_assert (CONST_INT_P (operand));
13215 
13216   lane = INTVAL (operand);
13217 
13218   if (lane < low || lane >= high)
13219     {
13220       if (exp)
13221 	error ("%K%s %wd out of range %wd - %wd",
13222 	       exp, desc, lane, low, high - 1);
13223       else
13224 	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13225     }
13226 }
13227 
13228 /* Bounds-check lanes.  */
13229 
13230 void
13231 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13232 		  const_tree exp)
13233 {
13234   bounds_check (operand, low, high, exp, "lane");
13235 }
13236 
13237 /* Bounds-check constants.  */
13238 
13239 void
13240 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13241 {
13242   bounds_check (operand, low, high, NULL_TREE, "constant");
13243 }
13244 
13245 HOST_WIDE_INT
13246 neon_element_bits (machine_mode mode)
13247 {
13248   return GET_MODE_UNIT_BITSIZE (mode);
13249 }
13250 
13251 
13252 /* Predicates for `match_operand' and `match_operator'.  */
13253 
13254 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13255    WB level is 2 if full writeback address modes are allowed, 1
13256    if limited writeback address modes (POST_INC and PRE_DEC) are
13257    allowed and 0 if no writeback at all is supported.  */
13258 
13259 int
13260 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13261 {
13262   gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13263   rtx ind;
13264 
13265   /* Reject eliminable registers.  */
13266   if (! (reload_in_progress || reload_completed || lra_in_progress)
13267       && (   reg_mentioned_p (frame_pointer_rtx, op)
13268 	  || reg_mentioned_p (arg_pointer_rtx, op)
13269 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13270 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13271 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13272 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13273     return FALSE;
13274 
13275   /* Constants are converted into offsets from labels.  */
13276   if (!MEM_P (op))
13277     return FALSE;
13278 
13279   ind = XEXP (op, 0);
13280 
13281   if (reload_completed
13282       && (GET_CODE (ind) == LABEL_REF
13283 	  || (GET_CODE (ind) == CONST
13284 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13285 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13286 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13287     return TRUE;
13288 
13289   /* Match: (mem (reg)).  */
13290   if (REG_P (ind))
13291     return arm_address_register_rtx_p (ind, 0);
13292 
13293   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
13294      acceptable whenever any writeback is allowed (subject to
13295      verification by arm_address_register_rtx_p).  PRE_INC and
13296      POST_DEC additionally require full writeback, as do the
13297      PRE_MODIFY/POST_MODIFY forms handled below.  */
13298   if (wb_level > 0
13299       && (GET_CODE (ind) == POST_INC
13300 	  || GET_CODE (ind) == PRE_DEC
13301 	  || (wb_level > 1
13302 	      && (GET_CODE (ind) == PRE_INC
13303 		  || GET_CODE (ind) == POST_DEC))))
13304     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13305 
13306   if (wb_level > 1
13307       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13308       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13309       && GET_CODE (XEXP (ind, 1)) == PLUS
13310       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13311     ind = XEXP (ind, 1);
13312 
13313   /* Match:
13314      (plus (reg)
13315 	   (const))
13316 
13317      The encoded immediate for 16-bit modes is multiplied by 2,
13318      while the encoded immediate for 32-bit and 64-bit modes is
13319      multiplied by 4.  */
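  /* For instance, with a 4-byte factor the offset must be a multiple of 4 in
     the range [-1020, 1020]; with a 2-byte factor it must be a multiple of 2
     in [-510, 510].  */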
13320   int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13321   if (GET_CODE (ind) == PLUS
13322       && REG_P (XEXP (ind, 0))
13323       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13324       && CONST_INT_P (XEXP (ind, 1))
13325       && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13326       && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13327     return TRUE;
13328 
13329   return FALSE;
13330 }
13331 
13332 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13333    WB is true if full writeback address modes are allowed and is false
13334    if limited writeback address modes (POST_INC and PRE_DEC) are
13335    allowed.  */
13336 
13337 int arm_coproc_mem_operand (rtx op, bool wb)
13338 {
13339   return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13340 }
13341 
13342 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13343    context in which no writeback address modes are allowed.  */
13344 
13345 int
13346 arm_coproc_mem_operand_no_writeback (rtx op)
13347 {
13348   return arm_coproc_mem_operand_wb (op, 0);
13349 }
13350 
13351 /* Return TRUE if OP is a valid MVE memory operand for MODE.
13352 1. For V8QImode, V4QImode and V4HImode, check for [Rn] and return TRUE for Rn <= LO_REGS.
13353 2. For other modes, check for [Rn] and return TRUE for Rn < R15 (except R13).  */
13354 int
13355 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13356 {
13357   enum rtx_code code;
13358   HOST_WIDE_INT val;
13359   int  reg_no;
13360 
13361   /* Match: (mem (reg)).  */
13362   if (REG_P (op))
13363     {
13364       int reg_no = REGNO (op);
13365       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13366 	       ? reg_no <= LAST_LO_REGNUM
13367 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13368 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13369     }
13370   code = GET_CODE (op);
13371 
13372   if (code == POST_INC || code == PRE_DEC
13373       || code == PRE_INC || code == POST_DEC)
13374     {
13375       reg_no = REGNO (XEXP (op, 0));
13376       return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13377 	       ? reg_no <= LAST_LO_REGNUM
13378 	       :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13379 	      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13380     }
13381   else if ((code == POST_MODIFY || code == PRE_MODIFY)
13382 	   && GET_CODE (XEXP (op, 1)) == PLUS && CONST_INT_P (XEXP (XEXP (op, 1), 1)))
13383     {
13384       reg_no = REGNO (XEXP (op, 0));
13385       val = INTVAL (XEXP (XEXP (op, 1), 1));
13386       switch (mode)
13387 	{
13388 	  case E_V16QImode:
13389 	    if (abs_hwi (val))
13390 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13391 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13392 	    return FALSE;
13393 	  case E_V8HImode:
13394 	  case E_V8HFmode:
13395 	    if (abs (val) <= 255)
13396 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13397 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13398 	    return FALSE;
13399 	  case E_V8QImode:
13400 	  case E_V4QImode:
13401 	    if (abs_hwi (val))
13402 	      return (reg_no <= LAST_LO_REGNUM
13403 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13404 	    return FALSE;
13405 	  case E_V4HImode:
13406 	  case E_V4HFmode:
13407 	    if (val % 2 == 0 && abs (val) <= 254)
13408 	      return (reg_no <= LAST_LO_REGNUM
13409 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13410 	    return FALSE;
13411 	  case E_V4SImode:
13412 	  case E_V4SFmode:
13413 	    if (val % 4 == 0 && abs (val) <= 508)
13414 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13415 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13416 	    return FALSE;
13417 	  case E_V2DImode:
13418 	  case E_V2DFmode:
13419 	  case E_TImode:
13420 	    if (val % 4 == 0 && val >= 0 && val <= 1020)
13421 	      return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13422 		      || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13423 	    return FALSE;
13424 	  default:
13425 	    return FALSE;
13426 	}
13427     }
13428   return FALSE;
13429 }
13430 
13431 /* Return TRUE if OP is a memory operand which we can load or store a vector
13432    to/from. TYPE is one of the following values:
13433     0 - Vector load/store (vldr)
13434     1 - Core registers (ldm)
13435     2 - Element/structure loads (vld1)
13436  */
13437 int
13438 neon_vector_mem_operand (rtx op, int type, bool strict)
13439 {
13440   rtx ind;
13441 
13442   /* Reject eliminable registers.  */
13443   if (strict && ! (reload_in_progress || reload_completed)
13444       && (reg_mentioned_p (frame_pointer_rtx, op)
13445 	  || reg_mentioned_p (arg_pointer_rtx, op)
13446 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13447 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13448 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13449 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13450     return FALSE;
13451 
13452   /* Constants are converted into offsets from labels.  */
13453   if (!MEM_P (op))
13454     return FALSE;
13455 
13456   ind = XEXP (op, 0);
13457 
13458   if (reload_completed
13459       && (GET_CODE (ind) == LABEL_REF
13460 	  || (GET_CODE (ind) == CONST
13461 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13462 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13463 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13464     return TRUE;
13465 
13466   /* Match: (mem (reg)).  */
13467   if (REG_P (ind))
13468     return arm_address_register_rtx_p (ind, 0);
13469 
13470   /* Allow post-increment with Neon registers.  */
13471   if ((type != 1 && GET_CODE (ind) == POST_INC)
13472       || (type == 0 && GET_CODE (ind) == PRE_DEC))
13473     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13474 
13475   /* Allow post-increment by register for VLDn.  */
13476   if (type == 2 && GET_CODE (ind) == POST_MODIFY
13477       && GET_CODE (XEXP (ind, 1)) == PLUS
13478       && REG_P (XEXP (XEXP (ind, 1), 1))
13479       && REG_P (XEXP (ind, 0))
13480       && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13481      return true;
13482 
13483   /* Match:
13484      (plus (reg)
13485           (const)).  */
13486   if (type == 0
13487       && GET_CODE (ind) == PLUS
13488       && REG_P (XEXP (ind, 0))
13489       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13490       && CONST_INT_P (XEXP (ind, 1))
13491       && INTVAL (XEXP (ind, 1)) > -1024
13492       /* For quad modes, we restrict the constant offset to be slightly less
13493 	 than what the instruction format permits.  We have no such constraint
13494 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
13495       && (INTVAL (XEXP (ind, 1))
13496 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13497       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13498     return TRUE;
13499 
13500   return FALSE;
13501 }
13502 
13503 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13504    type.  */
13505 int
13506 neon_struct_mem_operand (rtx op)
13507 {
13508   rtx ind;
13509 
13510   /* Reject eliminable registers.  */
13511   if (! (reload_in_progress || reload_completed)
13512       && (   reg_mentioned_p (frame_pointer_rtx, op)
13513 	  || reg_mentioned_p (arg_pointer_rtx, op)
13514 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
13515 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13516 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13517 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13518     return FALSE;
13519 
13520   /* Constants are converted into offsets from labels.  */
13521   if (!MEM_P (op))
13522     return FALSE;
13523 
13524   ind = XEXP (op, 0);
13525 
13526   if (reload_completed
13527       && (GET_CODE (ind) == LABEL_REF
13528 	  || (GET_CODE (ind) == CONST
13529 	      && GET_CODE (XEXP (ind, 0)) == PLUS
13530 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13531 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13532     return TRUE;
13533 
13534   /* Match: (mem (reg)).  */
13535   if (REG_P (ind))
13536     return arm_address_register_rtx_p (ind, 0);
13537 
13538   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
13539   if (GET_CODE (ind) == POST_INC
13540       || GET_CODE (ind) == PRE_DEC)
13541     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13542 
13543   return FALSE;
13544 }
13545 
13546 /* Prepares the operands for the VCMLA by lane instruction such that the right
13547    register number is selected.  This instruction is special in that it always
13548    requires a D register; however, there is a choice to be made between Dn[0],
13549    Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13550 
13551    The VCMLA by lane function always selects two values. For instance given D0
13552    and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13553    used by the instruction.  However, given V4SF, indices 0 and 1 are both
13554    valid, since either D0[0] or D1[0] can be selected.
13555 
13556    This function centralizes that information based on OPERANDS: OPERANDS[3]
13557    will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13558    updated to contain the right index.  */
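/* Illustrative example: for a V4SFmode operand held in q1 (d2/d3) with lane
   index 1, the code below sets OPERANDS[3] to the constant 3 (selecting d3)
   and OPERANDS[4] to 0, i.e. d3[0].  */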
13559 
13560 rtx *
13561 neon_vcmla_lane_prepare_operands (rtx *operands)
13562 {
13563   int lane = INTVAL (operands[4]);
13564   machine_mode constmode = SImode;
13565   machine_mode mode = GET_MODE (operands[3]);
13566   int regno = REGNO (operands[3]);
13567   regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13568   if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13569     {
13570       operands[3] = gen_int_mode (regno + 1, constmode);
13571       operands[4]
13572 	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13573     }
13574   else
13575     {
13576       operands[3] = gen_int_mode (regno, constmode);
13577       operands[4] = gen_int_mode (lane, constmode);
13578     }
13579   return operands;
13580 }
13581 
13582 
13583 /* Return true if X is a register that will be eliminated later on.  */
13584 int
13585 arm_eliminable_register (rtx x)
13586 {
13587   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13588 		       || REGNO (x) == ARG_POINTER_REGNUM
13589 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13590 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13591 }
13592 
13593 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13594    coprocessor registers.  Otherwise return NO_REGS.  */
13595 
13596 enum reg_class
13597 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13598 {
13599   if (mode == HFmode)
13600     {
13601       if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13602 	return GENERAL_REGS;
13603       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13604 	return NO_REGS;
13605       return GENERAL_REGS;
13606     }
13607 
13608   /* The neon move patterns handle all legitimate vector and struct
13609      addresses.  */
13610   if (TARGET_NEON
13611       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13612       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13613 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13614 	  || VALID_NEON_STRUCT_MODE (mode)))
13615     return NO_REGS;
13616 
13617   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13618     return NO_REGS;
13619 
13620   return GENERAL_REGS;
13621 }
13622 
13623 /* Values which must be returned in the most-significant end of the return
13624    register.  */
13625 
13626 static bool
13627 arm_return_in_msb (const_tree valtype)
13628 {
13629   return (TARGET_AAPCS_BASED
13630           && BYTES_BIG_ENDIAN
13631 	  && (AGGREGATE_TYPE_P (valtype)
13632 	      || TREE_CODE (valtype) == COMPLEX_TYPE
13633 	      || FIXED_POINT_TYPE_P (valtype)));
13634 }
13635 
13636 /* Return TRUE if X references a SYMBOL_REF.  */
13637 int
13638 symbol_mentioned_p (rtx x)
13639 {
13640   const char * fmt;
13641   int i;
13642 
13643   if (GET_CODE (x) == SYMBOL_REF)
13644     return 1;
13645 
13646   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13647      are constant offsets, not symbols.  */
13648   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13649     return 0;
13650 
13651   fmt = GET_RTX_FORMAT (GET_CODE (x));
13652 
13653   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13654     {
13655       if (fmt[i] == 'E')
13656 	{
13657 	  int j;
13658 
13659 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13660 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
13661 	      return 1;
13662 	}
13663       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13664 	return 1;
13665     }
13666 
13667   return 0;
13668 }
13669 
13670 /* Return TRUE if X references a LABEL_REF.  */
13671 int
13672 label_mentioned_p (rtx x)
13673 {
13674   const char * fmt;
13675   int i;
13676 
13677   if (GET_CODE (x) == LABEL_REF)
13678     return 1;
13679 
13680   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13681      instruction, but they are constant offsets, not symbols.  */
13682   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13683     return 0;
13684 
13685   fmt = GET_RTX_FORMAT (GET_CODE (x));
13686   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13687     {
13688       if (fmt[i] == 'E')
13689 	{
13690 	  int j;
13691 
13692 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13693 	    if (label_mentioned_p (XVECEXP (x, i, j)))
13694 	      return 1;
13695 	}
13696       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13697 	return 1;
13698     }
13699 
13700   return 0;
13701 }
13702 
13703 int
13704 tls_mentioned_p (rtx x)
13705 {
13706   switch (GET_CODE (x))
13707     {
13708     case CONST:
13709       return tls_mentioned_p (XEXP (x, 0));
13710 
13711     case UNSPEC:
13712       if (XINT (x, 1) == UNSPEC_TLS)
13713 	return 1;
13714 
13715     /* Fall through.  */
13716     default:
13717       return 0;
13718     }
13719 }
13720 
13721 /* Must not copy any rtx that uses a pc-relative address.
13722    Also, disallow copying of load-exclusive instructions that
13723    may appear after splitting of compare-and-swap-style operations
13724    so as to prevent those loops from being transformed away from their
13725    canonical forms (see PR 69904).  */
13726 
13727 static bool
13728 arm_cannot_copy_insn_p (rtx_insn *insn)
13729 {
13730   /* The tls call insn cannot be copied, as it is paired with a data
13731      word.  */
13732   if (recog_memoized (insn) == CODE_FOR_tlscall)
13733     return true;
13734 
13735   subrtx_iterator::array_type array;
13736   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13737     {
13738       const_rtx x = *iter;
13739       if (GET_CODE (x) == UNSPEC
13740 	  && (XINT (x, 1) == UNSPEC_PIC_BASE
13741 	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13742 	return true;
13743     }
13744 
13745   rtx set = single_set (insn);
13746   if (set)
13747     {
13748       rtx src = SET_SRC (set);
13749       if (GET_CODE (src) == ZERO_EXTEND)
13750 	src = XEXP (src, 0);
13751 
13752       /* Catch the load-exclusive and load-acquire operations.  */
13753       if (GET_CODE (src) == UNSPEC_VOLATILE
13754 	  && (XINT (src, 1) == VUNSPEC_LL
13755 	      || XINT (src, 1) == VUNSPEC_LAX))
13756 	return true;
13757     }
13758   return false;
13759 }
13760 
13761 enum rtx_code
13762 minmax_code (rtx x)
13763 {
13764   enum rtx_code code = GET_CODE (x);
13765 
13766   switch (code)
13767     {
13768     case SMAX:
13769       return GE;
13770     case SMIN:
13771       return LE;
13772     case UMIN:
13773       return LEU;
13774     case UMAX:
13775       return GEU;
13776     default:
13777       gcc_unreachable ();
13778     }
13779 }
13780 
13781 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
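/* For example, clamping to [0, 255] (LO_BOUND 0, HI_BOUND 255) matches an
   unsigned saturation with *MASK = 8, while clamping to [-128, 127] matches
   a signed saturation, also with *MASK = 8.  */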
13782 
13783 bool
13784 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13785 			int *mask, bool *signed_sat)
13786 {
13787   /* The high bound must be a power of two minus one.  */
13788   int log = exact_log2 (INTVAL (hi_bound) + 1);
13789   if (log == -1)
13790     return false;
13791 
13792   /* The low bound is either zero (for usat) or one less than the
13793      negation of the high bound (for ssat).  */
13794   if (INTVAL (lo_bound) == 0)
13795     {
13796       if (mask)
13797         *mask = log;
13798       if (signed_sat)
13799         *signed_sat = false;
13800 
13801       return true;
13802     }
13803 
13804   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13805     {
13806       if (mask)
13807         *mask = log + 1;
13808       if (signed_sat)
13809         *signed_sat = true;
13810 
13811       return true;
13812     }
13813 
13814   return false;
13815 }
13816 
13817 /* Return 1 if memory locations are adjacent.  */
13818 int
13819 adjacent_mem_locations (rtx a, rtx b)
13820 {
13821   /* We don't guarantee to preserve the order of these memory refs.  */
13822   if (volatile_refs_p (a) || volatile_refs_p (b))
13823     return 0;
13824 
13825   if ((REG_P (XEXP (a, 0))
13826        || (GET_CODE (XEXP (a, 0)) == PLUS
13827 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13828       && (REG_P (XEXP (b, 0))
13829 	  || (GET_CODE (XEXP (b, 0)) == PLUS
13830 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13831     {
13832       HOST_WIDE_INT val0 = 0, val1 = 0;
13833       rtx reg0, reg1;
13834       int val_diff;
13835 
13836       if (GET_CODE (XEXP (a, 0)) == PLUS)
13837         {
13838 	  reg0 = XEXP (XEXP (a, 0), 0);
13839 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13840         }
13841       else
13842 	reg0 = XEXP (a, 0);
13843 
13844       if (GET_CODE (XEXP (b, 0)) == PLUS)
13845         {
13846 	  reg1 = XEXP (XEXP (b, 0), 0);
13847 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13848         }
13849       else
13850 	reg1 = XEXP (b, 0);
13851 
13852       /* Don't accept any offset that will require multiple
13853 	 instructions to handle, since this would cause the
13854 	 arith_adjacentmem pattern to output an overlong sequence.  */
13855       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13856 	return 0;
13857 
13858       /* Don't allow an eliminable register: register elimination can make
13859 	 the offset too large.  */
13860       if (arm_eliminable_register (reg0))
13861 	return 0;
13862 
13863       val_diff = val1 - val0;
13864 
13865       if (arm_ld_sched)
13866 	{
13867 	  /* If the target has load delay slots, then there's no benefit
13868 	     to using an ldm instruction unless the offset is zero and
13869 	     we are optimizing for size.  */
13870 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13871 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13872 		  && (val_diff == 4 || val_diff == -4));
13873 	}
13874 
13875       return ((REGNO (reg0) == REGNO (reg1))
13876 	      && (val_diff == 4 || val_diff == -4));
13877     }
13878 
13879   return 0;
13880 }
13881 
13882 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
13883    for load operations, false for store operations.  CONSECUTIVE is true
13884    if the register numbers in the operation must be consecutive in the register
13885    bank. RETURN_PC is true if value is to be loaded in PC.
13886    The pattern we are trying to match for load is:
13887      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13888       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13889        :
13890        :
13891       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13892      ]
13893      where
13894      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13895      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13896      3.  If consecutive is TRUE, then for kth register being loaded,
13897          REGNO (R_dk) = REGNO (R_d0) + k.
13898    The pattern for store is similar.  */
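/* For example, ldmia r0!, {r4, r5, r6} is represented as a PARALLEL whose
   first element sets r0 to r0 + 12 (the write-back) and whose remaining
   elements load r4, r5 and r6 from [r0], [r0 + 4] and [r0 + 8].  */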
13899 bool
13900 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13901                      bool consecutive, bool return_pc)
13902 {
13903   HOST_WIDE_INT count = XVECLEN (op, 0);
13904   rtx reg, mem, addr;
13905   unsigned regno;
13906   unsigned first_regno;
13907   HOST_WIDE_INT i = 1, base = 0, offset = 0;
13908   rtx elt;
13909   bool addr_reg_in_reglist = false;
13910   bool update = false;
13911   int reg_increment;
13912   int offset_adj;
13913   int regs_per_val;
13914 
13915   /* If not in SImode, then registers must be consecutive
13916      (e.g., VLDM instructions for DFmode).  */
13917   gcc_assert ((mode == SImode) || consecutive);
13918   /* Setting return_pc for stores is illegal.  */
13919   gcc_assert (!return_pc || load);
13920 
13921   /* Set up the increments and the regs per val based on the mode.  */
13922   reg_increment = GET_MODE_SIZE (mode);
13923   regs_per_val = reg_increment / 4;
13924   offset_adj = return_pc ? 1 : 0;
13925 
13926   if (count <= 1
13927       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13928       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13929     return false;
13930 
13931   /* Check if this is a write-back.  */
13932   elt = XVECEXP (op, 0, offset_adj);
13933   if (GET_CODE (SET_SRC (elt)) == PLUS)
13934     {
13935       i++;
13936       base = 1;
13937       update = true;
13938 
13939       /* The offset adjustment must be the number of registers being
13940          popped times the size of a single register.  */
13941       if (!REG_P (SET_DEST (elt))
13942           || !REG_P (XEXP (SET_SRC (elt), 0))
13943           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13944           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13945           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13946              ((count - 1 - offset_adj) * reg_increment))
13947         return false;
13948     }
13949 
13950   i = i + offset_adj;
13951   base = base + offset_adj;
13952   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13953      success depends on the type: VLDM can do just one reg,
13954      LDM must do at least two.  */
13955   if ((count <= i) && (mode == SImode))
13956       return false;
13957 
13958   elt = XVECEXP (op, 0, i - 1);
13959   if (GET_CODE (elt) != SET)
13960     return false;
13961 
13962   if (load)
13963     {
13964       reg = SET_DEST (elt);
13965       mem = SET_SRC (elt);
13966     }
13967   else
13968     {
13969       reg = SET_SRC (elt);
13970       mem = SET_DEST (elt);
13971     }
13972 
13973   if (!REG_P (reg) || !MEM_P (mem))
13974     return false;
13975 
13976   regno = REGNO (reg);
13977   first_regno = regno;
13978   addr = XEXP (mem, 0);
13979   if (GET_CODE (addr) == PLUS)
13980     {
13981       if (!CONST_INT_P (XEXP (addr, 1)))
13982 	return false;
13983 
13984       offset = INTVAL (XEXP (addr, 1));
13985       addr = XEXP (addr, 0);
13986     }
13987 
13988   if (!REG_P (addr))
13989     return false;
13990 
13991   /* Don't allow SP to be loaded unless it is also the base register. It
13992      guarantees that SP is reset correctly when an LDM instruction
13993      is interrupted. Otherwise, we might end up with a corrupt stack.  */
13994   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13995     return false;
13996 
13997   if (regno == REGNO (addr))
13998     addr_reg_in_reglist = true;
13999 
14000   for (; i < count; i++)
14001     {
14002       elt = XVECEXP (op, 0, i);
14003       if (GET_CODE (elt) != SET)
14004         return false;
14005 
14006       if (load)
14007         {
14008           reg = SET_DEST (elt);
14009           mem = SET_SRC (elt);
14010         }
14011       else
14012         {
14013           reg = SET_SRC (elt);
14014           mem = SET_DEST (elt);
14015         }
14016 
14017       if (!REG_P (reg)
14018           || GET_MODE (reg) != mode
14019           || REGNO (reg) <= regno
14020           || (consecutive
14021               && (REGNO (reg) !=
14022                   (unsigned int) (first_regno + regs_per_val * (i - base))))
14023           /* Don't allow SP to be loaded unless it is also the base register. It
14024              guarantees that SP is reset correctly when an LDM instruction
14025              is interrupted. Otherwise, we might end up with a corrupt stack.  */
14026           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14027           || !MEM_P (mem)
14028           || GET_MODE (mem) != mode
14029           || ((GET_CODE (XEXP (mem, 0)) != PLUS
14030 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14031 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14032 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14033                    offset + (i - base) * reg_increment))
14034 	      && (!REG_P (XEXP (mem, 0))
14035 		  || offset + (i - base) * reg_increment != 0)))
14036         return false;
14037 
14038       regno = REGNO (reg);
14039       if (regno == REGNO (addr))
14040         addr_reg_in_reglist = true;
14041     }
14042 
14043   if (load)
14044     {
14045       if (update && addr_reg_in_reglist)
14046         return false;
14047 
14048       /* For Thumb-1, the address register is always modified - either by write-back
14049          or by explicit load.  If the pattern does not describe an update,
14050          then the address register must be in the list of loaded registers.  */
14051       if (TARGET_THUMB1)
14052         return update || addr_reg_in_reglist;
14053     }
14054 
14055   return true;
14056 }
14057 
14058 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14059    or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
14060    following form:
14061 
14062    [(set (reg:SI <N>) (const_int 0))
14063     (set (reg:SI <M>) (const_int 0))
14064     ...
14065     (unspec_volatile [(const_int 0)]
14066 		     VUNSPEC_CLRM_APSR)
14067     (clobber (reg:CC CC_REGNUM))
14068    ]
14069 
14070    Any number (including 0) of set expressions is valid, the volatile unspec is
14071    optional.  All registers but SP and PC are allowed and registers must be in
14072    strict increasing order.
14073 
14074    To be a valid VSCCLRM pattern, OP must have the following form:
14075 
14076    [(unspec_volatile [(const_int 0)]
14077 		     VUNSPEC_VSCCLRM_VPR)
14078     (set (reg:SF <N>) (const_int 0))
14079     (set (reg:SF <M>) (const_int 0))
14080     ...
14081    ]
14082 
14083    As with CLRM, any number (including 0) of set expressions is valid, however
14084    the volatile unspec is mandatory here.  Any VFP single-precision register is
14085    accepted but all registers must be consecutive and in increasing order.  */
14086 
14087 bool
14088 clear_operation_p (rtx op, bool vfp)
14089 {
14090   unsigned regno;
14091   unsigned last_regno = INVALID_REGNUM;
14092   rtx elt, reg, zero;
14093   int count = XVECLEN (op, 0);
14094   int first_set = vfp ? 1 : 0;
14095   machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14096 
14097   for (int i = first_set; i < count; i++)
14098     {
14099       elt = XVECEXP (op, 0, i);
14100 
14101       if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14102 	{
14103 	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14104 	      || XVECLEN (elt, 0) != 1
14105 	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14106 	      || i != count - 2)
14107 	    return false;
14108 
14109 	  continue;
14110 	}
14111 
14112       if (GET_CODE (elt) == CLOBBER)
14113 	continue;
14114 
14115       if (GET_CODE (elt) != SET)
14116 	return false;
14117 
14118       reg = SET_DEST (elt);
14119       zero = SET_SRC (elt);
14120 
14121       if (!REG_P (reg)
14122 	  || GET_MODE (reg) != expected_mode
14123 	  || zero != CONST0_RTX (SImode))
14124 	return false;
14125 
14126       regno = REGNO (reg);
14127 
14128       if (vfp)
14129 	{
14130 	  if (i != first_set && regno != last_regno + 1)
14131 	    return false;
14132 	}
14133       else
14134 	{
14135 	  if (regno == SP_REGNUM || regno == PC_REGNUM)
14136 	    return false;
14137 	  if (i != first_set && regno <= last_regno)
14138 	    return false;
14139 	}
14140 
14141       last_regno = regno;
14142     }
14143 
14144   return true;
14145 }
14146 
14147 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14148    or stores (depending on IS_STORE) into a load-multiple or store-multiple
14149    instruction.  ADD_OFFSET is nonzero if the base address register needs
14150    to be modified with an add instruction before we can use it.  */
14151 
14152 static bool
14153 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14154 				 int nops, HOST_WIDE_INT add_offset)
14155 {
14156   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14157      if the offset isn't small enough.  The reason 2 ldrs are faster
14158      is because these ARMs are able to do more than one cache access
14159      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
14160      whilst the ARM8 has a double bandwidth cache.  This means that
14161      these cores can do both an instruction fetch and a data fetch in
14162      a single cycle, so the trick of calculating the address into a
14163      scratch register (one of the result regs) and then doing a load
14164      multiple actually becomes slower (and no smaller in code size).
14165      That is the transformation
14166 
14167  	ldr	rd1, [rbase + offset]
14168  	ldr	rd2, [rbase + offset + 4]
14169 
14170      to
14171 
14172  	add	rd1, rbase, offset
14173  	ldmia	rd1, {rd1, rd2}
14174 
14175      produces worse code -- '3 cycles + any stalls on rd2' instead of
14176      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
14177      access per cycle, the first sequence could never complete in less
14178      than 6 cycles, whereas the ldm sequence would only take 5 and
14179      would make better use of sequential accesses if not hitting the
14180      cache.
14181 
14182      We cheat here and test 'arm_ld_sched' which we currently know to
14183      only be true for the ARM8, ARM9 and StrongARM.  If this ever
14184      changes, then the test below needs to be reworked.  */
14185   if (nops == 2 && arm_ld_sched && add_offset != 0)
14186     return false;
14187 
14188   /* XScale has load-store double instructions, but they have stricter
14189      alignment requirements than load-store multiple, so we cannot
14190      use them.
14191 
14192      For XScale ldm requires 2 + NREGS cycles to complete and blocks
14193      the pipeline until completion.
14194 
14195 	NREGS		CYCLES
14196 	  1		  3
14197 	  2		  4
14198 	  3		  5
14199 	  4		  6
14200 
14201      An ldr instruction takes 1-3 cycles, but does not block the
14202      pipeline.
14203 
14204 	NREGS		CYCLES
14205 	  1		 1-3
14206 	  2		 2-6
14207 	  3		 3-9
14208 	  4		 4-12
14209 
14210      Best case ldr will always win.  However, the more ldr instructions
14211      we issue, the less likely we are to be able to schedule them well.
14212      Using ldr instructions also increases code size.
14213 
14214      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14215      for counts of 3 or 4 regs.  */
14216   if (nops <= 2 && arm_tune_xscale && !optimize_size)
14217     return false;
14218   return true;
14219 }
14220 
14221 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14222    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14223    an array ORDER which describes the sequence to use when accessing the
14224    offsets that produces an ascending order.  In this sequence, each
14225    offset must be larger by exactly 4 than the previous one.  ORDER[0]
14226    must have been filled in with the lowest offset by the caller.
14227    If UNSORTED_REGS is nonnull, it is an array of register numbers that
14228    we use to verify that ORDER produces an ascending order of registers.
14229    Return true if it was possible to construct such an order, false if
14230    not.  */
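
/* As a small worked example: with NOPS == 4 and UNSORTED_OFFSETS {8, 0, 12, 4}
   the caller passes ORDER[0] == 1 (the index of offset 0), and this function
   fills in ORDER == {1, 3, 0, 2}, visiting offsets 0, 4, 8, 12.  If any link
   in that chain of +4 steps is missing (say 16 in place of 12), or if two
   offsets are equal, it returns false.  */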
14231 
14232 static bool
14233 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14234 		      int *unsorted_regs)
14235 {
14236   int i;
14237   for (i = 1; i < nops; i++)
14238     {
14239       int j;
14240 
14241       order[i] = order[i - 1];
14242       for (j = 0; j < nops; j++)
14243 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14244 	  {
14245 	    /* We must find exactly one offset that is higher than the
14246 	       previous one by 4.  */
14247 	    if (order[i] != order[i - 1])
14248 	      return false;
14249 	    order[i] = j;
14250 	  }
14251       if (order[i] == order[i - 1])
14252 	return false;
14253       /* The register numbers must be ascending.  */
14254       if (unsorted_regs != NULL
14255 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14256 	return false;
14257     }
14258   return true;
14259 }
14260 
14261 /* Used to determine in a peephole whether a sequence of load
14262    instructions can be changed into a load-multiple instruction.
14263    NOPS is the number of separate load instructions we are examining.  The
14264    first NOPS entries in OPERANDS are the destination registers, the
14265    next NOPS entries are memory operands.  If this function is
14266    successful, *BASE is set to the common base register of the memory
14267    accesses; *LOAD_OFFSET is set to the first memory location's offset
14268    from that base register.
14269    REGS is an array filled in with the destination register numbers.
14270    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14271    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
14272    the sequence of registers in REGS matches the loads from ascending memory
14273    locations, and the function verifies that the register numbers are
14274    themselves ascending.  If CHECK_REGS is false, the register numbers
14275    are stored in the order they are found in the operands.  */
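
/* For illustration (registers and base arbitrary), the kind of sequence this
   recognizes is

	ldr	r4, [r0]
	ldr	r5, [r0, #4]
	ldr	r6, [r0, #8]

   for which it returns ldm_case 1 (ldmia), with *BASE set to r0's register
   number and *LOAD_OFFSET to 0, allowing the peephole to emit a single
   "ldmia r0, {r4, r5, r6}".  */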
14276 static int
14277 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14278 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14279 {
14280   int unsorted_regs[MAX_LDM_STM_OPS];
14281   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14282   int order[MAX_LDM_STM_OPS];
14283   int base_reg = -1;
14284   int i, ldm_case;
14285 
14286   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14287      easily extended if required.  */
14288   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14289 
14290   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14291 
14292   /* Loop over the operands and check that the memory references are
14293      suitable (i.e. immediate offsets from the same base register).  At
14294      the same time, extract the target register, and the memory
14295      offsets.  */
14296   for (i = 0; i < nops; i++)
14297     {
14298       rtx reg;
14299       rtx offset;
14300 
14301       /* Convert a subreg of a mem into the mem itself.  */
14302       if (GET_CODE (operands[nops + i]) == SUBREG)
14303 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14304 
14305       gcc_assert (MEM_P (operands[nops + i]));
14306 
14307       /* Don't reorder volatile memory references; it doesn't seem worth
14308 	 looking for the case where the order is ok anyway.  */
14309       if (MEM_VOLATILE_P (operands[nops + i]))
14310 	return 0;
14311 
14312       offset = const0_rtx;
14313 
14314       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14315 	   || (GET_CODE (reg) == SUBREG
14316 	       && REG_P (reg = SUBREG_REG (reg))))
14317 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14318 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14319 		  || (GET_CODE (reg) == SUBREG
14320 		      && REG_P (reg = SUBREG_REG (reg))))
14321 	      && (CONST_INT_P (offset
14322 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14323 	{
14324 	  if (i == 0)
14325 	    {
14326 	      base_reg = REGNO (reg);
14327 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14328 		return 0;
14329 	    }
14330 	  else if (base_reg != (int) REGNO (reg))
14331 	    /* Not addressed from the same base register.  */
14332 	    return 0;
14333 
14334 	  unsorted_regs[i] = (REG_P (operands[i])
14335 			      ? REGNO (operands[i])
14336 			      : REGNO (SUBREG_REG (operands[i])));
14337 
14338 	  /* If it isn't an integer register, or if it overwrites the
14339 	     base register but isn't the last insn in the list, then
14340 	     we can't do this.  */
14341 	  if (unsorted_regs[i] < 0
14342 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14343 	      || unsorted_regs[i] > 14
14344 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
14345 	    return 0;
14346 
14347           /* Don't allow SP to be loaded unless it is also the base
14348              register.  It guarantees that SP is reset correctly when
14349              an LDM instruction is interrupted.  Otherwise, we might
14350              end up with a corrupt stack.  */
14351           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14352             return 0;
14353 
14354 	  unsorted_offsets[i] = INTVAL (offset);
14355 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14356 	    order[0] = i;
14357 	}
14358       else
14359 	/* Not a suitable memory address.  */
14360 	return 0;
14361     }
14362 
14363   /* All the useful information has now been extracted from the
14364      operands into unsorted_regs and unsorted_offsets; additionally,
14365      order[0] has been set to the lowest offset in the list.  Sort
14366      the offsets into order, verifying that they are adjacent, and
14367      check that the register numbers are ascending.  */
14368   if (!compute_offset_order (nops, unsorted_offsets, order,
14369 			     check_regs ? unsorted_regs : NULL))
14370     return 0;
14371 
14372   if (saved_order)
14373     memcpy (saved_order, order, sizeof order);
14374 
14375   if (base)
14376     {
14377       *base = base_reg;
14378 
14379       for (i = 0; i < nops; i++)
14380 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
14381 
14382       *load_offset = unsorted_offsets[order[0]];
14383     }
14384 
14385   if (unsorted_offsets[order[0]] == 0)
14386     ldm_case = 1; /* ldmia */
14387   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14388     ldm_case = 2; /* ldmib */
14389   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14390     ldm_case = 3; /* ldmda */
14391   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14392     ldm_case = 4; /* ldmdb */
14393   else if (const_ok_for_arm (unsorted_offsets[order[0]])
14394 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
14395     ldm_case = 5;
14396   else
14397     return 0;
14398 
14399   if (!multiple_operation_profitable_p (false, nops,
14400 					ldm_case == 5
14401 					? unsorted_offsets[order[0]] : 0))
14402     return 0;
14403 
14404   return ldm_case;
14405 }
14406 
14407 /* Used to determine in a peephole whether a sequence of store instructions can
14408    be changed into a store-multiple instruction.
14409    NOPS is the number of separate store instructions we are examining.
14410    NOPS_TOTAL is the total number of instructions recognized by the peephole
14411    pattern.
14412    The first NOPS entries in OPERANDS are the source registers, the next
14413    NOPS entries are memory operands.  If this function is successful, *BASE is
14414    set to the common base register of the memory accesses; *LOAD_OFFSET is set
14415    to the first memory location's offset from that base register.  REGS is an
14416    array filled in with the source register numbers, REG_RTXS (if nonnull) is
14417    likewise filled with the corresponding rtx's.
14418    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14419    numbers to an ascending order of stores.
14420    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14421    from ascending memory locations, and the function verifies that the register
14422    numbers are themselves ascending.  If CHECK_REGS is false, the register
14423    numbers are stored in the order they are found in the operands.  */
14424 static int
14425 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14426 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14427 			 HOST_WIDE_INT *load_offset, bool check_regs)
14428 {
14429   int unsorted_regs[MAX_LDM_STM_OPS];
14430   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14431   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14432   int order[MAX_LDM_STM_OPS];
14433   int base_reg = -1;
14434   rtx base_reg_rtx = NULL;
14435   int i, stm_case;
14436 
14437   /* Write-back of the base register is currently only supported for Thumb-1.  */
14438   int base_writeback = TARGET_THUMB1;
14439 
14440   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14441      easily extended if required.  */
14442   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14443 
14444   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14445 
14446   /* Loop over the operands and check that the memory references are
14447      suitable (i.e. immediate offsets from the same base register).  At
14448      the same time, extract the target register, and the memory
14449      offsets.  */
14450   for (i = 0; i < nops; i++)
14451     {
14452       rtx reg;
14453       rtx offset;
14454 
14455       /* Convert a subreg of a mem into the mem itself.  */
14456       if (GET_CODE (operands[nops + i]) == SUBREG)
14457 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
14458 
14459       gcc_assert (MEM_P (operands[nops + i]));
14460 
14461       /* Don't reorder volatile memory references; it doesn't seem worth
14462 	 looking for the case where the order is ok anyway.  */
14463       if (MEM_VOLATILE_P (operands[nops + i]))
14464 	return 0;
14465 
14466       offset = const0_rtx;
14467 
14468       if ((REG_P (reg = XEXP (operands[nops + i], 0))
14469 	   || (GET_CODE (reg) == SUBREG
14470 	       && REG_P (reg = SUBREG_REG (reg))))
14471 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14472 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14473 		  || (GET_CODE (reg) == SUBREG
14474 		      && REG_P (reg = SUBREG_REG (reg))))
14475 	      && (CONST_INT_P (offset
14476 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
14477 	{
14478 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
14479 				  ? operands[i] : SUBREG_REG (operands[i]));
14480 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14481 
14482 	  if (i == 0)
14483 	    {
14484 	      base_reg = REGNO (reg);
14485 	      base_reg_rtx = reg;
14486 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14487 		return 0;
14488 	    }
14489 	  else if (base_reg != (int) REGNO (reg))
14490 	    /* Not addressed from the same base register.  */
14491 	    return 0;
14492 
14493 	  /* If it isn't an integer register, then we can't do this.  */
14494 	  if (unsorted_regs[i] < 0
14495 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14496 	      /* The effects are unpredictable if the base register is
14497 		 both updated and stored.  */
14498 	      || (base_writeback && unsorted_regs[i] == base_reg)
14499 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14500 	      || unsorted_regs[i] > 14)
14501 	    return 0;
14502 
14503 	  unsorted_offsets[i] = INTVAL (offset);
14504 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14505 	    order[0] = i;
14506 	}
14507       else
14508 	/* Not a suitable memory address.  */
14509 	return 0;
14510     }
14511 
14512   /* All the useful information has now been extracted from the
14513      operands into unsorted_regs and unsorted_offsets; additionally,
14514      order[0] has been set to the lowest offset in the list.  Sort
14515      the offsets into order, verifying that they are adjacent, and
14516      check that the register numbers are ascending.  */
14517   if (!compute_offset_order (nops, unsorted_offsets, order,
14518 			     check_regs ? unsorted_regs : NULL))
14519     return 0;
14520 
14521   if (saved_order)
14522     memcpy (saved_order, order, sizeof order);
14523 
14524   if (base)
14525     {
14526       *base = base_reg;
14527 
14528       for (i = 0; i < nops; i++)
14529 	{
14530 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
14531 	  if (reg_rtxs)
14532 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14533 	}
14534 
14535       *load_offset = unsorted_offsets[order[0]];
14536     }
14537 
14538   if (TARGET_THUMB1
14539       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14540     return 0;
14541 
14542   if (unsorted_offsets[order[0]] == 0)
14543     stm_case = 1; /* stmia */
14544   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14545     stm_case = 2; /* stmib */
14546   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14547     stm_case = 3; /* stmda */
14548   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14549     stm_case = 4; /* stmdb */
14550   else
14551     return 0;
14552 
14553   if (!multiple_operation_profitable_p (false, nops, 0))
14554     return 0;
14555 
14556   return stm_case;
14557 }
14558 
14559 /* Routines for use in generating RTL.  */
14560 
14561 /* Generate a load-multiple instruction.  COUNT is the number of loads in
14562    the instruction; REGS and MEMS are arrays containing the operands.
14563    BASEREG is the base register to be used in addressing the memory operands.
14564    WBACK_OFFSET is nonzero if the instruction should update the base
14565    register.  */
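
/* For illustration (operands sketched, registers arbitrary): with COUNT == 2,
   REGS == {4, 5}, BASEREG r0 and WBACK_OFFSET == 8, the profitable case
   builds

   (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
	      (set (reg:SI 4) (mem:SI ...))
	      (set (reg:SI 5) (mem:SI ...))])

   i.e. an ldmia with write-back.  When multiple_operation_profitable_p says
   no, the loads (and any base update) are emitted as separate insns
   instead.  */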
14566 
14567 static rtx
14568 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14569 			 HOST_WIDE_INT wback_offset)
14570 {
14571   int i = 0, j;
14572   rtx result;
14573 
14574   if (!multiple_operation_profitable_p (false, count, 0))
14575     {
14576       rtx seq;
14577 
14578       start_sequence ();
14579 
14580       for (i = 0; i < count; i++)
14581 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14582 
14583       if (wback_offset != 0)
14584 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14585 
14586       seq = get_insns ();
14587       end_sequence ();
14588 
14589       return seq;
14590     }
14591 
14592   result = gen_rtx_PARALLEL (VOIDmode,
14593 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14594   if (wback_offset != 0)
14595     {
14596       XVECEXP (result, 0, 0)
14597 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14598       i = 1;
14599       count++;
14600     }
14601 
14602   for (j = 0; i < count; i++, j++)
14603     XVECEXP (result, 0, i)
14604       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14605 
14606   return result;
14607 }
14608 
14609 /* Generate a store-multiple instruction.  COUNT is the number of stores in
14610    the instruction; REGS and MEMS are arrays containing the operands.
14611    BASEREG is the base register to be used in addressing the memory operands.
14612    WBACK_OFFSET is nonzero if the instruction should update the base
14613    register.  */
14614 
14615 static rtx
14616 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14617 			  HOST_WIDE_INT wback_offset)
14618 {
14619   int i = 0, j;
14620   rtx result;
14621 
14622   if (GET_CODE (basereg) == PLUS)
14623     basereg = XEXP (basereg, 0);
14624 
14625   if (!multiple_operation_profitable_p (false, count, 0))
14626     {
14627       rtx seq;
14628 
14629       start_sequence ();
14630 
14631       for (i = 0; i < count; i++)
14632 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14633 
14634       if (wback_offset != 0)
14635 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14636 
14637       seq = get_insns ();
14638       end_sequence ();
14639 
14640       return seq;
14641     }
14642 
14643   result = gen_rtx_PARALLEL (VOIDmode,
14644 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14645   if (wback_offset != 0)
14646     {
14647       XVECEXP (result, 0, 0)
14648 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14649       i = 1;
14650       count++;
14651     }
14652 
14653   for (j = 0; i < count; i++, j++)
14654     XVECEXP (result, 0, i)
14655       = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14656 
14657   return result;
14658 }
14659 
14660 /* Generate either a load-multiple or a store-multiple instruction.  This
14661    function can be used in situations where we can start with a single MEM
14662    rtx and adjust its address upwards.
14663    COUNT is the number of operations in the instruction, not counting a
14664    possible update of the base register.  REGS is an array containing the
14665    register operands.
14666    BASEREG is the base register to be used in addressing the memory operands,
14667    which are constructed from BASEMEM.
14668    WRITE_BACK specifies whether the generated instruction should include an
14669    update of the base register.
14670    OFFSETP is used to pass an offset to and from this function; this offset
14671    is not used when constructing the address (instead BASEMEM should have an
14672    appropriate offset in its address), it is used only for setting
14673    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
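
/* For illustration (values arbitrary): a caller copying two words starting at
   BASEMEM would pass COUNT == 2, REGS == {4, 5} and WRITE_BACK true.  The two
   MEMs are then BASEMEM readdressed to BASEREG and BASEREG + 4, the write-back
   amount is 4 * COUNT == 8, and *OFFSETP is advanced by 8 so that a following
   call continues where this one left off.  */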
14674 
14675 static rtx
14676 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14677 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14678 {
14679   rtx mems[MAX_LDM_STM_OPS];
14680   HOST_WIDE_INT offset = *offsetp;
14681   int i;
14682 
14683   gcc_assert (count <= MAX_LDM_STM_OPS);
14684 
14685   if (GET_CODE (basereg) == PLUS)
14686     basereg = XEXP (basereg, 0);
14687 
14688   for (i = 0; i < count; i++)
14689     {
14690       rtx addr = plus_constant (Pmode, basereg, i * 4);
14691       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14692       offset += 4;
14693     }
14694 
14695   if (write_back)
14696     *offsetp = offset;
14697 
14698   if (is_load)
14699     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14700 				    write_back ? 4 * count : 0);
14701   else
14702     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14703 				     write_back ? 4 * count : 0);
14704 }
14705 
14706 rtx
14707 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14708 		       rtx basemem, HOST_WIDE_INT *offsetp)
14709 {
14710   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14711 			      offsetp);
14712 }
14713 
14714 rtx
14715 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14716 			rtx basemem, HOST_WIDE_INT *offsetp)
14717 {
14718   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14719 			      offsetp);
14720 }
14721 
14722 /* Called from a peephole2 expander to turn a sequence of loads into an
14723    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
14724    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
14725    is true if we can reorder the registers because they are used commutatively
14726    subsequently.
14727    Returns true iff we could generate a new instruction.  */
14728 
14729 bool
14730 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14731 {
14732   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14733   rtx mems[MAX_LDM_STM_OPS];
14734   int i, j, base_reg;
14735   rtx base_reg_rtx;
14736   HOST_WIDE_INT offset;
14737   int write_back = FALSE;
14738   int ldm_case;
14739   rtx addr;
14740 
14741   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14742 				     &base_reg, &offset, !sort_regs);
14743 
14744   if (ldm_case == 0)
14745     return false;
14746 
14747   if (sort_regs)
14748     for (i = 0; i < nops - 1; i++)
14749       for (j = i + 1; j < nops; j++)
14750 	if (regs[i] > regs[j])
14751 	  {
14752 	    int t = regs[i];
14753 	    regs[i] = regs[j];
14754 	    regs[j] = t;
14755 	  }
14756   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14757 
14758   if (TARGET_THUMB1)
14759     {
14760       gcc_assert (ldm_case == 1 || ldm_case == 5);
14761 
14762       /* Thumb-1 ldm uses writeback except if the base is loaded.  */
14763       write_back = true;
14764       for (i = 0; i < nops; i++)
14765 	if (base_reg == regs[i])
14766 	  write_back = false;
14767 
14768       /* Ensure the base is dead if it is updated.  */
14769       if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14770 	return false;
14771     }
14772 
14773   if (ldm_case == 5)
14774     {
14775       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14776       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14777       offset = 0;
14778       base_reg_rtx = newbase;
14779     }
14780 
14781   for (i = 0; i < nops; i++)
14782     {
14783       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14784       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14785 					      SImode, addr, 0);
14786     }
14787   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14788 				      write_back ? offset + i * 4 : 0));
14789   return true;
14790 }
14791 
14792 /* Called from a peephole2 expander to turn a sequence of stores into an
14793    STM instruction.  OPERANDS are the operands found by the peephole matcher;
14794    NOPS indicates how many separate stores we are trying to combine.
14795    Returns true iff we could generate a new instruction.  */
14796 
14797 bool
14798 gen_stm_seq (rtx *operands, int nops)
14799 {
14800   int i;
14801   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14802   rtx mems[MAX_LDM_STM_OPS];
14803   int base_reg;
14804   rtx base_reg_rtx;
14805   HOST_WIDE_INT offset;
14806   int write_back = FALSE;
14807   int stm_case;
14808   rtx addr;
14809   bool base_reg_dies;
14810 
14811   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14812 				      mem_order, &base_reg, &offset, true);
14813 
14814   if (stm_case == 0)
14815     return false;
14816 
14817   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14818 
14819   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14820   if (TARGET_THUMB1)
14821     {
14822       gcc_assert (base_reg_dies);
14823       write_back = TRUE;
14824     }
14825 
14826   if (stm_case == 5)
14827     {
14828       gcc_assert (base_reg_dies);
14829       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14830       offset = 0;
14831     }
14832 
14833   addr = plus_constant (Pmode, base_reg_rtx, offset);
14834 
14835   for (i = 0; i < nops; i++)
14836     {
14837       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14838       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14839 					      SImode, addr, 0);
14840     }
14841   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14842 				       write_back ? offset + i * 4 : 0));
14843   return true;
14844 }
14845 
14846 /* Called from a peephole2 expander to turn a sequence of stores that are
14847    preceded by constant loads into an STM instruction.  OPERANDS are the
14848    operands found by the peephole matcher; NOPS indicates how many
14849    separate stores we are trying to combine; there are 2 * NOPS
14850    instructions in the peephole.
14851    Returns true iff we could generate a new instruction.  */
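
/* For illustration (registers arbitrary), the matched sequence might be

	mov	r5, #1
	mov	r4, #2
	str	r5, [r0]
	str	r4, [r0, #4]

   The stores are to ascending addresses but the source registers are not
   ascending, so the constants are re-loaded in ascending register order
   (r4 <- 1, r5 <- 2, provided the original registers are dead afterwards),
   allowing a single "stmia r0, {r4, r5}" to be emitted.  */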
14852 
14853 bool
14854 gen_const_stm_seq (rtx *operands, int nops)
14855 {
14856   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14857   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14858   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14859   rtx mems[MAX_LDM_STM_OPS];
14860   int base_reg;
14861   rtx base_reg_rtx;
14862   HOST_WIDE_INT offset;
14863   int write_back = FALSE;
14864   int stm_case;
14865   rtx addr;
14866   bool base_reg_dies;
14867   int i, j;
14868   HARD_REG_SET allocated;
14869 
14870   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14871 				      mem_order, &base_reg, &offset, false);
14872 
14873   if (stm_case == 0)
14874     return false;
14875 
14876   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14877 
14878   /* If the same register is used more than once, try to find a free
14879      register.  */
14880   CLEAR_HARD_REG_SET (allocated);
14881   for (i = 0; i < nops; i++)
14882     {
14883       for (j = i + 1; j < nops; j++)
14884 	if (regs[i] == regs[j])
14885 	  {
14886 	    rtx t = peep2_find_free_register (0, nops * 2,
14887 					      TARGET_THUMB1 ? "l" : "r",
14888 					      SImode, &allocated);
14889 	    if (t == NULL_RTX)
14890 	      return false;
14891 	    reg_rtxs[i] = t;
14892 	    regs[i] = REGNO (t);
14893 	  }
14894     }
14895 
14896   /* Compute an ordering that maps the register numbers to an ascending
14897      sequence.  */
14898   reg_order[0] = 0;
14899   for (i = 0; i < nops; i++)
14900     if (regs[i] < regs[reg_order[0]])
14901       reg_order[0] = i;
14902 
14903   for (i = 1; i < nops; i++)
14904     {
14905       int this_order = reg_order[i - 1];
14906       for (j = 0; j < nops; j++)
14907 	if (regs[j] > regs[reg_order[i - 1]]
14908 	    && (this_order == reg_order[i - 1]
14909 		|| regs[j] < regs[this_order]))
14910 	  this_order = j;
14911       reg_order[i] = this_order;
14912     }
14913 
14914   /* Ensure that registers that must be live after the instruction end
14915      up with the correct value.  */
14916   for (i = 0; i < nops; i++)
14917     {
14918       int this_order = reg_order[i];
14919       if ((this_order != mem_order[i]
14920 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14921 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14922 	return false;
14923     }
14924 
14925   /* Load the constants.  */
14926   for (i = 0; i < nops; i++)
14927     {
14928       rtx op = operands[2 * nops + mem_order[i]];
14929       sorted_regs[i] = regs[reg_order[i]];
14930       emit_move_insn (reg_rtxs[reg_order[i]], op);
14931     }
14932 
14933   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14934 
14935   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14936   if (TARGET_THUMB1)
14937     {
14938       gcc_assert (base_reg_dies);
14939       write_back = TRUE;
14940     }
14941 
14942   if (stm_case == 5)
14943     {
14944       gcc_assert (base_reg_dies);
14945       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14946       offset = 0;
14947     }
14948 
14949   addr = plus_constant (Pmode, base_reg_rtx, offset);
14950 
14951   for (i = 0; i < nops; i++)
14952     {
14953       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14954       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14955 					      SImode, addr, 0);
14956     }
14957   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14958 				       write_back ? offset + i * 4 : 0));
14959   return true;
14960 }
14961 
14962 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14963    unaligned copies on processors which support unaligned semantics for those
14964    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
14965    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14966    An interleave factor of 1 (the minimum) will perform no interleaving.
14967    Load/store multiple are used for aligned addresses where possible.  */
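
/* For illustration (registers arbitrary), with INTERLEAVE_FACTOR == 2 and an
   unaligned source and destination, each iteration of the main loop copies
   8 bytes roughly as

	ldr	rA, [src]	@ unaligned loads
	ldr	rB, [src, #4]
	str	rA, [dst]	@ unaligned stores
	str	rB, [dst, #4]

   so the second load can issue before the first store needs its data, hiding
   some of the load latency.  Trailing halfword/byte copies are handled
   separately after the word copies.  */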
14968 
14969 static void
14970 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14971 				   HOST_WIDE_INT length,
14972 				   unsigned int interleave_factor)
14973 {
14974   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14975   int *regnos = XALLOCAVEC (int, interleave_factor);
14976   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14977   HOST_WIDE_INT i, j;
14978   HOST_WIDE_INT remaining = length, words;
14979   rtx halfword_tmp = NULL, byte_tmp = NULL;
14980   rtx dst, src;
14981   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14982   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14983   HOST_WIDE_INT srcoffset, dstoffset;
14984   HOST_WIDE_INT src_autoinc, dst_autoinc;
14985   rtx mem, addr;
14986 
14987   gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14988 
14989   /* Use hard registers if we have aligned source or destination so we can use
14990      load/store multiple with contiguous registers.  */
14991   if (dst_aligned || src_aligned)
14992     for (i = 0; i < interleave_factor; i++)
14993       regs[i] = gen_rtx_REG (SImode, i);
14994   else
14995     for (i = 0; i < interleave_factor; i++)
14996       regs[i] = gen_reg_rtx (SImode);
14997 
14998   dst = copy_addr_to_reg (XEXP (dstbase, 0));
14999   src = copy_addr_to_reg (XEXP (srcbase, 0));
15000 
15001   srcoffset = dstoffset = 0;
15002 
15003   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15004      For copying the last bytes we want to subtract this offset again.  */
15005   src_autoinc = dst_autoinc = 0;
15006 
15007   for (i = 0; i < interleave_factor; i++)
15008     regnos[i] = i;
15009 
15010   /* Copy BLOCK_SIZE_BYTES chunks.  */
15011 
15012   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15013     {
15014       /* Load words.  */
15015       if (src_aligned && interleave_factor > 1)
15016 	{
15017 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15018 					    TRUE, srcbase, &srcoffset));
15019 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
15020 	}
15021       else
15022 	{
15023 	  for (j = 0; j < interleave_factor; j++)
15024 	    {
15025 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15026 						 - src_autoinc));
15027 	      mem = adjust_automodify_address (srcbase, SImode, addr,
15028 					       srcoffset + j * UNITS_PER_WORD);
15029 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
15030 	    }
15031 	  srcoffset += block_size_bytes;
15032 	}
15033 
15034       /* Store words.  */
15035       if (dst_aligned && interleave_factor > 1)
15036 	{
15037 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15038 					     TRUE, dstbase, &dstoffset));
15039 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
15040 	}
15041       else
15042 	{
15043 	  for (j = 0; j < interleave_factor; j++)
15044 	    {
15045 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15046 						 - dst_autoinc));
15047 	      mem = adjust_automodify_address (dstbase, SImode, addr,
15048 					       dstoffset + j * UNITS_PER_WORD);
15049 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
15050 	    }
15051 	  dstoffset += block_size_bytes;
15052 	}
15053 
15054       remaining -= block_size_bytes;
15055     }
15056 
15057   /* Copy any whole words left (note these aren't interleaved with any
15058      subsequent halfword/byte load/stores in the interests of simplicity).  */
15059 
15060   words = remaining / UNITS_PER_WORD;
15061 
15062   gcc_assert (words < interleave_factor);
15063 
15064   if (src_aligned && words > 1)
15065     {
15066       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15067 					&srcoffset));
15068       src_autoinc += UNITS_PER_WORD * words;
15069     }
15070   else
15071     {
15072       for (j = 0; j < words; j++)
15073 	{
15074 	  addr = plus_constant (Pmode, src,
15075 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
15076 	  mem = adjust_automodify_address (srcbase, SImode, addr,
15077 					   srcoffset + j * UNITS_PER_WORD);
15078 	  if (src_aligned)
15079 	    emit_move_insn (regs[j], mem);
15080 	  else
15081 	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
15082 	}
15083       srcoffset += words * UNITS_PER_WORD;
15084     }
15085 
15086   if (dst_aligned && words > 1)
15087     {
15088       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15089 					 &dstoffset));
15090       dst_autoinc += words * UNITS_PER_WORD;
15091     }
15092   else
15093     {
15094       for (j = 0; j < words; j++)
15095 	{
15096 	  addr = plus_constant (Pmode, dst,
15097 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15098 	  mem = adjust_automodify_address (dstbase, SImode, addr,
15099 					   dstoffset + j * UNITS_PER_WORD);
15100 	  if (dst_aligned)
15101 	    emit_move_insn (mem, regs[j]);
15102 	  else
15103 	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
15104 	}
15105       dstoffset += words * UNITS_PER_WORD;
15106     }
15107 
15108   remaining -= words * UNITS_PER_WORD;
15109 
15110   gcc_assert (remaining < 4);
15111 
15112   /* Copy a halfword if necessary.  */
15113 
15114   if (remaining >= 2)
15115     {
15116       halfword_tmp = gen_reg_rtx (SImode);
15117 
15118       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15119       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15120       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15121 
15122       /* Either write out immediately, or delay until we've loaded the last
15123 	 byte, depending on interleave factor.  */
15124       if (interleave_factor == 1)
15125 	{
15126 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15127 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15128 	  emit_insn (gen_unaligned_storehi (mem,
15129 		       gen_lowpart (HImode, halfword_tmp)));
15130 	  halfword_tmp = NULL;
15131 	  dstoffset += 2;
15132 	}
15133 
15134       remaining -= 2;
15135       srcoffset += 2;
15136     }
15137 
15138   gcc_assert (remaining < 2);
15139 
15140   /* Copy last byte.  */
15141 
15142   if ((remaining & 1) != 0)
15143     {
15144       byte_tmp = gen_reg_rtx (SImode);
15145 
15146       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15147       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15148       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15149 
15150       if (interleave_factor == 1)
15151 	{
15152 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15153 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15154 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15155 	  byte_tmp = NULL;
15156 	  dstoffset++;
15157 	}
15158 
15159       remaining--;
15160       srcoffset++;
15161     }
15162 
15163   /* Store last halfword if we haven't done so already.  */
15164 
15165   if (halfword_tmp)
15166     {
15167       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15168       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15169       emit_insn (gen_unaligned_storehi (mem,
15170 		   gen_lowpart (HImode, halfword_tmp)));
15171       dstoffset += 2;
15172     }
15173 
15174   /* Likewise for last byte.  */
15175 
15176   if (byte_tmp)
15177     {
15178       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15179       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15180       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15181       dstoffset++;
15182     }
15183 
15184   gcc_assert (remaining == 0 && srcoffset == dstoffset);
15185 }
15186 
15187 /* From mips_adjust_block_mem:
15188 
15189    Helper function for doing a loop-based block operation on memory
15190    reference MEM.  Each iteration of the loop will operate on LENGTH
15191    bytes of MEM.
15192 
15193    Create a new base register for use within the loop and point it to
15194    the start of MEM.  Create a new memory reference that uses this
15195    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
15196 
15197 static void
15198 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15199 		      rtx *loop_mem)
15200 {
15201   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15202 
15203   /* Although the new mem does not refer to a known location,
15204      it does keep up to LENGTH bytes of alignment.  */
15205   *loop_mem = change_address (mem, BLKmode, *loop_reg);
15206   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15207 }
15208 
15209 /* From mips_block_move_loop:
15210 
15211    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15212    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
15213    the memory regions do not overlap.  */
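
/* Sketch of the code emitted here (pseudocode):

     final_src = src_reg + (LENGTH - LENGTH % BYTES_PER_ITER);
   loop:
     copy BYTES_PER_ITER bytes from *src_reg to *dest_reg (interleaved);
     src_reg += BYTES_PER_ITER;
     dest_reg += BYTES_PER_ITER;
     if (src_reg != final_src) goto loop;
     copy the remaining LENGTH % BYTES_PER_ITER bytes straight.  */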
15214 
15215 static void
15216 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15217 			       unsigned int interleave_factor,
15218 			       HOST_WIDE_INT bytes_per_iter)
15219 {
15220   rtx src_reg, dest_reg, final_src, test;
15221   HOST_WIDE_INT leftover;
15222 
15223   leftover = length % bytes_per_iter;
15224   length -= leftover;
15225 
15226   /* Create registers and memory references for use within the loop.  */
15227   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15228   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15229 
15230   /* Calculate the value that SRC_REG should have after the last iteration of
15231      the loop.  */
15232   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15233 				   0, 0, OPTAB_WIDEN);
15234 
15235   /* Emit the start of the loop.  */
15236   rtx_code_label *label = gen_label_rtx ();
15237   emit_label (label);
15238 
15239   /* Emit the loop body.  */
15240   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15241 				     interleave_factor);
15242 
15243   /* Move on to the next block.  */
15244   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15245   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15246 
15247   /* Emit the loop condition.  */
15248   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15249   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15250 
15251   /* Mop up any left-over bytes.  */
15252   if (leftover)
15253     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15254 }
15255 
15256 /* Emit a block move when either the source or destination is unaligned (not
15257    aligned to a four-byte boundary).  This may need further tuning depending on
15258    core type, optimize_size setting, etc.  */
15259 
15260 static int
15261 arm_cpymemqi_unaligned (rtx *operands)
15262 {
15263   HOST_WIDE_INT length = INTVAL (operands[2]);
15264 
15265   if (optimize_size)
15266     {
15267       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15268       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15269       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15270 	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
15271 	 or dst_aligned though: allow more interleaving in those cases since the
15272 	 resulting code can be smaller.  */
15273       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15274       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15275 
15276       if (length > 12)
15277 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
15278 				       interleave_factor, bytes_per_iter);
15279       else
15280 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
15281 					   interleave_factor);
15282     }
15283   else
15284     {
15285       /* Note that the loop created by arm_block_move_unaligned_loop may be
15286 	 subject to loop unrolling, which makes tuning this condition a little
15287 	 redundant.  */
15288       if (length > 32)
15289 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15290       else
15291 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15292     }
15293 
15294   return 1;
15295 }
15296 
15297 int
15298 arm_gen_cpymemqi (rtx *operands)
15299 {
15300   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15301   HOST_WIDE_INT srcoffset, dstoffset;
15302   rtx src, dst, srcbase, dstbase;
15303   rtx part_bytes_reg = NULL;
15304   rtx mem;
15305 
15306   if (!CONST_INT_P (operands[2])
15307       || !CONST_INT_P (operands[3])
15308       || INTVAL (operands[2]) > 64)
15309     return 0;
15310 
15311   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15312     return arm_cpymemqi_unaligned (operands);
15313 
15314   if (INTVAL (operands[3]) & 3)
15315     return 0;
15316 
15317   dstbase = operands[0];
15318   srcbase = operands[1];
15319 
15320   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15321   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15322 
15323   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15324   out_words_to_go = INTVAL (operands[2]) / 4;
15325   last_bytes = INTVAL (operands[2]) & 3;
15326   dstoffset = srcoffset = 0;
15327 
15328   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15329     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15330 
15331   while (in_words_to_go >= 2)
15332     {
15333       if (in_words_to_go > 4)
15334 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15335 					  TRUE, srcbase, &srcoffset));
15336       else
15337 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15338 					  src, FALSE, srcbase,
15339 					  &srcoffset));
15340 
15341       if (out_words_to_go)
15342 	{
15343 	  if (out_words_to_go > 4)
15344 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15345 					       TRUE, dstbase, &dstoffset));
15346 	  else if (out_words_to_go != 1)
15347 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15348 					       out_words_to_go, dst,
15349 					       (last_bytes == 0
15350 						? FALSE : TRUE),
15351 					       dstbase, &dstoffset));
15352 	  else
15353 	    {
15354 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15355 	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15356 	      if (last_bytes != 0)
15357 		{
15358 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15359 		  dstoffset += 4;
15360 		}
15361 	    }
15362 	}
15363 
15364       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15365       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15366     }
15367 
15368   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
15369   if (out_words_to_go)
15370     {
15371       rtx sreg;
15372 
15373       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15374       sreg = copy_to_reg (mem);
15375 
15376       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15377       emit_move_insn (mem, sreg);
15378       in_words_to_go--;
15379 
15380       gcc_assert (!in_words_to_go);	/* Sanity check */
15381     }
15382 
15383   if (in_words_to_go)
15384     {
15385       gcc_assert (in_words_to_go > 0);
15386 
15387       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15388       part_bytes_reg = copy_to_mode_reg (SImode, mem);
15389     }
15390 
15391   gcc_assert (!last_bytes || part_bytes_reg);
15392 
15393   if (BYTES_BIG_ENDIAN && last_bytes)
15394     {
15395       rtx tmp = gen_reg_rtx (SImode);
15396 
15397       /* The bytes we want are in the top end of the word.  */
15398       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15399 			      GEN_INT (8 * (4 - last_bytes))));
15400       part_bytes_reg = tmp;
15401 
15402       while (last_bytes)
15403 	{
15404 	  mem = adjust_automodify_address (dstbase, QImode,
15405 					   plus_constant (Pmode, dst,
15406 							  last_bytes - 1),
15407 					   dstoffset + last_bytes - 1);
15408 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15409 
15410 	  if (--last_bytes)
15411 	    {
15412 	      tmp = gen_reg_rtx (SImode);
15413 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15414 	      part_bytes_reg = tmp;
15415 	    }
15416 	}
15417 
15418     }
15419   else
15420     {
15421       if (last_bytes > 1)
15422 	{
15423 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15424 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15425 	  last_bytes -= 2;
15426 	  if (last_bytes)
15427 	    {
15428 	      rtx tmp = gen_reg_rtx (SImode);
15429 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15430 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15431 	      part_bytes_reg = tmp;
15432 	      dstoffset += 2;
15433 	    }
15434 	}
15435 
15436       if (last_bytes)
15437 	{
15438 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15439 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15440 	}
15441     }
15442 
15443   return 1;
15444 }
15445 
15446 /* Helper for gen_cpymem_ldrd_strd.  Increase the address of the memory rtx
15447    by its mode size.  */
15448 inline static rtx
15449 next_consecutive_mem (rtx mem)
15450 {
15451   machine_mode mode = GET_MODE (mem);
15452   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15453   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15454 
15455   return adjust_automodify_address (mem, mode, addr, offset);
15456 }
15457 
15458 /* Copy using LDRD/STRD instructions whenever possible.
15459    Returns true upon success. */
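
/* For illustration (register allocation arbitrary), a 16-byte copy with both
   source and destination known to be doubleword aligned becomes two DImode
   register moves, which the DImode move patterns can typically emit as

	ldrd	r2, r3, [r1]
	strd	r2, r3, [r0]
	ldrd	r2, r3, [r1, #8]
	strd	r2, r3, [r0, #8]

   Operands that are only word aligned use the unaligned_loaddi/
   unaligned_storedi patterns instead, and operands with no known word
   alignment fall back to arm_gen_cpymemqi.  */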
15460 bool
15461 gen_cpymem_ldrd_strd (rtx *operands)
15462 {
15463   unsigned HOST_WIDE_INT len;
15464   HOST_WIDE_INT align;
15465   rtx src, dst, base;
15466   rtx reg0;
15467   bool src_aligned, dst_aligned;
15468   bool src_volatile, dst_volatile;
15469 
15470   gcc_assert (CONST_INT_P (operands[2]));
15471   gcc_assert (CONST_INT_P (operands[3]));
15472 
15473   len = UINTVAL (operands[2]);
15474   if (len > 64)
15475     return false;
15476 
15477   /* Maximum alignment we can assume for both src and dst buffers.  */
15478   align = INTVAL (operands[3]);
15479 
15480   if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15481     return false;
15482 
15483   /* Place src and dst addresses in registers
15484      and update the corresponding mem rtx.  */
15485   dst = operands[0];
15486   dst_volatile = MEM_VOLATILE_P (dst);
15487   dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15488   base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15489   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15490 
15491   src = operands[1];
15492   src_volatile = MEM_VOLATILE_P (src);
15493   src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15494   base = copy_to_mode_reg (SImode, XEXP (src, 0));
15495   src = adjust_automodify_address (src, VOIDmode, base, 0);
15496 
15497   if (!unaligned_access && !(src_aligned && dst_aligned))
15498     return false;
15499 
15500   if (src_volatile || dst_volatile)
15501     return false;
15502 
15503   /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
15504   if (!(dst_aligned || src_aligned))
15505     return arm_gen_cpymemqi (operands);
15506 
15507   /* If either src or dst is unaligned, we'll access it as pairs of
15508      unaligned SImode accesses.  Otherwise we can generate DImode
15509      ldrd/strd instructions.  */
15510   src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15511   dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15512 
15513   while (len >= 8)
15514     {
15515       len -= 8;
15516       reg0 = gen_reg_rtx (DImode);
15517       rtx low_reg = NULL_RTX;
15518       rtx hi_reg = NULL_RTX;
15519 
15520       if (!src_aligned || !dst_aligned)
15521 	{
15522 	  low_reg = gen_lowpart (SImode, reg0);
15523 	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15524 	}
15525       if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15526 	emit_move_insn (reg0, src);
15527       else if (src_aligned)
15528 	emit_insn (gen_unaligned_loaddi (reg0, src));
15529       else
15530 	{
15531 	  emit_insn (gen_unaligned_loadsi (low_reg, src));
15532 	  src = next_consecutive_mem (src);
15533 	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
15534 	}
15535 
15536       if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15537 	emit_move_insn (dst, reg0);
15538       else if (dst_aligned)
15539 	emit_insn (gen_unaligned_storedi (dst, reg0));
15540       else
15541 	{
15542 	  emit_insn (gen_unaligned_storesi (dst, low_reg));
15543 	  dst = next_consecutive_mem (dst);
15544 	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
15545 	}
15546 
15547       src = next_consecutive_mem (src);
15548       dst = next_consecutive_mem (dst);
15549     }
15550 
15551   gcc_assert (len < 8);
15552   if (len >= 4)
15553     {
15554       /* More than a word but less than a double-word to copy.  Copy a word.  */
15555       reg0 = gen_reg_rtx (SImode);
15556       src = adjust_address (src, SImode, 0);
15557       dst = adjust_address (dst, SImode, 0);
15558       if (src_aligned)
15559         emit_move_insn (reg0, src);
15560       else
15561         emit_insn (gen_unaligned_loadsi (reg0, src));
15562 
15563       if (dst_aligned)
15564         emit_move_insn (dst, reg0);
15565       else
15566         emit_insn (gen_unaligned_storesi (dst, reg0));
15567 
15568       src = next_consecutive_mem (src);
15569       dst = next_consecutive_mem (dst);
15570       len -= 4;
15571     }
15572 
15573   if (len == 0)
15574     return true;
15575 
15576   /* Copy the remaining bytes.  */
15577   if (len >= 2)
15578     {
15579       dst = adjust_address (dst, HImode, 0);
15580       src = adjust_address (src, HImode, 0);
15581       reg0 = gen_reg_rtx (SImode);
15582       if (src_aligned)
15583         emit_insn (gen_zero_extendhisi2 (reg0, src));
15584       else
15585         emit_insn (gen_unaligned_loadhiu (reg0, src));
15586 
15587       if (dst_aligned)
15588         emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15589       else
15590         emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15591 
15592       src = next_consecutive_mem (src);
15593       dst = next_consecutive_mem (dst);
15594       if (len == 2)
15595         return true;
15596     }
15597 
15598   dst = adjust_address (dst, QImode, 0);
15599   src = adjust_address (src, QImode, 0);
15600   reg0 = gen_reg_rtx (QImode);
15601   emit_move_insn (reg0, src);
15602   emit_move_insn (dst, reg0);
15603   return true;
15604 }
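
/* As an illustrative sketch only (the exact sequence depends on the target
   options), copying 12 bytes between two word-aligned buffers expands to a
   double-word pair followed by a single word access, roughly:

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldr	r4, [r1, #8]
	str	r4, [r0, #8]

   When only one side is word-aligned, the double-word step on the other
   side is split into two unaligned SImode accesses instead.  */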
15605 
15606 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15607    into its component 32-bit subregs.  OP2 may be an immediate
15608    constant and we want to simplify it in that case.  */
15609 void
15610 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15611 			rtx *lo_op2, rtx *hi_op2)
15612 {
15613   *lo_op1 = gen_lowpart (SImode, op1);
15614   *hi_op1 = gen_highpart (SImode, op1);
15615   *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15616 				 subreg_lowpart_offset (SImode, DImode));
15617   *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15618 				 subreg_highpart_offset (SImode, DImode));
15619 }
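
/* For example (purely illustrative): with OP2 = (const_int 0x100000002),
   *LO_OP2 becomes (const_int 2) and *HI_OP2 becomes (const_int 1), while a
   register OP1 simply yields its two SImode subwords; a 64-bit addition can
   then be expanded as ADDS on the low parts followed by ADC on the high
   parts.  */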
15620 
15621 /* Select a dominance comparison mode if possible for a test of the general
15622    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
15623    COND_OR == DOM_CC_X_AND_Y => (X && Y)
15624    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15625    COND_OR == DOM_CC_X_OR_Y => (X || Y)
15626    In all cases OP will be either EQ or NE, but we don't need to know which
15627    here.  If we are unable to support a dominance comparison we return
15628    CC mode.  This will then fail to match for the RTL expressions that
15629    generate this call.  */
15630 machine_mode
15631 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15632 {
15633   enum rtx_code cond1, cond2;
15634   int swapped = 0;
15635 
15636   /* Currently we will probably get the wrong result if the individual
15637      comparisons are not simple.  This also ensures that it is safe to
15638      reverse a comparison if necessary.  */
15639   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15640        != CCmode)
15641       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15642 	  != CCmode))
15643     return CCmode;
15644 
15645   /* The if_then_else variant of this tests the second condition if the
15646      first passes, but is true if the first fails.  Reverse the first
15647      condition to get a true "inclusive-or" expression.  */
15648   if (cond_or == DOM_CC_NX_OR_Y)
15649     cond1 = reverse_condition (cond1);
15650 
15651   /* If the comparisons are not equal, and one doesn't dominate the other,
15652      then we can't do this.  */
15653   if (cond1 != cond2
15654       && !comparison_dominates_p (cond1, cond2)
15655       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15656     return CCmode;
15657 
15658   if (swapped)
15659     std::swap (cond1, cond2);
15660 
15661   switch (cond1)
15662     {
15663     case EQ:
15664       if (cond_or == DOM_CC_X_AND_Y)
15665 	return CC_DEQmode;
15666 
15667       switch (cond2)
15668 	{
15669 	case EQ: return CC_DEQmode;
15670 	case LE: return CC_DLEmode;
15671 	case LEU: return CC_DLEUmode;
15672 	case GE: return CC_DGEmode;
15673 	case GEU: return CC_DGEUmode;
15674 	default: gcc_unreachable ();
15675 	}
15676 
15677     case LT:
15678       if (cond_or == DOM_CC_X_AND_Y)
15679 	return CC_DLTmode;
15680 
15681       switch (cond2)
15682 	{
15683 	case  LT:
15684 	    return CC_DLTmode;
15685 	case LE:
15686 	  return CC_DLEmode;
15687 	case NE:
15688 	  return CC_DNEmode;
15689 	default:
15690 	  gcc_unreachable ();
15691 	}
15692 
15693     case GT:
15694       if (cond_or == DOM_CC_X_AND_Y)
15695 	return CC_DGTmode;
15696 
15697       switch (cond2)
15698 	{
15699 	case GT:
15700 	  return CC_DGTmode;
15701 	case GE:
15702 	  return CC_DGEmode;
15703 	case NE:
15704 	  return CC_DNEmode;
15705 	default:
15706 	  gcc_unreachable ();
15707 	}
15708 
15709     case LTU:
15710       if (cond_or == DOM_CC_X_AND_Y)
15711 	return CC_DLTUmode;
15712 
15713       switch (cond2)
15714 	{
15715 	case LTU:
15716 	  return CC_DLTUmode;
15717 	case LEU:
15718 	  return CC_DLEUmode;
15719 	case NE:
15720 	  return CC_DNEmode;
15721 	default:
15722 	  gcc_unreachable ();
15723 	}
15724 
15725     case GTU:
15726       if (cond_or == DOM_CC_X_AND_Y)
15727 	return CC_DGTUmode;
15728 
15729       switch (cond2)
15730 	{
15731 	case GTU:
15732 	  return CC_DGTUmode;
15733 	case GEU:
15734 	  return CC_DGEUmode;
15735 	case NE:
15736 	  return CC_DNEmode;
15737 	default:
15738 	  gcc_unreachable ();
15739 	}
15740 
15741     /* The remaining cases only occur when both comparisons are the
15742        same.  */
15743     case NE:
15744       gcc_assert (cond1 == cond2);
15745       return CC_DNEmode;
15746 
15747     case LE:
15748       gcc_assert (cond1 == cond2);
15749       return CC_DLEmode;
15750 
15751     case GE:
15752       gcc_assert (cond1 == cond2);
15753       return CC_DGEmode;
15754 
15755     case LEU:
15756       gcc_assert (cond1 == cond2);
15757       return CC_DLEUmode;
15758 
15759     case GEU:
15760       gcc_assert (cond1 == cond2);
15761       return CC_DGEUmode;
15762 
15763     default:
15764       gcc_unreachable ();
15765     }
15766 }
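
/* A worked example (assumed, not taken from any particular pattern): for
   the test (ior (eq (reg) (const_int 0)) (ge (reg) (const_int 0))) with
   COND_OR == DOM_CC_X_OR_Y, COND1 is EQ and COND2 is GE; EQ implies GE, so
   the function returns CC_DGEmode and the whole disjunction can be computed
   with one compare plus one conditional compare.  */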
15767 
15768 machine_mode
15769 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15770 {
15771   /* All floating point compares return CCFP if it is an equality
15772      comparison, and CCFPE otherwise.  */
15773   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15774     {
15775       switch (op)
15776 	{
15777 	case EQ:
15778 	case NE:
15779 	case UNORDERED:
15780 	case ORDERED:
15781 	case UNLT:
15782 	case UNLE:
15783 	case UNGT:
15784 	case UNGE:
15785 	case UNEQ:
15786 	case LTGT:
15787 	  return CCFPmode;
15788 
15789 	case LT:
15790 	case LE:
15791 	case GT:
15792 	case GE:
15793 	  return CCFPEmode;
15794 
15795 	default:
15796 	  gcc_unreachable ();
15797 	}
15798     }
15799 
15800   /* A compare with a shifted operand.  Because of canonicalization, the
15801      comparison will have to be swapped when we emit the assembler.  */
15802   if (GET_MODE (y) == SImode
15803       && (REG_P (y) || (GET_CODE (y) == SUBREG))
15804       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15805 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15806 	  || GET_CODE (x) == ROTATERT))
15807     return CC_SWPmode;
15808 
15809   /* A widened compare of the sum of a value plus a carry against a
15810      constant.  This is a representation of RSC.  We want to swap the
15811      result of the comparison at output.  Not valid if the Z bit is
15812      needed.  */
15813   if (GET_MODE (x) == DImode
15814       && GET_CODE (x) == PLUS
15815       && arm_borrow_operation (XEXP (x, 1), DImode)
15816       && CONST_INT_P (y)
15817       && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15818 	   && (op == LE || op == GT))
15819 	  || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15820 	      && (op == LEU || op == GTU))))
15821     return CC_SWPmode;
15822 
15823   /* If X is a constant we want to use CC_RSBmode.  This is
15824      non-canonical, but arm_gen_compare_reg uses this to generate the
15825      correct canonical form.  */
15826   if (GET_MODE (y) == SImode
15827       && (REG_P (y) || GET_CODE (y) == SUBREG)
15828       && CONST_INT_P (x))
15829     return CC_RSBmode;
15830 
15831   /* This operation is performed swapped, but since we only rely on the Z
15832      flag we don't need an additional mode.  */
15833   if (GET_MODE (y) == SImode
15834       && (REG_P (y) || (GET_CODE (y) == SUBREG))
15835       && GET_CODE (x) == NEG
15836       && (op == EQ || op == NE))
15837     return CC_Zmode;
15838 
15839   /* This is a special case that is used by combine to allow a
15840      comparison of a shifted byte load to be split into a zero-extend
15841      followed by a comparison of the shifted integer (only valid for
15842      equalities and unsigned inequalities).  */
15843   if (GET_MODE (x) == SImode
15844       && GET_CODE (x) == ASHIFT
15845       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15846       && GET_CODE (XEXP (x, 0)) == SUBREG
15847       && MEM_P (SUBREG_REG (XEXP (x, 0)))
15848       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15849       && (op == EQ || op == NE
15850 	  || op == GEU || op == GTU || op == LTU || op == LEU)
15851       && CONST_INT_P (y))
15852     return CC_Zmode;
15853 
15854   /* A construct for a conditional compare, if the false arm contains
15855      0, then both conditions must be true, otherwise either condition
15856      must be true.  Not all conditions are possible, so CCmode is
15857      returned if it can't be done.  */
15858   if (GET_CODE (x) == IF_THEN_ELSE
15859       && (XEXP (x, 2) == const0_rtx
15860 	  || XEXP (x, 2) == const1_rtx)
15861       && COMPARISON_P (XEXP (x, 0))
15862       && COMPARISON_P (XEXP (x, 1)))
15863     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15864 					 INTVAL (XEXP (x, 2)));
15865 
15866   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
15867   if (GET_CODE (x) == AND
15868       && (op == EQ || op == NE)
15869       && COMPARISON_P (XEXP (x, 0))
15870       && COMPARISON_P (XEXP (x, 1)))
15871     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15872 					 DOM_CC_X_AND_Y);
15873 
15874   if (GET_CODE (x) == IOR
15875       && (op == EQ || op == NE)
15876       && COMPARISON_P (XEXP (x, 0))
15877       && COMPARISON_P (XEXP (x, 1)))
15878     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15879 					 DOM_CC_X_OR_Y);
15880 
15881   /* An operation (on Thumb) where we want to test for a single bit.
15882      This is done by shifting that bit up into the top bit of a
15883      scratch register; we can then branch on the sign bit.  */
15884   if (TARGET_THUMB1
15885       && GET_MODE (x) == SImode
15886       && (op == EQ || op == NE)
15887       && GET_CODE (x) == ZERO_EXTRACT
15888       && XEXP (x, 1) == const1_rtx)
15889     return CC_Nmode;
15890 
15891   /* An operation that sets the condition codes as a side-effect, the
15892      V flag is not set correctly, so we can only use comparisons where
15893      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
15894      instead.)  */
15895   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
15896   if (GET_MODE (x) == SImode
15897       && y == const0_rtx
15898       && (op == EQ || op == NE || op == LT || op == GE)
15899       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15900 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
15901 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15902 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15903 	  || GET_CODE (x) == LSHIFTRT
15904 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15905 	  || GET_CODE (x) == ROTATERT
15906 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15907     return CC_NZmode;
15908 
15909   /* A comparison of ~reg with a const is really a special
15910      canonicalization of compare (~const, reg), which is a reverse
15911      subtract operation.  We may not get here if CONST is 0, but that
15912      doesn't matter because ~0 isn't a valid immediate for RSB.  */
15913   if (GET_MODE (x) == SImode
15914       && GET_CODE (x) == NOT
15915       && CONST_INT_P (y))
15916     return CC_RSBmode;
15917 
15918   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15919     return CC_Zmode;
15920 
15921   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15922       && GET_CODE (x) == PLUS
15923       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15924     return CC_Cmode;
15925 
15926   if (GET_MODE (x) == DImode
15927       && GET_CODE (x) == PLUS
15928       && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15929       && CONST_INT_P (y)
15930       && UINTVAL (y) == 0x800000000
15931       && (op == GEU || op == LTU))
15932     return CC_ADCmode;
15933 
15934   if (GET_MODE (x) == DImode
15935       && (op == GE || op == LT)
15936       && GET_CODE (x) == SIGN_EXTEND
15937       && ((GET_CODE (y) == PLUS
15938 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15939 	  || arm_borrow_operation (y, DImode)))
15940     return CC_NVmode;
15941 
15942   if (GET_MODE (x) == DImode
15943       && (op == GEU || op == LTU)
15944       && GET_CODE (x) == ZERO_EXTEND
15945       && ((GET_CODE (y) == PLUS
15946 	   && arm_borrow_operation (XEXP (y, 0), DImode))
15947 	  || arm_borrow_operation (y, DImode)))
15948     return CC_Bmode;
15949 
15950   if (GET_MODE (x) == DImode
15951       && (op == EQ || op == NE)
15952       && (GET_CODE (x) == PLUS
15953 	  || GET_CODE (x) == MINUS)
15954       && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15955 	  || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15956       && GET_CODE (y) == SIGN_EXTEND
15957       && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15958     return CC_Vmode;
15959 
15960   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15961     return GET_MODE (x);
15962 
15963   return CCmode;
15964 }
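
/* A worked example (illustrative): for
   (gt (ashift (reg:SI r1) (const_int 2)) (reg:SI r2)) the function returns
   CC_SWPmode, because the comparison is emitted as "cmp r2, r1, lsl #2"
   (the shifter can only be applied to the second operand), so the
   condition actually tested afterwards is LT rather than GT.  */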
15965 
15966 /* X and Y are two (DImode) things to compare for the condition CODE.  Emit
15967    the sequence of instructions needed to generate a suitable condition
15968    code register.  Return the CC register result.  */
15969 static rtx
15970 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15971 {
15972   machine_mode mode;
15973   rtx cc_reg;
15974 
15975   /* We don't currently handle DImode in thumb1, but rely on libgcc.  */
15976   gcc_assert (TARGET_32BIT);
15977   gcc_assert (!CONST_INT_P (x));
15978 
15979   rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15980 				  subreg_lowpart_offset (SImode, DImode));
15981   rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15982 				  subreg_highpart_offset (SImode, DImode));
15983   rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15984 				  subreg_lowpart_offset (SImode, DImode));
15985   rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15986 				  subreg_highpart_offset (SImode, DImode));
15987   switch (code)
15988     {
15989     case EQ:
15990     case NE:
15991       {
15992 	if (y_lo == const0_rtx || y_hi == const0_rtx)
15993 	  {
15994 	    if (y_lo != const0_rtx)
15995 	      {
15996 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15997 
15998 		gcc_assert (y_hi == const0_rtx);
15999 		y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16000 		if (!arm_add_operand (y_lo, SImode))
16001 		  y_lo = force_reg (SImode, y_lo);
16002 		emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16003 		x_lo = scratch2;
16004 	      }
16005 	    else if (y_hi != const0_rtx)
16006 	      {
16007 		rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16008 
16009 		y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16010 		if (!arm_add_operand (y_hi, SImode))
16011 		  y_hi = force_reg (SImode, y_hi);
16012 		emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16013 		x_hi = scratch2;
16014 	      }
16015 
16016 	    if (!scratch)
16017 	      {
16018 		gcc_assert (!reload_completed);
16019 		scratch = gen_rtx_SCRATCH (SImode);
16020 	      }
16021 
16022 	    rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16023 	    cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16024 
16025 	    rtx set
16026 	      = gen_rtx_SET (cc_reg,
16027 			     gen_rtx_COMPARE (CC_NZmode,
16028 					      gen_rtx_IOR (SImode, x_lo, x_hi),
16029 					      const0_rtx));
16030 	    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16031 							      clobber)));
16032 	    return cc_reg;
16033 	  }
16034 
16035 	if (!arm_add_operand (y_lo, SImode))
16036 	  y_lo = force_reg (SImode, y_lo);
16037 
16038 	if (!arm_add_operand (y_hi, SImode))
16039 	  y_hi = force_reg (SImode, y_hi);
16040 
16041 	rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16042 	rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16043 	rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16044 	mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16045 	cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16046 
16047 	emit_insn (gen_rtx_SET (cc_reg,
16048 				gen_rtx_COMPARE (mode, conjunction,
16049 						 const0_rtx)));
16050 	return cc_reg;
16051       }
16052 
16053     case LT:
16054     case GE:
16055       {
16056 	if (y_lo == const0_rtx)
16057 	  {
16058 	    /* If the low word of y is 0, then this is simply a normal
16059 	       compare of the upper words.  */
16060 	    if (!arm_add_operand (y_hi, SImode))
16061 	      y_hi = force_reg (SImode, y_hi);
16062 
16063 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16064 	  }
16065 
16066 	if (!arm_add_operand (y_lo, SImode))
16067 	  y_lo = force_reg (SImode, y_lo);
16068 
16069 	rtx cmp1
16070 	  = gen_rtx_LTU (DImode,
16071 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16072 			 const0_rtx);
16073 
16074 	if (!scratch)
16075 	  scratch = gen_rtx_SCRATCH (SImode);
16076 
16077 	if (!arm_not_operand (y_hi, SImode))
16078 	  y_hi = force_reg (SImode, y_hi);
16079 
16080 	rtx_insn *insn;
16081 	if (y_hi == const0_rtx)
16082 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16083 							   cmp1));
16084 	else if (CONST_INT_P (y_hi))
16085 	  insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16086 							     y_hi, cmp1));
16087 	else
16088 	  insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16089 							 cmp1));
16090 	return SET_DEST (single_set (insn));
16091       }
16092 
16093     case LE:
16094     case GT:
16095       {
16096 	/* During expansion, we only expect to get here if y is a
16097 	   constant that we want to handle, otherwise we should have
16098 	   swapped the operands already.  */
16099 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16100 
16101 	if (!const_ok_for_arm (INTVAL (y_lo)))
16102 	  y_lo = force_reg (SImode, y_lo);
16103 
16104 	/* Perform a reverse subtract and compare.  */
16105 	rtx cmp1
16106 	  = gen_rtx_LTU (DImode,
16107 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16108 			 const0_rtx);
16109 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16110 								 x_hi, cmp1));
16111 	return SET_DEST (single_set (insn));
16112       }
16113 
16114     case LTU:
16115     case GEU:
16116       {
16117 	if (y_lo == const0_rtx)
16118 	  {
16119 	    /* If the low word of y is 0, then this is simply a normal
16120 	       compare of the upper words.  */
16121 	    if (!arm_add_operand (y_hi, SImode))
16122 	      y_hi = force_reg (SImode, y_hi);
16123 
16124 	    return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16125 	  }
16126 
16127 	if (!arm_add_operand (y_lo, SImode))
16128 	  y_lo = force_reg (SImode, y_lo);
16129 
16130 	rtx cmp1
16131 	  = gen_rtx_LTU (DImode,
16132 			 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16133 			 const0_rtx);
16134 
16135 	if (!scratch)
16136 	  scratch = gen_rtx_SCRATCH (SImode);
16137 	if (!arm_not_operand (y_hi, SImode))
16138 	  y_hi = force_reg (SImode, y_hi);
16139 
16140 	rtx_insn *insn;
16141 	if (y_hi == const0_rtx)
16142 	  insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16143 							  cmp1));
16144 	else if (CONST_INT_P (y_hi))
16145 	  {
16146 	    /* Constant is viewed as unsigned when zero-extended.  */
16147 	    y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16148 	    insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16149 							      y_hi, cmp1));
16150 	  }
16151 	else
16152 	  insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16153 							cmp1));
16154 	return SET_DEST (single_set (insn));
16155       }
16156 
16157     case LEU:
16158     case GTU:
16159       {
16160 	/* During expansion, we only expect to get here if y is a
16161 	   constant that we want to handle, otherwise we should have
16162 	   swapped the operands already.  */
16163 	gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16164 
16165 	if (!const_ok_for_arm (INTVAL (y_lo)))
16166 	  y_lo = force_reg (SImode, y_lo);
16167 
16168 	/* Perform a reverse subtract and compare.  */
16169 	rtx cmp1
16170 	  = gen_rtx_LTU (DImode,
16171 			 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16172 			 const0_rtx);
16173 	y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16174 	rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16175 								x_hi, cmp1));
16176 	return SET_DEST (single_set (insn));
16177       }
16178 
16179     default:
16180       gcc_unreachable ();
16181     }
16182 }
16183 
16184 /* X and Y are two things to compare using CODE.  Emit the compare insn and
16185    return the rtx for register 0 in the proper mode.  */
16186 rtx
16187 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16188 {
16189   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16190     return arm_gen_dicompare_reg (code, x, y, scratch);
16191 
16192   machine_mode mode = SELECT_CC_MODE (code, x, y);
16193   rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16194   if (mode == CC_RSBmode)
16195     {
16196       if (!scratch)
16197 	scratch = gen_rtx_SCRATCH (SImode);
16198       emit_insn (gen_rsb_imm_compare_scratch (scratch,
16199 					      GEN_INT (~UINTVAL (x)), y));
16200     }
16201   else
16202     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16203 
16204   return cc_reg;
16205 }
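
/* A minimal usage sketch, assuming a typical conditional-branch expander
   (the operand and label names here are hypothetical, not taken from
   arm.md):

     rtx cc_reg = arm_gen_compare_reg (GT, op0, op1, NULL_RTX);
     rtx cond = gen_rtx_fmt_ee (GT, VOIDmode, cc_reg, const0_rtx);
     emit_jump_insn (gen_rtx_SET (pc_rtx,
				  gen_rtx_IF_THEN_ELSE (VOIDmode, cond,
							label_ref, pc_rtx)));

   The final comparison is always against const0_rtx because the returned
   register already holds the flags in the CC mode chosen by
   SELECT_CC_MODE.  */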
16206 
16207 /* Generate a sequence of insns that will generate the correct return
16208    address mask depending on the physical architecture that the program
16209    is running on.  */
16210 rtx
16211 arm_gen_return_addr_mask (void)
16212 {
16213   rtx reg = gen_reg_rtx (Pmode);
16214 
16215   emit_insn (gen_return_addr_mask (reg));
16216   return reg;
16217 }
16218 
16219 void
16220 arm_reload_in_hi (rtx *operands)
16221 {
16222   rtx ref = operands[1];
16223   rtx base, scratch;
16224   HOST_WIDE_INT offset = 0;
16225 
16226   if (GET_CODE (ref) == SUBREG)
16227     {
16228       offset = SUBREG_BYTE (ref);
16229       ref = SUBREG_REG (ref);
16230     }
16231 
16232   if (REG_P (ref))
16233     {
16234       /* We have a pseudo which has been spilt onto the stack; there
16235 	 are two cases here: the first where there is a simple
16236 	 stack-slot replacement and a second where the stack-slot is
16237 	 out of range, or is used as a subreg.  */
16238       if (reg_equiv_mem (REGNO (ref)))
16239 	{
16240 	  ref = reg_equiv_mem (REGNO (ref));
16241 	  base = find_replacement (&XEXP (ref, 0));
16242 	}
16243       else
16244 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16245 	base = reg_equiv_address (REGNO (ref));
16246 
16247       /* PR 62554: If there is no equivalent memory location then just move
16248 	 the value as an SImode register move.  This happens when the target
16249 	 architecture variant does not have an HImode register move.  */
16250       if (base == NULL)
16251 	{
16252 	  gcc_assert (REG_P (operands[0]));
16253 	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16254 				gen_rtx_SUBREG (SImode, ref, 0)));
16255 	  return;
16256 	}
16257     }
16258   else
16259     base = find_replacement (&XEXP (ref, 0));
16260 
16261   /* Handle the case where the address is too complex to be offset by 1.  */
16262   if (GET_CODE (base) == MINUS
16263       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16264     {
16265       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16266 
16267       emit_set_insn (base_plus, base);
16268       base = base_plus;
16269     }
16270   else if (GET_CODE (base) == PLUS)
16271     {
16272       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16273       HOST_WIDE_INT hi, lo;
16274 
16275       offset += INTVAL (XEXP (base, 1));
16276       base = XEXP (base, 0);
16277 
16278       /* Rework the address into a legal sequence of insns.  */
16279       /* Valid range for lo is -4095 -> 4095 */
16280       lo = (offset >= 0
16281 	    ? (offset & 0xfff)
16282 	    : -((-offset) & 0xfff));
16283 
16284       /* Corner case, if lo is the max offset then we would be out of range
16285 	 once we have added the additional 1 below, so bump the msb into the
16286 	 pre-loading insn(s).  */
16287       if (lo == 4095)
16288 	lo &= 0x7ff;
16289 
16290       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16291 	     ^ (HOST_WIDE_INT) 0x80000000)
16292 	    - (HOST_WIDE_INT) 0x80000000);
16293 
16294       gcc_assert (hi + lo == offset);
16295 
16296       if (hi != 0)
16297 	{
16298 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16299 
16300 	  /* Get the base address; addsi3 knows how to handle constants
16301 	     that require more than one insn.  */
16302 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16303 	  base = base_plus;
16304 	  offset = lo;
16305 	}
16306     }
16307 
16308   /* Operands[2] may overlap operands[0] (though it won't overlap
16309      operands[1]), that's why we asked for a DImode reg -- so we can
16310      use the bit that does not overlap.  */
16311   if (REGNO (operands[2]) == REGNO (operands[0]))
16312     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16313   else
16314     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16315 
16316   emit_insn (gen_zero_extendqisi2 (scratch,
16317 				   gen_rtx_MEM (QImode,
16318 						plus_constant (Pmode, base,
16319 							       offset))));
16320   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16321 				   gen_rtx_MEM (QImode,
16322 						plus_constant (Pmode, base,
16323 							       offset + 1))));
16324   if (!BYTES_BIG_ENDIAN)
16325     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16326 		   gen_rtx_IOR (SImode,
16327 				gen_rtx_ASHIFT
16328 				(SImode,
16329 				 gen_rtx_SUBREG (SImode, operands[0], 0),
16330 				 GEN_INT (8)),
16331 				scratch));
16332   else
16333     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16334 		   gen_rtx_IOR (SImode,
16335 				gen_rtx_ASHIFT (SImode, scratch,
16336 						GEN_INT (8)),
16337 				gen_rtx_SUBREG (SImode, operands[0], 0)));
16338 }
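
/* On a little-endian target the sequence above amounts to roughly the
   following (register numbers are arbitrary, for illustration only):

	ldrb	r3, [r2, #0]		@ low byte into the scratch
	ldrb	r0, [r2, #1]		@ high byte into the destination
	orr	r0, r3, r0, lsl #8	@ combine into a half-word

   i.e. the half-word load is synthesized from two byte loads plus a
   shift-and-OR, using one half of the DImode scratch in operands[2].  */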
16339 
16340 /* Handle storing a half-word to memory during reload by synthesizing as two
16341    byte stores.  Take care not to clobber the input values until after we
16342    have moved them somewhere safe.  This code assumes that if the DImode
16343    scratch in operands[2] overlaps either the input value or output address
16344    in some way, then that value must die in this insn (we absolutely need
16345    two scratch registers for some corner cases).  */
16346 void
16347 arm_reload_out_hi (rtx *operands)
16348 {
16349   rtx ref = operands[0];
16350   rtx outval = operands[1];
16351   rtx base, scratch;
16352   HOST_WIDE_INT offset = 0;
16353 
16354   if (GET_CODE (ref) == SUBREG)
16355     {
16356       offset = SUBREG_BYTE (ref);
16357       ref = SUBREG_REG (ref);
16358     }
16359 
16360   if (REG_P (ref))
16361     {
16362       /* We have a pseudo which has been spilt onto the stack; there
16363 	 are two cases here: the first where there is a simple
16364 	 stack-slot replacement and a second where the stack-slot is
16365 	 out of range, or is used as a subreg.  */
16366       if (reg_equiv_mem (REGNO (ref)))
16367 	{
16368 	  ref = reg_equiv_mem (REGNO (ref));
16369 	  base = find_replacement (&XEXP (ref, 0));
16370 	}
16371       else
16372 	/* The slot is out of range, or was dressed up in a SUBREG.  */
16373 	base = reg_equiv_address (REGNO (ref));
16374 
16375       /* PR 62254: If there is no equivalent memory location then just move
16376 	 the value as an SImode register move.  This happens when the target
16377 	 architecture variant does not have an HImode register move.  */
16378       if (base == NULL)
16379 	{
16380 	  gcc_assert (REG_P (outval) || SUBREG_P (outval));
16381 
16382 	  if (REG_P (outval))
16383 	    {
16384 	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16385 				    gen_rtx_SUBREG (SImode, outval, 0)));
16386 	    }
16387 	  else /* SUBREG_P (outval)  */
16388 	    {
16389 	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
16390 		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16391 				      SUBREG_REG (outval)));
16392 	      else
16393 		/* FIXME: Handle other cases ?  */
16394 		gcc_unreachable ();
16395 	    }
16396 	  return;
16397 	}
16398     }
16399   else
16400     base = find_replacement (&XEXP (ref, 0));
16401 
16402   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16403 
16404   /* Handle the case where the address is too complex to be offset by 1.  */
16405   if (GET_CODE (base) == MINUS
16406       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16407     {
16408       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16409 
16410       /* Be careful not to destroy OUTVAL.  */
16411       if (reg_overlap_mentioned_p (base_plus, outval))
16412 	{
16413 	  /* Updating base_plus might destroy outval, see if we can
16414 	     swap the scratch and base_plus.  */
16415 	  if (!reg_overlap_mentioned_p (scratch, outval))
16416 	    std::swap (scratch, base_plus);
16417 	  else
16418 	    {
16419 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16420 
16421 	      /* Be conservative and copy OUTVAL into the scratch now,
16422 		 this should only be necessary if outval is a subreg
16423 		 of something larger than a word.  */
16424 	      /* XXX Might this clobber base?  I can't see how it can,
16425 		 since scratch is known to overlap with OUTVAL, and
16426 		 must be wider than a word.  */
16427 	      emit_insn (gen_movhi (scratch_hi, outval));
16428 	      outval = scratch_hi;
16429 	    }
16430 	}
16431 
16432       emit_set_insn (base_plus, base);
16433       base = base_plus;
16434     }
16435   else if (GET_CODE (base) == PLUS)
16436     {
16437       /* The addend must be CONST_INT, or we would have dealt with it above.  */
16438       HOST_WIDE_INT hi, lo;
16439 
16440       offset += INTVAL (XEXP (base, 1));
16441       base = XEXP (base, 0);
16442 
16443       /* Rework the address into a legal sequence of insns.  */
16444       /* Valid range for lo is -4095 -> 4095 */
16445       lo = (offset >= 0
16446 	    ? (offset & 0xfff)
16447 	    : -((-offset) & 0xfff));
16448 
16449       /* Corner case, if lo is the max offset then we would be out of range
16450 	 once we have added the additional 1 below, so bump the msb into the
16451 	 pre-loading insn(s).  */
16452       if (lo == 4095)
16453 	lo &= 0x7ff;
16454 
16455       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16456 	     ^ (HOST_WIDE_INT) 0x80000000)
16457 	    - (HOST_WIDE_INT) 0x80000000);
16458 
16459       gcc_assert (hi + lo == offset);
16460 
16461       if (hi != 0)
16462 	{
16463 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16464 
16465 	  /* Be careful not to destroy OUTVAL.  */
16466 	  if (reg_overlap_mentioned_p (base_plus, outval))
16467 	    {
16468 	      /* Updating base_plus might destroy outval, see if we
16469 		 can swap the scratch and base_plus.  */
16470 	      if (!reg_overlap_mentioned_p (scratch, outval))
16471 	        std::swap (scratch, base_plus);
16472 	      else
16473 		{
16474 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16475 
16476 		  /* Be conservative and copy outval into scratch now,
16477 		     this should only be necessary if outval is a
16478 		     subreg of something larger than a word.  */
16479 		  /* XXX Might this clobber base?  I can't see how it
16480 		     can, since scratch is known to overlap with
16481 		     outval.  */
16482 		  emit_insn (gen_movhi (scratch_hi, outval));
16483 		  outval = scratch_hi;
16484 		}
16485 	    }
16486 
16487 	  /* Get the base address; addsi3 knows how to handle constants
16488 	     that require more than one insn.  */
16489 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16490 	  base = base_plus;
16491 	  offset = lo;
16492 	}
16493     }
16494 
16495   if (BYTES_BIG_ENDIAN)
16496     {
16497       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16498 					 plus_constant (Pmode, base,
16499 							offset + 1)),
16500 			    gen_lowpart (QImode, outval)));
16501       emit_insn (gen_lshrsi3 (scratch,
16502 			      gen_rtx_SUBREG (SImode, outval, 0),
16503 			      GEN_INT (8)));
16504       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16505 								offset)),
16506 			    gen_lowpart (QImode, scratch)));
16507     }
16508   else
16509     {
16510       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16511 								offset)),
16512 			    gen_lowpart (QImode, outval)));
16513       emit_insn (gen_lshrsi3 (scratch,
16514 			      gen_rtx_SUBREG (SImode, outval, 0),
16515 			      GEN_INT (8)));
16516       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16517 					 plus_constant (Pmode, base,
16518 							offset + 1)),
16519 			    gen_lowpart (QImode, scratch)));
16520     }
16521 }
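
/* The little-endian store counterpart looks roughly like this (register
   numbers are arbitrary, for illustration only):

	strb	r1, [r2, #0]		@ store the low byte of OUTVAL
	lsr	r3, r1, #8		@ move the high byte into the scratch
	strb	r3, [r2, #1]		@ store the high byte

   The scratch is required because STRB cannot apply a shift to the data
   being stored.  */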
16522 
16523 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16524    (padded to the size of a word) should be passed in a register.  */
16525 
16526 static bool
16527 arm_must_pass_in_stack (const function_arg_info &arg)
16528 {
16529   if (TARGET_AAPCS_BASED)
16530     return must_pass_in_stack_var_size (arg);
16531   else
16532     return must_pass_in_stack_var_size_or_pad (arg);
16533 }
16534 
16535 
16536 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16537    byte of a stack argument has useful data.  For legacy APCS ABIs we use
16538    the default.  For AAPCS based ABIs small aggregate types are placed
16539    in the lowest memory address.  */
16540 
16541 static pad_direction
16542 arm_function_arg_padding (machine_mode mode, const_tree type)
16543 {
16544   if (!TARGET_AAPCS_BASED)
16545     return default_function_arg_padding (mode, type);
16546 
16547   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16548     return PAD_DOWNWARD;
16549 
16550   return PAD_UPWARD;
16551 }
16552 
16553 
16554 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16555    Return !BYTES_BIG_ENDIAN if the least significant byte of the
16556    register has useful data, and return the opposite if the most
16557    significant byte does.  */
16558 
16559 bool
16560 arm_pad_reg_upward (machine_mode mode,
16561                     tree type, int first ATTRIBUTE_UNUSED)
16562 {
16563   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16564     {
16565       /* For AAPCS, small aggregates, small fixed-point types,
16566 	 and small complex types are always padded upwards.  */
16567       if (type)
16568 	{
16569 	  if ((AGGREGATE_TYPE_P (type)
16570 	       || TREE_CODE (type) == COMPLEX_TYPE
16571 	       || FIXED_POINT_TYPE_P (type))
16572 	      && int_size_in_bytes (type) <= 4)
16573 	    return true;
16574 	}
16575       else
16576 	{
16577 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16578 	      && GET_MODE_SIZE (mode) <= 4)
16579 	    return true;
16580 	}
16581     }
16582 
16583   /* Otherwise, use default padding.  */
16584   return !BYTES_BIG_ENDIAN;
16585 }
16586 
16587 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16588    assuming that the address in the base register is word aligned.  */
16589 bool
16590 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16591 {
16592   HOST_WIDE_INT max_offset;
16593 
16594   /* Offset must be a multiple of 4 in Thumb mode.  */
16595   if (TARGET_THUMB2 && ((offset & 3) != 0))
16596     return false;
16597 
16598   if (TARGET_THUMB2)
16599     max_offset = 1020;
16600   else if (TARGET_ARM)
16601     max_offset = 255;
16602   else
16603     return false;
16604 
16605   return ((offset <= max_offset) && (offset >= -max_offset));
16606 }
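
/* For example, an offset of 1020 is accepted in Thumb-2 state (in range and
   a multiple of 4) while 1018 is rejected (not a multiple of 4); in ARM
   state the limit is +/-255 with no multiple-of-4 restriction.  These
   bounds mirror the immediate fields of the Thumb-2 and ARM encodings of
   LDRD/STRD.  */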
16607 
16608 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16609    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
16610    Assumes that the address in the base register RN is word aligned.  Pattern
16611    guarantees that both memory accesses use the same base register,
16612    the offsets are constants within the range, and the gap between the offsets is 4.
16613    If reload is complete then check that the registers are legal.  WBACK indicates whether
16614    address is updated.  LOAD indicates whether memory access is load or store.  */
16615 bool
16616 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16617                        bool wback, bool load)
16618 {
16619   unsigned int t, t2, n;
16620 
16621   if (!reload_completed)
16622     return true;
16623 
16624   if (!offset_ok_for_ldrd_strd (offset))
16625     return false;
16626 
16627   t = REGNO (rt);
16628   t2 = REGNO (rt2);
16629   n = REGNO (rn);
16630 
16631   if ((TARGET_THUMB2)
16632       && ((wback && (n == t || n == t2))
16633           || (t == SP_REGNUM)
16634           || (t == PC_REGNUM)
16635           || (t2 == SP_REGNUM)
16636           || (t2 == PC_REGNUM)
16637           || (!load && (n == PC_REGNUM))
16638           || (load && (t == t2))
16639           /* Triggers Cortex-M3 LDRD errata.  */
16640           || (!wback && load && fix_cm3_ldrd && (n == t))))
16641     return false;
16642 
16643   if ((TARGET_ARM)
16644       && ((wback && (n == t || n == t2))
16645           || (t2 == PC_REGNUM)
16646           || (t % 2 != 0)   /* First destination register is not even.  */
16647           || (t2 != t + 1)
16648           /* PC can be used as base register (for offset addressing only),
16649              but it is deprecated.  */
16650           || (n == PC_REGNUM)))
16651     return false;
16652 
16653   return true;
16654 }
16655 
16656 /* Return true if a 64-bit access with alignment ALIGN and with a
16657    constant offset OFFSET from the base pointer is permitted on this
16658    architecture.  */
16659 static bool
16660 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16661 {
16662   return (unaligned_access
16663 	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16664 	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16665 }
16666 
16667 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
16668    operand MEM's address contains an immediate offset from the base
16669    register and has no side effects, in which case it sets BASE,
16670    OFFSET and ALIGN accordingly.  */
16671 static bool
16672 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16673 {
16674   rtx addr;
16675 
16676   gcc_assert (base != NULL && offset != NULL);
16677 
16678   /* TODO: Handle more general memory operand patterns, such as
16679      PRE_DEC and PRE_INC.  */
16680 
16681   if (side_effects_p (mem))
16682     return false;
16683 
16684   /* Can't deal with subregs.  */
16685   if (GET_CODE (mem) == SUBREG)
16686     return false;
16687 
16688   gcc_assert (MEM_P (mem));
16689 
16690   *offset = const0_rtx;
16691   *align = MEM_ALIGN (mem);
16692 
16693   addr = XEXP (mem, 0);
16694 
16695   /* If addr isn't valid for DImode, then we can't handle it.  */
16696   if (!arm_legitimate_address_p (DImode, addr,
16697 				 reload_in_progress || reload_completed))
16698     return false;
16699 
16700   if (REG_P (addr))
16701     {
16702       *base = addr;
16703       return true;
16704     }
16705   else if (GET_CODE (addr) == PLUS)
16706     {
16707       *base = XEXP (addr, 0);
16708       *offset = XEXP (addr, 1);
16709       return (REG_P (*base) && CONST_INT_P (*offset));
16710     }
16711 
16712   return false;
16713 }
16714 
16715 /* Called from a peephole2 to replace two word-size accesses with a
16716    single LDRD/STRD instruction.  Returns true iff we can generate a
16717    new instruction sequence.  That is, both accesses use the same base
16718    register and the gap between constant offsets is 4.  This function
16719    may reorder its operands to match ldrd/strd RTL templates.
16720    OPERANDS are the operands found by the peephole matcher;
16721    OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16722    corresponding memory operands.  LOAD indicates whether the access
16723    is load or store.  CONST_STORE indicates a store of constant
16724    integer values held in OPERANDS[4,5] and assumes that the pattern
16725    is of length 4 insn, for the purpose of checking dead registers.
16726    COMMUTE indicates that register operands may be reordered.  */
16727 bool
16728 gen_operands_ldrd_strd (rtx *operands, bool load,
16729                         bool const_store, bool commute)
16730 {
16731   int nops = 2;
16732   HOST_WIDE_INT offsets[2], offset, align[2];
16733   rtx base = NULL_RTX;
16734   rtx cur_base, cur_offset, tmp;
16735   int i, gap;
16736   HARD_REG_SET regset;
16737 
16738   gcc_assert (!const_store || !load);
16739   /* Check that the memory references are immediate offsets from the
16740      same base register.  Extract the base register, the destination
16741      registers, and the corresponding memory offsets.  */
16742   for (i = 0; i < nops; i++)
16743     {
16744       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16745 				 &align[i]))
16746         return false;
16747 
16748       if (i == 0)
16749         base = cur_base;
16750       else if (REGNO (base) != REGNO (cur_base))
16751         return false;
16752 
16753       offsets[i] = INTVAL (cur_offset);
16754       if (GET_CODE (operands[i]) == SUBREG)
16755         {
16756           tmp = SUBREG_REG (operands[i]);
16757           gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16758           operands[i] = tmp;
16759         }
16760     }
16761 
16762   /* Make sure there is no dependency between the individual loads.  */
16763   if (load && REGNO (operands[0]) == REGNO (base))
16764     return false; /* RAW */
16765 
16766   if (load && REGNO (operands[0]) == REGNO (operands[1]))
16767     return false; /* WAW */
16768 
16769   /* If the same input register is used in both stores
16770      when storing different constants, try to find a free register.
16771      For example, the code
16772 	mov r0, 0
16773 	str r0, [r2]
16774 	mov r0, 1
16775 	str r0, [r2, #4]
16776      can be transformed into
16777 	mov r1, 0
16778 	mov r0, 1
16779 	strd r1, r0, [r2]
16780      in Thumb mode assuming that r1 is free.
16781      For ARM mode do the same but only if the starting register
16782      can be made to be even.  */
16783   if (const_store
16784       && REGNO (operands[0]) == REGNO (operands[1])
16785       && INTVAL (operands[4]) != INTVAL (operands[5]))
16786     {
16787     if (TARGET_THUMB2)
16788       {
16789         CLEAR_HARD_REG_SET (regset);
16790         tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16791         if (tmp == NULL_RTX)
16792           return false;
16793 
16794         /* Use the new register in the first load to ensure that
16795            if the original input register is not dead after peephole,
16796            then it will have the correct constant value.  */
16797         operands[0] = tmp;
16798       }
16799     else if (TARGET_ARM)
16800       {
16801         int regno = REGNO (operands[0]);
16802         if (!peep2_reg_dead_p (4, operands[0]))
16803           {
16804             /* When the input register is even and is not dead after the
16805                pattern, it has to hold the second constant but we cannot
16806                form a legal STRD in ARM mode with this register as the second
16807                register.  */
16808             if (regno % 2 == 0)
16809               return false;
16810 
16811             /* Is regno-1 free? */
16812             SET_HARD_REG_SET (regset);
16813             CLEAR_HARD_REG_BIT(regset, regno - 1);
16814             tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16815             if (tmp == NULL_RTX)
16816               return false;
16817 
16818             operands[0] = tmp;
16819           }
16820         else
16821           {
16822             /* Find a DImode register.  */
16823             CLEAR_HARD_REG_SET (regset);
16824             tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16825             if (tmp != NULL_RTX)
16826               {
16827                 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16828                 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16829               }
16830             else
16831               {
16832                 /* Can we use the input register to form a DI register?  */
16833                 SET_HARD_REG_SET (regset);
16834                 CLEAR_HARD_REG_BIT(regset,
16835                                    regno % 2 == 0 ? regno + 1 : regno - 1);
16836                 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16837                 if (tmp == NULL_RTX)
16838                   return false;
16839                 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16840               }
16841           }
16842 
16843         gcc_assert (operands[0] != NULL_RTX);
16844         gcc_assert (operands[1] != NULL_RTX);
16845         gcc_assert (REGNO (operands[0]) % 2 == 0);
16846         gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16847       }
16848     }
16849 
16850   /* Make sure the instructions are ordered with lower memory access first.  */
16851   if (offsets[0] > offsets[1])
16852     {
16853       gap = offsets[0] - offsets[1];
16854       offset = offsets[1];
16855 
16856       /* Swap the instructions such that lower memory is accessed first.  */
16857       std::swap (operands[0], operands[1]);
16858       std::swap (operands[2], operands[3]);
16859       std::swap (align[0], align[1]);
16860       if (const_store)
16861         std::swap (operands[4], operands[5]);
16862     }
16863   else
16864     {
16865       gap = offsets[1] - offsets[0];
16866       offset = offsets[0];
16867     }
16868 
16869   /* Make sure accesses are to consecutive memory locations.  */
16870   if (gap != GET_MODE_SIZE (SImode))
16871     return false;
16872 
16873   if (!align_ok_ldrd_strd (align[0], offset))
16874     return false;
16875 
16876   /* Make sure we generate legal instructions.  */
16877   if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16878                              false, load))
16879     return true;
16880 
16881   /* In Thumb state, where registers are almost unconstrained, there
16882      is little hope to fix it.  */
16883   if (TARGET_THUMB2)
16884     return false;
16885 
16886   if (load && commute)
16887     {
16888       /* Try reordering registers.  */
16889       std::swap (operands[0], operands[1]);
16890       if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16891                                  false, load))
16892         return true;
16893     }
16894 
16895   if (const_store)
16896     {
16897       /* If input registers are dead after this pattern, they can be
16898          reordered or replaced by other registers that are free in the
16899          current pattern.  */
16900       if (!peep2_reg_dead_p (4, operands[0])
16901           || !peep2_reg_dead_p (4, operands[1]))
16902         return false;
16903 
16904       /* Try to reorder the input registers.  */
16905       /* For example, the code
16906            mov r0, 0
16907            mov r1, 1
16908            str r1, [r2]
16909            str r0, [r2, #4]
16910          can be transformed into
16911            mov r1, 0
16912            mov r0, 1
16913            strd r0, [r2]
16914       */
16915       if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16916                                   false, false))
16917         {
16918           std::swap (operands[0], operands[1]);
16919           return true;
16920         }
16921 
16922       /* Try to find a free DI register.  */
16923       CLEAR_HARD_REG_SET (regset);
16924       add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16925       add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16926       while (true)
16927         {
16928           tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16929           if (tmp == NULL_RTX)
16930             return false;
16931 
16932           /* DREG must be an even-numbered register in DImode.
16933              Split it into SI registers.  */
16934           operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16935           operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16936           gcc_assert (operands[0] != NULL_RTX);
16937           gcc_assert (operands[1] != NULL_RTX);
16938           gcc_assert (REGNO (operands[0]) % 2 == 0);
16939           gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16940 
16941           return (operands_ok_ldrd_strd (operands[0], operands[1],
16942                                          base, offset,
16943                                          false, load));
16944         }
16945     }
16946 
16947   return false;
16948 }
16949 
16950 
16951 /* Return true if parallel execution of the two word-size accesses provided
16952    could be satisfied with a single LDRD/STRD instruction.  Two word-size
16953    accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16954    register operands and OPERANDS[2,3] are the corresponding memory operands.
16955    */
16956 bool
16957 valid_operands_ldrd_strd (rtx *operands, bool load)
16958 {
16959   int nops = 2;
16960   HOST_WIDE_INT offsets[2], offset, align[2];
16961   rtx base = NULL_RTX;
16962   rtx cur_base, cur_offset;
16963   int i, gap;
16964 
16965   /* Check that the memory references are immediate offsets from the
16966      same base register.  Extract the base register, the destination
16967      registers, and the corresponding memory offsets.  */
16968   for (i = 0; i < nops; i++)
16969     {
16970       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16971 				 &align[i]))
16972 	return false;
16973 
16974       if (i == 0)
16975 	base = cur_base;
16976       else if (REGNO (base) != REGNO (cur_base))
16977 	return false;
16978 
16979       offsets[i] = INTVAL (cur_offset);
16980       if (GET_CODE (operands[i]) == SUBREG)
16981 	return false;
16982     }
16983 
16984   if (offsets[0] > offsets[1])
16985     return false;
16986 
16987   gap = offsets[1] - offsets[0];
16988   offset = offsets[0];
16989 
16990   /* Make sure accesses are to consecutive memory locations.  */
16991   if (gap != GET_MODE_SIZE (SImode))
16992     return false;
16993 
16994   if (!align_ok_ldrd_strd (align[0], offset))
16995     return false;
16996 
16997   return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16998 				false, load);
16999 }
17000 
17001 
17002 /* Print a symbolic form of X to the debug file, F.  */
17003 static void
17004 arm_print_value (FILE *f, rtx x)
17005 {
17006   switch (GET_CODE (x))
17007     {
17008     case CONST_INT:
17009       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17010       return;
17011 
17012     case CONST_DOUBLE:
17013       {
17014 	char fpstr[20];
17015 	real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17016 			 sizeof (fpstr), 0, 1);
17017 	fputs (fpstr, f);
17018       }
17019       return;
17020 
17021     case CONST_VECTOR:
17022       {
17023 	int i;
17024 
17025 	fprintf (f, "<");
17026 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17027 	  {
17028 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17029 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
17030 	      fputc (',', f);
17031 	  }
17032 	fprintf (f, ">");
17033       }
17034       return;
17035 
17036     case CONST_STRING:
17037       fprintf (f, "\"%s\"", XSTR (x, 0));
17038       return;
17039 
17040     case SYMBOL_REF:
17041       fprintf (f, "`%s'", XSTR (x, 0));
17042       return;
17043 
17044     case LABEL_REF:
17045       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17046       return;
17047 
17048     case CONST:
17049       arm_print_value (f, XEXP (x, 0));
17050       return;
17051 
17052     case PLUS:
17053       arm_print_value (f, XEXP (x, 0));
17054       fprintf (f, "+");
17055       arm_print_value (f, XEXP (x, 1));
17056       return;
17057 
17058     case PC:
17059       fprintf (f, "pc");
17060       return;
17061 
17062     default:
17063       fprintf (f, "????");
17064       return;
17065     }
17066 }
17067 
17068 /* Routines for manipulation of the constant pool.  */
17069 
17070 /* Arm instructions cannot load a large constant directly into a
17071    register; they have to come from a pc relative load.  The constant
17072    must therefore be placed in the addressable range of the pc
17073    relative load.  Depending on the precise pc relative load
17074    instruction the range is somewhere between 256 bytes and 4k.  This
17075    means that we often have to dump a constant inside a function, and
17076    generate code to branch around it.
17077 
17078    It is important to minimize this, since the branches will slow
17079    things down and make the code larger.
17080 
17081    Normally we can hide the table after an existing unconditional
17082    branch so that there is no interruption of the flow, but in the
17083    worst case the code looks like this:
17084 
17085 	ldr	rn, L1
17086 	...
17087 	b	L2
17088 	align
17089 	L1:	.long value
17090 	L2:
17091 	...
17092 
17093 	ldr	rn, L3
17094 	...
17095 	b	L4
17096 	align
17097 	L3:	.long value
17098 	L4:
17099 	...
17100 
17101    We fix this by performing a scan after scheduling, which notices
17102    which instructions need to have their operands fetched from the
17103    constant table and builds the table.
17104 
17105    The algorithm starts by building a table of all the constants that
17106    need fixing up and all the natural barriers in the function (places
17107    where a constant table can be dropped without breaking the flow).
17108    For each fixup we note how far the pc-relative replacement will be
17109    able to reach and the offset of the instruction into the function.
17110 
17111    Having built the table we then group the fixes together to form
17112    tables that are as large as possible (subject to addressing
17113    constraints) and emit each table of constants after the last
17114    barrier that is within range of all the instructions in the group.
17115    If a group does not contain a barrier, then we forcibly create one
17116    by inserting a jump instruction into the flow.  Once the table has
17117    been inserted, the insns are then modified to reference the
17118    relevant entry in the pool.
17119 
17120    Possible enhancements to the algorithm (not implemented) are:
17121 
17122    1) For some processors and object formats, there may be benefit in
17123    aligning the pools to the start of cache lines; this alignment
17124    would need to be taken into account when calculating addressability
17125    of a pool.  */
17126 
17127 /* These typedefs are located at the start of this file, so that
17128    they can be used in the prototypes there.  This comment is to
17129    remind readers of that fact so that the following structures
17130    can be understood more easily.
17131 
17132      typedef struct minipool_node    Mnode;
17133      typedef struct minipool_fixup   Mfix;  */
17134 
17135 struct minipool_node
17136 {
17137   /* Doubly linked chain of entries.  */
17138   Mnode * next;
17139   Mnode * prev;
17140   /* The maximum offset into the code at which this entry can be placed.  While
17141      pushing fixes for forward references, all entries are sorted in order
17142      of increasing max_address.  */
17143   HOST_WIDE_INT max_address;
17144   /* Similarly for an entry inserted for a backwards ref.  */
17145   HOST_WIDE_INT min_address;
17146   /* The number of fixes referencing this entry.  This can become zero
17147      if we "unpush" an entry.  In this case we ignore the entry when we
17148      come to emit the code.  */
17149   int refcount;
17150   /* The offset from the start of the minipool.  */
17151   HOST_WIDE_INT offset;
17152   /* The value in the table.  */
17153   rtx value;
17154   /* The mode of value.  */
17155   machine_mode mode;
17156   /* The size of the value.  With iWMMXt enabled,
17157      sizes > 4 also imply an alignment of 8 bytes.  */
17158   int fix_size;
17159 };
17160 
17161 struct minipool_fixup
17162 {
17163   Mfix *            next;
17164   rtx_insn *        insn;
17165   HOST_WIDE_INT     address;
17166   rtx *             loc;
17167   machine_mode mode;
17168   int               fix_size;
17169   rtx               value;
17170   Mnode *           minipool;
17171   HOST_WIDE_INT     forwards;
17172   HOST_WIDE_INT     backwards;
17173 };
17174 
17175 /* Fixes less than a word need padding out to a word boundary.  */
17176 #define MINIPOOL_FIX_SIZE(mode) \
17177   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
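
/* Illustrative note on MINIPOOL_FIX_SIZE: an HImode fix (2 bytes) is padded
   out to 4 bytes, while a DImode or DFmode fix keeps its natural size of
   8 bytes.  */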
17178 
17179 static Mnode *	minipool_vector_head;
17180 static Mnode *	minipool_vector_tail;
17181 static rtx_code_label	*minipool_vector_label;
17182 static int	minipool_pad;
17183 
17184 /* The linked list of all minipool fixes required for this function.  */
17185 Mfix * 		minipool_fix_head;
17186 Mfix * 		minipool_fix_tail;
17187 /* The fix entry for the current minipool, once it has been placed.  */
17188 Mfix *		minipool_barrier;
17189 
17190 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17191 #define JUMP_TABLES_IN_TEXT_SECTION 0
17192 #endif
17193 
17194 static HOST_WIDE_INT
17195 get_jump_table_size (rtx_jump_table_data *insn)
17196 {
17197   /* ADDR_VECs only take room if read-only data goes into the text
17198      section.  */
17199   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17200     {
17201       rtx body = PATTERN (insn);
17202       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17203       HOST_WIDE_INT size;
17204       HOST_WIDE_INT modesize;
17205 
17206       modesize = GET_MODE_SIZE (GET_MODE (body));
17207       size = modesize * XVECLEN (body, elt);
17208       switch (modesize)
17209 	{
17210 	case 1:
17211 	  /* Round up size of TBB table to a halfword boundary.  */
17212 	  size = (size + 1) & ~HOST_WIDE_INT_1;
17213 	  break;
17214 	case 2:
17215 	  /* No padding necessary for TBH.  */
17216 	  break;
17217 	case 4:
17218 	  /* Add two bytes for alignment on Thumb.  */
17219 	  if (TARGET_THUMB)
17220 	    size += 2;
17221 	  break;
17222 	default:
17223 	  gcc_unreachable ();
17224 	}
17225       return size;
17226     }
17227 
17228   return 0;
17229 }
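
/* Illustrative note on get_jump_table_size: a byte-sized (TBB-style) table
   with 5 entries yields 5 rounded up to 6; a 4-byte ADDR_DIFF_VEC with 3
   entries compiled for Thumb yields 12 plus 2 bytes of alignment, i.e. 14.
   When jump tables are not placed in the text section the result is 0.  */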
17230 
17231 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17232    function descriptor) into a register and the GOT address into the
17233    FDPIC register, returning an rtx for the register holding the
17234    function address.  */
17235 
17236 rtx
17237 arm_load_function_descriptor (rtx funcdesc)
17238 {
17239   rtx fnaddr_reg = gen_reg_rtx (Pmode);
17240   rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17241   rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17242   rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17243 
17244   emit_move_insn (fnaddr_reg, fnaddr);
17245 
17246   /* The ABI requires the entry point address to be loaded first, but
17247      since we cannot support lazy binding for lack of atomic load of
17248      two 32-bits values, we do not need to bother to prevent the
17249      previous load from being moved after that of the GOT address.  */
17250   emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17251 
17252   return fnaddr_reg;
17253 }
17254 
17255 /* Return the maximum amount of padding that will be inserted before
17256    label LABEL.  */
17257 static HOST_WIDE_INT
17258 get_label_padding (rtx label)
17259 {
17260   HOST_WIDE_INT align, min_insn_size;
17261 
17262   align = 1 << label_to_alignment (label).levels[0].log;
17263   min_insn_size = TARGET_THUMB ? 2 : 4;
17264   return align > min_insn_size ? align - min_insn_size : 0;
17265 }
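
/* Illustrative note on get_label_padding: a label aligned to an 8-byte
   boundary is assumed to be preceded by at most 8 - 2 = 6 bytes of padding
   in Thumb code and 8 - 4 = 4 bytes in ARM code, since the insn before it
   is at least the minimum insn size long.  */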
17266 
17267 /* Move a minipool fix MP from its current location to before MAX_MP.
17268    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17269    constraints may need updating.  */
17270 static Mnode *
17271 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17272 			       HOST_WIDE_INT max_address)
17273 {
17274   /* The code below assumes these are different.  */
17275   gcc_assert (mp != max_mp);
17276 
17277   if (max_mp == NULL)
17278     {
17279       if (max_address < mp->max_address)
17280 	mp->max_address = max_address;
17281     }
17282   else
17283     {
17284       if (max_address > max_mp->max_address - mp->fix_size)
17285 	mp->max_address = max_mp->max_address - mp->fix_size;
17286       else
17287 	mp->max_address = max_address;
17288 
17289       /* Unlink MP from its current position.  Since max_mp is non-null,
17290 	 mp->prev must be non-null.  */
17291       mp->prev->next = mp->next;
17292       if (mp->next != NULL)
17293 	mp->next->prev = mp->prev;
17294       else
17295 	minipool_vector_tail = mp->prev;
17296 
17297       /* Re-insert it before MAX_MP.  */
17298       mp->next = max_mp;
17299       mp->prev = max_mp->prev;
17300       max_mp->prev = mp;
17301 
17302       if (mp->prev != NULL)
17303 	mp->prev->next = mp;
17304       else
17305 	minipool_vector_head = mp;
17306     }
17307 
17308   /* Save the new entry.  */
17309   max_mp = mp;
17310 
17311   /* Scan over the preceding entries and adjust their addresses as
17312      required.  */
17313   while (mp->prev != NULL
17314 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17315     {
17316       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17317       mp = mp->prev;
17318     }
17319 
17320   return max_mp;
17321 }
17322 
17323 /* Add a constant to the minipool for a forward reference.  Returns the
17324    node added or NULL if the constant will not fit in this pool.  */
17325 static Mnode *
17326 add_minipool_forward_ref (Mfix *fix)
17327 {
17328   /* If set, max_mp is the first pool_entry that has a lower
17329      constraint than the one we are trying to add.  */
17330   Mnode *       max_mp = NULL;
17331   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17332   Mnode *       mp;
17333 
17334   /* If the minipool starts before the end of FIX->INSN then this FIX
17335      cannot be placed into the current pool.  Furthermore, adding the
17336      new constant pool entry may cause the pool to start FIX_SIZE bytes
17337      earlier.  */
17338   if (minipool_vector_head &&
17339       (fix->address + get_attr_length (fix->insn)
17340        >= minipool_vector_head->max_address - fix->fix_size))
17341     return NULL;
17342 
17343   /* Scan the pool to see if a constant with the same value has
17344      already been added.  While we are doing this, also note the
17345      location where we must insert the constant if it doesn't already
17346      exist.  */
17347   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17348     {
17349       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17350 	  && fix->mode == mp->mode
17351 	  && (!LABEL_P (fix->value)
17352 	      || (CODE_LABEL_NUMBER (fix->value)
17353 		  == CODE_LABEL_NUMBER (mp->value)))
17354 	  && rtx_equal_p (fix->value, mp->value))
17355 	{
17356 	  /* More than one fix references this entry.  */
17357 	  mp->refcount++;
17358 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17359 	}
17360 
17361       /* Note the insertion point if necessary.  */
17362       if (max_mp == NULL
17363 	  && mp->max_address > max_address)
17364 	max_mp = mp;
17365 
17366       /* If we are inserting an 8-byte aligned quantity and
17367 	 we have not already found an insertion point, then
17368 	 make sure that all such 8-byte aligned quantities are
17369 	 placed at the start of the pool.  */
17370       if (ARM_DOUBLEWORD_ALIGN
17371 	  && max_mp == NULL
17372 	  && fix->fix_size >= 8
17373 	  && mp->fix_size < 8)
17374 	{
17375 	  max_mp = mp;
17376 	  max_address = mp->max_address;
17377 	}
17378     }
17379 
17380   /* The value is not currently in the minipool, so we need to create
17381      a new entry for it.  If MAX_MP is NULL, the entry will be put on
17382      the end of the list since the placement is less constrained than
17383      any existing entry.  Otherwise, we insert the new fix before
17384      MAX_MP and, if necessary, adjust the constraints on the other
17385      entries.  */
17386   mp = XNEW (Mnode);
17387   mp->fix_size = fix->fix_size;
17388   mp->mode = fix->mode;
17389   mp->value = fix->value;
17390   mp->refcount = 1;
17391   /* Not yet required for a backwards ref.  */
17392   mp->min_address = -65536;
17393 
17394   if (max_mp == NULL)
17395     {
17396       mp->max_address = max_address;
17397       mp->next = NULL;
17398       mp->prev = minipool_vector_tail;
17399 
17400       if (mp->prev == NULL)
17401 	{
17402 	  minipool_vector_head = mp;
17403 	  minipool_vector_label = gen_label_rtx ();
17404 	}
17405       else
17406 	mp->prev->next = mp;
17407 
17408       minipool_vector_tail = mp;
17409     }
17410   else
17411     {
17412       if (max_address > max_mp->max_address - mp->fix_size)
17413 	mp->max_address = max_mp->max_address - mp->fix_size;
17414       else
17415 	mp->max_address = max_address;
17416 
17417       mp->next = max_mp;
17418       mp->prev = max_mp->prev;
17419       max_mp->prev = mp;
17420       if (mp->prev != NULL)
17421 	mp->prev->next = mp;
17422       else
17423 	minipool_vector_head = mp;
17424     }
17425 
17426   /* Save the new entry.  */
17427   max_mp = mp;
17428 
17429   /* Scan over the preceding entries and adjust their addresses as
17430      required.  */
17431   while (mp->prev != NULL
17432 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17433     {
17434       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17435       mp = mp->prev;
17436     }
17437 
17438   return max_mp;
17439 }
17440 
17441 static Mnode *
17442 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17443 				HOST_WIDE_INT  min_address)
17444 {
17445   HOST_WIDE_INT offset;
17446 
17447   /* The code below assumes these are different.  */
17448   gcc_assert (mp != min_mp);
17449 
17450   if (min_mp == NULL)
17451     {
17452       if (min_address > mp->min_address)
17453 	mp->min_address = min_address;
17454     }
17455   else
17456     {
17457       /* We will adjust this below if it is too loose.  */
17458       mp->min_address = min_address;
17459 
17460       /* Unlink MP from its current position.  Since min_mp is non-null,
17461 	 mp->next must be non-null.  */
17462       mp->next->prev = mp->prev;
17463       if (mp->prev != NULL)
17464 	mp->prev->next = mp->next;
17465       else
17466 	minipool_vector_head = mp->next;
17467 
17468       /* Reinsert it after MIN_MP.  */
17469       mp->prev = min_mp;
17470       mp->next = min_mp->next;
17471       min_mp->next = mp;
17472       if (mp->next != NULL)
17473 	mp->next->prev = mp;
17474       else
17475 	minipool_vector_tail = mp;
17476     }
17477 
17478   min_mp = mp;
17479 
17480   offset = 0;
17481   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17482     {
17483       mp->offset = offset;
17484       if (mp->refcount > 0)
17485 	offset += mp->fix_size;
17486 
17487       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17488 	mp->next->min_address = mp->min_address + mp->fix_size;
17489     }
17490 
17491   return min_mp;
17492 }
17493 
17494 /* Add a constant to the minipool for a backward reference.  Returns the
17495    node added or NULL if the constant will not fit in this pool.
17496 
17497    Note that the code for insertion for a backwards reference can be
17498    somewhat confusing because the calculated offsets for each fix do
17499    not take into account the size of the pool (which is still under
17500    construction).  */
17501 static Mnode *
17502 add_minipool_backward_ref (Mfix *fix)
17503 {
17504   /* If set, min_mp is the last pool_entry that has a lower constraint
17505      than the one we are trying to add.  */
17506   Mnode *min_mp = NULL;
17507   /* This can be negative, since it is only a constraint.  */
17508   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
17509   Mnode *mp;
17510 
17511   /* If we can't reach the current pool from this insn, or if we can't
17512      insert this entry at the end of the pool without pushing other
17513      fixes out of range, then we don't try.  This ensures that we
17514      can't fail later on.  */
17515   if (min_address >= minipool_barrier->address
17516       || (minipool_vector_tail->min_address + fix->fix_size
17517 	  >= minipool_barrier->address))
17518     return NULL;
17519 
17520   /* Scan the pool to see if a constant with the same value has
17521      already been added.  While we are doing this, also note the
17522      location where we must insert the constant if it doesn't already
17523      exist.  */
17524   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17525     {
17526       if (GET_CODE (fix->value) == GET_CODE (mp->value)
17527 	  && fix->mode == mp->mode
17528 	  && (!LABEL_P (fix->value)
17529 	      || (CODE_LABEL_NUMBER (fix->value)
17530 		  == CODE_LABEL_NUMBER (mp->value)))
17531 	  && rtx_equal_p (fix->value, mp->value)
17532 	  /* Check that there is enough slack to move this entry to the
17533 	     end of the table (this is conservative).  */
17534 	  && (mp->max_address
17535 	      > (minipool_barrier->address
17536 		 + minipool_vector_tail->offset
17537 		 + minipool_vector_tail->fix_size)))
17538 	{
17539 	  mp->refcount++;
17540 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17541 	}
17542 
17543       if (min_mp != NULL)
17544 	mp->min_address += fix->fix_size;
17545       else
17546 	{
17547 	  /* Note the insertion point if necessary.  */
17548 	  if (mp->min_address < min_address)
17549 	    {
17550 	      /* For now, we do not allow the insertion of nodes requiring
17551 		 8-byte alignment anywhere but at the start of the pool.  */
17552 	      if (ARM_DOUBLEWORD_ALIGN
17553 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17554 		return NULL;
17555 	      else
17556 		min_mp = mp;
17557 	    }
17558 	  else if (mp->max_address
17559 		   < minipool_barrier->address + mp->offset + fix->fix_size)
17560 	    {
17561 	      /* Inserting before this entry would push the fix beyond
17562 		 its maximum address (which can happen if we have
17563 		 re-located a forwards fix); force the new fix to come
17564 		 after it.  */
17565 	      if (ARM_DOUBLEWORD_ALIGN
17566 		  && fix->fix_size >= 8 && mp->fix_size < 8)
17567 		return NULL;
17568 	      else
17569 		{
17570 		  min_mp = mp;
17571 		  min_address = mp->min_address + fix->fix_size;
17572 		}
17573 	    }
17574 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
17575 	     aligned quantities.  */
17576 	  else if (ARM_DOUBLEWORD_ALIGN
17577 		   && fix->fix_size < 8
17578 		   && mp->fix_size >= 8)
17579 	    {
17580 	      min_mp = mp;
17581 	      min_address = mp->min_address + fix->fix_size;
17582 	    }
17583 	}
17584     }
17585 
17586   /* We need to create a new entry.  */
17587   mp = XNEW (Mnode);
17588   mp->fix_size = fix->fix_size;
17589   mp->mode = fix->mode;
17590   mp->value = fix->value;
17591   mp->refcount = 1;
17592   mp->max_address = minipool_barrier->address + 65536;
17593 
17594   mp->min_address = min_address;
17595 
17596   if (min_mp == NULL)
17597     {
17598       mp->prev = NULL;
17599       mp->next = minipool_vector_head;
17600 
17601       if (mp->next == NULL)
17602 	{
17603 	  minipool_vector_tail = mp;
17604 	  minipool_vector_label = gen_label_rtx ();
17605 	}
17606       else
17607 	mp->next->prev = mp;
17608 
17609       minipool_vector_head = mp;
17610     }
17611   else
17612     {
17613       mp->next = min_mp->next;
17614       mp->prev = min_mp;
17615       min_mp->next = mp;
17616 
17617       if (mp->next != NULL)
17618 	mp->next->prev = mp;
17619       else
17620 	minipool_vector_tail = mp;
17621     }
17622 
17623   /* Save the new entry.  */
17624   min_mp = mp;
17625 
17626   if (mp->prev)
17627     mp = mp->prev;
17628   else
17629     mp->offset = 0;
17630 
17631   /* Scan over the following entries and adjust their offsets.  */
17632   while (mp->next != NULL)
17633     {
17634       if (mp->next->min_address < mp->min_address + mp->fix_size)
17635 	mp->next->min_address = mp->min_address + mp->fix_size;
17636 
17637       if (mp->refcount)
17638 	mp->next->offset = mp->offset + mp->fix_size;
17639       else
17640 	mp->next->offset = mp->offset;
17641 
17642       mp = mp->next;
17643     }
17644 
17645   return min_mp;
17646 }
17647 
17648 static void
17649 assign_minipool_offsets (Mfix *barrier)
17650 {
17651   HOST_WIDE_INT offset = 0;
17652   Mnode *mp;
17653 
17654   minipool_barrier = barrier;
17655 
17656   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17657     {
17658       mp->offset = offset;
17659 
17660       if (mp->refcount > 0)
17661 	offset += mp->fix_size;
17662     }
17663 }
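
/* Illustrative note on assign_minipool_offsets: three live entries of sizes
   4, 8 and 4 bytes receive offsets 0, 4 and 12 respectively; an entry whose
   refcount has dropped to zero still gets an offset but does not advance
   it.  */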
17664 
17665 /* Output the literal table.  */
17666 static void
17667 dump_minipool (rtx_insn *scan)
17668 {
17669   Mnode * mp;
17670   Mnode * nmp;
17671   int align64 = 0;
17672 
17673   if (ARM_DOUBLEWORD_ALIGN)
17674     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17675       if (mp->refcount > 0 && mp->fix_size >= 8)
17676 	{
17677 	  align64 = 1;
17678 	  break;
17679 	}
17680 
17681   if (dump_file)
17682     fprintf (dump_file,
17683 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17684 	     INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17685 
17686   scan = emit_label_after (gen_label_rtx (), scan);
17687   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17688   scan = emit_label_after (minipool_vector_label, scan);
17689 
17690   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17691     {
17692       if (mp->refcount > 0)
17693 	{
17694 	  if (dump_file)
17695 	    {
17696 	      fprintf (dump_file,
17697 		       ";;  Offset %u, min %ld, max %ld ",
17698 		       (unsigned) mp->offset, (unsigned long) mp->min_address,
17699 		       (unsigned long) mp->max_address);
17700 	      arm_print_value (dump_file, mp->value);
17701 	      fputc ('\n', dump_file);
17702 	    }
17703 
17704 	  rtx val = copy_rtx (mp->value);
17705 
17706 	  switch (GET_MODE_SIZE (mp->mode))
17707 	    {
17708 #ifdef HAVE_consttable_1
17709 	    case 1:
17710 	      scan = emit_insn_after (gen_consttable_1 (val), scan);
17711 	      break;
17712 
17713 #endif
17714 #ifdef HAVE_consttable_2
17715 	    case 2:
17716 	      scan = emit_insn_after (gen_consttable_2 (val), scan);
17717 	      break;
17718 
17719 #endif
17720 #ifdef HAVE_consttable_4
17721 	    case 4:
17722 	      scan = emit_insn_after (gen_consttable_4 (val), scan);
17723 	      break;
17724 
17725 #endif
17726 #ifdef HAVE_consttable_8
17727 	    case 8:
17728 	      scan = emit_insn_after (gen_consttable_8 (val), scan);
17729 	      break;
17730 
17731 #endif
17732 #ifdef HAVE_consttable_16
17733 	    case 16:
17734               scan = emit_insn_after (gen_consttable_16 (val), scan);
17735               break;
17736 
17737 #endif
17738 	    default:
17739 	      gcc_unreachable ();
17740 	    }
17741 	}
17742 
17743       nmp = mp->next;
17744       free (mp);
17745     }
17746 
17747   minipool_vector_head = minipool_vector_tail = NULL;
17748   scan = emit_insn_after (gen_consttable_end (), scan);
17749   scan = emit_barrier_after (scan);
17750 }
17751 
17752 /* Return the cost of forcibly inserting a barrier after INSN.  */
17753 static int
17754 arm_barrier_cost (rtx_insn *insn)
17755 {
17756   /* Basing the location of the pool on the loop depth is preferable,
17757      but at the moment, the basic block information seems to be
17758      corrupt by this stage of the compilation.  */
17759   int base_cost = 50;
17760   rtx_insn *next = next_nonnote_insn (insn);
17761 
17762   if (next != NULL && LABEL_P (next))
17763     base_cost -= 20;
17764 
17765   switch (GET_CODE (insn))
17766     {
17767     case CODE_LABEL:
17768       /* It will always be better to place the table before the label, rather
17769 	 than after it.  */
17770       return 50;
17771 
17772     case INSN:
17773     case CALL_INSN:
17774       return base_cost;
17775 
17776     case JUMP_INSN:
17777       return base_cost - 10;
17778 
17779     default:
17780       return base_cost + 10;
17781     }
17782 }
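
/* Illustrative note on arm_barrier_cost: the cheapest position is an
   unconditional jump immediately followed by a label (50 - 20 - 10 = 20),
   while a CODE_LABEL always costs 50 so that the pool ends up before the
   label rather than after it.  */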
17783 
17784 /* Find the best place in the insn stream in the range
17785    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17786    Create the barrier by inserting a jump and add a new fix entry for
17787    it.  */
17788 static Mfix *
17789 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17790 {
17791   HOST_WIDE_INT count = 0;
17792   rtx_barrier *barrier;
17793   rtx_insn *from = fix->insn;
17794   /* The instruction after which we will insert the jump.  */
17795   rtx_insn *selected = NULL;
17796   int selected_cost;
17797   /* The address at which the jump instruction will be placed.  */
17798   HOST_WIDE_INT selected_address;
17799   Mfix * new_fix;
17800   HOST_WIDE_INT max_count = max_address - fix->address;
17801   rtx_code_label *label = gen_label_rtx ();
17802 
17803   selected_cost = arm_barrier_cost (from);
17804   selected_address = fix->address;
17805 
17806   while (from && count < max_count)
17807     {
17808       rtx_jump_table_data *tmp;
17809       int new_cost;
17810 
17811       /* This code shouldn't have been called if there was a natural barrier
17812 	 within range.  */
17813       gcc_assert (!BARRIER_P (from));
17814 
17815       /* Count the length of this insn.  This must stay in sync with the
17816 	 code that pushes minipool fixes.  */
17817       if (LABEL_P (from))
17818 	count += get_label_padding (from);
17819       else
17820 	count += get_attr_length (from);
17821 
17822       /* If there is a jump table, add its length.  */
17823       if (tablejump_p (from, NULL, &tmp))
17824 	{
17825 	  count += get_jump_table_size (tmp);
17826 
17827 	  /* Jump tables aren't in a basic block, so base the cost on
17828 	     the dispatch insn.  If we select this location, we will
17829 	     still put the pool after the table.  */
17830 	  new_cost = arm_barrier_cost (from);
17831 
17832 	  if (count < max_count
17833 	      && (!selected || new_cost <= selected_cost))
17834 	    {
17835 	      selected = tmp;
17836 	      selected_cost = new_cost;
17837 	      selected_address = fix->address + count;
17838 	    }
17839 
17840 	  /* Continue after the dispatch table.  */
17841 	  from = NEXT_INSN (tmp);
17842 	  continue;
17843 	}
17844 
17845       new_cost = arm_barrier_cost (from);
17846 
17847       if (count < max_count
17848 	  && (!selected || new_cost <= selected_cost))
17849 	{
17850 	  selected = from;
17851 	  selected_cost = new_cost;
17852 	  selected_address = fix->address + count;
17853 	}
17854 
17855       from = NEXT_INSN (from);
17856     }
17857 
17858   /* Make sure that we found a place to insert the jump.  */
17859   gcc_assert (selected);
17860 
17861   /* Create a new JUMP_INSN that branches around a barrier.  */
17862   from = emit_jump_insn_after (gen_jump (label), selected);
17863   JUMP_LABEL (from) = label;
17864   barrier = emit_barrier_after (from);
17865   emit_label_after (label, barrier);
17866 
17867   /* Create a minipool barrier entry for the new barrier.  */
17868   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17869   new_fix->insn = barrier;
17870   new_fix->address = selected_address;
17871   new_fix->next = fix->next;
17872   fix->next = new_fix;
17873 
17874   return new_fix;
17875 }
17876 
17877 /* Record that there is a natural barrier in the insn stream at
17878    ADDRESS.  */
17879 static void
17880 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17881 {
17882   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17883 
17884   fix->insn = insn;
17885   fix->address = address;
17886 
17887   fix->next = NULL;
17888   if (minipool_fix_head != NULL)
17889     minipool_fix_tail->next = fix;
17890   else
17891     minipool_fix_head = fix;
17892 
17893   minipool_fix_tail = fix;
17894 }
17895 
17896 /* Record INSN, which will need fixing up to load a value from the
17897    minipool.  ADDRESS is the offset of the insn from the start of the
17898    function; LOC is a pointer to the part of the insn which requires
17899    fixing; VALUE is the constant that must be loaded, which is of type
17900    MODE.  */
17901 static void
17902 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17903 		   machine_mode mode, rtx value)
17904 {
17905   gcc_assert (!arm_disable_literal_pool);
17906   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17907 
17908   fix->insn = insn;
17909   fix->address = address;
17910   fix->loc = loc;
17911   fix->mode = mode;
17912   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17913   fix->value = value;
17914   fix->forwards = get_attr_pool_range (insn);
17915   fix->backwards = get_attr_neg_pool_range (insn);
17916   fix->minipool = NULL;
17917 
17918   /* If an insn doesn't have a range defined for it, then it isn't
17919      expecting to be reworked by this code.  Better to stop now than
17920      to generate duff assembly code.  */
17921   gcc_assert (fix->forwards || fix->backwards);
17922 
17923   /* If an entry requires 8-byte alignment then assume all constant pools
17924      require 4 bytes of padding.  Trying to do this later on a per-pool
17925      basis is awkward because existing pool entries have to be modified.  */
17926   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17927     minipool_pad = 4;
17928 
17929   if (dump_file)
17930     {
17931       fprintf (dump_file,
17932 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17933 	       GET_MODE_NAME (mode),
17934 	       INSN_UID (insn), (unsigned long) address,
17935 	       -1 * (long)fix->backwards, (long)fix->forwards);
17936       arm_print_value (dump_file, fix->value);
17937       fprintf (dump_file, "\n");
17938     }
17939 
17940   /* Add it to the chain of fixes.  */
17941   fix->next = NULL;
17942 
17943   if (minipool_fix_head != NULL)
17944     minipool_fix_tail->next = fix;
17945   else
17946     minipool_fix_head = fix;
17947 
17948   minipool_fix_tail = fix;
17949 }
17950 
17951 /* Return the maximum number of insns that it is worth spending to
17952    synthesize a 64-bit constant inline, rather than loading it from the
17953    literal pool.  */
17954 int
17955 arm_max_const_double_inline_cost ()
17956 {
17957   return ((optimize_size || arm_ld_sched) ? 3 : 4);
17958 }
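
/* Illustrative note on arm_max_const_double_inline_cost: callers are expected
   to compare this limit (3 when optimize_size or arm_ld_sched, 4 otherwise)
   against arm_const_double_inline_cost and fall back to a literal-pool load
   when synthesizing the constant inline would cost more.  */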
17959 
17960 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17961    Returns the number of insns needed, or 99 if we don't know how to
17962    do it.  */
17963 int
17964 arm_const_double_inline_cost (rtx val)
17965 {
17966   rtx lowpart, highpart;
17967   machine_mode mode;
17968 
17969   mode = GET_MODE (val);
17970 
17971   if (mode == VOIDmode)
17972     mode = DImode;
17973 
17974   gcc_assert (GET_MODE_SIZE (mode) == 8);
17975 
17976   lowpart = gen_lowpart (SImode, val);
17977   highpart = gen_highpart_mode (SImode, mode, val);
17978 
17979   gcc_assert (CONST_INT_P (lowpart));
17980   gcc_assert (CONST_INT_P (highpart));
17981 
17982   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17983 			    NULL_RTX, NULL_RTX, 0, 0)
17984 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17985 			      NULL_RTX, NULL_RTX, 0, 0));
17986 }
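
/* Illustrative note on arm_const_double_inline_cost: for the DImode value
   0x0000000100000004 both halves (1 and 4) are valid ARM immediates, so each
   half costs a single insn and the total returned is 2.  */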
17987 
17988 /* Cost of loading a SImode constant.  */
17989 static inline int
17990 arm_const_inline_cost (enum rtx_code code, rtx val)
17991 {
17992   return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17993                            NULL_RTX, NULL_RTX, 1, 0);
17994 }
17995 
17996 /* Return true if it is worthwhile to split a 64-bit constant into two
17997    32-bit operations.  This is the case if optimizing for size, or
17998    if we have load delay slots, or if one 32-bit part can be done with
17999    a single data operation.  */
18000 bool
18001 arm_const_double_by_parts (rtx val)
18002 {
18003   machine_mode mode = GET_MODE (val);
18004   rtx part;
18005 
18006   if (optimize_size || arm_ld_sched)
18007     return true;
18008 
18009   if (mode == VOIDmode)
18010     mode = DImode;
18011 
18012   part = gen_highpart_mode (SImode, mode, val);
18013 
18014   gcc_assert (CONST_INT_P (part));
18015 
18016   if (const_ok_for_arm (INTVAL (part))
18017       || const_ok_for_arm (~INTVAL (part)))
18018     return true;
18019 
18020   part = gen_lowpart (SImode, val);
18021 
18022   gcc_assert (CONST_INT_P (part));
18023 
18024   if (const_ok_for_arm (INTVAL (part))
18025       || const_ok_for_arm (~INTVAL (part)))
18026     return true;
18027 
18028   return false;
18029 }
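
/* Illustrative note on arm_const_double_by_parts: even when neither
   optimizing for size nor scheduling loads, a value such as
   0x1234567800000000 is worth splitting, because its low part is zero and
   hence usable as an immediate in a single data-processing insn.  */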
18030 
18031 /* Return true if it is possible to inline both the high and low parts
18032    of a 64-bit constant into 32-bit data processing instructions.  */
18033 bool
18034 arm_const_double_by_immediates (rtx val)
18035 {
18036   machine_mode mode = GET_MODE (val);
18037   rtx part;
18038 
18039   if (mode == VOIDmode)
18040     mode = DImode;
18041 
18042   part = gen_highpart_mode (SImode, mode, val);
18043 
18044   gcc_assert (CONST_INT_P (part));
18045 
18046   if (!const_ok_for_arm (INTVAL (part)))
18047     return false;
18048 
18049   part = gen_lowpart (SImode, val);
18050 
18051   gcc_assert (CONST_INT_P (part));
18052 
18053   if (!const_ok_for_arm (INTVAL (part)))
18054     return false;
18055 
18056   return true;
18057 }
18058 
18059 /* Scan INSN and note any of its operands that need fixing.
18060    If DO_PUSHES is false we do not actually push any of the fixups
18061    needed.  */
18062 static void
18063 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18064 {
18065   int opno;
18066 
18067   extract_constrain_insn (insn);
18068 
18069   if (recog_data.n_alternatives == 0)
18070     return;
18071 
18072   /* Fill in recog_op_alt with information about the constraints of
18073      this insn.  */
18074   preprocess_constraints (insn);
18075 
18076   const operand_alternative *op_alt = which_op_alt ();
18077   for (opno = 0; opno < recog_data.n_operands; opno++)
18078     {
18079       /* Things we need to fix can only occur in inputs.  */
18080       if (recog_data.operand_type[opno] != OP_IN)
18081 	continue;
18082 
18083       /* If this alternative is a memory reference, then any mention
18084 	 of constants in this alternative is really to fool reload
18085 	 into allowing us to accept one there.  We need to fix them up
18086 	 now so that we output the right code.  */
18087       if (op_alt[opno].memory_ok)
18088 	{
18089 	  rtx op = recog_data.operand[opno];
18090 
18091 	  if (CONSTANT_P (op))
18092 	    {
18093 	      if (do_pushes)
18094 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18095 				   recog_data.operand_mode[opno], op);
18096 	    }
18097 	  else if (MEM_P (op)
18098 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18099 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18100 	    {
18101 	      if (do_pushes)
18102 		{
18103 		  rtx cop = avoid_constant_pool_reference (op);
18104 
18105 		  /* Casting the address of something to a mode narrower
18106 		     than a word can cause avoid_constant_pool_reference()
18107 		     to return the pool reference itself.  That's no good to
18108 		     us here.  Let's just hope that we can use the
18109 		     constant pool value directly.  */
18110 		  if (op == cop)
18111 		    cop = get_pool_constant (XEXP (op, 0));
18112 
18113 		  push_minipool_fix (insn, address,
18114 				     recog_data.operand_loc[opno],
18115 				     recog_data.operand_mode[opno], cop);
18116 		}
18117 
18118 	    }
18119 	}
18120     }
18121 
18122   return;
18123 }
18124 
18125 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18126    and unions in the context of ARMv8-M Security Extensions.  It is used as a
18127    helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18128    functions.  The PADDING_BITS_TO_CLEAR pointer can point to either one or
18129    four masks, depending on whether it is being computed for a
18130    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18131    respectively.  The tree for the type of the argument, or of a field within
18132    an argument, is passed in ARG_TYPE; the register in which this argument or
18133    field starts is kept in the pointer REGNO and updated accordingly; the bit
18134    at which it starts is passed in STARTING_BIT; and the last used bit is kept
18135    in LAST_USED_BIT, which is also updated accordingly.  */
18136 
18137 static unsigned HOST_WIDE_INT
18138 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18139 			       uint32_t * padding_bits_to_clear,
18140 			       unsigned starting_bit, int * last_used_bit)
18141 
18142 {
18143   unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18144 
18145   if (TREE_CODE (arg_type) == RECORD_TYPE)
18146     {
18147       unsigned current_bit = starting_bit;
18148       tree field;
18149       long int offset, size;
18150 
18151 
18152       field = TYPE_FIELDS (arg_type);
18153       while (field)
18154 	{
18155 	  /* The offset within a structure is always an offset from
18156 	     the start of that structure.  Make sure we take that into account
18157 	     in the calculation of the register-based offset used here.  */
18158 	  offset = starting_bit;
18159 	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18160 	  offset %= 32;
18161 
18162 	  /* This is the actual size of the field, for bitfields this is the
18163 	     bitfield width and not the container size.  */
18164 	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18165 
18166 	  if (*last_used_bit != offset)
18167 	    {
18168 	      if (offset < *last_used_bit)
18169 		{
18170 		  /* This field's offset is before the 'last_used_bit', that
18171 		     means this field goes on the next register.  So we need to
18172 		     pad the rest of the current register and increase the
18173 		     register number.  */
18174 		  uint32_t mask;
18175 		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18176 		  mask++;
18177 
18178 		  padding_bits_to_clear[*regno] |= mask;
18179 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18180 		  (*regno)++;
18181 		}
18182 	      else
18183 		{
18184 		  /* Otherwise we pad the bits between the last field's end and
18185 		     the start of the new field.  */
18186 		  uint32_t mask;
18187 
18188 		  mask = ((uint32_t)-1) >> (32 - offset);
18189 		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18190 		  padding_bits_to_clear[*regno] |= mask;
18191 		}
18192 	      current_bit = offset;
18193 	    }
18194 
18195 	  /* Calculate further padding bits for inner structs/unions too.  */
18196 	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18197 	    {
18198 	      *last_used_bit = current_bit;
18199 	      not_to_clear_reg_mask
18200 		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18201 						  padding_bits_to_clear, offset,
18202 						  last_used_bit);
18203 	    }
18204 	  else
18205 	    {
18206 	      /* Update 'current_bit' with this field's size.  If the
18207 		 'current_bit' lies in a subsequent register, update 'regno' and
18208 		 reset 'current_bit' to point to the current bit in that new
18209 		 register.  */
18210 	      current_bit += size;
18211 	      while (current_bit >= 32)
18212 		{
18213 		  current_bit -= 32;
18214 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18215 		  (*regno)++;
18216 		}
18217 	      *last_used_bit = current_bit;
18218 	    }
18219 
18220 	  field = TREE_CHAIN (field);
18221 	}
18222       not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18223     }
18224   else if (TREE_CODE (arg_type) == UNION_TYPE)
18225     {
18226       tree field, field_t;
18227       int i, regno_t, field_size;
18228       int max_reg = -1;
18229       int max_bit = -1;
18230       uint32_t mask;
18231       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18232 	= {-1, -1, -1, -1};
18233 
18234       /* To compute the padding bits in a union we only consider bits as
18235 	 padding bits if, for every field in the union, they are either a
18236 	 padding bit or fall outside that field's size.  */
18237       field = TYPE_FIELDS (arg_type);
18238       while (field)
18239 	{
18240 	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18241 	    = {0U, 0U, 0U, 0U};
18242 	  int last_used_bit_t = *last_used_bit;
18243 	  regno_t = *regno;
18244 	  field_t = TREE_TYPE (field);
18245 
18246 	  /* If the field's type is either a record or a union make sure to
18247 	     compute their padding bits too.  */
18248 	  if (RECORD_OR_UNION_TYPE_P (field_t))
18249 	    not_to_clear_reg_mask
18250 	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18251 						&padding_bits_to_clear_t[0],
18252 						starting_bit, &last_used_bit_t);
18253 	  else
18254 	    {
18255 	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18256 	      regno_t = (field_size / 32) + *regno;
18257 	      last_used_bit_t = (starting_bit + field_size) % 32;
18258 	    }
18259 
18260 	  for (i = *regno; i < regno_t; i++)
18261 	    {
18262 	      /* For all but the last register used by this field only keep the
18263 		 padding bits that were padding bits in this field.  */
18264 	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18265 	    }
18266 
18267 	  /* For the last register, keep all padding bits that were padding
18268 	     bits in this field and any padding bits that are still valid
18269 	     as padding bits but fall outside of this field's size.  */
18270 	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18271 	  padding_bits_to_clear_res[regno_t]
18272 	    &= padding_bits_to_clear_t[regno_t] | mask;
18273 
18274 	  /* Update the maximum size of the fields in terms of registers used
18275 	     ('max_reg') and the 'last_used_bit' in said register.  */
18276 	  if (max_reg < regno_t)
18277 	    {
18278 	      max_reg = regno_t;
18279 	      max_bit = last_used_bit_t;
18280 	    }
18281 	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
18282 	    max_bit = last_used_bit_t;
18283 
18284 	  field = TREE_CHAIN (field);
18285 	}
18286 
18287       /* Update the current padding_bits_to_clear using the intersection of the
18288 	 padding bits of all the fields.  */
18289       for (i = *regno; i < max_reg; i++)
18290 	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18291 
18292       /* Do not keep trailing padding bits, we do not know yet whether this
18293 	 is the end of the argument.  */
18294       mask = ((uint32_t) 1 << max_bit) - 1;
18295       padding_bits_to_clear[max_reg]
18296 	|= padding_bits_to_clear_res[max_reg] & mask;
18297 
18298       *regno = max_reg;
18299       *last_used_bit = max_bit;
18300     }
18301   else
18302     /* This function should only be used for structs and unions.  */
18303     gcc_unreachable ();
18304 
18305   return not_to_clear_reg_mask;
18306 }
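
/* Illustrative note on comp_not_to_clear_mask_str_un: when a new field forces
   a move to the next register after only the low N bits of the current one
   have been used, the mask ORed into padding_bits_to_clear covers bits N..31
   of that register; e.g. after a single byte the mask is 0xffffff00.  */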
18307 
18308 /* In the context of ARMv8-M Security Extensions, this function is used for both
18309    'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18310    registers are used when returning or passing arguments, which is then
18311    returned as a mask.  It will also compute a mask to indicate padding/unused
18312    bits for each of these registers, and passes this through the
18313    PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
18314    ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18315    the starting register used to pass this argument or return value is passed
18316    in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18317    for struct and union types.  */
18318 
18319 static unsigned HOST_WIDE_INT
18320 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18321 			     uint32_t * padding_bits_to_clear)
18322 
18323 {
18324   int last_used_bit = 0;
18325   unsigned HOST_WIDE_INT not_to_clear_mask;
18326 
18327   if (RECORD_OR_UNION_TYPE_P (arg_type))
18328     {
18329       not_to_clear_mask
18330 	= comp_not_to_clear_mask_str_un (arg_type, &regno,
18331 					 padding_bits_to_clear, 0,
18332 					 &last_used_bit);
18333 
18334 
18335       /* If the 'last_used_bit' is not zero, that means we are still using a
18336 	 part of the last 'regno'.  In such cases we must clear the trailing
18337 	 bits.  Otherwise regno is not used at all and should be marked for
18338 	 clearing.  */
18339       if (last_used_bit != 0)
18340 	padding_bits_to_clear[regno]
18341 	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18342       else
18343 	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18344     }
18345   else
18346     {
18347       not_to_clear_mask = 0;
18348       /* We are not dealing with structs nor unions.  So these arguments may be
18349       /* We are not dealing with structs or unions.  So these arguments may be
18350 	 used when returning or passing arguments in multiple VFP registers.  */
18351       if (GET_MODE (arg_rtx) == BLKmode)
18352 	{
18353 	  int i, arg_regs;
18354 	  rtx reg;
18355 
18356 	  /* This should really only occur when dealing with the hard-float
18357 	     ABI.  */
18358 	  gcc_assert (TARGET_HARD_FLOAT_ABI);
18359 
18360 	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18361 	    {
18362 	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18363 	      gcc_assert (REG_P (reg));
18364 
18365 	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18366 
18367 	      /* If we are dealing with DF mode, make sure we don't
18368 		 clear either of the registers it addresses.  */
18369 	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18370 	      if (arg_regs > 1)
18371 		{
18372 		  unsigned HOST_WIDE_INT mask;
18373 		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18374 		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
18375 		  not_to_clear_mask |= mask;
18376 		}
18377 	    }
18378 	}
18379       else
18380 	{
18381 	  /* Otherwise we can rely on the MODE to determine how many registers
18382 	     are being used by this argument.  */
18383 	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18384 	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18385 	  if (arg_regs > 1)
18386 	    {
18387 	      unsigned HOST_WIDE_INT
18388 	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18389 	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18390 	      not_to_clear_mask |= mask;
18391 	    }
18392 	}
18393     }
18394 
18395   return not_to_clear_mask;
18396 }
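
/* Illustrative note on compute_not_to_clear_mask (assuming the usual AAPCS
   layout): a DImode argument in r0 sets bits 0 and 1 of the returned mask,
   while an argument of type struct { char c; int i; } passed in r0-r1 sets
   the bits for r0 and r1 and leaves padding_bits_to_clear[0] equal to
   0xffffff00, i.e. the three padding bytes of r0 above the char.  */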
18397 
18398 /* Clear secret values from registers before doing a cmse_nonsecure_call or
18399    returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP indicates
18400    which registers are to be fully cleared, using the value in register
18401    CLEARING_REG if more efficient.  The array PADDING_BITS_TO_CLEAR, which has
18402    PADDING_BITS_LEN entries, gives the bits that need to be cleared in
18403    caller-saved core registers, with SCRATCH_REG used as a scratch register.
18404 
18405    NOTE: at least one of the following three conditions must hold:
18406    - SCRATCH_REG is a low register
18407    - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18408      in TO_CLEAR_BITMAP)
18409    - CLEARING_REG is a low register.  */
18410 
18411 static void
18412 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18413 		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18414 {
18415   bool saved_clearing = false;
18416   rtx saved_clearing_reg = NULL_RTX;
18417   int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18418 
18419   gcc_assert (arm_arch_cmse);
18420 
18421   if (!bitmap_empty_p (to_clear_bitmap))
18422     {
18423       minregno = bitmap_first_set_bit (to_clear_bitmap);
18424       maxregno = bitmap_last_set_bit (to_clear_bitmap);
18425     }
18426   clearing_regno = REGNO (clearing_reg);
18427 
18428   /* Clear padding bits.  */
18429   gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18430   for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18431     {
18432       uint64_t mask;
18433       rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18434 
18435       if (padding_bits_to_clear[i] == 0)
18436 	continue;
18437 
18438       /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18439 	 CLEARING_REG as scratch.  */
18440       if (TARGET_THUMB1
18441 	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
18442 	{
18443 	  /* clearing_reg is not to be cleared; copy its value into scratch_reg
18444 	     such that we can use clearing_reg to clear the unused bits in the
18445 	     arguments.  */
18446 	  if ((clearing_regno > maxregno
18447 	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18448 	      && !saved_clearing)
18449 	    {
18450 	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18451 	      emit_move_insn (scratch_reg, clearing_reg);
18452 	      saved_clearing = true;
18453 	      saved_clearing_reg = scratch_reg;
18454 	    }
18455 	  scratch_reg = clearing_reg;
18456 	}
18457 
18458       /* Fill the lower half of the negated padding_bits_to_clear[i].  */
18459       mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18460       emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18461 
18462       /* Fill the top half of the negated padding_bits_to_clear[i].  */
18463       mask = (~padding_bits_to_clear[i]) >> 16;
18464       rtx16 = gen_int_mode (16, SImode);
18465       dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18466       if (mask)
18467 	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18468 
18469       emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18470     }
18471   if (saved_clearing)
18472     emit_move_insn (clearing_reg, saved_clearing_reg);
18473 
18474 
18475   /* Clear full registers.  */
18476 
18477   if (TARGET_HAVE_FPCXT_CMSE)
18478     {
18479       rtvec vunspec_vec;
18480       int i, j, k, nb_regs;
18481       rtx use_seq, par, reg, set, vunspec;
18482       int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18483       auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18484       auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18485 
18486       for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18487 	{
18488 	  /* Find next register to clear and exit if none.  */
18489 	  for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18490 	  if (i > maxregno)
18491 	    break;
18492 
18493 	  /* Compute number of consecutive registers to clear.  */
18494 	  for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18495 	       j++);
18496 	  nb_regs = j - i;
18497 
18498 	  /* Create VSCCLRM RTX pattern.  */
18499 	  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18500 	  vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18501 	  vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18502 					     VUNSPEC_VSCCLRM_VPR);
18503 	  XVECEXP (par, 0, 0) = vunspec;
18504 
18505 	  /* Insert VFP register clearing RTX in the pattern.  */
18506 	  start_sequence ();
18507 	  for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18508 	    {
18509 	      if (!bitmap_bit_p (to_clear_bitmap, j))
18510 		continue;
18511 
18512 	      reg = gen_rtx_REG (SFmode, j);
18513 	      set = gen_rtx_SET (reg, const0_rtx);
18514 	      XVECEXP (par, 0, k++) = set;
18515 	      emit_use (reg);
18516 	    }
18517 	  use_seq = get_insns ();
18518 	  end_sequence ();
18519 
18520 	  emit_insn_after (use_seq, emit_insn (par));
18521 	}
18522 
18523       /* Get set of core registers to clear.  */
18524       bitmap_clear (core_regs_bitmap);
18525       bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18526 			IP_REGNUM - R0_REGNUM + 1);
18527       bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18528 		  core_regs_bitmap);
18529       gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18530 
18531       if (bitmap_empty_p (to_clear_core_bitmap))
18532 	return;
18533 
18534       /* Create clrm RTX pattern.  */
18535       nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18536       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18537 
18538       /* Insert core register clearing RTX in the pattern.  */
18539       start_sequence ();
18540       for (j = 0, i = minregno; j < nb_regs; i++)
18541 	{
18542 	  if (!bitmap_bit_p (to_clear_core_bitmap, i))
18543 	    continue;
18544 
18545 	  reg = gen_rtx_REG (SImode, i);
18546 	  set = gen_rtx_SET (reg, const0_rtx);
18547 	  XVECEXP (par, 0, j++) = set;
18548 	  emit_use (reg);
18549 	}
18550 
18551       /* Insert APSR register clearing RTX in the pattern along with
18552 	 clobbering CC.  */
18553       vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18554       vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18555 					 VUNSPEC_CLRM_APSR);
18556 
18557       XVECEXP (par, 0, j++) = vunspec;
18558 
18559       rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18560       rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18561       XVECEXP (par, 0, j) = clobber;
18562 
18563       use_seq = get_insns ();
18564       end_sequence ();
18565 
18566       emit_insn_after (use_seq, emit_insn (par));
18567     }
18568   else
18569     {
18570       /* If not marked for clearing, clearing_reg already does not contain
18571 	 any secret.  */
18572       if (clearing_regno <= maxregno
18573 	  && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18574 	{
18575 	  emit_move_insn (clearing_reg, const0_rtx);
18576 	  emit_use (clearing_reg);
18577 	  bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18578 	}
18579 
18580       for (regno = minregno; regno <= maxregno; regno++)
18581 	{
18582 	  if (!bitmap_bit_p (to_clear_bitmap, regno))
18583 	    continue;
18584 
18585 	  if (IS_VFP_REGNUM (regno))
18586 	    {
18587 	      /* If regno is an even vfp register and its successor is also to
18588 		 be cleared, use vmov.  */
18589 	      if (TARGET_VFP_DOUBLE
18590 		  && VFP_REGNO_OK_FOR_DOUBLE (regno)
18591 		  && bitmap_bit_p (to_clear_bitmap, regno + 1))
18592 		{
18593 		  emit_move_insn (gen_rtx_REG (DFmode, regno),
18594 				  CONST1_RTX (DFmode));
18595 		  emit_use (gen_rtx_REG (DFmode, regno));
18596 		  regno++;
18597 		}
18598 	      else
18599 		{
18600 		  emit_move_insn (gen_rtx_REG (SFmode, regno),
18601 				  CONST1_RTX (SFmode));
18602 		  emit_use (gen_rtx_REG (SFmode, regno));
18603 		}
18604 	    }
18605 	  else
18606 	    {
18607 	      emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18608 	      emit_use (gen_rtx_REG (SImode, regno));
18609 	    }
18610 	}
18611     }
18612 }
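
/* Illustrative note on cmse_clear_registers: for
   padding_bits_to_clear[0] == 0xffffff00 the padding loop above builds the
   inverted mask 0x000000ff in SCRATCH_REG (the lower half via a move; the
   upper half is zero, so the zero_extract store is skipped) and then ANDs it
   into r0, wiping the three padding bytes while preserving the argument
   byte.  */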
18613 
18614 /* Clear core and caller-saved VFP registers not used to pass arguments before
18615    a cmse_nonsecure_call.  Saving, clearing and restoring of VFP callee-saved
18616    registers is done in the __gnu_cmse_nonsecure_call libcall.  See
18617    libgcc/config/arm/cmse_nonsecure_call.S.  */
18618 
18619 static void
18620 cmse_nonsecure_call_inline_register_clear (void)
18621 {
18622   basic_block bb;
18623 
18624   FOR_EACH_BB_FN (bb, cfun)
18625     {
18626       rtx_insn *insn;
18627 
18628       FOR_BB_INSNS (bb, insn)
18629 	{
18630 	  bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18631 	  /* frame = VFP regs + FPSCR + VPR.  */
18632 	  unsigned lazy_store_stack_frame_size
18633 	    = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18634 	  unsigned long callee_saved_mask
18635 	    = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18636 	    & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18637 	  unsigned address_regnum, regno;
18638 	  unsigned max_int_regno
18639 	    = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18640 	  unsigned max_fp_regno
18641 	    = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18642 	  unsigned maxregno
18643 	    = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18644 	  auto_sbitmap to_clear_bitmap (maxregno + 1);
18645 	  rtx_insn *seq;
18646 	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18647 	  rtx address;
18648 	  CUMULATIVE_ARGS args_so_far_v;
18649 	  cumulative_args_t args_so_far;
18650 	  tree arg_type, fntype;
18651 	  bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18652 	  function_args_iterator args_iter;
18653 	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18654 
18655 	  if (!NONDEBUG_INSN_P (insn))
18656 	    continue;
18657 
18658 	  if (!CALL_P (insn))
18659 	    continue;
18660 
18661 	  pat = PATTERN (insn);
18662 	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18663 	  call = XVECEXP (pat, 0, 0);
18664 
18665 	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
18666 	  if (GET_CODE (call) == SET)
18667 	      call = SET_SRC (call);
18668 
18669 	  /* Check if it is a cmse_nonsecure_call.  */
18670 	  unspec = XEXP (call, 0);
18671 	  if (GET_CODE (unspec) != UNSPEC
18672 	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18673 	    continue;
18674 
18675 	  /* Mark registers that need to be cleared.  Those that hold a
18676 	     parameter are removed from the set further below.  */
18677 	  bitmap_clear (to_clear_bitmap);
18678 	  bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18679 			    max_int_regno - R0_REGNUM + 1);
18680 
18681 	  /* Only look at the caller-saved floating point registers in case of
18682 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
18683 	     lazy store and loads which clear both caller- and callee-saved
18684 	     registers.  */
18685 	  if (!lazy_fpclear)
18686 	    {
18687 	      auto_sbitmap float_bitmap (maxregno + 1);
18688 
18689 	      bitmap_clear (float_bitmap);
18690 	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18691 				max_fp_regno - FIRST_VFP_REGNUM + 1);
18692 	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18693 	    }
18694 
18695 	  /* Make sure the register used to hold the function address is not
18696 	     cleared.  */
18697 	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
18698 	  gcc_assert (MEM_P (address));
18699 	  gcc_assert (REG_P (XEXP (address, 0)));
18700 	  address_regnum = REGNO (XEXP (address, 0));
18701 	  if (address_regnum <= max_int_regno)
18702 	    bitmap_clear_bit (to_clear_bitmap, address_regnum);
18703 
18704 	  /* Set basic block of call insn so that df rescan is performed on
18705 	     insns inserted here.  */
18706 	  set_block_for_insn (insn, bb);
18707 	  df_set_flags (DF_DEFER_INSN_RESCAN);
18708 	  start_sequence ();
18709 
18710 	  /* Make sure the scheduler doesn't schedule other insns beyond
18711 	     here.  */
18712 	  emit_insn (gen_blockage ());
18713 
	  /* Walk through all arguments and clear registers appropriately.  */
18716 	  fntype = TREE_TYPE (MEM_EXPR (address));
18717 	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18718 				    NULL_TREE);
18719 	  args_so_far = pack_cumulative_args (&args_so_far_v);
18720 	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18721 	    {
18722 	      rtx arg_rtx;
18723 	      uint64_t to_clear_args_mask;
18724 
18725 	      if (VOID_TYPE_P (arg_type))
18726 		continue;
18727 
18728 	      function_arg_info arg (arg_type, /*named=*/true);
18729 	      if (!first_param)
18730 		/* ??? We should advance after processing the argument and pass
18731 		   the argument we're advancing past.  */
18732 		arm_function_arg_advance (args_so_far, arg);
18733 
18734 	      arg_rtx = arm_function_arg (args_so_far, arg);
18735 	      gcc_assert (REG_P (arg_rtx));
18736 	      to_clear_args_mask
18737 		= compute_not_to_clear_mask (arg_type, arg_rtx,
18738 					     REGNO (arg_rtx),
18739 					     &padding_bits_to_clear[0]);
18740 	      if (to_clear_args_mask)
18741 		{
18742 		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
18743 		    {
18744 		      if (to_clear_args_mask & (1ULL << regno))
18745 			bitmap_clear_bit (to_clear_bitmap, regno);
18746 		    }
18747 		}
18748 
18749 	      first_param = false;
18750 	    }
18751 
18752 	  /* We use right shift and left shift to clear the LSB of the address
18753 	     we jump to instead of using bic, to avoid having to use an extra
18754 	     register on Thumb-1.  */
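	  /* For instance, with the branch target in r4 this emits roughly
	     (Thumb syntax, register number illustrative):
		 lsrs	r4, r4, #1
		 lsls	r4, r4, #1
	     which forces bit 0 of the address to zero.  */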
18755 	  clearing_reg = XEXP (address, 0);
18756 	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18757 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18758 	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18759 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
18760 
18761 	  if (clear_callee_saved)
18762 	    {
18763 	      rtx push_insn =
18764 		emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18765 	      /* Disable frame debug info in push because it needs to be
18766 		 disabled for pop (see below).  */
18767 	      RTX_FRAME_RELATED_P (push_insn) = 0;
18768 
18769 	      /* Lazy store multiple.  */
18770 	      if (lazy_fpclear)
18771 		{
18772 		  rtx imm;
18773 		  rtx_insn *add_insn;
18774 
18775 		  imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18776 		  add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18777 						    stack_pointer_rtx, imm));
18778 		  /* If we have the frame pointer, then it will be the
18779 		     CFA reg.  Otherwise, the stack pointer is the CFA
18780 		     reg, so we need to emit a CFA adjust.  */
18781 		  if (!frame_pointer_needed)
18782 		    arm_add_cfa_adjust_cfa_note (add_insn,
18783 						 - lazy_store_stack_frame_size,
18784 						 stack_pointer_rtx,
18785 						 stack_pointer_rtx);
18786 		  emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18787 		}
18788 	      /* Save VFP callee-saved registers.  */
18789 	      else
18790 		{
18791 		  vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18792 				  (max_fp_regno - D7_VFP_REGNUM) / 2);
18793 		  /* Disable frame debug info in push because it needs to be
18794 		     disabled for vpop (see below).  */
18795 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18796 		}
18797 	    }
18798 
18799 	  /* Clear caller-saved registers that leak before doing a non-secure
18800 	     call.  */
18801 	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18802 	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18803 				NUM_ARG_REGS, ip_reg, clearing_reg);
18804 
18805 	  seq = get_insns ();
18806 	  end_sequence ();
18807 	  emit_insn_before (seq, insn);
18808 
18809 	  if (TARGET_HAVE_FPCXT_CMSE)
18810 	    {
18811 	      rtx_insn *last, *pop_insn, *after = insn;
18812 
18813 	      start_sequence ();
18814 
18815 	      /* Lazy load multiple done as part of libcall in Armv8-M.  */
18816 	      if (lazy_fpclear)
18817 		{
18818 		  rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18819 		  emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18820 		  rtx_insn *add_insn =
18821 		    emit_insn (gen_addsi3 (stack_pointer_rtx,
18822 					   stack_pointer_rtx, imm));
18823 		  if (!frame_pointer_needed)
18824 		    arm_add_cfa_adjust_cfa_note (add_insn,
18825 						 lazy_store_stack_frame_size,
18826 						 stack_pointer_rtx,
18827 						 stack_pointer_rtx);
18828 		}
18829 	      /* Restore VFP callee-saved registers.  */
18830 	      else
18831 		{
18832 		  int nb_callee_saved_vfp_regs =
18833 		    (max_fp_regno - D7_VFP_REGNUM) / 2;
18834 		  arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18835 					      nb_callee_saved_vfp_regs,
18836 					      stack_pointer_rtx);
		  /* Disable frame debug info in vpop because the SP adjustment
		     is made using a CFA adjustment note while the CFA used is
		     sometimes R7.  This then causes an assert failure in the
		     CFI note creation code.  */
18841 		  RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18842 		}
18843 
18844 	      arm_emit_multi_reg_pop (callee_saved_mask);
18845 	      pop_insn = get_last_insn ();
18846 
	      /* Disable frame debug info in pop because it resets the state
		 of the popped registers to what it was at the beginning of
		 the function, before the prologue.  This leads to incorrect
		 state when doing the pop after the nonsecure call for
		 registers that are pushed both in the prologue and before the
		 nonsecure call.

		 It also occasionally triggers an assert failure in the CFI
		 note creation code when there are two codepaths to the
		 epilogue, one of which does not go through the nonsecure
		 call.  Obviously this means that debugging between the push
		 and pop is not reliable.  */
18858 	      RTX_FRAME_RELATED_P (pop_insn) = 0;
18859 
18860 	      seq = get_insns ();
18861 	      last = get_last_insn ();
18862 	      end_sequence ();
18863 
18864 	      emit_insn_after (seq, after);
18865 
	      /* Skip the pop we have just inserted after the nonsecure call;
		 we know it does not contain a nonsecure call.  */
18868 	      insn = last;
18869 	    }
18870 	}
18871     }
18872 }
18873 
18874 /* Rewrite move insn into subtract of 0 if the condition codes will
18875    be useful in next conditional jump insn.  */
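/* For instance, if the last insns of a block are (register numbers
   illustrative):

       (set (reg:SI r3) (reg:SI r2))
       ...
       (set (pc) (if_then_else (eq (reg:SI r3) (const_int 0)) ...))

   the copy becomes (set (reg:SI r3) (minus:SI (reg:SI r2) (const_int 0))),
   i.e. a flag-setting "subs", so that the compare against zero can later
   be omitted.  */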
18876 
18877 static void
thumb1_reorg (void)
18879 {
18880   basic_block bb;
18881 
18882   FOR_EACH_BB_FN (bb, cfun)
18883     {
18884       rtx dest, src;
18885       rtx cmp, op0, op1, set = NULL;
18886       rtx_insn *prev, *insn = BB_END (bb);
18887       bool insn_clobbered = false;
18888 
18889       while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18890 	insn = PREV_INSN (insn);
18891 
18892       /* Find the last cbranchsi4_insn in basic block BB.  */
18893       if (insn == BB_HEAD (bb)
18894 	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18895 	continue;
18896 
18897       /* Get the register with which we are comparing.  */
18898       cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18899       op0 = XEXP (cmp, 0);
18900       op1 = XEXP (cmp, 1);
18901 
18902       /* Check that comparison is against ZERO.  */
18903       if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18904 	continue;
18905 
18906       /* Find the first flag setting insn before INSN in basic block BB.  */
18907       gcc_assert (insn != BB_HEAD (bb));
18908       for (prev = PREV_INSN (insn);
18909 	   (!insn_clobbered
18910 	    && prev != BB_HEAD (bb)
18911 	    && (NOTE_P (prev)
18912 		|| DEBUG_INSN_P (prev)
18913 		|| ((set = single_set (prev)) != NULL
18914 		    && get_attr_conds (prev) == CONDS_NOCOND)));
18915 	   prev = PREV_INSN (prev))
18916 	{
18917 	  if (reg_set_p (op0, prev))
18918 	    insn_clobbered = true;
18919 	}
18920 
      /* Skip if op0 is clobbered by an insn other than prev.  */
18922       if (insn_clobbered)
18923 	continue;
18924 
18925       if (!set)
18926 	continue;
18927 
18928       dest = SET_DEST (set);
18929       src = SET_SRC (set);
18930       if (!low_register_operand (dest, SImode)
18931 	  || !low_register_operand (src, SImode))
18932 	continue;
18933 
18934       /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18935 	 in INSN.  Both src and dest of the move insn are checked.  */
18936       if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18937 	{
18938 	  dest = copy_rtx (dest);
18939 	  src = copy_rtx (src);
18940 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
18941 	  PATTERN (prev) = gen_rtx_SET (dest, src);
18942 	  INSN_CODE (prev) = -1;
18943 	  /* Set test register in INSN to dest.  */
18944 	  XEXP (cmp, 0) = copy_rtx (dest);
18945 	  INSN_CODE (insn) = -1;
18946 	}
18947     }
18948 }
18949 
18950 /* Convert instructions to their cc-clobbering variant if possible, since
18951    that allows us to use smaller encodings.  */
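/* For example, when the condition flags are dead after the insn, a plain

       (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   gains a (clobber (reg:CC CC_REGNUM)) so that it can be output as the
   16-bit flag-setting "adds r0, r1, r2" instead of a 32-bit "add"
   (register numbers illustrative).  */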
18952 
18953 static void
thumb2_reorg (void)
18955 {
18956   basic_block bb;
18957   regset_head live;
18958 
18959   INIT_REG_SET (&live);
18960 
18961   /* We are freeing block_for_insn in the toplev to keep compatibility
18962      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
18963   compute_bb_for_insn ();
18964   df_analyze ();
18965 
18966   enum Convert_Action {SKIP, CONV, SWAP_CONV};
18967 
18968   FOR_EACH_BB_FN (bb, cfun)
18969     {
18970       if ((current_tune->disparage_flag_setting_t16_encodings
18971 	   == tune_params::DISPARAGE_FLAGS_ALL)
18972 	  && optimize_bb_for_speed_p (bb))
18973 	continue;
18974 
18975       rtx_insn *insn;
18976       Convert_Action action = SKIP;
18977       Convert_Action action_for_partial_flag_setting
18978 	= ((current_tune->disparage_flag_setting_t16_encodings
18979 	    != tune_params::DISPARAGE_FLAGS_NEITHER)
18980 	   && optimize_bb_for_speed_p (bb))
18981 	  ? SKIP : CONV;
18982 
18983       COPY_REG_SET (&live, DF_LR_OUT (bb));
18984       df_simulate_initialize_backwards (bb, &live);
18985       FOR_BB_INSNS_REVERSE (bb, insn)
18986 	{
18987 	  if (NONJUMP_INSN_P (insn)
18988 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
18989 	      && GET_CODE (PATTERN (insn)) == SET)
18990 	    {
18991 	      action = SKIP;
18992 	      rtx pat = PATTERN (insn);
18993 	      rtx dst = XEXP (pat, 0);
18994 	      rtx src = XEXP (pat, 1);
18995 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
18996 
18997 	      if (UNARY_P (src) || BINARY_P (src))
18998 		  op0 = XEXP (src, 0);
18999 
19000 	      if (BINARY_P (src))
19001 		  op1 = XEXP (src, 1);
19002 
19003 	      if (low_register_operand (dst, SImode))
19004 		{
19005 		  switch (GET_CODE (src))
19006 		    {
19007 		    case PLUS:
19008 		      /* Adding two registers and storing the result
19009 			 in the first source is already a 16-bit
19010 			 operation.  */
19011 		      if (rtx_equal_p (dst, op0)
19012 			  && register_operand (op1, SImode))
19013 			break;
19014 
19015 		      if (low_register_operand (op0, SImode))
19016 			{
19017 			  /* ADDS <Rd>,<Rn>,<Rm>  */
19018 			  if (low_register_operand (op1, SImode))
19019 			    action = CONV;
19020 			  /* ADDS <Rdn>,#<imm8>  */
19021 			  /* SUBS <Rdn>,#<imm8>  */
19022 			  else if (rtx_equal_p (dst, op0)
19023 				   && CONST_INT_P (op1)
19024 				   && IN_RANGE (INTVAL (op1), -255, 255))
19025 			    action = CONV;
19026 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
19027 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
19028 			  else if (CONST_INT_P (op1)
19029 				   && IN_RANGE (INTVAL (op1), -7, 7))
19030 			    action = CONV;
19031 			}
19032 		      /* ADCS <Rd>, <Rn>  */
19033 		      else if (GET_CODE (XEXP (src, 0)) == PLUS
19034 			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19035 			      && low_register_operand (XEXP (XEXP (src, 0), 1),
19036 						       SImode)
19037 			      && COMPARISON_P (op1)
19038 			      && cc_register (XEXP (op1, 0), VOIDmode)
19039 			      && maybe_get_arm_condition_code (op1) == ARM_CS
19040 			      && XEXP (op1, 1) == const0_rtx)
19041 		        action = CONV;
19042 		      break;
19043 
19044 		    case MINUS:
19045 		      /* RSBS <Rd>,<Rn>,#0
19046 			 Not handled here: see NEG below.  */
19047 		      /* SUBS <Rd>,<Rn>,#<imm3>
19048 			 SUBS <Rdn>,#<imm8>
19049 			 Not handled here: see PLUS above.  */
19050 		      /* SUBS <Rd>,<Rn>,<Rm>  */
19051 		      if (low_register_operand (op0, SImode)
19052 			  && low_register_operand (op1, SImode))
19053 			    action = CONV;
19054 		      break;
19055 
19056 		    case MULT:
19057 		      /* MULS <Rdm>,<Rn>,<Rdm>
19058 			 As an exception to the rule, this is only used
19059 			 when optimizing for size since MULS is slow on all
19060 			 known implementations.  We do not even want to use
19061 			 MULS in cold code, if optimizing for speed, so we
19062 			 test the global flag here.  */
19063 		      if (!optimize_size)
19064 			break;
19065 		      /* Fall through.  */
19066 		    case AND:
19067 		    case IOR:
19068 		    case XOR:
19069 		      /* ANDS <Rdn>,<Rm>  */
19070 		      if (rtx_equal_p (dst, op0)
19071 			  && low_register_operand (op1, SImode))
19072 			action = action_for_partial_flag_setting;
19073 		      else if (rtx_equal_p (dst, op1)
19074 			       && low_register_operand (op0, SImode))
19075 			action = action_for_partial_flag_setting == SKIP
19076 				 ? SKIP : SWAP_CONV;
19077 		      break;
19078 
19079 		    case ASHIFTRT:
19080 		    case ASHIFT:
19081 		    case LSHIFTRT:
19082 		      /* ASRS <Rdn>,<Rm> */
19083 		      /* LSRS <Rdn>,<Rm> */
19084 		      /* LSLS <Rdn>,<Rm> */
19085 		      if (rtx_equal_p (dst, op0)
19086 			  && low_register_operand (op1, SImode))
19087 			action = action_for_partial_flag_setting;
19088 		      /* ASRS <Rd>,<Rm>,#<imm5> */
19089 		      /* LSRS <Rd>,<Rm>,#<imm5> */
19090 		      /* LSLS <Rd>,<Rm>,#<imm5> */
19091 		      else if (low_register_operand (op0, SImode)
19092 			       && CONST_INT_P (op1)
19093 			       && IN_RANGE (INTVAL (op1), 0, 31))
19094 			action = action_for_partial_flag_setting;
19095 		      break;
19096 
19097 		    case ROTATERT:
19098 		      /* RORS <Rdn>,<Rm>  */
19099 		      if (rtx_equal_p (dst, op0)
19100 			  && low_register_operand (op1, SImode))
19101 			action = action_for_partial_flag_setting;
19102 		      break;
19103 
19104 		    case NOT:
19105 		      /* MVNS <Rd>,<Rm>  */
19106 		      if (low_register_operand (op0, SImode))
19107 			action = action_for_partial_flag_setting;
19108 		      break;
19109 
19110 		    case NEG:
19111 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
19112 		      if (low_register_operand (op0, SImode))
19113 			action = CONV;
19114 		      break;
19115 
19116 		    case CONST_INT:
19117 		      /* MOVS <Rd>,#<imm8>  */
19118 		      if (CONST_INT_P (src)
19119 			  && IN_RANGE (INTVAL (src), 0, 255))
19120 			action = action_for_partial_flag_setting;
19121 		      break;
19122 
19123 		    case REG:
19124 		      /* MOVS and MOV<c> with registers have different
19125 			 encodings, so are not relevant here.  */
19126 		      break;
19127 
19128 		    default:
19129 		      break;
19130 		    }
19131 		}
19132 
19133 	      if (action != SKIP)
19134 		{
19135 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19136 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19137 		  rtvec vec;
19138 
19139 		  if (action == SWAP_CONV)
19140 		    {
19141 		      src = copy_rtx (src);
19142 		      XEXP (src, 0) = op1;
19143 		      XEXP (src, 1) = op0;
19144 		      pat = gen_rtx_SET (dst, src);
19145 		      vec = gen_rtvec (2, pat, clobber);
19146 		    }
19147 		  else /* action == CONV */
19148 		    vec = gen_rtvec (2, pat, clobber);
19149 
19150 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19151 		  INSN_CODE (insn) = -1;
19152 		}
19153 	    }
19154 
19155 	  if (NONDEBUG_INSN_P (insn))
19156 	    df_simulate_one_insn_backwards (bb, insn, &live);
19157 	}
19158     }
19159 
19160   CLEAR_REG_SET (&live);
19161 }
19162 
/* GCC puts the pool in the wrong place for ARM, since we can only
19164    load addresses a limited distance around the pc.  We do some
19165    special munging to move the constant pool values to the correct
19166    point in the code.  */
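/* (For reference: an ARM-state literal load such as "ldr rN, [pc, #imm]"
   only reaches roughly +/-4KB around the instruction, and the Thumb-1
   form reaches considerably less, hence the need to keep pools close.)  */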
19167 static void
arm_reorg (void)
19169 {
19170   rtx_insn *insn;
19171   HOST_WIDE_INT address = 0;
19172   Mfix * fix;
19173 
19174   if (use_cmse)
19175     cmse_nonsecure_call_inline_register_clear ();
19176 
19177   /* We cannot run the Thumb passes for thunks because there is no CFG.  */
19178   if (cfun->is_thunk)
19179     ;
19180   else if (TARGET_THUMB1)
19181     thumb1_reorg ();
19182   else if (TARGET_THUMB2)
19183     thumb2_reorg ();
19184 
19185   /* Ensure all insns that must be split have been split at this point.
19186      Otherwise, the pool placement code below may compute incorrect
19187      insn lengths.  Note that when optimizing, all insns have already
19188      been split at this point.  */
19189   if (!optimize)
19190     split_all_insns_noflow ();
19191 
19192   /* Make sure we do not attempt to create a literal pool even though it should
19193      no longer be necessary to create any.  */
19194   if (arm_disable_literal_pool)
19195     return ;
19196 
19197   minipool_fix_head = minipool_fix_tail = NULL;
19198 
19199   /* The first insn must always be a note, or the code below won't
19200      scan it properly.  */
19201   insn = get_insns ();
19202   gcc_assert (NOTE_P (insn));
19203   minipool_pad = 0;
19204 
19205   /* Scan all the insns and record the operands that will need fixing.  */
19206   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19207     {
19208       if (BARRIER_P (insn))
19209 	push_minipool_barrier (insn, address);
19210       else if (INSN_P (insn))
19211 	{
19212 	  rtx_jump_table_data *table;
19213 
19214 	  note_invalid_constants (insn, address, true);
19215 	  address += get_attr_length (insn);
19216 
19217 	  /* If the insn is a vector jump, add the size of the table
19218 	     and skip the table.  */
19219 	  if (tablejump_p (insn, NULL, &table))
19220 	    {
19221 	      address += get_jump_table_size (table);
19222 	      insn = table;
19223 	    }
19224 	}
19225       else if (LABEL_P (insn))
19226 	/* Add the worst-case padding due to alignment.  We don't add
19227 	   the _current_ padding because the minipool insertions
19228 	   themselves might change it.  */
19229 	address += get_label_padding (insn);
19230     }
19231 
19232   fix = minipool_fix_head;
19233 
19234   /* Now scan the fixups and perform the required changes.  */
19235   while (fix)
19236     {
19237       Mfix * ftmp;
19238       Mfix * fdel;
19239       Mfix *  last_added_fix;
19240       Mfix * last_barrier = NULL;
19241       Mfix * this_fix;
19242 
19243       /* Skip any further barriers before the next fix.  */
19244       while (fix && BARRIER_P (fix->insn))
19245 	fix = fix->next;
19246 
19247       /* No more fixes.  */
19248       if (fix == NULL)
19249 	break;
19250 
19251       last_added_fix = NULL;
19252 
19253       for (ftmp = fix; ftmp; ftmp = ftmp->next)
19254 	{
19255 	  if (BARRIER_P (ftmp->insn))
19256 	    {
19257 	      if (ftmp->address >= minipool_vector_head->max_address)
19258 		break;
19259 
19260 	      last_barrier = ftmp;
19261 	    }
19262 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19263 	    break;
19264 
19265 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
19266 	}
19267 
19268       /* If we found a barrier, drop back to that; any fixes that we
19269 	 could have reached but come after the barrier will now go in
19270 	 the next mini-pool.  */
19271       if (last_barrier != NULL)
19272 	{
19273 	  /* Reduce the refcount for those fixes that won't go into this
19274 	     pool after all.  */
19275 	  for (fdel = last_barrier->next;
19276 	       fdel && fdel != ftmp;
19277 	       fdel = fdel->next)
19278 	    {
19279 	      fdel->minipool->refcount--;
19280 	      fdel->minipool = NULL;
19281 	    }
19282 
19283 	  ftmp = last_barrier;
19284 	}
19285       else
19286         {
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
19289 	     new barrier in the code somewhere between the previous
19290 	     fix and this one, and arrange to jump around it.  */
19291 	  HOST_WIDE_INT max_address;
19292 
19293 	  /* The last item on the list of fixes must be a barrier, so
19294 	     we can never run off the end of the list of fixes without
19295 	     last_barrier being set.  */
19296 	  gcc_assert (ftmp);
19297 
19298 	  max_address = minipool_vector_head->max_address;
19299 	  /* Check that there isn't another fix that is in range that
19300 	     we couldn't fit into this pool because the pool was
19301 	     already too large: we need to put the pool before such an
19302 	     instruction.  The pool itself may come just after the
19303 	     fix because create_fix_barrier also allows space for a
19304 	     jump instruction.  */
19305 	  if (ftmp->address < max_address)
19306 	    max_address = ftmp->address + 1;
19307 
19308 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
19309 	}
19310 
19311       assign_minipool_offsets (last_barrier);
19312 
19313       while (ftmp)
19314 	{
19315 	  if (!BARRIER_P (ftmp->insn)
19316 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19317 		  == NULL))
19318 	    break;
19319 
19320 	  ftmp = ftmp->next;
19321 	}
19322 
19323       /* Scan over the fixes we have identified for this pool, fixing them
19324 	 up and adding the constants to the pool itself.  */
19325       for (this_fix = fix; this_fix && ftmp != this_fix;
19326 	   this_fix = this_fix->next)
19327 	if (!BARRIER_P (this_fix->insn))
19328 	  {
19329 	    rtx addr
19330 	      = plus_constant (Pmode,
19331 			       gen_rtx_LABEL_REF (VOIDmode,
19332 						  minipool_vector_label),
19333 			       this_fix->minipool->offset);
19334 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19335 	  }
19336 
19337       dump_minipool (last_barrier->insn);
19338       fix = ftmp;
19339     }
19340 
19341   /* From now on we must synthesize any constants that we can't handle
19342      directly.  This can happen if the RTL gets split during final
19343      instruction generation.  */
19344   cfun->machine->after_arm_reorg = 1;
19345 
19346   /* Free the minipool memory.  */
19347   obstack_free (&minipool_obstack, minipool_startobj);
19348 }
19349 
19350 /* Routines to output assembly language.  */
19351 
19352 /* Return string representation of passed in real value.  */
19353 static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
19355 {
19356   if (!fp_consts_inited)
19357     init_fp_table ();
19358 
19359   gcc_assert (real_equal (r, &value_fp0));
19360   return "0";
19361 }
19362 
19363 /* OPERANDS[0] is the entire list of insns that constitute pop,
19364    OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19365    is in the list, UPDATE is true iff the list contains explicit
19366    update of base register.  */
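/* For example, a pop of r4, r5 and the return address with SP writeback is
   printed as "pop {r4, r5, pc}", while a non-writeback load from another
   base register comes out as "ldm r2, {r4, r5}" (operands illustrative).  */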
19367 void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19369                          bool update)
19370 {
19371   int i;
19372   char pattern[100];
19373   int offset;
19374   const char *conditional;
19375   int num_saves = XVECLEN (operands[0], 0);
19376   unsigned int regno;
19377   unsigned int regno_base = REGNO (operands[1]);
19378   bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19379 
19380   offset = 0;
19381   offset += update ? 1 : 0;
19382   offset += return_pc ? 1 : 0;
19383 
19384   /* Is the base register in the list?  */
19385   for (i = offset; i < num_saves; i++)
19386     {
19387       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19388       /* If SP is in the list, then the base register must be SP.  */
19389       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19390       /* If base register is in the list, there must be no explicit update.  */
19391       if (regno == regno_base)
19392         gcc_assert (!update);
19393     }
19394 
19395   conditional = reverse ? "%?%D0" : "%?%d0";
19396   /* Can't use POP if returning from an interrupt.  */
19397   if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19398     sprintf (pattern, "pop%s\t{", conditional);
19399   else
19400     {
19401       /* Output ldmfd when the base register is SP, otherwise output ldmia.
19402          It's just a convention, their semantics are identical.  */
19403       if (regno_base == SP_REGNUM)
19404 	sprintf (pattern, "ldmfd%s\t", conditional);
19405       else if (update)
19406 	sprintf (pattern, "ldmia%s\t", conditional);
19407       else
19408 	sprintf (pattern, "ldm%s\t", conditional);
19409 
19410       strcat (pattern, reg_names[regno_base]);
19411       if (update)
19412         strcat (pattern, "!, {");
19413       else
19414         strcat (pattern, ", {");
19415     }
19416 
19417   /* Output the first destination register.  */
19418   strcat (pattern,
19419           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19420 
19421   /* Output the rest of the destination registers.  */
19422   for (i = offset + 1; i < num_saves; i++)
19423     {
19424       strcat (pattern, ", ");
19425       strcat (pattern,
19426               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19427     }
19428 
19429   strcat (pattern, "}");
19430 
19431   if (interrupt_p && return_pc)
19432     strcat (pattern, "^");
19433 
19434   output_asm_insn (pattern, &cond);
19435 }
19436 
19437 
19438 /* Output the assembly for a store multiple.  */
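/* For instance, a push of {d8, d9, d10} is printed roughly as
   "vpush.64 {d8, d9, d10}" when the address register is SP, and as
   "vstmdb.64 r5!, {d8, d9, d10}" otherwise (operands illustrative).  */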
19439 
19440 const char *
vfp_output_vstmd (rtx * operands)
19442 {
19443   char pattern[100];
19444   int p;
19445   int base;
19446   int i;
19447   rtx addr_reg = REG_P (XEXP (operands[0], 0))
19448 		   ? XEXP (operands[0], 0)
19449 		   : XEXP (XEXP (operands[0], 0), 0);
19450   bool push_p =  REGNO (addr_reg) == SP_REGNUM;
19451 
19452   if (push_p)
19453     strcpy (pattern, "vpush%?.64\t{%P1");
19454   else
19455     strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19456 
19457   p = strlen (pattern);
19458 
19459   gcc_assert (REG_P (operands[1]));
19460 
19461   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19462   for (i = 1; i < XVECLEN (operands[2], 0); i++)
19463     {
19464       p += sprintf (&pattern[p], ", d%d", base + i);
19465     }
19466   strcpy (&pattern[p], "}");
19467 
19468   output_asm_insn (pattern, operands);
19469   return "";
19470 }
19471 
19472 
19473 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
19474    number of bytes pushed.  */
19475 
19476 static int
vfp_emit_fstmd (int base_reg, int count)
19478 {
19479   rtx par;
19480   rtx dwarf;
19481   rtx tmp, reg;
19482   int i;
19483 
  /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly
     two register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
19487   if (count == 2 && !arm_arch6)
19488     {
19489       if (base_reg == LAST_VFP_REGNUM - 3)
19490 	base_reg -= 2;
19491       count++;
19492     }
19493 
19494   /* FSTMD may not store more than 16 doubleword registers at once.  Split
19495      larger stores into multiple parts (up to a maximum of two, in
19496      practice).  */
19497   if (count > 16)
19498     {
19499       int saved;
19500       /* NOTE: base_reg is an internal register number, so each D register
19501          counts as 2.  */
19502       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19503       saved += vfp_emit_fstmd (base_reg, 16);
19504       return saved;
19505     }
19506 
19507   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19508   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19509 
19510   reg = gen_rtx_REG (DFmode, base_reg);
19511   base_reg += 2;
19512 
19513   XVECEXP (par, 0, 0)
19514     = gen_rtx_SET (gen_frame_mem
19515 		   (BLKmode,
19516 		    gen_rtx_PRE_MODIFY (Pmode,
19517 					stack_pointer_rtx,
19518 					plus_constant
19519 					(Pmode, stack_pointer_rtx,
19520 					 - (count * 8)))
19521 		    ),
19522 		   gen_rtx_UNSPEC (BLKmode,
19523 				   gen_rtvec (1, reg),
19524 				   UNSPEC_PUSH_MULT));
19525 
19526   tmp = gen_rtx_SET (stack_pointer_rtx,
19527 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19528   RTX_FRAME_RELATED_P (tmp) = 1;
19529   XVECEXP (dwarf, 0, 0) = tmp;
19530 
19531   tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19532   RTX_FRAME_RELATED_P (tmp) = 1;
19533   XVECEXP (dwarf, 0, 1) = tmp;
19534 
19535   for (i = 1; i < count; i++)
19536     {
19537       reg = gen_rtx_REG (DFmode, base_reg);
19538       base_reg += 2;
19539       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19540 
19541       tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19542 					plus_constant (Pmode,
19543 						       stack_pointer_rtx,
19544 						       i * 8)),
19545 			 reg);
19546       RTX_FRAME_RELATED_P (tmp) = 1;
19547       XVECEXP (dwarf, 0, i + 1) = tmp;
19548     }
19549 
19550   par = emit_insn (par);
19551   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19552   RTX_FRAME_RELATED_P (par) = 1;
19553 
19554   return count * 8;
19555 }
19556 
/* Return TRUE if -mcmse has been passed and the function pointed to by
   'addr' has the cmse_nonsecure_call attribute; return FALSE otherwise.  */
19559 
19560 bool
detect_cmse_nonsecure_call (tree addr)
19562 {
19563   if (!addr)
19564     return FALSE;
19565 
19566   tree fntype = TREE_TYPE (addr);
19567   if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19568 				    TYPE_ATTRIBUTES (fntype)))
19569     return TRUE;
19570   return FALSE;
19571 }
19572 
19573 
19574 /* Emit a call instruction with pattern PAT.  ADDR is the address of
19575    the call target.  */
19576 
19577 void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19579 {
19580   rtx insn;
19581 
19582   insn = emit_call_insn (pat);
19583 
19584   /* The PIC register is live on entry to VxWorks PIC PLT entries.
19585      If the call might use such an entry, add a use of the PIC register
19586      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
19587   if (TARGET_VXWORKS_RTP
19588       && flag_pic
19589       && !sibcall
19590       && GET_CODE (addr) == SYMBOL_REF
19591       && (SYMBOL_REF_DECL (addr)
19592 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19593 	  : !SYMBOL_REF_LOCAL_P (addr)))
19594     {
19595       require_pic_register (NULL_RTX, false /*compute_now*/);
19596       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19597     }
19598 
19599   if (TARGET_FDPIC)
19600     {
19601       rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19602       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19603     }
19604 
19605   if (TARGET_AAPCS_BASED)
19606     {
19607       /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19608 	 linker.  We need to add an IP clobber to allow setting
19609 	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
19610 	 is not needed since it's a fixed register.  */
19611       rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19612       clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19613     }
19614 }
19615 
19616 /* Output a 'call' insn.  */
19617 const char *
output_call (rtx *operands)
19619 {
19620   gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly.  */
19621 
19622   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
19623   if (REGNO (operands[0]) == LR_REGNUM)
19624     {
19625       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19626       output_asm_insn ("mov%?\t%0, %|lr", operands);
19627     }
19628 
19629   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19630 
19631   if (TARGET_INTERWORK || arm_arch4t)
19632     output_asm_insn ("bx%?\t%0", operands);
19633   else
19634     output_asm_insn ("mov%?\t%|pc, %0", operands);
19635 
19636   return "";
19637 }
19638 
19639 /* Output a move from arm registers to arm registers of a long double
19640    OPERANDS[0] is the destination.
19641    OPERANDS[1] is the source.  */
19642 const char *
output_mov_long_double_arm_from_arm (rtx *operands)
19644 {
19645   /* We have to be careful here because the two might overlap.  */
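  /* E.g. moving {r2,r3,r4} into {r1,r2,r3} must copy r2 first (ascending
     order), whereas moving {r1,r2,r3} into {r2,r3,r4} must copy r3 first
     (descending order), so no source register is overwritten before it has
     been read (register numbers illustrative).  */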
19646   int dest_start = REGNO (operands[0]);
19647   int src_start = REGNO (operands[1]);
19648   rtx ops[2];
19649   int i;
19650 
19651   if (dest_start < src_start)
19652     {
19653       for (i = 0; i < 3; i++)
19654 	{
19655 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19656 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19657 	  output_asm_insn ("mov%?\t%0, %1", ops);
19658 	}
19659     }
19660   else
19661     {
19662       for (i = 2; i >= 0; i--)
19663 	{
19664 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
19665 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
19666 	  output_asm_insn ("mov%?\t%0, %1", ops);
19667 	}
19668     }
19669 
19670   return "";
19671 }
19672 
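/* Load SRC into DEST as a pair of half-word moves (movw/movt).  For example,
   loading the constant 0x12345678 comes out roughly as:

       movw	rd, #0x5678
       movt	rd, #0x1234

   with the movt omitted when the upper half-word is zero ("rd" stands for
   the destination register).  */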
19673 void
arm_emit_movpair (rtx dest, rtx src)
19675  {
19676   /* If the src is an immediate, simplify it.  */
19677   if (CONST_INT_P (src))
19678     {
19679       HOST_WIDE_INT val = INTVAL (src);
19680       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19681       if ((val >> 16) & 0x0000ffff)
19682 	{
19683 	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19684 					       GEN_INT (16)),
19685 			 GEN_INT ((val >> 16) & 0x0000ffff));
19686 	  rtx_insn *insn = get_last_insn ();
19687 	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19688 	}
19689       return;
19690     }
19691    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19692    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19693    rtx_insn *insn = get_last_insn ();
19694    set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19695  }
19696 
19697 /* Output a move between double words.  It must be REG<-MEM
19698    or MEM<-REG.  */
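/* For instance, a load of the register pair starting at r0 from the address
   in r2 is typically printed as "ldrd r0, [r2]" when LDRD is usable and as
   "ldmia r2, {r0, r1}" otherwise (operands illustrative).  */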
19699 const char *
output_move_double (rtx *operands, bool emit, int *count)
19701 {
19702   enum rtx_code code0 = GET_CODE (operands[0]);
19703   enum rtx_code code1 = GET_CODE (operands[1]);
19704   rtx otherops[3];
19705   if (count)
19706     *count = 1;
19707 
19708   /* The only case when this might happen is when
19709      you are looking at the length of a DImode instruction
19710      that has an invalid constant in it.  */
19711   if (code0 == REG && code1 != MEM)
19712     {
19713       gcc_assert (!emit);
19714       *count = 2;
19715       return "";
19716     }
19717 
19718   if (code0 == REG)
19719     {
19720       unsigned int reg0 = REGNO (operands[0]);
19721       const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19722 
19723       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19724 
19725       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
19726 
19727       switch (GET_CODE (XEXP (operands[1], 0)))
19728 	{
19729 	case REG:
19730 
19731 	  if (emit)
19732 	    {
19733 	      if (can_ldrd
19734 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19735 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19736 	      else
19737 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19738 	    }
19739 	  break;
19740 
19741 	case PRE_INC:
19742 	  gcc_assert (can_ldrd);
19743 	  if (emit)
19744 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19745 	  break;
19746 
19747 	case PRE_DEC:
19748 	  if (emit)
19749 	    {
19750 	      if (can_ldrd)
19751 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19752 	      else
19753 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19754 	    }
19755 	  break;
19756 
19757 	case POST_INC:
19758 	  if (emit)
19759 	    {
19760 	      if (can_ldrd)
19761 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19762 	      else
19763 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19764 	    }
19765 	  break;
19766 
19767 	case POST_DEC:
19768 	  gcc_assert (can_ldrd);
19769 	  if (emit)
19770 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19771 	  break;
19772 
19773 	case PRE_MODIFY:
19774 	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
19776 	     base and destination registers, and overlapping index registers
19777 	     are already prohibited, so this doesn't need to worry about
19778 	     fix_cm3_ldrd.  */
19779 	  otherops[0] = operands[0];
19780 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19781 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19782 
19783 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19784 	    {
19785 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19786 		{
19787 		  /* Registers overlap so split out the increment.  */
19788 		  if (emit)
19789 		    {
19790 		      gcc_assert (can_ldrd);
19791 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
19792 		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19793 		    }
19794 		  if (count)
19795 		    *count = 2;
19796 		}
19797 	      else
19798 		{
19799 		  /* Use a single insn if we can.
19800 		     FIXME: IWMMXT allows offsets larger than ldrd can
19801 		     handle, fix these up with a pair of ldr.  */
19802 		  if (can_ldrd
19803 		      && (TARGET_THUMB2
19804 		      || !CONST_INT_P (otherops[2])
19805 		      || (INTVAL (otherops[2]) > -256
19806 			  && INTVAL (otherops[2]) < 256)))
19807 		    {
19808 		      if (emit)
19809 			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19810 		    }
19811 		  else
19812 		    {
19813 		      if (emit)
19814 			{
19815 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19816 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19817 			}
19818 		      if (count)
19819 			*count = 2;
19820 
19821 		    }
19822 		}
19823 	    }
19824 	  else
19825 	    {
19826 	      /* Use a single insn if we can.
19827 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19828 		 fix these up with a pair of ldr.  */
19829 	      if (can_ldrd
19830 		  && (TARGET_THUMB2
19831 		  || !CONST_INT_P (otherops[2])
19832 		  || (INTVAL (otherops[2]) > -256
19833 		      && INTVAL (otherops[2]) < 256)))
19834 		{
19835 		  if (emit)
19836 		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19837 		}
19838 	      else
19839 		{
19840 		  if (emit)
19841 		    {
19842 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19843 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19844 		    }
19845 		  if (count)
19846 		    *count = 2;
19847 		}
19848 	    }
19849 	  break;
19850 
19851 	case LABEL_REF:
19852 	case CONST:
19853 	  /* We might be able to use ldrd %0, %1 here.  However the range is
19854 	     different to ldr/adr, and it is broken on some ARMv7-M
19855 	     implementations.  */
19856 	  /* Use the second register of the pair to avoid problematic
19857 	     overlap.  */
19858 	  otherops[1] = operands[1];
19859 	  if (emit)
19860 	    output_asm_insn ("adr%?\t%0, %1", otherops);
19861 	  operands[1] = otherops[0];
19862 	  if (emit)
19863 	    {
19864 	      if (can_ldrd)
19865 		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19866 	      else
19867 		output_asm_insn ("ldmia%?\t%1, %M0", operands);
19868 	    }
19869 
19870 	  if (count)
19871 	    *count = 2;
19872 	  break;
19873 
19874 	  /* ??? This needs checking for thumb2.  */
19875 	default:
19876 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19877 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19878 	    {
19879 	      otherops[0] = operands[0];
19880 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19881 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19882 
19883 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19884 		{
19885 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19886 		    {
19887 		      switch ((int) INTVAL (otherops[2]))
19888 			{
19889 			case -8:
19890 			  if (emit)
19891 			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19892 			  return "";
19893 			case -4:
19894 			  if (TARGET_THUMB2)
19895 			    break;
19896 			  if (emit)
19897 			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19898 			  return "";
19899 			case 4:
19900 			  if (TARGET_THUMB2)
19901 			    break;
19902 			  if (emit)
19903 			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19904 			  return "";
19905 			}
19906 		    }
19907 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19908 		  operands[1] = otherops[0];
19909 		  if (can_ldrd
19910 		      && (REG_P (otherops[2])
19911 			  || TARGET_THUMB2
19912 			  || (CONST_INT_P (otherops[2])
19913 			      && INTVAL (otherops[2]) > -256
19914 			      && INTVAL (otherops[2]) < 256)))
19915 		    {
19916 		      if (reg_overlap_mentioned_p (operands[0],
19917 						   otherops[2]))
19918 			{
19919 			  /* Swap base and index registers over to
19920 			     avoid a conflict.  */
19921 			  std::swap (otherops[1], otherops[2]);
19922 			}
19923 		      /* If both registers conflict, it will usually
19924 			 have been fixed by a splitter.  */
19925 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
19926 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19927 			{
19928 			  if (emit)
19929 			    {
19930 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
19931 			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19932 			    }
19933 			  if (count)
19934 			    *count = 2;
19935 			}
19936 		      else
19937 			{
19938 			  otherops[0] = operands[0];
19939 			  if (emit)
19940 			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19941 			}
19942 		      return "";
19943 		    }
19944 
19945 		  if (CONST_INT_P (otherops[2]))
19946 		    {
19947 		      if (emit)
19948 			{
19949 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19950 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19951 			  else
19952 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
19953 			}
19954 		    }
19955 		  else
19956 		    {
19957 		      if (emit)
19958 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
19959 		    }
19960 		}
19961 	      else
19962 		{
19963 		  if (emit)
19964 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19965 		}
19966 
19967 	      if (count)
19968 		*count = 2;
19969 
19970 	      if (can_ldrd)
19971 		return "ldrd%?\t%0, [%1]";
19972 
19973 	      return "ldmia%?\t%1, %M0";
19974 	    }
19975 	  else
19976 	    {
19977 	      otherops[1] = adjust_address (operands[1], SImode, 4);
19978 	      /* Take care of overlapping base/data reg.  */
19979 	      if (reg_mentioned_p (operands[0], operands[1]))
19980 		{
19981 		  if (emit)
19982 		    {
19983 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
19984 		      output_asm_insn ("ldr%?\t%0, %1", operands);
19985 		    }
19986 		  if (count)
19987 		    *count = 2;
19988 
19989 		}
19990 	      else
19991 		{
19992 		  if (emit)
19993 		    {
19994 		      output_asm_insn ("ldr%?\t%0, %1", operands);
19995 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
19996 		    }
19997 		  if (count)
19998 		    *count = 2;
19999 		}
20000 	    }
20001 	}
20002     }
20003   else
20004     {
20005       /* Constraints should ensure this.  */
20006       gcc_assert (code0 == MEM && code1 == REG);
20007       gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20008                   || (TARGET_ARM && TARGET_LDRD));
20009 
20010       /* For TARGET_ARM the first source register of an STRD
20011 	 must be even.  This is usually the case for double-word
20012 	 values but user assembly constraints can force an odd
20013 	 starting register.  */
20014       bool allow_strd = TARGET_LDRD
20015 			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20016       switch (GET_CODE (XEXP (operands[0], 0)))
20017         {
20018 	case REG:
20019 	  if (emit)
20020 	    {
20021 	      if (allow_strd)
20022 		output_asm_insn ("strd%?\t%1, [%m0]", operands);
20023 	      else
20024 		output_asm_insn ("stm%?\t%m0, %M1", operands);
20025 	    }
20026 	  break;
20027 
20028         case PRE_INC:
20029 	  gcc_assert (allow_strd);
20030 	  if (emit)
20031 	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20032 	  break;
20033 
20034         case PRE_DEC:
20035 	  if (emit)
20036 	    {
20037 	      if (allow_strd)
20038 		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20039 	      else
20040 		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20041 	    }
20042 	  break;
20043 
20044         case POST_INC:
20045 	  if (emit)
20046 	    {
20047 	      if (allow_strd)
20048 		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20049 	      else
20050 		output_asm_insn ("stm%?\t%m0!, %M1", operands);
20051 	    }
20052 	  break;
20053 
20054         case POST_DEC:
20055 	  gcc_assert (allow_strd);
20056 	  if (emit)
20057 	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20058 	  break;
20059 
20060 	case PRE_MODIFY:
20061 	case POST_MODIFY:
20062 	  otherops[0] = operands[1];
20063 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20064 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20065 
20066 	  /* IWMMXT allows offsets larger than strd can handle,
20067 	     fix these up with a pair of str.  */
20068 	  if (!TARGET_THUMB2
20069 	      && CONST_INT_P (otherops[2])
20070 	      && (INTVAL(otherops[2]) <= -256
20071 		  || INTVAL(otherops[2]) >= 256))
20072 	    {
20073 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20074 		{
20075 		  if (emit)
20076 		    {
20077 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20078 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20079 		    }
20080 		  if (count)
20081 		    *count = 2;
20082 		}
20083 	      else
20084 		{
20085 		  if (emit)
20086 		    {
20087 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20088 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20089 		    }
20090 		  if (count)
20091 		    *count = 2;
20092 		}
20093 	    }
20094 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20095 	    {
20096 	      if (emit)
20097 		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20098 	    }
20099 	  else
20100 	    {
20101 	      if (emit)
20102 		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20103 	    }
20104 	  break;
20105 
20106 	case PLUS:
20107 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20108 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20109 	    {
20110 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20111 		{
20112 		case -8:
20113 		  if (emit)
20114 		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20115 		  return "";
20116 
20117 		case -4:
20118 		  if (TARGET_THUMB2)
20119 		    break;
20120 		  if (emit)
20121 		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
20122 		  return "";
20123 
20124 		case 4:
20125 		  if (TARGET_THUMB2)
20126 		    break;
20127 		  if (emit)
20128 		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
20129 		  return "";
20130 		}
20131 	    }
20132 	  if (allow_strd
20133 	      && (REG_P (otherops[2])
20134 		  || TARGET_THUMB2
20135 		  || (CONST_INT_P (otherops[2])
20136 		      && INTVAL (otherops[2]) > -256
20137 		      && INTVAL (otherops[2]) < 256)))
20138 	    {
20139 	      otherops[0] = operands[1];
20140 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20141 	      if (emit)
20142 		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20143 	      return "";
20144 	    }
20145 	  /* Fall through */
20146 
20147         default:
20148 	  otherops[0] = adjust_address (operands[0], SImode, 4);
20149 	  otherops[1] = operands[1];
20150 	  if (emit)
20151 	    {
20152 	      output_asm_insn ("str%?\t%1, %0", operands);
20153 	      output_asm_insn ("str%?\t%H1, %0", otherops);
20154 	    }
20155 	  if (count)
20156 	    *count = 2;
20157 	}
20158     }
20159 
20160   return "";
20161 }
20162 
20163 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
20164    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
20165 
20166 const char *
output_move_quad (rtx *operands)
20168 {
20169   if (REG_P (operands[0]))
20170     {
20171       /* Load, or reg->reg move.  */
20172 
20173       if (MEM_P (operands[1]))
20174         {
20175           switch (GET_CODE (XEXP (operands[1], 0)))
20176             {
20177             case REG:
20178               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20179               break;
20180 
20181             case LABEL_REF:
20182             case CONST:
20183               output_asm_insn ("adr%?\t%0, %1", operands);
20184               output_asm_insn ("ldmia%?\t%0, %M0", operands);
20185               break;
20186 
20187             default:
20188               gcc_unreachable ();
20189             }
20190         }
20191       else
20192         {
20193           rtx ops[2];
20194           int dest, src, i;
20195 
20196           gcc_assert (REG_P (operands[1]));
20197 
20198           dest = REGNO (operands[0]);
20199           src = REGNO (operands[1]);
20200 
20201           /* This seems pretty dumb, but hopefully GCC won't try to do it
20202              very often.  */
20203           if (dest < src)
20204             for (i = 0; i < 4; i++)
20205               {
20206                 ops[0] = gen_rtx_REG (SImode, dest + i);
20207                 ops[1] = gen_rtx_REG (SImode, src + i);
20208                 output_asm_insn ("mov%?\t%0, %1", ops);
20209               }
20210           else
20211             for (i = 3; i >= 0; i--)
20212               {
20213                 ops[0] = gen_rtx_REG (SImode, dest + i);
20214                 ops[1] = gen_rtx_REG (SImode, src + i);
20215                 output_asm_insn ("mov%?\t%0, %1", ops);
20216               }
20217         }
20218     }
20219   else
20220     {
20221       gcc_assert (MEM_P (operands[0]));
20222       gcc_assert (REG_P (operands[1]));
20223       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20224 
20225       switch (GET_CODE (XEXP (operands[0], 0)))
20226         {
20227         case REG:
20228           output_asm_insn ("stm%?\t%m0, %M1", operands);
20229           break;
20230 
20231         default:
20232           gcc_unreachable ();
20233         }
20234     }
20235 
20236   return "";
20237 }
20238 
20239 /* Output a VFP load or store instruction.  */
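/* For example, a DFmode load from [r0] is printed as "vldr.64 d8, [r0]" and
   an SFmode store with post-increment as "vstmia.32 r3!, {s15}" (operands
   illustrative).  */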
20240 
20241 const char *
output_move_vfp (rtx *operands)
20243 {
20244   rtx reg, mem, addr, ops[2];
20245   int load = REG_P (operands[0]);
20246   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20247   int sp = (!TARGET_VFP_FP16INST
20248 	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20249   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20250   const char *templ;
20251   char buff[50];
20252   machine_mode mode;
20253 
20254   reg = operands[!load];
20255   mem = operands[load];
20256 
20257   mode = GET_MODE (reg);
20258 
20259   gcc_assert (REG_P (reg));
20260   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20261   gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20262 	      || mode == SFmode
20263 	      || mode == DFmode
20264 	      || mode == HImode
20265 	      || mode == SImode
20266 	      || mode == DImode
20267               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20268   gcc_assert (MEM_P (mem));
20269 
20270   addr = XEXP (mem, 0);
20271 
20272   switch (GET_CODE (addr))
20273     {
20274     case PRE_DEC:
20275       templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20276       ops[0] = XEXP (addr, 0);
20277       ops[1] = reg;
20278       break;
20279 
20280     case POST_INC:
20281       templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20282       ops[0] = XEXP (addr, 0);
20283       ops[1] = reg;
20284       break;
20285 
20286     default:
20287       templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20288       ops[0] = reg;
20289       ops[1] = mem;
20290       break;
20291     }
20292 
20293   sprintf (buff, templ,
20294 	   load ? "ld" : "st",
20295 	   dp ? "64" : sp ? "32" : "16",
20296 	   dp ? "P" : "",
20297 	   integer_p ? "\t%@ int" : "");
20298   output_asm_insn (buff, ops);
20299 
20300   return "";
20301 }
20302 
20303 /* Output a Neon double-word or quad-word load or store, or a load
20304    or store for larger structure modes.
20305 
   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
20309    GCC RTL defines element ordering based on in-memory order.
20310    This can be different from the architectural ordering of elements
20311    within a NEON register. The intrinsics defined in arm_neon.h use the
20312    NEON register element ordering, not the GCC RTL element ordering.
20313 
   For example, the in-memory ordering of a big-endian quadword
20315    vector with 16-bit elements when stored from register pair {d0,d1}
20316    will be (lowest address first, d0[N] is NEON register element N):
20317 
20318      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20319 
20320    When necessary, quadword registers (dN, dN+1) are moved to ARM
20321    registers from rN in the order:
20322 
20323      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20324 
20325    So that STM/LDM can be used on vectors in ARM registers, and the
20326    same memory layout will result as if VSTM/VLDM were used.
20327 
20328    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20329    possible, which allows use of appropriate alignment tags.
20330    Note that the choice of "64" is independent of the actual vector
20331    element size; this size simply ensures that the behavior is
20332    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20333 
20334    Due to limitations of those instructions, use of VST1.64/VLD1.64
20335    is not possible if:
20336     - the address contains PRE_DEC, or
20337     - the mode refers to more than 4 double-word registers
20338 
20339    In those cases, it would be possible to replace VSTM/VLDM by a
20340    sequence of instructions; this is not currently implemented since
20341    this is not certain to actually improve performance.  */
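/* For instance, a quad-word store to the address in r0 is typically printed
   with VST1, roughly "vst1.64 {d0, d1}, [r0]", while the larger structure
   modes fall back to "vstmia r0, {d0-d7}" style output (operands
   illustrative).  */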
20342 
20343 const char *
output_move_neon (rtx *operands)
20345 {
20346   rtx reg, mem, addr, ops[2];
20347   int regno, nregs, load = REG_P (operands[0]);
20348   const char *templ;
20349   char buff[50];
20350   machine_mode mode;
20351 
20352   reg = operands[!load];
20353   mem = operands[load];
20354 
20355   mode = GET_MODE (reg);
20356 
20357   gcc_assert (REG_P (reg));
20358   regno = REGNO (reg);
20359   nregs = REG_NREGS (reg) / 2;
20360   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20361 	      || NEON_REGNO_OK_FOR_QUAD (regno));
20362   gcc_assert (VALID_NEON_DREG_MODE (mode)
20363 	      || VALID_NEON_QREG_MODE (mode)
20364 	      || VALID_NEON_STRUCT_MODE (mode));
20365   gcc_assert (MEM_P (mem));
20366 
20367   addr = XEXP (mem, 0);
20368 
20369   /* Strip off const from addresses like (const (plus (...))).  */
20370   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20371     addr = XEXP (addr, 0);
20372 
20373   switch (GET_CODE (addr))
20374     {
20375     case POST_INC:
20376       /* We have to use vldm / vstm for too-large modes.  */
20377       if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20378 	{
20379 	  templ = "v%smia%%?\t%%0!, %%h1";
20380 	  ops[0] = XEXP (addr, 0);
20381 	}
20382       else
20383 	{
20384 	  templ = "v%s1.64\t%%h1, %%A0";
20385 	  ops[0] = mem;
20386 	}
20387       ops[1] = reg;
20388       break;
20389 
20390     case PRE_DEC:
20391       /* We have to use vldm / vstm in this case, since there is no
20392 	 pre-decrement form of the vld1 / vst1 instructions.  */
20393       templ = "v%smdb%%?\t%%0!, %%h1";
20394       ops[0] = XEXP (addr, 0);
20395       ops[1] = reg;
20396       break;
20397 
20398     case POST_MODIFY:
20399       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
20400       gcc_unreachable ();
20401 
20402     case REG:
20403       /* We have to use vldm / vstm for too-large modes.  */
20404       if (nregs > 1)
20405 	{
20406 	  if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20407 	    templ = "v%smia%%?\t%%m0, %%h1";
20408 	  else
20409 	    templ = "v%s1.64\t%%h1, %%A0";
20410 
20411 	  ops[0] = mem;
20412 	  ops[1] = reg;
20413 	  break;
20414 	}
20415       /* Fall through.  */
20416     case PLUS:
20417       if (GET_CODE (addr) == PLUS)
20418 	addr = XEXP (addr, 0);
20419       /* Fall through.  */
20420     case LABEL_REF:
20421       {
20422 	int i;
20423 	int overlap = -1;
20424 	for (i = 0; i < nregs; i++)
20425 	  {
20426 	    /* We're only using DImode here because it's a convenient
20427 	       size.  */
20428 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20429 	    ops[1] = adjust_address (mem, DImode, 8 * i);
20430 	    if (reg_overlap_mentioned_p (ops[0], mem))
20431 	      {
20432 		gcc_assert (overlap == -1);
20433 		overlap = i;
20434 	      }
20435 	    else
20436 	      {
20437 		if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20438 		  sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20439 		else
20440 		  sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20441 		output_asm_insn (buff, ops);
20442 	      }
20443 	  }
20444 	if (overlap != -1)
20445 	  {
20446 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20447 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
20448 	    if (TARGET_HAVE_MVE && GET_CODE (addr) == LABEL_REF)
20449 	      sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20450 	    else
20451 	      sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20452 	    output_asm_insn (buff, ops);
20453 	  }
20454 
20455         return "";
20456       }
20457 
20458     default:
20459       gcc_unreachable ();
20460     }
20461 
20462   sprintf (buff, templ, load ? "ld" : "st");
20463   output_asm_insn (buff, ops);
20464 
20465   return "";
20466 }
20467 
20468 /* Compute and return the length of neon_mov<mode>, where <mode> is
20469    one of VSTRUCT modes: EI, OI, CI or XI.  */
20470 int
20471 arm_attr_length_move_neon (rtx_insn *insn)
20472 {
20473   rtx reg, mem, addr;
20474   int load;
20475   machine_mode mode;
20476 
20477   extract_insn_cached (insn);
20478 
20479   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20480     {
20481       mode = GET_MODE (recog_data.operand[0]);
20482       switch (mode)
20483 	{
20484 	case E_EImode:
20485 	case E_OImode:
20486 	  return 8;
20487 	case E_CImode:
20488 	  return 12;
20489 	case E_XImode:
20490 	  return 16;
20491 	default:
20492 	  gcc_unreachable ();
20493 	}
20494     }
20495 
20496   load = REG_P (recog_data.operand[0]);
20497   reg = recog_data.operand[!load];
20498   mem = recog_data.operand[load];
20499 
20500   gcc_assert (MEM_P (mem));
20501 
20502   addr = XEXP (mem, 0);
20503 
20504   /* Strip off const from addresses like (const (plus (...))).  */
20505   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20506     addr = XEXP (addr, 0);
20507 
20508   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
20509     {
20510       int insns = REG_NREGS (reg) / 2;
20511       return insns * 4;
20512     }
20513   else
20514     return 4;
20515 }
20516 
20517 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
20518    return zero.  */
20519 
20520 int
20521 arm_address_offset_is_imm (rtx_insn *insn)
20522 {
20523   rtx mem, addr;
20524 
20525   extract_insn_cached (insn);
20526 
20527   if (REG_P (recog_data.operand[0]))
20528     return 0;
20529 
20530   mem = recog_data.operand[0];
20531 
20532   gcc_assert (MEM_P (mem));
20533 
20534   addr = XEXP (mem, 0);
20535 
20536   if (REG_P (addr)
20537       || (GET_CODE (addr) == PLUS
20538 	  && REG_P (XEXP (addr, 0))
20539 	  && CONST_INT_P (XEXP (addr, 1))))
20540     return 1;
20541   else
20542     return 0;
20543 }
20544 
20545 /* Output an ADD r, s, #n where n may be too big for one instruction.
20546    If adding zero to one register, output nothing.  */
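/* For example, ADD rd, rn, #4100 cannot be encoded as a single ARM data
   processing immediate, so output_multi_immediate below splits it into
   roughly "add rd, rn, #4" followed by "add rd, rd, #4096"; a negative
   constant is handled the same way using SUB.  */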
20547 const char *
20548 output_add_immediate (rtx *operands)
20549 {
20550   HOST_WIDE_INT n = INTVAL (operands[2]);
20551 
20552   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20553     {
20554       if (n < 0)
20555 	output_multi_immediate (operands,
20556 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20557 				-n);
20558       else
20559 	output_multi_immediate (operands,
20560 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20561 				n);
20562     }
20563 
20564   return "";
20565 }
20566 
20567 /* Output a multiple immediate operation.
20568    OPERANDS is the vector of operands referred to in the output patterns.
20569    INSTR1 is the output pattern to use for the first constant.
20570    INSTR2 is the output pattern to use for subsequent constants.
20571    IMMED_OP is the index of the constant slot in OPERANDS.
20572    N is the constant value.  */
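/* Worked example: N = 0x12340078 is split into the rotatable 8-bit
   chunks 0x78, 0x02340000 and 0x10000000, giving three instructions.
   The "i += 6" below (together with the loop's "i += 2") skips past the
   eight bits just consumed.  */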
20573 static const char *
20574 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20575 			int immed_op, HOST_WIDE_INT n)
20576 {
20577 #if HOST_BITS_PER_WIDE_INT > 32
20578   n &= 0xffffffff;
20579 #endif
20580 
20581   if (n == 0)
20582     {
20583       /* Quick and easy output.  */
20584       operands[immed_op] = const0_rtx;
20585       output_asm_insn (instr1, operands);
20586     }
20587   else
20588     {
20589       int i;
20590       const char * instr = instr1;
20591 
20592       /* Note that n is never zero here (which would give no output).  */
20593       for (i = 0; i < 32; i += 2)
20594 	{
20595 	  if (n & (3 << i))
20596 	    {
20597 	      operands[immed_op] = GEN_INT (n & (255 << i));
20598 	      output_asm_insn (instr, operands);
20599 	      instr = instr2;
20600 	      i += 6;
20601 	    }
20602 	}
20603     }
20604 
20605   return "";
20606 }
20607 
20608 /* Return the name of a shifter operation.  */
20609 static const char *
20610 arm_shift_nmem(enum rtx_code code)
20611 {
20612   switch (code)
20613     {
20614     case ASHIFT:
20615       return ARM_LSL_NAME;
20616 
20617     case ASHIFTRT:
20618       return "asr";
20619 
20620     case LSHIFTRT:
20621       return "lsr";
20622 
20623     case ROTATERT:
20624       return "ror";
20625 
20626     default:
20627       abort();
20628     }
20629 }
20630 
20631 /* Return the appropriate ARM instruction for the operation code.
20632    The returned result should not be overwritten.  OP is the rtx of the
20633    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20634    was shifted.  */
20635 const char *
20636 arithmetic_instr (rtx op, int shift_first_arg)
20637 {
20638   switch (GET_CODE (op))
20639     {
20640     case PLUS:
20641       return "add";
20642 
20643     case MINUS:
20644       return shift_first_arg ? "rsb" : "sub";
20645 
20646     case IOR:
20647       return "orr";
20648 
20649     case XOR:
20650       return "eor";
20651 
20652     case AND:
20653       return "and";
20654 
20655     case ASHIFT:
20656     case ASHIFTRT:
20657     case LSHIFTRT:
20658     case ROTATERT:
20659       return arm_shift_nmem(GET_CODE(op));
20660 
20661     default:
20662       gcc_unreachable ();
20663     }
20664 }
20665 
20666 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20667    for the operation code.  The returned result should not be overwritten.
20668    OP is the rtx of the shift.
20669    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20670    constant shift amount otherwise.  */
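/* For example, (mult x 8) is handled as a left shift and yields
   ARM_LSL_NAME with *AMOUNTP = 3, while (rotate x 8) is rewritten as a
   rotate-right and yields "ror" with *AMOUNTP = 24.  */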
20671 static const char *
20672 shift_op (rtx op, HOST_WIDE_INT *amountp)
20673 {
20674   const char * mnem;
20675   enum rtx_code code = GET_CODE (op);
20676 
20677   switch (code)
20678     {
20679     case ROTATE:
20680       if (!CONST_INT_P (XEXP (op, 1)))
20681 	{
20682 	  output_operand_lossage ("invalid shift operand");
20683 	  return NULL;
20684 	}
20685 
20686       code = ROTATERT;
20687       *amountp = 32 - INTVAL (XEXP (op, 1));
20688       mnem = "ror";
20689       break;
20690 
20691     case ASHIFT:
20692     case ASHIFTRT:
20693     case LSHIFTRT:
20694     case ROTATERT:
20695       mnem = arm_shift_nmem(code);
20696       if (CONST_INT_P (XEXP (op, 1)))
20697 	{
20698 	  *amountp = INTVAL (XEXP (op, 1));
20699 	}
20700       else if (REG_P (XEXP (op, 1)))
20701 	{
20702 	  *amountp = -1;
20703 	  return mnem;
20704 	}
20705       else
20706 	{
20707 	  output_operand_lossage ("invalid shift operand");
20708 	  return NULL;
20709 	}
20710       break;
20711 
20712     case MULT:
20713       /* We never have to worry about the amount being other than a
20714 	 power of 2, since this case can never be reloaded from a reg.  */
20715       if (!CONST_INT_P (XEXP (op, 1)))
20716 	{
20717 	  output_operand_lossage ("invalid shift operand");
20718 	  return NULL;
20719 	}
20720 
20721       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20722 
20723       /* Amount must be a power of two.  */
20724       if (*amountp & (*amountp - 1))
20725 	{
20726 	  output_operand_lossage ("invalid shift operand");
20727 	  return NULL;
20728 	}
20729 
20730       *amountp = exact_log2 (*amountp);
20731       gcc_assert (IN_RANGE (*amountp, 0, 31));
20732       return ARM_LSL_NAME;
20733 
20734     default:
20735       output_operand_lossage ("invalid shift operand");
20736       return NULL;
20737     }
20738 
20739   /* This is not 100% correct, but follows from the desire to merge
20740      multiplication by a power of 2 with the recognizer for a
20741      shift.  >=32 is not a valid shift for "lsl", so we must try and
20742      output a shift that produces the correct arithmetical result.
20743      Using lsr #32 is identical except for the fact that the carry bit
20744      is not set correctly if we set the flags; but we never use the
20745      carry bit from such an operation, so we can ignore that.  */
20746   if (code == ROTATERT)
20747     /* Rotate is just modulo 32.  */
20748     *amountp &= 31;
20749   else if (*amountp != (*amountp & 31))
20750     {
20751       if (code == ASHIFT)
20752 	mnem = "lsr";
20753       *amountp = 32;
20754     }
20755 
20756   /* Shifts of 0 are no-ops.  */
20757   if (*amountp == 0)
20758     return NULL;
20759 
20760   return mnem;
20761 }
20762 
20763 /* Output a .ascii pseudo-op, keeping track of lengths and splitting long
20764    strings, because /bin/as is horribly restrictive.  The judgement about
20765    whether or not each character is 'printable' (and can be output as
20766    is) or not (and must be printed with an octal escape) must be made
20767    with reference to the *host* character set -- the situation is
20768    similar to that discussed in the comments above pp_c_char in
20769    c-pretty-print.c.  */
20770 
20771 #define MAX_ASCII_LEN 51
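/* For example, the three input characters 'a', '"' and '\n' would be
   emitted as

	.ascii "a\"\012"

   printable characters pass through (with '\\' and '"' escaped), and
   everything else becomes a three-digit octal escape.  A new .ascii
   directive is started whenever MAX_ASCII_LEN output characters have
   been written.  */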
20772 
20773 void
20774 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20775 {
20776   int i;
20777   int len_so_far = 0;
20778 
20779   fputs ("\t.ascii\t\"", stream);
20780 
20781   for (i = 0; i < len; i++)
20782     {
20783       int c = p[i];
20784 
20785       if (len_so_far >= MAX_ASCII_LEN)
20786 	{
20787 	  fputs ("\"\n\t.ascii\t\"", stream);
20788 	  len_so_far = 0;
20789 	}
20790 
20791       if (ISPRINT (c))
20792 	{
20793 	  if (c == '\\' || c == '\"')
20794 	    {
20795 	      putc ('\\', stream);
20796 	      len_so_far++;
20797 	    }
20798 	  putc (c, stream);
20799 	  len_so_far++;
20800 	}
20801       else
20802 	{
20803 	  fprintf (stream, "\\%03o", c);
20804 	  len_so_far += 4;
20805 	}
20806     }
20807 
20808   fputs ("\"\n", stream);
20809 }
20810 
20811 
20812 /* Compute the register save mask for registers 0 through 12
20813    inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */
20814 
20815 static unsigned long
20816 arm_compute_save_reg0_reg12_mask (void)
20817 {
20818   unsigned long func_type = arm_current_func_type ();
20819   unsigned long save_reg_mask = 0;
20820   unsigned int reg;
20821 
20822   if (IS_INTERRUPT (func_type))
20823     {
20824       unsigned int max_reg;
20825       /* Interrupt functions must not corrupt any registers,
20826 	 even call clobbered ones.  If this is a leaf function
20827 	 we can just examine the registers used by the RTL, but
20828 	 otherwise we have to assume that whatever function is
20829 	 called might clobber anything, and so we have to save
20830 	 all the call-clobbered registers as well.  */
20831       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20832 	/* FIQ handlers have registers r8 - r12 banked, so
20833 	   we only need to check r0 - r7.  Normal ISRs only
20834 	   bank r14 and r15, so we must check up to r12.
20835 	   r13 is the stack pointer which is always preserved,
20836 	   so we do not need to consider it here.  */
20837 	max_reg = 7;
20838       else
20839 	max_reg = 12;
20840 
20841       for (reg = 0; reg <= max_reg; reg++)
20842 	if (df_regs_ever_live_p (reg)
20843 	    || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg)))
20844 	  save_reg_mask |= (1 << reg);
20845 
20846       /* Also save the pic base register if necessary.  */
20847       if (PIC_REGISTER_MAY_NEED_SAVING
20848 	  && crtl->uses_pic_offset_table)
20849 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20850     }
20851   else if (IS_VOLATILE(func_type))
20852     {
20853       /* For noreturn functions we historically omitted register saves
20854 	 altogether.  However this really messes up debugging.  As a
20855 	 compromise save just the frame pointers.  Combined with the link
20856 	 register saved elsewhere this should be sufficient to get
20857 	 a backtrace.  */
20858       if (frame_pointer_needed)
20859 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20860       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20861 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20862       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20863 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20864     }
20865   else
20866     {
20867       /* In the normal case we only need to save those registers
20868 	 which are call saved and which are used by this function.  */
20869       for (reg = 0; reg <= 11; reg++)
20870 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20871 	  save_reg_mask |= (1 << reg);
20872 
20873       /* Handle the frame pointer as a special case.  */
20874       if (frame_pointer_needed)
20875 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20876 
20877       /* If we aren't loading the PIC register,
20878 	 don't stack it even though it may be live.  */
20879       if (PIC_REGISTER_MAY_NEED_SAVING
20880 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20881 	      || crtl->uses_pic_offset_table))
20882 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20883 
20884       /* The prologue will copy SP into R0, so save it.  */
20885       if (IS_STACKALIGN (func_type))
20886 	save_reg_mask |= 1;
20887     }
20888 
20889   /* Save registers so the exception handler can modify them.  */
20890   if (crtl->calls_eh_return)
20891     {
20892       unsigned int i;
20893 
20894       for (i = 0; ; i++)
20895 	{
20896 	  reg = EH_RETURN_DATA_REGNO (i);
20897 	  if (reg == INVALID_REGNUM)
20898 	    break;
20899 	  save_reg_mask |= 1 << reg;
20900 	}
20901     }
20902 
20903   return save_reg_mask;
20904 }
20905 
20906 /* Return true if r3 is live at the start of the function.  */
20907 
20908 static bool
20909 arm_r3_live_at_start_p (void)
20910 {
20911   /* Just look at cfg info, which is still close enough to correct at this
20912      point.  This gives false positives for broken functions that might use
20913      uninitialized data that happens to be allocated in r3, but who cares?  */
20914   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20915 }
20916 
20917 /* Compute the number of bytes used to store the static chain register on the
20918    stack, above the stack frame.  We need to know this accurately to get the
20919    alignment of the rest of the stack frame correct.  */
20920 
20921 static int
20922 arm_compute_static_chain_stack_bytes (void)
20923 {
20924   /* Once the value is updated from the init value of -1, do not
20925      re-compute.  */
20926   if (cfun->machine->static_chain_stack_bytes != -1)
20927     return cfun->machine->static_chain_stack_bytes;
20928 
20929   /* See the defining assertion in arm_expand_prologue.  */
20930   if (IS_NESTED (arm_current_func_type ())
20931       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20932 	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20933 	       || flag_stack_clash_protection)
20934 	      && !df_regs_ever_live_p (LR_REGNUM)))
20935       && arm_r3_live_at_start_p ()
20936       && crtl->args.pretend_args_size == 0)
20937     return 4;
20938 
20939   return 0;
20940 }
20941 
20942 /* Compute a bit mask of which core registers need to be
20943    saved on the stack for the current function.
20944    This is used by arm_compute_frame_layout, which may add extra registers.  */
20945 
20946 static unsigned long
20947 arm_compute_save_core_reg_mask (void)
20948 {
20949   unsigned int save_reg_mask = 0;
20950   unsigned long func_type = arm_current_func_type ();
20951   unsigned int reg;
20952 
20953   if (IS_NAKED (func_type))
20954     /* This should never really happen.  */
20955     return 0;
20956 
20957   /* If we are creating a stack frame, then we must save the frame pointer,
20958      IP (which will hold the old stack pointer), LR and the PC.  */
20959   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20960     save_reg_mask |=
20961       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20962       | (1 << IP_REGNUM)
20963       | (1 << LR_REGNUM)
20964       | (1 << PC_REGNUM);
20965 
20966   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20967 
20968   /* Decide if we need to save the link register.
20969      Interrupt routines have their own banked link register,
20970      so they never need to save it.
20971      Otherwise if we do not use the link register we do not need to save
20972      it.  If we are pushing other registers onto the stack however, we
20973      can save an instruction in the epilogue by pushing the link register
20974      now and then popping it back into the PC.  This incurs extra memory
20975      accesses though, so we only do it when optimizing for size, and only
20976      if we know that we will not need a fancy return sequence.  */
20977   if (df_regs_ever_live_p (LR_REGNUM)
20978       || (save_reg_mask
20979 	  && optimize_size
20980 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20981 	  && !crtl->tail_call_emit
20982 	  && !crtl->calls_eh_return))
20983     save_reg_mask |= 1 << LR_REGNUM;
20984 
20985   if (cfun->machine->lr_save_eliminated)
20986     save_reg_mask &= ~ (1 << LR_REGNUM);
20987 
20988   if (TARGET_REALLY_IWMMXT
20989       && ((bit_count (save_reg_mask)
20990 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
20991 			   arm_compute_static_chain_stack_bytes())
20992 	   ) % 2) != 0)
20993     {
20994       /* The total number of registers that are going to be pushed
20995 	 onto the stack is odd.  We need to ensure that the stack
20996 	 is 64-bit aligned before we start to save iWMMXt registers,
20997 	 and also before we start to create locals.  (A local variable
20998 	 might be a double or long long which we will load/store using
20999 	 an iWMMXt instruction).  Therefore we need to push another
21000 	 ARM register, so that the stack will be 64-bit aligned.  We
21001 	 try to avoid using the arg registers (r0 - r3) as they might be
21002 	 used to pass values in a tail call.  */
21003       for (reg = 4; reg <= 12; reg++)
21004 	if ((save_reg_mask & (1 << reg)) == 0)
21005 	  break;
21006 
21007       if (reg <= 12)
21008 	save_reg_mask |= (1 << reg);
21009       else
21010 	{
21011 	  cfun->machine->sibcall_blocked = 1;
21012 	  save_reg_mask |= (1 << 3);
21013 	}
21014     }
21015 
21016   /* We may need to push an additional register for use initializing the
21017      PIC base register.  */
21018   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21019       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21020     {
21021       reg = thumb_find_work_register (1 << 4);
21022       if (!call_used_or_fixed_reg_p (reg))
21023 	save_reg_mask |= (1 << reg);
21024     }
21025 
21026   return save_reg_mask;
21027 }
21028 
21029 /* Compute a bit mask of which core registers need to be
21030    saved on the stack for the current function.  */
21031 static unsigned long
21032 thumb1_compute_save_core_reg_mask (void)
21033 {
21034   unsigned long mask;
21035   unsigned reg;
21036 
21037   mask = 0;
21038   for (reg = 0; reg < 12; reg ++)
21039     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21040       mask |= 1 << reg;
21041 
21042   /* Handle the frame pointer as a special case.  */
21043   if (frame_pointer_needed)
21044     mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21045 
21046   if (flag_pic
21047       && !TARGET_SINGLE_PIC_BASE
21048       && arm_pic_register != INVALID_REGNUM
21049       && crtl->uses_pic_offset_table)
21050     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21051 
21052   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
21053   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21054     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21055 
21056   /* LR will also be pushed if any lo regs are pushed.  */
21057   if (mask & 0xff || thumb_force_lr_save ())
21058     mask |= (1 << LR_REGNUM);
21059 
21060   bool call_clobbered_scratch
21061     = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21062        && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21063 
21064   /* Make sure we have a low work register if we need one.  We will
21065      need one if we are going to push a high register, but we are not
21066      currently intending to push a low register.  However if both the
21067      prologue and epilogue have a spare call-clobbered low register,
21068      then we won't need to find an additional work register.  It does
21069      not need to be the same register in the prologue and
21070      epilogue.  */
21071   if ((mask & 0xff) == 0
21072       && !call_clobbered_scratch
21073       && ((mask & 0x0f00) || TARGET_BACKTRACE))
21074     {
21075       /* Use thumb_find_work_register to choose which register
21076 	 we will use.  If the register is live then we will
21077 	 have to push it.  Use LAST_LO_REGNUM as our fallback
21078 	 choice for the register to select.  */
21079       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21080       /* Make sure the register returned by thumb_find_work_register is
21081 	 not part of the return value.  */
21082       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21083 	reg = LAST_LO_REGNUM;
21084 
21085       if (callee_saved_reg_p (reg))
21086 	mask |= 1 << reg;
21087     }
21088 
21089   /* The 504 below is 8 bytes less than 512 because there are two possible
21090      alignment words.  We can't tell here if they will be present or not, so
21091      we have to play it safe and assume that they are.  */
21092   if ((CALLER_INTERWORKING_SLOT_SIZE +
21093        ROUND_UP_WORD (get_frame_size ()) +
21094        crtl->outgoing_args_size) >= 504)
21095     {
21096       /* This is the same as the code in thumb1_expand_prologue() which
21097 	 determines which register to use for stack decrement. */
21098       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21099 	if (mask & (1 << reg))
21100 	  break;
21101 
21102       if (reg > LAST_LO_REGNUM)
21103 	{
21104 	  /* Make sure we have a register available for stack decrement. */
21105 	  mask |= 1 << LAST_LO_REGNUM;
21106 	}
21107     }
21108 
21109   return mask;
21110 }
21111 
21112 
21113 /* Return the number of bytes required to save VFP registers.  */
21114 static int
21115 arm_get_vfp_saved_size (void)
21116 {
21117   unsigned int regno;
21118   int count;
21119   int saved;
21120 
21121   saved = 0;
21122   /* Space for saved VFP registers.  */
21123   if (TARGET_VFP_BASE)
21124     {
21125       count = 0;
21126       for (regno = FIRST_VFP_REGNUM;
21127 	   regno < LAST_VFP_REGNUM;
21128 	   regno += 2)
21129 	{
21130 	  if ((!df_regs_ever_live_p (regno)
21131 	       || call_used_or_fixed_reg_p (regno))
21132 	      && (!df_regs_ever_live_p (regno + 1)
21133 		  || call_used_or_fixed_reg_p (regno + 1)))
21134 	    {
21135 	      if (count > 0)
21136 		{
21137 		  /* Workaround ARM10 VFPr1 bug.  */
21138 		  if (count == 2 && !arm_arch6)
21139 		    count++;
21140 		  saved += count * 8;
21141 		}
21142 	      count = 0;
21143 	    }
21144 	  else
21145 	    count++;
21146 	}
21147       if (count > 0)
21148 	{
21149 	  if (count == 2 && !arm_arch6)
21150 	    count++;
21151 	  saved += count * 8;
21152 	}
21153     }
21154   return saved;
21155 }
21156 
21157 
21158 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
21159    everything bar the final return instruction.  If SIMPLE_RETURN is true,
21160    then do not output the epilogue, because it has already been emitted in RTL.
21161 
21162    Note: do not forget to update length attribute of corresponding insn pattern
21163    when changing assembly output (eg. length attribute of
21164    thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21165    register clearing sequences).  */
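/* For example, a normal ARM-mode function whose only saved core registers
   are {r4, lr} typically returns with a single "pop {r4, pc}", while an
   interrupt handler either loads the PC directly with an "ldmfd ... ^"
   (restoring the CPSR) or, when LR was not pushed, finishes with
   "subs pc, lr, #4".  */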
21166 const char *
21167 output_return_instruction (rtx operand, bool really_return, bool reverse,
21168                            bool simple_return)
21169 {
21170   char conditional[10];
21171   char instr[100];
21172   unsigned reg;
21173   unsigned long live_regs_mask;
21174   unsigned long func_type;
21175   arm_stack_offsets *offsets;
21176 
21177   func_type = arm_current_func_type ();
21178 
21179   if (IS_NAKED (func_type))
21180     return "";
21181 
21182   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21183     {
21184       /* If this function was declared non-returning, and we have
21185 	 found a tail call, then we have to trust that the called
21186 	 function won't return.  */
21187       if (really_return)
21188 	{
21189 	  rtx ops[2];
21190 
21191 	  /* Otherwise, trap an attempted return by aborting.  */
21192 	  ops[0] = operand;
21193 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21194 				       : "abort");
21195 	  assemble_external_libcall (ops[1]);
21196 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21197 	}
21198 
21199       return "";
21200     }
21201 
21202   gcc_assert (!cfun->calls_alloca || really_return);
21203 
21204   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21205 
21206   cfun->machine->return_used_this_function = 1;
21207 
21208   offsets = arm_get_frame_offsets ();
21209   live_regs_mask = offsets->saved_regs_mask;
21210 
21211   if (!simple_return && live_regs_mask)
21212     {
21213       const char * return_reg;
21214 
21215       /* If we do not have any special requirements for function exit
21216 	 (e.g. interworking) then we can load the return address
21217 	 directly into the PC.  Otherwise we must load it into LR.  */
21218       if (really_return
21219 	  && !IS_CMSE_ENTRY (func_type)
21220 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21221 	return_reg = reg_names[PC_REGNUM];
21222       else
21223 	return_reg = reg_names[LR_REGNUM];
21224 
21225       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21226 	{
21227 	  /* There are three possible reasons for the IP register
21228 	     being saved.  1) a stack frame was created, in which case
21229 	     IP contains the old stack pointer, or 2) an ISR routine
21230 	     corrupted it, or 3) it was saved to align the stack on
21231 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
21232 	     restore IP.  */
21233 	  if (frame_pointer_needed)
21234 	    {
21235 	      live_regs_mask &= ~ (1 << IP_REGNUM);
21236 	      live_regs_mask |=   (1 << SP_REGNUM);
21237 	    }
21238 	  else
21239 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21240 	}
21241 
21242       /* On some ARM architectures it is faster to use LDR rather than
21243 	 LDM to load a single register.  On other architectures, the
21244 	 cost is the same.  In 26 bit mode, or for exception handlers,
21245 	 we have to use LDM to load the PC so that the CPSR is also
21246 	 restored.  */
21247       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21248 	if (live_regs_mask == (1U << reg))
21249 	  break;
21250 
21251       if (reg <= LAST_ARM_REGNUM
21252 	  && (reg != LR_REGNUM
21253 	      || ! really_return
21254 	      || ! IS_INTERRUPT (func_type)))
21255 	{
21256 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21257 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21258 	}
21259       else
21260 	{
21261 	  char *p;
21262 	  int first = 1;
21263 
21264 	  /* Generate the load multiple instruction to restore the
21265 	     registers.  Note we can get here, even if
21266 	     frame_pointer_needed is true, but only if sp already
21267 	     points to the base of the saved core registers.  */
21268 	  if (live_regs_mask & (1 << SP_REGNUM))
21269 	    {
21270 	      unsigned HOST_WIDE_INT stack_adjust;
21271 
21272 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21273 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21274 
21275 	      if (stack_adjust && arm_arch5t && TARGET_ARM)
21276 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21277 	      else
21278 		{
21279 		  /* If we can't use ldmib (SA110 bug),
21280 		     then try to pop r3 instead.  */
21281 		  if (stack_adjust)
21282 		    live_regs_mask |= 1 << 3;
21283 
21284 		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21285 		}
21286 	    }
21287 	  /* For interrupt returns we have to use an LDM rather than
21288 	     a POP so that we can use the exception return variant.  */
21289 	  else if (IS_INTERRUPT (func_type))
21290 	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21291 	  else
21292 	    sprintf (instr, "pop%s\t{", conditional);
21293 
21294 	  p = instr + strlen (instr);
21295 
21296 	  for (reg = 0; reg <= SP_REGNUM; reg++)
21297 	    if (live_regs_mask & (1 << reg))
21298 	      {
21299 		int l = strlen (reg_names[reg]);
21300 
21301 		if (first)
21302 		  first = 0;
21303 		else
21304 		  {
21305 		    memcpy (p, ", ", 2);
21306 		    p += 2;
21307 		  }
21308 
21309 		memcpy (p, "%|", 2);
21310 		memcpy (p + 2, reg_names[reg], l);
21311 		p += l + 2;
21312 	      }
21313 
21314 	  if (live_regs_mask & (1 << LR_REGNUM))
21315 	    {
21316 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21317 	      /* If returning from an interrupt, restore the CPSR.  */
21318 	      if (IS_INTERRUPT (func_type))
21319 		strcat (p, "^");
21320 	    }
21321 	  else
21322 	    strcpy (p, "}");
21323 	}
21324 
21325       output_asm_insn (instr, & operand);
21326 
21327       /* See if we need to generate an extra instruction to
21328 	 perform the actual function return.  */
21329       if (really_return
21330 	  && func_type != ARM_FT_INTERWORKED
21331 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21332 	{
21333 	  /* The return has already been handled
21334 	     by loading the LR into the PC.  */
21335           return "";
21336 	}
21337     }
21338 
21339   if (really_return)
21340     {
21341       switch ((int) ARM_FUNC_TYPE (func_type))
21342 	{
21343 	case ARM_FT_ISR:
21344 	case ARM_FT_FIQ:
21345 	  /* ??? This is wrong for unified assembly syntax.  */
21346 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21347 	  break;
21348 
21349 	case ARM_FT_INTERWORKED:
21350 	  gcc_assert (arm_arch5t || arm_arch4t);
21351 	  sprintf (instr, "bx%s\t%%|lr", conditional);
21352 	  break;
21353 
21354 	case ARM_FT_EXCEPTION:
21355 	  /* ??? This is wrong for unified assembly syntax.  */
21356 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21357 	  break;
21358 
21359 	default:
21360 	  if (IS_CMSE_ENTRY (func_type))
21361 	    {
21362 	      /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21363 		 emitted by cmse_nonsecure_entry_clear_before_return () and the
21364 		 VSTR/VLDR instructions in the prologue and epilogue.  */
21365 	      if (!TARGET_HAVE_FPCXT_CMSE)
21366 		{
21367 		  /* Check if we have to clear the 'GE bits', which are only used if
21368 		     parallel add and subtraction instructions are available.  */
21369 		  if (TARGET_INT_SIMD)
21370 		    snprintf (instr, sizeof (instr),
21371 			      "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21372 		  else
21373 		    snprintf (instr, sizeof (instr),
21374 			      "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21375 
21376 		  output_asm_insn (instr, & operand);
21377 		  /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21378 		     care of it.  */
21379 		  if (TARGET_HARD_FLOAT)
21380 		    {
21381 		      /* Clear the cumulative exception-status bits (0-4,7) and
21382 			 the condition code bits (28-31) of the FPSCR.  We need
21383 			 to remember to clear the first scratch register used
21384 			 (IP) and save and restore the second (r4).
21385 
21386 			 Important note: the length of the
21387 			 thumb2_cmse_entry_return insn pattern must account for
21388 			 the size of the below instructions.  */
21389 		      output_asm_insn ("push\t{%|r4}", & operand);
21390 		      output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21391 		      output_asm_insn ("movw\t%|r4, #65376", & operand);
21392 		      output_asm_insn ("movt\t%|r4, #4095", & operand);
21393 		      output_asm_insn ("and\t%|ip, %|r4", & operand);
21394 		      output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21395 		      output_asm_insn ("pop\t{%|r4}", & operand);
21396 		      output_asm_insn ("mov\t%|ip, %|lr", & operand);
21397 		    }
21398 		}
21399 	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21400 	    }
21401 	  /* Use bx if it's available.  */
21402 	  else if (arm_arch5t || arm_arch4t)
21403 	    sprintf (instr, "bx%s\t%%|lr", conditional);
21404 	  else
21405 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21406 	  break;
21407 	}
21408 
21409       output_asm_insn (instr, & operand);
21410     }
21411 
21412   return "";
21413 }
21414 
21415 /* Output in FILE asm statements needed to declare the NAME of the function
21416    defined by its DECL node.  */
21417 
21418 void
21419 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21420 {
21421   size_t cmse_name_len;
21422   char *cmse_name = 0;
21423   char cmse_prefix[] = "__acle_se_";
21424 
21425   /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21426      extra function label for each function with the 'cmse_nonsecure_entry'
21427      attribute.  This extra function label should be prepended with
21428      '__acle_se_', telling the linker that it needs to create secure gateway
21429      veneers for this function.  */
21430   if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21431 				    DECL_ATTRIBUTES (decl)))
21432     {
21433       cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21434       cmse_name = XALLOCAVEC (char, cmse_name_len);
21435       snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21436       targetm.asm_out.globalize_label (file, cmse_name);
21437 
21438       ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21439       ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21440     }
21441 
21442   ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21443   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21444   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21445   ASM_OUTPUT_LABEL (file, name);
21446 
21447   if (cmse_name)
21448     ASM_OUTPUT_LABEL (file, cmse_name);
21449 
21450   ARM_OUTPUT_FN_UNWIND (file, TRUE);
21451 }
21452 
21453 /* Write the function name into the code section, directly preceding
21454    the function prologue.
21455 
21456    Code will be output similar to this:
21457      t0
21458 	 .ascii "arm_poke_function_name", 0
21459 	 .align
21460      t1
21461 	 .word 0xff000000 + (t1 - t0)
21462      arm_poke_function_name
21463 	 mov     ip, sp
21464 	 stmfd   sp!, {fp, ip, lr, pc}
21465 	 sub     fp, ip, #4
21466 
21467    When performing a stack backtrace, code can inspect the value
21468    of 'pc' stored at 'fp' + 0.  If the trace function then looks
21469    at location pc - 12 and the top 8 bits are set, then we know
21470    that a function name is embedded immediately preceding this
21471    location, and that its length is ((pc[-3]) & ~0xff000000).
21472 
21473    We assume that pc is declared as a pointer to an unsigned long.
21474 
21475    It is of no benefit to output the function name if we are assembling
21476    a leaf function.  These function types will not contain a stack
21477    backtrace structure, therefore it is not possible to determine the
21478    function name.  */
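/* For example, for a function named "foobar" the string plus its
   terminating NUL occupies 7 bytes, ROUND_UP_WORD rounds that to 8, and
   the marker word emitted below is therefore 0xff000008.  */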
21479 void
21480 arm_poke_function_name (FILE *stream, const char *name)
21481 {
21482   unsigned long alignlength;
21483   unsigned long length;
21484   rtx           x;
21485 
21486   length      = strlen (name) + 1;
21487   alignlength = ROUND_UP_WORD (length);
21488 
21489   ASM_OUTPUT_ASCII (stream, name, length);
21490   ASM_OUTPUT_ALIGN (stream, 2);
21491   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21492   assemble_aligned_integer (UNITS_PER_WORD, x);
21493 }
21494 
21495 /* Place some comments into the assembler stream
21496    describing the current function.  */
21497 static void
21498 arm_output_function_prologue (FILE *f)
21499 {
21500   unsigned long func_type;
21501 
21502   /* Sanity check.  */
21503   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21504 
21505   func_type = arm_current_func_type ();
21506 
21507   switch ((int) ARM_FUNC_TYPE (func_type))
21508     {
21509     default:
21510     case ARM_FT_NORMAL:
21511       break;
21512     case ARM_FT_INTERWORKED:
21513       asm_fprintf (f, "\t%@ Function supports interworking.\n");
21514       break;
21515     case ARM_FT_ISR:
21516       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21517       break;
21518     case ARM_FT_FIQ:
21519       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21520       break;
21521     case ARM_FT_EXCEPTION:
21522       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21523       break;
21524     }
21525 
21526   if (IS_NAKED (func_type))
21527     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21528 
21529   if (IS_VOLATILE (func_type))
21530     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21531 
21532   if (IS_NESTED (func_type))
21533     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21534   if (IS_STACKALIGN (func_type))
21535     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21536   if (IS_CMSE_ENTRY (func_type))
21537     asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21538 
21539   asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21540 	       (HOST_WIDE_INT) crtl->args.size,
21541 	       crtl->args.pretend_args_size,
21542 	       (HOST_WIDE_INT) get_frame_size ());
21543 
21544   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21545 	       frame_pointer_needed,
21546 	       cfun->machine->uses_anonymous_args);
21547 
21548   if (cfun->machine->lr_save_eliminated)
21549     asm_fprintf (f, "\t%@ link register save eliminated.\n");
21550 
21551   if (crtl->calls_eh_return)
21552     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21553 
21554 }
21555 
21556 static void
21557 arm_output_function_epilogue (FILE *)
21558 {
21559   arm_stack_offsets *offsets;
21560 
21561   if (TARGET_THUMB1)
21562     {
21563       int regno;
21564 
21565       /* Emit any call-via-reg trampolines that are needed for v4t support
21566 	 of call_reg and call_value_reg type insns.  */
21567       for (regno = 0; regno < LR_REGNUM; regno++)
21568 	{
21569 	  rtx label = cfun->machine->call_via[regno];
21570 
21571 	  if (label != NULL)
21572 	    {
21573 	      switch_to_section (function_section (current_function_decl));
21574 	      targetm.asm_out.internal_label (asm_out_file, "L",
21575 					      CODE_LABEL_NUMBER (label));
21576 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21577 	    }
21578 	}
21579 
21580       /* ??? Probably not safe to set this here, since it assumes that a
21581 	 function will be emitted as assembly immediately after we generate
21582 	 RTL for it.  This does not happen for inline functions.  */
21583       cfun->machine->return_used_this_function = 0;
21584     }
21585   else /* TARGET_32BIT */
21586     {
21587       /* We need to take into account any stack-frame rounding.  */
21588       offsets = arm_get_frame_offsets ();
21589 
21590       gcc_assert (!use_return_insn (FALSE, NULL)
21591 		  || (cfun->machine->return_used_this_function != 0)
21592 		  || offsets->saved_regs == offsets->outgoing_args
21593 		  || frame_pointer_needed);
21594     }
21595 }
21596 
21597 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21598    STR and STRD.  If an even number of registers are being pushed, an
21599    STRD pattern is created for each register pair.  If an odd number of
21600    registers are pushed, an initial STR is emitted, followed by as many
21601    STRD instructions as are needed.  This works best when the
21602    stack is initially 64-bit aligned (the normal case), since it
21603    ensures that each STRD is also 64-bit aligned.  */
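/* For example, pushing {r4, r5, r6, r7} produces roughly

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   while the odd-count case {r4, r5, r6} starts with a single

	str	r4, [sp, #-12]!

   so that the following strd of r5/r6 (at [sp, #4]) remains doubleword
   aligned.  */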
21604 static void
21605 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21606 {
21607   int num_regs = 0;
21608   int i;
21609   int regno;
21610   rtx par = NULL_RTX;
21611   rtx dwarf = NULL_RTX;
21612   rtx tmp;
21613   bool first = true;
21614 
21615   num_regs = bit_count (saved_regs_mask);
21616 
21617   /* Must be at least one register to save, and can't save SP or PC.  */
21618   gcc_assert (num_regs > 0 && num_regs <= 14);
21619   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21620   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21621 
21622   /* Create sequence for DWARF info.  All the frame-related data for
21623      debugging is held in this wrapper.  */
21624   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21625 
21626   /* Describe the stack adjustment.  */
21627   tmp = gen_rtx_SET (stack_pointer_rtx,
21628 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21629   RTX_FRAME_RELATED_P (tmp) = 1;
21630   XVECEXP (dwarf, 0, 0) = tmp;
21631 
21632   /* Find the first register.  */
21633   for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21634     ;
21635 
21636   i = 0;
21637 
21638   /* If there's an odd number of registers to push, start off by
21639      pushing a single register.  This ensures that subsequent strd
21640      operations are dword aligned (assuming that SP was originally
21641      64-bit aligned).  */
21642   if ((num_regs & 1) != 0)
21643     {
21644       rtx reg, mem, insn;
21645 
21646       reg = gen_rtx_REG (SImode, regno);
21647       if (num_regs == 1)
21648 	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21649 						     stack_pointer_rtx));
21650       else
21651 	mem = gen_frame_mem (Pmode,
21652 			     gen_rtx_PRE_MODIFY
21653 			     (Pmode, stack_pointer_rtx,
21654 			      plus_constant (Pmode, stack_pointer_rtx,
21655 					     -4 * num_regs)));
21656 
21657       tmp = gen_rtx_SET (mem, reg);
21658       RTX_FRAME_RELATED_P (tmp) = 1;
21659       insn = emit_insn (tmp);
21660       RTX_FRAME_RELATED_P (insn) = 1;
21661       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21662       tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21663       RTX_FRAME_RELATED_P (tmp) = 1;
21664       i++;
21665       regno++;
21666       XVECEXP (dwarf, 0, i) = tmp;
21667       first = false;
21668     }
21669 
21670   while (i < num_regs)
21671     if (saved_regs_mask & (1 << regno))
21672       {
21673 	rtx reg1, reg2, mem1, mem2;
21674 	rtx tmp0, tmp1, tmp2;
21675 	int regno2;
21676 
21677 	/* Find the register to pair with this one.  */
21678 	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21679 	     regno2++)
21680 	  ;
21681 
21682 	reg1 = gen_rtx_REG (SImode, regno);
21683 	reg2 = gen_rtx_REG (SImode, regno2);
21684 
21685 	if (first)
21686 	  {
21687 	    rtx insn;
21688 
21689 	    first = false;
21690 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21691 							stack_pointer_rtx,
21692 							-4 * num_regs));
21693 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21694 							stack_pointer_rtx,
21695 							-4 * (num_regs - 1)));
21696 	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
21697 				plus_constant (Pmode, stack_pointer_rtx,
21698 					       -4 * (num_regs)));
21699 	    tmp1 = gen_rtx_SET (mem1, reg1);
21700 	    tmp2 = gen_rtx_SET (mem2, reg2);
21701 	    RTX_FRAME_RELATED_P (tmp0) = 1;
21702 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21703 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21704 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21705 	    XVECEXP (par, 0, 0) = tmp0;
21706 	    XVECEXP (par, 0, 1) = tmp1;
21707 	    XVECEXP (par, 0, 2) = tmp2;
21708 	    insn = emit_insn (par);
21709 	    RTX_FRAME_RELATED_P (insn) = 1;
21710 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21711 	  }
21712 	else
21713 	  {
21714 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21715 							stack_pointer_rtx,
21716 							4 * i));
21717 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21718 							stack_pointer_rtx,
21719 							4 * (i + 1)));
21720 	    tmp1 = gen_rtx_SET (mem1, reg1);
21721 	    tmp2 = gen_rtx_SET (mem2, reg2);
21722 	    RTX_FRAME_RELATED_P (tmp1) = 1;
21723 	    RTX_FRAME_RELATED_P (tmp2) = 1;
21724 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21725 	    XVECEXP (par, 0, 0) = tmp1;
21726 	    XVECEXP (par, 0, 1) = tmp2;
21727 	    emit_insn (par);
21728 	  }
21729 
21730 	/* Create unwind information.  This is an approximation.  */
21731 	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21732 					   plus_constant (Pmode,
21733 							  stack_pointer_rtx,
21734 							  4 * i)),
21735 			    reg1);
21736 	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21737 					   plus_constant (Pmode,
21738 							  stack_pointer_rtx,
21739 							  4 * (i + 1))),
21740 			    reg2);
21741 
21742 	RTX_FRAME_RELATED_P (tmp1) = 1;
21743 	RTX_FRAME_RELATED_P (tmp2) = 1;
21744 	XVECEXP (dwarf, 0, i + 1) = tmp1;
21745 	XVECEXP (dwarf, 0, i + 2) = tmp2;
21746 	i += 2;
21747 	regno = regno2 + 1;
21748       }
21749     else
21750       regno++;
21751 
21752   return;
21753 }
21754 
21755 /* STRD in ARM mode requires consecutive registers.  This function emits STRD
21756    whenever possible, otherwise it emits single-word stores.  The first store
21757    also allocates stack space for all saved registers, using writeback with
21758    post-addressing mode.  All other stores use offset addressing.  If no STRD
21759    can be emitted, this function emits a sequence of single-word stores,
21760    and not an STM as before, because single-word stores give the scheduler
21761    more freedom and can be turned into an STM by peephole optimizations.  */
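/* For example, with SAVED_REGS_MASK covering {r4, r5, r7} this emits
   roughly "strd r4, r5, [sp, #-12]!" followed by "str r7, [sp, #8]";
   r7 gets a single-word store because ARM-mode STRD requires an even/odd
   register pair starting on an even register number.  */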
21762 static void
21763 arm_emit_strd_push (unsigned long saved_regs_mask)
21764 {
21765   int num_regs = 0;
21766   int i, j, dwarf_index  = 0;
21767   int offset = 0;
21768   rtx dwarf = NULL_RTX;
21769   rtx insn = NULL_RTX;
21770   rtx tmp, mem;
21771 
21772   /* TODO: More efficient code can be emitted by changing the
21773      layout, e.g., first push all pairs that can use STRD to keep the
21774      stack aligned, and then push all other registers.  */
21775   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21776     if (saved_regs_mask & (1 << i))
21777       num_regs++;
21778 
21779   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21780   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21781   gcc_assert (num_regs > 0);
21782 
21783   /* Create sequence for DWARF info.  */
21784   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21785 
21786   /* For dwarf info, we generate explicit stack update.  */
21787   tmp = gen_rtx_SET (stack_pointer_rtx,
21788                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21789   RTX_FRAME_RELATED_P (tmp) = 1;
21790   XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21791 
21792   /* Save registers.  */
21793   offset = - 4 * num_regs;
21794   j = 0;
21795   while (j <= LAST_ARM_REGNUM)
21796     if (saved_regs_mask & (1 << j))
21797       {
21798         if ((j % 2 == 0)
21799             && (saved_regs_mask & (1 << (j + 1))))
21800           {
21801             /* Current register and previous register form register pair for
21802                which STRD can be generated.  */
21803             if (offset < 0)
21804               {
21805                 /* Allocate stack space for all saved registers.  */
21806                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21807                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21808                 mem = gen_frame_mem (DImode, tmp);
21809                 offset = 0;
21810               }
21811             else if (offset > 0)
21812               mem = gen_frame_mem (DImode,
21813                                    plus_constant (Pmode,
21814                                                   stack_pointer_rtx,
21815                                                   offset));
21816             else
21817               mem = gen_frame_mem (DImode, stack_pointer_rtx);
21818 
21819             tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21820             RTX_FRAME_RELATED_P (tmp) = 1;
21821             tmp = emit_insn (tmp);
21822 
21823             /* Record the first store insn.  */
21824             if (dwarf_index == 1)
21825               insn = tmp;
21826 
21827             /* Generate dwarf info.  */
21828             mem = gen_frame_mem (SImode,
21829                                  plus_constant (Pmode,
21830                                                 stack_pointer_rtx,
21831                                                 offset));
21832             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21833             RTX_FRAME_RELATED_P (tmp) = 1;
21834             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21835 
21836             mem = gen_frame_mem (SImode,
21837                                  plus_constant (Pmode,
21838                                                 stack_pointer_rtx,
21839                                                 offset + 4));
21840             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21841             RTX_FRAME_RELATED_P (tmp) = 1;
21842             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21843 
21844             offset += 8;
21845             j += 2;
21846           }
21847         else
21848           {
21849             /* Emit a single word store.  */
21850             if (offset < 0)
21851               {
21852                 /* Allocate stack space for all saved registers.  */
21853                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21854                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21855                 mem = gen_frame_mem (SImode, tmp);
21856                 offset = 0;
21857               }
21858             else if (offset > 0)
21859               mem = gen_frame_mem (SImode,
21860                                    plus_constant (Pmode,
21861                                                   stack_pointer_rtx,
21862                                                   offset));
21863             else
21864               mem = gen_frame_mem (SImode, stack_pointer_rtx);
21865 
21866             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21867             RTX_FRAME_RELATED_P (tmp) = 1;
21868             tmp = emit_insn (tmp);
21869 
21870             /* Record the first store insn.  */
21871             if (dwarf_index == 1)
21872               insn = tmp;
21873 
21874             /* Generate dwarf info.  */
21875             mem = gen_frame_mem (SImode,
21876                                  plus_constant(Pmode,
21877                                                stack_pointer_rtx,
21878                                                offset));
21879             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21880             RTX_FRAME_RELATED_P (tmp) = 1;
21881             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21882 
21883             offset += 4;
21884             j += 1;
21885           }
21886       }
21887     else
21888       j++;
21889 
21890   /* Attach dwarf info to the first insn we generate.  */
21891   gcc_assert (insn != NULL_RTX);
21892   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21893   RTX_FRAME_RELATED_P (insn) = 1;
21894 }
21895 
21896 /* Generate and emit an insn that we will recognize as a push_multi.
21897    Unfortunately, since this insn does not reflect the actual semantics
21898    of the operation very well, we need to annotate the insn for the benefit
21899    of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
21900    MASK for registers that should be annotated for DWARF2 frame unwind
21901    information.  */
21902 static rtx
21903 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21904 {
21905   int num_regs = 0;
21906   int num_dwarf_regs = 0;
21907   int i, j;
21908   rtx par;
21909   rtx dwarf;
21910   int dwarf_par_index;
21911   rtx tmp, reg;
21912 
21913   /* We don't record the PC in the dwarf frame information.  */
21914   dwarf_regs_mask &= ~(1 << PC_REGNUM);
21915 
21916   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21917     {
21918       if (mask & (1 << i))
21919 	num_regs++;
21920       if (dwarf_regs_mask & (1 << i))
21921 	num_dwarf_regs++;
21922     }
21923 
21924   gcc_assert (num_regs && num_regs <= 16);
21925   gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21926 
21927   /* For the body of the insn we are going to generate an UNSPEC in
21928      parallel with several USEs.  This allows the insn to be recognized
21929      by the push_multi pattern in the arm.md file.
21930 
21931      The body of the insn looks something like this:
21932 
21933        (parallel [
21934            (set (mem:BLK (pre_modify:SI (reg:SI sp)
21935 	                                (const_int:SI <num>)))
21936 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21937            (use (reg:SI XX))
21938            (use (reg:SI YY))
21939 	   ...
21940         ])
21941 
21942      For the frame note however, we try to be more explicit and actually
21943      show each register being stored into the stack frame, plus a (single)
21944      decrement of the stack pointer.  We do it this way in order to be
21945      friendly to the stack unwinding code, which only wants to see a single
21946      stack decrement per instruction.  The RTL we generate for the note looks
21947      something like this:
21948 
21949       (sequence [
21950            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21951            (set (mem:SI (reg:SI sp)) (reg:SI r4))
21952            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21953            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21954 	   ...
21955         ])
21956 
21957      FIXME: In an ideal world the PRE_MODIFY would not exist and
21958      instead we'd have a parallel expression detailing all
21959      the stores to the various memory addresses so that debug
21960      information is more up-to-date.  Remember, however, that the
21961      constraints of the push instruction must still be respected.
21962 
21963      Note also that this has to be taken care of for the VFP registers.
21964 
21965      For more see PR43399.  */
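  /* Purely illustrative sketch (not part of the original sources): for
     MASK == DWARF_REGS_MASK == {r4, r5, lr} this routine emits roughly

       push {r4, r5, lr}      @ stmfd sp!, {r4, r5, lr} in ARM state

     whose insn body is a PARALLEL of one PRE_MODIFY store wrapping r4 in the
     UNSPEC plus USEs of r5 and lr, while the attached REG_FRAME_RELATED_EXPR
     note is the SEQUENCE

       (set sp (plus sp (const_int -12)))
       (set (mem sp)              r4)
       (set (mem (plus sp 4))     r5)
       (set (mem (plus sp 8))     lr)

     which is what the unwinder actually consumes.  */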
21966 
21967   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21968   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21969   dwarf_par_index = 1;
21970 
21971   for (i = 0; i <= LAST_ARM_REGNUM; i++)
21972     {
21973       if (mask & (1 << i))
21974 	{
21975 	  reg = gen_rtx_REG (SImode, i);
21976 
21977 	  XVECEXP (par, 0, 0)
21978 	    = gen_rtx_SET (gen_frame_mem
21979 			   (BLKmode,
21980 			    gen_rtx_PRE_MODIFY (Pmode,
21981 						stack_pointer_rtx,
21982 						plus_constant
21983 						(Pmode, stack_pointer_rtx,
21984 						 -4 * num_regs))
21985 			    ),
21986 			   gen_rtx_UNSPEC (BLKmode,
21987 					   gen_rtvec (1, reg),
21988 					   UNSPEC_PUSH_MULT));
21989 
21990 	  if (dwarf_regs_mask & (1 << i))
21991 	    {
21992 	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21993 				 reg);
21994 	      RTX_FRAME_RELATED_P (tmp) = 1;
21995 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21996 	    }
21997 
21998 	  break;
21999 	}
22000     }
22001 
22002   for (j = 1, i++; j < num_regs; i++)
22003     {
22004       if (mask & (1 << i))
22005 	{
22006 	  reg = gen_rtx_REG (SImode, i);
22007 
22008 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22009 
22010 	  if (dwarf_regs_mask & (1 << i))
22011 	    {
22012 	      tmp
22013 		= gen_rtx_SET (gen_frame_mem
22014 			       (SImode,
22015 				plus_constant (Pmode, stack_pointer_rtx,
22016 					       4 * j)),
22017 			       reg);
22018 	      RTX_FRAME_RELATED_P (tmp) = 1;
22019 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22020 	    }
22021 
22022 	  j++;
22023 	}
22024     }
22025 
22026   par = emit_insn (par);
22027 
22028   tmp = gen_rtx_SET (stack_pointer_rtx,
22029 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22030   RTX_FRAME_RELATED_P (tmp) = 1;
22031   XVECEXP (dwarf, 0, 0) = tmp;
22032 
22033   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22034 
22035   return par;
22036 }
22037 
22038 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22039    SIZE is the offset to be adjusted.
22040    DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
22041 static void
22042 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22043 {
22044   rtx dwarf;
22045 
22046   RTX_FRAME_RELATED_P (insn) = 1;
22047   dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22048   add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22049 }
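/* For illustration only (not in the original sources): a caller that has just
   popped two core registers might use

     arm_add_cfa_adjust_cfa_note (insn, 8, stack_pointer_rtx, stack_pointer_rtx);

   which attaches the note (set sp (plus sp (const_int 8))) to INSN, so the
   unwinder sees a single explicit CFA adjustment.  */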
22050 
22051 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22052    SAVED_REGS_MASK shows which registers need to be restored.
22053 
22054    Unfortunately, since this insn does not reflect very well the actual
22055    semantics of the operation, we need to annotate the insn for the benefit
22056    of DWARF2 frame unwind information.  */
22057 static void
22058 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22059 {
22060   int num_regs = 0;
22061   int i, j;
22062   rtx par;
22063   rtx dwarf = NULL_RTX;
22064   rtx tmp, reg;
22065   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22066   int offset_adj;
22067   int emit_update;
22068 
22069   offset_adj = return_in_pc ? 1 : 0;
22070   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22071     if (saved_regs_mask & (1 << i))
22072       num_regs++;
22073 
22074   gcc_assert (num_regs && num_regs <= 16);
22075 
22076   /* If SP is in the reglist, then we don't emit an SP update insn.  */
22077   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22078 
22079   /* The parallel needs to hold num_regs SETs
22080      and one SET for the stack update.  */
22081   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22082 
22083   if (return_in_pc)
22084     XVECEXP (par, 0, 0) = ret_rtx;
22085 
22086   if (emit_update)
22087     {
22088       /* Increment the stack pointer, based on there being
22089          num_regs 4-byte registers to restore.  */
22090       tmp = gen_rtx_SET (stack_pointer_rtx,
22091                          plus_constant (Pmode,
22092                                         stack_pointer_rtx,
22093                                         4 * num_regs));
22094       RTX_FRAME_RELATED_P (tmp) = 1;
22095       XVECEXP (par, 0, offset_adj) = tmp;
22096     }
22097 
22098   /* Now restore every reg, which may include PC.  */
22099   for (j = 0, i = 0; j < num_regs; i++)
22100     if (saved_regs_mask & (1 << i))
22101       {
22102         reg = gen_rtx_REG (SImode, i);
22103         if ((num_regs == 1) && emit_update && !return_in_pc)
22104           {
22105             /* Emit single load with writeback.  */
22106             tmp = gen_frame_mem (SImode,
22107                                  gen_rtx_POST_INC (Pmode,
22108                                                    stack_pointer_rtx));
22109             tmp = emit_insn (gen_rtx_SET (reg, tmp));
22110             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22111             return;
22112           }
22113 
22114         tmp = gen_rtx_SET (reg,
22115                            gen_frame_mem
22116                            (SImode,
22117                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22118         RTX_FRAME_RELATED_P (tmp) = 1;
22119         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22120 
22121         /* We need to maintain a sequence for the DWARF info too.  As the
22122            DWARF info should not include PC, skip PC.  */
22123         if (i != PC_REGNUM)
22124           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22125 
22126         j++;
22127       }
22128 
22129   if (return_in_pc)
22130     par = emit_jump_insn (par);
22131   else
22132     par = emit_insn (par);
22133 
22134   REG_NOTES (par) = dwarf;
22135   if (!return_in_pc)
22136     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22137 				 stack_pointer_rtx, stack_pointer_rtx);
22138 }
22139 
22140 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22141    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22142 
22143    Unfortunately, since this insn does not reflect very well the actual
22144    semantics of the operation, we need to annotate the insn for the benefit
22145    of DWARF2 frame unwind information.  */
22146 static void
22147 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22148 {
22149   int i, j;
22150   rtx par;
22151   rtx dwarf = NULL_RTX;
22152   rtx tmp, reg;
22153 
22154   gcc_assert (num_regs && num_regs <= 32);
22155 
22156   /* Workaround ARM10 VFPr1 bug.  */
22157   if (num_regs == 2 && !arm_arch6)
22158     {
22159       if (first_reg == 15)
22160         first_reg--;
22161 
22162       num_regs++;
22163     }
22164 
22165   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22166      there could be up to 32 D-registers to restore.
22167      If there are more than 16 D-registers, make two recursive calls,
22168      each of which emits one pop_multi instruction.  */
22169   if (num_regs > 16)
22170     {
22171       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22172       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22173       return;
22174     }
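  /* Illustrative example (not from the original sources): NUM_REGS == 20
     results in two pop_multi insns, the first restoring 16 D-registers and
     the second restoring the remaining 4.  */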
22175 
22176   /* The parallel needs to hold num_regs SETs
22177      and one SET for the stack update.  */
22178   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22179 
22180   /* Increment the stack pointer, based on there being
22181      num_regs 8-byte registers to restore.  */
22182   tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22183   RTX_FRAME_RELATED_P (tmp) = 1;
22184   XVECEXP (par, 0, 0) = tmp;
22185 
22186   /* Now show every reg that will be restored, using a SET for each.  */
22187   for (j = 0, i=first_reg; j < num_regs; i += 2)
22188     {
22189       reg = gen_rtx_REG (DFmode, i);
22190 
22191       tmp = gen_rtx_SET (reg,
22192                          gen_frame_mem
22193                          (DFmode,
22194                           plus_constant (Pmode, base_reg, 8 * j)));
22195       RTX_FRAME_RELATED_P (tmp) = 1;
22196       XVECEXP (par, 0, j + 1) = tmp;
22197 
22198       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22199 
22200       j++;
22201     }
22202 
22203   par = emit_insn (par);
22204   REG_NOTES (par) = dwarf;
22205 
22206   /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP.  */
22207   if (REGNO (base_reg) == IP_REGNUM)
22208     {
22209       RTX_FRAME_RELATED_P (par) = 1;
22210       add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22211     }
22212   else
22213     arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22214 				 base_reg, base_reg);
22215 }
22216 
22217 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
22218    even number of registers is being popped, multiple LDRD patterns are created
22219    for all register pairs.  If an odd number of registers is popped, the last
22220    register is loaded using an LDR pattern.  */
22221 static void
22222 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22223 {
22224   int num_regs = 0;
22225   int i, j;
22226   rtx par = NULL_RTX;
22227   rtx dwarf = NULL_RTX;
22228   rtx tmp, reg, tmp1;
22229   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22230 
22231   for (i = 0; i <= LAST_ARM_REGNUM; i++)
22232     if (saved_regs_mask & (1 << i))
22233       num_regs++;
22234 
22235   gcc_assert (num_regs && num_regs <= 16);
22236 
22237   /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
22238      to be popped.  So, if num_regs was even, it now becomes odd and
22239      we can generate a pop with PC.  If num_regs was odd, it is now
22240      even, and an ldr with return can be generated for PC.  */
22241   if (return_in_pc)
22242     num_regs--;
22243 
22244   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22245 
22246   /* Var j iterates over all the registers to gather all the registers in
22247      saved_regs_mask.  Var i gives the index of a saved register within the
22248      stack frame.  A PARALLEL RTX of a register pair is created here, so that
22249      the pattern for LDRD can be matched.  As PC is always the last register
22250      to be popped, and we have already decremented num_regs if PC is in the
22251      mask, we don't have to worry about PC in this loop.  */
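  /* Purely illustrative walk-through (not from the original sources): for
     SAVED_REGS_MASK == {r4, r5, r6, pc}, num_regs drops from 4 to 3 because
     of PC, the loop below emits one LDRD-shaped PARALLEL restoring r4/r5
     from [sp] and [sp, #4], the stack pointer is then bumped past those
     8 bytes, and {r6, pc} is finally handed to arm_emit_multi_reg_pop,
     which pops r6 and returns via PC.  */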
22252   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22253     if (saved_regs_mask & (1 << j))
22254       {
22255         /* Create RTX for memory load.  */
22256         reg = gen_rtx_REG (SImode, j);
22257         tmp = gen_rtx_SET (reg,
22258                            gen_frame_mem (SImode,
22259                                plus_constant (Pmode,
22260                                               stack_pointer_rtx, 4 * i)));
22261         RTX_FRAME_RELATED_P (tmp) = 1;
22262 
22263         if (i % 2 == 0)
22264           {
22265             /* When saved-register index (i) is even, the RTX to be emitted is
22266                yet to be created.  Hence create it first.  The LDRD pattern we
22267                are generating is :
22268                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22269                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22270                where target registers need not be consecutive.  */
22271             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22272             dwarf = NULL_RTX;
22273           }
22274 
22275         /* The i-th register is added to the PARALLEL RTX.  If i is even,
22276            reg_i is added as the 0th element; if i is odd, reg_i is added as
22277            the 1st element of the LDRD pattern shown above.  */
22278         XVECEXP (par, 0, (i % 2)) = tmp;
22279         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22280 
22281         if ((i % 2) == 1)
22282           {
22283             /* When saved-register index (i) is odd, RTXs for both the registers
22284                to be loaded are generated in above given LDRD pattern, and the
22285                pattern can be emitted now.  */
22286             par = emit_insn (par);
22287             REG_NOTES (par) = dwarf;
22288 	    RTX_FRAME_RELATED_P (par) = 1;
22289           }
22290 
22291         i++;
22292       }
22293 
22294   /* If the number of registers pushed is odd AND return_in_pc is false, OR
22295      the number of registers is even AND return_in_pc is true, the last
22296      register is popped using LDR.  It can be PC as well.  Hence, adjust the
22297      stack first and then use LDR with post increment.  */
22298 
22299   /* Increment the stack pointer, based on there being
22300      num_regs 4-byte registers to restore.  */
22301   tmp = gen_rtx_SET (stack_pointer_rtx,
22302                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22303   RTX_FRAME_RELATED_P (tmp) = 1;
22304   tmp = emit_insn (tmp);
22305   if (!return_in_pc)
22306     {
22307       arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22308 				   stack_pointer_rtx, stack_pointer_rtx);
22309     }
22310 
22311   dwarf = NULL_RTX;
22312 
22313   if (((num_regs % 2) == 1 && !return_in_pc)
22314       || ((num_regs % 2) == 0 && return_in_pc))
22315     {
22316       /* Scan for the single register to be popped.  Skip until the saved
22317          register is found.  */
22318       for (; (saved_regs_mask & (1 << j)) == 0; j++);
22319 
22320       /* Gen LDR with post increment here.  */
22321       tmp1 = gen_rtx_MEM (SImode,
22322                           gen_rtx_POST_INC (SImode,
22323                                             stack_pointer_rtx));
22324       set_mem_alias_set (tmp1, get_frame_alias_set ());
22325 
22326       reg = gen_rtx_REG (SImode, j);
22327       tmp = gen_rtx_SET (reg, tmp1);
22328       RTX_FRAME_RELATED_P (tmp) = 1;
22329       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22330 
22331       if (return_in_pc)
22332         {
22333           /* If return_in_pc, j must be PC_REGNUM.  */
22334           gcc_assert (j == PC_REGNUM);
22335           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22336           XVECEXP (par, 0, 0) = ret_rtx;
22337           XVECEXP (par, 0, 1) = tmp;
22338           par = emit_jump_insn (par);
22339         }
22340       else
22341         {
22342           par = emit_insn (tmp);
22343 	  REG_NOTES (par) = dwarf;
22344 	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22345 				       stack_pointer_rtx, stack_pointer_rtx);
22346         }
22347 
22348     }
22349   else if ((num_regs % 2) == 1 && return_in_pc)
22350     {
22351       /* There are 2 registers to be popped.  So, generate the pattern
22352          pop_multiple_with_stack_update_and_return to pop in PC.  */
22353       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22354     }
22355 
22356   return;
22357 }
22358 
22359 /* LDRD in ARM mode needs consecutive registers as operands.  This function
22360    emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22361    offset addressing and then generates one separate stack update.  This provides
22362    more scheduling freedom, compared to writeback on every load.  However,
22363    if the function returns using load into PC directly
22364    (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22365    before the last load.  TODO: Add a peephole optimization to recognize
22366    the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
22367    peephole optimization to merge the load at stack-offset zero
22368    with the stack update instruction using load with writeback
22369    in post-index addressing mode.  */
22370 static void
22371 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22372 {
22373   int j = 0;
22374   int offset = 0;
22375   rtx par = NULL_RTX;
22376   rtx dwarf = NULL_RTX;
22377   rtx tmp, mem;
22378 
22379   /* Restore saved registers.  */
22380   gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22381   j = 0;
22382   while (j <= LAST_ARM_REGNUM)
22383     if (saved_regs_mask & (1 << j))
22384       {
22385         if ((j % 2) == 0
22386             && (saved_regs_mask & (1 << (j + 1)))
22387             && (j + 1) != PC_REGNUM)
22388           {
22389             /* Current register and next register form register pair for which
22390                LDRD can be generated. PC is always the last register popped, and
22391                we handle it separately.  */
22392             if (offset > 0)
22393               mem = gen_frame_mem (DImode,
22394                                    plus_constant (Pmode,
22395                                                   stack_pointer_rtx,
22396                                                   offset));
22397             else
22398               mem = gen_frame_mem (DImode, stack_pointer_rtx);
22399 
22400             tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22401             tmp = emit_insn (tmp);
22402 	    RTX_FRAME_RELATED_P (tmp) = 1;
22403 
22404             /* Generate dwarf info.  */
22405 
22406             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22407                                     gen_rtx_REG (SImode, j),
22408                                     NULL_RTX);
22409             dwarf = alloc_reg_note (REG_CFA_RESTORE,
22410                                     gen_rtx_REG (SImode, j + 1),
22411                                     dwarf);
22412 
22413             REG_NOTES (tmp) = dwarf;
22414 
22415             offset += 8;
22416             j += 2;
22417           }
22418         else if (j != PC_REGNUM)
22419           {
22420             /* Emit a single word load.  */
22421             if (offset > 0)
22422               mem = gen_frame_mem (SImode,
22423                                    plus_constant (Pmode,
22424                                                   stack_pointer_rtx,
22425                                                   offset));
22426             else
22427               mem = gen_frame_mem (SImode, stack_pointer_rtx);
22428 
22429             tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22430             tmp = emit_insn (tmp);
22431 	    RTX_FRAME_RELATED_P (tmp) = 1;
22432 
22433             /* Generate dwarf info.  */
22434             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22435                                               gen_rtx_REG (SImode, j),
22436                                               NULL_RTX);
22437 
22438             offset += 4;
22439             j += 1;
22440           }
22441         else /* j == PC_REGNUM */
22442           j++;
22443       }
22444     else
22445       j++;
22446 
22447   /* Update the stack.  */
22448   if (offset > 0)
22449     {
22450       tmp = gen_rtx_SET (stack_pointer_rtx,
22451                          plus_constant (Pmode,
22452                                         stack_pointer_rtx,
22453                                         offset));
22454       tmp = emit_insn (tmp);
22455       arm_add_cfa_adjust_cfa_note (tmp, offset,
22456 				   stack_pointer_rtx, stack_pointer_rtx);
22457       offset = 0;
22458     }
22459 
22460   if (saved_regs_mask & (1 << PC_REGNUM))
22461     {
22462       /* Only PC is to be popped.  */
22463       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22464       XVECEXP (par, 0, 0) = ret_rtx;
22465       tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22466                          gen_frame_mem (SImode,
22467                                         gen_rtx_POST_INC (SImode,
22468                                                           stack_pointer_rtx)));
22469       RTX_FRAME_RELATED_P (tmp) = 1;
22470       XVECEXP (par, 0, 1) = tmp;
22471       par = emit_jump_insn (par);
22472 
22473       /* Generate dwarf info.  */
22474       dwarf = alloc_reg_note (REG_CFA_RESTORE,
22475                               gen_rtx_REG (SImode, PC_REGNUM),
22476                               NULL_RTX);
22477       REG_NOTES (par) = dwarf;
22478       arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22479 				   stack_pointer_rtx, stack_pointer_rtx);
22480     }
22481 }
22482 
22483 /* Calculate the size of the return value that is passed in registers.  */
22484 static unsigned
22485 arm_size_return_regs (void)
22486 {
22487   machine_mode mode;
22488 
22489   if (crtl->return_rtx != 0)
22490     mode = GET_MODE (crtl->return_rtx);
22491   else
22492     mode = DECL_MODE (DECL_RESULT (current_function_decl));
22493 
22494   return GET_MODE_SIZE (mode);
22495 }
22496 
22497 /* Return true if the current function needs to save/restore LR.  */
22498 static bool
22499 thumb_force_lr_save (void)
22500 {
22501   return !cfun->machine->lr_save_eliminated
22502 	 && (!crtl->is_leaf
22503 	     || thumb_far_jump_used_p ()
22504 	     || df_regs_ever_live_p (LR_REGNUM));
22505 }
22506 
22507 /* Return true if CALL is an indirect tail call.  In that case we
22508    do not know whether r3 will be available, since the call target
22509    may be held in any core register, including r3.  */
22510 static bool
22511 is_indirect_tailcall_p (rtx call)
22512 {
22513   rtx pat = PATTERN (call);
22514 
22515   /* Indirect tail call.  */
22516   pat = XVECEXP (pat, 0, 0);
22517   if (GET_CODE (pat) == SET)
22518     pat = SET_SRC (pat);
22519 
22520   pat = XEXP (XEXP (pat, 0), 0);
22521   return REG_P (pat);
22522 }
22523 
22524 /* Return true if r3 might be used by any of the tail call insns in the
22525    current function.  */
22526 static bool
22527 any_sibcall_could_use_r3 (void)
22528 {
22529   edge_iterator ei;
22530   edge e;
22531 
22532   if (!crtl->tail_call_emit)
22533     return false;
22534   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22535     if (e->flags & EDGE_SIBCALL)
22536       {
22537 	rtx_insn *call = BB_END (e->src);
22538 	if (!CALL_P (call))
22539 	  call = prev_nonnote_nondebug_insn (call);
22540 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22541 	if (find_regno_fusage (call, USE, 3)
22542 	    || is_indirect_tailcall_p (call))
22543 	  return true;
22544       }
22545   return false;
22546 }
22547 
22548 
22549 /* Compute the distance from register FROM to register TO.
22550    These can be the arg pointer (26), the soft frame pointer (25),
22551    the stack pointer (13) or the hard frame pointer (11).
22552    In thumb mode r7 is used as the soft frame pointer, if needed.
22553    Typical stack layout looks like this:
22554 
22555        old stack pointer -> |    |
22556                              ----
22557                             |    | \
22558                             |    |   saved arguments for
22559                             |    |   vararg functions
22560 			    |    | /
22561                               --
22562    hard FP & arg pointer -> |    | \
22563                             |    |   stack
22564                             |    |   frame
22565                             |    | /
22566                               --
22567                             |    | \
22568                             |    |   call saved
22569                             |    |   registers
22570       soft frame pointer -> |    | /
22571                               --
22572                             |    | \
22573                             |    |   local
22574                             |    |   variables
22575      locals base pointer -> |    | /
22576                               --
22577                             |    | \
22578                             |    |   outgoing
22579                             |    |   arguments
22580    current stack pointer -> |    | /
22581                               --
22582 
22583   For a given function some or all of these stack components
22584   may not be needed, giving rise to the possibility of
22585   eliminating some of the registers.
22586 
22587   The values returned by this function must reflect the behavior
22588   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22589 
22590   The sign of the number returned reflects the direction of stack
22591   growth, so the values are positive for all eliminations except
22592   from the soft frame pointer to the hard frame pointer.
22593 
22594   SFP may point just inside the local variables block to ensure correct
22595   alignment.  */
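  /* A purely illustrative example (invented numbers, not from the sources):
     with no pretend args (saved_args == 0), four core registers saved
     (saved_regs == 16, soft_frame == 16), 8 bytes of locals
     (locals_base == 24) and no outgoing arguments (outgoing_args == 24),
     arm_compute_initial_elimination_offset below yields

       ARG_POINTER   -> STACK_POINTER : 24 - (0 + 4) == 20
       ARG_POINTER   -> FRAME_POINTER : 16 - 0       == 16
       FRAME_POINTER -> STACK_POINTER : 24 - 16      ==  8.  */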
22596 
22597 
22598 /* Return cached stack offsets.  */
22599 
22600 static arm_stack_offsets *
22601 arm_get_frame_offsets (void)
22602 {
22603   struct arm_stack_offsets *offsets;
22604 
22605   offsets = &cfun->machine->stack_offsets;
22606 
22607   return offsets;
22608 }
22609 
22610 
22611 /* Calculate stack offsets.  These are used to calculate register elimination
22612    offsets and in prologue/epilogue code.  Also calculates which registers
22613    should be saved.  */
22614 
22615 static void
22616 arm_compute_frame_layout (void)
22617 {
22618   struct arm_stack_offsets *offsets;
22619   unsigned long func_type;
22620   int saved;
22621   int core_saved;
22622   HOST_WIDE_INT frame_size;
22623   int i;
22624 
22625   offsets = &cfun->machine->stack_offsets;
22626 
22627   /* Initially this is the size of the local variables.  It will be translated
22628      into an offset once we have determined the size of preceding data.  */
22629   frame_size = ROUND_UP_WORD (get_frame_size ());
22630 
22631   /* Space for variadic functions.  */
22632   offsets->saved_args = crtl->args.pretend_args_size;
22633 
22634   /* In Thumb mode this is incorrect, but never used.  */
22635   offsets->frame
22636     = (offsets->saved_args
22637        + arm_compute_static_chain_stack_bytes ()
22638        + (frame_pointer_needed ? 4 : 0));
22639 
22640   if (TARGET_32BIT)
22641     {
22642       unsigned int regno;
22643 
22644       offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22645       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22646       saved = core_saved;
22647 
22648       /* We know that SP will be doubleword aligned on entry, and we must
22649 	 preserve that condition at any subroutine call.  We also require the
22650 	 soft frame pointer to be doubleword aligned.  */
22651 
22652       if (TARGET_REALLY_IWMMXT)
22653 	{
22654 	  /* Check for the call-saved iWMMXt registers.  */
22655 	  for (regno = FIRST_IWMMXT_REGNUM;
22656 	       regno <= LAST_IWMMXT_REGNUM;
22657 	       regno++)
22658 	    if (df_regs_ever_live_p (regno)
22659 		&& !call_used_or_fixed_reg_p (regno))
22660 	      saved += 8;
22661 	}
22662 
22663       func_type = arm_current_func_type ();
22664       /* Space for saved VFP registers.  */
22665       if (! IS_VOLATILE (func_type)
22666 	  && TARGET_VFP_BASE)
22667 	saved += arm_get_vfp_saved_size ();
22668 
22669       /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22670 	 nonsecure entry functions with VSTR/VLDR.  */
22671       if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22672 	saved += 4;
22673     }
22674   else /* TARGET_THUMB1 */
22675     {
22676       offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22677       core_saved = bit_count (offsets->saved_regs_mask) * 4;
22678       saved = core_saved;
22679       if (TARGET_BACKTRACE)
22680 	saved += 16;
22681     }
22682 
22683   /* Saved registers include the stack frame.  */
22684   offsets->saved_regs
22685     = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22686   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22687 
22688   /* A leaf function does not need any stack alignment if it has nothing
22689      on the stack.  */
22690   if (crtl->is_leaf && frame_size == 0
22691       /* However if it calls alloca(), we have a dynamically allocated
22692 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
22693       && ! cfun->calls_alloca)
22694     {
22695       offsets->outgoing_args = offsets->soft_frame;
22696       offsets->locals_base = offsets->soft_frame;
22697       return;
22698     }
22699 
22700   /* Ensure SFP has the correct alignment.  */
22701   if (ARM_DOUBLEWORD_ALIGN
22702       && (offsets->soft_frame & 7))
22703     {
22704       offsets->soft_frame += 4;
22705       /* Try to align stack by pushing an extra reg.  Don't bother doing this
22706          when there is a stack frame as the alignment will be rolled into
22707 	 the normal stack adjustment.  */
22708       if (frame_size + crtl->outgoing_args_size == 0)
22709 	{
22710 	  int reg = -1;
22711 
22712 	  /* Register r3 is caller-saved.  Normally it does not need to be
22713 	     saved on entry by the prologue.  However if we choose to save
22714 	     it for padding then we may confuse the compiler into thinking
22715 	     a prologue sequence is required when in fact it is not.  This
22716 	     will occur when shrink-wrapping if r3 is used as a scratch
22717 	     register and there are no other callee-saved writes.
22718 
22719 	     This situation can be avoided when other callee-saved registers
22720 	     are available and r3 is not mandatory if we choose a callee-saved
22721 	     register for padding.  */
22722 	  bool prefer_callee_reg_p = false;
22723 
22724 	  /* If it is safe to use r3, then do so.  This sometimes
22725 	     generates better code on Thumb-2 by avoiding the need to
22726 	     use 32-bit push/pop instructions.  */
22727           if (! any_sibcall_could_use_r3 ()
22728 	      && arm_size_return_regs () <= 12
22729 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
22730 	      && (TARGET_THUMB2
22731 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22732 	    {
22733 	      reg = 3;
22734 	      if (!TARGET_THUMB2)
22735 		prefer_callee_reg_p = true;
22736 	    }
22737 	  if (reg == -1
22738 	      || prefer_callee_reg_p)
22739 	    {
22740 	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22741 		{
22742 		  /* Avoid fixed registers; they may be changed at
22743 		     arbitrary times so it's unsafe to restore them
22744 		     during the epilogue.  */
22745 		  if (!fixed_regs[i]
22746 		      && (offsets->saved_regs_mask & (1 << i)) == 0)
22747 		    {
22748 		      reg = i;
22749 		      break;
22750 		    }
22751 		}
22752 	    }
22753 
22754 	  if (reg != -1)
22755 	    {
22756 	      offsets->saved_regs += 4;
22757 	      offsets->saved_regs_mask |= (1 << reg);
22758 	    }
22759 	}
22760     }
22761 
22762   offsets->locals_base = offsets->soft_frame + frame_size;
22763   offsets->outgoing_args = (offsets->locals_base
22764 			    + crtl->outgoing_args_size);
22765 
22766   if (ARM_DOUBLEWORD_ALIGN)
22767     {
22768       /* Ensure SP remains doubleword aligned.  */
22769       if (offsets->outgoing_args & 7)
22770 	offsets->outgoing_args += 4;
22771       gcc_assert (!(offsets->outgoing_args & 7));
22772     }
22773 }
22774 
22775 
22776 /* Calculate the relative offsets for the different stack pointers.  Positive
22777    offsets are in the direction of stack growth.  */
22778 
22779 HOST_WIDE_INT
22780 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22781 {
22782   arm_stack_offsets *offsets;
22783 
22784   offsets = arm_get_frame_offsets ();
22785 
22786   /* OK, now we have enough information to compute the distances.
22787      There must be an entry in these switch tables for each pair
22788      of registers in ELIMINABLE_REGS, even if some of the entries
22789      seem to be redundant or useless.  */
22790   switch (from)
22791     {
22792     case ARG_POINTER_REGNUM:
22793       switch (to)
22794 	{
22795 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22796 	  return 0;
22797 
22798 	case FRAME_POINTER_REGNUM:
22799 	  /* This is the reverse of the soft frame pointer
22800 	     to hard frame pointer elimination below.  */
22801 	  return offsets->soft_frame - offsets->saved_args;
22802 
22803 	case ARM_HARD_FRAME_POINTER_REGNUM:
22804 	  /* This is only non-zero in the case where the static chain register
22805 	     is stored above the frame.  */
22806 	  return offsets->frame - offsets->saved_args - 4;
22807 
22808 	case STACK_POINTER_REGNUM:
22809 	  /* If nothing has been pushed on the stack at all
22810 	     then this will return -4.  This *is* correct!  */
22811 	  return offsets->outgoing_args - (offsets->saved_args + 4);
22812 
22813 	default:
22814 	  gcc_unreachable ();
22815 	}
22816       gcc_unreachable ();
22817 
22818     case FRAME_POINTER_REGNUM:
22819       switch (to)
22820 	{
22821 	case THUMB_HARD_FRAME_POINTER_REGNUM:
22822 	  return 0;
22823 
22824 	case ARM_HARD_FRAME_POINTER_REGNUM:
22825 	  /* The hard frame pointer points to the top entry in the
22826 	     stack frame.  The soft frame pointer to the bottom entry
22827 	     in the stack frame.  If there is no stack frame at all,
22828 	     then they are identical.  */
22829 
22830 	  return offsets->frame - offsets->soft_frame;
22831 
22832 	case STACK_POINTER_REGNUM:
22833 	  return offsets->outgoing_args - offsets->soft_frame;
22834 
22835 	default:
22836 	  gcc_unreachable ();
22837 	}
22838       gcc_unreachable ();
22839 
22840     default:
22841       /* You cannot eliminate from the stack pointer.
22842 	 In theory you could eliminate from the hard frame
22843 	 pointer to the stack pointer, but this will never
22844 	 happen, since if a stack frame is not needed the
22845 	 hard frame pointer will never be used.  */
22846       gcc_unreachable ();
22847     }
22848 }
22849 
22850 /* Given FROM and TO register numbers, say whether this elimination is
22851    allowed.  Frame pointer elimination is automatically handled.
22852 
22853    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
22854    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
22855    pointer, we must eliminate FRAME_POINTER_REGNUM into
22856    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22857    ARG_POINTER_REGNUM.  */
22858 
22859 bool
22860 arm_can_eliminate (const int from, const int to)
22861 {
22862   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22863           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22864           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22865           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22866            true);
22867 }
22868 
22869 /* Emit RTL to save coprocessor registers on function entry.  Returns the
22870    number of bytes pushed.  */
22871 
22872 static int
22873 arm_save_coproc_regs(void)
22874 {
22875   int saved_size = 0;
22876   unsigned reg;
22877   unsigned start_reg;
22878   rtx insn;
22879 
22880   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22881     if (df_regs_ever_live_p (reg) && !call_used_or_fixed_reg_p (reg))
22882       {
22883 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22884 	insn = gen_rtx_MEM (V2SImode, insn);
22885 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22886 	RTX_FRAME_RELATED_P (insn) = 1;
22887 	saved_size += 8;
22888       }
22889 
22890   if (TARGET_VFP_BASE)
22891     {
22892       start_reg = FIRST_VFP_REGNUM;
22893 
22894       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22895 	{
22896 	  if ((!df_regs_ever_live_p (reg) || call_used_or_fixed_reg_p (reg))
22897 	      && (!df_regs_ever_live_p (reg + 1)
22898 		  || call_used_or_fixed_reg_p (reg + 1)))
22899 	    {
22900 	      if (start_reg != reg)
22901 		saved_size += vfp_emit_fstmd (start_reg,
22902 					      (reg - start_reg) / 2);
22903 	      start_reg = reg + 2;
22904 	    }
22905 	}
22906       if (start_reg != reg)
22907 	saved_size += vfp_emit_fstmd (start_reg,
22908 				      (reg - start_reg) / 2);
22909     }
22910   return saved_size;
22911 }
22912 
22913 
22914 /* Set the Thumb frame pointer from the stack pointer.  */
22915 
22916 static void
22917 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22918 {
22919   HOST_WIDE_INT amount;
22920   rtx insn, dwarf;
22921 
22922   amount = offsets->outgoing_args - offsets->locals_base;
22923   if (amount < 1024)
22924     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22925 				  stack_pointer_rtx, GEN_INT (amount)));
22926   else
22927     {
22928       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22929       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
22930          expects the first two operands to be the same.  */
22931       if (TARGET_THUMB2)
22932 	{
22933 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22934 					stack_pointer_rtx,
22935 					hard_frame_pointer_rtx));
22936 	}
22937       else
22938 	{
22939 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22940 					hard_frame_pointer_rtx,
22941 					stack_pointer_rtx));
22942 	}
22943       dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22944 			   plus_constant (Pmode, stack_pointer_rtx, amount));
22945       RTX_FRAME_RELATED_P (dwarf) = 1;
22946       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22947     }
22948 
22949   RTX_FRAME_RELATED_P (insn) = 1;
22950 }
22951 
22952 struct scratch_reg {
22953   rtx reg;
22954   bool saved;
22955 };
22956 
22957 /* Return a short-lived scratch register for use as a 2nd scratch register on
22958    function entry after the registers are saved in the prologue.  This register
22959    must be released by means of release_scratch_register_on_entry.  IP is not
22960    considered since it is always used as the 1st scratch register if available.
22961 
22962    REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22963    mask of live registers.  */
22964 
22965 static void
22966 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22967 			       unsigned long live_regs)
22968 {
22969   int regno = -1;
22970 
22971   sr->saved = false;
22972 
22973   if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22974     regno = LR_REGNUM;
22975   else
22976     {
22977       unsigned int i;
22978 
22979       for (i = 4; i < 11; i++)
22980 	if (regno1 != i && (live_regs & (1 << i)) != 0)
22981 	  {
22982 	    regno = i;
22983 	    break;
22984 	  }
22985 
22986       if (regno < 0)
22987 	{
22988 	  /* If IP is used as the 1st scratch register for a nested function,
22989 	     then either r3 wasn't available or is used to preserve IP.  */
22990 	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22991 	    regno1 = 3;
22992 	  regno = (regno1 == 3 ? 2 : 3);
22993 	  sr->saved
22994 	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22995 			       regno);
22996 	}
22997     }
22998 
22999   sr->reg = gen_rtx_REG (SImode, regno);
23000   if (sr->saved)
23001     {
23002       rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23003       rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23004       rtx x = gen_rtx_SET (stack_pointer_rtx,
23005 		           plus_constant (Pmode, stack_pointer_rtx, -4));
23006       RTX_FRAME_RELATED_P (insn) = 1;
23007       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23008     }
23009 }
23010 
23011 /* Release a scratch register obtained from the preceding function.  */
23012 
23013 static void
23014 release_scratch_register_on_entry (struct scratch_reg *sr)
23015 {
23016   if (sr->saved)
23017     {
23018       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23019       rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23020       rtx x = gen_rtx_SET (stack_pointer_rtx,
23021 			   plus_constant (Pmode, stack_pointer_rtx, 4));
23022       RTX_FRAME_RELATED_P (insn) = 1;
23023       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23024     }
23025 }
23026 
23027 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23028 
23029 #if PROBE_INTERVAL > 4096
23030 #error Cannot use indexed addressing mode for stack probing
23031 #endif
23032 
23033 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23034    inclusive.  These are offsets from the current stack pointer.  REGNO1
23035    is the index number of the 1st scratch register and LIVE_REGS is the
23036    mask of live registers.  */
23037 
23038 static void
23039 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23040 			    unsigned int regno1, unsigned long live_regs)
23041 {
23042   rtx reg1 = gen_rtx_REG (Pmode, regno1);
23043 
23044   /* See if we have a constant small number of probes to generate.  If so,
23045      that's the easy case.  */
23046   if (size <= PROBE_INTERVAL)
23047     {
23048       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23049       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23050       emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23051     }
23052 
23053   /* The run-time loop is made up of 10 insns in the generic case while the
23054      compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
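  /* Illustrative example (assuming the default PROBE_INTERVAL of 4096 bytes,
     not taken from the original sources): SIZE == 9000 takes the branch below
     and ends up probing at FIRST + 4096, FIRST + 8192 and FIRST + 9000 bytes
     below the current stack pointer.  */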
23055   else if (size <= 5 * PROBE_INTERVAL)
23056     {
23057       HOST_WIDE_INT i, rem;
23058 
23059       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23060       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23061       emit_stack_probe (reg1);
23062 
23063       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23064 	 it exceeds SIZE.  If only two probes are needed, this will not
23065 	 generate any code.  Then probe at FIRST + SIZE.  */
23066       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23067 	{
23068 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23069 	  emit_stack_probe (reg1);
23070 	}
23071 
23072       rem = size - (i - PROBE_INTERVAL);
23073       if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23074 	{
23075 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23076 	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23077 	}
23078       else
23079 	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23080     }
23081 
23082   /* Otherwise, do the same as above, but in a loop.  Note that we must be
23083      extra careful with variables wrapping around because we might be at
23084      the very top (or the very bottom) of the address space and we have
23085      to be able to handle this case properly; in particular, we use an
23086      equality test for the loop condition.  */
23087   else
23088     {
23089       HOST_WIDE_INT rounded_size;
23090       struct scratch_reg sr;
23091 
23092       get_scratch_register_on_entry (&sr, regno1, live_regs);
23093 
23094       emit_move_insn (reg1, GEN_INT (first));
23095 
23096 
23097       /* Step 1: round SIZE to the previous multiple of the interval.  */
23098 
23099       rounded_size = size & -PROBE_INTERVAL;
23100       emit_move_insn (sr.reg, GEN_INT (rounded_size));
23101 
23102 
23103       /* Step 2: compute initial and final value of the loop counter.  */
23104 
23105       /* TEST_ADDR = SP + FIRST.  */
23106       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23107 
23108       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
23109       emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23110 
23111 
23112       /* Step 3: the loop
23113 
23114 	 do
23115 	   {
23116 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23117 	     probe at TEST_ADDR
23118 	   }
23119 	 while (TEST_ADDR != LAST_ADDR)
23120 
23121 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23122 	 until it is equal to ROUNDED_SIZE.  */
23123 
23124       emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23125 
23126 
23127       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23128 	 that SIZE is equal to ROUNDED_SIZE.  */
23129 
23130       if (size != rounded_size)
23131 	{
23132 	  HOST_WIDE_INT rem = size - rounded_size;
23133 
23134 	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23135 	    {
23136 	      emit_set_insn (sr.reg,
23137 			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23138 	      emit_stack_probe (plus_constant (Pmode, sr.reg,
23139 					       PROBE_INTERVAL - rem));
23140 	    }
23141 	  else
23142 	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23143 	}
23144 
23145       release_scratch_register_on_entry (&sr);
23146     }
23147 
23148   /* Make sure nothing is scheduled before we are done.  */
23149   emit_insn (gen_blockage ());
23150 }
23151 
23152 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
23153    absolute addresses.  */
23154 
23155 const char *
23156 output_probe_stack_range (rtx reg1, rtx reg2)
23157 {
23158   static int labelno = 0;
23159   char loop_lab[32];
23160   rtx xops[2];
23161 
23162   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23163 
23164   /* Loop.  */
23165   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23166 
23167   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
23168   xops[0] = reg1;
23169   xops[1] = GEN_INT (PROBE_INTERVAL);
23170   output_asm_insn ("sub\t%0, %0, %1", xops);
23171 
23172   /* Probe at TEST_ADDR.  */
23173   output_asm_insn ("str\tr0, [%0, #0]", xops);
23174 
23175   /* Test if TEST_ADDR == LAST_ADDR.  */
23176   xops[1] = reg2;
23177   output_asm_insn ("cmp\t%0, %1", xops);
23178 
23179   /* Branch.  */
23180   fputs ("\tbne\t", asm_out_file);
23181   assemble_name_raw (asm_out_file, loop_lab);
23182   fputc ('\n', asm_out_file);
23183 
23184   return "";
23185 }
23186 
23187 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23188    function.  */
23189 void
23190 arm_expand_prologue (void)
23191 {
23192   rtx amount;
23193   rtx insn;
23194   rtx ip_rtx;
23195   unsigned long live_regs_mask;
23196   unsigned long func_type;
23197   int fp_offset = 0;
23198   int saved_pretend_args = 0;
23199   int saved_regs = 0;
23200   unsigned HOST_WIDE_INT args_to_push;
23201   HOST_WIDE_INT size;
23202   arm_stack_offsets *offsets;
23203   bool clobber_ip;
23204 
23205   func_type = arm_current_func_type ();
23206 
23207   /* Naked functions don't have prologues.  */
23208   if (IS_NAKED (func_type))
23209     {
23210       if (flag_stack_usage_info)
23211 	current_function_static_stack_size = 0;
23212       return;
23213     }
23214 
23215   /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally.  */
23216   args_to_push = crtl->args.pretend_args_size;
23217 
23218   /* Compute which register we will have to save onto the stack.  */
23219   offsets = arm_get_frame_offsets ();
23220   live_regs_mask = offsets->saved_regs_mask;
23221 
23222   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23223 
23224   if (IS_STACKALIGN (func_type))
23225     {
23226       rtx r0, r1;
23227 
23228       /* Handle a word-aligned stack pointer.  We generate the following:
23229 
23230 	  mov r0, sp
23231 	  bic r1, r0, #7
23232 	  mov sp, r1
23233 	  <save and restore r0 in normal prologue/epilogue>
23234 	  mov sp, r0
23235 	  bx lr
23236 
23237 	 The unwinder doesn't need to know about the stack realignment.
23238 	 Just tell it we saved SP in r0.  */
23239       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23240 
23241       r0 = gen_rtx_REG (SImode, R0_REGNUM);
23242       r1 = gen_rtx_REG (SImode, R1_REGNUM);
23243 
23244       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23245       RTX_FRAME_RELATED_P (insn) = 1;
23246       add_reg_note (insn, REG_CFA_REGISTER, NULL);
23247 
23248       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23249 
23250       /* ??? The CFA changes here, which may cause GDB to conclude that it
23251 	 has entered a different function.  That said, the unwind info is
23252 	 correct, individually, before and after this instruction because
23253 	 we've described the save of SP, which will override the default
23254 	 handling of SP as restoring from the CFA.  */
23255       emit_insn (gen_movsi (stack_pointer_rtx, r1));
23256     }
23257 
23258   /* Let's compute the static_chain_stack_bytes required and store it.  Right
23259      now the value must be -1 as stored by arm_init_machine_status ().  */
23260   cfun->machine->static_chain_stack_bytes
23261     = arm_compute_static_chain_stack_bytes ();
23262 
23263   /* The static chain register is the same as the IP register.  If it is
23264      clobbered when creating the frame, we need to save and restore it.  */
23265   clobber_ip = IS_NESTED (func_type)
23266 	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23267 		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23268 			|| flag_stack_clash_protection)
23269 		       && !df_regs_ever_live_p (LR_REGNUM)
23270 		       && arm_r3_live_at_start_p ()));
23271 
23272   /* Find somewhere to store IP whilst the frame is being created.
23273      We try the following places in order:
23274 
23275        1. The last argument register r3 if it is available.
23276        2. A slot on the stack above the frame if there are no
23277 	  arguments to push onto the stack.
23278        3. Register r3 again, after pushing the argument registers
23279 	  onto the stack, if this is a varargs function.
23280        4. The last slot on the stack created for the arguments to
23281 	  push, if this isn't a varargs function.
23282 
23283      Note - we only need to tell the dwarf2 backend about the SP
23284      adjustment in the second variant; the static chain register
23285      doesn't need to be unwound, as it doesn't contain a value
23286      inherited from the caller.  */
23287   if (clobber_ip)
23288     {
23289       if (!arm_r3_live_at_start_p ())
23290 	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23291       else if (args_to_push == 0)
23292 	{
23293 	  rtx addr, dwarf;
23294 
23295 	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23296 	  saved_regs += 4;
23297 
23298 	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23299 	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23300 	  fp_offset = 4;
23301 
23302 	  /* Just tell the dwarf backend that we adjusted SP.  */
23303 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
23304 			       plus_constant (Pmode, stack_pointer_rtx,
23305 					      -fp_offset));
23306 	  RTX_FRAME_RELATED_P (insn) = 1;
23307 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23308 	}
23309       else
23310 	{
23311 	  /* Store the args on the stack.  */
23312 	  if (cfun->machine->uses_anonymous_args)
23313 	    {
23314 	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23315 					  (0xf0 >> (args_to_push / 4)) & 0xf);
23316 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23317 	      saved_pretend_args = 1;
23318 	    }
23319 	  else
23320 	    {
23321 	      rtx addr, dwarf;
23322 
23323 	      if (args_to_push == 4)
23324 		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23325 	      else
23326 		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23327 					   plus_constant (Pmode,
23328 							  stack_pointer_rtx,
23329 							  -args_to_push));
23330 
23331 	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23332 
23333 	      /* Just tell the dwarf backend that we adjusted SP.  */
23334 	      dwarf = gen_rtx_SET (stack_pointer_rtx,
23335 				   plus_constant (Pmode, stack_pointer_rtx,
23336 						  -args_to_push));
23337 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23338 	    }
23339 
23340 	  RTX_FRAME_RELATED_P (insn) = 1;
23341 	  fp_offset = args_to_push;
23342 	  args_to_push = 0;
23343 	}
23344     }
23345 
23346   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23347     {
23348       if (IS_INTERRUPT (func_type))
23349 	{
23350 	  /* Interrupt functions must not corrupt any registers.
23351 	     Creating a frame pointer however, corrupts the IP
23352 	     register, so we must push it first.  */
23353 	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23354 
23355 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
23356 	     The dwarf stack unwinding code only wants to see one
23357 	     stack decrement per function, and this is not it.  If
23358 	     this instruction is labeled as being part of the frame
23359 	     creation sequence then dwarf2out_frame_debug_expr will
23360 	     die when it encounters the assignment of IP to FP
23361 	     later on, since the use of SP here establishes SP as
23362 	     the CFA register and not IP.
23363 
23364 	     Anyway this instruction is not really part of the stack
23365 	     frame creation although it is part of the prologue.  */
23366 	}
23367 
23368       insn = emit_set_insn (ip_rtx,
23369 			    plus_constant (Pmode, stack_pointer_rtx,
23370 					   fp_offset));
23371       RTX_FRAME_RELATED_P (insn) = 1;
23372     }
23373 
23374   /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR.  */
23375   if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23376     {
23377       saved_regs += 4;
23378       insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23379 						GEN_INT (FPCXTNS_ENUM)));
23380       rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23381 			  plus_constant (Pmode, stack_pointer_rtx, -4));
23382       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23383       RTX_FRAME_RELATED_P (insn) = 1;
23384     }
23385 
23386   if (args_to_push)
23387     {
23388       /* Push the argument registers, or reserve space for them.  */
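      /* Note (illustrative, not from the original sources): the expression
	 (0xf0 >> (args_to_push / 4)) & 0xf selects the highest-numbered
	 argument registers; e.g. args_to_push == 8 yields mask 0xc,
	 i.e. {r2, r3}, and args_to_push == 16 yields 0xf, i.e. {r0-r3}.  */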
23389       if (cfun->machine->uses_anonymous_args)
23390 	insn = emit_multi_reg_push
23391 	  ((0xf0 >> (args_to_push / 4)) & 0xf,
23392 	   (0xf0 >> (args_to_push / 4)) & 0xf);
23393       else
23394 	insn = emit_insn
23395 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23396 		       GEN_INT (- args_to_push)));
23397       RTX_FRAME_RELATED_P (insn) = 1;
23398     }
23399 
23400   /* If this is an interrupt service routine, and the link register
23401      is going to be pushed, and we're not generating the extra
23402      push of IP (needed when a frame is needed and the frame layout is APCS),
23403      then subtracting four from LR now will mean that the function return
23404      can be done with a single instruction.  */
23405   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23406       && (live_regs_mask & (1 << LR_REGNUM)) != 0
23407       && !(frame_pointer_needed && TARGET_APCS_FRAME)
23408       && TARGET_ARM)
23409     {
23410       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23411 
23412       emit_set_insn (lr, plus_constant (SImode, lr, -4));
23413     }
23414 
23415   if (live_regs_mask)
23416     {
23417       unsigned long dwarf_regs_mask = live_regs_mask;
23418 
23419       saved_regs += bit_count (live_regs_mask) * 4;
23420       if (optimize_size && !frame_pointer_needed
23421 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
23422 	{
23423 	  /* If no coprocessor registers are being pushed and we don't have
23424 	     to worry about a frame pointer then push extra registers to
23425 	     create the stack frame.  This is done in a way that does not
23426 	     alter the frame layout, so is independent of the epilogue.  */
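	  /* For example, if 8 bytes of frame are needed and r0/r1 are not
	     otherwise live, pushing r0 and r1 creates the frame without a
	     separate stack-pointer adjustment.  */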
23427 	  int n;
23428 	  int frame;
23429 	  n = 0;
23430 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23431 	    n++;
23432 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23433 	  if (frame && n * 4 >= frame)
23434 	    {
23435 	      n = frame / 4;
23436 	      live_regs_mask |= (1 << n) - 1;
23437 	      saved_regs += frame;
23438 	    }
23439 	}
23440 
23441       if (TARGET_LDRD
23442 	  && current_tune->prefer_ldrd_strd
23443           && !optimize_function_for_size_p (cfun))
23444         {
23445 	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23446           if (TARGET_THUMB2)
23447 	    thumb2_emit_strd_push (live_regs_mask);
23448           else if (TARGET_ARM
23449                    && !TARGET_APCS_FRAME
23450                    && !IS_INTERRUPT (func_type))
23451 	    arm_emit_strd_push (live_regs_mask);
23452           else
23453             {
23454 	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23455               RTX_FRAME_RELATED_P (insn) = 1;
23456             }
23457         }
23458       else
23459         {
23460 	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23461           RTX_FRAME_RELATED_P (insn) = 1;
23462         }
23463     }
23464 
23465   if (! IS_VOLATILE (func_type))
23466     saved_regs += arm_save_coproc_regs ();
23467 
23468   if (frame_pointer_needed && TARGET_ARM)
23469     {
23470       /* Create the new frame pointer.  */
23471       if (TARGET_APCS_FRAME)
23472 	{
23473 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
23474 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23475 	  RTX_FRAME_RELATED_P (insn) = 1;
23476 	}
23477       else
23478 	{
23479 	  insn = GEN_INT (saved_regs - (4 + fp_offset));
23480 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23481 					stack_pointer_rtx, insn));
23482 	  RTX_FRAME_RELATED_P (insn) = 1;
23483 	}
23484     }
23485 
23486   size = offsets->outgoing_args - offsets->saved_args;
23487   if (flag_stack_usage_info)
23488     current_function_static_stack_size = size;
23489 
23490   /* If this isn't an interrupt service routine and we have a frame, then do
23491      stack checking.  We use IP as the first scratch register, except for the
23492      non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
23493   if (!IS_INTERRUPT (func_type)
23494       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23495 	  || flag_stack_clash_protection))
23496     {
23497       unsigned int regno;
23498 
23499       if (!IS_NESTED (func_type) || clobber_ip)
23500 	regno = IP_REGNUM;
23501       else if (df_regs_ever_live_p (LR_REGNUM))
23502 	regno = LR_REGNUM;
23503       else
23504 	regno = 3;
23505 
23506       if (crtl->is_leaf && !cfun->calls_alloca)
23507 	{
23508 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23509 	    arm_emit_probe_stack_range (get_stack_check_protect (),
23510 					size - get_stack_check_protect (),
23511 					regno, live_regs_mask);
23512 	}
23513       else if (size > 0)
23514 	arm_emit_probe_stack_range (get_stack_check_protect (), size,
23515 				    regno, live_regs_mask);
23516     }
23517 
23518   /* Recover the static chain register.  */
23519   if (clobber_ip)
23520     {
23521       if (!arm_r3_live_at_start_p () || saved_pretend_args)
23522 	insn = gen_rtx_REG (SImode, 3);
23523       else
23524 	{
23525 	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23526 	  insn = gen_frame_mem (SImode, insn);
23527 	}
23528       emit_set_insn (ip_rtx, insn);
23529       emit_insn (gen_force_register_use (ip_rtx));
23530     }
23531 
23532   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23533     {
23534       /* This add can produce multiple insns for a large constant, so we
23535 	 need to get tricky.  */
23536       rtx_insn *last = get_last_insn ();
23537 
23538       amount = GEN_INT (offsets->saved_args + saved_regs
23539 			- offsets->outgoing_args);
23540 
23541       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23542 				    amount));
23543       do
23544 	{
23545 	  last = last ? NEXT_INSN (last) : get_insns ();
23546 	  RTX_FRAME_RELATED_P (last) = 1;
23547 	}
23548       while (last != insn);
23549 
23550       /* If the frame pointer is needed, emit a special barrier that
23551 	 will prevent the scheduler from moving stores to the frame
23552 	 before the stack adjustment.  */
23553       if (frame_pointer_needed)
23554 	emit_insn (gen_stack_tie (stack_pointer_rtx,
23555 				  hard_frame_pointer_rtx));
23556     }
23557 
23558 
23559   if (frame_pointer_needed && TARGET_THUMB2)
23560     thumb_set_frame_pointer (offsets);
23561 
23562   if (flag_pic && arm_pic_register != INVALID_REGNUM)
23563     {
23564       unsigned long mask;
23565 
23566       mask = live_regs_mask;
23567       mask &= THUMB2_WORK_REGS;
23568       if (!IS_NESTED (func_type))
23569 	mask |= (1 << IP_REGNUM);
23570       arm_load_pic_register (mask, NULL_RTX);
23571     }
23572 
23573   /* If we are profiling, make sure no instructions are scheduled before
23574      the call to mcount.  Similarly if the user has requested no
23575      scheduling in the prolog.  Similarly if we want non-call exceptions
23576      using the EABI unwinder, to prevent faulting instructions from being
23577      swapped with a stack adjustment.  */
23578   if (crtl->profile || !TARGET_SCHED_PROLOG
23579       || (arm_except_unwind_info (&global_options) == UI_TARGET
23580 	  && cfun->can_throw_non_call_exceptions))
23581     emit_insn (gen_blockage ());
23582 
23583   /* If the link register is not being saved, it stays live with the return
23584      address in it; record this so that it does not get reused by the ce2 pass.  */
23585   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23586     cfun->machine->lr_save_eliminated = 1;
23587 }
23588 
23589 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
23590 static void
23591 arm_print_condition (FILE *stream)
23592 {
23593   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23594     {
23595       /* Branch conversion is not implemented for Thumb-2.  */
23596       if (TARGET_THUMB)
23597 	{
23598 	  output_operand_lossage ("predicated Thumb instruction");
23599 	  return;
23600 	}
23601       if (current_insn_predicate != NULL)
23602 	{
23603 	  output_operand_lossage
23604 	    ("predicated instruction in conditional sequence");
23605 	  return;
23606 	}
23607 
23608       fputs (arm_condition_codes[arm_current_cc], stream);
23609     }
23610   else if (current_insn_predicate)
23611     {
23612       enum arm_cond_code code;
23613 
23614       if (TARGET_THUMB1)
23615 	{
23616 	  output_operand_lossage ("predicated Thumb instruction");
23617 	  return;
23618 	}
23619 
23620       code = get_arm_condition_code (current_insn_predicate);
23621       fputs (arm_condition_codes[code], stream);
23622     }
23623 }
23624 
23625 
23626 /* Globally reserved letters: acln
23627    Punctuation letters currently used: @_|?().!#
23628    Lower case letters currently used: bcdefhimpqtvwxyz
23629    Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23630    Letters previously used, but now deprecated/obsolete: sVWXYZ.
23631 
23632    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23633 
23634    If CODE is 'd', then the X is a condition operand and the instruction
23635    should only be executed if the condition is true.
23636    if CODE is 'D', then the X is a condition operand and the instruction
23637    should only be executed if the condition is false: however, if the mode
23638    of the comparison is CCFPEmode, then always execute the instruction -- we
23639    do this because in these circumstances !GE does not necessarily imply LT;
23640    in these cases the instruction pattern will take care to make sure that
23641    an instruction containing %d will follow, thereby undoing the effects of
23642    doing this instruction unconditionally.
23643    If CODE is 'N' then X is a floating point operand that must be negated
23644    before output.
23645    If CODE is 'B' then output a bitwise inverted value of X (a const int).
23646    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
23647 static void
23648 arm_print_operand (FILE *stream, rtx x, int code)
23649 {
23650   switch (code)
23651     {
23652     case '@':
23653       fputs (ASM_COMMENT_START, stream);
23654       return;
23655 
23656     case '_':
23657       fputs (user_label_prefix, stream);
23658       return;
23659 
23660     case '|':
23661       fputs (REGISTER_PREFIX, stream);
23662       return;
23663 
23664     case '?':
23665       arm_print_condition (stream);
23666       return;
23667 
23668     case '.':
23669       /* The current condition code for a condition code setting instruction.
23670 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
23671       fputc('s', stream);
23672       arm_print_condition (stream);
23673       return;
23674 
23675     case '!':
23676       /* If the instruction is conditionally executed then print
23677 	 the current condition code, otherwise print 's'.  */
23678       gcc_assert (TARGET_THUMB2);
23679       if (current_insn_predicate)
23680 	arm_print_condition (stream);
23681       else
23682 	fputc('s', stream);
23683       break;
23684 
23685     /* %# is a "break" sequence. It doesn't output anything, but is used to
23686        separate e.g. operand numbers from following text, if that text consists
23687        of further digits which we don't want to be part of the operand
23688        number.  */
23689     case '#':
23690       return;
23691 
23692     case 'N':
23693       {
23694 	REAL_VALUE_TYPE r;
23695 	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23696 	fprintf (stream, "%s", fp_const_from_val (&r));
23697       }
23698       return;
23699 
23700     /* An integer or symbol address without a preceding # sign.  */
23701     case 'c':
23702       switch (GET_CODE (x))
23703 	{
23704 	case CONST_INT:
23705 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23706 	  break;
23707 
23708 	case SYMBOL_REF:
23709 	  output_addr_const (stream, x);
23710 	  break;
23711 
23712 	case CONST:
23713 	  if (GET_CODE (XEXP (x, 0)) == PLUS
23714 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23715 	    {
23716 	      output_addr_const (stream, x);
23717 	      break;
23718 	    }
23719 	  /* Fall through.  */
23720 
23721 	default:
23722 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23723 	}
23724       return;
23725 
23726     /* An integer that we want to print in HEX.  */
23727     case 'x':
23728       switch (GET_CODE (x))
23729 	{
23730 	case CONST_INT:
23731 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23732 	  break;
23733 
23734 	default:
23735 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23736 	}
23737       return;
23738 
23739     case 'B':
23740       if (CONST_INT_P (x))
23741 	{
23742 	  HOST_WIDE_INT val;
23743 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
23744 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23745 	}
23746       else
23747 	{
23748 	  putc ('~', stream);
23749 	  output_addr_const (stream, x);
23750 	}
23751       return;
23752 
23753     case 'b':
23754       /* Print the log2 of a CONST_INT.  */
23755       {
23756 	HOST_WIDE_INT val;
23757 
23758 	if (!CONST_INT_P (x)
23759 	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23760 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
23761 	else
23762 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23763       }
23764       return;
23765 
23766     case 'L':
23767       /* The low 16 bits of an immediate constant.  */
23768       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23769       return;
23770 
23771     case 'i':
23772       fprintf (stream, "%s", arithmetic_instr (x, 1));
23773       return;
23774 
23775     case 'I':
23776       fprintf (stream, "%s", arithmetic_instr (x, 0));
23777       return;
23778 
23779     case 'S':
23780       {
23781 	HOST_WIDE_INT val;
23782 	const char *shift;
23783 
23784 	shift = shift_op (x, &val);
23785 
23786 	if (shift)
23787 	  {
23788 	    fprintf (stream, ", %s ", shift);
23789 	    if (val == -1)
23790 	      arm_print_operand (stream, XEXP (x, 1), 0);
23791 	    else
23792 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23793 	  }
23794       }
23795       return;
23796 
23797       /* An explanation of the 'Q', 'R' and 'H' register operands:
23798 
23799 	 In a pair of registers containing a DI or DF value the 'Q'
23800 	 operand returns the register number of the register containing
23801 	 the least significant part of the value.  The 'R' operand returns
23802 	 the register number of the register containing the most
23803 	 significant part of the value.
23804 
23805 	 The 'H' operand returns the higher of the two register numbers.
23806 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
23807 	 same as the 'Q' operand, since the most significant part of the
23808 	 value is held in the lower number register.  The reverse is true
23809 	 on systems where WORDS_BIG_ENDIAN is false.
23810 
23811 	 The purpose of these operands is to distinguish between cases
23812 	 where the endian-ness of the values is important (for example
23813 	 when they are added together), and cases where the endian-ness
23814 	 is irrelevant, but the order of register operations is important.
23815 	 For example when loading a value from memory into a register
23816 	 pair, the endian-ness does not matter.  Provided that the value
23817 	 from the lower memory address is put into the lower numbered
23818 	 register, and the value from the higher address is put into the
23819 	 higher numbered register, the load will work regardless of whether
23820 	 the value being loaded is big-wordian or little-wordian.  The
23821 	 order of the two register loads can matter however, if the address
23822 	 of the memory location is actually held in one of the registers
23823 	 being overwritten by the load.
23824 
23825 	 The 'Q' and 'R' constraints are also available for 64-bit
23826 	 constants.  */
23827     case 'Q':
23828       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23829 	{
23830 	  rtx part = gen_lowpart (SImode, x);
23831 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23832 	  return;
23833 	}
23834 
23835       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23836 	{
23837 	  output_operand_lossage ("invalid operand for code '%c'", code);
23838 	  return;
23839 	}
23840 
23841       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23842       return;
23843 
23844     case 'R':
23845       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23846 	{
23847 	  machine_mode mode = GET_MODE (x);
23848 	  rtx part;
23849 
23850 	  if (mode == VOIDmode)
23851 	    mode = DImode;
23852 	  part = gen_highpart_mode (SImode, mode, x);
23853 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23854 	  return;
23855 	}
23856 
23857       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23858 	{
23859 	  output_operand_lossage ("invalid operand for code '%c'", code);
23860 	  return;
23861 	}
23862 
23863       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23864       return;
23865 
23866     case 'H':
23867       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23868 	{
23869 	  output_operand_lossage ("invalid operand for code '%c'", code);
23870 	  return;
23871 	}
23872 
23873       asm_fprintf (stream, "%r", REGNO (x) + 1);
23874       return;
23875 
23876     case 'J':
23877       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23878 	{
23879 	  output_operand_lossage ("invalid operand for code '%c'", code);
23880 	  return;
23881 	}
23882 
23883       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23884       return;
23885 
23886     case 'K':
23887       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23888 	{
23889 	  output_operand_lossage ("invalid operand for code '%c'", code);
23890 	  return;
23891 	}
23892 
23893       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23894       return;
23895 
23896     case 'm':
23897       asm_fprintf (stream, "%r",
23898 		   REG_P (XEXP (x, 0))
23899 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23900       return;
23901 
23902     case 'M':
23903       asm_fprintf (stream, "{%r-%r}",
23904 		   REGNO (x),
23905 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23906       return;
23907 
23908     /* Like 'M', but writing doubleword vector registers, for use by Neon
23909        insns.  */
23910     case 'h':
23911       {
23912         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23913         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23914         if (numregs == 1)
23915           asm_fprintf (stream, "{d%d}", regno);
23916         else
23917           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23918       }
23919       return;
23920 
23921     case 'd':
23922       /* CONST_TRUE_RTX means always -- that's the default.  */
23923       if (x == const_true_rtx)
23924 	return;
23925 
23926       if (!COMPARISON_P (x))
23927 	{
23928 	  output_operand_lossage ("invalid operand for code '%c'", code);
23929 	  return;
23930 	}
23931 
23932       fputs (arm_condition_codes[get_arm_condition_code (x)],
23933 	     stream);
23934       return;
23935 
23936     case 'D':
23937       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
23938 	 want to do that.  */
23939       if (x == const_true_rtx)
23940 	{
23941 	  output_operand_lossage ("instruction never executed");
23942 	  return;
23943 	}
23944       if (!COMPARISON_P (x))
23945 	{
23946 	  output_operand_lossage ("invalid operand for code '%c'", code);
23947 	  return;
23948 	}
23949 
23950       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23951 				 (get_arm_condition_code (x))],
23952 	     stream);
23953       return;
23954 
23955     case 's':
23956     case 'V':
23957     case 'W':
23958     case 'X':
23959     case 'Y':
23960     case 'Z':
23961       /* Former Maverick support, removed after GCC-4.7.  */
23962       output_operand_lossage ("obsolete Maverick format code '%c'", code);
23963       return;
23964 
23965     case 'U':
23966       if (!REG_P (x)
23967 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23968 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23969 	/* Bad value for wCG register number.  */
23970 	{
23971 	  output_operand_lossage ("invalid operand for code '%c'", code);
23972 	  return;
23973 	}
23974 
23975       else
23976 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23977       return;
23978 
23979       /* Print an iWMMXt control register name.  */
23980     case 'w':
23981       if (!CONST_INT_P (x)
23982 	  || INTVAL (x) < 0
23983 	  || INTVAL (x) >= 16)
23984 	/* Bad value for wC register number.  */
23985 	{
23986 	  output_operand_lossage ("invalid operand for code '%c'", code);
23987 	  return;
23988 	}
23989 
23990       else
23991 	{
23992 	  static const char * wc_reg_names [16] =
23993 	    {
23994 	      "wCID",  "wCon",  "wCSSF", "wCASF",
23995 	      "wC4",   "wC5",   "wC6",   "wC7",
23996 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23997 	      "wC12",  "wC13",  "wC14",  "wC15"
23998 	    };
23999 
24000 	  fputs (wc_reg_names [INTVAL (x)], stream);
24001 	}
24002       return;
24003 
24004     /* Print the high single-precision register of a VFP double-precision
24005        register.  */
24006     case 'p':
24007       {
24008         machine_mode mode = GET_MODE (x);
24009         int regno;
24010 
24011         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24012           {
24013 	    output_operand_lossage ("invalid operand for code '%c'", code);
24014 	    return;
24015           }
24016 
24017         regno = REGNO (x);
24018         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24019           {
24020 	    output_operand_lossage ("invalid operand for code '%c'", code);
24021 	    return;
24022           }
24023 
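	/* Dk overlaps S(2k) and S(2k+1); the "+ 1" selects the odd,
	   i.e. high, single-precision half.  */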
24024 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24025       }
24026       return;
24027 
24028     /* Print a VFP/Neon double precision or quad precision register name.  */
24029     case 'P':
24030     case 'q':
24031       {
24032 	machine_mode mode = GET_MODE (x);
24033 	int is_quad = (code == 'q');
24034 	int regno;
24035 
24036 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24037 	  {
24038 	    output_operand_lossage ("invalid operand for code '%c'", code);
24039 	    return;
24040 	  }
24041 
24042 	if (!REG_P (x)
24043 	    || !IS_VFP_REGNUM (REGNO (x)))
24044 	  {
24045 	    output_operand_lossage ("invalid operand for code '%c'", code);
24046 	    return;
24047 	  }
24048 
24049 	regno = REGNO (x);
24050 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24051             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24052 	  {
24053 	    output_operand_lossage ("invalid operand for code '%c'", code);
24054 	    return;
24055 	  }
24056 
24057 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24058 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24059       }
24060       return;
24061 
24062     /* These two codes print the low/high doubleword register of a Neon quad
24063        register, respectively.  For pair-structure types, can also print
24064        low/high quadword registers.  */
24065     case 'e':
24066     case 'f':
24067       {
24068         machine_mode mode = GET_MODE (x);
24069         int regno;
24070 
24071         if ((GET_MODE_SIZE (mode) != 16
24072 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24073           {
24074 	    output_operand_lossage ("invalid operand for code '%c'", code);
24075 	    return;
24076           }
24077 
24078         regno = REGNO (x);
24079         if (!NEON_REGNO_OK_FOR_QUAD (regno))
24080           {
24081 	    output_operand_lossage ("invalid operand for code '%c'", code);
24082 	    return;
24083           }
24084 
24085         if (GET_MODE_SIZE (mode) == 16)
24086           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24087 				  + (code == 'f' ? 1 : 0));
24088         else
24089           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24090 				  + (code == 'f' ? 1 : 0));
24091       }
24092       return;
24093 
24094     /* Print a VFPv3 floating-point constant, represented as an integer
24095        index.  */
24096     case 'G':
24097       {
24098         int index = vfp3_const_double_index (x);
24099 	gcc_assert (index != -1);
24100 	fprintf (stream, "%d", index);
24101       }
24102       return;
24103 
24104     /* Print bits representing opcode features for Neon.
24105 
24106        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
24107        and polynomials as unsigned.
24108 
24109        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24110 
24111        Bit 2 is 1 for rounding functions, 0 otherwise.  */
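
   Thus (bits & 3) indexes the lookup strings below:
   0 -> unsigned integer, 1 -> signed integer, 2 -> polynomial, 3 -> float.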
24112 
24113     /* Identify the type as 's', 'u', 'p' or 'f'.  */
24114     case 'T':
24115       {
24116         HOST_WIDE_INT bits = INTVAL (x);
24117         fputc ("uspf"[bits & 3], stream);
24118       }
24119       return;
24120 
24121     /* Likewise, but signed and unsigned integers are both 'i'.  */
24122     case 'F':
24123       {
24124         HOST_WIDE_INT bits = INTVAL (x);
24125         fputc ("iipf"[bits & 3], stream);
24126       }
24127       return;
24128 
24129     /* As for 'T', but emit 'u' instead of 'p'.  */
24130     case 't':
24131       {
24132         HOST_WIDE_INT bits = INTVAL (x);
24133         fputc ("usuf"[bits & 3], stream);
24134       }
24135       return;
24136 
24137     /* Bit 2: rounding (vs none).  */
24138     case 'O':
24139       {
24140         HOST_WIDE_INT bits = INTVAL (x);
24141         fputs ((bits & 4) != 0 ? "r" : "", stream);
24142       }
24143       return;
24144 
24145     /* Memory operand for vld1/vst1 instruction.  */
24146     case 'A':
24147       {
24148 	rtx addr;
24149 	bool postinc = FALSE;
24150 	rtx postinc_reg = NULL;
24151 	unsigned align, memsize, align_bits;
24152 
24153 	gcc_assert (MEM_P (x));
24154 	addr = XEXP (x, 0);
24155 	if (GET_CODE (addr) == POST_INC)
24156 	  {
24157 	    postinc = 1;
24158 	    addr = XEXP (addr, 0);
24159 	  }
24160 	if (GET_CODE (addr) == POST_MODIFY)
24161 	  {
24162 	    postinc_reg = XEXP( XEXP (addr, 1), 1);
24163 	    addr = XEXP (addr, 0);
24164 	  }
24165 	asm_fprintf (stream, "[%r", REGNO (addr));
24166 
24167 	/* We know the alignment of this access, so we can emit a hint in the
24168 	   instruction (for some alignments) as an aid to the memory subsystem
24169 	   of the target.  */
24170 	align = MEM_ALIGN (x) >> 3;
24171 	memsize = MEM_SIZE (x);
24172 
24173 	/* Only certain alignment specifiers are supported by the hardware.  */
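	/* The hint is printed as e.g. "[r0:64]", "[r0:128]" or "[r0:256]"
	   after the base register emitted below.  */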
24174 	if (memsize == 32 && (align % 32) == 0)
24175 	  align_bits = 256;
24176 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24177 	  align_bits = 128;
24178 	else if (memsize >= 8 && (align % 8) == 0)
24179 	  align_bits = 64;
24180 	else
24181 	  align_bits = 0;
24182 
24183 	if (align_bits != 0)
24184 	  asm_fprintf (stream, ":%d", align_bits);
24185 
24186 	asm_fprintf (stream, "]");
24187 
24188 	if (postinc)
24189 	  fputs("!", stream);
24190 	if (postinc_reg)
24191 	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24192       }
24193       return;
24194 
24195     /* Print the memory operand for a "Ux" or "Uj" constraint.  Depending on
24196        the rtx_code of the address, the output takes one of the following forms:
24197        1. [Rn], #+/-<imm>
24198        2. [Rn, #+/-<imm>]!
24199        3. [Rn, #+/-<imm>]
24200        4. [Rn].  */
24201     case 'E':
24202       {
24203 	rtx addr;
24204 	rtx postinc_reg = NULL;
24205 	unsigned inc_val = 0;
24206 	enum rtx_code code;
24207 
24208 	gcc_assert (MEM_P (x));
24209 	addr = XEXP (x, 0);
24210 	code = GET_CODE (addr);
24211 	if (code == POST_INC || code == POST_DEC || code == PRE_INC
24212 	    || code  == PRE_DEC)
24213 	  {
24214 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24215 	    inc_val = GET_MODE_SIZE (GET_MODE (x));
24216 	    if (code == POST_INC || code == POST_DEC)
24217 	      asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24218 					      ? "": "-", inc_val);
24219 	    else
24220 	      asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24221 					       ? "": "-", inc_val);
24222 	  }
24223 	else if (code == POST_MODIFY || code == PRE_MODIFY)
24224 	  {
24225 	    asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24226 	    postinc_reg = XEXP (XEXP (addr, 1), 1);
24227 	    if (postinc_reg && CONST_INT_P (postinc_reg))
24228 	      {
24229 		if (code == POST_MODIFY)
24230 		  asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24231 		else
24232 		  asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24233 	      }
24234 	  }
24235 	else if (code == PLUS)
24236 	  {
24237 	    rtx base = XEXP (addr, 0);
24238 	    rtx index = XEXP (addr, 1);
24239 
24240 	    gcc_assert (REG_P (base) && CONST_INT_P (index));
24241 
24242 	    HOST_WIDE_INT offset = INTVAL (index);
24243 	    asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24244 	  }
24245 	else
24246 	  {
24247 	    gcc_assert (REG_P (addr));
24248 	    asm_fprintf (stream, "[%r]",REGNO (addr));
24249 	  }
24250       }
24251       return;
24252 
24253     case 'C':
24254       {
24255 	rtx addr;
24256 
24257 	gcc_assert (MEM_P (x));
24258 	addr = XEXP (x, 0);
24259 	gcc_assert (REG_P (addr));
24260 	asm_fprintf (stream, "[%r]", REGNO (addr));
24261       }
24262       return;
24263 
24264     /* Translate an S register number into a D register number and element index.  */
24265     case 'y':
24266       {
24267         machine_mode mode = GET_MODE (x);
24268         int regno;
24269 
24270         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24271           {
24272 	    output_operand_lossage ("invalid operand for code '%c'", code);
24273 	    return;
24274           }
24275 
24276         regno = REGNO (x);
24277         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24278           {
24279 	    output_operand_lossage ("invalid operand for code '%c'", code);
24280 	    return;
24281           }
24282 
24283 	regno = regno - FIRST_VFP_REGNUM;
24284 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24285       }
24286       return;
24287 
24288     case 'v':
24289 	gcc_assert (CONST_DOUBLE_P (x));
24290 	int result;
24291 	result = vfp3_const_double_for_fract_bits (x);
24292 	if (result == 0)
24293 	  result = vfp3_const_double_for_bits (x);
24294 	fprintf (stream, "#%d", result);
24295 	return;
24296 
24297     /* Register specifier for vld1.16/vst1.16.  Translate the S register
24298        number into a D register number and element index.  */
24299     case 'z':
24300       {
24301         machine_mode mode = GET_MODE (x);
24302         int regno;
24303 
24304         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24305           {
24306 	    output_operand_lossage ("invalid operand for code '%c'", code);
24307 	    return;
24308           }
24309 
24310         regno = REGNO (x);
24311         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24312           {
24313 	    output_operand_lossage ("invalid operand for code '%c'", code);
24314 	    return;
24315           }
24316 
24317 	regno = regno - FIRST_VFP_REGNUM;
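	/* Sn is 32-bit lane (n & 1) of D(n/2); for a 16-bit element the
	   value sits in the low half of Sn, which is 16-bit lane 0 or 2 of
	   the D register (assuming little-endian lane numbering).  */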
24318 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24319       }
24320       return;
24321 
24322     default:
24323       if (x == 0)
24324 	{
24325 	  output_operand_lossage ("missing operand");
24326 	  return;
24327 	}
24328 
24329       switch (GET_CODE (x))
24330 	{
24331 	case REG:
24332 	  asm_fprintf (stream, "%r", REGNO (x));
24333 	  break;
24334 
24335 	case MEM:
24336 	  output_address (GET_MODE (x), XEXP (x, 0));
24337 	  break;
24338 
24339 	case CONST_DOUBLE:
24340 	  {
24341             char fpstr[20];
24342             real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24343 			      sizeof (fpstr), 0, 1);
24344             fprintf (stream, "#%s", fpstr);
24345 	  }
24346 	  break;
24347 
24348 	default:
24349 	  gcc_assert (GET_CODE (x) != NEG);
24350 	  fputc ('#', stream);
24351 	  if (GET_CODE (x) == HIGH)
24352 	    {
24353 	      fputs (":lower16:", stream);
24354 	      x = XEXP (x, 0);
24355 	    }
24356 
24357 	  output_addr_const (stream, x);
24358 	  break;
24359 	}
24360     }
24361 }
24362 
24363 /* Target hook for printing a memory address.  */
24364 static void
24365 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24366 {
24367   if (TARGET_32BIT)
24368     {
24369       int is_minus = GET_CODE (x) == MINUS;
24370 
24371       if (REG_P (x))
24372 	asm_fprintf (stream, "[%r]", REGNO (x));
24373       else if (GET_CODE (x) == PLUS || is_minus)
24374 	{
24375 	  rtx base = XEXP (x, 0);
24376 	  rtx index = XEXP (x, 1);
24377 	  HOST_WIDE_INT offset = 0;
24378 	  if (!REG_P (base)
24379 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
24380 	    {
24381 	      /* Ensure that BASE is a register.  */
24382 	      /* (one of them must be).  */
24383 	      /* Also ensure that SP is not used as an index register.  */
24384 	      std::swap (base, index);
24385 	    }
24386 	  switch (GET_CODE (index))
24387 	    {
24388 	    case CONST_INT:
24389 	      offset = INTVAL (index);
24390 	      if (is_minus)
24391 		offset = -offset;
24392 	      asm_fprintf (stream, "[%r, #%wd]",
24393 			   REGNO (base), offset);
24394 	      break;
24395 
24396 	    case REG:
24397 	      asm_fprintf (stream, "[%r, %s%r]",
24398 			   REGNO (base), is_minus ? "-" : "",
24399 			   REGNO (index));
24400 	      break;
24401 
24402 	    case MULT:
24403 	    case ASHIFTRT:
24404 	    case LSHIFTRT:
24405 	    case ASHIFT:
24406 	    case ROTATERT:
24407 	      {
24408 		asm_fprintf (stream, "[%r, %s%r",
24409 			     REGNO (base), is_minus ? "-" : "",
24410 			     REGNO (XEXP (index, 0)));
24411 		arm_print_operand (stream, index, 'S');
24412 		fputs ("]", stream);
24413 		break;
24414 	      }
24415 
24416 	    default:
24417 	      gcc_unreachable ();
24418 	    }
24419 	}
24420       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24421 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24422 	{
24423 	  gcc_assert (REG_P (XEXP (x, 0)));
24424 
24425 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24426 	    asm_fprintf (stream, "[%r, #%s%d]!",
24427 			 REGNO (XEXP (x, 0)),
24428 			 GET_CODE (x) == PRE_DEC ? "-" : "",
24429 			 GET_MODE_SIZE (mode));
24430 	  else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24431 	    asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24432 	  else
24433 	    asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24434 			 GET_CODE (x) == POST_DEC ? "-" : "",
24435 			 GET_MODE_SIZE (mode));
24436 	}
24437       else if (GET_CODE (x) == PRE_MODIFY)
24438 	{
24439 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24440 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24441 	    asm_fprintf (stream, "#%wd]!",
24442 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24443 	  else
24444 	    asm_fprintf (stream, "%r]!",
24445 			 REGNO (XEXP (XEXP (x, 1), 1)));
24446 	}
24447       else if (GET_CODE (x) == POST_MODIFY)
24448 	{
24449 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24450 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24451 	    asm_fprintf (stream, "#%wd",
24452 			 INTVAL (XEXP (XEXP (x, 1), 1)));
24453 	  else
24454 	    asm_fprintf (stream, "%r",
24455 			 REGNO (XEXP (XEXP (x, 1), 1)));
24456 	}
24457       else output_addr_const (stream, x);
24458     }
24459   else
24460     {
24461       if (REG_P (x))
24462 	asm_fprintf (stream, "[%r]", REGNO (x));
24463       else if (GET_CODE (x) == POST_INC)
24464 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24465       else if (GET_CODE (x) == PLUS)
24466 	{
24467 	  gcc_assert (REG_P (XEXP (x, 0)));
24468 	  if (CONST_INT_P (XEXP (x, 1)))
24469 	    asm_fprintf (stream, "[%r, #%wd]",
24470 			 REGNO (XEXP (x, 0)),
24471 			 INTVAL (XEXP (x, 1)));
24472 	  else
24473 	    asm_fprintf (stream, "[%r, %r]",
24474 			 REGNO (XEXP (x, 0)),
24475 			 REGNO (XEXP (x, 1)));
24476 	}
24477       else
24478 	output_addr_const (stream, x);
24479     }
24480 }
24481 
24482 /* Target hook for indicating whether a punctuation character for
24483    TARGET_PRINT_OPERAND is valid.  */
24484 static bool
24485 arm_print_operand_punct_valid_p (unsigned char code)
24486 {
24487   return (code == '@' || code == '|' || code == '.'
24488 	  || code == '(' || code == ')' || code == '#'
24489 	  || (TARGET_32BIT && (code == '?'))
24490 	  || (TARGET_THUMB2 && (code == '!'))
24491 	  || (TARGET_THUMB && (code == '_')));
24492 }
24493 
24494 /* Target hook for assembling integer objects.  The ARM version needs to
24495    handle word-sized values specially.  */
24496 static bool
24497 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24498 {
24499   machine_mode mode;
24500 
24501   if (size == UNITS_PER_WORD && aligned_p)
24502     {
24503       fputs ("\t.word\t", asm_out_file);
24504       output_addr_const (asm_out_file, x);
24505 
24506       /* Mark symbols as position independent.  We only do this in the
24507 	 .text segment, not in the .data segment.  */
24508       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24509 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
24510 	{
24511 	  /* See legitimize_pic_address for an explanation of the
24512 	     TARGET_VXWORKS_RTP check.  */
24513 	  /* References to weak symbols cannot be resolved locally:
24514 	     they may be overridden by a non-weak definition at link
24515 	     time.  */
24516 	  if (!arm_pic_data_is_text_relative
24517 	      || (GET_CODE (x) == SYMBOL_REF
24518 		  && (!SYMBOL_REF_LOCAL_P (x)
24519 		      || (SYMBOL_REF_DECL (x)
24520 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24521 		      || (SYMBOL_REF_FUNCTION_P (x)
24522 			  && !arm_fdpic_local_funcdesc_p (x)))))
24523 	    {
24524 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24525 		fputs ("(GOTFUNCDESC)", asm_out_file);
24526 	      else
24527 		fputs ("(GOT)", asm_out_file);
24528 	    }
24529 	  else
24530 	    {
24531 	      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24532 		fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24533 	      else
24534 		{
24535 		  bool is_readonly;
24536 
24537 		  if (!TARGET_FDPIC
24538 		      || arm_is_segment_info_known (x, &is_readonly))
24539 		    fputs ("(GOTOFF)", asm_out_file);
24540 		  else
24541 		    fputs ("(GOT)", asm_out_file);
24542 		}
24543 	    }
24544 	}
24545 
24546       /* For FDPIC we also have to mark symbol for .data section.  */
24547       if (TARGET_FDPIC
24548 	  && !making_const_table
24549 	  && SYMBOL_REF_P (x)
24550 	  && SYMBOL_REF_FUNCTION_P (x))
24551 	fputs ("(FUNCDESC)", asm_out_file);
24552 
24553       fputc ('\n', asm_out_file);
24554       return true;
24555     }
24556 
24557   mode = GET_MODE (x);
24558 
24559   if (arm_vector_mode_supported_p (mode))
24560     {
24561       int i, units;
24562 
24563       gcc_assert (GET_CODE (x) == CONST_VECTOR);
24564 
24565       units = CONST_VECTOR_NUNITS (x);
24566       size = GET_MODE_UNIT_SIZE (mode);
24567 
24568       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24569         for (i = 0; i < units; i++)
24570 	  {
24571 	    rtx elt = CONST_VECTOR_ELT (x, i);
24572 	    assemble_integer
24573 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24574 	  }
24575       else
24576         for (i = 0; i < units; i++)
24577           {
24578             rtx elt = CONST_VECTOR_ELT (x, i);
24579 	    assemble_real
24580 	      (*CONST_DOUBLE_REAL_VALUE (elt),
24581 	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24582 	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24583           }
24584 
24585       return true;
24586     }
24587 
24588   return default_assemble_integer (x, size, aligned_p);
24589 }
24590 
24591 static void
24592 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24593 {
24594   section *s;
24595 
24596   if (!TARGET_AAPCS_BASED)
24597     {
24598       (is_ctor ?
24599        default_named_section_asm_out_constructor
24600        : default_named_section_asm_out_destructor) (symbol, priority);
24601       return;
24602     }
24603 
24604   /* Put these in the .init_array section, using a special relocation.  */
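  /* The "(target1)" suffix emitted below produces an R_ARM_TARGET1
     relocation, which the linker resolves as either ABS32 or REL32,
     whichever the platform requires.  */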
24605   if (priority != DEFAULT_INIT_PRIORITY)
24606     {
24607       char buf[18];
24608       sprintf (buf, "%s.%.5u",
24609 	       is_ctor ? ".init_array" : ".fini_array",
24610 	       priority);
24611       s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24612     }
24613   else if (is_ctor)
24614     s = ctors_section;
24615   else
24616     s = dtors_section;
24617 
24618   switch_to_section (s);
24619   assemble_align (POINTER_SIZE);
24620   fputs ("\t.word\t", asm_out_file);
24621   output_addr_const (asm_out_file, symbol);
24622   fputs ("(target1)\n", asm_out_file);
24623 }
24624 
24625 /* Add a function to the list of static constructors.  */
24626 
24627 static void
24628 arm_elf_asm_constructor (rtx symbol, int priority)
24629 {
24630   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24631 }
24632 
24633 /* Add a function to the list of static destructors.  */
24634 
24635 static void
24636 arm_elf_asm_destructor (rtx symbol, int priority)
24637 {
24638   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24639 }
24640 
24641 /* A finite state machine takes care of noticing whether or not instructions
24642    can be conditionally executed, and thus decrease execution time and code
24643    size by deleting branch instructions.  The fsm is controlled by
24644    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
24645 
24646 /* The state of the fsm controlling condition codes are:
24647    0: normal, do nothing special
24648    1: make ASM_OUTPUT_OPCODE not output this instruction
24649    2: make ASM_OUTPUT_OPCODE not output this instruction
24650    3: make instructions conditional
24651    4: make instructions conditional
24652 
24653    State transitions (state->state by whom under condition):
24654    0 -> 1 final_prescan_insn if the `target' is a label
24655    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24656    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24657    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24658    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24659           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24660    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24661           (the target insn is arm_target_insn).
24662 
24663    If the jump clobbers the conditions then we use states 2 and 4.
24664 
24665    A similar thing can be done with conditional return insns.
24666 
24667    XXX In case the `target' is an unconditional branch, this conditionalising
24668    of the instructions always reduces code size, but not always execution
24669    time.  But then, I want to reduce the code size to somewhere near what
24670    /bin/cc produces.  */
24671 
24672 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24673    instructions.  When a COND_EXEC instruction is seen the subsequent
24674    instructions are scanned so that multiple conditional instructions can be
24675    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
24676    specify the length and true/false mask for the IT block.  These will be
24677    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
24678 
24679 /* Returns the index of the ARM condition code string in
24680    `arm_condition_codes', or ARM_NV if the comparison is invalid.
24681    COMPARISON should be an rtx like `(eq (...) (...))'.  */
24682 
24683 enum arm_cond_code
24684 maybe_get_arm_condition_code (rtx comparison)
24685 {
24686   machine_mode mode = GET_MODE (XEXP (comparison, 0));
24687   enum arm_cond_code code;
24688   enum rtx_code comp_code = GET_CODE (comparison);
24689 
24690   if (GET_MODE_CLASS (mode) != MODE_CC)
24691     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24692 			   XEXP (comparison, 1));
24693 
24694   switch (mode)
24695     {
24696     case E_CC_DNEmode: code = ARM_NE; goto dominance;
24697     case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24698     case E_CC_DGEmode: code = ARM_GE; goto dominance;
24699     case E_CC_DGTmode: code = ARM_GT; goto dominance;
24700     case E_CC_DLEmode: code = ARM_LE; goto dominance;
24701     case E_CC_DLTmode: code = ARM_LT; goto dominance;
24702     case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24703     case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24704     case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24705     case E_CC_DLTUmode: code = ARM_CC;
24706 
24707     dominance:
24708       if (comp_code == EQ)
24709 	return ARM_INVERSE_CONDITION_CODE (code);
24710       if (comp_code == NE)
24711 	return code;
24712       return ARM_NV;
24713 
24714     case E_CC_NZmode:
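      /* Only the N and Z flags are valid here, so GE and LT reduce to
	 plain sign tests (PL and MI).  */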
24715       switch (comp_code)
24716 	{
24717 	case NE: return ARM_NE;
24718 	case EQ: return ARM_EQ;
24719 	case GE: return ARM_PL;
24720 	case LT: return ARM_MI;
24721 	default: return ARM_NV;
24722 	}
24723 
24724     case E_CC_Zmode:
24725       switch (comp_code)
24726 	{
24727 	case NE: return ARM_NE;
24728 	case EQ: return ARM_EQ;
24729 	default: return ARM_NV;
24730 	}
24731 
24732     case E_CC_Nmode:
24733       switch (comp_code)
24734 	{
24735 	case NE: return ARM_MI;
24736 	case EQ: return ARM_PL;
24737 	default: return ARM_NV;
24738 	}
24739 
24740     case E_CCFPEmode:
24741     case E_CCFPmode:
24742       /* We can handle all cases except UNEQ and LTGT.  */
24743       switch (comp_code)
24744 	{
24745 	case GE: return ARM_GE;
24746 	case GT: return ARM_GT;
24747 	case LE: return ARM_LS;
24748 	case LT: return ARM_MI;
24749 	case NE: return ARM_NE;
24750 	case EQ: return ARM_EQ;
24751 	case ORDERED: return ARM_VC;
24752 	case UNORDERED: return ARM_VS;
24753 	case UNLT: return ARM_LT;
24754 	case UNLE: return ARM_LE;
24755 	case UNGT: return ARM_HI;
24756 	case UNGE: return ARM_PL;
24757 	  /* UNEQ and LTGT do not have a representation.  */
24758 	case UNEQ: /* Fall through.  */
24759 	case LTGT: /* Fall through.  */
24760 	default: return ARM_NV;
24761 	}
24762 
24763     case E_CC_SWPmode:
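      /* The operands of the comparison were swapped when this mode was
	 chosen, so the condition must be mirrored (GE <-> LE, GTU <-> LTU,
	 and so on).  */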
24764       switch (comp_code)
24765 	{
24766 	case NE: return ARM_NE;
24767 	case EQ: return ARM_EQ;
24768 	case GE: return ARM_LE;
24769 	case GT: return ARM_LT;
24770 	case LE: return ARM_GE;
24771 	case LT: return ARM_GT;
24772 	case GEU: return ARM_LS;
24773 	case GTU: return ARM_CC;
24774 	case LEU: return ARM_CS;
24775 	case LTU: return ARM_HI;
24776 	default: return ARM_NV;
24777 	}
24778 
24779     case E_CC_Cmode:
24780       switch (comp_code)
24781 	{
24782 	case LTU: return ARM_CS;
24783 	case GEU: return ARM_CC;
24784 	default: return ARM_NV;
24785 	}
24786 
24787     case E_CC_NVmode:
24788       switch (comp_code)
24789 	{
24790 	case GE: return ARM_GE;
24791 	case LT: return ARM_LT;
24792 	default: return ARM_NV;
24793 	}
24794 
24795     case E_CC_Bmode:
24796       switch (comp_code)
24797 	{
24798 	case GEU: return ARM_CS;
24799 	case LTU: return ARM_CC;
24800 	default: return ARM_NV;
24801 	}
24802 
24803     case E_CC_Vmode:
24804       switch (comp_code)
24805 	{
24806 	case NE: return ARM_VS;
24807 	case EQ: return ARM_VC;
24808 	default: return ARM_NV;
24809 	}
24810 
24811     case E_CC_ADCmode:
24812       switch (comp_code)
24813 	{
24814 	case GEU: return ARM_CS;
24815 	case LTU: return ARM_CC;
24816 	default: return ARM_NV;
24817 	}
24818 
24819     case E_CCmode:
24820     case E_CC_RSBmode:
24821       switch (comp_code)
24822 	{
24823 	case NE: return ARM_NE;
24824 	case EQ: return ARM_EQ;
24825 	case GE: return ARM_GE;
24826 	case GT: return ARM_GT;
24827 	case LE: return ARM_LE;
24828 	case LT: return ARM_LT;
24829 	case GEU: return ARM_CS;
24830 	case GTU: return ARM_HI;
24831 	case LEU: return ARM_LS;
24832 	case LTU: return ARM_CC;
24833 	default: return ARM_NV;
24834 	}
24835 
24836     default: gcc_unreachable ();
24837     }
24838 }
24839 
24840 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
24841 static enum arm_cond_code
24842 get_arm_condition_code (rtx comparison)
24843 {
24844   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24845   gcc_assert (code != ARM_NV);
24846   return code;
24847 }
24848 
24849 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
24850    code registers when not targeting Thumb1.  The VFP condition register
24851    only exists when generating hard-float code.  */
24852 static bool
24853 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24854 {
24855   if (!TARGET_32BIT)
24856     return false;
24857 
24858   *p1 = CC_REGNUM;
24859   *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24860   return true;
24861 }
24862 
24863 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24864    instructions.  */
24865 void
24866 thumb2_final_prescan_insn (rtx_insn *insn)
24867 {
24868   rtx_insn *first_insn = insn;
24869   rtx body = PATTERN (insn);
24870   rtx predicate;
24871   enum arm_cond_code code;
24872   int n;
24873   int mask;
24874   int max;
24875 
24876   /* max_insns_skipped in the tune was already taken into account in the
24877      cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
24878      we just emit IT blocks as large as the architecture allows.  It does not
24879      make sense to split the IT blocks here.  */
24880   max = MAX_INSN_PER_IT_BLOCK;
24881 
24882   /* Remove the previous insn from the count of insns to be output.  */
24883   if (arm_condexec_count)
24884       arm_condexec_count--;
24885 
24886   /* Nothing to do if we are already inside a conditional block.  */
24887   if (arm_condexec_count)
24888     return;
24889 
24890   if (GET_CODE (body) != COND_EXEC)
24891     return;
24892 
24893   /* Conditional jumps are implemented directly.  */
24894   if (JUMP_P (insn))
24895     return;
24896 
24897   predicate = COND_EXEC_TEST (body);
24898   arm_current_cc = get_arm_condition_code (predicate);
24899 
24900   n = get_attr_ce_count (insn);
24901   arm_condexec_count = 1;
24902   arm_condexec_mask = (1 << n) - 1;
24903   arm_condexec_masklen = n;
24904   /* See if subsequent instructions can be combined into the same block.  */
24905   for (;;)
24906     {
24907       insn = next_nonnote_insn (insn);
24908 
24909       /* Jumping into the middle of an IT block is illegal, so a label or
24910          barrier terminates the block.  */
24911       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24912 	break;
24913 
24914       body = PATTERN (insn);
24915       /* USE and CLOBBER aren't really insns, so just skip them.  */
24916       if (GET_CODE (body) == USE
24917 	  || GET_CODE (body) == CLOBBER)
24918 	continue;
24919 
24920       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
24921       if (GET_CODE (body) != COND_EXEC)
24922 	break;
24923       /* Maximum number of conditionally executed instructions in a block.  */
24924       n = get_attr_ce_count (insn);
24925       if (arm_condexec_masklen + n > max)
24926 	break;
24927 
24928       predicate = COND_EXEC_TEST (body);
24929       code = get_arm_condition_code (predicate);
24930       mask = (1 << n) - 1;
24931       if (arm_current_cc == code)
24932 	arm_condexec_mask |= (mask << arm_condexec_masklen);
24933       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24934 	break;
24935 
24936       arm_condexec_count++;
24937       arm_condexec_masklen += n;
24938 
24939       /* A jump must be the last instruction in a conditional block.  */
24940       if (JUMP_P (insn))
24941 	break;
24942     }
24943   /* Restore recog_data (getting the attributes of other insns can
24944      destroy this array, but final.c assumes that it remains intact
24945      across this call).  */
24946   extract_constrain_insn_cached (first_insn);
24947 }
24948 
24949 void
24950 arm_final_prescan_insn (rtx_insn *insn)
24951 {
24952   /* BODY will hold the body of INSN.  */
24953   rtx body = PATTERN (insn);
24954 
24955   /* This will be 1 if trying to repeat the trick, and things need to be
24956      reversed if it appears to fail.  */
24957   int reverse = 0;
24958 
24959   /* If we start with a return insn, we only succeed if we find another one.  */
24960   int seeking_return = 0;
24961   enum rtx_code return_code = UNKNOWN;
24962 
24963   /* START_INSN will hold the insn from where we start looking.  This is the
24964      first insn after the following code_label if REVERSE is true.  */
24965   rtx_insn *start_insn = insn;
24966 
24967   /* If in state 4, check if the target branch is reached, in order to
24968      change back to state 0.  */
24969   if (arm_ccfsm_state == 4)
24970     {
24971       if (insn == arm_target_insn)
24972 	{
24973 	  arm_target_insn = NULL;
24974 	  arm_ccfsm_state = 0;
24975 	}
24976       return;
24977     }
24978 
24979   /* If in state 3, it is possible to repeat the trick, if this insn is an
24980      unconditional branch to a label, and immediately following this branch
24981      is the previous target label which is only used once, and the label this
24982      branch jumps to is not too far off.  */
24983   if (arm_ccfsm_state == 3)
24984     {
24985       if (simplejump_p (insn))
24986 	{
24987 	  start_insn = next_nonnote_insn (start_insn);
24988 	  if (BARRIER_P (start_insn))
24989 	    {
24990 	      /* XXX Isn't this always a barrier?  */
24991 	      start_insn = next_nonnote_insn (start_insn);
24992 	    }
24993 	  if (LABEL_P (start_insn)
24994 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24995 	      && LABEL_NUSES (start_insn) == 1)
24996 	    reverse = TRUE;
24997 	  else
24998 	    return;
24999 	}
25000       else if (ANY_RETURN_P (body))
25001         {
25002 	  start_insn = next_nonnote_insn (start_insn);
25003 	  if (BARRIER_P (start_insn))
25004 	    start_insn = next_nonnote_insn (start_insn);
25005 	  if (LABEL_P (start_insn)
25006 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25007 	      && LABEL_NUSES (start_insn) == 1)
25008 	    {
25009 	      reverse = TRUE;
25010 	      seeking_return = 1;
25011 	      return_code = GET_CODE (body);
25012 	    }
25013 	  else
25014 	    return;
25015         }
25016       else
25017 	return;
25018     }
25019 
25020   gcc_assert (!arm_ccfsm_state || reverse);
25021   if (!JUMP_P (insn))
25022     return;
25023 
25024   /* This jump might be paralleled with a clobber of the condition codes;
25025      the jump should always come first.  */
25026   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25027     body = XVECEXP (body, 0, 0);
25028 
25029   if (reverse
25030       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25031 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25032     {
25033       int insns_skipped;
25034       int fail = FALSE, succeed = FALSE;
25035       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
25036       int then_not_else = TRUE;
25037       rtx_insn *this_insn = start_insn;
25038       rtx label = 0;
25039 
25040       /* Register the insn jumped to.  */
25041       if (reverse)
25042         {
25043 	  if (!seeking_return)
25044 	    label = XEXP (SET_SRC (body), 0);
25045         }
25046       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25047 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
25048       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25049 	{
25050 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
25051 	  then_not_else = FALSE;
25052 	}
25053       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25054 	{
25055 	  seeking_return = 1;
25056 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25057 	}
25058       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25059         {
25060 	  seeking_return = 1;
25061 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25062 	  then_not_else = FALSE;
25063         }
25064       else
25065 	gcc_unreachable ();
25066 
25067       /* See how many insns this branch skips, and what kind of insns.  If all
25068 	 insns are okay, and the label or unconditional branch to the same
25069 	 label is not too far away, succeed.  */
25070       for (insns_skipped = 0;
25071 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25072 	{
25073 	  rtx scanbody;
25074 
25075 	  this_insn = next_nonnote_insn (this_insn);
25076 	  if (!this_insn)
25077 	    break;
25078 
25079 	  switch (GET_CODE (this_insn))
25080 	    {
25081 	    case CODE_LABEL:
25082 	      /* Succeed if it is the target label, otherwise fail since
25083 		 control falls in from somewhere else.  */
25084 	      if (this_insn == label)
25085 		{
25086 		  arm_ccfsm_state = 1;
25087 		  succeed = TRUE;
25088 		}
25089 	      else
25090 		fail = TRUE;
25091 	      break;
25092 
25093 	    case BARRIER:
25094 	      /* Succeed if the following insn is the target label.
25095 		 Otherwise fail.
25096 		 If return insns are used then the last insn in a function
25097 		 will be a barrier.  */
25098 	      this_insn = next_nonnote_insn (this_insn);
25099 	      if (this_insn && this_insn == label)
25100 		{
25101 		  arm_ccfsm_state = 1;
25102 		  succeed = TRUE;
25103 		}
25104 	      else
25105 		fail = TRUE;
25106 	      break;
25107 
25108 	    case CALL_INSN:
25109 	      /* The AAPCS says that conditional calls should not be
25110 		 used since they make interworking inefficient (the
25111 		 linker can't transform BL<cond> into BLX).  That's
25112 		 only a problem if the machine has BLX.  */
25113 	      if (arm_arch5t)
25114 		{
25115 		  fail = TRUE;
25116 		  break;
25117 		}
25118 
25119 	      /* Succeed if the following insn is the target label, or
25120 		 if the following two insns are a barrier and the
25121 		 target label.  */
25122 	      this_insn = next_nonnote_insn (this_insn);
25123 	      if (this_insn && BARRIER_P (this_insn))
25124 		this_insn = next_nonnote_insn (this_insn);
25125 
25126 	      if (this_insn && this_insn == label
25127 		  && insns_skipped < max_insns_skipped)
25128 		{
25129 		  arm_ccfsm_state = 1;
25130 		  succeed = TRUE;
25131 		}
25132 	      else
25133 		fail = TRUE;
25134 	      break;
25135 
25136 	    case JUMP_INSN:
25137       	      /* If this is an unconditional branch to the same label, succeed.
25138 		 If it is to another label, do nothing.  If it is conditional,
25139 		 fail.  */
25140 	      /* XXX Probably, the tests for SET and the PC are
25141 		 unnecessary.  */
25142 
25143 	      scanbody = PATTERN (this_insn);
25144 	      if (GET_CODE (scanbody) == SET
25145 		  && GET_CODE (SET_DEST (scanbody)) == PC)
25146 		{
25147 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25148 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25149 		    {
25150 		      arm_ccfsm_state = 2;
25151 		      succeed = TRUE;
25152 		    }
25153 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25154 		    fail = TRUE;
25155 		}
25156 	      /* Fail if a conditional return is undesirable (e.g. on a
25157 		 StrongARM), but still allow this if optimizing for size.  */
25158 	      else if (GET_CODE (scanbody) == return_code
25159 		       && !use_return_insn (TRUE, NULL)
25160 		       && !optimize_size)
25161 		fail = TRUE;
25162 	      else if (GET_CODE (scanbody) == return_code)
25163 	        {
25164 		  arm_ccfsm_state = 2;
25165 		  succeed = TRUE;
25166 	        }
25167 	      else if (GET_CODE (scanbody) == PARALLEL)
25168 	        {
25169 		  switch (get_attr_conds (this_insn))
25170 		    {
25171 		    case CONDS_NOCOND:
25172 		      break;
25173 		    default:
25174 		      fail = TRUE;
25175 		      break;
25176 		    }
25177 		}
25178 	      else
25179 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
25180 
25181 	      break;
25182 
25183 	    case INSN:
25184 	      /* Instructions using or affecting the condition codes make it
25185 		 fail.  */
25186 	      scanbody = PATTERN (this_insn);
25187 	      if (!(GET_CODE (scanbody) == SET
25188 		    || GET_CODE (scanbody) == PARALLEL)
25189 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
25190 		fail = TRUE;
25191 	      break;
25192 
25193 	    default:
25194 	      break;
25195 	    }
25196 	}
25197       if (succeed)
25198 	{
25199 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25200 	    arm_target_label = CODE_LABEL_NUMBER (label);
25201 	  else
25202 	    {
25203 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
25204 
25205 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25206 	        {
25207 		  this_insn = next_nonnote_insn (this_insn);
25208 		  gcc_assert (!this_insn
25209 			      || (!BARRIER_P (this_insn)
25210 				  && !LABEL_P (this_insn)));
25211 	        }
25212 	      if (!this_insn)
25213 	        {
25214 		  /* Oh, dear!  We ran off the end; give up.  */
25215 		  extract_constrain_insn_cached (insn);
25216 		  arm_ccfsm_state = 0;
25217 		  arm_target_insn = NULL;
25218 		  return;
25219 	        }
25220 	      arm_target_insn = this_insn;
25221 	    }
25222 
25223 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25224 	     what it was.  */
25225 	  if (!reverse)
25226 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25227 
25228 	  if (reverse || then_not_else)
25229 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25230 	}
25231 
25232       /* Restore recog_data (getting the attributes of other insns can
25233 	 destroy this array, but final.c assumes that it remains intact
25234 	 across this call).  */
25235       extract_constrain_insn_cached (insn);
25236     }
25237 }
25238 
25239 /* Output IT instructions.  */
25240 void
25241 thumb2_asm_output_opcode (FILE * stream)
25242 {
25243   char buff[5];
25244   int n;
25245 
25246   if (arm_condexec_mask)
25247     {
25248       for (n = 0; n < arm_condexec_masklen; n++)
25249 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25250       buff[n] = 0;
25251       asm_fprintf(stream, "i%s\t%s\n\t", buff,
25252 		  arm_condition_codes[arm_current_cc]);
25253       arm_condexec_mask = 0;
25254     }
25255 }
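
/* Illustrative sketch (not from the original source), assuming bit 0 of
   arm_condexec_mask corresponds to the first instruction of the block: a
   single conditional instruction (masklen == 1, mask == 0b1) with
   arm_current_cc == ARM_EQ is prefixed with "it eq", while a two-instruction
   block whose second instruction uses the inverse condition
   (masklen == 2, mask == 0b01) gets "ite eq".  */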
25256 
25257 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
25258    UNITS_PER_WORD bytes wide.  */
25259 static unsigned int
25260 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25261 {
25262   if (TARGET_32BIT
25263       && regno > PC_REGNUM
25264       && regno != FRAME_POINTER_REGNUM
25265       && regno != ARG_POINTER_REGNUM
25266       && !IS_VFP_REGNUM (regno))
25267     return 1;
25268 
25269   return ARM_NUM_REGS (mode);
25270 }
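
/* Example (illustrative, not from the original source): DImode is 8 bytes,
   so ARM_NUM_REGS (DImode) == 2 and a DImode value in the core registers
   occupies two consecutive registers such as {r0, r1}.  On 32-bit targets
   the special registers above the PC (other than the VFP banks and the fake
   frame/arg pointers) always report a single register.  */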
25271 
25272 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
25273 static bool
25274 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25275 {
25276   if (GET_MODE_CLASS (mode) == MODE_CC)
25277     return (regno == CC_REGNUM
25278 	    || (TARGET_VFP_BASE
25279 		&& regno == VFPCC_REGNUM));
25280 
25281   if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25282     return false;
25283 
25284   if (IS_VPR_REGNUM (regno))
25285     return mode == HImode;
25286 
25287   if (TARGET_THUMB1)
25288     /* For the Thumb we only allow values bigger than SImode in
25289        registers 0 - 6, so that there is always a second low
25290        register available to hold the upper part of the value.
25291        We probably ought to ensure that the register is the
25292        start of an even numbered register pair.  */
25293     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25294 
25295   if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25296     {
25297       if (mode == DFmode || mode == DImode)
25298 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
25299 
25300       if (mode == HFmode || mode == BFmode || mode == HImode
25301 	  || mode == SFmode || mode == SImode)
25302 	return VFP_REGNO_OK_FOR_SINGLE (regno);
25303 
25304       if (TARGET_NEON)
25305         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25306                || (VALID_NEON_QREG_MODE (mode)
25307                    && NEON_REGNO_OK_FOR_QUAD (regno))
25308 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25309 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25310 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25311 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25312 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25313      if (TARGET_HAVE_MVE)
25314        return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25315 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25316 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25317 
25318       return false;
25319     }
25320 
25321   if (TARGET_REALLY_IWMMXT)
25322     {
25323       if (IS_IWMMXT_GR_REGNUM (regno))
25324 	return mode == SImode;
25325 
25326       if (IS_IWMMXT_REGNUM (regno))
25327 	return VALID_IWMMXT_REG_MODE (mode);
25328     }
25329 
25330   /* We allow almost any value to be stored in the general registers.
25331      Restrict doubleword quantities to even register pairs in ARM state
25332      so that we can use ldrd. The same restriction applies for MVE
25333      in order to support Armv8.1-M Mainline instructions.
25334      Do not allow very large Neon structure opaque modes in general
25335      registers; they would use too many.  */
25336   if (regno <= LAST_ARM_REGNUM)
25337     {
25338       if (ARM_NUM_REGS (mode) > 4)
25339 	return false;
25340 
25341       if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25342 	return true;
25343 
25344       return !((TARGET_LDRD || TARGET_CDE)
25345 	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25346     }
25347 
25348   if (regno == FRAME_POINTER_REGNUM
25349       || regno == ARG_POINTER_REGNUM)
25350     /* We only allow integers in the fake hard registers.  */
25351     return GET_MODE_CLASS (mode) == MODE_INT;
25352 
25353   return false;
25354 }
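
/* Example (illustrative, not from the original source): in ARM state on a
   target with LDRD (and without MVE/CDE), a DImode value is accepted in r2
   but rejected in r1, because doubleword quantities must start on an even
   core register so that LDRD/STRD can be used.  */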
25355 
25356 /* Implement TARGET_MODES_TIEABLE_P.  */
25357 
25358 static bool
25359 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25360 {
25361   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25362     return true;
25363 
25364   /* We specifically want to allow elements of "structure" modes to
25365      be tieable to the structure.  This more general condition allows
25366      other rarer situations too.  */
25367   if ((TARGET_NEON
25368        && (VALID_NEON_DREG_MODE (mode1)
25369 	   || VALID_NEON_QREG_MODE (mode1)
25370 	   || VALID_NEON_STRUCT_MODE (mode1))
25371        && (VALID_NEON_DREG_MODE (mode2)
25372 	   || VALID_NEON_QREG_MODE (mode2)
25373 	   || VALID_NEON_STRUCT_MODE (mode2)))
25374       || (TARGET_HAVE_MVE
25375 	  && (VALID_MVE_MODE (mode1)
25376 	      || VALID_MVE_STRUCT_MODE (mode1))
25377 	  && (VALID_MVE_MODE (mode2)
25378 	      || VALID_MVE_STRUCT_MODE (mode2))))
25379     return true;
25380 
25381   return false;
25382 }
25383 
25384 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25385    not used in arm mode.  */
25386 
25387 enum reg_class
25388 arm_regno_class (int regno)
25389 {
25390   if (regno == PC_REGNUM)
25391     return NO_REGS;
25392 
25393   if (IS_VPR_REGNUM (regno))
25394     return VPR_REG;
25395 
25396   if (TARGET_THUMB1)
25397     {
25398       if (regno == STACK_POINTER_REGNUM)
25399 	return STACK_REG;
25400       if (regno == CC_REGNUM)
25401 	return CC_REG;
25402       if (regno < 8)
25403 	return LO_REGS;
25404       return HI_REGS;
25405     }
25406 
25407   if (TARGET_THUMB2 && regno < 8)
25408     return LO_REGS;
25409 
25410   if (   regno <= LAST_ARM_REGNUM
25411       || regno == FRAME_POINTER_REGNUM
25412       || regno == ARG_POINTER_REGNUM)
25413     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25414 
25415   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25416     return TARGET_THUMB2 ? CC_REG : NO_REGS;
25417 
25418   if (IS_VFP_REGNUM (regno))
25419     {
25420       if (regno <= D7_VFP_REGNUM)
25421 	return VFP_D0_D7_REGS;
25422       else if (regno <= LAST_LO_VFP_REGNUM)
25423         return VFP_LO_REGS;
25424       else
25425         return VFP_HI_REGS;
25426     }
25427 
25428   if (IS_IWMMXT_REGNUM (regno))
25429     return IWMMXT_REGS;
25430 
25431   if (IS_IWMMXT_GR_REGNUM (regno))
25432     return IWMMXT_GR_REGS;
25433 
25434   return NO_REGS;
25435 }
25436 
25437 /* Handle a special case when computing the offset
25438    of an argument from the frame pointer.  */
25439 int
25440 arm_debugger_arg_offset (int value, rtx addr)
25441 {
25442   rtx_insn *insn;
25443 
25444   /* We are only interested if dbxout_parms() failed to compute the offset.  */
25445   if (value != 0)
25446     return 0;
25447 
25448   /* We can only cope with the case where the address is held in a register.  */
25449   if (!REG_P (addr))
25450     return 0;
25451 
25452   /* If we are using the frame pointer to point at the argument, then
25453      an offset of 0 is correct.  */
25454   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25455     return 0;
25456 
25457   /* If we are using the stack pointer to point at the
25458      argument, then an offset of 0 is correct.  */
25459   /* ??? Check this is consistent with thumb2 frame layout.  */
25460   if ((TARGET_THUMB || !frame_pointer_needed)
25461       && REGNO (addr) == SP_REGNUM)
25462     return 0;
25463 
25464   /* Oh dear.  The argument is pointed to by a register rather
25465      than being held in a register, or being stored at a known
25466      offset from the frame pointer.  Since GDB only understands
25467      those two kinds of argument we must translate the address
25468      held in the register into an offset from the frame pointer.
25469      We do this by searching through the insns for the function
25470      looking to see where this register gets its value.  If the
25471      register is initialized from the frame pointer plus an offset
25472      then we are in luck and we can continue, otherwise we give up.
25473 
25474      This code is exercised by producing debugging information
25475      for a function with arguments like this:
25476 
25477            double func (double a, double b, int c, double d) {return d;}
25478 
25479      Without this code the stab for parameter 'd' will be set to
25480      an offset of 0 from the frame pointer, rather than 8.  */
25481 
25482   /* The if() statement says:
25483 
25484      If the insn is a normal instruction
25485      and if the insn is setting the value in a register
25486      and if the register being set is the register holding the address of the argument
25487      and if the address is computed by an addition
25488      that involves adding to a register
25489      which is the frame pointer
25490      a constant integer
25491 
25492      then...  */
25493 
25494   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25495     {
25496       if (   NONJUMP_INSN_P (insn)
25497 	  && GET_CODE (PATTERN (insn)) == SET
25498 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25499 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25500 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25501 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25502 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25503 	     )
25504 	{
25505 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25506 
25507 	  break;
25508 	}
25509     }
25510 
25511   if (value == 0)
25512     {
25513       debug_rtx (addr);
25514       warning (0, "unable to compute real location of stacked parameter");
25515       value = 8; /* XXX magic hack */
25516     }
25517 
25518   return value;
25519 }
25520 
25521 /* Implement TARGET_PROMOTED_TYPE.  */
25522 
25523 static tree
25524 arm_promoted_type (const_tree t)
25525 {
25526   if (SCALAR_FLOAT_TYPE_P (t)
25527       && TYPE_PRECISION (t) == 16
25528       && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25529     return float_type_node;
25530   return NULL_TREE;
25531 }
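
/* Illustrative example (assumes __fp16 is available, e.g. via
   -mfp16-format=ieee):

     __fp16 a, b;
     float  f = a + b;   // both __fp16 operands are promoted to float

   i.e. arithmetic on the ARM __fp16 type is carried out in single precision,
   which is what returning float_type_node here arranges.  _Float16 has a
   different main variant from arm_fp16_type_node, so it is not promoted.  */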
25532 
25533 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25534    This simply adds HFmode as a supported mode; even though we don't
25535    implement arithmetic on this type directly, it's supported by
25536    optabs conversions, much the way the double-word arithmetic is
25537    special-cased in the default hook.  */
25538 
25539 static bool
25540 arm_scalar_mode_supported_p (scalar_mode mode)
25541 {
25542   if (mode == HFmode)
25543     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25544   else if (ALL_FIXED_POINT_MODE_P (mode))
25545     return true;
25546   else
25547     return default_scalar_mode_supported_p (mode);
25548 }
25549 
25550 /* Set the value of FLT_EVAL_METHOD.
25551    ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25552 
25553     0: evaluate all operations and constants, whose semantic type has at
25554        most the range and precision of type float, to the range and
25555        precision of float; evaluate all other operations and constants to
25556        the range and precision of the semantic type;
25557 
25558     N, where _FloatN is a supported interchange floating type
25559        evaluate all operations and constants, whose semantic type has at
25560        most the range and precision of _FloatN type, to the range and
25561        precision of the _FloatN type; evaluate all other operations and
25562        constants to the range and precision of the semantic type;
25563 
25564    If we have the ARMv8.2-A extensions then we support _Float16 in native
25565    precision, so we should set this to 16.  Otherwise, we support the type,
25566    but want to evaluate expressions in float precision, so set this to
25567    0.  */
25568 
25569 static enum flt_eval_method
25570 arm_excess_precision (enum excess_precision_type type)
25571 {
25572   switch (type)
25573     {
25574       case EXCESS_PRECISION_TYPE_FAST:
25575       case EXCESS_PRECISION_TYPE_STANDARD:
25576 	/* We can calculate either in 16-bit range and precision or
25577 	   32-bit range and precision.  Make that decision based on whether
25578 	   we have native support for the ARMv8.2-A 16-bit floating-point
25579 	   instructions or not.  */
25580 	return (TARGET_VFP_FP16INST
25581 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25582 		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25583       case EXCESS_PRECISION_TYPE_IMPLICIT:
25584 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25585       default:
25586 	gcc_unreachable ();
25587     }
25588   return FLT_EVAL_METHOD_UNPREDICTABLE;
25589 }
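
/* Illustrative example (not from the original source): for

     _Float16 a, b, c;
     c = a * b;

   a target with the ARMv8.2-A FP16 instructions (TARGET_VFP_FP16INST) keeps
   the multiply in half precision (FLT_EVAL_METHOD == 16); without them the
   operands are evaluated in float and the result converted back
   (FLT_EVAL_METHOD == 0).  */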
25590 
25591 
25592 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
25593    _Float16 if we are using anything other than ieee format for 16-bit
25594    floating point.  Otherwise, punt to the default implementation.  */
25595 static opt_scalar_float_mode
25596 arm_floatn_mode (int n, bool extended)
25597 {
25598   if (!extended && n == 16)
25599     {
25600       if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25601 	return HFmode;
25602       return opt_scalar_float_mode ();
25603     }
25604 
25605   return default_floatn_mode (n, extended);
25606 }
25607 
25608 
25609 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25610    not to early-clobber SRC registers in the process.
25611 
25612    We assume that the operands described by SRC and DEST represent a
25613    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
25614    number of components into which the copy has been decomposed.  */
25615 void
25616 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25617 {
25618   unsigned int i;
25619 
25620   if (!reg_overlap_mentioned_p (operands[0], operands[1])
25621       || REGNO (operands[0]) < REGNO (operands[1]))
25622     {
25623       for (i = 0; i < count; i++)
25624 	{
25625 	  operands[2 * i] = dest[i];
25626 	  operands[2 * i + 1] = src[i];
25627 	}
25628     }
25629   else
25630     {
25631       for (i = 0; i < count; i++)
25632 	{
25633 	  operands[2 * i] = dest[count - i - 1];
25634 	  operands[2 * i + 1] = src[count - i - 1];
25635 	}
25636     }
25637 }
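
/* Illustrative example (not from the original source): for a decomposed copy
   with dest = {d1, d2} and src = {d0, d1}, the destination overlaps the
   source and REGNO (operands[0]) > REGNO (operands[1]), so the component
   moves are ordered in reverse (d2 <- d1 first, then d1 <- d0) to avoid
   clobbering d1 before it has been read.  */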
25638 
25639 /* Split operands into moves from op[1] + op[2] into op[0].  */
25640 
25641 void
25642 neon_split_vcombine (rtx operands[3])
25643 {
25644   unsigned int dest = REGNO (operands[0]);
25645   unsigned int src1 = REGNO (operands[1]);
25646   unsigned int src2 = REGNO (operands[2]);
25647   machine_mode halfmode = GET_MODE (operands[1]);
25648   unsigned int halfregs = REG_NREGS (operands[1]);
25649   rtx destlo, desthi;
25650 
25651   if (src1 == dest && src2 == dest + halfregs)
25652     {
25653       /* No-op move.  Can't split to nothing; emit something.  */
25654       emit_note (NOTE_INSN_DELETED);
25655       return;
25656     }
25657 
25658   /* Preserve register attributes for variable tracking.  */
25659   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25660   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25661 			       GET_MODE_SIZE (halfmode));
25662 
25663   /* Special case of reversed high/low parts.  Use VSWP.  */
25664   if (src2 == dest && src1 == dest + halfregs)
25665     {
25666       rtx x = gen_rtx_SET (destlo, operands[1]);
25667       rtx y = gen_rtx_SET (desthi, operands[2]);
25668       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25669       return;
25670     }
25671 
25672   if (!reg_overlap_mentioned_p (operands[2], destlo))
25673     {
25674       /* Try to avoid unnecessary moves if part of the result
25675 	 is in the right place already.  */
25676       if (src1 != dest)
25677 	emit_move_insn (destlo, operands[1]);
25678       if (src2 != dest + halfregs)
25679 	emit_move_insn (desthi, operands[2]);
25680     }
25681   else
25682     {
25683       if (src2 != dest + halfregs)
25684 	emit_move_insn (desthi, operands[2]);
25685       if (src1 != dest)
25686 	emit_move_insn (destlo, operands[1]);
25687     }
25688 }
25689 
25690 /* Return the number (counting from 0) of
25691    the least significant set bit in MASK.  */
25692 
25693 inline static int
25694 number_of_first_bit_set (unsigned mask)
25695 {
25696   return ctz_hwi (mask);
25697 }
25698 
25699 /* Like emit_multi_reg_push, but allowing for a different set of
25700    registers to be described as saved.  MASK is the set of registers
25701    to be saved; REAL_REGS is the set of registers to be described as
25702    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
25703 
25704 static rtx_insn *
25705 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25706 {
25707   unsigned long regno;
25708   rtx par[10], tmp, reg;
25709   rtx_insn *insn;
25710   int i, j;
25711 
25712   /* Build the parallel of the registers actually being stored.  */
25713   for (i = 0; mask; ++i, mask &= mask - 1)
25714     {
25715       regno = ctz_hwi (mask);
25716       reg = gen_rtx_REG (SImode, regno);
25717 
25718       if (i == 0)
25719 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25720       else
25721 	tmp = gen_rtx_USE (VOIDmode, reg);
25722 
25723       par[i] = tmp;
25724     }
25725 
25726   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25727   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25728   tmp = gen_frame_mem (BLKmode, tmp);
25729   tmp = gen_rtx_SET (tmp, par[0]);
25730   par[0] = tmp;
25731 
25732   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25733   insn = emit_insn (tmp);
25734 
25735   /* Always build the stack adjustment note for unwind info.  */
25736   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25737   tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25738   par[0] = tmp;
25739 
25740   /* Build the parallel of the registers recorded as saved for unwind.  */
25741   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25742     {
25743       regno = ctz_hwi (real_regs);
25744       reg = gen_rtx_REG (SImode, regno);
25745 
25746       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25747       tmp = gen_frame_mem (SImode, tmp);
25748       tmp = gen_rtx_SET (tmp, reg);
25749       RTX_FRAME_RELATED_P (tmp) = 1;
25750       par[j + 1] = tmp;
25751     }
25752 
25753   if (j == 0)
25754     tmp = par[0];
25755   else
25756     {
25757       RTX_FRAME_RELATED_P (par[0]) = 1;
25758       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25759     }
25760 
25761   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25762 
25763   return insn;
25764 }
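
/* Usage sketch (illustrative): thumb1_expand_prologue passes REAL_REGS == 0
   when pushing the argument registers for anonymous ("...") arguments, so
   the unwind information records only the stack pointer adjustment rather
   than describing those registers as saved.  */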
25765 
25766 /* Emit code to push or pop registers to or from the stack.  F is the
25767    assembly file.  MASK is the registers to pop.  */
25768 static void
25769 thumb_pop (FILE *f, unsigned long mask)
25770 {
25771   int regno;
25772   int lo_mask = mask & 0xFF;
25773 
25774   gcc_assert (mask);
25775 
25776   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25777     {
25778       /* Special case.  Do not generate a POP PC statement here, do it in
25779 	 thumb_exit ().  */
25780       thumb_exit (f, -1);
25781       return;
25782     }
25783 
25784   fprintf (f, "\tpop\t{");
25785 
25786   /* Look at the low registers first.  */
25787   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25788     {
25789       if (lo_mask & 1)
25790 	{
25791 	  asm_fprintf (f, "%r", regno);
25792 
25793 	  if ((lo_mask & ~1) != 0)
25794 	    fprintf (f, ", ");
25795 	}
25796     }
25797 
25798   if (mask & (1 << PC_REGNUM))
25799     {
25800       /* Catch popping the PC.  */
25801       if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25802 	  || IS_CMSE_ENTRY (arm_current_func_type ()))
25803 	{
25804 	  /* The PC is never popped directly; instead
25805 	     it is popped into r3 and then BX is used.  */
25806 	  fprintf (f, "}\n");
25807 
25808 	  thumb_exit (f, -1);
25809 
25810 	  return;
25811 	}
25812       else
25813 	{
25814 	  if (mask & 0xFF)
25815 	    fprintf (f, ", ");
25816 
25817 	  asm_fprintf (f, "%r", PC_REGNUM);
25818 	}
25819     }
25820 
25821   fprintf (f, "}\n");
25822 }
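
/* Illustrative example (not from the original source): with the default
   register names, a MASK of (1 << 4) | (1 << 5) | (1 << 7) emits

	pop	{r4, r5, r7}

   while a mask consisting only of the PC bit is handed straight to
   thumb_exit () so that interworking/CMSE return sequences can be used.  */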
25823 
25824 /* Generate code to return from a thumb function.
25825    If 'reg_containing_return_addr' is -1, then the return address is
25826    actually on the stack, at the stack pointer.
25827 
25828    Note: do not forget to update length attribute of corresponding insn pattern
25829    when changing assembly output (eg. length attribute of epilogue_insns when
25830    updating Armv8-M Baseline Security Extensions register clearing
25831    sequences).  */
25832 static void
25833 thumb_exit (FILE *f, int reg_containing_return_addr)
25834 {
25835   unsigned regs_available_for_popping;
25836   unsigned regs_to_pop;
25837   int pops_needed;
25838   unsigned available;
25839   unsigned required;
25840   machine_mode mode;
25841   int size;
25842   int restore_a4 = FALSE;
25843 
25844   /* Compute the registers we need to pop.  */
25845   regs_to_pop = 0;
25846   pops_needed = 0;
25847 
25848   if (reg_containing_return_addr == -1)
25849     {
25850       regs_to_pop |= 1 << LR_REGNUM;
25851       ++pops_needed;
25852     }
25853 
25854   if (TARGET_BACKTRACE)
25855     {
25856       /* Restore the (ARM) frame pointer and stack pointer.  */
25857       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25858       pops_needed += 2;
25859     }
25860 
25861   /* If there is nothing to pop then just emit the BX instruction and
25862      return.  */
25863   if (pops_needed == 0)
25864     {
25865       if (crtl->calls_eh_return)
25866 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25867 
25868       if (IS_CMSE_ENTRY (arm_current_func_type ()))
25869 	{
25870 	  /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25871 	     emitted by cmse_nonsecure_entry_clear_before_return ().  */
25872 	  if (!TARGET_HAVE_FPCXT_CMSE)
25873 	    asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25874 			 reg_containing_return_addr);
25875 	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25876 	}
25877       else
25878 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25879       return;
25880     }
25881   /* Otherwise if we are not supporting interworking and we have not created
25882      a backtrace structure and the function was not entered in ARM mode then
25883      just pop the return address straight into the PC.  */
25884   else if (!TARGET_INTERWORK
25885 	   && !TARGET_BACKTRACE
25886 	   && !is_called_in_ARM_mode (current_function_decl)
25887 	   && !crtl->calls_eh_return
25888 	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
25889     {
25890       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25891       return;
25892     }
25893 
25894   /* Find out how many of the (return) argument registers we can corrupt.  */
25895   regs_available_for_popping = 0;
25896 
25897   /* If returning via __builtin_eh_return, the bottom three registers
25898      all contain information needed for the return.  */
25899   if (crtl->calls_eh_return)
25900     size = 12;
25901   else
25902     {
25903       /* Try to deduce the registers used from the function's
25904 	 return value.  This is more reliable than examining
25905 	 df_regs_ever_live_p () because that will be set if the register is
25906 	 ever used in the function, not just if the register is used
25907 	 to hold a return value.  */
25908 
25909       if (crtl->return_rtx != 0)
25910 	mode = GET_MODE (crtl->return_rtx);
25911       else
25912 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
25913 
25914       size = GET_MODE_SIZE (mode);
25915 
25916       if (size == 0)
25917 	{
25918 	  /* In a void function we can use any argument register.
25919 	     In a function that returns a structure on the stack
25920 	     we can use the second and third argument registers.  */
25921 	  if (mode == VOIDmode)
25922 	    regs_available_for_popping =
25923 	      (1 << ARG_REGISTER (1))
25924 	      | (1 << ARG_REGISTER (2))
25925 	      | (1 << ARG_REGISTER (3));
25926 	  else
25927 	    regs_available_for_popping =
25928 	      (1 << ARG_REGISTER (2))
25929 	      | (1 << ARG_REGISTER (3));
25930 	}
25931       else if (size <= 4)
25932 	regs_available_for_popping =
25933 	  (1 << ARG_REGISTER (2))
25934 	  | (1 << ARG_REGISTER (3));
25935       else if (size <= 8)
25936 	regs_available_for_popping =
25937 	  (1 << ARG_REGISTER (3));
25938     }
25939 
25940   /* Match registers to be popped with registers into which we pop them.  */
25941   for (available = regs_available_for_popping,
25942        required  = regs_to_pop;
25943        required != 0 && available != 0;
25944        available &= ~(available & - available),
25945        required  &= ~(required  & - required))
25946     -- pops_needed;
25947 
25948   /* If we have any popping registers left over, remove them.  */
25949   if (available > 0)
25950     regs_available_for_popping &= ~available;
25951 
25952   /* Otherwise if we need another popping register we can use
25953      the fourth argument register.  */
25954   else if (pops_needed)
25955     {
25956       /* If we have not found any free argument registers and
25957 	 reg a4 contains the return address, we must move it.  */
25958       if (regs_available_for_popping == 0
25959 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
25960 	{
25961 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25962 	  reg_containing_return_addr = LR_REGNUM;
25963 	}
25964       else if (size > 12)
25965 	{
25966 	  /* Register a4 is being used to hold part of the return value,
25967 	     but we have dire need of a free, low register.  */
25968 	  restore_a4 = TRUE;
25969 
25970 	  asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25971 	}
25972 
25973       if (reg_containing_return_addr != LAST_ARG_REGNUM)
25974 	{
25975 	  /* The fourth argument register is available.  */
25976 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25977 
25978 	  --pops_needed;
25979 	}
25980     }
25981 
25982   /* Pop as many registers as we can.  */
25983   thumb_pop (f, regs_available_for_popping);
25984 
25985   /* Process the registers we popped.  */
25986   if (reg_containing_return_addr == -1)
25987     {
25988       /* The return address was popped into the lowest numbered register.  */
25989       regs_to_pop &= ~(1 << LR_REGNUM);
25990 
25991       reg_containing_return_addr =
25992 	number_of_first_bit_set (regs_available_for_popping);
25993 
25994       /* Remove this register from the mask of available registers, so that
25995          the return address will not be corrupted by further pops.  */
25996       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25997     }
25998 
25999   /* If we popped other registers then handle them here.  */
26000   if (regs_available_for_popping)
26001     {
26002       int frame_pointer;
26003 
26004       /* Work out which register currently contains the frame pointer.  */
26005       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26006 
26007       /* Move it into the correct place.  */
26008       asm_fprintf (f, "\tmov\t%r, %r\n",
26009 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26010 
26011       /* (Temporarily) remove it from the mask of popped registers.  */
26012       regs_available_for_popping &= ~(1 << frame_pointer);
26013       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26014 
26015       if (regs_available_for_popping)
26016 	{
26017 	  int stack_pointer;
26018 
26019 	  /* We popped the stack pointer as well,
26020 	     find the register that contains it.  */
26021 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26022 
26023 	  /* Move it into the stack register.  */
26024 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26025 
26026 	  /* At this point we have popped all necessary registers, so
26027 	     do not worry about restoring regs_available_for_popping
26028 	     to its correct value:
26029 
26030 	     assert (pops_needed == 0)
26031 	     assert (regs_available_for_popping == (1 << frame_pointer))
26032 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
26033 	}
26034       else
26035 	{
26036 	  /* Since we have just moved the popped value into the frame
26037 	     pointer, the popping register is available for reuse, and
26038 	     we know that we still have the stack pointer left to pop.  */
26039 	  regs_available_for_popping |= (1 << frame_pointer);
26040 	}
26041     }
26042 
26043   /* If we still have registers left on the stack, but we no longer have
26044      any registers into which we can pop them, then we must move the return
26045      address into the link register and make available the register that
26046      contained it.  */
26047   if (regs_available_for_popping == 0 && pops_needed > 0)
26048     {
26049       regs_available_for_popping |= 1 << reg_containing_return_addr;
26050 
26051       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26052 		   reg_containing_return_addr);
26053 
26054       reg_containing_return_addr = LR_REGNUM;
26055     }
26056 
26057   /* If we have registers left on the stack then pop some more.
26058      We know that at most we will want to pop FP and SP.  */
26059   if (pops_needed > 0)
26060     {
26061       int  popped_into;
26062       int  move_to;
26063 
26064       thumb_pop (f, regs_available_for_popping);
26065 
26066       /* We have popped either FP or SP.
26067 	 Move whichever one it is into the correct register.  */
26068       popped_into = number_of_first_bit_set (regs_available_for_popping);
26069       move_to     = number_of_first_bit_set (regs_to_pop);
26070 
26071       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26072       --pops_needed;
26073     }
26074 
26075   /* If we still have not popped everything then we must have only
26076      had one register available to us and we are now popping the SP.  */
26077   if (pops_needed > 0)
26078     {
26079       int  popped_into;
26080 
26081       thumb_pop (f, regs_available_for_popping);
26082 
26083       popped_into = number_of_first_bit_set (regs_available_for_popping);
26084 
26085       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26086       /*
26087 	assert (regs_to_pop == (1 << STACK_POINTER))
26088 	assert (pops_needed == 1)
26089       */
26090     }
26091 
26092   /* If necessary restore the a4 register.  */
26093   if (restore_a4)
26094     {
26095       if (reg_containing_return_addr != LR_REGNUM)
26096 	{
26097 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26098 	  reg_containing_return_addr = LR_REGNUM;
26099 	}
26100 
26101       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26102     }
26103 
26104   if (crtl->calls_eh_return)
26105     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26106 
26107   /* Return to caller.  */
26108   if (IS_CMSE_ENTRY (arm_current_func_type ()))
26109     {
26110       /* This is for the cases where LR is not being used to contain the return
26111          address.  It may therefore contain information that we might not want
26112 	 to leak, hence it must be cleared.  The value in R0 will never be a
26113 	 secret at this point, so it is safe to use it, see the clearing code
26114 	 in cmse_nonsecure_entry_clear_before_return ().  */
26115       if (reg_containing_return_addr != LR_REGNUM)
26116 	asm_fprintf (f, "\tmov\tlr, r0\n");
26117 
26118       /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26119 	 by cmse_nonsecure_entry_clear_before_return ().  */
26120       if (!TARGET_HAVE_FPCXT_CMSE)
26121 	asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26122       asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26123     }
26124   else
26125     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26126 }
26127 
26128 /* Scan INSN just before assembler is output for it.
26129    For Thumb-1, we track the status of the condition codes; this
26130    information is used in the cbranchsi4_insn pattern.  */
26131 void
26132 thumb1_final_prescan_insn (rtx_insn *insn)
26133 {
26134   if (flag_print_asm_name)
26135     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26136 		 INSN_ADDRESSES (INSN_UID (insn)));
26137   /* Don't overwrite the previous setter when we get to a cbranch.  */
26138   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26139     {
26140       enum attr_conds conds;
26141 
26142       if (cfun->machine->thumb1_cc_insn)
26143 	{
26144 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26145 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26146 	    CC_STATUS_INIT;
26147 	}
26148       conds = get_attr_conds (insn);
26149       if (conds == CONDS_SET)
26150 	{
26151 	  rtx set = single_set (insn);
26152 	  cfun->machine->thumb1_cc_insn = insn;
26153 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26154 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
26155 	  cfun->machine->thumb1_cc_mode = CC_NZmode;
26156 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26157 	    {
26158 	      rtx src1 = XEXP (SET_SRC (set), 1);
26159 	      if (src1 == const0_rtx)
26160 		cfun->machine->thumb1_cc_mode = CCmode;
26161 	    }
26162 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26163 	    {
26164 	      /* Record the src register operand instead of dest because
26165 		 cprop_hardreg pass propagates src.  */
26166 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26167 	    }
26168 	}
26169       else if (conds != CONDS_NOCOND)
26170 	cfun->machine->thumb1_cc_insn = NULL_RTX;
26171     }
26172 
26173     /* Check if an unexpected far jump is used.  */
26174     if (cfun->machine->lr_save_eliminated
26175         && get_attr_far_jump (insn) == FAR_JUMP_YES)
26176       internal_error("Unexpected thumb1 far jump");
26177 }
26178 
26179 int
26180 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26181 {
26182   unsigned HOST_WIDE_INT mask = 0xff;
26183   int i;
26184 
26185   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26186   if (val == 0) /* XXX */
26187     return 0;
26188 
26189   for (i = 0; i < 25; i++)
26190     if ((val & (mask << i)) == val)
26191       return 1;
26192 
26193   return 0;
26194 }
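
/* Illustrative examples (not from the original source):
   thumb_shiftable_const (0x00ff0000) is nonzero (0xff shifted left by 16),
   while thumb_shiftable_const (0x00ff00ff) is zero, since no single 8-bit
   field shifted left can cover both bytes.  */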
26195 
26196 /* Returns nonzero if the current function contains,
26197    or might contain a far jump.  */
26198 static int
26199 thumb_far_jump_used_p (void)
26200 {
26201   rtx_insn *insn;
26202   bool far_jump = false;
26203   unsigned int func_size = 0;
26204 
26205   /* If we have already decided that far jumps may be used,
26206      do not bother checking again, and always return true even if
26207      it turns out that they are not being used.  Once we have made
26208      the decision that far jumps are present (and that hence the link
26209      register will be pushed onto the stack) we cannot go back on it.  */
26210   if (cfun->machine->far_jump_used)
26211     return 1;
26212 
26213   /* If this function is not being called from the prologue/epilogue
26214      generation code then it must be being called from the
26215      INITIAL_ELIMINATION_OFFSET macro.  */
26216   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26217     {
26218       /* In this case we know that we are being asked about the elimination
26219 	 of the arg pointer register.  If that register is not being used,
26220 	 then there are no arguments on the stack, and we do not have to
26221 	 worry that a far jump might force the prologue to push the link
26222 	 register, changing the stack offsets.  In this case we can just
26223 	 return false, since the presence of far jumps in the function will
26224 	 not affect stack offsets.
26225 
26226 	 If the arg pointer is live (or if it was live, but has now been
26227 	 eliminated and so set to dead) then we do have to test to see if
26228 	 the function might contain a far jump.  This test can lead to some
26229 	 false negatives, since before reload is completed the length of
26230 	 branch instructions is not known, so gcc defaults to returning their
26231 	 longest length, which in turn sets the far jump attribute to true.
26232 
26233 	 A false negative will not result in bad code being generated, but it
26234 	 will result in a needless push and pop of the link register.  We
26235 	 hope that this does not occur too often.
26236 
26237 	 If we need doubleword stack alignment this could affect the other
26238 	 elimination offsets so we can't risk getting it wrong.  */
26239       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26240 	cfun->machine->arg_pointer_live = 1;
26241       else if (!cfun->machine->arg_pointer_live)
26242 	return 0;
26243     }
26244 
26245   /* We should not change far_jump_used during or after reload, as there is
26246      no chance to change stack frame layout.  */
26247   if (reload_in_progress || reload_completed)
26248     return 0;
26249 
26250   /* Check to see if the function contains a branch
26251      insn with the far jump attribute set.  */
26252   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26253     {
26254       if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26255 	{
26256 	  far_jump = true;
26257 	}
26258       func_size += get_attr_length (insn);
26259     }
26260 
26261   /* The far_jump attribute will always be true for thumb1 before the
26262      shorten_branch pass, so checking the far_jump attribute before
26263      shorten_branch isn't very useful.
26264 
26265      The following heuristic tries to estimate more accurately whether a
26266      far jump may finally be used.  The heuristic is very conservative, as
26267      there is no chance to roll back a decision not to use far jumps.
26268 
26269      Thumb1 long branch offset is -2048 to 2046.  The worst case is each
26270      2-byte insn is associated with a 4 byte constant pool.  Using
26271      function size 2048/3 as the threshold is conservative enough.  */
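  /* Worked example of the threshold (illustrative, not from the original
     source): func_size is the summed instruction length in bytes, and in the
     worst case every 2-byte insn drags in a 4-byte literal-pool entry, so the
     code plus pools can span up to 3 * func_size bytes.  The check below
     therefore assumes a far jump may be needed once func_size reaches about
     683 bytes (3 * 683 >= 2048).  */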
26272   if (far_jump)
26273     {
26274       if ((func_size * 3) >= 2048)
26275         {
26276 	  /* Record the fact that we have decided that
26277 	     the function does use far jumps.  */
26278 	  cfun->machine->far_jump_used = 1;
26279 	  return 1;
26280 	}
26281     }
26282 
26283   return 0;
26284 }
26285 
26286 /* Return nonzero if FUNC must be entered in ARM mode.  */
26287 static bool
26288 is_called_in_ARM_mode (tree func)
26289 {
26290   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26291 
26292   /* Ignore the problem about functions whose address is taken.  */
26293   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26294     return true;
26295 
26296 #ifdef ARM_PE
26297   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26298 #else
26299   return false;
26300 #endif
26301 }
26302 
26303 /* Given the stack offsets and register mask in OFFSETS, decide how
26304    many additional registers to push instead of subtracting a constant
26305    from SP.  For epilogues the principle is the same except we use pop.
26306    FOR_PROLOGUE indicates which we're generating.  */
26307 static int
26308 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26309 {
26310   HOST_WIDE_INT amount;
26311   unsigned long live_regs_mask = offsets->saved_regs_mask;
26312   /* Extract a mask of the ones we can give to the Thumb's push/pop
26313      instruction.  */
26314   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26315   /* Then count how many other high registers will need to be pushed.  */
26316   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26317   int n_free, reg_base, size;
26318 
26319   if (!for_prologue && frame_pointer_needed)
26320     amount = offsets->locals_base - offsets->saved_regs;
26321   else
26322     amount = offsets->outgoing_args - offsets->saved_regs;
26323 
26324   /* If the stack frame size is 512 exactly, we can save one load
26325      instruction, which should make this a win even when optimizing
26326      for speed.  */
26327   if (!optimize_size && amount != 512)
26328     return 0;
26329 
26330   /* Can't do this if there are high registers to push.  */
26331   if (high_regs_pushed != 0)
26332     return 0;
26333 
26334   /* Shouldn't do it in the prologue if no registers would normally
26335      be pushed at all.  In the epilogue, also allow it if we'll have
26336      a pop insn for the PC.  */
26337   if  (l_mask == 0
26338        && (for_prologue
26339 	   || TARGET_BACKTRACE
26340 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
26341 	   || TARGET_INTERWORK
26342 	   || crtl->args.pretend_args_size != 0))
26343     return 0;
26344 
26345   /* Don't do this if thumb_expand_prologue wants to emit instructions
26346      between the push and the stack frame allocation.  */
26347   if (for_prologue
26348       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26349 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26350     return 0;
26351 
26352   reg_base = 0;
26353   n_free = 0;
26354   if (!for_prologue)
26355     {
26356       size = arm_size_return_regs ();
26357       reg_base = ARM_NUM_INTS (size);
26358       live_regs_mask >>= reg_base;
26359     }
26360 
26361   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26362 	 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26363     {
26364       live_regs_mask >>= 1;
26365       n_free++;
26366     }
26367 
26368   if (n_free == 0)
26369     return 0;
26370   gcc_assert (amount / 4 * 4 == amount);
26371 
26372   if (amount >= 512 && (amount - n_free * 4) < 512)
26373     return (amount - 508) / 4;
26374   if (amount <= n_free * 4)
26375     return amount / 4;
26376   return 0;
26377 }
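
/* Worked example (illustrative, not from the original source): with a frame
   of exactly 512 bytes and two free low registers, amount >= 512 and
   (512 - 2 * 4) < 512, so the function returns (512 - 508) / 4 == 1.
   Pushing one extra register leaves a 508-byte adjustment, which still fits
   a single Thumb-1 SP-relative subtract immediate.  */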
26378 
26379 /* The bits which aren't usefully expanded as rtl.  */
26380 const char *
26381 thumb1_unexpanded_epilogue (void)
26382 {
26383   arm_stack_offsets *offsets;
26384   int regno;
26385   unsigned long live_regs_mask = 0;
26386   int high_regs_pushed = 0;
26387   int extra_pop;
26388   int had_to_push_lr;
26389   int size;
26390 
26391   if (cfun->machine->return_used_this_function != 0)
26392     return "";
26393 
26394   if (IS_NAKED (arm_current_func_type ()))
26395     return "";
26396 
26397   offsets = arm_get_frame_offsets ();
26398   live_regs_mask = offsets->saved_regs_mask;
26399   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26400 
26401   /* Deduce the registers used from the function's return value.
26402      This is more reliable than examining df_regs_ever_live_p () because that
26403      will be set if the register is ever used in the function, not just if
26404      the register is used to hold a return value.  */
26405   size = arm_size_return_regs ();
26406 
26407   extra_pop = thumb1_extra_regs_pushed (offsets, false);
26408   if (extra_pop > 0)
26409     {
26410       unsigned long extra_mask = (1 << extra_pop) - 1;
26411       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26412     }
26413 
26414   /* The prolog may have pushed some high registers to use as
26415      work registers.  e.g. the testsuite file:
26416      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26417      compiles to produce:
26418 	push	{r4, r5, r6, r7, lr}
26419 	mov	r7, r9
26420 	mov	r6, r8
26421 	push	{r6, r7}
26422      as part of the prolog.  We have to undo that pushing here.  */
26423 
26424   if (high_regs_pushed)
26425     {
26426       unsigned long mask = live_regs_mask & 0xff;
26427       int next_hi_reg;
26428 
26429       mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26430 
26431       if (mask == 0)
26432 	/* Oh dear!  We have no low registers into which we can pop
26433            high registers!  */
26434 	internal_error
26435 	  ("no low registers available for popping high registers");
26436 
26437       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26438 	if (live_regs_mask & (1 << next_hi_reg))
26439 	  break;
26440 
26441       while (high_regs_pushed)
26442 	{
26443 	  /* Find lo register(s) into which the high register(s) can
26444              be popped.  */
26445 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26446 	    {
26447 	      if (mask & (1 << regno))
26448 		high_regs_pushed--;
26449 	      if (high_regs_pushed == 0)
26450 		break;
26451 	    }
26452 
26453 	  if (high_regs_pushed == 0 && regno >= 0)
26454 	    mask &= ~((1 << regno) - 1);
26455 
26456 	  /* Pop the values into the low register(s).  */
26457 	  thumb_pop (asm_out_file, mask);
26458 
26459 	  /* Move the value(s) into the high registers.  */
26460 	  for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26461 	    {
26462 	      if (mask & (1 << regno))
26463 		{
26464 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26465 			       regno);
26466 
26467 		  for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26468 		       next_hi_reg--)
26469 		    if (live_regs_mask & (1 << next_hi_reg))
26470 		      break;
26471 		}
26472 	    }
26473 	}
26474       live_regs_mask &= ~0x0f00;
26475     }
26476 
26477   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26478   live_regs_mask &= 0xff;
26479 
26480   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26481     {
26482       /* Pop the return address into the PC.  */
26483       if (had_to_push_lr)
26484 	live_regs_mask |= 1 << PC_REGNUM;
26485 
26486       /* Either no argument registers were pushed or a backtrace
26487 	 structure was created which includes an adjusted stack
26488 	 pointer, so just pop everything.  */
26489       if (live_regs_mask)
26490 	thumb_pop (asm_out_file, live_regs_mask);
26491 
26492       /* We have either just popped the return address into the
26493 	 PC or it was kept in LR for the entire function.
26494 	 Note that thumb_pop has already called thumb_exit if the
26495 	 PC was in the list.  */
26496       if (!had_to_push_lr)
26497 	thumb_exit (asm_out_file, LR_REGNUM);
26498     }
26499   else
26500     {
26501       /* Pop everything but the return address.  */
26502       if (live_regs_mask)
26503 	thumb_pop (asm_out_file, live_regs_mask);
26504 
26505       if (had_to_push_lr)
26506 	{
26507 	  if (size > 12)
26508 	    {
26509 	      /* We have no free low regs, so save one.  */
26510 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26511 			   LAST_ARG_REGNUM);
26512 	    }
26513 
26514 	  /* Get the return address into a temporary register.  */
26515 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26516 
26517 	  if (size > 12)
26518 	    {
26519 	      /* Move the return address to lr.  */
26520 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26521 			   LAST_ARG_REGNUM);
26522 	      /* Restore the low register.  */
26523 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26524 			   IP_REGNUM);
26525 	      regno = LR_REGNUM;
26526 	    }
26527 	  else
26528 	    regno = LAST_ARG_REGNUM;
26529 	}
26530       else
26531 	regno = LR_REGNUM;
26532 
26533       /* Remove the argument registers that were pushed onto the stack.  */
26534       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26535 		   SP_REGNUM, SP_REGNUM,
26536 		   crtl->args.pretend_args_size);
26537 
26538       thumb_exit (asm_out_file, regno);
26539     }
26540 
26541   return "";
26542 }
26543 
26544 /* Functions to save and restore machine-specific function data.  */
26545 static struct machine_function *
26546 arm_init_machine_status (void)
26547 {
26548   struct machine_function *machine;
26549   machine = ggc_cleared_alloc<machine_function> ();
26550 
26551 #if ARM_FT_UNKNOWN != 0
26552   machine->func_type = ARM_FT_UNKNOWN;
26553 #endif
26554   machine->static_chain_stack_bytes = -1;
26555   return machine;
26556 }
26557 
26558 /* Return an RTX indicating where the return address to the
26559    calling function can be found.  */
26560 rtx
26561 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26562 {
26563   if (count != 0)
26564     return NULL_RTX;
26565 
26566   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26567 }
26568 
26569 /* Do anything needed before RTL is emitted for each function.  */
26570 void
26571 arm_init_expanders (void)
26572 {
26573   /* Arrange to initialize and mark the machine per-function status.  */
26574   init_machine_status = arm_init_machine_status;
26575 
26576   /* This is to stop the combine pass optimizing away the alignment
26577      adjustment of va_arg.  */
26578   /* ??? It is claimed that this should not be necessary.  */
26579   if (cfun)
26580     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26581 }
26582 
26583 /* Return true if FUNC is to be compiled in a different ARM/Thumb mode.  */
26584 
26585 bool
26586 arm_change_mode_p (tree func)
26587 {
26588   if (TREE_CODE (func) != FUNCTION_DECL)
26589     return false;
26590 
26591   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26592 
26593   if (!callee_tree)
26594     callee_tree = target_option_default_node;
26595 
26596   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26597   int flags = callee_opts->x_target_flags;
26598 
26599   return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26600 }
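
/* Illustrative example (assumes the GCC "target" function attribute for
   ARM): when the command line selects -mthumb, a function declared as

     __attribute__ ((target ("arm"))) void f (void);

   has target flags with TARGET_THUMB_P false, so arm_change_mode_p returns
   true for it.  */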
26601 
26602 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26603    isn't an ABI specified frame pointer for Thumb.  Instead, we set it
26604    to point at the base of the local variables after static stack
26605    space for a function has been allocated.  */
26606 
26607 HOST_WIDE_INT
26608 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26609 {
26610   arm_stack_offsets *offsets;
26611 
26612   offsets = arm_get_frame_offsets ();
26613 
26614   switch (from)
26615     {
26616     case ARG_POINTER_REGNUM:
26617       switch (to)
26618 	{
26619 	case STACK_POINTER_REGNUM:
26620 	  return offsets->outgoing_args - offsets->saved_args;
26621 
26622 	case FRAME_POINTER_REGNUM:
26623 	  return offsets->soft_frame - offsets->saved_args;
26624 
26625 	case ARM_HARD_FRAME_POINTER_REGNUM:
26626 	  return offsets->saved_regs - offsets->saved_args;
26627 
26628 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26629 	  return offsets->locals_base - offsets->saved_args;
26630 
26631 	default:
26632 	  gcc_unreachable ();
26633 	}
26634       break;
26635 
26636     case FRAME_POINTER_REGNUM:
26637       switch (to)
26638 	{
26639 	case STACK_POINTER_REGNUM:
26640 	  return offsets->outgoing_args - offsets->soft_frame;
26641 
26642 	case ARM_HARD_FRAME_POINTER_REGNUM:
26643 	  return offsets->saved_regs - offsets->soft_frame;
26644 
26645 	case THUMB_HARD_FRAME_POINTER_REGNUM:
26646 	  return offsets->locals_base - offsets->soft_frame;
26647 
26648 	default:
26649 	  gcc_unreachable ();
26650 	}
26651       break;
26652 
26653     default:
26654       gcc_unreachable ();
26655     }
26656 }
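
/* Rough sketch of the offsets used above (illustrative; see
   arm_get_frame_offsets for the authoritative layout).  Larger offsets
   correspond to lower addresses as the stack grows downwards:

     saved_args      incoming arguments (the argument pointer)
     saved_regs      just below the pushed core registers
     soft_frame      the soft frame pointer (Thumb FRAME_POINTER_REGNUM)
     locals_base     base of the local variables (Thumb hard frame pointer)
     outgoing_args   bottom of the outgoing arguments, i.e. the stack pointer

   Each elimination offset returned above is simply the distance between
   two of these points.  */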
26657 
26658 /* Generate the function's prologue.  */
26659 
26660 void
26661 thumb1_expand_prologue (void)
26662 {
26663   rtx_insn *insn;
26664 
26665   HOST_WIDE_INT amount;
26666   HOST_WIDE_INT size;
26667   arm_stack_offsets *offsets;
26668   unsigned long func_type;
26669   int regno;
26670   unsigned long live_regs_mask;
26671   unsigned long l_mask;
26672   unsigned high_regs_pushed = 0;
26673   bool lr_needs_saving;
26674 
26675   func_type = arm_current_func_type ();
26676 
26677   /* Naked functions don't have prologues.  */
26678   if (IS_NAKED (func_type))
26679     {
26680       if (flag_stack_usage_info)
26681 	current_function_static_stack_size = 0;
26682       return;
26683     }
26684 
26685   if (IS_INTERRUPT (func_type))
26686     {
26687       error ("interrupt service routines cannot be coded in Thumb mode");
26688       return;
26689     }
26690 
26691   if (is_called_in_ARM_mode (current_function_decl))
26692     emit_insn (gen_prologue_thumb1_interwork ());
26693 
26694   offsets = arm_get_frame_offsets ();
26695   live_regs_mask = offsets->saved_regs_mask;
26696   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26697 
26698   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
26699   l_mask = live_regs_mask & 0x40ff;
26700   /* Then count how many other high registers will need to be pushed.  */
26701   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26702 
26703   if (crtl->args.pretend_args_size)
26704     {
26705       rtx x = GEN_INT (-crtl->args.pretend_args_size);
26706 
26707       if (cfun->machine->uses_anonymous_args)
26708 	{
26709 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26710 	  unsigned long mask;
26711 
26712 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
26713 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26714 
26715 	  insn = thumb1_emit_multi_reg_push (mask, 0);
26716 	}
26717       else
26718 	{
26719 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26720 					stack_pointer_rtx, x));
26721 	}
26722       RTX_FRAME_RELATED_P (insn) = 1;
26723     }
26724 
26725   if (TARGET_BACKTRACE)
26726     {
26727       HOST_WIDE_INT offset = 0;
26728       unsigned work_register;
26729       rtx work_reg, x, arm_hfp_rtx;
26730 
26731       /* We have been asked to create a stack backtrace structure.
26732          The code looks like this:
26733 
26734 	 0   .align 2
26735 	 0   func:
26736          0     sub   SP, #16         Reserve space for 4 registers.
26737 	 2     push  {R7}            Push low registers.
26738          4     add   R7, SP, #20     Get the stack pointer before the push.
26739          6     str   R7, [SP, #8]    Store the stack pointer
26740 					(before reserving the space).
26741          8     mov   R7, PC          Get hold of the start of this code + 12.
26742         10     str   R7, [SP, #16]   Store it.
26743         12     mov   R7, FP          Get hold of the current frame pointer.
26744         14     str   R7, [SP, #4]    Store it.
26745         16     mov   R7, LR          Get hold of the current return address.
26746         18     str   R7, [SP, #12]   Store it.
26747         20     add   R7, SP, #16     Point at the start of the
26748 					backtrace structure.
26749         22     mov   FP, R7          Put this value into the frame pointer.  */
26750 
26751       work_register = thumb_find_work_register (live_regs_mask);
26752       work_reg = gen_rtx_REG (SImode, work_register);
26753       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26754 
26755       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26756 				    stack_pointer_rtx, GEN_INT (-16)));
26757       RTX_FRAME_RELATED_P (insn) = 1;
26758 
26759       if (l_mask)
26760 	{
26761 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26762 	  RTX_FRAME_RELATED_P (insn) = 1;
26763 	  lr_needs_saving = false;
26764 
26765 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
26766 	}
26767 
26768       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26769       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26770 
26771       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26772       x = gen_frame_mem (SImode, x);
26773       emit_move_insn (x, work_reg);
26774 
26775       /* Make sure that the instruction fetching the PC is in the right place
26776 	 to calculate "start of backtrace creation code + 12".  */
26777       /* ??? The stores using the common WORK_REG ought to be enough to
26778 	 prevent the scheduler from doing anything weird.  Failing that
26779 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
26780       if (l_mask)
26781 	{
26782 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26783 	  emit_move_insn (work_reg, x);
26784 
26785 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26786 	  x = gen_frame_mem (SImode, x);
26787 	  emit_move_insn (x, work_reg);
26788 
26789 	  emit_move_insn (work_reg, arm_hfp_rtx);
26790 
26791 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26792 	  x = gen_frame_mem (SImode, x);
26793 	  emit_move_insn (x, work_reg);
26794 	}
26795       else
26796 	{
26797 	  emit_move_insn (work_reg, arm_hfp_rtx);
26798 
26799 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
26800 	  x = gen_frame_mem (SImode, x);
26801 	  emit_move_insn (x, work_reg);
26802 
26803 	  x = gen_rtx_REG (SImode, PC_REGNUM);
26804 	  emit_move_insn (work_reg, x);
26805 
26806 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26807 	  x = gen_frame_mem (SImode, x);
26808 	  emit_move_insn (x, work_reg);
26809 	}
26810 
26811       x = gen_rtx_REG (SImode, LR_REGNUM);
26812       emit_move_insn (work_reg, x);
26813 
26814       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26815       x = gen_frame_mem (SImode, x);
26816       emit_move_insn (x, work_reg);
26817 
26818       x = GEN_INT (offset + 12);
26819       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26820 
26821       emit_move_insn (arm_hfp_rtx, work_reg);
26822     }
26823   /* Optimization:  If we are not pushing any low registers but we are going
26824      to push some high registers then delay our first push.  This will just
26825      be a push of LR and we can combine it with the push of the first high
26826      register.  */
26827   else if ((l_mask & 0xff) != 0
26828 	   || (high_regs_pushed == 0 && lr_needs_saving))
26829     {
26830       unsigned long mask = l_mask;
26831       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26832       insn = thumb1_emit_multi_reg_push (mask, mask);
26833       RTX_FRAME_RELATED_P (insn) = 1;
26834       lr_needs_saving = false;
26835     }
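
  /* Illustrative example of the combined push above (the register choice is
     hypothetical): if only r8 and LR need saving and r4 is free, instead of
     emitting
	push	{lr}
	mov	r4, r8
	push	{r4}
     the LR push is delayed so that it merges with the stash of the first
     high register, giving
	mov	r4, r8
	push	{r4, lr}
     one push fewer for the same saved state.  */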
26836 
26837   if (high_regs_pushed)
26838     {
26839       unsigned pushable_regs;
26840       unsigned next_hi_reg;
26841       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26842 						 : crtl->args.info.nregs;
26843       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26844 
26845       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26846 	if (live_regs_mask & (1 << next_hi_reg))
26847 	  break;
26848 
26849       /* Here we need to mask out registers used for passing arguments
26850 	 even if they could be pushed.  Otherwise we might use them to
26851 	 stash the high registers and clobber argument values that are
26852 	 still live.  */
26853       pushable_regs = l_mask & (~arg_regs_mask);
26854       pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26855 
26856       /* Normally, LR can be used as a scratch register once it has been
26857 	 saved; but if the function examines its own return address then
26858 	 the value is still live and we need to avoid using it.  */
26859       bool return_addr_live
26860 	= REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26861 			   LR_REGNUM);
26862 
26863       if (lr_needs_saving || return_addr_live)
26864 	pushable_regs &= ~(1 << LR_REGNUM);
26865 
26866       if (pushable_regs == 0)
26867 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26868 
26869       while (high_regs_pushed > 0)
26870 	{
26871 	  unsigned long real_regs_mask = 0;
26872 	  unsigned long push_mask = 0;
26873 
26874 	  for (regno = LR_REGNUM; regno >= 0; regno --)
26875 	    {
26876 	      if (pushable_regs & (1 << regno))
26877 		{
26878 		  emit_move_insn (gen_rtx_REG (SImode, regno),
26879 				  gen_rtx_REG (SImode, next_hi_reg));
26880 
26881 		  high_regs_pushed --;
26882 		  real_regs_mask |= (1 << next_hi_reg);
26883 		  push_mask |= (1 << regno);
26884 
26885 		  if (high_regs_pushed)
26886 		    {
26887 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26888 			   next_hi_reg --)
26889 			if (live_regs_mask & (1 << next_hi_reg))
26890 			  break;
26891 		    }
26892 		  else
26893 		    break;
26894 		}
26895 	    }
26896 
26897 	  /* If we had to find a work register and we have not yet
26898 	     saved the LR then add it to the list of regs to push.  */
26899 	  if (lr_needs_saving)
26900 	    {
26901 	      push_mask |= 1 << LR_REGNUM;
26902 	      real_regs_mask |= 1 << LR_REGNUM;
26903 	      lr_needs_saving = false;
26904 	      /* If the return address is not live at this point, we
26905 		 can add LR to the list of registers that we can use
26906 		 for pushes.  */
26907 	      if (!return_addr_live)
26908 		pushable_regs |= 1 << LR_REGNUM;
26909 	    }
26910 
26911 	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26912 	  RTX_FRAME_RELATED_P (insn) = 1;
26913 	}
26914     }
26915 
26916   /* Load the pic register before setting the frame pointer,
26917      so we can use r7 as a temporary work register.  */
26918   if (flag_pic && arm_pic_register != INVALID_REGNUM)
26919     arm_load_pic_register (live_regs_mask, NULL_RTX);
26920 
26921   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26922     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26923 		    stack_pointer_rtx);
26924 
26925   size = offsets->outgoing_args - offsets->saved_args;
26926   if (flag_stack_usage_info)
26927     current_function_static_stack_size = size;
26928 
26929   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
26930   if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26931        || flag_stack_clash_protection)
26932       && size)
26933     sorry ("%<-fstack-check=specific%> for Thumb-1");
26934 
26935   amount = offsets->outgoing_args - offsets->saved_regs;
26936   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26937   if (amount)
26938     {
26939       if (amount < 512)
26940 	{
26941 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26942 					GEN_INT (- amount)));
26943 	  RTX_FRAME_RELATED_P (insn) = 1;
26944 	}
26945       else
26946 	{
26947 	  rtx reg, dwarf;
26948 
26949 	  /* The stack decrement is too big for an immediate value in a single
26950 	     insn.  In theory we could issue multiple subtracts, but after
26951 	     three of them it becomes more space efficient to place the full
26952 	     value in the constant pool and load into a register.  (Also the
26953 	     ARM debugger really likes to see only one stack decrement per
26954 	     function).  So instead we look for a scratch register into which
26955 	     we can load the decrement, and then we subtract this from the
26956 	     stack pointer.  Unfortunately on the thumb the only available
26957 	     scratch registers are the argument registers, and we cannot use
26958 	     these as they may hold arguments to the function.  Instead we
26959 	     attempt to locate a call preserved register which is used by this
26960 	     function.  If we can find one, then we know that it will have
26961 	     been pushed at the start of the prologue and so we can corrupt
26962 	     it now.  */
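	  /* A sketch of the resulting sequence (the register choice is
	     illustrative; any call-saved low register already pushed above
	     will do):

		ldr	r4, .Lc		@ .Lc holds -<amount>
		add	sp, sp, r4

	     together with a REG_FRAME_RELATED_EXPR note recording the plain
	     sp = sp - <amount> adjustment for the unwinder.  */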
26963 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26964 	    if (live_regs_mask & (1 << regno))
26965 	      break;
26966 
26967 	  gcc_assert (regno <= LAST_LO_REGNUM);
26968 
26969 	  reg = gen_rtx_REG (SImode, regno);
26970 
26971 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26972 
26973 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26974 					stack_pointer_rtx, reg));
26975 
26976 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
26977 			       plus_constant (Pmode, stack_pointer_rtx,
26978 					      -amount));
26979 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26980 	  RTX_FRAME_RELATED_P (insn) = 1;
26981 	}
26982     }
26983 
26984   if (frame_pointer_needed)
26985     thumb_set_frame_pointer (offsets);
26986 
26987   /* If we are profiling, make sure no instructions are scheduled before
26988      the call to mcount.  Similarly if the user has requested no
26989      scheduling in the prolog.  Similarly if we want non-call exceptions
26990      using the EABI unwinder, to prevent faulting instructions from being
26991      swapped with a stack adjustment.  */
26992   if (crtl->profile || !TARGET_SCHED_PROLOG
26993       || (arm_except_unwind_info (&global_options) == UI_TARGET
26994 	  && cfun->can_throw_non_call_exceptions))
26995     emit_insn (gen_blockage ());
26996 
26997   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26998   if (live_regs_mask & 0xff)
26999     cfun->machine->lr_save_eliminated = 0;
27000 }
27001 
27002 /* Clear caller saved registers not used to pass return values and leaked
27003    condition flags before exiting a cmse_nonsecure_entry function.  */
27004 
27005 void
27006 cmse_nonsecure_entry_clear_before_return (void)
27007 {
27008   bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27009   int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27010   uint32_t padding_bits_to_clear = 0;
27011   auto_sbitmap to_clear_bitmap (maxregno + 1);
27012   rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27013   tree result_type;
27014 
27015   bitmap_clear (to_clear_bitmap);
27016   bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27017   bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27018 
27019   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27020      registers.  */
27021   if (clear_vfpregs)
27022     {
27023       int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27024 
27025       bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27026 
27027       if (!TARGET_HAVE_FPCXT_CMSE)
27028 	{
27029 	  /* Make sure we don't clear the two scratch registers used to clear
27030 	     the relevant FPSCR bits in output_return_instruction.  */
27031 	  emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27032 	  bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27033 	  emit_use (gen_rtx_REG (SImode, 4));
27034 	  bitmap_clear_bit (to_clear_bitmap, 4);
27035 	}
27036     }
27037 
27038   /* If the user has defined registers to be caller saved, these are no longer
27039      restored by the function before returning and must thus be cleared for
27040      security purposes.  */
27041   for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27042     {
27043       /* We do not touch registers that can be used to pass arguments as per
27044 	 the AAPCS, since these should never be made callee-saved by user
27045 	 options.  */
27046       if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27047 	continue;
27048       if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27049 	continue;
27050       if (!callee_saved_reg_p (regno)
27051 	  && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27052 	      || TARGET_HARD_FLOAT))
27053 	bitmap_set_bit (to_clear_bitmap, regno);
27054     }
27055 
27056   /* Make sure we do not clear the registers used to return the result in.  */
27057   result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27058   if (!VOID_TYPE_P (result_type))
27059     {
27060       uint64_t to_clear_return_mask;
27061       result_rtl = arm_function_value (result_type, current_function_decl, 0);
27062 
27063       /* No need to check that we return in registers, because we don't
27064 	 support returning on stack yet.  */
27065       gcc_assert (REG_P (result_rtl));
27066       to_clear_return_mask
27067 	= compute_not_to_clear_mask (result_type, result_rtl, 0,
27068 				     &padding_bits_to_clear);
27069       if (to_clear_return_mask)
27070 	{
27071 	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27072 	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
27073 	    {
27074 	      if (to_clear_return_mask & (1ULL << regno))
27075 		bitmap_clear_bit (to_clear_bitmap, regno);
27076 	    }
27077 	}
27078     }
27079 
27080   if (padding_bits_to_clear != 0)
27081     {
27082       int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27083       auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27084 
27085       /* Padding_bits_to_clear is not 0 so we know we are dealing with
27086 	 returning a composite type, which only uses r0.  Let's make sure that
27087 	 r1-r3 are cleared too.  */
27088       bitmap_clear (to_clear_arg_regs_bitmap);
27089       bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27090       gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27091     }
27092 
27093   /* Clear full registers that leak before returning.  */
27094   clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27095   r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27096   cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27097 			clearing_reg);
27098 }
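
/* Illustrative example (not from the upstream comments): for an entry
   function such as

     int __attribute__ ((cmse_nonsecure_entry)) f (void);

   r0 carries the return value and is left untouched, while r1-r3, ip and,
   when VFP registers are in use, the caller-saved d0-d7 range are cleared
   here so that no secure-side data leaks through dead registers before the
   return to non-secure code.  */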
27099 
27100 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27101    single POP instruction can be generated.  LR should be replaced by PC.
27102    All the checks required have already been done by USE_RETURN_INSN ().
27103    Hence, all we really need to decide here is whether a single register
27104    or multiple registers are being popped.  */
27105 void
27106 thumb2_expand_return (bool simple_return)
27107 {
27108   int i, num_regs;
27109   unsigned long saved_regs_mask;
27110   arm_stack_offsets *offsets;
27111 
27112   offsets = arm_get_frame_offsets ();
27113   saved_regs_mask = offsets->saved_regs_mask;
27114 
27115   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27116     if (saved_regs_mask & (1 << i))
27117       num_regs++;
27118 
27119   if (!simple_return && saved_regs_mask)
27120     {
27121       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27122 	 functions or adapt code to handle according to ACLE.  This path should
27123 	 not be reachable for cmse_nonsecure_entry functions though we prefer
27124 	 to assert it for now to ensure that future code changes do not silently
27125 	 change this behavior.  */
27126       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27127       if (num_regs == 1)
27128         {
27129           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27130           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27131           rtx addr = gen_rtx_MEM (SImode,
27132                                   gen_rtx_POST_INC (SImode,
27133                                                     stack_pointer_rtx));
27134           set_mem_alias_set (addr, get_frame_alias_set ());
27135           XVECEXP (par, 0, 0) = ret_rtx;
27136           XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27137           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27138           emit_jump_insn (par);
27139         }
27140       else
27141         {
27142           saved_regs_mask &= ~ (1 << LR_REGNUM);
27143           saved_regs_mask |=   (1 << PC_REGNUM);
27144           arm_emit_multi_reg_pop (saved_regs_mask);
27145         }
27146     }
27147   else
27148     {
27149       if (IS_CMSE_ENTRY (arm_current_func_type ()))
27150 	cmse_nonsecure_entry_clear_before_return ();
27151       emit_jump_insn (simple_return_rtx);
27152     }
27153 }
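
/* For instance (illustrative): a function that saved {r4, r5, lr} returns
   through a single "pop {r4, r5, pc}" generated above, whereas a simple
   return, or an empty save mask, falls back to the plain return pattern.  */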
27154 
27155 void
27156 thumb1_expand_epilogue (void)
27157 {
27158   HOST_WIDE_INT amount;
27159   arm_stack_offsets *offsets;
27160   int regno;
27161 
27162   /* Naked functions don't have epilogues.  */
27163   if (IS_NAKED (arm_current_func_type ()))
27164     return;
27165 
27166   offsets = arm_get_frame_offsets ();
27167   amount = offsets->outgoing_args - offsets->saved_regs;
27168 
27169   if (frame_pointer_needed)
27170     {
27171       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27172       amount = offsets->locals_base - offsets->saved_regs;
27173     }
27174   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27175 
27176   gcc_assert (amount >= 0);
27177   if (amount)
27178     {
27179       emit_insn (gen_blockage ());
27180 
27181       if (amount < 512)
27182 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27183 			       GEN_INT (amount)));
27184       else
27185 	{
27186 	  /* r3 is always free in the epilogue.  */
27187 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27188 
27189 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
27190 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27191 	}
27192     }
27193 
27194   /* Emit a USE (stack_pointer_rtx), so that
27195      the stack adjustment will not be deleted.  */
27196   emit_insn (gen_force_register_use (stack_pointer_rtx));
27197 
27198   if (crtl->profile || !TARGET_SCHED_PROLOG)
27199     emit_insn (gen_blockage ());
27200 
27201   /* Emit a clobber for each insn that will be restored in the epilogue,
27202      so that flow2 will get register lifetimes correct.  */
27203   for (regno = 0; regno < 13; regno++)
27204     if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
27205       emit_clobber (gen_rtx_REG (SImode, regno));
27206 
27207   if (! df_regs_ever_live_p (LR_REGNUM))
27208     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27209 
27210   /* Clear all caller-saved regs that are not used to return.  */
27211   if (IS_CMSE_ENTRY (arm_current_func_type ()))
27212     cmse_nonsecure_entry_clear_before_return ();
27213 }
27214 
27215 /* Epilogue code for APCS frame.  */
27216 static void
27217 arm_expand_epilogue_apcs_frame (bool really_return)
27218 {
27219   unsigned long func_type;
27220   unsigned long saved_regs_mask;
27221   int num_regs = 0;
27222   int i;
27223   int floats_from_frame = 0;
27224   arm_stack_offsets *offsets;
27225 
27226   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27227   func_type = arm_current_func_type ();
27228 
27229   /* Get frame offsets for ARM.  */
27230   offsets = arm_get_frame_offsets ();
27231   saved_regs_mask = offsets->saved_regs_mask;
27232 
27233   /* Find the offset of the floating-point save area in the frame.  */
27234   floats_from_frame
27235     = (offsets->saved_args
27236        + arm_compute_static_chain_stack_bytes ()
27237        - offsets->frame);
27238 
27239   /* Compute how many core registers saved and how far away the floats are.  */
27240   for (i = 0; i <= LAST_ARM_REGNUM; i++)
27241     if (saved_regs_mask & (1 << i))
27242       {
27243         num_regs++;
27244         floats_from_frame += 4;
27245       }
27246 
27247   if (TARGET_VFP_BASE)
27248     {
27249       int start_reg;
27250       rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27251 
27252       /* The offset is from IP_REGNUM.  */
27253       int saved_size = arm_get_vfp_saved_size ();
27254       if (saved_size > 0)
27255         {
27256 	  rtx_insn *insn;
27257           floats_from_frame += saved_size;
27258           insn = emit_insn (gen_addsi3 (ip_rtx,
27259 					hard_frame_pointer_rtx,
27260 					GEN_INT (-floats_from_frame)));
27261 	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27262 				       ip_rtx, hard_frame_pointer_rtx);
27263         }
27264 
27265       /* Generate VFP register multi-pop.  */
27266       start_reg = FIRST_VFP_REGNUM;
27267 
27268       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27269         /* Look for a case where a reg does not need restoring.  */
27270         if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
27271             && (!df_regs_ever_live_p (i + 1)
27272                 || call_used_or_fixed_reg_p (i + 1)))
27273           {
27274             if (start_reg != i)
27275               arm_emit_vfp_multi_reg_pop (start_reg,
27276                                           (i - start_reg) / 2,
27277                                           gen_rtx_REG (SImode,
27278                                                        IP_REGNUM));
27279             start_reg = i + 2;
27280           }
27281 
27282       /* Restore the remaining regs that we have discovered (or possibly
27283          even all of them, if the conditional in the for loop never
27284          fired).  */
27285       if (start_reg != i)
27286         arm_emit_vfp_multi_reg_pop (start_reg,
27287                                     (i - start_reg) / 2,
27288                                     gen_rtx_REG (SImode, IP_REGNUM));
27289     }
27290 
27291   if (TARGET_IWMMXT)
27292     {
27293       /* The frame pointer is guaranteed to be non-double-word aligned, as
27294          it is set to double-word-aligned old_stack_pointer - 4.  */
27295       rtx_insn *insn;
27296       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27297 
27298       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27299         if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
27300           {
27301             rtx addr = gen_frame_mem (V2SImode,
27302                                  plus_constant (Pmode, hard_frame_pointer_rtx,
27303                                                 - lrm_count * 4));
27304             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27305             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27306                                                gen_rtx_REG (V2SImode, i),
27307                                                NULL_RTX);
27308             lrm_count += 2;
27309           }
27310     }
27311 
27312   /* saved_regs_mask should contain IP, which holds the old stack pointer
27313      saved when the activation record was created.  Since SP and IP are
27314      adjacent registers, we can restore the value directly into SP.  */
27315   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27316   saved_regs_mask &= ~(1 << IP_REGNUM);
27317   saved_regs_mask |= (1 << SP_REGNUM);
27318 
27319   /* There are two registers left in saved_regs_mask - LR and PC.  We
27320      only need to restore LR (the return address), but to
27321      save time we can load it directly into PC, unless we need a
27322      special function exit sequence, or we are not really returning.  */
27323   if (really_return
27324       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27325       && !crtl->calls_eh_return)
27326     /* Delete LR from the register mask, so that LR on
27327        the stack is loaded into the PC in the register mask.  */
27328     saved_regs_mask &= ~(1 << LR_REGNUM);
27329   else
27330     saved_regs_mask &= ~(1 << PC_REGNUM);
27331 
27332   num_regs = bit_count (saved_regs_mask);
27333   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27334     {
27335       rtx_insn *insn;
27336       emit_insn (gen_blockage ());
27337       /* Unwind the stack to just below the saved registers.  */
27338       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27339 				    hard_frame_pointer_rtx,
27340 				    GEN_INT (- 4 * num_regs)));
27341 
27342       arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27343 				   stack_pointer_rtx, hard_frame_pointer_rtx);
27344     }
27345 
27346   arm_emit_multi_reg_pop (saved_regs_mask);
27347 
27348   if (IS_INTERRUPT (func_type))
27349     {
27350       /* Interrupt handlers will have pushed the
27351          IP onto the stack, so restore it now.  */
27352       rtx_insn *insn;
27353       rtx addr = gen_rtx_MEM (SImode,
27354                               gen_rtx_POST_INC (SImode,
27355                               stack_pointer_rtx));
27356       set_mem_alias_set (addr, get_frame_alias_set ());
27357       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27358       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27359                                          gen_rtx_REG (SImode, IP_REGNUM),
27360                                          NULL_RTX);
27361     }
27362 
27363   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27364     return;
27365 
27366   if (crtl->calls_eh_return)
27367     emit_insn (gen_addsi3 (stack_pointer_rtx,
27368 			   stack_pointer_rtx,
27369 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27370 
27371   if (IS_STACKALIGN (func_type))
27372     /* Restore the original stack pointer.  Before prologue, the stack was
27373        realigned and the original stack pointer saved in r0.  For details,
27374        see comment in arm_expand_prologue.  */
27375     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27376 
27377   emit_jump_insn (simple_return_rtx);
27378 }
27379 
27380 /* Generate RTL to represent ARM epilogue.  Really_return is true if the
27381    function is not a sibcall.  */
27382 void
27383 arm_expand_epilogue (bool really_return)
27384 {
27385   unsigned long func_type;
27386   unsigned long saved_regs_mask;
27387   int num_regs = 0;
27388   int i;
27389   int amount;
27390   arm_stack_offsets *offsets;
27391 
27392   func_type = arm_current_func_type ();
27393 
27394   /* Naked functions don't have an epilogue.  Hence, generate a return pattern
27395      and let output_return_instruction take care of any instruction emission.  */
27396   if (IS_NAKED (func_type)
27397       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27398     {
27399       if (really_return)
27400         emit_jump_insn (simple_return_rtx);
27401       return;
27402     }
27403 
27404   /* If we are throwing an exception, then we really must be doing a
27405      return, so we can't tail-call.  */
27406   gcc_assert (!crtl->calls_eh_return || really_return);
27407 
27408   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27409     {
27410       arm_expand_epilogue_apcs_frame (really_return);
27411       return;
27412     }
27413 
27414   /* Get frame offsets for ARM.  */
27415   offsets = arm_get_frame_offsets ();
27416   saved_regs_mask = offsets->saved_regs_mask;
27417   num_regs = bit_count (saved_regs_mask);
27418 
27419   if (frame_pointer_needed)
27420     {
27421       rtx_insn *insn;
27422       /* Restore stack pointer if necessary.  */
27423       if (TARGET_ARM)
27424         {
27425           /* In ARM mode, frame pointer points to first saved register.
27426              Restore stack pointer to last saved register.  */
27427           amount = offsets->frame - offsets->saved_regs;
27428 
27429           /* Force out any pending memory operations that reference stacked data
27430              before stack de-allocation occurs.  */
27431           emit_insn (gen_blockage ());
27432 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27433 			    hard_frame_pointer_rtx,
27434 			    GEN_INT (amount)));
27435 	  arm_add_cfa_adjust_cfa_note (insn, amount,
27436 				       stack_pointer_rtx,
27437 				       hard_frame_pointer_rtx);
27438 
27439           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27440              deleted.  */
27441           emit_insn (gen_force_register_use (stack_pointer_rtx));
27442         }
27443       else
27444         {
27445           /* In Thumb-2 mode, the frame pointer points to the last saved
27446              register.  */
27447 	  amount = offsets->locals_base - offsets->saved_regs;
27448 	  if (amount)
27449 	    {
27450 	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27451 				hard_frame_pointer_rtx,
27452 				GEN_INT (amount)));
27453 	      arm_add_cfa_adjust_cfa_note (insn, amount,
27454 					   hard_frame_pointer_rtx,
27455 					   hard_frame_pointer_rtx);
27456 	    }
27457 
27458           /* Force out any pending memory operations that reference stacked data
27459              before stack de-allocation occurs.  */
27460           emit_insn (gen_blockage ());
27461 	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
27462 				       hard_frame_pointer_rtx));
27463 	  arm_add_cfa_adjust_cfa_note (insn, 0,
27464 				       stack_pointer_rtx,
27465 				       hard_frame_pointer_rtx);
27466           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27467              deleted.  */
27468           emit_insn (gen_force_register_use (stack_pointer_rtx));
27469         }
27470     }
27471   else
27472     {
27473       /* Pop off outgoing args and local frame to adjust stack pointer to
27474          last saved register.  */
27475       amount = offsets->outgoing_args - offsets->saved_regs;
27476       if (amount)
27477         {
27478 	  rtx_insn *tmp;
27479           /* Force out any pending memory operations that reference stacked data
27480              before stack de-allocation occurs.  */
27481           emit_insn (gen_blockage ());
27482 	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27483 				       stack_pointer_rtx,
27484 				       GEN_INT (amount)));
27485 	  arm_add_cfa_adjust_cfa_note (tmp, amount,
27486 				       stack_pointer_rtx, stack_pointer_rtx);
27487           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27488              not deleted.  */
27489           emit_insn (gen_force_register_use (stack_pointer_rtx));
27490         }
27491     }
27492 
27493   if (TARGET_VFP_BASE)
27494     {
27495       /* Generate VFP register multi-pop.  */
27496       int end_reg = LAST_VFP_REGNUM + 1;
27497 
27498       /* Scan the registers in reverse order.  We need to match
27499          any groupings made in the prologue and generate matching
27500          vldm operations.  The need to match groups is because,
27501          unlike pop, vldm can only do consecutive regs.  */
27502       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27503         /* Look for a case where a reg does not need restoring.  */
27504         if ((!df_regs_ever_live_p (i) || call_used_or_fixed_reg_p (i))
27505             && (!df_regs_ever_live_p (i + 1)
27506                 || call_used_or_fixed_reg_p (i + 1)))
27507           {
27508             /* Restore the regs discovered so far (from reg+2 to
27509                end_reg).  */
27510             if (end_reg > i + 2)
27511               arm_emit_vfp_multi_reg_pop (i + 2,
27512                                           (end_reg - (i + 2)) / 2,
27513                                           stack_pointer_rtx);
27514             end_reg = i;
27515           }
27516 
27517       /* Restore the remaining regs that we have discovered (or possibly
27518          even all of them, if the conditional in the for loop never
27519          fired).  */
27520       if (end_reg > i + 2)
27521         arm_emit_vfp_multi_reg_pop (i + 2,
27522                                     (end_reg - (i + 2)) / 2,
27523                                     stack_pointer_rtx);
27524     }
27525 
27526   if (TARGET_IWMMXT)
27527     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27528       if (df_regs_ever_live_p (i) && !call_used_or_fixed_reg_p (i))
27529         {
27530           rtx_insn *insn;
27531           rtx addr = gen_rtx_MEM (V2SImode,
27532                                   gen_rtx_POST_INC (SImode,
27533                                                     stack_pointer_rtx));
27534           set_mem_alias_set (addr, get_frame_alias_set ());
27535           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27536           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27537                                              gen_rtx_REG (V2SImode, i),
27538                                              NULL_RTX);
27539 	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27540 				       stack_pointer_rtx, stack_pointer_rtx);
27541         }
27542 
27543   if (saved_regs_mask)
27544     {
27545       rtx insn;
27546       bool return_in_pc = false;
27547 
27548       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27549           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27550 	  && !IS_CMSE_ENTRY (func_type)
27551           && !IS_STACKALIGN (func_type)
27552           && really_return
27553           && crtl->args.pretend_args_size == 0
27554           && saved_regs_mask & (1 << LR_REGNUM)
27555           && !crtl->calls_eh_return)
27556         {
27557           saved_regs_mask &= ~(1 << LR_REGNUM);
27558           saved_regs_mask |= (1 << PC_REGNUM);
27559           return_in_pc = true;
27560         }
27561 
27562       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27563         {
27564           for (i = 0; i <= LAST_ARM_REGNUM; i++)
27565             if (saved_regs_mask & (1 << i))
27566               {
27567                 rtx addr = gen_rtx_MEM (SImode,
27568                                         gen_rtx_POST_INC (SImode,
27569                                                           stack_pointer_rtx));
27570                 set_mem_alias_set (addr, get_frame_alias_set ());
27571 
27572                 if (i == PC_REGNUM)
27573                   {
27574                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27575                     XVECEXP (insn, 0, 0) = ret_rtx;
27576                     XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27577                                                         addr);
27578                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27579                     insn = emit_jump_insn (insn);
27580                   }
27581                 else
27582                   {
27583                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27584                                                  addr));
27585                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27586                                                        gen_rtx_REG (SImode, i),
27587                                                        NULL_RTX);
27588 		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27589 						 stack_pointer_rtx,
27590 						 stack_pointer_rtx);
27591                   }
27592               }
27593         }
27594       else
27595         {
27596           if (TARGET_LDRD
27597 	      && current_tune->prefer_ldrd_strd
27598               && !optimize_function_for_size_p (cfun))
27599             {
27600               if (TARGET_THUMB2)
27601                 thumb2_emit_ldrd_pop (saved_regs_mask);
27602               else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27603                 arm_emit_ldrd_pop (saved_regs_mask);
27604               else
27605                 arm_emit_multi_reg_pop (saved_regs_mask);
27606             }
27607           else
27608             arm_emit_multi_reg_pop (saved_regs_mask);
27609         }
27610 
27611       if (return_in_pc)
27612         return;
27613     }
27614 
27615   amount
27616     = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
27617   if (amount)
27618     {
27619       int i, j;
27620       rtx dwarf = NULL_RTX;
27621       rtx_insn *tmp =
27622 	emit_insn (gen_addsi3 (stack_pointer_rtx,
27623 			       stack_pointer_rtx,
27624 			       GEN_INT (amount)));
27625 
27626       RTX_FRAME_RELATED_P (tmp) = 1;
27627 
27628       if (cfun->machine->uses_anonymous_args)
27629 	{
27630 	  /* Restore pretend args.  Refer to arm_expand_prologue for how the
27631 	     pretend args are saved on the stack.  */
27632 	  int num_regs = crtl->args.pretend_args_size / 4;
27633 	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27634 	  for (j = 0, i = 0; j < num_regs; i++)
27635 	    if (saved_regs_mask & (1 << i))
27636 	      {
27637 		rtx reg = gen_rtx_REG (SImode, i);
27638 		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27639 		j++;
27640 	      }
27641 	  REG_NOTES (tmp) = dwarf;
27642 	}
27643       arm_add_cfa_adjust_cfa_note (tmp, amount,
27644 				   stack_pointer_rtx, stack_pointer_rtx);
27645     }
27646 
27647   if (IS_CMSE_ENTRY (func_type))
27648     {
27649       /* CMSE_ENTRY always returns.  */
27650       gcc_assert (really_return);
27651       /* Clear all caller-saved regs that are not used to return.  */
27652       cmse_nonsecure_entry_clear_before_return ();
27653 
27654       /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27655 	 VLDR.  */
27656       if (TARGET_HAVE_FPCXT_CMSE)
27657 	{
27658 	  rtx_insn *insn;
27659 
27660 	  insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27661 						   GEN_INT (FPCXTNS_ENUM)));
27662 	  rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27663 				  plus_constant (Pmode, stack_pointer_rtx, 4));
27664 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27665 	  RTX_FRAME_RELATED_P (insn) = 1;
27666 	}
27667     }
27668 
27669   if (!really_return)
27670     return;
27671 
27672   if (crtl->calls_eh_return)
27673     emit_insn (gen_addsi3 (stack_pointer_rtx,
27674                            stack_pointer_rtx,
27675                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27676 
27677   if (IS_STACKALIGN (func_type))
27678     /* Restore the original stack pointer.  Before prologue, the stack was
27679        realigned and the original stack pointer saved in r0.  For details,
27680        see comment in arm_expand_prologue.  */
27681     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27682 
27683   emit_jump_insn (simple_return_rtx);
27684 }
27685 
27686 /* Implementation of insn prologue_thumb1_interwork.  This is the first
27687    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
27688 
27689 const char *
27690 thumb1_output_interwork (void)
27691 {
27692   const char * name;
27693   FILE *f = asm_out_file;
27694 
27695   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27696   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27697 	      == SYMBOL_REF);
27698   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27699 
27700   /* Generate code sequence to switch us into Thumb mode.  */
27701   /* The .code 32 directive has already been emitted by
27702      ASM_DECLARE_FUNCTION_NAME.  */
27703   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27704   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27705 
27706   /* Generate a label, so that the debugger will notice the
27707      change in instruction sets.  This label is also used by
27708      the assembler to bypass the ARM code when this function
27709      is called from a Thumb encoded function elsewhere in the
27710      same file.  Hence the definition of STUB_NAME here must
27711      agree with the definition in gas/config/tc-arm.c.  */
27712 
27713 #define STUB_NAME ".real_start_of"
27714 
27715   fprintf (f, "\t.code\t16\n");
27716 #ifdef ARM_PE
27717   if (arm_dllexport_name_p (name))
27718     name = arm_strip_name_encoding (name);
27719 #endif
27720   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27721   fprintf (f, "\t.thumb_func\n");
27722   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27723 
27724   return "";
27725 }
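
/* Putting the directives above together, the emitted stub looks roughly
   like this (label prefixes and any PE name stripping elided):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:

   ARM-state callers fall through the mode switch, while Thumb callers in
   the same file branch directly to the .real_start_of label.  */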
27726 
27727 /* Handle the case of a double word load into a low register from
27728    a computed memory address.  The computed address may involve a
27729    register which is overwritten by the load.  */
27730 const char *
27731 thumb_load_double_from_address (rtx *operands)
27732 {
27733   rtx addr;
27734   rtx base;
27735   rtx offset;
27736   rtx arg1;
27737   rtx arg2;
27738 
27739   gcc_assert (REG_P (operands[0]));
27740   gcc_assert (MEM_P (operands[1]));
27741 
27742   /* Get the memory address.  */
27743   addr = XEXP (operands[1], 0);
27744 
27745   /* Work out how the memory address is computed.  */
27746   switch (GET_CODE (addr))
27747     {
27748     case REG:
27749       operands[2] = adjust_address (operands[1], SImode, 4);
27750 
27751       if (REGNO (operands[0]) == REGNO (addr))
27752 	{
27753 	  output_asm_insn ("ldr\t%H0, %2", operands);
27754 	  output_asm_insn ("ldr\t%0, %1", operands);
27755 	}
27756       else
27757 	{
27758 	  output_asm_insn ("ldr\t%0, %1", operands);
27759 	  output_asm_insn ("ldr\t%H0, %2", operands);
27760 	}
27761       break;
27762 
27763     case CONST:
27764       /* Compute <address> + 4 for the high order load.  */
27765       operands[2] = adjust_address (operands[1], SImode, 4);
27766 
27767       output_asm_insn ("ldr\t%0, %1", operands);
27768       output_asm_insn ("ldr\t%H0, %2", operands);
27769       break;
27770 
27771     case PLUS:
27772       arg1   = XEXP (addr, 0);
27773       arg2   = XEXP (addr, 1);
27774 
27775       if (CONSTANT_P (arg1))
27776 	base = arg2, offset = arg1;
27777       else
27778 	base = arg1, offset = arg2;
27779 
27780       gcc_assert (REG_P (base));
27781 
27782       /* Catch the case of <address> = <reg> + <reg> */
27783       if (REG_P (offset))
27784 	{
27785 	  int reg_offset = REGNO (offset);
27786 	  int reg_base   = REGNO (base);
27787 	  int reg_dest   = REGNO (operands[0]);
27788 
27789 	  /* Add the base and offset registers together into the
27790              higher destination register.  */
27791 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27792 		       reg_dest + 1, reg_base, reg_offset);
27793 
27794 	  /* Load the lower destination register from the address in
27795              the higher destination register.  */
27796 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27797 		       reg_dest, reg_dest + 1);
27798 
27799 	  /* Load the higher destination register from its own address
27800              plus 4.  */
27801 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27802 		       reg_dest + 1, reg_dest + 1);
27803 	}
27804       else
27805 	{
27806 	  /* Compute <address> + 4 for the high order load.  */
27807 	  operands[2] = adjust_address (operands[1], SImode, 4);
27808 
27809 	  /* If the computed address is held in the low order register
27810 	     then load the high order register first, otherwise always
27811 	     load the low order register first.  */
27812 	  if (REGNO (operands[0]) == REGNO (base))
27813 	    {
27814 	      output_asm_insn ("ldr\t%H0, %2", operands);
27815 	      output_asm_insn ("ldr\t%0, %1", operands);
27816 	    }
27817 	  else
27818 	    {
27819 	      output_asm_insn ("ldr\t%0, %1", operands);
27820 	      output_asm_insn ("ldr\t%H0, %2", operands);
27821 	    }
27822 	}
27823       break;
27824 
27825     case LABEL_REF:
27826       /* With no registers to worry about we can just load the value
27827          directly.  */
27828       operands[2] = adjust_address (operands[1], SImode, 4);
27829 
27830       output_asm_insn ("ldr\t%H0, %2", operands);
27831       output_asm_insn ("ldr\t%0, %1", operands);
27832       break;
27833 
27834     default:
27835       gcc_unreachable ();
27836     }
27837 
27838   return "";
27839 }
27840 
27841 const char *
27842 thumb_output_move_mem_multiple (int n, rtx *operands)
27843 {
27844   switch (n)
27845     {
27846     case 2:
27847       if (REGNO (operands[4]) > REGNO (operands[5]))
27848 	std::swap (operands[4], operands[5]);
27849 
27850       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27851       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27852       break;
27853 
27854     case 3:
27855       if (REGNO (operands[4]) > REGNO (operands[5]))
27856         std::swap (operands[4], operands[5]);
27857       if (REGNO (operands[5]) > REGNO (operands[6]))
27858         std::swap (operands[5], operands[6]);
27859       if (REGNO (operands[4]) > REGNO (operands[5]))
27860         std::swap (operands[4], operands[5]);
27861 
27862       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27863       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27864       break;
27865 
27866     default:
27867       gcc_unreachable ();
27868     }
27869 
27870   return "";
27871 }
27872 
27873 /* Output a call-via instruction for thumb state.  */
27874 const char *
27875 thumb_call_via_reg (rtx reg)
27876 {
27877   int regno = REGNO (reg);
27878   rtx *labelp;
27879 
27880   gcc_assert (regno < LR_REGNUM);
27881 
27882   /* If we are in the normal text section we can use a single instance
27883      per compilation unit.  If we are doing function sections, then we need
27884      an entry per section, since we can't rely on reachability.  */
27885   if (in_section == text_section)
27886     {
27887       thumb_call_reg_needed = 1;
27888 
27889       if (thumb_call_via_label[regno] == NULL)
27890 	thumb_call_via_label[regno] = gen_label_rtx ();
27891       labelp = thumb_call_via_label + regno;
27892     }
27893   else
27894     {
27895       if (cfun->machine->call_via[regno] == NULL)
27896 	cfun->machine->call_via[regno] = gen_label_rtx ();
27897       labelp = cfun->machine->call_via + regno;
27898     }
27899 
27900   output_asm_insn ("bl\t%a0", labelp);
27901   return "";
27902 }
27903 
27904 /* Routines for generating rtl.  */
27905 void
27906 thumb_expand_cpymemqi (rtx *operands)
27907 {
27908   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27909   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27910   HOST_WIDE_INT len = INTVAL (operands[2]);
27911   HOST_WIDE_INT offset = 0;
27912 
27913   while (len >= 12)
27914     {
27915       emit_insn (gen_cpymem12b (out, in, out, in));
27916       len -= 12;
27917     }
27918 
27919   if (len >= 8)
27920     {
27921       emit_insn (gen_cpymem8b (out, in, out, in));
27922       len -= 8;
27923     }
27924 
27925   if (len >= 4)
27926     {
27927       rtx reg = gen_reg_rtx (SImode);
27928       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27929       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27930       len -= 4;
27931       offset += 4;
27932     }
27933 
27934   if (len >= 2)
27935     {
27936       rtx reg = gen_reg_rtx (HImode);
27937       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27938 					      plus_constant (Pmode, in,
27939 							     offset))));
27940       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27941 								offset)),
27942 			    reg));
27943       len -= 2;
27944       offset += 2;
27945     }
27946 
27947   if (len)
27948     {
27949       rtx reg = gen_reg_rtx (QImode);
27950       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27951 					      plus_constant (Pmode, in,
27952 							     offset))));
27953       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27954 								offset)),
27955 			    reg));
27956     }
27957 }
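
/* Worked example (illustrative): a 15-byte copy is expanded as one
   12-byte ldmia/stmia block (cpymem12b), then a halfword move and a final
   byte move for the remaining 3 bytes; the word and 8-byte paths are
   skipped because fewer than 4 bytes remain after the 12-byte block.  */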
27958 
27959 void
27960 thumb_reload_out_hi (rtx *operands)
27961 {
27962   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27963 }
27964 
27965 /* Return the length of a function name prefix
27966    that starts with the character C.  */
27967 static int
27968 arm_get_strip_length (int c)
27969 {
27970   switch (c)
27971     {
27972     ARM_NAME_ENCODING_LENGTHS
27973       default: return 0;
27974     }
27975 }
27976 
27977 /* Return a pointer to a function's name with any
27978    and all prefix encodings stripped from it.  */
27979 const char *
27980 arm_strip_name_encoding (const char *name)
27981 {
27982   int skip;
27983 
27984   while ((skip = arm_get_strip_length (* name)))
27985     name += skip;
27986 
27987   return name;
27988 }
27989 
27990 /* If there is a '*' anywhere in the name's prefix, then
27991    emit the stripped name verbatim, otherwise prepend an
27992    underscore if leading underscores are being used.  */
27993 void
27994 arm_asm_output_labelref (FILE *stream, const char *name)
27995 {
27996   int skip;
27997   int verbatim = 0;
27998 
27999   while ((skip = arm_get_strip_length (* name)))
28000     {
28001       verbatim |= (*name == '*');
28002       name += skip;
28003     }
28004 
28005   if (verbatim)
28006     fputs (name, stream);
28007   else
28008     asm_fprintf (stream, "%U%s", name);
28009 }
28010 
28011 /* This function is used to emit an EABI tag and its associated value.
28012    We emit the numerical value of the tag in case the assembler does not
28013    support textual tags (e.g. gas prior to 2.20).  If requested we include
28014    the tag name in a comment so that anyone reading the assembler output
28015    will know which tag is being set.
28016 
28017    This function is not static because arm-c.c needs it too.  */
28018 
28019 void
28020 arm_emit_eabi_attribute (const char *name, int num, int val)
28021 {
28022   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28023   if (flag_verbose_asm || flag_debug_asm)
28024     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28025   asm_fprintf (asm_out_file, "\n");
28026 }
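
/* Example of the output produced (illustrative; the tag number and value
   come from the EABI attribute tables used elsewhere in this file):

	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args

   With -fverbose-asm the trailing comment names the tag; otherwise only
   the numeric form is emitted, for the benefit of older assemblers.  */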
28027 
28028 /* This function is used to print CPU tuning information as comment
28029    in assembler file.  Pointers are not printed for now.  */
28030 
28031 void
28032 arm_print_tune_info (void)
28033 {
28034   asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28035   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28036 	       current_tune->constant_limit);
28037   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28038 	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28039   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28040 	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28041   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28042 	       "prefetch.l1_cache_size:\t%d\n",
28043 	       current_tune->prefetch.l1_cache_size);
28044   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28045 	       "prefetch.l1_cache_line_size:\t%d\n",
28046 	       current_tune->prefetch.l1_cache_line_size);
28047   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28048 	       "prefer_constant_pool:\t%d\n",
28049 	       (int) current_tune->prefer_constant_pool);
28050   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28051 	       "branch_cost:\t(s:speed, p:predictable)\n");
28052   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28053   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28054 	       current_tune->branch_cost (false, false));
28055   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28056 	       current_tune->branch_cost (false, true));
28057   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28058 	       current_tune->branch_cost (true, false));
28059   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28060 	       current_tune->branch_cost (true, true));
28061   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28062 	       "prefer_ldrd_strd:\t%d\n",
28063 	       (int) current_tune->prefer_ldrd_strd);
28064   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28065 	       "logical_op_non_short_circuit:\t[%d,%d]\n",
28066 	       (int) current_tune->logical_op_non_short_circuit_thumb,
28067 	       (int) current_tune->logical_op_non_short_circuit_arm);
28068   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28069 	       "disparage_flag_setting_t16_encodings:\t%d\n",
28070 	       (int) current_tune->disparage_flag_setting_t16_encodings);
28071   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28072 	       "string_ops_prefer_neon:\t%d\n",
28073 	       (int) current_tune->string_ops_prefer_neon);
28074   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28075 	       "max_insns_inline_memset:\t%d\n",
28076 	       current_tune->max_insns_inline_memset);
28077   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28078 	       current_tune->fusible_ops);
28079   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28080 	       (int) current_tune->sched_autopref);
28081 }
28082 
28083 /* The last set of target options used to emit .arch directives, etc.  This
28084    could be a function-local static if it were not required to expose it as a
28085    root to the garbage collector.  */
28086 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28087 
28088 /* Print .arch and .arch_extension directives corresponding to the
28089    current architecture configuration.  */
28090 static void
28091 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28092 {
28093   arm_build_target build_target;
28094   /* If the target options haven't changed since the last time we were called
28095      there is nothing to do.  This should be sufficient to suppress the
28096      majority of redundant work.  */
28097   if (last_asm_targ_options == targ_options)
28098     return;
28099 
28100   last_asm_targ_options = targ_options;
28101 
28102   build_target.isa = sbitmap_alloc (isa_num_bits);
28103   arm_configure_build_target (&build_target, targ_options, false);
28104 
28105   if (build_target.core_name
28106       && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28107     {
28108       const char* truncated_name
28109 	= arm_rewrite_selected_cpu (build_target.core_name);
28110       asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28111     }
28112 
28113   const arch_option *arch
28114     = arm_parse_arch_option_name (all_architectures, "-march",
28115 				  build_target.arch_name);
28116   auto_sbitmap opt_bits (isa_num_bits);
28117 
28118   gcc_assert (arch);
28119 
28120   if (strcmp (build_target.arch_name, "armv7ve") == 0)
28121     {
28122       /* Keep backward compatibility for assemblers which don't support
28123 	 armv7ve.  Fortunately, none of the following extensions are reset
28124 	 by a .fpu directive.  */
28125       asm_fprintf (stream, "\t.arch armv7-a\n");
28126       asm_fprintf (stream, "\t.arch_extension virt\n");
28127       asm_fprintf (stream, "\t.arch_extension idiv\n");
28128       asm_fprintf (stream, "\t.arch_extension sec\n");
28129       asm_fprintf (stream, "\t.arch_extension mp\n");
28130     }
28131   else
28132     asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28133 
28134   /* The .fpu directive will reset any architecture extensions from the
28135      assembler that relate to the fp/vector extensions.  So put this out before
28136      any .arch_extension directives.  */
28137   const char *fpu_name = (TARGET_SOFT_FLOAT
28138 			  ? "softvfp"
28139 			  : arm_identify_fpu_from_isa (build_target.isa));
28140   asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28141 
28142   if (!arch->common.extensions)
28143     return;
28144 
28145   for (const struct cpu_arch_extension *opt = arch->common.extensions;
28146        opt->name != NULL;
28147        opt++)
28148     {
28149       if (!opt->remove)
28150 	{
28151 	  arm_initialize_isa (opt_bits, opt->isa_bits);
28152 
28153 	  /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28154 	     "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28155 	     floating point instructions are disabled.  So the following check
28156 	     restricts the printing of ".arch_extension mve" and
28157 	     ".arch_extension fp" (for mve.fp) in the assembly file.  MVE needs
28158 	     this special behaviour because the feature bits "mve" and
28159 	     "mve_float" are not part of the "fpu bits", so they are not cleared
28160 	     when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE
28161 	     and TARGET_HAVE_MVE_FLOAT are disabled.  */
28162 	  if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28163 	      || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28164 		  && !TARGET_HAVE_MVE_FLOAT))
28165 	    continue;
28166 
28167 	  /* If every feature bit of this option is set in the target ISA
28168 	     specification, print out the option name.  However, don't print
28169 	     anything if all the bits are part of the FPU specification.  */
28170 	  if (bitmap_subset_p (opt_bits, build_target.isa)
28171 	      && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28172 	    asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28173 	}
28174     }
28175 }
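
/* As a rough illustration, for a target whose architecture resolves to
   armv7ve with a Neon FPU the directives emitted here would look
   something like

	.cpu cortex-a15
	.arch armv7-a
	.arch_extension virt
	.arch_extension idiv
	.arch_extension sec
	.arch_extension mp
	.fpu neon-vfpv4

   (possibly followed by further .arch_extension lines from the extension
   loop above), whereas other architectures are named directly with a
   single .arch directive.  */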
28176 
28177 static void
28178 arm_file_start (void)
28179 {
28180   int val;
28181 
28182   arm_print_asm_arch_directives
28183     (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28184 
28185   if (TARGET_BPABI)
28186     {
28187       /* If we have a named cpu, but the assembler does not support that
28188 	 name via .cpu, put out a cpu name attribute; but don't do this if the
28189 	 name starts with the fictitious prefix, 'generic'.  */
28190       if (arm_active_target.core_name
28191 	  && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28192 	  && strncmp (arm_active_target.core_name, "generic", 7) != 0)
28193 	{
28194 	  const char* truncated_name
28195 	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
28196 	  if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28197 	    asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28198 			 truncated_name);
28199 	}
28200 
28201       if (print_tune_info)
28202 	arm_print_tune_info ();
28203 
28204       if (! TARGET_SOFT_FLOAT)
28205 	{
28206 	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28207 	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28208 
28209 	  if (TARGET_HARD_FLOAT_ABI)
28210 	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28211 	}
28212 
28213       /* Some of these attributes only apply when the corresponding features
28214 	 are used.  However we don't have any easy way of figuring this out.
28215 	 Conservatively record the setting that would have been used.  */
28216 
28217       if (flag_rounding_math)
28218 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28219 
28220       if (!flag_unsafe_math_optimizations)
28221 	{
28222 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28223 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28224 	}
28225       if (flag_signaling_nans)
28226 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28227 
28228       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28229 			   flag_finite_math_only ? 1 : 3);
28230 
28231       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28232       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28233       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28234 			       flag_short_enums ? 1 : 2);
28235 
28236       /* Tag_ABI_optimization_goals.  */
28237       if (optimize_size)
28238 	val = 4;
28239       else if (optimize >= 2)
28240 	val = 2;
28241       else if (optimize)
28242 	val = 1;
28243       else
28244 	val = 6;
28245       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28246 
28247       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28248 			       unaligned_access);
28249 
28250       if (arm_fp16_format)
28251 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28252 			     (int) arm_fp16_format);
28253 
28254       if (arm_lang_output_object_attributes_hook)
28255 	arm_lang_output_object_attributes_hook();
28256     }
28257 
28258   default_file_start ();
28259 }
28260 
28261 static void
28262 arm_file_end (void)
28263 {
28264   int regno;
28265 
28266   /* Just in case the last function output in the assembler had non-default
28267      architecture directives, we force the assembler state back to the default
28268      set, so that any 'calculated' build attributes are based on the default
28269      options rather than the special options for that function.  */
28270   arm_print_asm_arch_directives
28271     (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28272 
28273   if (NEED_INDICATE_EXEC_STACK)
28274     /* Add .note.GNU-stack.  */
28275     file_end_indicate_exec_stack ();
28276 
28277   if (! thumb_call_reg_needed)
28278     return;
28279 
28280   switch_to_section (text_section);
28281   asm_fprintf (asm_out_file, "\t.code 16\n");
28282   ASM_OUTPUT_ALIGN (asm_out_file, 1);
28283 
28284   for (regno = 0; regno < LR_REGNUM; regno++)
28285     {
28286       rtx label = thumb_call_via_label[regno];
28287 
28288       if (label != 0)
28289 	{
28290 	  targetm.asm_out.internal_label (asm_out_file, "L",
28291 					  CODE_LABEL_NUMBER (label));
28292 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28293 	}
28294     }
28295 }
28296 
28297 #ifndef ARM_PE
28298 /* Symbols in the text segment can be accessed without indirecting via the
28299    constant pool; it may take an extra binary operation, but this is still
28300    faster than indirecting via memory.  Don't do this when not optimizing,
28301    since we won't be calculating all of the offsets necessary to do this
28302    simplification.  */
28303 
28304 static void
28305 arm_encode_section_info (tree decl, rtx rtl, int first)
28306 {
28307   if (optimize > 0 && TREE_CONSTANT (decl))
28308     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28309 
28310   default_encode_section_info (decl, rtl, first);
28311 }
28312 #endif /* !ARM_PE */
28313 
28314 static void
28315 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28316 {
28317   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28318       && !strcmp (prefix, "L"))
28319     {
28320       arm_ccfsm_state = 0;
28321       arm_target_insn = NULL;
28322     }
28323   default_internal_label (stream, prefix, labelno);
28324 }
28325 
28326 /* Output code to add DELTA to the first argument, and then jump
28327    to FUNCTION.  Used for C++ multiple inheritance.  */
28328 
28329 static void
28330 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28331 		     HOST_WIDE_INT, tree function)
28332 {
28333   static int thunk_label = 0;
28334   char label[256];
28335   char labelpc[256];
28336   int mi_delta = delta;
28337   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28338   int shift = 0;
28339   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28340                     ? 1 : 0);
28341   if (mi_delta < 0)
28342     mi_delta = - mi_delta;
28343 
28344   final_start_function (emit_barrier (), file, 1);
28345 
28346   if (TARGET_THUMB1)
28347     {
28348       int labelno = thunk_label++;
28349       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28350       /* Thunks are entered in arm mode when available.  */
28351       if (TARGET_THUMB1_ONLY)
28352 	{
28353 	  /* push r3 so we can use it as a temporary.  */
28354 	  /* TODO: Omit this save if r3 is not used.  */
28355 	  fputs ("\tpush {r3}\n", file);
28356 
28357 	  /* With -mpure-code, we cannot load the address from the
28358 	     constant pool: we build it explicitly.  */
28359 	  if (target_pure_code)
28360 	    {
28361 	      fputs ("\tmovs\tr3, #:upper8_15:#", file);
28362 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28363 	      fputc ('\n', file);
28364 	      fputs ("\tlsls r3, #8\n", file);
28365 	      fputs ("\tadds\tr3, #:upper0_7:#", file);
28366 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28367 	      fputc ('\n', file);
28368 	      fputs ("\tlsls r3, #8\n", file);
28369 	      fputs ("\tadds\tr3, #:lower8_15:#", file);
28370 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28371 	      fputc ('\n', file);
28372 	      fputs ("\tlsls r3, #8\n", file);
28373 	      fputs ("\tadds\tr3, #:lower0_7:#", file);
28374 	      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28375 	      fputc ('\n', file);
28376 	    }
28377 	  else
28378 	    fputs ("\tldr\tr3, ", file);
28379 	}
28380       else
28381 	{
28382 	  fputs ("\tldr\tr12, ", file);
28383 	}
28384 
28385       if (!target_pure_code)
28386 	{
28387 	  assemble_name (file, label);
28388 	  fputc ('\n', file);
28389 	}
28390 
28391       if (flag_pic)
28392 	{
28393 	  /* If we are generating PIC, the ldr instruction below loads
28394 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
28395 	     the address of the add + 8, so we have:
28396 
28397 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28398 	         = target + 1.
28399 
28400 	     Note that we have "+ 1" because some versions of GNU ld
28401 	     don't set the low bit of the result for R_ARM_REL32
28402 	     relocations against thumb function symbols.
28403 	     On ARMv6M this is +4, not +8.  */
28404 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28405 	  assemble_name (file, labelpc);
28406 	  fputs (":\n", file);
28407 	  if (TARGET_THUMB1_ONLY)
28408 	    {
28409 	      /* This is 2 insns after the start of the thunk, so we know it
28410 	         is 4-byte aligned.  */
28411 	      fputs ("\tadd\tr3, pc, r3\n", file);
28412 	      fputs ("\tmov r12, r3\n", file);
28413 	    }
28414 	  else
28415 	    fputs ("\tadd\tr12, pc, r12\n", file);
28416 	}
28417       else if (TARGET_THUMB1_ONLY)
28418 	fputs ("\tmov r12, r3\n", file);
28419     }
28420   if (TARGET_THUMB1_ONLY)
28421     {
28422       if (mi_delta > 255)
28423 	{
28424 	  fputs ("\tldr\tr3, ", file);
28425 	  assemble_name (file, label);
28426 	  fputs ("+4\n", file);
28427 	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28428 		       mi_op, this_regno, this_regno);
28429 	}
28430       else if (mi_delta != 0)
28431 	{
28432 	  /* Thumb1 unified syntax requires s suffix in instruction name when
28433 	     one of the operands is immediate.  */
28434 	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28435 		       mi_op, this_regno, this_regno,
28436 		       mi_delta);
28437 	}
28438     }
28439   else
28440     {
28441       /* TODO: Use movw/movt for large constants when available.  */
28442       while (mi_delta != 0)
28443 	{
28444 	  if ((mi_delta & (3 << shift)) == 0)
28445 	    shift += 2;
28446 	  else
28447 	    {
28448 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28449 			   mi_op, this_regno, this_regno,
28450 			   mi_delta & (0xff << shift));
28451 	      mi_delta &= ~(0xff << shift);
28452 	      shift += 8;
28453 	    }
28454 	}
28455     }
28456   if (TARGET_THUMB1)
28457     {
28458       if (TARGET_THUMB1_ONLY)
28459 	fputs ("\tpop\t{r3}\n", file);
28460 
28461       fprintf (file, "\tbx\tr12\n");
28462       ASM_OUTPUT_ALIGN (file, 2);
28463       assemble_name (file, label);
28464       fputs (":\n", file);
28465       if (flag_pic)
28466 	{
28467 	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
28468 	  rtx tem = XEXP (DECL_RTL (function), 0);
28469 	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28470 	     pipeline offset is four rather than eight.  Adjust the offset
28471 	     accordingly.  */
28472 	  tem = plus_constant (GET_MODE (tem), tem,
28473 			       TARGET_THUMB1_ONLY ? -3 : -7);
28474 	  tem = gen_rtx_MINUS (GET_MODE (tem),
28475 			       tem,
28476 			       gen_rtx_SYMBOL_REF (Pmode,
28477 						   ggc_strdup (labelpc)));
28478 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
28479 	}
28480       else
28481 	/* Output ".word .LTHUNKn".  */
28482 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28483 
28484       if (TARGET_THUMB1_ONLY && mi_delta > 255)
28485 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28486     }
28487   else
28488     {
28489       fputs ("\tb\t", file);
28490       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28491       if (NEED_PLT_RELOC)
28492         fputs ("(PLT)", file);
28493       fputc ('\n', file);
28494     }
28495 
28496   final_end_function ();
28497 }
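
/* A rough sketch of the Thumb-1 (non -mpure-code, non-PIC) output for a
   small positive DELTA, using the hypothetical target symbol "foo":

	ldr	r12, .LTHUMBFUNC0
	add	r0, r0, #4
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	foo

   i.e. the target address is loaded from a literal word placed after the
   branch, and the `this' pointer in r0 (or r1 for aggregate returns) is
   adjusted before the indirect jump.  */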
28498 
28499 /* MI thunk handling for TARGET_32BIT.  */
28500 
28501 static void
28502 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28503 		       HOST_WIDE_INT vcall_offset, tree function)
28504 {
28505   const bool long_call_p = arm_is_long_call_p (function);
28506 
28507   /* On ARM, this_regno is R0 or R1 depending on
28508      whether the function returns an aggregate or not.
28509   */
28510   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28511 				       function)
28512 		    ? R1_REGNUM : R0_REGNUM);
28513 
28514   rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28515   rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28516   reload_completed = 1;
28517   emit_note (NOTE_INSN_PROLOGUE_END);
28518 
28519   /* Add DELTA to THIS_RTX.  */
28520   if (delta != 0)
28521     arm_split_constant (PLUS, Pmode, NULL_RTX,
28522 			delta, this_rtx, this_rtx, false);
28523 
28524   /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
28525   if (vcall_offset != 0)
28526     {
28527       /* Load *THIS_RTX.  */
28528       emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28529       /* Compute *THIS_RTX + VCALL_OFFSET.  */
28530       arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28531 			  false);
28532       /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
28533       emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28534       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28535     }
28536 
28537   /* Generate a tail call to the target function.  */
28538   if (!TREE_USED (function))
28539     {
28540       assemble_external (function);
28541       TREE_USED (function) = 1;
28542     }
28543   rtx funexp = XEXP (DECL_RTL (function), 0);
28544   if (long_call_p)
28545     {
28546       emit_move_insn (temp, funexp);
28547       funexp = temp;
28548     }
28549   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28550   rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28551   SIBLING_CALL_P (insn) = 1;
28552   emit_barrier ();
28553 
28554   /* Indirect calls require a bit of fixup in PIC mode.  */
28555   if (long_call_p)
28556     {
28557       split_all_insns_noflow ();
28558       arm_reorg ();
28559     }
28560 
28561   insn = get_insns ();
28562   shorten_branches (insn);
28563   final_start_function (insn, file, 1);
28564   final (insn, file, 1);
28565   final_end_function ();
28566 
28567   /* Stop pretending this is a post-reload pass.  */
28568   reload_completed = 0;
28569 }
28570 
28571 /* Output code to add DELTA to the first argument, and then jump
28572    to FUNCTION.  Used for C++ multiple inheritance.  */
28573 
28574 static void
28575 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28576 		     HOST_WIDE_INT vcall_offset, tree function)
28577 {
28578   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28579 
28580   assemble_start_function (thunk, fnname);
28581   if (TARGET_32BIT)
28582     arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28583   else
28584     arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28585   assemble_end_function (thunk, fnname);
28586 }
28587 
28588 int
28589 arm_emit_vector_const (FILE *file, rtx x)
28590 {
28591   int i;
28592   const char * pattern;
28593 
28594   gcc_assert (GET_CODE (x) == CONST_VECTOR);
28595 
28596   switch (GET_MODE (x))
28597     {
28598     case E_V2SImode: pattern = "%08x"; break;
28599     case E_V4HImode: pattern = "%04x"; break;
28600     case E_V8QImode: pattern = "%02x"; break;
28601     default:       gcc_unreachable ();
28602     }
28603 
28604   fprintf (file, "0x");
28605   for (i = CONST_VECTOR_NUNITS (x); i--;)
28606     {
28607       rtx element;
28608 
28609       element = CONST_VECTOR_ELT (x, i);
28610       fprintf (file, pattern, INTVAL (element));
28611     }
28612 
28613   return 1;
28614 }
28615 
28616 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28617    HFmode constant pool entries are actually loaded with ldr.  */
28618 void
28619 arm_emit_fp16_const (rtx c)
28620 {
28621   long bits;
28622 
28623   bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28624   if (WORDS_BIG_ENDIAN)
28625     assemble_zeros (2);
28626   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28627   if (!WORDS_BIG_ENDIAN)
28628     assemble_zeros (2);
28629 }
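
/* For instance, the HFmode constant 1.0 has the bit pattern 0x3c00; on a
   little-endian target the two value bytes are emitted first, followed by
   two bytes of zero padding, while a WORDS_BIG_ENDIAN target gets the
   padding first, so in both cases the pool entry fills a complete 32-bit
   word that ldr can load.  */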
28630 
28631 const char *
28632 arm_output_load_gr (rtx *operands)
28633 {
28634   rtx reg;
28635   rtx offset;
28636   rtx wcgr;
28637   rtx sum;
28638 
28639   if (!MEM_P (operands [1])
28640       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28641       || !REG_P (reg = XEXP (sum, 0))
28642       || !CONST_INT_P (offset = XEXP (sum, 1))
28643       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28644     return "wldrw%?\t%0, %1";
28645 
28646   /* Fix up an out-of-range load of a GR register.  */
28647   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28648   wcgr = operands[0];
28649   operands[0] = reg;
28650   output_asm_insn ("ldr%?\t%0, %1", operands);
28651 
28652   operands[0] = wcgr;
28653   operands[1] = reg;
28654   output_asm_insn ("tmcr%?\t%0, %1", operands);
28655   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28656 
28657   return "";
28658 }
28659 
28660 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28661 
28662    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28663    named arg and all anonymous args onto the stack.
28664    XXX I know the prologue shouldn't be pushing registers, but it is faster
28665    that way.  */
28666 
28667 static void
28668 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28669 			    const function_arg_info &arg,
28670 			    int *pretend_size,
28671 			    int second_time ATTRIBUTE_UNUSED)
28672 {
28673   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28674   int nregs;
28675 
28676   cfun->machine->uses_anonymous_args = 1;
28677   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28678     {
28679       nregs = pcum->aapcs_ncrn;
28680       if (nregs & 1)
28681 	{
28682 	  int res = arm_needs_doubleword_align (arg.mode, arg.type);
28683 	  if (res < 0 && warn_psabi)
28684 	    inform (input_location, "parameter passing for argument of "
28685 		    "type %qT changed in GCC 7.1", arg.type);
28686 	  else if (res > 0)
28687 	    {
28688 	      nregs++;
28689 	      if (res > 1 && warn_psabi)
28690 		inform (input_location,
28691 			"parameter passing for argument of type "
28692 			"%qT changed in GCC 9.1", arg.type);
28693 	    }
28694 	}
28695     }
28696   else
28697     nregs = pcum->nregs;
28698 
28699   if (nregs < NUM_ARG_REGS)
28700     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28701 }
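
/* Worked example: for a declaration such as "int f (int a, ...)" under
   AAPCS, the single named argument occupies r0, so nregs is 1 and
   *pretend_size becomes (4 - 1) * 4 = 12 bytes; the prologue then pushes
   r1-r3 so that the anonymous arguments form one contiguous block with
   any arguments already passed on the stack.  */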
28702 
28703 /* We can't rely on the caller doing the proper promotion when
28704    using APCS or ATPCS.  */
28705 
28706 static bool
28707 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28708 {
28709     return !TARGET_AAPCS_BASED;
28710 }
28711 
28712 static machine_mode
28713 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28714                            machine_mode mode,
28715                            int *punsignedp ATTRIBUTE_UNUSED,
28716                            const_tree fntype ATTRIBUTE_UNUSED,
28717                            int for_return ATTRIBUTE_UNUSED)
28718 {
28719   if (GET_MODE_CLASS (mode) == MODE_INT
28720       && GET_MODE_SIZE (mode) < 4)
28721     return SImode;
28722 
28723   return mode;
28724 }
28725 
28726 
28727 static bool
28728 arm_default_short_enums (void)
28729 {
28730   return ARM_DEFAULT_SHORT_ENUMS;
28731 }
28732 
28733 
28734 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
28735 
28736 static bool
28737 arm_align_anon_bitfield (void)
28738 {
28739   return TARGET_AAPCS_BASED;
28740 }
28741 
28742 
28743 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
28744 
28745 static tree
28746 arm_cxx_guard_type (void)
28747 {
28748   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28749 }
28750 
28751 
28752 /* The EABI says test the least significant bit of a guard variable.  */
28753 
28754 static bool
28755 arm_cxx_guard_mask_bit (void)
28756 {
28757   return TARGET_AAPCS_BASED;
28758 }
28759 
28760 
28761 /* The EABI specifies that all array cookies are 8 bytes long.  */
28762 
28763 static tree
28764 arm_get_cookie_size (tree type)
28765 {
28766   tree size;
28767 
28768   if (!TARGET_AAPCS_BASED)
28769     return default_cxx_get_cookie_size (type);
28770 
28771   size = build_int_cst (sizetype, 8);
28772   return size;
28773 }
28774 
28775 
28776 /* The EABI says that array cookies should also contain the element size.  */
28777 
28778 static bool
28779 arm_cookie_has_size (void)
28780 {
28781   return TARGET_AAPCS_BASED;
28782 }
28783 
28784 
28785 /* The EABI says constructors and destructors should return a pointer to
28786    the object constructed/destroyed.  */
28787 
28788 static bool
28789 arm_cxx_cdtor_returns_this (void)
28790 {
28791   return TARGET_AAPCS_BASED;
28792 }
28793 
28794 /* The EABI says that an inline function may never be the key
28795    method.  */
28796 
28797 static bool
28798 arm_cxx_key_method_may_be_inline (void)
28799 {
28800   return !TARGET_AAPCS_BASED;
28801 }
28802 
28803 static void
28804 arm_cxx_determine_class_data_visibility (tree decl)
28805 {
28806   if (!TARGET_AAPCS_BASED
28807       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28808     return;
28809 
28810   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28811      is exported.  However, on systems without dynamic vague linkage,
28812      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
28813   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28814     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28815   else
28816     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28817   DECL_VISIBILITY_SPECIFIED (decl) = 1;
28818 }
28819 
28820 static bool
28821 arm_cxx_class_data_always_comdat (void)
28822 {
28823   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28824      vague linkage if the class has no key function.  */
28825   return !TARGET_AAPCS_BASED;
28826 }
28827 
28828 
28829 /* The EABI says __aeabi_atexit should be used to register static
28830    destructors.  */
28831 
28832 static bool
28833 arm_cxx_use_aeabi_atexit (void)
28834 {
28835   return TARGET_AAPCS_BASED;
28836 }
28837 
28838 
28839 void
28840 arm_set_return_address (rtx source, rtx scratch)
28841 {
28842   arm_stack_offsets *offsets;
28843   HOST_WIDE_INT delta;
28844   rtx addr, mem;
28845   unsigned long saved_regs;
28846 
28847   offsets = arm_get_frame_offsets ();
28848   saved_regs = offsets->saved_regs_mask;
28849 
28850   if ((saved_regs & (1 << LR_REGNUM)) == 0)
28851     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28852   else
28853     {
28854       if (frame_pointer_needed)
28855 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28856       else
28857 	{
28858 	  /* LR will be the first saved register.  */
28859 	  delta = offsets->outgoing_args - (offsets->frame + 4);
28860 
28861 
28862 	  if (delta >= 4096)
28863 	    {
28864 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28865 				     GEN_INT (delta & ~4095)));
28866 	      addr = scratch;
28867 	      delta &= 4095;
28868 	    }
28869 	  else
28870 	    addr = stack_pointer_rtx;
28871 
28872 	  addr = plus_constant (Pmode, addr, delta);
28873 	}
28874 
28875       /* The store needs to be marked to prevent DSE from deleting
28876 	 it as dead if it is based on fp.  */
28877       mem = gen_frame_mem (Pmode, addr);
28878       MEM_VOLATILE_P (mem) = true;
28879       emit_move_insn (mem, source);
28880     }
28881 }
28882 
28883 
28884 void
28885 thumb_set_return_address (rtx source, rtx scratch)
28886 {
28887   arm_stack_offsets *offsets;
28888   HOST_WIDE_INT delta;
28889   HOST_WIDE_INT limit;
28890   int reg;
28891   rtx addr, mem;
28892   unsigned long mask;
28893 
28894   emit_use (source);
28895 
28896   offsets = arm_get_frame_offsets ();
28897   mask = offsets->saved_regs_mask;
28898   if (mask & (1 << LR_REGNUM))
28899     {
28900       limit = 1024;
28901       /* Find the saved regs.  */
28902       if (frame_pointer_needed)
28903 	{
28904 	  delta = offsets->soft_frame - offsets->saved_args;
28905 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28906 	  if (TARGET_THUMB1)
28907 	    limit = 128;
28908 	}
28909       else
28910 	{
28911 	  delta = offsets->outgoing_args - offsets->saved_args;
28912 	  reg = SP_REGNUM;
28913 	}
28914       /* Allow for the stack frame.  */
28915       if (TARGET_THUMB1 && TARGET_BACKTRACE)
28916 	delta -= 16;
28917       /* The link register is always the first saved register.  */
28918       delta -= 4;
28919 
28920       /* Construct the address.  */
28921       addr = gen_rtx_REG (SImode, reg);
28922       if (delta > limit)
28923 	{
28924 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28925 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28926 	  addr = scratch;
28927 	}
28928       else
28929 	addr = plus_constant (Pmode, addr, delta);
28930 
28931       /* The store needs to be marked to prevent DSE from deleting
28932 	 it as dead if it is based on fp.  */
28933       mem = gen_frame_mem (Pmode, addr);
28934       MEM_VOLATILE_P (mem) = true;
28935       emit_move_insn (mem, source);
28936     }
28937   else
28938     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28939 }
28940 
28941 /* Implements target hook vector_mode_supported_p.  */
28942 bool
28943 arm_vector_mode_supported_p (machine_mode mode)
28944 {
28945   /* Neon also supports V2SImode, etc. listed in the clause below.  */
28946   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28947       || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
28948       || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
28949       || mode == V8BFmode))
28950     return true;
28951 
28952   if ((TARGET_NEON || TARGET_IWMMXT)
28953       && ((mode == V2SImode)
28954 	  || (mode == V4HImode)
28955 	  || (mode == V8QImode)))
28956     return true;
28957 
28958   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28959       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28960       || mode == V2HAmode))
28961     return true;
28962 
28963   if (TARGET_HAVE_MVE
28964       && (mode == V2DImode || mode == V4SImode || mode == V8HImode
28965 	  || mode == V16QImode))
28966       return true;
28967 
28968   if (TARGET_HAVE_MVE_FLOAT
28969       && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
28970       return true;
28971 
28972   return false;
28973 }
28974 
28975 /* Implements target hook array_mode_supported_p.  */
28976 
28977 static bool
28978 arm_array_mode_supported_p (machine_mode mode,
28979 			    unsigned HOST_WIDE_INT nelems)
28980 {
28981   /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
28982      for now, as the lane-swapping logic needs to be extended in the expanders.
28983      See PR target/82518.  */
28984   if (TARGET_NEON && !BYTES_BIG_ENDIAN
28985       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28986       && (nelems >= 2 && nelems <= 4))
28987     return true;
28988 
28989   if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
28990       && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
28991     return true;
28992 
28993   return false;
28994 }
28995 
28996 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28997    registers when autovectorizing for Neon, at least until multiple vector
28998    widths are supported properly by the middle-end.  */
28999 
29000 static machine_mode
29001 arm_preferred_simd_mode (scalar_mode mode)
29002 {
29003   if (TARGET_NEON)
29004     switch (mode)
29005       {
29006       case E_SFmode:
29007 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29008       case E_SImode:
29009 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29010       case E_HImode:
29011 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29012       case E_QImode:
29013 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29014       case E_DImode:
29015 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
29016 	  return V2DImode;
29017 	break;
29018 
29019       default:;
29020       }
29021 
29022   if (TARGET_REALLY_IWMMXT)
29023     switch (mode)
29024       {
29025       case E_SImode:
29026 	return V2SImode;
29027       case E_HImode:
29028 	return V4HImode;
29029       case E_QImode:
29030 	return V8QImode;
29031 
29032       default:;
29033       }
29034 
29035   return word_mode;
29036 }
29037 
29038 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29039 
29040    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
29041    using r0-r4 for function arguments, r7 for the stack frame and don't have
29042    enough left over to do doubleword arithmetic.  For Thumb-2 all the
29043    potentially problematic instructions accept high registers so this is not
29044    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
29045    that require many low registers.  */
29046 static bool
29047 arm_class_likely_spilled_p (reg_class_t rclass)
29048 {
29049   if ((TARGET_THUMB1 && rclass == LO_REGS)
29050       || rclass  == CC_REG)
29051     return true;
29052 
29053   return false;
29054 }
29055 
29056 /* Implements target hook small_register_classes_for_mode_p.  */
29057 bool
29058 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29059 {
29060   return TARGET_THUMB1;
29061 }
29062 
29063 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
29064    ARM insns and therefore guarantee that the shift count is modulo 256.
29065    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29066    guarantee no particular behavior for out-of-range counts.  */
29067 
29068 static unsigned HOST_WIDE_INT
29069 arm_shift_truncation_mask (machine_mode mode)
29070 {
29071   return mode == SImode ? 255 : 0;
29072 }
29073 
29074 
29075 /* Map internal gcc register numbers to DWARF2 register numbers.  */
29076 
29077 unsigned int
29078 arm_dbx_register_number (unsigned int regno)
29079 {
29080   if (regno < 16)
29081     return regno;
29082 
29083   if (IS_VFP_REGNUM (regno))
29084     {
29085       /* See comment in arm_dwarf_register_span.  */
29086       if (VFP_REGNO_OK_FOR_SINGLE (regno))
29087 	return 64 + regno - FIRST_VFP_REGNUM;
29088       else
29089 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29090     }
29091 
29092   if (IS_IWMMXT_GR_REGNUM (regno))
29093     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29094 
29095   if (IS_IWMMXT_REGNUM (regno))
29096     return 112 + regno - FIRST_IWMMXT_REGNUM;
29097 
29098   return DWARF_FRAME_REGISTERS;
29099 }
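
/* For example, s0 (the first VFP register) maps to DWARF register 64 and
   s31 to 95, while d16 (which has no single-precision alias) maps to
   256 + 16 = 272; anything else falls back to DWARF_FRAME_REGISTERS.  */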
29100 
29101 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29102    GCC models them as 64 32-bit registers, so we need to describe this to
29103    the DWARF generation code.  Other registers can use the default.  */
29104 static rtx
29105 arm_dwarf_register_span (rtx rtl)
29106 {
29107   machine_mode mode;
29108   unsigned regno;
29109   rtx parts[16];
29110   int nregs;
29111   int i;
29112 
29113   regno = REGNO (rtl);
29114   if (!IS_VFP_REGNUM (regno))
29115     return NULL_RTX;
29116 
29117   /* XXX FIXME: The EABI defines two VFP register ranges:
29118 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29119 	256-287: D0-D31
29120      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29121      corresponding D register.  Until GDB supports this, we shall use the
29122      legacy encodings.  We also use these encodings for D0-D15 for
29123      compatibility with older debuggers.  */
29124   mode = GET_MODE (rtl);
29125   if (GET_MODE_SIZE (mode) < 8)
29126     return NULL_RTX;
29127 
29128   if (VFP_REGNO_OK_FOR_SINGLE (regno))
29129     {
29130       nregs = GET_MODE_SIZE (mode) / 4;
29131       for (i = 0; i < nregs; i += 2)
29132 	if (TARGET_BIG_END)
29133 	  {
29134 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29135 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29136 	  }
29137 	else
29138 	  {
29139 	    parts[i] = gen_rtx_REG (SImode, regno + i);
29140 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29141 	  }
29142     }
29143   else
29144     {
29145       nregs = GET_MODE_SIZE (mode) / 8;
29146       for (i = 0; i < nregs; i++)
29147 	parts[i] = gen_rtx_REG (DImode, regno + i);
29148     }
29149 
29150   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29151 }
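
/* As an illustration, a DFmode value living in d0 (GCC registers s0/s1)
   is described as (parallel [(reg:SI s0) (reg:SI s1)]) so that the DWARF
   consumer sees the two legacy 32-bit pieces, with the two halves swapped
   for TARGET_BIG_END; registers without a single-precision alias (d16 and
   above) are described as one DImode piece per D register instead.  */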
29152 
29153 #if ARM_UNWIND_INFO
29154 /* Emit unwind directives for a store-multiple instruction or stack pointer
29155    push during alignment.
29156    These should only ever be generated by the function prologue code, so
29157    expect them to have a particular form.
29158    The store-multiple instruction sometimes pushes pc as the last register,
29159    although it should not be tracked into unwind information, or for -Os
29160    sometimes pushes some dummy registers before the first register that needs
29161    to be tracked in unwind information; such dummy registers are there just
29162    to avoid separate stack adjustment, and will not be restored in the
29163    epilogue.  */
29164 
29165 static void
29166 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29167 {
29168   int i;
29169   HOST_WIDE_INT offset;
29170   HOST_WIDE_INT nregs;
29171   int reg_size;
29172   unsigned reg;
29173   unsigned lastreg;
29174   unsigned padfirst = 0, padlast = 0;
29175   rtx e;
29176 
29177   e = XVECEXP (p, 0, 0);
29178   gcc_assert (GET_CODE (e) == SET);
29179 
29180   /* First insn will adjust the stack pointer.  */
29181   gcc_assert (GET_CODE (e) == SET
29182 	      && REG_P (SET_DEST (e))
29183 	      && REGNO (SET_DEST (e)) == SP_REGNUM
29184 	      && GET_CODE (SET_SRC (e)) == PLUS);
29185 
29186   offset = -INTVAL (XEXP (SET_SRC (e), 1));
29187   nregs = XVECLEN (p, 0) - 1;
29188   gcc_assert (nregs);
29189 
29190   reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29191   if (reg < 16)
29192     {
29193       /* For -Os dummy registers can be pushed at the beginning to
29194 	 avoid separate stack pointer adjustment.  */
29195       e = XVECEXP (p, 0, 1);
29196       e = XEXP (SET_DEST (e), 0);
29197       if (GET_CODE (e) == PLUS)
29198 	padfirst = INTVAL (XEXP (e, 1));
29199       gcc_assert (padfirst == 0 || optimize_size);
29200       /* The function prologue may also push pc, but not annotate it as it is
29201 	 never restored.  We turn this into a stack pointer adjustment.  */
29202       e = XVECEXP (p, 0, nregs);
29203       e = XEXP (SET_DEST (e), 0);
29204       if (GET_CODE (e) == PLUS)
29205 	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29206       else
29207 	padlast = offset - 4;
29208       gcc_assert (padlast == 0 || padlast == 4);
29209       if (padlast == 4)
29210 	fprintf (asm_out_file, "\t.pad #4\n");
29211       reg_size = 4;
29212       fprintf (asm_out_file, "\t.save {");
29213     }
29214   else if (IS_VFP_REGNUM (reg))
29215     {
29216       reg_size = 8;
29217       fprintf (asm_out_file, "\t.vsave {");
29218     }
29219   else
29220     /* Unknown register type.  */
29221     gcc_unreachable ();
29222 
29223   /* If the stack increment doesn't match the size of the saved registers,
29224      something has gone horribly wrong.  */
29225   gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29226 
29227   offset = padfirst;
29228   lastreg = 0;
29229   /* The remaining insns will describe the stores.  */
29230   for (i = 1; i <= nregs; i++)
29231     {
29232       /* Expect (set (mem <addr>) (reg)).
29233          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
29234       e = XVECEXP (p, 0, i);
29235       gcc_assert (GET_CODE (e) == SET
29236 		  && MEM_P (SET_DEST (e))
29237 		  && REG_P (SET_SRC (e)));
29238 
29239       reg = REGNO (SET_SRC (e));
29240       gcc_assert (reg >= lastreg);
29241 
29242       if (i != 1)
29243 	fprintf (asm_out_file, ", ");
29244       /* We can't use %r for vfp because we need to use the
29245 	 double precision register names.  */
29246       if (IS_VFP_REGNUM (reg))
29247 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29248       else
29249 	asm_fprintf (asm_out_file, "%r", reg);
29250 
29251       if (flag_checking)
29252 	{
29253 	  /* Check that the addresses are consecutive.  */
29254 	  e = XEXP (SET_DEST (e), 0);
29255 	  if (GET_CODE (e) == PLUS)
29256 	    gcc_assert (REG_P (XEXP (e, 0))
29257 			&& REGNO (XEXP (e, 0)) == SP_REGNUM
29258 			&& CONST_INT_P (XEXP (e, 1))
29259 			&& offset == INTVAL (XEXP (e, 1)));
29260 	  else
29261 	    gcc_assert (i == 1
29262 			&& REG_P (e)
29263 			&& REGNO (e) == SP_REGNUM);
29264 	  offset += reg_size;
29265 	}
29266     }
29267   fprintf (asm_out_file, "}\n");
29268   if (padfirst)
29269     fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29270 }
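
/* Example: a prologue "push {r4, r5, lr}" (a PARALLEL of the sp decrement
   plus three stores) is annotated as

	.save {r4, r5, lr}

   a "vpush {d8, d9}" becomes ".vsave {d8, d9}", and a dummy pc pushed for
   -Os padding turns into an extra ".pad #4" instead of being recorded as
   a saved register.  */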
29271 
29272 /*  Emit unwind directives for a SET.  */
29273 
29274 static void
29275 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29276 {
29277   rtx e0;
29278   rtx e1;
29279   unsigned reg;
29280 
29281   e0 = XEXP (p, 0);
29282   e1 = XEXP (p, 1);
29283   switch (GET_CODE (e0))
29284     {
29285     case MEM:
29286       /* Pushing a single register.  */
29287       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29288 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
29289 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29290 	abort ();
29291 
29292       asm_fprintf (asm_out_file, "\t.save ");
29293       if (IS_VFP_REGNUM (REGNO (e1)))
29294 	asm_fprintf(asm_out_file, "{d%d}\n",
29295 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29296       else
29297 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29298       break;
29299 
29300     case REG:
29301       if (REGNO (e0) == SP_REGNUM)
29302 	{
29303 	  /* A stack increment.  */
29304 	  if (GET_CODE (e1) != PLUS
29305 	      || !REG_P (XEXP (e1, 0))
29306 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
29307 	      || !CONST_INT_P (XEXP (e1, 1)))
29308 	    abort ();
29309 
29310 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29311 		       -INTVAL (XEXP (e1, 1)));
29312 	}
29313       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29314 	{
29315 	  HOST_WIDE_INT offset;
29316 
29317 	  if (GET_CODE (e1) == PLUS)
29318 	    {
29319 	      if (!REG_P (XEXP (e1, 0))
29320 		  || !CONST_INT_P (XEXP (e1, 1)))
29321 		abort ();
29322 	      reg = REGNO (XEXP (e1, 0));
29323 	      offset = INTVAL (XEXP (e1, 1));
29324 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29325 			   HARD_FRAME_POINTER_REGNUM, reg,
29326 			   offset);
29327 	    }
29328 	  else if (REG_P (e1))
29329 	    {
29330 	      reg = REGNO (e1);
29331 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29332 			   HARD_FRAME_POINTER_REGNUM, reg);
29333 	    }
29334 	  else
29335 	    abort ();
29336 	}
29337       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29338 	{
29339 	  /* Move from sp to reg.  */
29340 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29341 	}
29342      else if (GET_CODE (e1) == PLUS
29343 	      && REG_P (XEXP (e1, 0))
29344 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
29345 	      && CONST_INT_P (XEXP (e1, 1)))
29346 	{
29347 	  /* Set reg to offset from sp.  */
29348 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29349 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29350 	}
29351       else
29352 	abort ();
29353       break;
29354 
29355     default:
29356       abort ();
29357     }
29358 }
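
/* Examples of the directives produced here: a stack adjustment
   "sp = sp - 16" yields ".pad #16", establishing the frame pointer with
   "fp = sp + 8" yields ".setfp fp, sp, #8" (with the register name
   following the target's frame pointer), and pushing a single register
   such as lr with a pre-decrement store yields ".save {lr}".  */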
29359 
29360 
29361 /* Emit unwind directives for the given insn.  */
29362 
29363 static void
29364 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29365 {
29366   rtx note, pat;
29367   bool handled_one = false;
29368 
29369   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29370     return;
29371 
29372   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29373       && (TREE_NOTHROW (current_function_decl)
29374 	  || crtl->all_throwers_are_sibcalls))
29375     return;
29376 
29377   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29378     return;
29379 
29380   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29381     {
29382       switch (REG_NOTE_KIND (note))
29383 	{
29384 	case REG_FRAME_RELATED_EXPR:
29385 	  pat = XEXP (note, 0);
29386 	  goto found;
29387 
29388 	case REG_CFA_REGISTER:
29389 	  pat = XEXP (note, 0);
29390 	  if (pat == NULL)
29391 	    {
29392 	      pat = PATTERN (insn);
29393 	      if (GET_CODE (pat) == PARALLEL)
29394 		pat = XVECEXP (pat, 0, 0);
29395 	    }
29396 
29397 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
29398 	  {
29399 	    rtx dest, src;
29400 	    unsigned reg;
29401 
29402 	    src = SET_SRC (pat);
29403 	    dest = SET_DEST (pat);
29404 
29405 	    gcc_assert (src == stack_pointer_rtx);
29406 	    reg = REGNO (dest);
29407 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29408 			 reg + 0x90, reg);
29409 	  }
29410 	  handled_one = true;
29411 	  break;
29412 
29413 	/* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
29414 	   to get correct dwarf information for shrink-wrap.  We should not
29415 	   emit unwind information for it because these are used either for
29416 	   pretend arguments or notes to adjust sp and restore registers from
29417 	   the stack.  */
29418 	case REG_CFA_DEF_CFA:
29419 	case REG_CFA_ADJUST_CFA:
29420 	case REG_CFA_RESTORE:
29421 	  return;
29422 
29423 	case REG_CFA_EXPRESSION:
29424 	case REG_CFA_OFFSET:
29425 	  /* ??? Only handling here what we actually emit.  */
29426 	  gcc_unreachable ();
29427 
29428 	default:
29429 	  break;
29430 	}
29431     }
29432   if (handled_one)
29433     return;
29434   pat = PATTERN (insn);
29435  found:
29436 
29437   switch (GET_CODE (pat))
29438     {
29439     case SET:
29440       arm_unwind_emit_set (asm_out_file, pat);
29441       break;
29442 
29443     case SEQUENCE:
29444       /* Store multiple.  */
29445       arm_unwind_emit_sequence (asm_out_file, pat);
29446       break;
29447 
29448     default:
29449       abort();
29450     }
29451 }
29452 
29453 
29454 /* Output a reference from a function exception table to the type_info
29455    object X.  The EABI specifies that the symbol should be relocated by
29456    an R_ARM_TARGET2 relocation.  */
29457 
29458 static bool
29459 arm_output_ttype (rtx x)
29460 {
29461   fputs ("\t.word\t", asm_out_file);
29462   output_addr_const (asm_out_file, x);
29463   /* Use special relocations for symbol references.  */
29464   if (!CONST_INT_P (x))
29465     fputs ("(TARGET2)", asm_out_file);
29466   fputc ('\n', asm_out_file);
29467 
29468   return TRUE;
29469 }
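
/* E.g. a catch handler for "int" references the typeinfo symbol as

	.word	_ZTIi(TARGET2)

   leaving the R_ARM_TARGET2 relocation to be resolved in the
   platform-defined way (GOT-relative on GNU/Linux, absolute or relative
   elsewhere).  */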
29470 
29471 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
29472 
29473 static void
29474 arm_asm_emit_except_personality (rtx personality)
29475 {
29476   fputs ("\t.personality\t", asm_out_file);
29477   output_addr_const (asm_out_file, personality);
29478   fputc ('\n', asm_out_file);
29479 }
29480 #endif /* ARM_UNWIND_INFO */
29481 
29482 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
29483 
29484 static void
29485 arm_asm_init_sections (void)
29486 {
29487 #if ARM_UNWIND_INFO
29488   exception_section = get_unnamed_section (0, output_section_asm_op,
29489 					   "\t.handlerdata");
29490 #endif /* ARM_UNWIND_INFO */
29491 
29492 #ifdef OBJECT_FORMAT_ELF
29493   if (target_pure_code)
29494     text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29495 #endif
29496 }
29497 
29498 /* Output unwind directives for the start/end of a function.  */
29499 
29500 void
29501 arm_output_fn_unwind (FILE * f, bool prologue)
29502 {
29503   if (arm_except_unwind_info (&global_options) != UI_TARGET)
29504     return;
29505 
29506   if (prologue)
29507     fputs ("\t.fnstart\n", f);
29508   else
29509     {
29510       /* If this function will never be unwound, then mark it as such.
29511          The same condition is used in arm_unwind_emit to suppress
29512 	 the frame annotations.  */
29513       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29514 	  && (TREE_NOTHROW (current_function_decl)
29515 	      || crtl->all_throwers_are_sibcalls))
29516 	fputs("\t.cantunwind\n", f);
29517 
29518       fputs ("\t.fnend\n", f);
29519     }
29520 }
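
/* The resulting bracketing looks like

	.fnstart
	...function body with its .save/.pad/.setfp annotations...
	.cantunwind		@ only for functions that can never be unwound
	.fnend

   and the unwind table entry itself is generated by the assembler from
   these directives.  */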
29521 
29522 static bool
29523 arm_emit_tls_decoration (FILE *fp, rtx x)
29524 {
29525   enum tls_reloc reloc;
29526   rtx val;
29527 
29528   val = XVECEXP (x, 0, 0);
29529   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29530 
29531   output_addr_const (fp, val);
29532 
29533   switch (reloc)
29534     {
29535     case TLS_GD32:
29536       fputs ("(tlsgd)", fp);
29537       break;
29538     case TLS_GD32_FDPIC:
29539       fputs ("(tlsgd_fdpic)", fp);
29540       break;
29541     case TLS_LDM32:
29542       fputs ("(tlsldm)", fp);
29543       break;
29544     case TLS_LDM32_FDPIC:
29545       fputs ("(tlsldm_fdpic)", fp);
29546       break;
29547     case TLS_LDO32:
29548       fputs ("(tlsldo)", fp);
29549       break;
29550     case TLS_IE32:
29551       fputs ("(gottpoff)", fp);
29552       break;
29553     case TLS_IE32_FDPIC:
29554       fputs ("(gottpoff_fdpic)", fp);
29555       break;
29556     case TLS_LE32:
29557       fputs ("(tpoff)", fp);
29558       break;
29559     case TLS_DESCSEQ:
29560       fputs ("(tlsdesc)", fp);
29561       break;
29562     default:
29563       gcc_unreachable ();
29564     }
29565 
29566   switch (reloc)
29567     {
29568     case TLS_GD32:
29569     case TLS_LDM32:
29570     case TLS_IE32:
29571     case TLS_DESCSEQ:
29572       fputs (" + (. - ", fp);
29573       output_addr_const (fp, XVECEXP (x, 0, 2));
29574       /* For DESCSEQ the 3rd operand encodes thumbness, and is added.  */
29575       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29576       output_addr_const (fp, XVECEXP (x, 0, 3));
29577       fputc (')', fp);
29578       break;
29579     default:
29580       break;
29581     }
29582 
29583   return TRUE;
29584 }
29585 
29586 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
29587 
29588 static void
29589 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29590 {
29591   gcc_assert (size == 4);
29592   fputs ("\t.word\t", file);
29593   output_addr_const (file, x);
29594   fputs ("(tlsldo)", file);
29595 }
29596 
29597 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
29598 
29599 static bool
29600 arm_output_addr_const_extra (FILE *fp, rtx x)
29601 {
29602   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29603     return arm_emit_tls_decoration (fp, x);
29604   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29605     {
29606       char label[256];
29607       int labelno = INTVAL (XVECEXP (x, 0, 0));
29608 
29609       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29610       assemble_name_raw (fp, label);
29611 
29612       return TRUE;
29613     }
29614   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29615     {
29616       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29617       if (GOT_PCREL)
29618 	fputs ("+.", fp);
29619       fputs ("-(", fp);
29620       output_addr_const (fp, XVECEXP (x, 0, 0));
29621       fputc (')', fp);
29622       return TRUE;
29623     }
29624   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29625     {
29626       output_addr_const (fp, XVECEXP (x, 0, 0));
29627       if (GOT_PCREL)
29628         fputs ("+.", fp);
29629       fputs ("-(", fp);
29630       output_addr_const (fp, XVECEXP (x, 0, 1));
29631       fputc (')', fp);
29632       return TRUE;
29633     }
29634   else if (GET_CODE (x) == CONST_VECTOR)
29635     return arm_emit_vector_const (fp, x);
29636 
29637   return FALSE;
29638 }
29639 
29640 /* Output assembly for a shift instruction.
29641    SET_FLAGS determines how the instruction modifies the condition codes.
29642    0 - Do not set condition codes.
29643    1 - Set condition codes.
29644    2 - Use smallest instruction.  */
29645 const char *
29646 arm_output_shift(rtx * operands, int set_flags)
29647 {
29648   char pattern[100];
29649   static const char flag_chars[3] = {'?', '.', '!'};
29650   const char *shift;
29651   HOST_WIDE_INT val;
29652   char c;
29653 
29654   c = flag_chars[set_flags];
29655   shift = shift_op(operands[3], &val);
29656   if (shift)
29657     {
29658       if (val != -1)
29659 	operands[2] = GEN_INT(val);
29660       sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29661     }
29662   else
29663     sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29664 
29665   output_asm_insn (pattern, operands);
29666   return "";
29667 }
29668 
29669 /* Output assembly for a WMMX immediate shift instruction.  */
29670 const char *
29671 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29672 {
29673   int shift = INTVAL (operands[2]);
29674   char templ[50];
29675   machine_mode opmode = GET_MODE (operands[0]);
29676 
29677   gcc_assert (shift >= 0);
29678 
29679   /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
29680      31 (for the W qualifier) or 15 (for the H qualifier).  */
29681   if (((opmode == V4HImode) && (shift > 15))
29682 	|| ((opmode == V2SImode) && (shift > 31))
29683 	|| ((opmode == DImode) && (shift > 63)))
29684   {
29685     if (wror_or_wsra)
29686       {
29687         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29688         output_asm_insn (templ, operands);
29689         if (opmode == DImode)
29690           {
29691 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29692 	    output_asm_insn (templ, operands);
29693           }
29694       }
29695     else
29696       {
29697         /* The destination register will contain all zeros.  */
29698         sprintf (templ, "wzero\t%%0");
29699         output_asm_insn (templ, operands);
29700       }
29701     return "";
29702   }
29703 
29704   if ((opmode == DImode) && (shift > 32))
29705     {
29706       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29707       output_asm_insn (templ, operands);
29708       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29709       output_asm_insn (templ, operands);
29710     }
29711   else
29712     {
29713       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29714       output_asm_insn (templ, operands);
29715     }
29716   return "";
29717 }
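
/* Illustrative example (assumed, not from the original source): a DImode
   shift with INSN_NAME "wslld" and a constant shift count of 40 falls into
   the (shift > 32) case above and is emitted as two instructions, first by
   #32 and then by #8.  */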
29718 
29719 /* Output assembly for a WMMX tinsr instruction.  */
29720 const char *
29721 arm_output_iwmmxt_tinsr (rtx *operands)
29722 {
29723   int mask = INTVAL (operands[3]);
29724   int i;
29725   char templ[50];
29726   int units = mode_nunits[GET_MODE (operands[0])];
29727   gcc_assert ((mask & (mask - 1)) == 0);
29728   for (i = 0; i < units; ++i)
29729     {
29730       if ((mask & 0x01) == 1)
29731         {
29732           break;
29733         }
29734       mask >>= 1;
29735     }
29736   gcc_assert (i < units);
29737   {
29738     switch (GET_MODE (operands[0]))
29739       {
29740       case E_V8QImode:
29741 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29742 	break;
29743       case E_V4HImode:
29744 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29745 	break;
29746       case E_V2SImode:
29747 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29748 	break;
29749       default:
29750 	gcc_unreachable ();
29751 	break;
29752       }
29753     output_asm_insn (templ, operands);
29754   }
29755   return "";
29756 }
29757 
29758 /* Output a Thumb-1 casesi dispatch sequence.  */
29759 const char *
29760 thumb1_output_casesi (rtx *operands)
29761 {
29762   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29763 
29764   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29765 
29766   switch (GET_MODE(diff_vec))
29767     {
29768     case E_QImode:
29769       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29770 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29771     case E_HImode:
29772       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29773 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29774     case E_SImode:
29775       return "bl\t%___gnu_thumb1_case_si";
29776     default:
29777       gcc_unreachable ();
29778     }
29779 }
29780 
29781 /* Output a Thumb-2 casesi instruction.  */
29782 const char *
29783 thumb2_output_casesi (rtx *operands)
29784 {
29785   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29786 
29787   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29788 
29789   output_asm_insn ("cmp\t%0, %1", operands);
29790   output_asm_insn ("bhi\t%l3", operands);
29791   switch (GET_MODE(diff_vec))
29792     {
29793     case E_QImode:
29794       return "tbb\t[%|pc, %0]";
29795     case E_HImode:
29796       return "tbh\t[%|pc, %0, lsl #1]";
29797     case E_SImode:
29798       if (flag_pic)
29799 	{
29800 	  output_asm_insn ("adr\t%4, %l2", operands);
29801 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29802 	  output_asm_insn ("add\t%4, %4, %5", operands);
29803 	  return "bx\t%4";
29804 	}
29805       else
29806 	{
29807 	  output_asm_insn ("adr\t%4, %l2", operands);
29808 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
29809 	}
29810     default:
29811       gcc_unreachable ();
29812     }
29813 }
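
/* Illustrative note (not part of the original source): for an HImode
   dispatch table the sequence emitted above is roughly
	cmp	<index>, <bound>
	bhi	<default_label>
	tbh	[pc, <index>, lsl #1]
   with the table of halfword offsets following the TBH instruction.  */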
29814 
29815 /* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
29816    per-core tuning structs.  */
29817 static int
29818 arm_issue_rate (void)
29819 {
29820   return current_tune->issue_rate;
29821 }
29822 
29823 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
29824 static int
29825 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
29826 {
29827   if (DEBUG_INSN_P (insn))
29828     return more;
29829 
29830   rtx_code code = GET_CODE (PATTERN (insn));
29831   if (code == USE || code == CLOBBER)
29832     return more;
29833 
29834   if (get_attr_type (insn) == TYPE_NO_INSN)
29835     return more;
29836 
29837   return more - 1;
29838 }
29839 
29840 /* Return how many instructions the scheduler should look ahead to choose
29841    the best one.  */
29842 static int
29843 arm_first_cycle_multipass_dfa_lookahead (void)
29844 {
29845   int issue_rate = arm_issue_rate ();
29846 
29847   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
29848 }
29849 
29850 /* Enable modeling of L2 auto-prefetcher.  */
29851 static int
29852 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
29853 {
29854   return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
29855 }
29856 
29857 const char *
29858 arm_mangle_type (const_tree type)
29859 {
29860   /* The ARM ABI documents (10th October 2008) say that "__va_list"
29861      has to be mangled as if it is in the "std" namespace.  */
29862   if (TARGET_AAPCS_BASED
29863       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29864     return "St9__va_list";
29865 
29866   /* Half-precision floating point types.  */
29867   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29868     {
29869       if (TYPE_MODE (type) == BFmode)
29870 	return "u6__bf16";
29871       else
29872 	return "Dh";
29873     }
29874 
29875   /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
29876      builtin type.  */
29877   if (TYPE_NAME (type) != NULL)
29878     return arm_mangle_builtin_type (type);
29879 
29880   /* Use the default mangling.  */
29881   return NULL;
29882 }
29883 
29884 /* Order of allocation of core registers for Thumb: this allocation is
29885    written over the corresponding initial entries of the array
29886    initialized with REG_ALLOC_ORDER.  We allocate all low registers
29887    first.  Saving and restoring a low register is usually cheaper than
29888    using a call-clobbered high register.  */
29889 
29890 static const int thumb_core_reg_alloc_order[] =
29891 {
29892    3,  2,  1,  0,  4,  5,  6,  7,
29893   12, 14,  8,  9, 10, 11
29894 };
29895 
29896 /* Adjust register allocation order when compiling for Thumb.  */
29897 
29898 void
29899 arm_order_regs_for_local_alloc (void)
29900 {
29901   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29902   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29903   if (TARGET_THUMB)
29904     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29905             sizeof (thumb_core_reg_alloc_order));
29906 }
29907 
29908 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
29909 
29910 bool
29911 arm_frame_pointer_required (void)
29912 {
29913   if (SUBTARGET_FRAME_POINTER_REQUIRED)
29914     return true;
29915 
29916   /* If the function receives nonlocal gotos, it needs to save the frame
29917      pointer in the nonlocal_goto_save_area object.  */
29918   if (cfun->has_nonlocal_label)
29919     return true;
29920 
29921   /* The frame pointer is required for non-leaf APCS frames.  */
29922   if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
29923     return true;
29924 
29925   /* If we are probing the stack in the prologue, we will have a faulting
29926      instruction prior to the stack adjustment and this requires a frame
29927      pointer if we want to catch the exception using the EABI unwinder.  */
29928   if (!IS_INTERRUPT (arm_current_func_type ())
29929       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
29930 	  || flag_stack_clash_protection)
29931       && arm_except_unwind_info (&global_options) == UI_TARGET
29932       && cfun->can_throw_non_call_exceptions)
29933     {
29934       HOST_WIDE_INT size = get_frame_size ();
29935 
29936       /* That's irrelevant if there is no stack adjustment.  */
29937       if (size <= 0)
29938 	return false;
29939 
29940       /* That's relevant only if there is a stack probe.  */
29941       if (crtl->is_leaf && !cfun->calls_alloca)
29942 	{
29943 	  /* We don't have the final size of the frame so adjust.  */
29944 	  size += 32 * UNITS_PER_WORD;
29945 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
29946 	    return true;
29947 	}
29948       else
29949 	return true;
29950     }
29951 
29952   return false;
29953 }
29954 
29955 /* Only Thumb-1 lacks support for conditional execution, so return true if
29956    the target is not Thumb-1.  */
29957 static bool
29958 arm_have_conditional_execution (void)
29959 {
29960   return !TARGET_THUMB1;
29961 }
29962 
29963 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
29964 static HOST_WIDE_INT
29965 arm_vector_alignment (const_tree type)
29966 {
29967   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29968 
29969   if (TARGET_AAPCS_BASED)
29970     align = MIN (align, 64);
29971 
29972   return align;
29973 }
29974 
29975 static unsigned int
29976 arm_autovectorize_vector_modes (vector_modes *modes, bool)
29977 {
29978   if (!TARGET_NEON_VECTORIZE_DOUBLE)
29979     {
29980       modes->safe_push (V16QImode);
29981       modes->safe_push (V8QImode);
29982     }
29983   return 0;
29984 }
29985 
29986 static bool
29987 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29988 {
29989   /* Vectors which aren't in packed structures will not be less aligned than
29990      the natural alignment of their element type, so this is safe.  */
29991   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29992     return !is_packed;
29993 
29994   return default_builtin_vector_alignment_reachable (type, is_packed);
29995 }
29996 
29997 static bool
29998 arm_builtin_support_vector_misalignment (machine_mode mode,
29999 					 const_tree type, int misalignment,
30000 					 bool is_packed)
30001 {
30002   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30003     {
30004       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30005 
30006       if (is_packed)
30007         return align == 1;
30008 
30009       /* If the misalignment is unknown, we should be able to handle the access
30010 	 so long as it is not to a member of a packed data structure.  */
30011       if (misalignment == -1)
30012         return true;
30013 
30014       /* Return true if the misalignment is a multiple of the natural alignment
30015          of the vector's element type.  This is probably always going to be
30016 	 true in practice, since we've already established that this isn't a
30017 	 packed access.  */
30018       return ((misalignment % align) == 0);
30019     }
30020 
30021   return default_builtin_support_vector_misalignment (mode, type, misalignment,
30022 						      is_packed);
30023 }
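
/* Illustrative note (assumed, not from the original source): with NEON,
   little-endian and unaligned access enabled, an access is accepted when
   the known misalignment is a multiple of TYPE_ALIGN_UNIT (type), e.g. a
   misalignment of 8 against an alignment of 4; a packed access is only
   accepted when that alignment is 1 byte.  */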
30024 
30025 static void
30026 arm_conditional_register_usage (void)
30027 {
30028   int regno;
30029 
30030   if (TARGET_THUMB1 && optimize_size)
30031     {
30032       /* When optimizing for size on Thumb-1, it's better not
30033         to use the HI regs, because of the overhead of
30034         stacking them.  */
30035       for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30036 	fixed_regs[regno] = call_used_regs[regno] = 1;
30037     }
30038 
30039   /* The link register can be clobbered by any branch insn,
30040      but we have no way to track that at present, so mark
30041      it as unavailable.  */
30042   if (TARGET_THUMB1)
30043     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30044 
30045   if (TARGET_32BIT && TARGET_VFP_BASE)
30046     {
30047       /* VFPv3 registers are disabled when earlier VFP
30048 	 versions are selected due to the definition of
30049 	 LAST_VFP_REGNUM.  */
30050       for (regno = FIRST_VFP_REGNUM;
30051 	   regno <= LAST_VFP_REGNUM; ++ regno)
30052 	{
30053 	  fixed_regs[regno] = 0;
30054 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30055 	    || regno >= FIRST_VFP_REGNUM + 32;
30056 	}
30057       if (TARGET_HAVE_MVE)
30058 	fixed_regs[VPR_REGNUM] = 0;
30059     }
30060 
30061   if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30062     {
30063       regno = FIRST_IWMMXT_GR_REGNUM;
30064       /* The 2002/10/09 revision of the XScale ABI has wCG0
30065          and wCG1 as call-preserved registers.  The 2002/11/21
30066          revision changed this so that all wCG registers are
30067          scratch registers.  */
30068       for (regno = FIRST_IWMMXT_GR_REGNUM;
30069 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30070 	fixed_regs[regno] = 0;
30071       /* The XScale ABI has wR0 - wR9 as scratch registers,
30072 	 the rest as call-preserved registers.  */
30073       for (regno = FIRST_IWMMXT_REGNUM;
30074 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
30075 	{
30076 	  fixed_regs[regno] = 0;
30077 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30078 	}
30079     }
30080 
30081   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30082     {
30083       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30084       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30085     }
30086   else if (TARGET_APCS_STACK)
30087     {
30088       fixed_regs[10]     = 1;
30089       call_used_regs[10] = 1;
30090     }
30091   /* -mcaller-super-interworking reserves r11 for calls to
30092      _interwork_r11_call_via_rN().  Making the register global
30093      is an easy way of ensuring that it remains valid for all
30094      calls.  */
30095   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30096       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30097     {
30098       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30099       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30100       if (TARGET_CALLER_INTERWORKING)
30101 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30102     }
30103 
30104   /* The Q and GE bits are only accessed via special ACLE patterns.  */
30105   CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30106   CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30107 
30108   SUBTARGET_CONDITIONAL_REGISTER_USAGE
30109 }
30110 
30111 static reg_class_t
30112 arm_preferred_rename_class (reg_class_t rclass)
30113 {
30114   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30115      using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS
30116      so that code size can be reduced.  */
30117   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30118     return LO_REGS;
30119   else
30120     return NO_REGS;
30121 }
30122 
30123 /* Compute the attribute "length" of insn "*push_multi".
30124    So this function MUST be kept in sync with that insn pattern.  */
30125 int
30126 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30127 {
30128   int i, regno, hi_reg;
30129   int num_saves = XVECLEN (parallel_op, 0);
30130 
30131   /* ARM mode.  */
30132   if (TARGET_ARM)
30133     return 4;
30134   /* Thumb1 mode.  */
30135   if (TARGET_THUMB1)
30136     return 2;
30137 
30138   /* Thumb2 mode.  */
30139   regno = REGNO (first_op);
30140   /* For PUSH/STM in Thumb-2 mode, we can use 16-bit encodings if the register
30141      list fits in 8 bits.  Normally this means all registers in the list must
30142      be LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use
30143      32-bit encodings.  There is one exception: for PUSH, LR (a HI_REG) can
30144      still be used with the 16-bit encoding.  */
30145   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30146   for (i = 1; i < num_saves && !hi_reg; i++)
30147     {
30148       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30149       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30150     }
30151 
30152   if (!hi_reg)
30153     return 2;
30154   return 4;
30155 }
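
/* Illustrative examples (assumed, not from the original source): under
   Thumb-2, "push {r0-r7, lr}" only uses LO_REGS plus LR and therefore gets
   the 16-bit encoding (length 2), whereas "push {r4, r8}" includes a high
   register other than LR and needs the 32-bit encoding (length 4).  */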
30156 
30157 /* Compute the attribute "length" of an insn.  Currently, this function is
30158    used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30159    "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
30160    rtx.  RETURN_PC is true if OPERANDS contains a return insn.  WRITE_BACK_P is
30161    true if OPERANDS contains an insn which explicitly updates the base register.  */
30162 
30163 int
30164 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30165 {
30166   /* ARM mode.  */
30167   if (TARGET_ARM)
30168     return 4;
30169   /* Thumb1 mode.  */
30170   if (TARGET_THUMB1)
30171     return 2;
30172 
30173   rtx parallel_op = operands[0];
30174   /* Initialize to the index of the last element of the PARALLEL.  */
30175   unsigned indx = XVECLEN (parallel_op, 0) - 1;
30176   /* Initialize to the register number of the base register.  */
30177   unsigned regno = REGNO (operands[1]);
30178   /* Skip the return and write-back patterns.
30179      We only need the register pop patterns for the analysis below.  */
30180   unsigned first_indx = 0;
30181   first_indx += return_pc ? 1 : 0;
30182   first_indx += write_back_p ? 1 : 0;
30183 
30184   /* A pop operation can be done through LDM or POP.  If the base register is
30185      SP and write back is used, then LDM is an alias of POP.  */
30186   bool pop_p = (regno == SP_REGNUM && write_back_p);
30187   bool ldm_p = !pop_p;
30188 
30189   /* Check base register for LDM.  */
30190   if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30191     return 4;
30192 
30193   /* Check each register in the list.  */
30194   for (; indx >= first_indx; indx--)
30195     {
30196       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30197       /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
30198 	 comment in arm_attr_length_push_multi.  */
30199       if (REGNO_REG_CLASS (regno) == HI_REGS
30200 	  && (regno != PC_REGNUM || ldm_p))
30201 	return 4;
30202     }
30203 
30204   return 2;
30205 }
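
/* Illustrative examples (assumed, not from the original source): under
   Thumb-2, "pop {r4-r7, pc}" with SP as the base register and write back is
   treated as a POP and can use the 16-bit encoding (length 2); the same
   register list loaded with LDM from a high base register would return 4.  */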
30206 
30207 /* Compute the number of instructions emitted by output_move_double.  */
30208 int
30209 arm_count_output_move_double_insns (rtx *operands)
30210 {
30211   int count;
30212   rtx ops[2];
30213   /* output_move_double may modify the operands array, so call it
30214      here on a copy of the array.  */
30215   ops[0] = operands[0];
30216   ops[1] = operands[1];
30217   output_move_double (ops, false, &count);
30218   return count;
30219 }
30220 
30221 /* Same as above, but operands are a register/memory pair in SImode.
30222    Assumes operands has the base register in position 0 and memory in position
30223    2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
30224 int
30225 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30226 {
30227   int count;
30228   rtx ops[2];
30229   int regnum, memnum;
30230   if (load)
30231     regnum = 0, memnum = 1;
30232   else
30233     regnum = 1, memnum = 0;
30234   ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30235   ops[memnum] = adjust_address (operands[2], DImode, 0);
30236   output_move_double (ops, false, &count);
30237   return count;
30238 }
30239 
30240 
30241 int
30242 vfp3_const_double_for_fract_bits (rtx operand)
30243 {
30244   REAL_VALUE_TYPE r0;
30245 
30246   if (!CONST_DOUBLE_P (operand))
30247     return 0;
30248 
30249   r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30250   if (exact_real_inverse (DFmode, &r0)
30251       && !REAL_VALUE_NEGATIVE (r0))
30252     {
30253       if (exact_real_truncate (DFmode, &r0))
30254 	{
30255 	  HOST_WIDE_INT value = real_to_integer (&r0);
30256 	  value = value & 0xffffffff;
30257 	  if ((value != 0) && ( (value & (value - 1)) == 0))
30258 	    {
30259 	      int ret = exact_log2 (value);
30260 	      gcc_assert (IN_RANGE (ret, 0, 31));
30261 	      return ret;
30262 	    }
30263 	}
30264     }
30265   return 0;
30266 }
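
/* Illustrative example (not part of the original source): for the constant
   0.125 the exact inverse is 8.0, which truncates exactly to the integer 8,
   a power of two, so the function above returns exact_log2 (8) == 3, i.e.
   the number of fraction bits for a fixed-point conversion.  */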
30267 
30268 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30269    log2 is in [1, 32], return that log2.  Otherwise return -1.
30270    This is used in the patterns for vcvt.s32.f32 floating-point to
30271    fixed-point conversions.  */
30272 
30273 int
30274 vfp3_const_double_for_bits (rtx x)
30275 {
30276   const REAL_VALUE_TYPE *r;
30277 
30278   if (!CONST_DOUBLE_P (x))
30279     return -1;
30280 
30281   r = CONST_DOUBLE_REAL_VALUE (x);
30282 
30283   if (REAL_VALUE_NEGATIVE (*r)
30284       || REAL_VALUE_ISNAN (*r)
30285       || REAL_VALUE_ISINF (*r)
30286       || !real_isinteger (r, SFmode))
30287     return -1;
30288 
30289   HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30290 
30291   /* The exact_log2 above will have returned -1 if this is
30292      not an exact log2.  */
30293   if (!IN_RANGE (hwint, 1, 32))
30294     return -1;
30295 
30296   return hwint;
30297 }
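
/* Illustrative example (not part of the original source): X == 4.0 is a
   non-negative integer power of two, so the function above returns 2;
   X == 3.0 is an integer but not a power of two, so exact_log2 gives -1
   and the function returns -1.  */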
30298 
30299 
30300 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
30301 
30302 static void
30303 arm_pre_atomic_barrier (enum memmodel model)
30304 {
30305   if (need_atomic_barrier_p (model, true))
30306     emit_insn (gen_memory_barrier ());
30307 }
30308 
30309 static void
30310 arm_post_atomic_barrier (enum memmodel model)
30311 {
30312   if (need_atomic_barrier_p (model, false))
30313     emit_insn (gen_memory_barrier ());
30314 }
30315 
30316 /* Emit the load-exclusive and store-exclusive instructions.
30317    Use acquire and release versions if necessary.  */
30318 
30319 static void
30320 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30321 {
30322   rtx (*gen) (rtx, rtx);
30323 
30324   if (acq)
30325     {
30326       switch (mode)
30327         {
30328         case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30329         case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30330         case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30331         case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30332         default:
30333           gcc_unreachable ();
30334         }
30335     }
30336   else
30337     {
30338       switch (mode)
30339         {
30340         case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30341         case E_HImode: gen = gen_arm_load_exclusivehi; break;
30342         case E_SImode: gen = gen_arm_load_exclusivesi; break;
30343         case E_DImode: gen = gen_arm_load_exclusivedi; break;
30344         default:
30345           gcc_unreachable ();
30346         }
30347     }
30348 
30349   emit_insn (gen (rval, mem));
30350 }
30351 
30352 static void
30353 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30354                           rtx mem, bool rel)
30355 {
30356   rtx (*gen) (rtx, rtx, rtx);
30357 
30358   if (rel)
30359     {
30360       switch (mode)
30361         {
30362         case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30363         case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30364         case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30365         case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30366         default:
30367           gcc_unreachable ();
30368         }
30369     }
30370   else
30371     {
30372       switch (mode)
30373         {
30374         case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30375         case E_HImode: gen = gen_arm_store_exclusivehi; break;
30376         case E_SImode: gen = gen_arm_store_exclusivesi; break;
30377         case E_DImode: gen = gen_arm_store_exclusivedi; break;
30378         default:
30379           gcc_unreachable ();
30380         }
30381     }
30382 
30383   emit_insn (gen (bval, rval, mem));
30384 }
30385 
30386 /* Mark the previous jump instruction as unlikely.  */
30387 
30388 static void
30389 emit_unlikely_jump (rtx insn)
30390 {
30391   rtx_insn *jump = emit_jump_insn (insn);
30392   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30393 }
30394 
30395 /* Expand a compare and swap pattern.  */
30396 
30397 void
30398 arm_expand_compare_and_swap (rtx operands[])
30399 {
30400   rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30401   machine_mode mode, cmp_mode;
30402 
30403   bval = operands[0];
30404   rval = operands[1];
30405   mem = operands[2];
30406   oldval = operands[3];
30407   newval = operands[4];
30408   is_weak = operands[5];
30409   mod_s = operands[6];
30410   mod_f = operands[7];
30411   mode = GET_MODE (mem);
30412 
30413   /* Normally the succ memory model must be stronger than fail, but in the
30414      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30415      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
30416 
30417   if (TARGET_HAVE_LDACQ
30418       && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30419       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30420     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30421 
30422   switch (mode)
30423     {
30424     case E_QImode:
30425     case E_HImode:
30426       /* For narrow modes, we're going to perform the comparison in SImode,
30427 	 so do the zero-extension now.  */
30428       rval = gen_reg_rtx (SImode);
30429       oldval = convert_modes (SImode, mode, oldval, true);
30430       /* FALLTHRU */
30431 
30432     case E_SImode:
30433       /* Force the value into a register if needed.  We waited until after
30434 	 the zero-extension above to do this properly.  */
30435       if (!arm_add_operand (oldval, SImode))
30436 	oldval = force_reg (SImode, oldval);
30437       break;
30438 
30439     case E_DImode:
30440       if (!cmpdi_operand (oldval, mode))
30441 	oldval = force_reg (mode, oldval);
30442       break;
30443 
30444     default:
30445       gcc_unreachable ();
30446     }
30447 
30448   if (TARGET_THUMB1)
30449     cmp_mode = E_SImode;
30450   else
30451     cmp_mode = CC_Zmode;
30452 
30453   bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30454   emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30455                                         oldval, newval, is_weak, mod_s, mod_f));
30456 
30457   if (mode == QImode || mode == HImode)
30458     emit_move_insn (operands[1], gen_lowpart (mode, rval));
30459 
30460   /* In all cases, we arrange for success to be signaled by Z set.
30461      This arrangement allows for the boolean result to be used directly
30462      in a subsequent branch, post optimization.  For Thumb-1 targets, the
30463      boolean negation of the result is also stored in bval because the
30464      Thumb-1 backend lacks dependency tracking for the CC flag, as
30465      flag-setting is not represented at the RTL level.  */
30466   if (TARGET_THUMB1)
30467       emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30468   else
30469     {
30470       x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30471       emit_insn (gen_rtx_SET (bval, x));
30472     }
30473 }
30474 
30475 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
30476    another memory store between the load-exclusive and store-exclusive can
30477    reset the monitor from Exclusive to Open state.  This means we must wait
30478    until after reload to split the pattern, lest we get a register spill in
30479    the middle of the atomic sequence.  Success of the compare and swap is
30480    indicated by the Z flag being set for 32-bit targets and by neg_bval being
30481    zero for Thumb-1 targets (i.e. the negation of the boolean value returned
30482    by the atomic_compare_and_swapmode standard pattern in operand 0).  */
30483 
30484 void
30485 arm_split_compare_and_swap (rtx operands[])
30486 {
30487   rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30488   machine_mode mode;
30489   enum memmodel mod_s, mod_f;
30490   bool is_weak;
30491   rtx_code_label *label1, *label2;
30492   rtx x, cond;
30493 
30494   rval = operands[1];
30495   mem = operands[2];
30496   oldval = operands[3];
30497   newval = operands[4];
30498   is_weak = (operands[5] != const0_rtx);
30499   mod_s_rtx = operands[6];
30500   mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30501   mod_f = memmodel_from_int (INTVAL (operands[7]));
30502   neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30503   mode = GET_MODE (mem);
30504 
30505   bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30506 
30507   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30508   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30509 
30510   /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
30511      a full barrier is emitted after the store-release.  */
30512   if (is_armv8_sync)
30513     use_acquire = false;
30514 
30515   /* Checks whether a barrier is needed and emits one accordingly.  */
30516   if (!(use_acquire || use_release))
30517     arm_pre_atomic_barrier (mod_s);
30518 
30519   label1 = NULL;
30520   if (!is_weak)
30521     {
30522       label1 = gen_label_rtx ();
30523       emit_label (label1);
30524     }
30525   label2 = gen_label_rtx ();
30526 
30527   arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30528 
30529   /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
30530      as required to communicate with arm_expand_compare_and_swap.  */
30531   if (TARGET_32BIT)
30532     {
30533       cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30534       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30535       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30536 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30537       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30538     }
30539   else
30540     {
30541       cond = gen_rtx_NE (VOIDmode, rval, oldval);
30542       if (thumb1_cmpneg_operand (oldval, SImode))
30543 	{
30544 	  rtx src = rval;
30545 	  if (!satisfies_constraint_L (oldval))
30546 	    {
30547 	      gcc_assert (satisfies_constraint_J (oldval));
30548 
30549 	      /* For such immediates, ADDS needs the source and destination regs
30550 		 to be the same.
30551 
30552 		 Normally this would be handled by RA, but this is all happening
30553 		 after RA.  */
30554 	      emit_move_insn (neg_bval, rval);
30555 	      src = neg_bval;
30556 	    }
30557 
30558 	  emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
30559 						       label2, cond));
30560 	}
30561       else
30562 	{
30563 	  emit_move_insn (neg_bval, const1_rtx);
30564 	  emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30565 	}
30566     }
30567 
30568   arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30569 
30570   /* Weak or strong, we want EQ to be true for success, so that we
30571      match the flags that we got from the compare above.  */
30572   if (TARGET_32BIT)
30573     {
30574       cond = gen_rtx_REG (CCmode, CC_REGNUM);
30575       x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30576       emit_insn (gen_rtx_SET (cond, x));
30577     }
30578 
30579   if (!is_weak)
30580     {
30581       /* Z is set to boolean value of !neg_bval, as required to communicate
30582 	 with arm_expand_compare_and_swap.  */
30583       x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30584       emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30585     }
30586 
30587   if (!is_mm_relaxed (mod_f))
30588     emit_label (label2);
30589 
30590   /* Checks whether a barrier is needed and emits one accordingly.  */
30591   if (is_armv8_sync
30592       || !(use_acquire || use_release))
30593     arm_post_atomic_barrier (mod_s);
30594 
30595   if (is_mm_relaxed (mod_f))
30596     emit_label (label2);
30597 }
30598 
30599 /* Split an atomic operation pattern.  Operation is given by CODE and is one
30600    of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
30601    operation).  Operation is performed on the content at MEM and on VALUE
30602    following the memory model MODEL_RTX.  The content at MEM before and after
30603    the operation is returned in OLD_OUT and NEW_OUT respectively while the
30604    success of the operation is returned in COND.  Using a scratch register or
30605    an operand register for these determines what result is returned for that
30606    pattern.  */
30607 
30608 void
30609 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30610 		     rtx value, rtx model_rtx, rtx cond)
30611 {
30612   enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30613   machine_mode mode = GET_MODE (mem);
30614   machine_mode wmode = (mode == DImode ? DImode : SImode);
30615   rtx_code_label *label;
30616   bool all_low_regs, bind_old_new;
30617   rtx x;
30618 
30619   bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30620 
30621   bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30622   bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30623 
30624   /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
30625      a full barrier is emitted after the store-release.  */
30626   if (is_armv8_sync)
30627     use_acquire = false;
30628 
30629   /* Checks whether a barrier is needed and emits one accordingly.  */
30630   if (!(use_acquire || use_release))
30631     arm_pre_atomic_barrier (model);
30632 
30633   label = gen_label_rtx ();
30634   emit_label (label);
30635 
30636   if (new_out)
30637     new_out = gen_lowpart (wmode, new_out);
30638   if (old_out)
30639     old_out = gen_lowpart (wmode, old_out);
30640   else
30641     old_out = new_out;
30642   value = simplify_gen_subreg (wmode, value, mode, 0);
30643 
30644   arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30645 
30646   /* Does the operation require destination and first operand to use the same
30647      register?  This is decided by register constraints of relevant insn
30648      patterns in thumb1.md.  */
30649   gcc_assert (!new_out || REG_P (new_out));
30650   all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30651 		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30652 		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30653   bind_old_new =
30654     (TARGET_THUMB1
30655      && code != SET
30656      && code != MINUS
30657      && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30658 
30659   /* We want to return the old value while putting the result of the operation
30660      in the same register as the old value so copy the old value over to the
30661      destination register and use that register for the operation.  */
30662   if (old_out && bind_old_new)
30663     {
30664       emit_move_insn (new_out, old_out);
30665       old_out = new_out;
30666     }
30667 
30668   switch (code)
30669     {
30670     case SET:
30671       new_out = value;
30672       break;
30673 
30674     case NOT:
30675       x = gen_rtx_AND (wmode, old_out, value);
30676       emit_insn (gen_rtx_SET (new_out, x));
30677       x = gen_rtx_NOT (wmode, new_out);
30678       emit_insn (gen_rtx_SET (new_out, x));
30679       break;
30680 
30681     case MINUS:
30682       if (CONST_INT_P (value))
30683 	{
30684 	  value = gen_int_mode (-INTVAL (value), wmode);
30685 	  code = PLUS;
30686 	}
30687       /* FALLTHRU */
30688 
30689     case PLUS:
30690       if (mode == DImode)
30691 	{
30692 	  /* DImode plus/minus need to clobber flags.  */
30693 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
30694 	     they require matching operands, even when we could easily support
30695 	     three operands.  Thankfully, this can be fixed up post-splitting,
30696 	     as the individual add+adc patterns do accept three operands and
30697 	     post-reload cprop can make these moves go away.  */
30698 	  emit_move_insn (new_out, old_out);
30699 	  if (code == PLUS)
30700 	    x = gen_adddi3 (new_out, new_out, value);
30701 	  else
30702 	    x = gen_subdi3 (new_out, new_out, value);
30703 	  emit_insn (x);
30704 	  break;
30705 	}
30706       /* FALLTHRU */
30707 
30708     default:
30709       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30710       emit_insn (gen_rtx_SET (new_out, x));
30711       break;
30712     }
30713 
30714   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30715                             use_release);
30716 
30717   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30718   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30719 
30720   /* Checks whether a barrier is needed and emits one accordingly.  */
30721   if (is_armv8_sync
30722       || !(use_acquire || use_release))
30723     arm_post_atomic_barrier (model);
30724 }
30725 
30726 #define MAX_VECT_LEN 16
30727 
30728 struct expand_vec_perm_d
30729 {
30730   rtx target, op0, op1;
30731   vec_perm_indices perm;
30732   machine_mode vmode;
30733   bool one_vector_p;
30734   bool testing_p;
30735 };
30736 
30737 /* Generate a variable permutation.  */
30738 
30739 static void
30740 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30741 {
30742   machine_mode vmode = GET_MODE (target);
30743   bool one_vector_p = rtx_equal_p (op0, op1);
30744 
30745   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30746   gcc_checking_assert (GET_MODE (op0) == vmode);
30747   gcc_checking_assert (GET_MODE (op1) == vmode);
30748   gcc_checking_assert (GET_MODE (sel) == vmode);
30749   gcc_checking_assert (TARGET_NEON);
30750 
30751   if (one_vector_p)
30752     {
30753       if (vmode == V8QImode)
30754 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30755       else
30756 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30757     }
30758   else
30759     {
30760       rtx pair;
30761 
30762       if (vmode == V8QImode)
30763 	{
30764 	  pair = gen_reg_rtx (V16QImode);
30765 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30766 	  pair = gen_lowpart (TImode, pair);
30767 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30768 	}
30769       else
30770 	{
30771 	  pair = gen_reg_rtx (OImode);
30772 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30773 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30774 	}
30775     }
30776 }
30777 
30778 void
30779 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30780 {
30781   machine_mode vmode = GET_MODE (target);
30782   unsigned int nelt = GET_MODE_NUNITS (vmode);
30783   bool one_vector_p = rtx_equal_p (op0, op1);
30784   rtx mask;
30785 
30786   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
30787      numbering of elements for big-endian, we must reverse the order.  */
30788   gcc_checking_assert (!BYTES_BIG_ENDIAN);
30789 
30790   /* The VTBL instruction does not use a modulo index, so we must take care
30791      of that ourselves.  */
30792   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30793   mask = gen_const_vec_duplicate (vmode, mask);
30794   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30795 
30796   arm_expand_vec_perm_1 (target, op0, op1, sel);
30797 }
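
/* Illustrative note (not part of the original source): for a single-vector
   V8QImode permutation the selector is ANDed with a vector of 7s above, so
   an out-of-range index such as 9 selects lane 1 instead of the zero that
   VTBL would otherwise produce for indexes past the end of the table.  */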
30798 
30799 /* Map lane ordering between architectural lane order, and GCC lane order,
30800    taking into account ABI.  See comment above output_move_neon for details.  */
30801 
30802 static int
30803 neon_endian_lane_map (machine_mode mode, int lane)
30804 {
30805   if (BYTES_BIG_ENDIAN)
30806   {
30807     int nelems = GET_MODE_NUNITS (mode);
30808     /* Reverse lane order.  */
30809     lane = (nelems - 1 - lane);
30810     /* Reverse D register order, to match ABI.  */
30811     if (GET_MODE_SIZE (mode) == 16)
30812       lane = lane ^ (nelems / 2);
30813   }
30814   return lane;
30815 }
30816 
30817 /* Some permutations index into pairs of vectors, this is a helper function
30818    to map indexes into those pairs of vectors.  */
30819 
30820 static int
30821 neon_pair_endian_lane_map (machine_mode mode, int lane)
30822 {
30823   int nelem = GET_MODE_NUNITS (mode);
30824   if (BYTES_BIG_ENDIAN)
30825     lane =
30826       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
30827   return lane;
30828 }
30829 
30830 /* Generate or test for an insn that supports a constant permutation.  */
30831 
30832 /* Recognize patterns for the VUZP insns.  */
30833 
30834 static bool
30835 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30836 {
30837   unsigned int i, odd, mask, nelt = d->perm.length ();
30838   rtx out0, out1, in0, in1;
30839   int first_elem;
30840   int swap_nelt;
30841 
30842   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30843     return false;
30844 
30845   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
30846      big endian pattern on 64 bit vectors, so we correct for that.  */
30847   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
30848     && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
30849 
30850   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
30851 
30852   if (first_elem == neon_endian_lane_map (d->vmode, 0))
30853     odd = 0;
30854   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
30855     odd = 1;
30856   else
30857     return false;
30858   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30859 
30860   for (i = 0; i < nelt; i++)
30861     {
30862       unsigned elt =
30863 	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
30864       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
30865 	return false;
30866     }
30867 
30868   /* Success!  */
30869   if (d->testing_p)
30870     return true;
30871 
30872   in0 = d->op0;
30873   in1 = d->op1;
30874   if (swap_nelt != 0)
30875     std::swap (in0, in1);
30876 
30877   out0 = d->target;
30878   out1 = gen_reg_rtx (d->vmode);
30879   if (odd)
30880     std::swap (out0, out1);
30881 
30882   emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
30883   return true;
30884 }
30885 
30886 /* Recognize patterns for the VZIP insns.  */
30887 
30888 static bool
30889 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30890 {
30891   unsigned int i, high, mask, nelt = d->perm.length ();
30892   rtx out0, out1, in0, in1;
30893   int first_elem;
30894   bool is_swapped;
30895 
30896   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30897     return false;
30898 
30899   is_swapped = BYTES_BIG_ENDIAN;
30900 
30901   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
30902 
30903   high = nelt / 2;
30904   if (first_elem == neon_endian_lane_map (d->vmode, high))
30905     ;
30906   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
30907     high = 0;
30908   else
30909     return false;
30910   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30911 
30912   for (i = 0; i < nelt / 2; i++)
30913     {
30914       unsigned elt =
30915 	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
30916       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
30917 	  != elt)
30918 	return false;
30919       elt =
30920 	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
30921       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
30922 	  != elt)
30923 	return false;
30924     }
30925 
30926   /* Success!  */
30927   if (d->testing_p)
30928     return true;
30929 
30930   in0 = d->op0;
30931   in1 = d->op1;
30932   if (is_swapped)
30933     std::swap (in0, in1);
30934 
30935   out0 = d->target;
30936   out1 = gen_reg_rtx (d->vmode);
30937   if (high)
30938     std::swap (out0, out1);
30939 
30940   emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
30941   return true;
30942 }
30943 
30944 /* Recognize patterns for the VREV insns.  */
30945 static bool
30946 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30947 {
30948   unsigned int i, j, diff, nelt = d->perm.length ();
30949   rtx (*gen) (machine_mode, rtx, rtx);
30950 
30951   if (!d->one_vector_p)
30952     return false;
30953 
30954   diff = d->perm[0];
30955   switch (diff)
30956     {
30957     case 7:
30958        switch (d->vmode)
30959         {
30960          case E_V16QImode:
30961          case E_V8QImode:
30962           gen = gen_neon_vrev64;
30963           break;
30964          default:
30965           return false;
30966         }
30967        break;
30968     case 3:
30969        switch (d->vmode)
30970         {
30971 	case E_V16QImode:
30972 	case E_V8QImode:
30973           gen = gen_neon_vrev32;
30974           break;
30975 	case E_V8HImode:
30976 	case E_V4HImode:
30977 	case E_V8HFmode:
30978 	case E_V4HFmode:
30979           gen = gen_neon_vrev64;
30980           break;
30981 	default:
30982 	  return false;
30983 	}
30984       break;
30985     case 1:
30986       switch (d->vmode)
30987 	{
30988 	case E_V16QImode:
30989 	case E_V8QImode:
30990           gen = gen_neon_vrev16;
30991           break;
30992 	case E_V8HImode:
30993 	case E_V4HImode:
30994           gen = gen_neon_vrev32;
30995           break;
30996 	case E_V4SImode:
30997 	case E_V2SImode:
30998 	case E_V4SFmode:
30999 	case E_V2SFmode:
31000           gen = gen_neon_vrev64;
31001 	  break;
31002         default:
31003 	  return false;
31004 	}
31005       break;
31006     default:
31007       return false;
31008     }
31009 
31010   for (i = 0; i < nelt ; i += diff + 1)
31011     for (j = 0; j <= diff; j += 1)
31012       {
31013 	/* This is guaranteed to be true as the value of diff
31014 	   is 7, 3 or 1, and we should have enough elements in the
31015 	   queue to generate this.  Getting a vector mask with a
31016 	   value of diff other than these values implies that
31017 	   something is wrong by the time we get here.  */
31018 	gcc_assert (i + j < nelt);
31019 	if (d->perm[i + j] != i + diff - j)
31020 	  return false;
31021       }
31022 
31023   /* Success! */
31024   if (d->testing_p)
31025     return true;
31026 
31027   emit_insn (gen (d->vmode, d->target, d->op0));
31028   return true;
31029 }
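
/* Illustrative example (not part of the original source): for V8QImode the
   permutation { 1, 0, 3, 2, 5, 4, 7, 6 } has diff == 1 and satisfies the
   check above (perm[i + j] == i + diff - j), so it is matched as a
   byte-reverse within each halfword and emitted as vrev16.8.  */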
31030 
31031 /* Recognize patterns for the VTRN insns.  */
31032 
31033 static bool
31034 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31035 {
31036   unsigned int i, odd, mask, nelt = d->perm.length ();
31037   rtx out0, out1, in0, in1;
31038 
31039   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31040     return false;
31041 
31042   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
31043   if (d->perm[0] == 0)
31044     odd = 0;
31045   else if (d->perm[0] == 1)
31046     odd = 1;
31047   else
31048     return false;
31049   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31050 
31051   for (i = 0; i < nelt; i += 2)
31052     {
31053       if (d->perm[i] != i + odd)
31054 	return false;
31055       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31056 	return false;
31057     }
31058 
31059   /* Success!  */
31060   if (d->testing_p)
31061     return true;
31062 
31063   in0 = d->op0;
31064   in1 = d->op1;
31065   if (BYTES_BIG_ENDIAN)
31066     {
31067       std::swap (in0, in1);
31068       odd = !odd;
31069     }
31070 
31071   out0 = d->target;
31072   out1 = gen_reg_rtx (d->vmode);
31073   if (odd)
31074     std::swap (out0, out1);
31075 
31076   emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31077   return true;
31078 }
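
/* Illustrative example (not part of the original source): for a two-operand
   V8QImode permutation, { 0, 8, 2, 10, 4, 12, 6, 14 } passes the checks
   above with odd == 0 (perm[i] == i and perm[i + 1] == i + nelt), so it is
   matched as the even-lane half of a VTRN.8.  */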
31079 
31080 /* Recognize patterns for the VEXT insns.  */
31081 
31082 static bool
31083 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31084 {
31085   unsigned int i, nelt = d->perm.length ();
31086   rtx offset;
31087 
31088   unsigned int location;
31089 
31090   unsigned int next  = d->perm[0] + 1;
31091 
31092   /* TODO: Handle GCC's numbering of elements for big-endian.  */
31093   if (BYTES_BIG_ENDIAN)
31094     return false;
31095 
31096   /* Check if the extracted indexes are increasing by one.  */
31097   for (i = 1; i < nelt; next++, i++)
31098     {
31099       /* If we hit the most significant element of the 2nd vector in
31100 	 the previous iteration, no need to test further.  */
31101       if (next == 2 * nelt)
31102 	return false;
31103 
31104       /* If we are operating on only one vector, it could be a
31105 	 rotation.  If there are only two elements of size < 64, let
31106 	 arm_evpc_neon_vrev catch it.  */
31107       if (d->one_vector_p && (next == nelt))
31108 	{
31109 	  if ((nelt == 2) && (d->vmode != V2DImode))
31110 	    return false;
31111 	  else
31112 	    next = 0;
31113 	}
31114 
31115       if (d->perm[i] != next)
31116 	return false;
31117     }
31118 
31119   location = d->perm[0];
31120 
31121   /* Success! */
31122   if (d->testing_p)
31123     return true;
31124 
31125   offset = GEN_INT (location);
31126 
31127   if (d->vmode == E_DImode)
31128     return false;
31129 
31130   emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31131   return true;
31132 }
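
/* Illustrative example (not part of the original source): for a two-operand
   V8QImode permutation, { 1, 2, 3, 4, 5, 6, 7, 8 } selects consecutive
   elements starting at index 1, so LOCATION == 1 and the permutation is
   emitted as a single VEXT with offset #1.  */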
31133 
31134 /* The NEON VTBL instruction is a fully variable permutation that's even
31135    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
31136    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
31137    can do slightly better by expanding this as a constant where we don't
31138    have to apply a mask.  */
31139 
31140 static bool
31141 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31142 {
31143   rtx rperm[MAX_VECT_LEN], sel;
31144   machine_mode vmode = d->vmode;
31145   unsigned int i, nelt = d->perm.length ();
31146 
31147   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
31148      numbering of elements for big-endian, we must reverse the order.  */
31149   if (BYTES_BIG_ENDIAN)
31150     return false;
31151 
31152   if (d->testing_p)
31153     return true;
31154 
31155   /* Generic code will try constant permutation twice.  Once with the
31156      original mode and again with the elements lowered to QImode.
31157      So wait and don't do the selector expansion ourselves.  */
31158   if (vmode != V8QImode && vmode != V16QImode)
31159     return false;
31160 
31161   for (i = 0; i < nelt; ++i)
31162     rperm[i] = GEN_INT (d->perm[i]);
31163   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31164   sel = force_reg (vmode, sel);
31165 
31166   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31167   return true;
31168 }
31169 
31170 static bool
31171 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31172 {
31173   /* Check if the input mask matches vext before reordering the
31174      operands.  */
31175   if (TARGET_NEON)
31176     if (arm_evpc_neon_vext (d))
31177       return true;
31178 
31179   /* The pattern matching functions above are written to look for a small
31180      number to begin the sequence (0, 1, N/2).  If we begin with an index
31181      from the second operand, we can swap the operands.  */
31182   unsigned int nelt = d->perm.length ();
31183   if (d->perm[0] >= nelt)
31184     {
31185       d->perm.rotate_inputs (1);
31186       std::swap (d->op0, d->op1);
31187     }
31188 
31189   if (TARGET_NEON)
31190     {
31191       if (arm_evpc_neon_vuzp (d))
31192 	return true;
31193       if (arm_evpc_neon_vzip (d))
31194 	return true;
31195       if (arm_evpc_neon_vrev (d))
31196 	return true;
31197       if (arm_evpc_neon_vtrn (d))
31198 	return true;
31199       return arm_evpc_neon_vtbl (d);
31200     }
31201   return false;
31202 }
31203 
31204 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
31205 
31206 static bool
31207 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31208 			      const vec_perm_indices &sel)
31209 {
31210   struct expand_vec_perm_d d;
31211   int i, nelt, which;
31212 
31213   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31214     return false;
31215 
31216   d.target = target;
31217   d.op0 = op0;
31218   d.op1 = op1;
31219 
31220   d.vmode = vmode;
31221   gcc_assert (VECTOR_MODE_P (d.vmode));
31222   d.testing_p = !target;
31223 
31224   nelt = GET_MODE_NUNITS (d.vmode);
31225   for (i = which = 0; i < nelt; ++i)
31226     {
31227       int ei = sel[i] & (2 * nelt - 1);
31228       which |= (ei < nelt ? 1 : 2);
31229     }
31230 
31231   switch (which)
31232     {
31233     default:
31234       gcc_unreachable();
31235 
31236     case 3:
31237       d.one_vector_p = false;
31238       if (d.testing_p || !rtx_equal_p (op0, op1))
31239 	break;
31240 
31241       /* The elements of PERM do not suggest that only the first operand
31242 	 is used, but both operands are identical.  Allow easier matching
31243 	 of the permutation by folding the permutation into the single
31244 	 input vector.  */
31245       /* FALLTHRU */
31246     case 2:
31247       d.op0 = op1;
31248       d.one_vector_p = true;
31249       break;
31250 
31251     case 1:
31252       d.op1 = op0;
31253       d.one_vector_p = true;
31254       break;
31255     }
31256 
31257   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31258 
31259   if (!d.testing_p)
31260     return arm_expand_vec_perm_const_1 (&d);
31261 
31262   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31263   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31264   if (!d.one_vector_p)
31265     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31266 
31267   start_sequence ();
31268   bool ret = arm_expand_vec_perm_const_1 (&d);
31269   end_sequence ();
31270 
31271   return ret;
31272 }
31273 
31274 bool
31275 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31276 {
31277   /* If we are soft float and either we have LDRD or the mode is no wider
31278      than a word, then all auto increment forms are ok.  */
31279   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31280     return true;
31281 
31282   switch (code)
31283     {
31284       /* Post increment and Pre Decrement are supported for all
31285 	 instruction forms except for vector forms.  */
31286     case ARM_POST_INC:
31287     case ARM_PRE_DEC:
31288       if (VECTOR_MODE_P (mode))
31289 	{
31290 	  if (code != ARM_PRE_DEC)
31291 	    return true;
31292 	  else
31293 	    return false;
31294 	}
31295 
31296       return true;
31297 
31298     case ARM_POST_DEC:
31299     case ARM_PRE_INC:
31300       /* Without LDRD, and with a mode size greater than
31301 	 the word size, there is no point in auto-incrementing
31302          because ldm and stm will not have these forms.  */
31303       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31304 	return false;
31305 
31306       /* Vector and floating point modes do not support
31307 	 these auto increment forms.  */
31308       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31309 	return false;
31310 
31311       return true;
31312 
31313     default:
31314       return false;
31315 
31316     }
31317 
31318   return false;
31319 }
31320 
31321 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31322    on ARM, since we know that shifts by negative amounts are no-ops.
31323    Additionally, the default expansion code is not available or suitable
31324    for post-reload insn splits (this can occur when the register allocator
31325    chooses not to do a shift in NEON).
31326 
31327    This function is used in both initial expand and post-reload splits, and
31328    handles all kinds of 64-bit shifts.
31329 
31330    Input requirements:
31331     - It is safe for the input and output to be the same register, but
31332       early-clobber rules apply for the shift amount and scratch registers.
31333     - Shift by register requires both scratch registers.  In all other cases
31334       the scratch registers may be NULL.
31335     - Ashiftrt by a register also clobbers the CC register.  */
31336 void
31337 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31338 			       rtx amount, rtx scratch1, rtx scratch2)
31339 {
31340   rtx out_high = gen_highpart (SImode, out);
31341   rtx out_low = gen_lowpart (SImode, out);
31342   rtx in_high = gen_highpart (SImode, in);
31343   rtx in_low = gen_lowpart (SImode, in);
31344 
31345   /* Terminology:
31346 	in = the register pair containing the input value.
31347 	out = the destination register pair.
31348 	up = the high- or low-part of each pair.
31349 	down = the opposite part to "up".
31350      In a shift, we can consider bits to shift from "up"-stream to
31351      "down"-stream, so in a left-shift "up" is the low-part and "down"
31352      is the high-part of each register pair.  */
31353 
31354   rtx out_up   = code == ASHIFT ? out_low : out_high;
31355   rtx out_down = code == ASHIFT ? out_high : out_low;
31356   rtx in_up   = code == ASHIFT ? in_low : in_high;
31357   rtx in_down = code == ASHIFT ? in_high : in_low;
31358 
31359   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31360   gcc_assert (out
31361 	      && (REG_P (out) || GET_CODE (out) == SUBREG)
31362 	      && GET_MODE (out) == DImode);
31363   gcc_assert (in
31364 	      && (REG_P (in) || GET_CODE (in) == SUBREG)
31365 	      && GET_MODE (in) == DImode);
31366   gcc_assert (amount
31367 	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31368 		   && GET_MODE (amount) == SImode)
31369 		  || CONST_INT_P (amount)));
31370   gcc_assert (scratch1 == NULL
31371 	      || (GET_CODE (scratch1) == SCRATCH)
31372 	      || (GET_MODE (scratch1) == SImode
31373 		  && REG_P (scratch1)));
31374   gcc_assert (scratch2 == NULL
31375 	      || (GET_CODE (scratch2) == SCRATCH)
31376 	      || (GET_MODE (scratch2) == SImode
31377 		  && REG_P (scratch2)));
31378   gcc_assert (!REG_P (out) || !REG_P (amount)
31379 	      || !HARD_REGISTER_P (out)
31380 	      || (REGNO (out) != REGNO (amount)
31381 		  && REGNO (out) + 1 != REGNO (amount)));
31382 
31383   /* Macros to make following code more readable.  */
31384   #define SUB_32(DEST,SRC) \
31385 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31386   #define RSB_32(DEST,SRC) \
31387 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31388   #define SUB_S_32(DEST,SRC) \
31389 	    gen_addsi3_compare0 ((DEST), (SRC), \
31390 				 GEN_INT (-32))
31391   #define SET(DEST,SRC) \
31392 	    gen_rtx_SET ((DEST), (SRC))
31393   #define SHIFT(CODE,SRC,AMOUNT) \
31394 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31395   #define LSHIFT(CODE,SRC,AMOUNT) \
31396 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31397 			    SImode, (SRC), (AMOUNT))
31398   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31399 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31400 			    SImode, (SRC), (AMOUNT))
31401   #define ORR(A,B) \
31402 	    gen_rtx_IOR (SImode, (A), (B))
31403   #define BRANCH(COND,LABEL) \
31404 	    gen_arm_cond_branch ((LABEL), \
31405 				 gen_rtx_ ## COND (CCmode, cc_reg, \
31406 						   const0_rtx), \
31407 				 cc_reg)
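  /* As an illustration of how these helpers expand (derived from the macro
     definitions above, not additional machinery): SHIFT (ASHIFTRT, in_up,
     scratch2) builds the RTX (ashiftrt:SI in_up scratch2), LSHIFT picks
     ashift for left shifts and lshiftrt otherwise, and BRANCH (LT, label)
     emits a conditional branch on the CC register compared by SUB_S_32 in
     the register-shift path below.  */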
31408 
31409   /* Shifts by register and shifts by constant are handled separately.  */
31410   if (CONST_INT_P (amount))
31411     {
31412       /* We have a shift-by-constant.  */
31413 
31414       /* First, handle out-of-range shift amounts.
31415 	 In both cases we try to match the result that an ARM instruction
31416 	 in a shift-by-register would give.  This helps reduce execution
31417 	 differences between optimization levels, but it won't stop other
31418 	 parts of the compiler doing different things.  This is "undefined
31419 	 behavior", in any case.  */
31420       if (INTVAL (amount) <= 0)
31421 	emit_insn (gen_movdi (out, in));
31422       else if (INTVAL (amount) >= 64)
31423 	{
31424 	  if (code == ASHIFTRT)
31425 	    {
31426 	      rtx const31_rtx = GEN_INT (31);
31427 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31428 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31429 	    }
31430 	  else
31431 	    emit_insn (gen_movdi (out, const0_rtx));
31432 	}
31433 
31434       /* Now handle valid shifts. */
31435       else if (INTVAL (amount) < 32)
31436 	{
31437 	  /* Shifts by a constant less than 32.  */
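	  /* For example, a left shift by a constant k (0 < k < 32) is
	     emitted roughly as:
	        out_high = (in_high << k) | (in_low >> (32 - k));
	        out_low  = in_low << k;
	     with the right-shift cases following the mirrored pattern.  */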
31438 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31439 
31440 	  /* Clearing the out register in DImode first avoids lots
31441 	     of spilling and results in less stack usage.
31442 	     Later this redundant insn is completely removed.
31443 	     Do that only if "in" and "out" are different registers.  */
31444 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31445 	    emit_insn (SET (out, const0_rtx));
31446 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31447 	  emit_insn (SET (out_down,
31448 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
31449 			       out_down)));
31450 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31451 	}
31452       else
31453 	{
31454 	  /* Shifts by a constant greater than 31.  */
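	  /* For example, a left shift by a constant k (32 <= k < 64)
	     becomes roughly:
	        out_high = in_low << (k - 32);
	        out_low  = 0;
	     while an arithmetic right shift fills out_high with copies of
	     the sign bit (in_high >> 31) instead of zero.  */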
31455 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31456 
31457 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31458 	    emit_insn (SET (out, const0_rtx));
31459 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31460 	  if (code == ASHIFTRT)
31461 	    emit_insn (gen_ashrsi3 (out_up, in_up,
31462 				    GEN_INT (31)));
31463 	  else
31464 	    emit_insn (SET (out_up, const0_rtx));
31465 	}
31466     }
31467   else
31468     {
31469       /* We have a shift-by-register.  */
31470       rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31471 
31472       /* This alternative requires the scratch registers.  */
31473       gcc_assert (scratch1 && REG_P (scratch1));
31474       gcc_assert (scratch2 && REG_P (scratch2));
31475 
31476       /* We will need the values "amount-32" and "32-amount" later.
31477          Swapping them around now allows the later code to be more general. */
31478       switch (code)
31479 	{
31480 	case ASHIFT:
31481 	  emit_insn (SUB_32 (scratch1, amount));
31482 	  emit_insn (RSB_32 (scratch2, amount));
31483 	  break;
31484 	case ASHIFTRT:
31485 	  emit_insn (RSB_32 (scratch1, amount));
31486 	  /* Also set CC = amount > 32.  */
31487 	  emit_insn (SUB_S_32 (scratch2, amount));
31488 	  break;
31489 	case LSHIFTRT:
31490 	  emit_insn (RSB_32 (scratch1, amount));
31491 	  emit_insn (SUB_32 (scratch2, amount));
31492 	  break;
31493 	default:
31494 	  gcc_unreachable ();
31495 	}
31496 
31497       /* Emit code like this:
31498 
31499 	 arithmetic-left:
31500 	    out_down = in_down << amount;
31501 	    out_down = (in_up << (amount - 32)) | out_down;
31502 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31503 	    out_up = in_up << amount;
31504 
31505 	 arithmetic-right:
31506 	    out_down = in_down >> amount;
31507 	    out_down = (in_up << (32 - amount)) | out_down;
31508 	    if (amount < 32)
31509 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
31510 	    out_up = in_up << amount;
31511 
31512 	 logical-right:
31513 	    out_down = in_down >> amount;
31514 	    out_down = (in_up << (32 - amount)) | out_down;
31515 	    if (amount < 32)
31516 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31517 	    out_up = in_up << amount;
31518 
31519 	  The ARM and Thumb2 variants are the same but implemented slightly
31520 	  differently.  If this were only called during expand we could just
31521 	  use the Thumb2 case and let combine do the right thing, but this
31522 	  can also be called from post-reload splitters.  */
31523 
31524       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31525 
31526       if (!TARGET_THUMB2)
31527 	{
31528 	  /* Emit code for ARM mode.  */
31529 	  emit_insn (SET (out_down,
31530 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31531 	  if (code == ASHIFTRT)
31532 	    {
31533 	      rtx_code_label *done_label = gen_label_rtx ();
31534 	      emit_jump_insn (BRANCH (LT, done_label));
31535 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31536 					     out_down)));
31537 	      emit_label (done_label);
31538 	    }
31539 	  else
31540 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31541 					   out_down)));
31542 	}
31543       else
31544 	{
31545 	  /* Emit code for Thumb2 mode.
31546 	     Thumb2 can't do shift and or in one insn.  */
31547 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31548 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31549 
31550 	  if (code == ASHIFTRT)
31551 	    {
31552 	      rtx_code_label *done_label = gen_label_rtx ();
31553 	      emit_jump_insn (BRANCH (LT, done_label));
31554 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31555 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
31556 	      emit_label (done_label);
31557 	    }
31558 	  else
31559 	    {
31560 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31561 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31562 	    }
31563 	}
31564 
31565       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31566     }
31567 
31568   #undef SUB_32
31569   #undef RSB_32
31570   #undef SUB_S_32
31571   #undef SET
31572   #undef SHIFT
31573   #undef LSHIFT
31574   #undef REV_LSHIFT
31575   #undef ORR
31576   #undef BRANCH
31577 }
31578 
31579 /* Returns true if the pattern is a valid symbolic address, which is either a
31580    symbol_ref or (symbol_ref + addend).
31581 
31582    According to the ARM ELF ABI, the initial addend of REL-type relocations
31583    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31584    literal field of the instruction as a 16-bit signed value in the range
31585    -32768 <= A < 32768.  */
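/* For example, (const (plus (symbol_ref "sym") (const_int 0x7fff))) is
   accepted by the function below, while an addend of 0x8000 is rejected
   because it does not fit in that signed 16-bit range.  */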
31586 
31587 bool
31588 arm_valid_symbolic_address_p (rtx addr)
31589 {
31590   rtx xop0, xop1 = NULL_RTX;
31591   rtx tmp = addr;
31592 
31593   if (target_word_relocations)
31594     return false;
31595 
31596   if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
31597     return true;
31598 
31599   /* (const (plus: symbol_ref const_int))  */
31600   if (GET_CODE (addr) == CONST)
31601     tmp = XEXP (addr, 0);
31602 
31603   if (GET_CODE (tmp) == PLUS)
31604     {
31605       xop0 = XEXP (tmp, 0);
31606       xop1 = XEXP (tmp, 1);
31607 
31608       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31609 	  return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31610     }
31611 
31612   return false;
31613 }
31614 
31615 /* Return TRUE if *COMPARISON is a comparison operation we can handle, and
31616    rewrite its operands into a form that is valid for that comparison.  */
31617 bool
31618 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31619 {
31620   enum rtx_code code = GET_CODE (*comparison);
31621   int code_int;
31622   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31623     ? GET_MODE (*op2) : GET_MODE (*op1);
31624 
31625   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31626 
31627   if (code == UNEQ || code == LTGT)
31628     return false;
31629 
31630   code_int = (int)code;
31631   arm_canonicalize_comparison (&code_int, op1, op2, 0);
31632   PUT_CODE (*comparison, (enum rtx_code)code_int);
31633 
31634   switch (mode)
31635     {
31636     case E_SImode:
31637       if (!arm_add_operand (*op1, mode))
31638 	*op1 = force_reg (mode, *op1);
31639       if (!arm_add_operand (*op2, mode))
31640 	*op2 = force_reg (mode, *op2);
31641       return true;
31642 
31643     case E_DImode:
31644       /* gen_compare_reg() will sort out any invalid operands.  */
31645       return true;
31646 
31647     case E_HFmode:
31648       if (!TARGET_VFP_FP16INST)
31649 	break;
31650       /* FP16 comparisons are done in SF mode.  */
31651       mode = SFmode;
31652       *op1 = convert_to_mode (mode, *op1, 1);
31653       *op2 = convert_to_mode (mode, *op2, 1);
31654       /* Fall through.  */
31655     case E_SFmode:
31656     case E_DFmode:
31657       if (!vfp_compare_operand (*op1, mode))
31658 	*op1 = force_reg (mode, *op1);
31659       if (!vfp_compare_operand (*op2, mode))
31660 	*op2 = force_reg (mode, *op2);
31661       return true;
31662     default:
31663       break;
31664     }
31665 
31666   return false;
31667 
31668 }
31669 
31670 /* Maximum number of instructions to set a block of memory.  */
31671 static int
31672 arm_block_set_max_insns (void)
31673 {
31674   if (optimize_function_for_size_p (cfun))
31675     return 4;
31676   else
31677     return current_tune->max_insns_inline_memset;
31678 }
31679 
31680 /* Return TRUE if it's profitable to set a block of memory for the
31681    non-vectorized case.  VAL is the value to set the memory
31682    with.  LENGTH is the number of bytes to set.  ALIGN is the
31683    alignment of the destination memory in bytes.  UNALIGNED_P
31684    is TRUE if we can only set the memory with instructions
31685    meeting alignment requirements.  USE_STRD_P is TRUE if we
31686    can use strd to set the memory.  */
31687 static bool
31688 arm_block_set_non_vect_profit_p (rtx val,
31689 				 unsigned HOST_WIDE_INT length,
31690 				 unsigned HOST_WIDE_INT align,
31691 				 bool unaligned_p, bool use_strd_p)
31692 {
31693   int num = 0;
31694   /* For leftovers in bytes of 0-7, we can set the memory block using
31695      strb/strh/str with minimum instruction number.  */
31696   const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
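  /* For example, 7 leftover bytes take str + strh + strb, matching
     leftover[7] == 3, while 4 leftover bytes take a single str.  */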
31697 
31698   if (unaligned_p)
31699     {
31700       num = arm_const_inline_cost (SET, val);
31701       num += length / align + length % align;
31702     }
31703   else if (use_strd_p)
31704     {
31705       num = arm_const_double_inline_cost (val);
31706       num += (length >> 3) + leftover[length & 7];
31707     }
31708   else
31709     {
31710       num = arm_const_inline_cost (SET, val);
31711       num += (length >> 2) + leftover[length & 3];
31712     }
31713 
31714   /* We may be able to combine last pair STRH/STRB into a single STR
31715      by shifting one byte back.  */
31716   if (unaligned_access && length > 3 && (length & 3) == 3)
31717     num--;
31718 
31719   return (num <= arm_block_set_max_insns ());
31720 }
31721 
31722 /* Return TRUE if it's profitable to set a block of memory for the
31723    vectorized case.  LENGTH is the number of bytes to set.
31724    ALIGN is the alignment of destination memory in bytes.
31725    MODE is the vector mode used to set the memory.  */
31726 static bool
31727 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
31728 			     unsigned HOST_WIDE_INT align,
31729 			     machine_mode mode)
31730 {
31731   int num;
31732   bool unaligned_p = ((align & 3) != 0);
31733   unsigned int nelt = GET_MODE_NUNITS (mode);
31734 
31735   /* Instruction loading constant value.  */
31736   num = 1;
31737   /* Instructions storing the memory.  */
31738   num += (length + nelt - 1) / nelt;
31739   /* Instructions adjusting the address expression.  We only need to
31740      adjust the address when the destination is 4-byte aligned but the
31741      leftover bytes can only be stored with a misaligned store.  */
31742   if (!unaligned_p && (length & 3) != 0)
31743     num++;
31744 
31745   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
31746   if (!unaligned_p && mode == V16QImode)
31747     num--;
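  /* As a worked example of the arithmetic above: length == 21 with
     V16QImode and a word-aligned destination costs 1 (load constant)
     + 2 (stores) + 1 (address adjust) - 1 (first vst1) = 3 instructions,
     which is then compared against arm_block_set_max_insns ().  */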
31748 
31749   return (num <= arm_block_set_max_insns ());
31750 }
31751 
31752 /* Set a block of memory using vectorization instructions for the
31753    unaligned case.  We fill the first LENGTH bytes of the memory
31754    area starting from DSTBASE with byte constant VALUE.  ALIGN is
31755    the alignment requirement of memory.  Return TRUE if succeeded.  */
31756 static bool
31757 arm_block_set_unaligned_vect (rtx dstbase,
31758 			      unsigned HOST_WIDE_INT length,
31759 			      unsigned HOST_WIDE_INT value,
31760 			      unsigned HOST_WIDE_INT align)
31761 {
31762   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
31763   rtx dst, mem;
31764   rtx val_vec, reg;
31765   rtx (*gen_func) (rtx, rtx);
31766   machine_mode mode;
31767   unsigned HOST_WIDE_INT v = value;
31768   unsigned int offset = 0;
31769   gcc_assert ((align & 0x3) != 0);
31770   nelt_v8 = GET_MODE_NUNITS (V8QImode);
31771   nelt_v16 = GET_MODE_NUNITS (V16QImode);
31772   if (length >= nelt_v16)
31773     {
31774       mode = V16QImode;
31775       gen_func = gen_movmisalignv16qi;
31776     }
31777   else
31778     {
31779       mode = V8QImode;
31780       gen_func = gen_movmisalignv8qi;
31781     }
31782   nelt_mode = GET_MODE_NUNITS (mode);
31783   gcc_assert (length >= nelt_mode);
31784   /* Skip if it isn't profitable.  */
31785   if (!arm_block_set_vect_profit_p (length, align, mode))
31786     return false;
31787 
31788   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31789   mem = adjust_automodify_address (dstbase, mode, dst, offset);
31790 
31791   v = sext_hwi (v, BITS_PER_WORD);
31792 
31793   reg = gen_reg_rtx (mode);
31794   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
31795   /* Emit instruction loading the constant value.  */
31796   emit_move_insn (reg, val_vec);
31797 
31798   /* Handle nelt_mode bytes in a vector.  */
31799   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
31800     {
31801       emit_insn ((*gen_func) (mem, reg));
31802       if (i + 2 * nelt_mode <= length)
31803 	{
31804 	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
31805 	  offset += nelt_mode;
31806 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
31807 	}
31808     }
31809 
31810   /* If there are not less than nelt_v8 bytes leftover, we must be in
31811      V16QI mode.  */
31812   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
31813 
31814   /* Handle (8, 16) bytes leftover.  */
31815   if (i + nelt_v8 < length)
31816     {
31817       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
31818       offset += length - i;
31819       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31820 
31821       /* We are shifting bytes back, set the alignment accordingly.  */
31822       if ((length & 1) != 0 && align >= 2)
31823 	set_mem_align (mem, BITS_PER_UNIT);
31824 
31825       emit_insn (gen_movmisalignv16qi (mem, reg));
31826     }
31827   /* Handle (0, 8] bytes leftover.  */
31828   else if (i < length && i + nelt_v8 >= length)
31829     {
31830       if (mode == V16QImode)
31831 	reg = gen_lowpart (V8QImode, reg);
31832 
31833       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
31834 					      + (nelt_mode - nelt_v8))));
31835       offset += (length - i) + (nelt_mode - nelt_v8);
31836       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
31837 
31838       /* We are shifting bytes back, set the alignment accordingly.  */
31839       if ((length & 1) != 0 && align >= 2)
31840 	set_mem_align (mem, BITS_PER_UNIT);
31841 
31842       emit_insn (gen_movmisalignv8qi (mem, reg));
31843     }
31844 
31845   return true;
31846 }
31847 
31848 /* Set a block of memory using vectorization instructions for the
31849    aligned case.  We fill the first LENGTH bytes of the memory area
31850    starting from DSTBASE with byte constant VALUE.  ALIGN is the
31851    alignment requirement of memory.  Return TRUE if succeeded.  */
31852 static bool
31853 arm_block_set_aligned_vect (rtx dstbase,
31854 			    unsigned HOST_WIDE_INT length,
31855 			    unsigned HOST_WIDE_INT value,
31856 			    unsigned HOST_WIDE_INT align)
31857 {
31858   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
31859   rtx dst, addr, mem;
31860   rtx val_vec, reg;
31861   machine_mode mode;
31862   unsigned int offset = 0;
31863 
31864   gcc_assert ((align & 0x3) == 0);
31865   nelt_v8 = GET_MODE_NUNITS (V8QImode);
31866   nelt_v16 = GET_MODE_NUNITS (V16QImode);
31867   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
31868     mode = V16QImode;
31869   else
31870     mode = V8QImode;
31871 
31872   nelt_mode = GET_MODE_NUNITS (mode);
31873   gcc_assert (length >= nelt_mode);
31874   /* Skip if it isn't profitable.  */
31875   if (!arm_block_set_vect_profit_p (length, align, mode))
31876     return false;
31877 
31878   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31879 
31880   reg = gen_reg_rtx (mode);
31881   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
31882   /* Emit instruction loading the constant value.  */
31883   emit_move_insn (reg, val_vec);
31884 
31885   i = 0;
31886   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
31887   if (mode == V16QImode)
31888     {
31889       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31890       emit_insn (gen_movmisalignv16qi (mem, reg));
31891       i += nelt_mode;
31892       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
31893       if (i + nelt_v8 < length && i + nelt_v16 > length)
31894 	{
31895 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31896 	  offset += length - nelt_mode;
31897 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
31898 	  /* We are shifting bytes back, set the alignment accordingly.  */
31899 	  if ((length & 0x3) == 0)
31900 	    set_mem_align (mem, BITS_PER_UNIT * 4);
31901 	  else if ((length & 0x1) == 0)
31902 	    set_mem_align (mem, BITS_PER_UNIT * 2);
31903 	  else
31904 	    set_mem_align (mem, BITS_PER_UNIT);
31905 
31906 	  emit_insn (gen_movmisalignv16qi (mem, reg));
31907 	  return true;
31908 	}
31909       /* Fall through for bytes leftover.  */
31910       mode = V8QImode;
31911       nelt_mode = GET_MODE_NUNITS (mode);
31912       reg = gen_lowpart (V8QImode, reg);
31913     }
31914 
31915   /* Handle 8 bytes in a vector.  */
31916   for (; (i + nelt_mode <= length); i += nelt_mode)
31917     {
31918       addr = plus_constant (Pmode, dst, i);
31919       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
31920       if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
31921 	emit_move_insn (mem, reg);
31922       else
31923 	emit_insn (gen_unaligned_storev8qi (mem, reg));
31924     }
31925 
31926   /* Handle single word leftover by shifting 4 bytes back.  We can
31927      use aligned access for this case.  */
31928   if (i + UNITS_PER_WORD == length)
31929     {
31930       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
31931       offset += i - UNITS_PER_WORD;
31932       mem = adjust_automodify_address (dstbase, mode, addr, offset);
31933       /* We are shifting 4 bytes back, set the alignment accordingly.  */
31934       if (align > UNITS_PER_WORD)
31935 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
31936 
31937       emit_insn (gen_unaligned_storev8qi (mem, reg));
31938     }
31939   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
31940      We have to use unaligned access for this case.  */
31941   else if (i < length)
31942     {
31943       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
31944       offset += length - nelt_mode;
31945       mem = adjust_automodify_address (dstbase, mode, dst, offset);
31946       /* We are shifting bytes back, set the alignment accordingly.  */
31947       if ((length & 1) == 0)
31948 	set_mem_align (mem, BITS_PER_UNIT * 2);
31949       else
31950 	set_mem_align (mem, BITS_PER_UNIT);
31951 
31952       emit_insn (gen_movmisalignv8qi (mem, reg));
31953     }
31954 
31955   return true;
31956 }
31957 
31958 /* Set a block of memory using plain strh/strb instructions, only
31959    using instructions allowed by ALIGN on the processor.  We fill the
31960    first LENGTH bytes of the memory area starting from DSTBASE
31961    with byte constant VALUE.  ALIGN is the alignment requirement
31962    of memory.  */
31963 static bool
31964 arm_block_set_unaligned_non_vect (rtx dstbase,
31965 				  unsigned HOST_WIDE_INT length,
31966 				  unsigned HOST_WIDE_INT value,
31967 				  unsigned HOST_WIDE_INT align)
31968 {
31969   unsigned int i;
31970   rtx dst, addr, mem;
31971   rtx val_exp, val_reg, reg;
31972   machine_mode mode;
31973   HOST_WIDE_INT v = value;
31974 
31975   gcc_assert (align == 1 || align == 2);
31976 
31977   if (align == 2)
31978     v |= (value << BITS_PER_UNIT);
31979 
31980   v = sext_hwi (v, BITS_PER_WORD);
31981   val_exp = GEN_INT (v);
31982   /* Skip if it isn't profitable.  */
31983   if (!arm_block_set_non_vect_profit_p (val_exp, length,
31984 					align, true, false))
31985     return false;
31986 
31987   dst = copy_addr_to_reg (XEXP (dstbase, 0));
31988   mode = (align == 2 ? HImode : QImode);
31989   val_reg = force_reg (SImode, val_exp);
31990   reg = gen_lowpart (mode, val_reg);
31991 
31992   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
31993     {
31994       addr = plus_constant (Pmode, dst, i);
31995       mem = adjust_automodify_address (dstbase, mode, addr, i);
31996       emit_move_insn (mem, reg);
31997     }
31998 
31999   /* Handle single byte leftover.  */
32000   if (i + 1 == length)
32001     {
32002       reg = gen_lowpart (QImode, val_reg);
32003       addr = plus_constant (Pmode, dst, i);
32004       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32005       emit_move_insn (mem, reg);
32006       i++;
32007     }
32008 
32009   gcc_assert (i == length);
32010   return true;
32011 }
32012 
32013 /* Set a block of memory using plain strd/str/strh/strb instructions,
32014    to permit unaligned copies on processors which support unaligned
32015    semantics for those instructions.  We fill the first LENGTH bytes
32016    of the memory area starting from DSTBASE with byte constant VALUE.
32017    ALIGN is the alignment requirement of memory.  */
32018 static bool
32019 arm_block_set_aligned_non_vect (rtx dstbase,
32020 				unsigned HOST_WIDE_INT length,
32021 				unsigned HOST_WIDE_INT value,
32022 				unsigned HOST_WIDE_INT align)
32023 {
32024   unsigned int i;
32025   rtx dst, addr, mem;
32026   rtx val_exp, val_reg, reg;
32027   unsigned HOST_WIDE_INT v;
32028   bool use_strd_p;
32029 
32030   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32031 		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
32032 
32033   v = (value | (value << 8) | (value << 16) | (value << 24));
32034   if (length < UNITS_PER_WORD)
32035     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32036 
32037   if (use_strd_p)
32038     v |= (v << BITS_PER_WORD);
32039   else
32040     v = sext_hwi (v, BITS_PER_WORD);
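  /* For instance, VALUE == 0xab yields the word pattern 0xabababab (then
     sign-extended into v) for the word-store case, or the double-word
     pattern 0xabababababababab when strd can be used.  */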
32041 
32042   val_exp = GEN_INT (v);
32043   /* Skip if it isn't profitable.  */
32044   if (!arm_block_set_non_vect_profit_p (val_exp, length,
32045 					align, false, use_strd_p))
32046     {
32047       if (!use_strd_p)
32048 	return false;
32049 
32050       /* Try without strd.  */
32051       v = (v >> BITS_PER_WORD);
32052       v = sext_hwi (v, BITS_PER_WORD);
32053       val_exp = GEN_INT (v);
32054       use_strd_p = false;
32055       if (!arm_block_set_non_vect_profit_p (val_exp, length,
32056 					    align, false, use_strd_p))
32057 	return false;
32058     }
32059 
32060   i = 0;
32061   dst = copy_addr_to_reg (XEXP (dstbase, 0));
32062   /* Handle double words using strd if possible.  */
32063   if (use_strd_p)
32064     {
32065       val_reg = force_reg (DImode, val_exp);
32066       reg = val_reg;
32067       for (; (i + 8 <= length); i += 8)
32068 	{
32069 	  addr = plus_constant (Pmode, dst, i);
32070 	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
32071 	  if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32072 	    emit_move_insn (mem, reg);
32073 	  else
32074 	    emit_insn (gen_unaligned_storedi (mem, reg));
32075 	}
32076     }
32077   else
32078     val_reg = force_reg (SImode, val_exp);
32079 
32080   /* Handle words.  */
32081   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32082   for (; (i + 4 <= length); i += 4)
32083     {
32084       addr = plus_constant (Pmode, dst, i);
32085       mem = adjust_automodify_address (dstbase, SImode, addr, i);
32086       if ((align & 3) == 0)
32087 	emit_move_insn (mem, reg);
32088       else
32089 	emit_insn (gen_unaligned_storesi (mem, reg));
32090     }
32091 
32092   /* Merge last pair of STRH and STRB into a STR if possible.  */
32093   if (unaligned_access && i > 0 && (i + 3) == length)
32094     {
32095       addr = plus_constant (Pmode, dst, i - 1);
32096       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32097       /* We are shifting one byte back, set the alignment accordingly.  */
32098       if ((align & 1) == 0)
32099 	set_mem_align (mem, BITS_PER_UNIT);
32100 
32101       /* Most likely this is an unaligned access, and we can't tell at
32102 	 compilation time.  */
32103       emit_insn (gen_unaligned_storesi (mem, reg));
32104       return true;
32105     }
32106 
32107   /* Handle half word leftover.  */
32108   if (i + 2 <= length)
32109     {
32110       reg = gen_lowpart (HImode, val_reg);
32111       addr = plus_constant (Pmode, dst, i);
32112       mem = adjust_automodify_address (dstbase, HImode, addr, i);
32113       if ((align & 1) == 0)
32114 	emit_move_insn (mem, reg);
32115       else
32116 	emit_insn (gen_unaligned_storehi (mem, reg));
32117 
32118       i += 2;
32119     }
32120 
32121   /* Handle single byte leftover.  */
32122   if (i + 1 == length)
32123     {
32124       reg = gen_lowpart (QImode, val_reg);
32125       addr = plus_constant (Pmode, dst, i);
32126       mem = adjust_automodify_address (dstbase, QImode, addr, i);
32127       emit_move_insn (mem, reg);
32128     }
32129 
32130   return true;
32131 }
32132 
32133 /* Set a block of memory using vectorization instructions for both
32134    aligned and unaligned cases.  We fill the first LENGTH bytes of
32135    the memory area starting from DSTBASE with byte constant VALUE.
32136    ALIGN is the alignment requirement of memory.  */
32137 static bool
32138 arm_block_set_vect (rtx dstbase,
32139 		    unsigned HOST_WIDE_INT length,
32140 		    unsigned HOST_WIDE_INT value,
32141 		    unsigned HOST_WIDE_INT align)
32142 {
32143   /* Check whether we need to use unaligned store instruction.  */
32144   if (((align & 3) != 0 || (length & 3) != 0)
32145       /* Check whether unaligned store instruction is available.  */
32146       && (!unaligned_access || BYTES_BIG_ENDIAN))
32147     return false;
32148 
32149   if ((align & 3) == 0)
32150     return arm_block_set_aligned_vect (dstbase, length, value, align);
32151   else
32152     return arm_block_set_unaligned_vect (dstbase, length, value, align);
32153 }
32154 
32155 /* Expand a string store operation.  First we try to do it using
32156    vectorization instructions, then with ARM unaligned access and
32157    double-word stores if profitable.  OPERANDS[0] is the destination,
32158    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32159    initialize the memory, OPERANDS[3] is the known alignment of the
32160    destination.  */
32161 bool
32162 arm_gen_setmem (rtx *operands)
32163 {
32164   rtx dstbase = operands[0];
32165   unsigned HOST_WIDE_INT length;
32166   unsigned HOST_WIDE_INT value;
32167   unsigned HOST_WIDE_INT align;
32168 
32169   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32170     return false;
32171 
32172   length = UINTVAL (operands[1]);
32173   if (length > 64)
32174     return false;
32175 
32176   value = (UINTVAL (operands[2]) & 0xFF);
32177   align = UINTVAL (operands[3]);
32178   if (TARGET_NEON && length >= 8
32179       && current_tune->string_ops_prefer_neon
32180       && arm_block_set_vect (dstbase, length, value, align))
32181     return true;
32182 
32183   if (!unaligned_access && (align & 3) != 0)
32184     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32185 
32186   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32187 }
32188 
32189 
32190 static bool
32191 arm_macro_fusion_p (void)
32192 {
32193   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32194 }
32195 
32196 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32197    for MOVW / MOVT macro fusion.  */
32198 
32199 static bool
32200 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32201 {
32202   /* We are trying to fuse
32203      movw imm / movt imm
32204     instructions as a group that gets scheduled together.  */
32205 
32206   rtx set_dest = SET_DEST (curr_set);
32207 
32208   if (GET_MODE (set_dest) != SImode)
32209     return false;
32210 
32211   /* We are trying to match:
32212      prev (movw)  == (set (reg r0) (const_int imm16))
32213      curr (movt) == (set (zero_extract (reg r0)
32214 					(const_int 16)
32215 					(const_int 16))
32216 			  (const_int imm16_1))
32217      or
32218      prev (movw) == (set (reg r1)
32219 			  (high (symbol_ref ("SYM"))))
32220     curr (movt) == (set (reg r0)
32221 			(lo_sum (reg r1)
32222 				(symbol_ref ("SYM"))))  */
32223 
32224     if (GET_CODE (set_dest) == ZERO_EXTRACT)
32225       {
32226 	if (CONST_INT_P (SET_SRC (curr_set))
32227 	    && CONST_INT_P (SET_SRC (prev_set))
32228 	    && REG_P (XEXP (set_dest, 0))
32229 	    && REG_P (SET_DEST (prev_set))
32230 	    && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32231 	  return true;
32232 
32233       }
32234     else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32235 	     && REG_P (SET_DEST (curr_set))
32236 	     && REG_P (SET_DEST (prev_set))
32237 	     && GET_CODE (SET_SRC (prev_set)) == HIGH
32238 	     && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32239       return true;
32240 
32241   return false;
32242 }
32243 
32244 static bool
32245 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32246 {
32247   rtx prev_set = single_set (prev);
32248   rtx curr_set = single_set (curr);
32249 
32250   if (!prev_set
32251       || !curr_set)
32252     return false;
32253 
32254   if (any_condjump_p (curr))
32255     return false;
32256 
32257   if (!arm_macro_fusion_p ())
32258     return false;
32259 
32260   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32261       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32262     return true;
32263 
32264   return false;
32265 }
32266 
32267 /* Return true iff the instruction fusion described by OP is enabled.  */
32268 bool
32269 arm_fusion_enabled_p (tune_params::fuse_ops op)
32270 {
32271   return current_tune->fusible_ops & op;
32272 }
32273 
32274 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
32275    scheduled for speculative execution.  Reject the long-running division
32276    and square-root instructions.  */
32277 
32278 static bool
32279 arm_sched_can_speculate_insn (rtx_insn *insn)
32280 {
32281   switch (get_attr_type (insn))
32282     {
32283       case TYPE_SDIV:
32284       case TYPE_UDIV:
32285       case TYPE_FDIVS:
32286       case TYPE_FDIVD:
32287       case TYPE_FSQRTS:
32288       case TYPE_FSQRTD:
32289       case TYPE_NEON_FP_SQRT_S:
32290       case TYPE_NEON_FP_SQRT_D:
32291       case TYPE_NEON_FP_SQRT_S_Q:
32292       case TYPE_NEON_FP_SQRT_D_Q:
32293       case TYPE_NEON_FP_DIV_S:
32294       case TYPE_NEON_FP_DIV_D:
32295       case TYPE_NEON_FP_DIV_S_Q:
32296       case TYPE_NEON_FP_DIV_D_Q:
32297 	return false;
32298       default:
32299 	return true;
32300     }
32301 }
32302 
32303 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
32304 
32305 static unsigned HOST_WIDE_INT
32306 arm_asan_shadow_offset (void)
32307 {
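  /* 1 << 29 == 0x20000000 is the constant used as the shadow memory
     offset here.  */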
32308   return HOST_WIDE_INT_1U << 29;
32309 }
32310 
32311 
32312 /* This is a temporary fix for PR60655.  Ideally we need
32313    to handle most of these cases in the generic part but
32314    currently we reject minus (..) (sym_ref).  We try to
32315    ameliorate the case with minus (sym_ref1) (sym_ref2)
32316    where they are in the same section.  */
32317 
32318 static bool
32319 arm_const_not_ok_for_debug_p (rtx p)
32320 {
32321   tree decl_op0 = NULL;
32322   tree decl_op1 = NULL;
32323 
32324   if (GET_CODE (p) == UNSPEC)
32325     return true;
32326   if (GET_CODE (p) == MINUS)
32327     {
32328       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32329 	{
32330 	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32331 	  if (decl_op1
32332 	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32333 	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32334 	    {
32335 	      if ((VAR_P (decl_op1)
32336 		   || TREE_CODE (decl_op1) == CONST_DECL)
32337 		  && (VAR_P (decl_op0)
32338 		      || TREE_CODE (decl_op0) == CONST_DECL))
32339 		return (get_variable_section (decl_op1, false)
32340 			!= get_variable_section (decl_op0, false));
32341 
32342 	      if (TREE_CODE (decl_op1) == LABEL_DECL
32343 		  && TREE_CODE (decl_op0) == LABEL_DECL)
32344 		return (DECL_CONTEXT (decl_op1)
32345 			!= DECL_CONTEXT (decl_op0));
32346 	    }
32347 
32348 	  return true;
32349 	}
32350     }
32351 
32352   return false;
32353 }
32354 
32355 /* Return TRUE if X is a reference to a value in a constant pool.  */
32356 extern bool
32357 arm_is_constant_pool_ref (rtx x)
32358 {
32359   return (MEM_P (x)
32360 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32361 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32362 }
32363 
32364 /* Remember the last target of arm_set_current_function.  */
32365 static GTY(()) tree arm_previous_fndecl;
32366 
32367 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
32368 
32369 void
32370 save_restore_target_globals (tree new_tree)
32371 {
32372   /* If we have a previous state, use it.  */
32373   if (TREE_TARGET_GLOBALS (new_tree))
32374     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32375   else if (new_tree == target_option_default_node)
32376     restore_target_globals (&default_target_globals);
32377   else
32378     {
32379       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
32380       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32381     }
32382 
32383   arm_option_params_internal ();
32384 }
32385 
32386 /* Invalidate arm_previous_fndecl.  */
32387 
32388 void
32389 arm_reset_previous_fndecl (void)
32390 {
32391   arm_previous_fndecl = NULL_TREE;
32392 }
32393 
32394 /* Establish appropriate back-end context for processing the function
32395    FNDECL.  The argument might be NULL to indicate processing at top
32396    level, outside of any function scope.  */
32397 
32398 static void
32399 arm_set_current_function (tree fndecl)
32400 {
32401   if (!fndecl || fndecl == arm_previous_fndecl)
32402     return;
32403 
32404   tree old_tree = (arm_previous_fndecl
32405 		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32406 		   : NULL_TREE);
32407 
32408   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32409 
32410   /* If current function has no attributes but previous one did,
32411      use the default node.  */
32412   if (! new_tree && old_tree)
32413     new_tree = target_option_default_node;
32414 
32415   /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
32416      the default have been handled by save_restore_target_globals from
32417      arm_pragma_target_parse.  */
32418   if (old_tree == new_tree)
32419     return;
32420 
32421   arm_previous_fndecl = fndecl;
32422 
32423   /* First set the target options.  */
32424   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
32425 
32426   save_restore_target_globals (new_tree);
32427 }
32428 
32429 /* Implement TARGET_OPTION_PRINT.  */
32430 
32431 static void
32432 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32433 {
32434   int flags = ptr->x_target_flags;
32435   const char *fpu_name;
32436 
32437   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32438 	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32439 
32440   fprintf (file, "%*sselected isa %s\n", indent, "",
32441 	   TARGET_THUMB2_P (flags) ? "thumb2" :
32442 	   TARGET_THUMB_P (flags) ? "thumb1" :
32443 	   "arm");
32444 
32445   if (ptr->x_arm_arch_string)
32446     fprintf (file, "%*sselected architecture %s\n", indent, "",
32447 	     ptr->x_arm_arch_string);
32448 
32449   if (ptr->x_arm_cpu_string)
32450     fprintf (file, "%*sselected CPU %s\n", indent, "",
32451 	     ptr->x_arm_cpu_string);
32452 
32453   if (ptr->x_arm_tune_string)
32454     fprintf (file, "%*sselected tune %s\n", indent, "",
32455 	     ptr->x_arm_tune_string);
32456 
32457   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32458 }
32459 
32460 /* Hook to determine if one function can safely inline another.  */
32461 
32462 static bool
32463 arm_can_inline_p (tree caller, tree callee)
32464 {
32465   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32466   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32467   bool can_inline = true;
32468 
32469   struct cl_target_option *caller_opts
32470 	= TREE_TARGET_OPTION (caller_tree ? caller_tree
32471 					   : target_option_default_node);
32472 
32473   struct cl_target_option *callee_opts
32474 	= TREE_TARGET_OPTION (callee_tree ? callee_tree
32475 					   : target_option_default_node);
32476 
32477   if (callee_opts == caller_opts)
32478     return true;
32479 
32480   /* Callee's ISA features should be a subset of the caller's.  */
32481   struct arm_build_target caller_target;
32482   struct arm_build_target callee_target;
32483   caller_target.isa = sbitmap_alloc (isa_num_bits);
32484   callee_target.isa = sbitmap_alloc (isa_num_bits);
32485 
32486   arm_configure_build_target (&caller_target, caller_opts, false);
32487   arm_configure_build_target (&callee_target, callee_opts, false);
32488   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32489     can_inline = false;
32490 
32491   sbitmap_free (caller_target.isa);
32492   sbitmap_free (callee_target.isa);
32493 
32494   /* It is OK to inline between different modes.
32495      Functions with mode-specific instructions, e.g. using asm,
32496      must be explicitly protected with noinline.  */
32497   return can_inline;
32498 }
32499 
32500 /* Hook to fix up a function's alignment when it is affected by a target
   attribute.  */
32501 
32502 static void
32503 arm_relayout_function (tree fndecl)
32504 {
32505   if (DECL_USER_ALIGN (fndecl))
32506     return;
32507 
32508   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32509 
32510   if (!callee_tree)
32511     callee_tree = target_option_default_node;
32512 
32513   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32514   SET_DECL_ALIGN
32515     (fndecl,
32516      FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32517 }
32518 
32519 /* Inner function to process the attribute((target(...))), take an argument and
32520    set the current options from the argument.  If we have a list, recursively
32521    go over the list.  */
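/* For instance, a declaration such as
     __attribute__((target("thumb,fpu=vfpv3-d16")))
   (an illustrative example, not taken from this file) is split on commas
   below and each option is processed in turn.  */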
32522 
32523 static bool
32524 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32525 {
32526   if (TREE_CODE (args) == TREE_LIST)
32527     {
32528       bool ret = true;
32529 
32530       for (; args; args = TREE_CHAIN (args))
32531 	if (TREE_VALUE (args)
32532 	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32533 	  ret = false;
32534       return ret;
32535     }
32536 
32537   else if (TREE_CODE (args) != STRING_CST)
32538     {
32539       error ("attribute %<target%> argument not a string");
32540       return false;
32541     }
32542 
32543   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32544   char *q;
32545 
32546   while ((q = strtok (argstr, ",")) != NULL)
32547     {
32548       argstr = NULL;
32549       if (!strcmp (q, "thumb"))
32550 	{
32551 	  opts->x_target_flags |= MASK_THUMB;
32552 	  if (TARGET_FDPIC && !arm_arch_thumb2)
32553 	    sorry ("FDPIC mode is not supported in Thumb-1 mode");
32554 	}
32555 
32556       else if (!strcmp (q, "arm"))
32557 	opts->x_target_flags &= ~MASK_THUMB;
32558 
32559       else if (!strcmp (q, "general-regs-only"))
32560 	opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32561 
32562       else if (!strncmp (q, "fpu=", 4))
32563 	{
32564 	  int fpu_index;
32565 	  if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32566 				       &fpu_index, CL_TARGET))
32567 	    {
32568 	      error ("invalid fpu for target attribute or pragma %qs", q);
32569 	      return false;
32570 	    }
32571 	  if (fpu_index == TARGET_FPU_auto)
32572 	    {
32573 	      /* This doesn't really make sense until we support
32574 		 general dynamic selection of the architecture and all
32575 		 sub-features.  */
32576 	      sorry ("auto fpu selection not currently permitted here");
32577 	      return false;
32578 	    }
32579 	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32580 	}
32581       else if (!strncmp (q, "arch=", 5))
32582 	{
32583 	  char *arch = q + 5;
32584 	  const arch_option *arm_selected_arch
32585 	     = arm_parse_arch_option_name (all_architectures, "arch", arch);
32586 
32587 	  if (!arm_selected_arch)
32588 	    {
32589 	      error ("invalid architecture for target attribute or pragma %qs",
32590 		     q);
32591 	      return false;
32592 	    }
32593 
32594 	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32595 	}
32596       else if (q[0] == '+')
32597 	{
32598 	  opts->x_arm_arch_string
32599 	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32600 	}
32601       else
32602 	{
32603 	  error ("unknown target attribute or pragma %qs", q);
32604 	  return false;
32605 	}
32606     }
32607 
32608   return true;
32609 }
32610 
32611 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
32612 
32613 tree
32614 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32615 				 struct gcc_options *opts_set)
32616 {
32617   struct cl_target_option cl_opts;
32618 
32619   if (!arm_valid_target_attribute_rec (args, opts))
32620     return NULL_TREE;
32621 
32622   cl_target_option_save (&cl_opts, opts);
32623   arm_configure_build_target (&arm_active_target, &cl_opts, false);
32624   arm_option_check_internal (opts);
32625   /* Do any overrides, such as global options arch=xxx.
32626      We do this since arm_active_target was overridden.  */
32627   arm_option_reconfigure_globals ();
32628   arm_options_perform_arch_sanity_checks ();
32629   arm_option_override_internal (opts, opts_set);
32630 
32631   return build_target_option_node (opts);
32632 }
32633 
32634 static void
32635 add_attribute  (const char * mode, tree *attributes)
32636 {
32637   size_t len = strlen (mode);
32638   tree value = build_string (len, mode);
32639 
32640   TREE_TYPE (value) = build_array_type (char_type_node,
32641 					build_index_type (size_int (len)));
32642 
32643   *attributes = tree_cons (get_identifier ("target"),
32644 			   build_tree_list (NULL_TREE, value),
32645 			   *attributes);
32646 }
32647 
32648 /* For testing.  Alternately insert thumb or arm mode on functions.  */
32649 
32650 static void
32651 arm_insert_attributes (tree fndecl, tree * attributes)
32652 {
32653   const char *mode;
32654 
32655   if (! TARGET_FLIP_THUMB)
32656     return;
32657 
32658   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
32659       || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
32660    return;
32661 
32662   /* Nested definitions must inherit mode.  */
32663   if (current_function_decl)
32664    {
32665      mode = TARGET_THUMB ? "thumb" : "arm";
32666      add_attribute (mode, attributes);
32667      return;
32668    }
32669 
32670   /* If there is already a setting don't change it.  */
32671   if (lookup_attribute ("target", *attributes) != NULL)
32672     return;
32673 
32674   mode = thumb_flipper ? "thumb" : "arm";
32675   add_attribute (mode, attributes);
32676 
32677   thumb_flipper = !thumb_flipper;
32678 }
32679 
32680 /* Hook to validate attribute((target("string"))).  */
32681 
32682 static bool
32683 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
32684 			      tree args, int ARG_UNUSED (flags))
32685 {
32686   bool ret = true;
32687   struct gcc_options func_options;
32688   tree cur_tree, new_optimize;
32689   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32690 
32691   /* Get the optimization options of the current function.  */
32692   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32693 
32694   /* If the function changed the optimization levels as well as setting target
32695      options, start with the optimizations specified.  */
32696   if (!func_optimize)
32697     func_optimize = optimization_default_node;
32698 
32699   /* Init func_options.  */
32700   memset (&func_options, 0, sizeof (func_options));
32701   init_options_struct (&func_options, NULL);
32702   lang_hooks.init_options_struct (&func_options);
32703 
32704   /* Initialize func_options to the defaults.  */
32705   cl_optimization_restore (&func_options,
32706 			   TREE_OPTIMIZATION (func_optimize));
32707 
32708   cl_target_option_restore (&func_options,
32709 			    TREE_TARGET_OPTION (target_option_default_node));
32710 
32711   /* Set func_options flags with new target mode.  */
32712   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
32713 					      &global_options_set);
32714 
32715   if (cur_tree == NULL_TREE)
32716     ret = false;
32717 
32718   new_optimize = build_optimization_node (&func_options);
32719 
32720   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
32721 
32722   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32723 
32724   return ret;
32725 }
32726 
32727 /* Match an ISA feature bitmap to a named FPU.  We always use the
32728    first entry that exactly matches the feature set, so that we
32729    effectively canonicalize the FPU name for the assembler.  */
32730 static const char*
32731 arm_identify_fpu_from_isa (sbitmap isa)
32732 {
32733   auto_sbitmap fpubits (isa_num_bits);
32734   auto_sbitmap cand_fpubits (isa_num_bits);
32735 
32736   bitmap_and (fpubits, isa, isa_all_fpubits_internal);
32737 
32738   /* If there are no ISA feature bits relating to the FPU, we must be
32739      doing soft-float.  */
32740   if (bitmap_empty_p (fpubits))
32741     return "softvfp";
32742 
32743   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
32744     {
32745       arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
32746       if (bitmap_equal_p (fpubits, cand_fpubits))
32747 	return all_fpus[i].name;
32748     }
32749   /* We must find an entry, or things have gone wrong.  */
32750   gcc_unreachable ();
32751 }
32752 
32753 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
32754    by the function FNDECL.  */
32755 void
32756 arm_declare_function_name (FILE *stream, const char *name, tree decl)
32757 {
32758   tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
32759 
32760   struct cl_target_option *targ_options;
32761   if (target_parts)
32762     targ_options = TREE_TARGET_OPTION (target_parts);
32763   else
32764     targ_options = TREE_TARGET_OPTION (target_option_current_node);
32765   gcc_assert (targ_options);
32766 
32767   arm_print_asm_arch_directives (stream, targ_options);
32768 
32769   fprintf (stream, "\t.syntax unified\n");
32770 
32771   if (TARGET_THUMB)
32772     {
32773       if (is_called_in_ARM_mode (decl)
32774 	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
32775 	      && cfun->is_thunk))
32776 	fprintf (stream, "\t.code 32\n");
32777       else if (TARGET_THUMB1)
32778 	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
32779       else
32780 	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
32781     }
32782   else
32783     fprintf (stream, "\t.arm\n");
32784 
32785   if (TARGET_POKE_FUNCTION_NAME)
32786     arm_poke_function_name (stream, (const char *) name);
32787 }
32788 
32789 /* If MEM is in the form of [base+offset], extract the two parts
32790    of the address and store them in BASE and OFFSET; otherwise return
32791    FALSE after clearing BASE and OFFSET.  */
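/* For example, (mem (plus (reg r3) (const_int 8))) yields BASE == (reg r3)
   and OFFSET == (const_int 8), while a bare (mem (reg r3)) yields
   OFFSET == const0_rtx.  */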
32792 
32793 static bool
32794 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
32795 {
32796   rtx addr;
32797 
32798   gcc_assert (MEM_P (mem));
32799 
32800   addr = XEXP (mem, 0);
32801 
32802   /* Strip off const from addresses like (const (addr)).  */
32803   if (GET_CODE (addr) == CONST)
32804     addr = XEXP (addr, 0);
32805 
32806   if (GET_CODE (addr) == REG)
32807     {
32808       *base = addr;
32809       *offset = const0_rtx;
32810       return true;
32811     }
32812 
32813   if (GET_CODE (addr) == PLUS
32814       && GET_CODE (XEXP (addr, 0)) == REG
32815       && CONST_INT_P (XEXP (addr, 1)))
32816     {
32817       *base = XEXP (addr, 0);
32818       *offset = XEXP (addr, 1);
32819       return true;
32820     }
32821 
32822   *base = NULL_RTX;
32823   *offset = NULL_RTX;
32824 
32825   return false;
32826 }
32827 
32828 /* If INSN is a load or store whose address has the form [base+offset],
32829    extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
32830    set to TRUE if it's a load.  Return TRUE if INSN is such an
32831    instruction, otherwise return FALSE.  */
32832 
32833 static bool
32834 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
32835 {
32836   rtx x, dest, src;
32837 
32838   gcc_assert (INSN_P (insn));
32839   x = PATTERN (insn);
32840   if (GET_CODE (x) != SET)
32841     return false;
32842 
32843   src = SET_SRC (x);
32844   dest = SET_DEST (x);
32845   if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
32846     {
32847       *is_load = false;
32848       extract_base_offset_in_addr (dest, base, offset);
32849     }
32850   else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
32851     {
32852       *is_load = true;
32853       extract_base_offset_in_addr (src, base, offset);
32854     }
32855   else
32856     return false;
32857 
32858   return (*base != NULL_RTX && *offset != NULL_RTX);
32859 }
32860 
32861 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
32862 
32863    Currently we only support fusing ldr and str instructions, so FUSION_PRI
32864    and PRI are only calculated for these instructions.  For other instructions,
32865    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
32866    of instruction fusion can be supported by returning different priorities.
32867 
32868    It's important that irrelevant instructions get the largest FUSION_PRI.  */
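
/* A rough sketch of the encoding used below (for illustration only): after
   halving MAX_PRI, the base register number is subtracted in bits [27:20]
   and the absolute offset in bits [19:0].  So for two fusible loads such as
     ldr r0, [r2, #4]   and   ldr r1, [r2, #8]
   the one with the smaller offset receives the larger priority and is
   scheduled first, allowing the pair to be fused.  */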
32869 
32870 static void
32871 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
32872 			   int *fusion_pri, int *pri)
32873 {
32874   int tmp, off_val;
32875   bool is_load;
32876   rtx base, offset;
32877 
32878   gcc_assert (INSN_P (insn));
32879 
32880   tmp = max_pri - 1;
32881   if (!fusion_load_store (insn, &base, &offset, &is_load))
32882     {
32883       *pri = tmp;
32884       *fusion_pri = tmp;
32885       return;
32886     }
32887 
32888   /* Load goes first.  */
32889   if (is_load)
32890     *fusion_pri = tmp - 1;
32891   else
32892     *fusion_pri = tmp - 2;
32893 
32894   tmp /= 2;
32895 
32896   /* INSN with smaller base register goes first.  */
32897   tmp -= ((REGNO (base) & 0xff) << 20);
32898 
32899   /* INSN with smaller offset goes first.  */
32900   off_val = (int)(INTVAL (offset));
32901   if (off_val >= 0)
32902     tmp -= (off_val & 0xfffff);
32903   else
32904     tmp += ((- off_val) & 0xfffff);
32905 
32906   *pri = tmp;
32907   return;
32908 }
32909 
32910 
32911 /* Construct and return a PARALLEL RTX vector with elements numbering the
32912    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
32913    the vector - from the perspective of the architecture.  This does not
32914    line up with GCC's perspective on lane numbers, so we end up with
32915    different masks depending on our target endian-ness.  The diagram
32916    below may help.  We must draw the distinction when building masks
32917    which select one half of the vector.  An instruction selecting
32918    architectural low-lanes for a big-endian target must be described using
32919    a mask selecting GCC high-lanes.
32920 
32921                  Big-Endian             Little-Endian
32922 
32923 GCC             0   1   2   3           3   2   1   0
32924               | x | x | x | x |       | x | x | x | x |
32925 Architecture    3   2   1   0           3   2   1   0
32926 
32927 Low Mask:         { 2, 3 }                { 0, 1 }
32928 High Mask:        { 0, 1 }                { 2, 3 }
32929 */
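
/* Worked example (illustrative): for V4SImode, NUNITS is 4 and the PARALLEL
   built below has two elements.  On little-endian, HIGH == true gives
   (parallel [(const_int 2) (const_int 3)]) and HIGH == false gives
   (parallel [(const_int 0) (const_int 1)]); on big-endian the two masks are
   swapped, matching the diagram above.  */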
32930 
32931 rtx
32932 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
32933 {
32934   int nunits = GET_MODE_NUNITS (mode);
32935   rtvec v = rtvec_alloc (nunits / 2);
32936   int high_base = nunits / 2;
32937   int low_base = 0;
32938   int base;
32939   rtx t1;
32940   int i;
32941 
32942   if (BYTES_BIG_ENDIAN)
32943     base = high ? low_base : high_base;
32944   else
32945     base = high ? high_base : low_base;
32946 
32947   for (i = 0; i < nunits / 2; i++)
32948     RTVEC_ELT (v, i) = GEN_INT (base + i);
32949 
32950   t1 = gen_rtx_PARALLEL (mode, v);
32951   return t1;
32952 }
32953 
32954 /* Check OP for validity as a PARALLEL RTX vector with elements numbering
32955    the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half
32956    of the vector, from the perspective of the architecture.  See the diagram
32957    above arm_simd_vect_par_cnst_half for more details.  */
32958 
32959 bool
32960 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
32961 				       bool high)
32962 {
32963   rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
32964   HOST_WIDE_INT count_op = XVECLEN (op, 0);
32965   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
32966   int i = 0;
32967 
32968   if (!VECTOR_MODE_P (mode))
32969     return false;
32970 
32971   if (count_op != count_ideal)
32972     return false;
32973 
32974   for (i = 0; i < count_ideal; i++)
32975     {
32976       rtx elt_op = XVECEXP (op, 0, i);
32977       rtx elt_ideal = XVECEXP (ideal, 0, i);
32978 
32979       if (!CONST_INT_P (elt_op)
32980 	  || INTVAL (elt_ideal) != INTVAL (elt_op))
32981 	return false;
32982     }
32983   return true;
32984 }
32985 
32986 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
32987    in Thumb1.  */
32988 static bool
32989 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
32990 			 const_tree)
32991 {
32992   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
32993   if (vcall_offset && TARGET_THUMB1)
32994     return false;
32995 
32996   /* Otherwise ok.  */
32997   return true;
32998 }
32999 
33000 /* Generate RTL for a conditional branch with rtx comparison CODE in
33001    mode CC_MODE. The destination of the unlikely conditional branch
33002    is LABEL_REF.  */
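
/* The branch emitted below has roughly this shape (illustrative):
     (set (pc) (if_then_else (CODE (reg CC_REGNUM) (const_int 0))
                             (label_ref LABEL_REF)
                             (pc)))
   with emit_unlikely_jump attaching a very low branch probability to it.  */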
33003 
33004 void
33005 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33006 			  rtx label_ref)
33007 {
33008   rtx x;
33009   x = gen_rtx_fmt_ee (code, VOIDmode,
33010 		      gen_rtx_REG (cc_mode, CC_REGNUM),
33011 		      const0_rtx);
33012 
33013   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33014 			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
33015 			    pc_rtx);
33016   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33017 }
33018 
33019 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33020 
33021    For pure-code sections there is no letter code for this attribute, so
33022    output all the section flags numerically when this is needed.  */
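
/* For example (derived from the mapping below): a non-writable, executable,
   non-debug pure-code section gets the numeric flags
   0x20000000 | 0x2 | 0x4 = 0x20000006, i.e. SHF_ARM_PURECODE plus the usual
   ELF "alloc" and "execinstr" bits.  */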
33023 
33024 static bool
33025 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33026 {
33027 
33028   if (flags & SECTION_ARM_PURECODE)
33029     {
33030       *num = 0x20000000;
33031 
33032       if (!(flags & SECTION_DEBUG))
33033 	*num |= 0x2;
33034       if (flags & SECTION_EXCLUDE)
33035 	*num |= 0x80000000;
33036       if (flags & SECTION_WRITE)
33037 	*num |= 0x1;
33038       if (flags & SECTION_CODE)
33039 	*num |= 0x4;
33040       if (flags & SECTION_MERGE)
33041 	*num |= 0x10;
33042       if (flags & SECTION_STRINGS)
33043 	*num |= 0x20;
33044       if (flags & SECTION_TLS)
33045 	*num |= 0x400;
33046       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33047 	*num |= 0x200;
33048 
33049       return true;
33050     }
33051 
33052   return false;
33053 }
33054 
33055 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33056 
33057    If pure-code is passed as an option, make sure all functions are in
33058    sections that have the SHF_ARM_PURECODE attribute.  */
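
/* In practice this is exercised by the -mpure-code option; as an
   illustration, a function placed with __attribute__((section (".foo")))
   still has SECTION_ARM_PURECODE added to the flags of ".foo" below, so the
   section is emitted with the SHF_ARM_PURECODE attribute.  */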
33059 
33060 static section *
33061 arm_function_section (tree decl, enum node_frequency freq,
33062 		      bool startup, bool exit)
33063 {
33064   const char * section_name;
33065   section * sec;
33066 
33067   if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33068     return default_function_section (decl, freq, startup, exit);
33069 
33070   if (!target_pure_code)
33071     return default_function_section (decl, freq, startup, exit);
33072 
33073 
33074   section_name = DECL_SECTION_NAME (decl);
33075 
33076   /* If a function is not in a named section then it falls under the 'default'
33077      text section, also known as '.text'.  We can preserve previous behavior as
33078      the default text section already has the SHF_ARM_PURECODE section
33079      attribute.  */
33080   if (!section_name)
33081     {
33082       section *default_sec = default_function_section (decl, freq, startup,
33083 						       exit);
33084 
33085       /* If default_sec is not null, then it must be a special section like for
33086 	 example .text.startup.  We set the pure-code attribute and return the
33087 	 same section to preserve existing behavior.  */
33088       if (default_sec)
33089 	  default_sec->common.flags |= SECTION_ARM_PURECODE;
33090       return default_sec;
33091     }
33092 
33093   /* Otherwise look whether a section has already been created with
33094      'section_name'.  */
33095   sec = get_named_section (decl, section_name, 0);
33096   if (!sec)
33097     /* If that is not the case passing NULL as the section's name to
33098        'get_named_section' will create a section with the declaration's
33099        section name.  */
33100     sec = get_named_section (decl, NULL, 0);
33101 
33102   /* Set the SHF_ARM_PURECODE attribute.  */
33103   sec->common.flags |= SECTION_ARM_PURECODE;
33104 
33105   return sec;
33106 }
33107 
33108 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
33109 
33110    If DECL is a function declaration and pure-code is passed as an option
33111    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
33112    section's name and RELOC indicates whether the declaration's initializer
33113    may contain runtime relocations.  */
33114 
33115 static unsigned int
33116 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33117 {
33118   unsigned int flags = default_section_type_flags (decl, name, reloc);
33119 
33120   if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33121     flags |= SECTION_ARM_PURECODE;
33122 
33123   return flags;
33124 }
33125 
33126 /* Generate call to __aeabi_[mode]divmod (op0, op1).  */
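
/* A sketch of the SImode case (illustrative, assuming the standard AEABI
   runtime): LIBFUNC is e.g. __aeabi_idivmod, which returns the quotient in
   r0 and the remainder in r1.  The call below models that pair as a single
   DImode value, and the two subregs at byte offsets 0 and 4 recover the
   quotient and remainder respectively.  */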
33127 
33128 static void
33129 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33130 			   rtx op0, rtx op1,
33131 			   rtx *quot_p, rtx *rem_p)
33132 {
33133   if (mode == SImode)
33134     gcc_assert (!TARGET_IDIV);
33135 
33136   scalar_int_mode libval_mode
33137     = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33138 
33139   rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33140 					libval_mode,
33141 					op0, GET_MODE (op0),
33142 					op1, GET_MODE (op1));
33143 
33144   rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33145   rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33146 				       GET_MODE_SIZE (mode));
33147 
33148   gcc_assert (quotient);
33149   gcc_assert (remainder);
33150 
33151   *quot_p = quotient;
33152   *rem_p = remainder;
33153 }
33154 
33155 /*  This function checks for the availability of the coprocessor builtin passed
33156     in BUILTIN for the current target.  Returns true if it is available and
33157     false otherwise.  If a BUILTIN is passed for which this function has not
33158     been implemented it will cause an internal compiler error.  */
33159 
33160 bool
33161 arm_coproc_builtin_available (enum unspecv builtin)
33162 {
33163   /* None of these builtins are available in Thumb mode if the target only
33164      supports Thumb-1.  */
33165   if (TARGET_THUMB1)
33166     return false;
33167 
33168   switch (builtin)
33169     {
33170       case VUNSPEC_CDP:
33171       case VUNSPEC_LDC:
33172       case VUNSPEC_LDCL:
33173       case VUNSPEC_STC:
33174       case VUNSPEC_STCL:
33175       case VUNSPEC_MCR:
33176       case VUNSPEC_MRC:
33177 	if (arm_arch4)
33178 	  return true;
33179 	break;
33180       case VUNSPEC_CDP2:
33181       case VUNSPEC_LDC2:
33182       case VUNSPEC_LDC2L:
33183       case VUNSPEC_STC2:
33184       case VUNSPEC_STC2L:
33185       case VUNSPEC_MCR2:
33186       case VUNSPEC_MRC2:
33187 	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33188 	   ARMv8-{A,M}.  */
33189 	if (arm_arch5t)
33190 	  return true;
33191 	break;
33192       case VUNSPEC_MCRR:
33193       case VUNSPEC_MRRC:
33194 	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33195 	   ARMv8-{A,M}.  */
33196 	if (arm_arch6 || arm_arch5te)
33197 	  return true;
33198 	break;
33199       case VUNSPEC_MCRR2:
33200       case VUNSPEC_MRRC2:
33201 	if (arm_arch6)
33202 	  return true;
33203 	break;
33204       default:
33205 	gcc_unreachable ();
33206     }
33207   return false;
33208 }
33209 
33210 /* This function returns true if OP is a valid memory operand for the ldc and
33211    stc coprocessor instructions and false otherwise.  */
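
/* Examples (illustrative): addresses such as [r0], [r0, #8], [r0, #-1020]
   or the post-incremented form [r0], #4 are accepted, whereas [r0, #2]
   (not a multiple of four) and [r0, #1024] (outside [-1020, 1020]) are
   rejected.  */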
33212 
33213 bool
33214 arm_coproc_ldc_stc_legitimate_address (rtx op)
33215 {
33216   HOST_WIDE_INT range;
33217   /* Has to be a memory operand.  */
33218   if (!MEM_P (op))
33219     return false;
33220 
33221   op = XEXP (op, 0);
33222 
33223   /* We accept registers.  */
33224   if (REG_P (op))
33225     return true;
33226 
33227   switch (GET_CODE (op))
33228     {
33229       case PLUS:
33230 	{
33231 	  /* Or registers with an offset.  */
33232 	  if (!REG_P (XEXP (op, 0)))
33233 	    return false;
33234 
33235 	  op = XEXP (op, 1);
33236 
33237 	  /* The offset must be an immediate though.  */
33238 	  if (!CONST_INT_P (op))
33239 	    return false;
33240 
33241 	  range = INTVAL (op);
33242 
33243 	  /* Within the range of [-1020,1020].  */
33244 	  if (!IN_RANGE (range, -1020, 1020))
33245 	    return false;
33246 
33247 	  /* And a multiple of 4.  */
33248 	  return (range % 4) == 0;
33249 	}
33250       case PRE_INC:
33251       case POST_INC:
33252       case PRE_DEC:
33253       case POST_DEC:
33254 	return REG_P (XEXP (op, 0));
33255       default:
33256 	gcc_unreachable ();
33257     }
33258   return false;
33259 }
33260 
33261 /* Return the diagnostic message string if conversion from FROMTYPE to
33262    TOTYPE is not allowed, NULL otherwise.  */
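
/* A sketch of what this rejects at the source level (illustrative, using the
   ACLE bfloat16_t typedef):
     bfloat16_t b;
     float f = b;   // error: invalid conversion from type bfloat16_t
   Conversions where both sides have the same element mode are left alone.  */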
33263 
33264 static const char *
33265 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33266 {
33267   if (element_mode (fromtype) != element_mode (totype))
33268     {
33269       /* Do not allow conversions to/from BFmode scalar types.  */
33270       if (TYPE_MODE (fromtype) == BFmode)
33271 	return N_("invalid conversion from type %<bfloat16_t%>");
33272       if (TYPE_MODE (totype) == BFmode)
33273 	return N_("invalid conversion to type %<bfloat16_t%>");
33274     }
33275 
33276   /* Conversion allowed.  */
33277   return NULL;
33278 }
33279 
33280 /* Return the diagnostic message string if the unary operation OP is
33281    not permitted on TYPE, NULL otherwise.  */
33282 
33283 static const char *
33284 arm_invalid_unary_op (int op, const_tree type)
33285 {
33286   /* Reject all single-operand operations on BFmode except for &.  */
33287   if (element_mode (type) == BFmode && op != ADDR_EXPR)
33288     return N_("operation not permitted on type %<bfloat16_t%>");
33289 
33290   /* Operation allowed.  */
33291   return NULL;
33292 }
33293 
33294 /* Return the diagnostic message string if the binary operation OP is
33295    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
33296 
33297 static const char *
33298 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33299 			   const_tree type2)
33300 {
33301   /* Reject all 2-operand operations on BFmode.  */
33302   if (element_mode (type1) == BFmode
33303       || element_mode (type2) == BFmode)
33304     return N_("operation not permitted on type %<bfloat16_t%>");
33305 
33306   /* Operation allowed.  */
33307   return NULL;
33308 }
33309 
33310 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33311 
33312    In VFPv1, VFP registers could only be accessed in the mode they were
33313    set, so subregs would be invalid there.  However, we don't support
33314    VFPv1 at the moment, and the restriction was lifted in VFPv2.
33315 
33316    In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33317    VFP registers in little-endian order.  We can't describe that accurately to
33318    GCC, so avoid taking subregs of such values.
33319 
33320    The only exception is going from a 128-bit to a 64-bit type.  In that
33321    case the data layout happens to be consistent for big-endian, so we
33322    explicitly allow that case.  */
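
/* For example (following the rules above): on a big-endian target an SImode
   subreg of a DFmode value that may live in a VFP register is rejected,
   while the 128-bit to 64-bit case (e.g. V2DImode to DImode) is the one
   combination that remains allowed.  */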
33323 
33324 static bool
33325 arm_can_change_mode_class (machine_mode from, machine_mode to,
33326 			   reg_class_t rclass)
33327 {
33328   if (TARGET_BIG_END
33329       && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33330       && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33331 	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33332       && reg_classes_intersect_p (VFP_REGS, rclass))
33333     return false;
33334   return true;
33335 }
33336 
33337 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
33338    strcpy from constants will be faster.  */
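
/* Worked example (illustrative): when tuning for XScale in ARM state and not
   optimizing for size, FACTOR below is 2, so a STRING_CST is aligned to
   MAX (ALIGN, 32 * 2) = 64 bits; for Thumb or other tunings the result is at
   least word (32-bit) alignment.  */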
33339 
33340 static HOST_WIDE_INT
33341 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33342 {
33343   unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33344   if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33345     return MAX (align, BITS_PER_WORD * factor);
33346   return align;
33347 }
33348 
33349 /* Emit a speculation barrier on target architectures that do not have
33350    DSB/ISB directly.  Such systems probably don't need a barrier
33351    themselves, but if the code is ever run on a later architecture, it
33352    might become a problem.  */
33353 void
33354 arm_emit_speculation_barrier_function ()
33355 {
33356   emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33357 }
33358 
33359 /* Have we recorded an explicit access to the Q bit of APSR?  */
33360 bool
33361 arm_q_bit_access (void)
33362 {
33363   if (cfun && cfun->decl)
33364     return lookup_attribute ("acle qbit",
33365 			     DECL_ATTRIBUTES (cfun->decl));
33366   return true;
33367 }
33368 
33369 /* Have we recorded an explicit access to the GE bits of PSTATE?  */
33370 bool
33371 arm_ge_bits_access (void)
33372 {
33373   if (cfun && cfun->decl)
33374     return lookup_attribute ("acle gebits",
33375 			     DECL_ATTRIBUTES (cfun->decl));
33376   return true;
33377 }
33378 
33379 #if CHECKING_P
33380 namespace selftest {
33381 
33382 /* Scan the static data tables generated by parsecpu.awk looking for
33383    potential issues with the data.  We primarily check for
33384    inconsistencies in the option extensions at present (extensions
33385    that duplicate others but aren't marked as aliases).  Furthermore,
33386    for correct canonicalization later options must never be a subset
33387    for correct canonicalization, later options must never be a subset
33388    feature bits and never an architecture bit.  The architecture is inferred
33389    from the declaration of the extension.  */
33390 static void
33391 arm_test_cpu_arch_data (void)
33392 {
33393   const arch_option *arch;
33394   const cpu_option *cpu;
33395   auto_sbitmap target_isa (isa_num_bits);
33396   auto_sbitmap isa1 (isa_num_bits);
33397   auto_sbitmap isa2 (isa_num_bits);
33398 
33399   for (arch = all_architectures; arch->common.name != NULL; ++arch)
33400     {
33401       const cpu_arch_extension *ext1, *ext2;
33402 
33403       if (arch->common.extensions == NULL)
33404 	continue;
33405 
33406       arm_initialize_isa (target_isa, arch->common.isa_bits);
33407 
33408       for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33409 	{
33410 	  if (ext1->alias)
33411 	    continue;
33412 
33413 	  arm_initialize_isa (isa1, ext1->isa_bits);
33414 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33415 	    {
33416 	      if (ext2->alias || ext1->remove != ext2->remove)
33417 		continue;
33418 
33419 	      arm_initialize_isa (isa2, ext2->isa_bits);
33420 	      /* If the option is a subset of the parent option, it doesn't
33421 		 add anything and so isn't useful.  */
33422 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33423 
33424 	      /* If the extension specifies any architectural bits then
33425 		 disallow it.  Extensions should only specify feature bits.  */
33426 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33427 	    }
33428 	}
33429     }
33430 
33431   for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33432     {
33433       const cpu_arch_extension *ext1, *ext2;
33434 
33435       if (cpu->common.extensions == NULL)
33436 	continue;
33437 
33438       arm_initialize_isa (target_isa, cpu->common.isa_bits);
33439 
33440       for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33441 	{
33442 	  if (ext1->alias)
33443 	    continue;
33444 
33445 	  arm_initialize_isa (isa1, ext1->isa_bits);
33446 	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33447 	    {
33448 	      if (ext2->alias || ext1->remove != ext2->remove)
33449 		continue;
33450 
33451 	      arm_initialize_isa (isa2, ext2->isa_bits);
33452 	      /* If the option is a subset of the parent option, it doesn't
33453 		 add anything and so isn't useful.  */
33454 	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33455 
33456 	      /* If the extension specifies any architectural bits then
33457 		 disallow it.  Extensions should only specify feature bits.  */
33458 	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33459 	    }
33460 	}
33461     }
33462 }
33463 
33464 /* Scan the static data tables generated by parsecpu.awk looking for
33465    potential issues with the data.  Here we check for consistency between the
33466    fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33467    a feature bit that is not defined by any FPU flag.  */
33468 static void
33469 arm_test_fpu_data (void)
33470 {
33471   auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33472   auto_sbitmap fpubits (isa_num_bits);
33473   auto_sbitmap tmpset (isa_num_bits);
33474 
33475   static const enum isa_feature fpu_bitlist_internal[]
33476     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33477   arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33478 
33479   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33480   {
33481     arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33482     bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33483     bitmap_clear (isa_all_fpubits_internal);
33484     bitmap_copy (isa_all_fpubits_internal, tmpset);
33485   }
33486 
33487   if (!bitmap_empty_p (isa_all_fpubits_internal))
33488     {
33489 	fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33490 			 " group that are not defined by any FPU.\n"
33491 			 "       Check your arm-cpus.in.\n");
33492 	ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33493     }
33494 }
33495 
33496 static void
33497 arm_run_selftests (void)
33498 {
33499   arm_test_cpu_arch_data ();
33500   arm_test_fpu_data ();
33501 }
33502 } /* Namespace selftest.  */
33503 
33504 #undef TARGET_RUN_TARGET_SELFTESTS
33505 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33506 #endif /* CHECKING_P */
33507 
33508 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33509    Unlike the arm version, we do NOT implement asm flag outputs.  */
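
/* Illustrative source-level example ("=@cc..." is GCC's asm flag-output
   constraint syntax):
     asm ("..." : "=@cccs" (ok));
   is handled by the arm version of this hook, but when compiling for Thumb-1
   the loop below spots the "=@cc" prefix and issues a sorry () instead.  */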
33510 
33511 rtx_insn *
33512 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
33513 		      vec<const char *> &constraints,
33514 		      vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
33515 {
33516   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33517     if (strncmp (constraints[i], "=@cc", 4) == 0)
33518       {
33519 	sorry ("asm flags not supported in thumb1 mode");
33520 	break;
33521       }
33522   return NULL;
33523 }
33524 
33525 /* Generate code to enable conditional branches in functions larger than 1 MiB.
33526    Parameters are:
33527      operands: the operands list of the asm insn (see arm_cond_branch or
33528        arm_cond_branch_reversed).
33529      pos_label: an index into the operands array; operands[pos_label] is
33530        the asm label of the final jump destination.
33531      dest: a string which is used to generate the asm label of the
33532        intermediate destination.
33533      branch_format: a string denoting the intermediate branch format, e.g.
33534        "beq", "bne", etc.  */
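
/* A sketch of the output (illustrative): the caller passes the reversed
   condition in BRANCH_FORMAT, so for an out-of-range "bne <label>" the
   emitted sequence is roughly

	beq	.L<dest><N>
	b	<label>
     .L<dest><N>:

   i.e. a short conditional hop over an unconditional branch, which has a
   much longer reach.  */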
33535 
33536 const char *
33537 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
33538 		    const char * branch_format)
33539 {
33540   rtx_code_label * tmp_label = gen_label_rtx ();
33541   char label_buf[256];
33542   char buffer[128];
33543   ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
33544 			CODE_LABEL_NUMBER (tmp_label));
33545   const char *label_ptr = arm_strip_name_encoding (label_buf);
33546   rtx dest_label = operands[pos_label];
33547   operands[pos_label] = tmp_label;
33548 
33549   snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
33550   output_asm_insn (buffer, operands);
33551 
33552   snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
33553   operands[pos_label] = dest_label;
33554   output_asm_insn (buffer, operands);
33555   return "";
33556 }
33557 
33558 /* If the given mode matches, load from memory to LO_REGS
33559    (i.e. [Rn], Rn <= LO_REGS).  */
33560 enum reg_class
33561 arm_mode_base_reg_class (machine_mode mode)
33562 {
33563   if (TARGET_HAVE_MVE
33564       && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
33565     return LO_REGS;
33566 
33567   return MODE_BASE_REG_REG_CLASS (mode);
33568 }
33569 
33570 struct gcc_target targetm = TARGET_INITIALIZER;
33571 
33572 #include "gt-arm.h"
33573