1 /* Output routines for GCC for ARM.
2    Copyright (C) 1991-2018 Free Software Foundation, Inc.
3    Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4    and Martin Simmons (@harleqn.co.uk).
5    More major hacks by Richard Earnshaw (rearnsha@arm.com).
6 
7    This file is part of GCC.
8 
9    GCC is free software; you can redistribute it and/or modify it
10    under the terms of the GNU General Public License as published
11    by the Free Software Foundation; either version 3, or (at your
12    option) any later version.
13 
14    GCC is distributed in the hope that it will be useful, but WITHOUT
15    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
17    License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with GCC; see the file COPYING3.  If not see
21    <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73 
74 /* This file should be included last.  */
75 #include "target-def.h"
76 
77 /* Forward definitions of types.  */
78 typedef struct minipool_node    Mnode;
79 typedef struct minipool_fixup   Mfix;
80 
81 /* The last .arch and .fpu assembly strings that we printed.  */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84 
85 void (*arm_lang_output_object_attributes_hook)(void);
86 
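/* Holds a sequence of up to four immediate values; used by
   optimal_immediate_sequence to return the constants selected when
   synthesising a constant in at most four instructions.  */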
87 struct four_ints
88 {
89   int i[4];
90 };
91 
92 /* Forward function declarations.  */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 					   int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 			       machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 				       unsigned HOST_WIDE_INT val,
162 				       struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 					 unsigned HOST_WIDE_INT val,
165 					 struct four_ints *return_sequence,
166 					 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 						    machine_mode, int *,
171 						    const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 				 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 				  tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 			     const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 				      const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 				      const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202 
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210 
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214 
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 					tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 				   machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232 
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 				struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 					unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 						     const_tree type,
286 						     int misalignment,
287 						     bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296 
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 					  const vec_perm_indices &);
299 
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301 
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 					   tree vectype,
304 					   int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 				   enum vect_cost_for_stmt kind,
307 				   struct _stmt_vec_info *stmt_info,
308 				   int misalign,
309 				   enum vect_cost_model_location where);
310 
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 					 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314 
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 				     const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 						int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 
329 /* Table of machine attributes.  */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332   /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333        affects_type_identity, handler, exclude } */
334   /* Function calls made to this symbol must be done indirectly, because
335      it may lie outside of the 26 bit addressing range of a normal function
336      call.  */
337   { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
338   /* Whereas these functions are always known to reside within the 26 bit
339      addressing range.  */
340   { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
341   /* Specify the procedure call conventions for a function.  */
342   { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
343     NULL },
344   /* Interrupt Service Routines have special prologue and epilogue requirements.  */
345   { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
346     NULL },
347   { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
348     NULL },
349   { "naked",        0, 0, true,  false, false, false,
350     arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352   /* ARM/PE has three new attributes:
353      interfacearm - ?
354      dllexport - for exporting a function/variable that will live in a dll
355      dllimport - for importing a function/variable from a dll
356 
357      Microsoft allows multiple declspecs in one __declspec, separating
358      them with spaces.  We do NOT support this.  Instead, use __declspec
359      multiple times.
360   */
361   { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
362   { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
363   { "interfacearm", 0, 0, true,  false, false, false,
364     arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366   { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
367     NULL },
368   { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
369     NULL },
370   { "notshared",    0, 0, false, true, false, false,
371     arm_handle_notshared_attribute, NULL },
372 #endif
373   /* ARMv8-M Security Extensions support.  */
374   { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375     arm_handle_cmse_nonsecure_entry, NULL },
376   { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377     arm_handle_cmse_nonsecure_call, NULL },
378   { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
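/* Purely illustrative sketch, not code taken from GCC itself (the
   function names below are hypothetical): attributes from the table
   above appear on user declarations along these lines,

     extern int far_helper (int) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   and entries with a non-NULL handler, such as "interrupt", have that
   handler invoked to validate the use when the attribute is parsed.  */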
380 
381 /* Initialize the GCC target structure.  */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef  TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386 
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389 
390 #undef  TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392 
393 #undef  TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395 
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400 
401 #undef  TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef  TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405 
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412 
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415 
416 #undef  TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418 
419 #undef  TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421 
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424 
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427 
428 #undef  TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430 
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433 
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436 
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439 
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442 
443 #undef  TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445 
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448 
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451 
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454 
455 #undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457 
458 #undef  TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460 
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463 
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466 
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469 
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472 
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475 
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
481 #endif
482 
483 #undef  TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485 
486 #undef  TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488 
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491 
492 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494 
495 #undef  TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497 
498 #undef  TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500 
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503 
504 #undef  TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508 
509 #undef  TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef  TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513 
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524   arm_autovectorize_vector_sizes
525 
526 #undef  TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528 
529 #undef  TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS  arm_init_builtins
531 #undef  TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef  TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535 
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538 
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555 
556 #undef  TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558 
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561 
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568 
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571 
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574 
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577 
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580 
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583 
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586 
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589 
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592 
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595 
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598 
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601 
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604   arm_cxx_determine_class_data_visibility
605 
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608 
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611 
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614 
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617 
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621 
622 /* EABI unwinding tables use a different format for the typeinfo tables.  */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625 
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628 
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631 
632 #endif /* ARM_UNWIND_INFO */
633 
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636 
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639 
640 #undef  TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642 
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647 
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650 
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653 
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656 
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659 
660 /* The minimum is set such that the total size of the block
661    for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
662    divisible by eight, ensuring natural spacing of anchors.  */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665 
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668 
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671   arm_first_cycle_multipass_dfa_lookahead
672 
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675   arm_first_cycle_multipass_dfa_lookahead_guard
676 
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679 
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682 
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689 
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694 
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p
697 
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700 
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703 
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706 
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709 
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712 
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715 
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718 
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721 
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724 
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727   arm_builtin_vectorized_function
728 
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731 
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734   arm_vector_alignment_reachable
735 
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738   arm_builtin_support_vector_misalignment
739 
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742   arm_preferred_rename_class
743 
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746 
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749   arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752 
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755   arm_canonicalize_comparison
756 
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759 
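/* The maximum number of insns permitted in a single IT block: one when
   arm_restrict_it (-mrestrict-it) is in effect, otherwise four.  */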
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762 
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765 
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768 
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771 
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774 
775 #undef  TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777 
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780 
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783 
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786 
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789 
790 /* Although the architecture reserves bits 0 and 1, only the former is
791    used for ARM/Thumb ISA selection in v7 and earlier versions.  */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794 
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797 
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802 
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805 
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808 
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 
812 /* Obstack for minipool constant handling.  */
813 static struct obstack minipool_obstack;
814 static char *         minipool_startobj;
815 
816 /* The maximum number of skipped insns that will be
817    conditionalised if possible.  */
818 static int max_insns_skipped = 5;
819 
820 extern FILE * asm_out_file;
821 
822 /* True if we are currently building a constant table.  */
823 int making_const_table;
824 
825 /* The processor for which instructions should be scheduled.  */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827 
828 /* The current tuning set.  */
829 const struct tune_params *current_tune;
830 
831 /* Which floating point hardware to schedule for.  */
832 int arm_fpu_attr;
833 
834 /* Used for Thumb call_via trampolines.  */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837 
838 /* The bits in this mask specify which instruction scheduling options should
839    be used.  */
840 unsigned int tune_flags = 0;
841 
842 /* The highest ARM architecture version supported by the
843    target.  */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845 
846 /* Active target architecture and tuning.  */
847 
848 struct arm_build_target arm_active_target;
849 
850 /* The following are used in the arm.md file as equivalents to bits
851    in the above two flag variables.  */
852 
853 /* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
854 int arm_arch3m = 0;
855 
856 /* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
857 int arm_arch4 = 0;
858 
859 /* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
860 int arm_arch4t = 0;
861 
862 /* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
863 int arm_arch5 = 0;
864 
865 /* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
866 int arm_arch5e = 0;
867 
868 /* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
869 int arm_arch5te = 0;
870 
871 /* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
872 int arm_arch6 = 0;
873 
874 /* Nonzero if this chip supports the ARM 6K extensions.  */
875 int arm_arch6k = 0;
876 
877 /* Nonzero if this chip supports the ARM 6KZ extensions.  */
878 int arm_arch6kz = 0;
879 
880 /* Nonzero if instructions present in ARMv6-M can be used.  */
881 int arm_arch6m = 0;
882 
883 /* Nonzero if this chip supports the ARM 7 extensions.  */
884 int arm_arch7 = 0;
885 
886 /* Nonzero if this chip supports the Large Physical Address Extension.  */
887 int arm_arch_lpae = 0;
888 
889 /* Nonzero if instructions not present in the 'M' profile can be used.  */
890 int arm_arch_notm = 0;
891 
892 /* Nonzero if instructions present in ARMv7E-M can be used.  */
893 int arm_arch7em = 0;
894 
895 /* Nonzero if instructions present in ARMv8 can be used.  */
896 int arm_arch8 = 0;
897 
898 /* Nonzero if this chip supports the ARMv8.1 extensions.  */
899 int arm_arch8_1 = 0;
900 
901 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
902 int arm_arch8_2 = 0;
903 
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905    Architecture 8.2.  */
906 int arm_fp16_inst = 0;
907 
908 /* Nonzero if this chip can benefit from load scheduling.  */
909 int arm_ld_sched = 0;
910 
911 /* Nonzero if this chip is a StrongARM.  */
912 int arm_tune_strongarm = 0;
913 
914 /* Nonzero if this chip supports Intel Wireless MMX technology.  */
915 int arm_arch_iwmmxt = 0;
916 
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
918 int arm_arch_iwmmxt2 = 0;
919 
920 /* Nonzero if this chip is an XScale.  */
921 int arm_arch_xscale = 0;
922 
923 /* Nonzero if tuning for XScale.  */
924 int arm_tune_xscale = 0;
925 
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927    This typically means an ARM6 or ARM7 with MMU or MPU.  */
928 int arm_tune_wbuf = 0;
929 
930 /* Nonzero if tuning for Cortex-A9.  */
931 int arm_tune_cortex_a9 = 0;
932 
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934    preprocessor.
935    XXX This is a bit of a hack; it's intended to help work around
936    problems in GLD, which doesn't understand that armv5t code is
937    interworking clean.  */
938 int arm_cpp_interwork = 0;
939 
940 /* Nonzero if chip supports Thumb 1.  */
941 int arm_arch_thumb1;
942 
943 /* Nonzero if chip supports Thumb 2.  */
944 int arm_arch_thumb2;
945 
946 /* Nonzero if chip supports integer division instruction.  */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
949 
950 /* Nonzero if chip disallows volatile memory access in IT block.  */
951 int arm_arch_no_volatile_ce;
952 
953 /* Nonzero if we should use Neon to handle 64-bit operations rather
954    than core registers.  */
955 int prefer_neon_for_64bits = 0;
956 
957 /* Nonzero if we shouldn't use literal pools.  */
958 bool arm_disable_literal_pool = false;
959 
960 /* The register number to be used for the PIC offset register.  */
961 unsigned arm_pic_register = INVALID_REGNUM;
962 
963 enum arm_pcs arm_pcs_default;
964 
965 /* For an explanation of these variables, see final_prescan_insn below.  */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
968 enum arm_cond_code arm_current_cc;
969 
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn.  */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975    Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask.  */
978 int arm_condexec_masklen = 0;
979 
980 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
981 int arm_arch_crc = 0;
982 
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
984 int arm_arch_dotprod = 0;
985 
986 /* Nonzero if chip supports the ARMv8-M security extensions.  */
987 int arm_arch_cmse = 0;
988 
989 /* Nonzero if the core has a very small, high-latency multiply unit.  */
990 int arm_m_profile_small_mul = 0;
991 
992 /* The condition codes of the ARM, and the inverse function.  */
993 static const char * const arm_condition_codes[] =
994 {
995   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
997 };
998 
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
1000 int arm_regs_in_sequence[] =
1001 {
1002   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1003 };
1004 
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1007 
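/* Mask of the low registers (r0-r7) that may be used as work registers
   in Thumb-2 code; the Thumb hard frame pointer, stack pointer, program
   counter and PIC offset table register are excluded.  */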
1008 #define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
1011 
1012 /* Initialization code.  */
1013 
1014 struct cpu_tune
1015 {
1016   enum processor_type scheduler;
1017   unsigned int tune_flags;
1018   const struct tune_params *tune;
1019 };
1020 
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1023   {								\
1024     num_slots,							\
1025     l1_size,							\
1026     l1_line_size						\
1027   }
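/* Illustrative use only (the numbers are invented, not taken from any
   real tuning): a tune_params entry for a core with a useful prefetcher
   might be initialised with ARM_PREFETCH_BENEFICIAL (4, 32, 64), giving
   the number of prefetch slots, the L1 cache size and the L1 line size,
   while cores that do not benefit from prefetch tuning use
   ARM_PREFETCH_NOT_BENEFICIAL.  */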
1028 
1029 /* arm generic vectorizer costs.  */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032   1,					/* scalar_stmt_cost.  */
1033   1,					/* scalar_load_cost.  */
1034   1,					/* scalar_store_cost.  */
1035   1,					/* vec_stmt_cost.  */
1036   1,					/* vec_to_scalar_cost.  */
1037   1,					/* scalar_to_vec_cost.  */
1038   1,					/* vec_align_load_cost.  */
1039   1,					/* vec_unalign_load_cost.  */
1040   1,					/* vec_unalign_store_cost.  */
1041   1,					/* vec_store_cost.  */
1042   3,					/* cond_taken_branch_cost.  */
1043   1,					/* cond_not_taken_branch_cost.  */
1044 };
1045 
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
1047 #include "aarch-cost-tables.h"
1048 
1049 
1050 
1051 const struct cpu_cost_table cortexa9_extra_costs =
1052 {
1053   /* ALU */
1054   {
1055     0,			/* arith.  */
1056     0,			/* logical.  */
1057     0,			/* shift.  */
1058     COSTS_N_INSNS (1),	/* shift_reg.  */
1059     COSTS_N_INSNS (1),	/* arith_shift.  */
1060     COSTS_N_INSNS (2),	/* arith_shift_reg.  */
1061     0,			/* log_shift.  */
1062     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1063     COSTS_N_INSNS (1),	/* extend.  */
1064     COSTS_N_INSNS (2),	/* extend_arith.  */
1065     COSTS_N_INSNS (1),	/* bfi.  */
1066     COSTS_N_INSNS (1),	/* bfx.  */
1067     0,			/* clz.  */
1068     0,			/* rev.  */
1069     0,			/* non_exec.  */
1070     true		/* non_exec_costs_exec.  */
1071   },
1072   {
1073     /* MULT SImode */
1074     {
1075       COSTS_N_INSNS (3),	/* simple.  */
1076       COSTS_N_INSNS (3),	/* flag_setting.  */
1077       COSTS_N_INSNS (2),	/* extend.  */
1078       COSTS_N_INSNS (3),	/* add.  */
1079       COSTS_N_INSNS (2),	/* extend_add.  */
1080       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1081     },
1082     /* MULT DImode */
1083     {
1084       0,			/* simple (N/A).  */
1085       0,			/* flag_setting (N/A).  */
1086       COSTS_N_INSNS (4),	/* extend.  */
1087       0,			/* add (N/A).  */
1088       COSTS_N_INSNS (4),	/* extend_add.  */
1089       0				/* idiv (N/A).  */
1090     }
1091   },
1092   /* LD/ST */
1093   {
1094     COSTS_N_INSNS (2),	/* load.  */
1095     COSTS_N_INSNS (2),	/* load_sign_extend.  */
1096     COSTS_N_INSNS (2),	/* ldrd.  */
1097     COSTS_N_INSNS (2),	/* ldm_1st.  */
1098     1,			/* ldm_regs_per_insn_1st.  */
1099     2,			/* ldm_regs_per_insn_subsequent.  */
1100     COSTS_N_INSNS (5),	/* loadf.  */
1101     COSTS_N_INSNS (5),	/* loadd.  */
1102     COSTS_N_INSNS (1),  /* load_unaligned.  */
1103     COSTS_N_INSNS (2),	/* store.  */
1104     COSTS_N_INSNS (2),	/* strd.  */
1105     COSTS_N_INSNS (2),	/* stm_1st.  */
1106     1,			/* stm_regs_per_insn_1st.  */
1107     2,			/* stm_regs_per_insn_subsequent.  */
1108     COSTS_N_INSNS (1),	/* storef.  */
1109     COSTS_N_INSNS (1),	/* stored.  */
1110     COSTS_N_INSNS (1),	/* store_unaligned.  */
1111     COSTS_N_INSNS (1),	/* loadv.  */
1112     COSTS_N_INSNS (1)	/* storev.  */
1113   },
1114   {
1115     /* FP SFmode */
1116     {
1117       COSTS_N_INSNS (14),	/* div.  */
1118       COSTS_N_INSNS (4),	/* mult.  */
1119       COSTS_N_INSNS (7),	/* mult_addsub. */
1120       COSTS_N_INSNS (30),	/* fma.  */
1121       COSTS_N_INSNS (3),	/* addsub.  */
1122       COSTS_N_INSNS (1),	/* fpconst.  */
1123       COSTS_N_INSNS (1),	/* neg.  */
1124       COSTS_N_INSNS (3),	/* compare.  */
1125       COSTS_N_INSNS (3),	/* widen.  */
1126       COSTS_N_INSNS (3),	/* narrow.  */
1127       COSTS_N_INSNS (3),	/* toint.  */
1128       COSTS_N_INSNS (3),	/* fromint.  */
1129       COSTS_N_INSNS (3)		/* roundint.  */
1130     },
1131     /* FP DFmode */
1132     {
1133       COSTS_N_INSNS (24),	/* div.  */
1134       COSTS_N_INSNS (5),	/* mult.  */
1135       COSTS_N_INSNS (8),	/* mult_addsub.  */
1136       COSTS_N_INSNS (30),	/* fma.  */
1137       COSTS_N_INSNS (3),	/* addsub.  */
1138       COSTS_N_INSNS (1),	/* fpconst.  */
1139       COSTS_N_INSNS (1),	/* neg.  */
1140       COSTS_N_INSNS (3),	/* compare.  */
1141       COSTS_N_INSNS (3),	/* widen.  */
1142       COSTS_N_INSNS (3),	/* narrow.  */
1143       COSTS_N_INSNS (3),	/* toint.  */
1144       COSTS_N_INSNS (3),	/* fromint.  */
1145       COSTS_N_INSNS (3)		/* roundint.  */
1146     }
1147   },
1148   /* Vector */
1149   {
1150     COSTS_N_INSNS (1)	/* alu.  */
1151   }
1152 };
1153 
1154 const struct cpu_cost_table cortexa8_extra_costs =
1155 {
1156   /* ALU */
1157   {
1158     0,			/* arith.  */
1159     0,			/* logical.  */
1160     COSTS_N_INSNS (1),	/* shift.  */
1161     0,			/* shift_reg.  */
1162     COSTS_N_INSNS (1),	/* arith_shift.  */
1163     0,			/* arith_shift_reg.  */
1164     COSTS_N_INSNS (1),	/* log_shift.  */
1165     0,			/* log_shift_reg.  */
1166     0,			/* extend.  */
1167     0,			/* extend_arith.  */
1168     0,			/* bfi.  */
1169     0,			/* bfx.  */
1170     0,			/* clz.  */
1171     0,			/* rev.  */
1172     0,			/* non_exec.  */
1173     true		/* non_exec_costs_exec.  */
1174   },
1175   {
1176     /* MULT SImode */
1177     {
1178       COSTS_N_INSNS (1),	/* simple.  */
1179       COSTS_N_INSNS (1),	/* flag_setting.  */
1180       COSTS_N_INSNS (1),	/* extend.  */
1181       COSTS_N_INSNS (1),	/* add.  */
1182       COSTS_N_INSNS (1),	/* extend_add.  */
1183       COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1184     },
1185     /* MULT DImode */
1186     {
1187       0,			/* simple (N/A).  */
1188       0,			/* flag_setting (N/A).  */
1189       COSTS_N_INSNS (2),	/* extend.  */
1190       0,			/* add (N/A).  */
1191       COSTS_N_INSNS (2),	/* extend_add.  */
1192       0				/* idiv (N/A).  */
1193     }
1194   },
1195   /* LD/ST */
1196   {
1197     COSTS_N_INSNS (1),	/* load.  */
1198     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1199     COSTS_N_INSNS (1),	/* ldrd.  */
1200     COSTS_N_INSNS (1),	/* ldm_1st.  */
1201     1,			/* ldm_regs_per_insn_1st.  */
1202     2,			/* ldm_regs_per_insn_subsequent.  */
1203     COSTS_N_INSNS (1),	/* loadf.  */
1204     COSTS_N_INSNS (1),	/* loadd.  */
1205     COSTS_N_INSNS (1),  /* load_unaligned.  */
1206     COSTS_N_INSNS (1),	/* store.  */
1207     COSTS_N_INSNS (1),	/* strd.  */
1208     COSTS_N_INSNS (1),	/* stm_1st.  */
1209     1,			/* stm_regs_per_insn_1st.  */
1210     2,			/* stm_regs_per_insn_subsequent.  */
1211     COSTS_N_INSNS (1),	/* storef.  */
1212     COSTS_N_INSNS (1),	/* stored.  */
1213     COSTS_N_INSNS (1),	/* store_unaligned.  */
1214     COSTS_N_INSNS (1),	/* loadv.  */
1215     COSTS_N_INSNS (1)	/* storev.  */
1216   },
1217   {
1218     /* FP SFmode */
1219     {
1220       COSTS_N_INSNS (36),	/* div.  */
1221       COSTS_N_INSNS (11),	/* mult.  */
1222       COSTS_N_INSNS (20),	/* mult_addsub. */
1223       COSTS_N_INSNS (30),	/* fma.  */
1224       COSTS_N_INSNS (9),	/* addsub.  */
1225       COSTS_N_INSNS (3),	/* fpconst.  */
1226       COSTS_N_INSNS (3),	/* neg.  */
1227       COSTS_N_INSNS (6),	/* compare.  */
1228       COSTS_N_INSNS (4),	/* widen.  */
1229       COSTS_N_INSNS (4),	/* narrow.  */
1230       COSTS_N_INSNS (8),	/* toint.  */
1231       COSTS_N_INSNS (8),	/* fromint.  */
1232       COSTS_N_INSNS (8)		/* roundint.  */
1233     },
1234     /* FP DFmode */
1235     {
1236       COSTS_N_INSNS (64),	/* div.  */
1237       COSTS_N_INSNS (16),	/* mult.  */
1238       COSTS_N_INSNS (25),	/* mult_addsub.  */
1239       COSTS_N_INSNS (30),	/* fma.  */
1240       COSTS_N_INSNS (9),	/* addsub.  */
1241       COSTS_N_INSNS (3),	/* fpconst.  */
1242       COSTS_N_INSNS (3),	/* neg.  */
1243       COSTS_N_INSNS (6),	/* compare.  */
1244       COSTS_N_INSNS (6),	/* widen.  */
1245       COSTS_N_INSNS (6),	/* narrow.  */
1246       COSTS_N_INSNS (8),	/* toint.  */
1247       COSTS_N_INSNS (8),	/* fromint.  */
1248       COSTS_N_INSNS (8)		/* roundint.  */
1249     }
1250   },
1251   /* Vector */
1252   {
1253     COSTS_N_INSNS (1)	/* alu.  */
1254   }
1255 };
1256 
1257 const struct cpu_cost_table cortexa5_extra_costs =
1258 {
1259   /* ALU */
1260   {
1261     0,			/* arith.  */
1262     0,			/* logical.  */
1263     COSTS_N_INSNS (1),	/* shift.  */
1264     COSTS_N_INSNS (1),	/* shift_reg.  */
1265     COSTS_N_INSNS (1),	/* arith_shift.  */
1266     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1267     COSTS_N_INSNS (1),	/* log_shift.  */
1268     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1269     COSTS_N_INSNS (1),	/* extend.  */
1270     COSTS_N_INSNS (1),	/* extend_arith.  */
1271     COSTS_N_INSNS (1),	/* bfi.  */
1272     COSTS_N_INSNS (1),	/* bfx.  */
1273     COSTS_N_INSNS (1),	/* clz.  */
1274     COSTS_N_INSNS (1),	/* rev.  */
1275     0,			/* non_exec.  */
1276     true		/* non_exec_costs_exec.  */
1277   },
1278 
1279   {
1280     /* MULT SImode */
1281     {
1282       0,			/* simple.  */
1283       COSTS_N_INSNS (1),	/* flag_setting.  */
1284       COSTS_N_INSNS (1),	/* extend.  */
1285       COSTS_N_INSNS (1),	/* add.  */
1286       COSTS_N_INSNS (1),	/* extend_add.  */
1287       COSTS_N_INSNS (7)		/* idiv.  */
1288     },
1289     /* MULT DImode */
1290     {
1291       0,			/* simple (N/A).  */
1292       0,			/* flag_setting (N/A).  */
1293       COSTS_N_INSNS (1),	/* extend.  */
1294       0,			/* add.  */
1295       COSTS_N_INSNS (2),	/* extend_add.  */
1296       0				/* idiv (N/A).  */
1297     }
1298   },
1299   /* LD/ST */
1300   {
1301     COSTS_N_INSNS (1),	/* load.  */
1302     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1303     COSTS_N_INSNS (6),	/* ldrd.  */
1304     COSTS_N_INSNS (1),	/* ldm_1st.  */
1305     1,			/* ldm_regs_per_insn_1st.  */
1306     2,			/* ldm_regs_per_insn_subsequent.  */
1307     COSTS_N_INSNS (2),	/* loadf.  */
1308     COSTS_N_INSNS (4),	/* loadd.  */
1309     COSTS_N_INSNS (1),	/* load_unaligned.  */
1310     COSTS_N_INSNS (1),	/* store.  */
1311     COSTS_N_INSNS (3),	/* strd.  */
1312     COSTS_N_INSNS (1),	/* stm_1st.  */
1313     1,			/* stm_regs_per_insn_1st.  */
1314     2,			/* stm_regs_per_insn_subsequent.  */
1315     COSTS_N_INSNS (2),	/* storef.  */
1316     COSTS_N_INSNS (2),	/* stored.  */
1317     COSTS_N_INSNS (1),	/* store_unaligned.  */
1318     COSTS_N_INSNS (1),	/* loadv.  */
1319     COSTS_N_INSNS (1)	/* storev.  */
1320   },
1321   {
1322     /* FP SFmode */
1323     {
1324       COSTS_N_INSNS (15),	/* div.  */
1325       COSTS_N_INSNS (3),	/* mult.  */
1326       COSTS_N_INSNS (7),	/* mult_addsub. */
1327       COSTS_N_INSNS (7),	/* fma.  */
1328       COSTS_N_INSNS (3),	/* addsub.  */
1329       COSTS_N_INSNS (3),	/* fpconst.  */
1330       COSTS_N_INSNS (3),	/* neg.  */
1331       COSTS_N_INSNS (3),	/* compare.  */
1332       COSTS_N_INSNS (3),	/* widen.  */
1333       COSTS_N_INSNS (3),	/* narrow.  */
1334       COSTS_N_INSNS (3),	/* toint.  */
1335       COSTS_N_INSNS (3),	/* fromint.  */
1336       COSTS_N_INSNS (3)		/* roundint.  */
1337     },
1338     /* FP DFmode */
1339     {
1340       COSTS_N_INSNS (30),	/* div.  */
1341       COSTS_N_INSNS (6),	/* mult.  */
1342       COSTS_N_INSNS (10),	/* mult_addsub.  */
1343       COSTS_N_INSNS (7),	/* fma.  */
1344       COSTS_N_INSNS (3),	/* addsub.  */
1345       COSTS_N_INSNS (3),	/* fpconst.  */
1346       COSTS_N_INSNS (3),	/* neg.  */
1347       COSTS_N_INSNS (3),	/* compare.  */
1348       COSTS_N_INSNS (3),	/* widen.  */
1349       COSTS_N_INSNS (3),	/* narrow.  */
1350       COSTS_N_INSNS (3),	/* toint.  */
1351       COSTS_N_INSNS (3),	/* fromint.  */
1352       COSTS_N_INSNS (3)		/* roundint.  */
1353     }
1354   },
1355   /* Vector */
1356   {
1357     COSTS_N_INSNS (1)	/* alu.  */
1358   }
1359 };
1360 
1361 
1362 const struct cpu_cost_table cortexa7_extra_costs =
1363 {
1364   /* ALU */
1365   {
1366     0,			/* arith.  */
1367     0,			/* logical.  */
1368     COSTS_N_INSNS (1),	/* shift.  */
1369     COSTS_N_INSNS (1),	/* shift_reg.  */
1370     COSTS_N_INSNS (1),	/* arith_shift.  */
1371     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1372     COSTS_N_INSNS (1),	/* log_shift.  */
1373     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1374     COSTS_N_INSNS (1),	/* extend.  */
1375     COSTS_N_INSNS (1),	/* extend_arith.  */
1376     COSTS_N_INSNS (1),	/* bfi.  */
1377     COSTS_N_INSNS (1),	/* bfx.  */
1378     COSTS_N_INSNS (1),	/* clz.  */
1379     COSTS_N_INSNS (1),	/* rev.  */
1380     0,			/* non_exec.  */
1381     true		/* non_exec_costs_exec.  */
1382   },
1383 
1384   {
1385     /* MULT SImode */
1386     {
1387       0,			/* simple.  */
1388       COSTS_N_INSNS (1),	/* flag_setting.  */
1389       COSTS_N_INSNS (1),	/* extend.  */
1390       COSTS_N_INSNS (1),	/* add.  */
1391       COSTS_N_INSNS (1),	/* extend_add.  */
1392       COSTS_N_INSNS (7)		/* idiv.  */
1393     },
1394     /* MULT DImode */
1395     {
1396       0,			/* simple (N/A).  */
1397       0,			/* flag_setting (N/A).  */
1398       COSTS_N_INSNS (1),	/* extend.  */
1399       0,			/* add.  */
1400       COSTS_N_INSNS (2),	/* extend_add.  */
1401       0				/* idiv (N/A).  */
1402     }
1403   },
1404   /* LD/ST */
1405   {
1406     COSTS_N_INSNS (1),	/* load.  */
1407     COSTS_N_INSNS (1),	/* load_sign_extend.  */
1408     COSTS_N_INSNS (3),	/* ldrd.  */
1409     COSTS_N_INSNS (1),	/* ldm_1st.  */
1410     1,			/* ldm_regs_per_insn_1st.  */
1411     2,			/* ldm_regs_per_insn_subsequent.  */
1412     COSTS_N_INSNS (2),	/* loadf.  */
1413     COSTS_N_INSNS (2),	/* loadd.  */
1414     COSTS_N_INSNS (1),	/* load_unaligned.  */
1415     COSTS_N_INSNS (1),	/* store.  */
1416     COSTS_N_INSNS (3),	/* strd.  */
1417     COSTS_N_INSNS (1),	/* stm_1st.  */
1418     1,			/* stm_regs_per_insn_1st.  */
1419     2,			/* stm_regs_per_insn_subsequent.  */
1420     COSTS_N_INSNS (2),	/* storef.  */
1421     COSTS_N_INSNS (2),	/* stored.  */
1422     COSTS_N_INSNS (1),	/* store_unaligned.  */
1423     COSTS_N_INSNS (1),	/* loadv.  */
1424     COSTS_N_INSNS (1)	/* storev.  */
1425   },
1426   {
1427     /* FP SFmode */
1428     {
1429       COSTS_N_INSNS (15),	/* div.  */
1430       COSTS_N_INSNS (3),	/* mult.  */
1431       COSTS_N_INSNS (7),	/* mult_addsub. */
1432       COSTS_N_INSNS (7),	/* fma.  */
1433       COSTS_N_INSNS (3),	/* addsub.  */
1434       COSTS_N_INSNS (3),	/* fpconst.  */
1435       COSTS_N_INSNS (3),	/* neg.  */
1436       COSTS_N_INSNS (3),	/* compare.  */
1437       COSTS_N_INSNS (3),	/* widen.  */
1438       COSTS_N_INSNS (3),	/* narrow.  */
1439       COSTS_N_INSNS (3),	/* toint.  */
1440       COSTS_N_INSNS (3),	/* fromint.  */
1441       COSTS_N_INSNS (3)		/* roundint.  */
1442     },
1443     /* FP DFmode */
1444     {
1445       COSTS_N_INSNS (30),	/* div.  */
1446       COSTS_N_INSNS (6),	/* mult.  */
1447       COSTS_N_INSNS (10),	/* mult_addsub.  */
1448       COSTS_N_INSNS (7),	/* fma.  */
1449       COSTS_N_INSNS (3),	/* addsub.  */
1450       COSTS_N_INSNS (3),	/* fpconst.  */
1451       COSTS_N_INSNS (3),	/* neg.  */
1452       COSTS_N_INSNS (3),	/* compare.  */
1453       COSTS_N_INSNS (3),	/* widen.  */
1454       COSTS_N_INSNS (3),	/* narrow.  */
1455       COSTS_N_INSNS (3),	/* toint.  */
1456       COSTS_N_INSNS (3),	/* fromint.  */
1457       COSTS_N_INSNS (3)		/* roundint.  */
1458     }
1459   },
1460   /* Vector */
1461   {
1462     COSTS_N_INSNS (1)	/* alu.  */
1463   }
1464 };
1465 
1466 const struct cpu_cost_table cortexa12_extra_costs =
1467 {
1468   /* ALU */
1469   {
1470     0,			/* arith.  */
1471     0,			/* logical.  */
1472     0,			/* shift.  */
1473     COSTS_N_INSNS (1),	/* shift_reg.  */
1474     COSTS_N_INSNS (1),	/* arith_shift.  */
1475     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1476     COSTS_N_INSNS (1),	/* log_shift.  */
1477     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1478     0,			/* extend.  */
1479     COSTS_N_INSNS (1),	/* extend_arith.  */
1480     0,			/* bfi.  */
1481     COSTS_N_INSNS (1),	/* bfx.  */
1482     COSTS_N_INSNS (1),	/* clz.  */
1483     COSTS_N_INSNS (1),	/* rev.  */
1484     0,			/* non_exec.  */
1485     true		/* non_exec_costs_exec.  */
1486   },
1487   /* MULT SImode */
1488   {
1489     {
1490       COSTS_N_INSNS (2),	/* simple.  */
1491       COSTS_N_INSNS (3),	/* flag_setting.  */
1492       COSTS_N_INSNS (2),	/* extend.  */
1493       COSTS_N_INSNS (3),	/* add.  */
1494       COSTS_N_INSNS (2),	/* extend_add.  */
1495       COSTS_N_INSNS (18)	/* idiv.  */
1496     },
1497     /* MULT DImode */
1498     {
1499       0,			/* simple (N/A).  */
1500       0,			/* flag_setting (N/A).  */
1501       COSTS_N_INSNS (3),	/* extend.  */
1502       0,			/* add (N/A).  */
1503       COSTS_N_INSNS (3),	/* extend_add.  */
1504       0				/* idiv (N/A).  */
1505     }
1506   },
1507   /* LD/ST */
1508   {
1509     COSTS_N_INSNS (3),	/* load.  */
1510     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1511     COSTS_N_INSNS (3),	/* ldrd.  */
1512     COSTS_N_INSNS (3),	/* ldm_1st.  */
1513     1,			/* ldm_regs_per_insn_1st.  */
1514     2,			/* ldm_regs_per_insn_subsequent.  */
1515     COSTS_N_INSNS (3),	/* loadf.  */
1516     COSTS_N_INSNS (3),	/* loadd.  */
1517     0,			/* load_unaligned.  */
1518     0,			/* store.  */
1519     0,			/* strd.  */
1520     0,			/* stm_1st.  */
1521     1,			/* stm_regs_per_insn_1st.  */
1522     2,			/* stm_regs_per_insn_subsequent.  */
1523     COSTS_N_INSNS (2),	/* storef.  */
1524     COSTS_N_INSNS (2),	/* stored.  */
1525     0,			/* store_unaligned.  */
1526     COSTS_N_INSNS (1),	/* loadv.  */
1527     COSTS_N_INSNS (1)	/* storev.  */
1528   },
1529   {
1530     /* FP SFmode */
1531     {
1532       COSTS_N_INSNS (17),	/* div.  */
1533       COSTS_N_INSNS (4),	/* mult.  */
1534       COSTS_N_INSNS (8),	/* mult_addsub. */
1535       COSTS_N_INSNS (8),	/* fma.  */
1536       COSTS_N_INSNS (4),	/* addsub.  */
1537       COSTS_N_INSNS (2),	/* fpconst. */
1538       COSTS_N_INSNS (2),	/* neg.  */
1539       COSTS_N_INSNS (2),	/* compare.  */
1540       COSTS_N_INSNS (4),	/* widen.  */
1541       COSTS_N_INSNS (4),	/* narrow.  */
1542       COSTS_N_INSNS (4),	/* toint.  */
1543       COSTS_N_INSNS (4),	/* fromint.  */
1544       COSTS_N_INSNS (4)		/* roundint.  */
1545     },
1546     /* FP DFmode */
1547     {
1548       COSTS_N_INSNS (31),	/* div.  */
1549       COSTS_N_INSNS (4),	/* mult.  */
1550       COSTS_N_INSNS (8),	/* mult_addsub.  */
1551       COSTS_N_INSNS (8),	/* fma.  */
1552       COSTS_N_INSNS (4),	/* addsub.  */
1553       COSTS_N_INSNS (2),	/* fpconst.  */
1554       COSTS_N_INSNS (2),	/* neg.  */
1555       COSTS_N_INSNS (2),	/* compare.  */
1556       COSTS_N_INSNS (4),	/* widen.  */
1557       COSTS_N_INSNS (4),	/* narrow.  */
1558       COSTS_N_INSNS (4),	/* toint.  */
1559       COSTS_N_INSNS (4),	/* fromint.  */
1560       COSTS_N_INSNS (4)		/* roundint.  */
1561     }
1562   },
1563   /* Vector */
1564   {
1565     COSTS_N_INSNS (1)	/* alu.  */
1566   }
1567 };
1568 
1569 const struct cpu_cost_table cortexa15_extra_costs =
1570 {
1571   /* ALU */
1572   {
1573     0,			/* arith.  */
1574     0,			/* logical.  */
1575     0,			/* shift.  */
1576     0,			/* shift_reg.  */
1577     COSTS_N_INSNS (1),	/* arith_shift.  */
1578     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1579     COSTS_N_INSNS (1),	/* log_shift.  */
1580     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1581     0,			/* extend.  */
1582     COSTS_N_INSNS (1),	/* extend_arith.  */
1583     COSTS_N_INSNS (1),	/* bfi.  */
1584     0,			/* bfx.  */
1585     0,			/* clz.  */
1586     0,			/* rev.  */
1587     0,			/* non_exec.  */
1588     true		/* non_exec_costs_exec.  */
1589   },
1590   /* MULT SImode */
1591   {
1592     {
1593       COSTS_N_INSNS (2),	/* simple.  */
1594       COSTS_N_INSNS (3),	/* flag_setting.  */
1595       COSTS_N_INSNS (2),	/* extend.  */
1596       COSTS_N_INSNS (2),	/* add.  */
1597       COSTS_N_INSNS (2),	/* extend_add.  */
1598       COSTS_N_INSNS (18)	/* idiv.  */
1599     },
1600     /* MULT DImode */
1601     {
1602       0,			/* simple (N/A).  */
1603       0,			/* flag_setting (N/A).  */
1604       COSTS_N_INSNS (3),	/* extend.  */
1605       0,			/* add (N/A).  */
1606       COSTS_N_INSNS (3),	/* extend_add.  */
1607       0				/* idiv (N/A).  */
1608     }
1609   },
1610   /* LD/ST */
1611   {
1612     COSTS_N_INSNS (3),	/* load.  */
1613     COSTS_N_INSNS (3),	/* load_sign_extend.  */
1614     COSTS_N_INSNS (3),	/* ldrd.  */
1615     COSTS_N_INSNS (4),	/* ldm_1st.  */
1616     1,			/* ldm_regs_per_insn_1st.  */
1617     2,			/* ldm_regs_per_insn_subsequent.  */
1618     COSTS_N_INSNS (4),	/* loadf.  */
1619     COSTS_N_INSNS (4),	/* loadd.  */
1620     0,			/* load_unaligned.  */
1621     0,			/* store.  */
1622     0,			/* strd.  */
1623     COSTS_N_INSNS (1),	/* stm_1st.  */
1624     1,			/* stm_regs_per_insn_1st.  */
1625     2,			/* stm_regs_per_insn_subsequent.  */
1626     0,			/* storef.  */
1627     0,			/* stored.  */
1628     0,			/* store_unaligned.  */
1629     COSTS_N_INSNS (1),	/* loadv.  */
1630     COSTS_N_INSNS (1)	/* storev.  */
1631   },
1632   {
1633     /* FP SFmode */
1634     {
1635       COSTS_N_INSNS (17),	/* div.  */
1636       COSTS_N_INSNS (4),	/* mult.  */
1637       COSTS_N_INSNS (8),	/* mult_addsub. */
1638       COSTS_N_INSNS (8),	/* fma.  */
1639       COSTS_N_INSNS (4),	/* addsub.  */
1640       COSTS_N_INSNS (2),	/* fpconst. */
1641       COSTS_N_INSNS (2),	/* neg.  */
1642       COSTS_N_INSNS (5),	/* compare.  */
1643       COSTS_N_INSNS (4),	/* widen.  */
1644       COSTS_N_INSNS (4),	/* narrow.  */
1645       COSTS_N_INSNS (4),	/* toint.  */
1646       COSTS_N_INSNS (4),	/* fromint.  */
1647       COSTS_N_INSNS (4)		/* roundint.  */
1648     },
1649     /* FP DFmode */
1650     {
1651       COSTS_N_INSNS (31),	/* div.  */
1652       COSTS_N_INSNS (4),	/* mult.  */
1653       COSTS_N_INSNS (8),	/* mult_addsub.  */
1654       COSTS_N_INSNS (8),	/* fma.  */
1655       COSTS_N_INSNS (4),	/* addsub.  */
1656       COSTS_N_INSNS (2),	/* fpconst.  */
1657       COSTS_N_INSNS (2),	/* neg.  */
1658       COSTS_N_INSNS (2),	/* compare.  */
1659       COSTS_N_INSNS (4),	/* widen.  */
1660       COSTS_N_INSNS (4),	/* narrow.  */
1661       COSTS_N_INSNS (4),	/* toint.  */
1662       COSTS_N_INSNS (4),	/* fromint.  */
1663       COSTS_N_INSNS (4)		/* roundint.  */
1664     }
1665   },
1666   /* Vector */
1667   {
1668     COSTS_N_INSNS (1)	/* alu.  */
1669   }
1670 };
1671 
1672 const struct cpu_cost_table v7m_extra_costs =
1673 {
1674   /* ALU */
1675   {
1676     0,			/* arith.  */
1677     0,			/* logical.  */
1678     0,			/* shift.  */
1679     0,			/* shift_reg.  */
1680     0,			/* arith_shift.  */
1681     COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1682     0,			/* log_shift.  */
1683     COSTS_N_INSNS (1),	/* log_shift_reg.  */
1684     0,			/* extend.  */
1685     COSTS_N_INSNS (1),	/* extend_arith.  */
1686     0,			/* bfi.  */
1687     0,			/* bfx.  */
1688     0,			/* clz.  */
1689     0,			/* rev.  */
1690     COSTS_N_INSNS (1),	/* non_exec.  */
1691     false		/* non_exec_costs_exec.  */
1692   },
1693   {
1694     /* MULT SImode */
1695     {
1696       COSTS_N_INSNS (1),	/* simple.  */
1697       COSTS_N_INSNS (1),	/* flag_setting.  */
1698       COSTS_N_INSNS (2),	/* extend.  */
1699       COSTS_N_INSNS (1),	/* add.  */
1700       COSTS_N_INSNS (3),	/* extend_add.  */
1701       COSTS_N_INSNS (8)		/* idiv.  */
1702     },
1703     /* MULT DImode */
1704     {
1705       0,			/* simple (N/A).  */
1706       0,			/* flag_setting (N/A).  */
1707       COSTS_N_INSNS (2),	/* extend.  */
1708       0,			/* add (N/A).  */
1709       COSTS_N_INSNS (3),	/* extend_add.  */
1710       0				/* idiv (N/A).  */
1711     }
1712   },
1713   /* LD/ST */
1714   {
1715     COSTS_N_INSNS (2),	/* load.  */
1716     0,			/* load_sign_extend.  */
1717     COSTS_N_INSNS (3),	/* ldrd.  */
1718     COSTS_N_INSNS (2),	/* ldm_1st.  */
1719     1,			/* ldm_regs_per_insn_1st.  */
1720     1,			/* ldm_regs_per_insn_subsequent.  */
1721     COSTS_N_INSNS (2),	/* loadf.  */
1722     COSTS_N_INSNS (3),	/* loadd.  */
1723     COSTS_N_INSNS (1),  /* load_unaligned.  */
1724     COSTS_N_INSNS (2),	/* store.  */
1725     COSTS_N_INSNS (3),	/* strd.  */
1726     COSTS_N_INSNS (2),	/* stm_1st.  */
1727     1,			/* stm_regs_per_insn_1st.  */
1728     1,			/* stm_regs_per_insn_subsequent.  */
1729     COSTS_N_INSNS (2),	/* storef.  */
1730     COSTS_N_INSNS (3),	/* stored.  */
1731     COSTS_N_INSNS (1),	/* store_unaligned.  */
1732     COSTS_N_INSNS (1),	/* loadv.  */
1733     COSTS_N_INSNS (1)	/* storev.  */
1734   },
1735   {
1736     /* FP SFmode */
1737     {
1738       COSTS_N_INSNS (7),	/* div.  */
1739       COSTS_N_INSNS (2),	/* mult.  */
1740       COSTS_N_INSNS (5),	/* mult_addsub.  */
1741       COSTS_N_INSNS (3),	/* fma.  */
1742       COSTS_N_INSNS (1),	/* addsub.  */
1743       0,			/* fpconst.  */
1744       0,			/* neg.  */
1745       0,			/* compare.  */
1746       0,			/* widen.  */
1747       0,			/* narrow.  */
1748       0,			/* toint.  */
1749       0,			/* fromint.  */
1750       0				/* roundint.  */
1751     },
1752     /* FP DFmode */
1753     {
1754       COSTS_N_INSNS (15),	/* div.  */
1755       COSTS_N_INSNS (5),	/* mult.  */
1756       COSTS_N_INSNS (7),	/* mult_addsub.  */
1757       COSTS_N_INSNS (7),	/* fma.  */
1758       COSTS_N_INSNS (3),	/* addsub.  */
1759       0,			/* fpconst.  */
1760       0,			/* neg.  */
1761       0,			/* compare.  */
1762       0,			/* widen.  */
1763       0,			/* narrow.  */
1764       0,			/* toint.  */
1765       0,			/* fromint.  */
1766       0				/* roundint.  */
1767     }
1768   },
1769   /* Vector */
1770   {
1771     COSTS_N_INSNS (1)	/* alu.  */
1772   }
1773 };
1774 
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1776 {
1777   /* int.  */
1778   {
1779     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1780     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1781     COSTS_N_INSNS (0)	/* AMO_WB.  */
1782   },
1783   /* float.  */
1784   {
1785     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1786     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1787     COSTS_N_INSNS (0)	/* AMO_WB.  */
1788   },
1789   /* vector.  */
1790   {
1791     COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
1792     COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
1793     COSTS_N_INSNS (0)	/* AMO_WB.  */
1794   }
1795 };
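/* Illustrative note (an assumption, not part of the original sources):
   COSTS_N_INSNS (N) is conventionally defined in rtl.h as (N) * 4, so the
   cpu_cost_table entries above are expressed in whole-instruction units of
   extra cost, while the all-zero generic_addr_mode_costs table simply means
   that no addressing mode is modelled as having any additional cost.  */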
1796 
1797 const struct tune_params arm_slowmul_tune =
1798 {
1799   &generic_extra_costs,			/* Insn extra costs.  */
1800   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1801   NULL,					/* Sched adj cost.  */
1802   arm_default_branch_cost,
1803   &arm_default_vec_cost,
1804   3,						/* Constant limit.  */
1805   5,						/* Max cond insns.  */
1806   8,						/* Memset max inline.  */
1807   1,						/* Issue rate.  */
1808   ARM_PREFETCH_NOT_BENEFICIAL,
1809   tune_params::PREF_CONST_POOL_TRUE,
1810   tune_params::PREF_LDRD_FALSE,
1811   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1812   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1813   tune_params::DISPARAGE_FLAGS_NEITHER,
1814   tune_params::PREF_NEON_64_FALSE,
1815   tune_params::PREF_NEON_STRINGOPS_FALSE,
1816   tune_params::FUSE_NOTHING,
1817   tune_params::SCHED_AUTOPREF_OFF
1818 };
1819 
1820 const struct tune_params arm_fastmul_tune =
1821 {
1822   &generic_extra_costs,			/* Insn extra costs.  */
1823   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1824   NULL,					/* Sched adj cost.  */
1825   arm_default_branch_cost,
1826   &arm_default_vec_cost,
1827   1,						/* Constant limit.  */
1828   5,						/* Max cond insns.  */
1829   8,						/* Memset max inline.  */
1830   1,						/* Issue rate.  */
1831   ARM_PREFETCH_NOT_BENEFICIAL,
1832   tune_params::PREF_CONST_POOL_TRUE,
1833   tune_params::PREF_LDRD_FALSE,
1834   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1835   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1836   tune_params::DISPARAGE_FLAGS_NEITHER,
1837   tune_params::PREF_NEON_64_FALSE,
1838   tune_params::PREF_NEON_STRINGOPS_FALSE,
1839   tune_params::FUSE_NOTHING,
1840   tune_params::SCHED_AUTOPREF_OFF
1841 };
1842 
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844    skipping is shorter.  Set max_insns_skipped to a lower value.  */
1845 
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848   &generic_extra_costs,			/* Insn extra costs.  */
1849   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1850   NULL,					/* Sched adj cost.  */
1851   arm_default_branch_cost,
1852   &arm_default_vec_cost,
1853   1,						/* Constant limit.  */
1854   3,						/* Max cond insns.  */
1855   8,						/* Memset max inline.  */
1856   1,						/* Issue rate.  */
1857   ARM_PREFETCH_NOT_BENEFICIAL,
1858   tune_params::PREF_CONST_POOL_TRUE,
1859   tune_params::PREF_LDRD_FALSE,
1860   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1861   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1862   tune_params::DISPARAGE_FLAGS_NEITHER,
1863   tune_params::PREF_NEON_64_FALSE,
1864   tune_params::PREF_NEON_STRINGOPS_FALSE,
1865   tune_params::FUSE_NOTHING,
1866   tune_params::SCHED_AUTOPREF_OFF
1867 };
1868 
1869 const struct tune_params arm_xscale_tune =
1870 {
1871   &generic_extra_costs,			/* Insn extra costs.  */
1872   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1873   xscale_sched_adjust_cost,
1874   arm_default_branch_cost,
1875   &arm_default_vec_cost,
1876   2,						/* Constant limit.  */
1877   3,						/* Max cond insns.  */
1878   8,						/* Memset max inline.  */
1879   1,						/* Issue rate.  */
1880   ARM_PREFETCH_NOT_BENEFICIAL,
1881   tune_params::PREF_CONST_POOL_TRUE,
1882   tune_params::PREF_LDRD_FALSE,
1883   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1884   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1885   tune_params::DISPARAGE_FLAGS_NEITHER,
1886   tune_params::PREF_NEON_64_FALSE,
1887   tune_params::PREF_NEON_STRINGOPS_FALSE,
1888   tune_params::FUSE_NOTHING,
1889   tune_params::SCHED_AUTOPREF_OFF
1890 };
1891 
1892 const struct tune_params arm_9e_tune =
1893 {
1894   &generic_extra_costs,			/* Insn extra costs.  */
1895   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1896   NULL,					/* Sched adj cost.  */
1897   arm_default_branch_cost,
1898   &arm_default_vec_cost,
1899   1,						/* Constant limit.  */
1900   5,						/* Max cond insns.  */
1901   8,						/* Memset max inline.  */
1902   1,						/* Issue rate.  */
1903   ARM_PREFETCH_NOT_BENEFICIAL,
1904   tune_params::PREF_CONST_POOL_TRUE,
1905   tune_params::PREF_LDRD_FALSE,
1906   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1907   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1908   tune_params::DISPARAGE_FLAGS_NEITHER,
1909   tune_params::PREF_NEON_64_FALSE,
1910   tune_params::PREF_NEON_STRINGOPS_FALSE,
1911   tune_params::FUSE_NOTHING,
1912   tune_params::SCHED_AUTOPREF_OFF
1913 };
1914 
1915 const struct tune_params arm_marvell_pj4_tune =
1916 {
1917   &generic_extra_costs,			/* Insn extra costs.  */
1918   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1919   NULL,					/* Sched adj cost.  */
1920   arm_default_branch_cost,
1921   &arm_default_vec_cost,
1922   1,						/* Constant limit.  */
1923   5,						/* Max cond insns.  */
1924   8,						/* Memset max inline.  */
1925   2,						/* Issue rate.  */
1926   ARM_PREFETCH_NOT_BENEFICIAL,
1927   tune_params::PREF_CONST_POOL_TRUE,
1928   tune_params::PREF_LDRD_FALSE,
1929   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1930   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1931   tune_params::DISPARAGE_FLAGS_NEITHER,
1932   tune_params::PREF_NEON_64_FALSE,
1933   tune_params::PREF_NEON_STRINGOPS_FALSE,
1934   tune_params::FUSE_NOTHING,
1935   tune_params::SCHED_AUTOPREF_OFF
1936 };
1937 
1938 const struct tune_params arm_v6t2_tune =
1939 {
1940   &generic_extra_costs,			/* Insn extra costs.  */
1941   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1942   NULL,					/* Sched adj cost.  */
1943   arm_default_branch_cost,
1944   &arm_default_vec_cost,
1945   1,						/* Constant limit.  */
1946   5,						/* Max cond insns.  */
1947   8,						/* Memset max inline.  */
1948   1,						/* Issue rate.  */
1949   ARM_PREFETCH_NOT_BENEFICIAL,
1950   tune_params::PREF_CONST_POOL_FALSE,
1951   tune_params::PREF_LDRD_FALSE,
1952   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1953   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1954   tune_params::DISPARAGE_FLAGS_NEITHER,
1955   tune_params::PREF_NEON_64_FALSE,
1956   tune_params::PREF_NEON_STRINGOPS_FALSE,
1957   tune_params::FUSE_NOTHING,
1958   tune_params::SCHED_AUTOPREF_OFF
1959 };
1960 
1961 
1962 /* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
1963 const struct tune_params arm_cortex_tune =
1964 {
1965   &generic_extra_costs,
1966   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1967   NULL,					/* Sched adj cost.  */
1968   arm_default_branch_cost,
1969   &arm_default_vec_cost,
1970   1,						/* Constant limit.  */
1971   5,						/* Max cond insns.  */
1972   8,						/* Memset max inline.  */
1973   2,						/* Issue rate.  */
1974   ARM_PREFETCH_NOT_BENEFICIAL,
1975   tune_params::PREF_CONST_POOL_FALSE,
1976   tune_params::PREF_LDRD_FALSE,
1977   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
1978   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
1979   tune_params::DISPARAGE_FLAGS_NEITHER,
1980   tune_params::PREF_NEON_64_FALSE,
1981   tune_params::PREF_NEON_STRINGOPS_FALSE,
1982   tune_params::FUSE_NOTHING,
1983   tune_params::SCHED_AUTOPREF_OFF
1984 };
1985 
1986 const struct tune_params arm_cortex_a8_tune =
1987 {
1988   &cortexa8_extra_costs,
1989   &generic_addr_mode_costs,		/* Addressing mode costs.  */
1990   NULL,					/* Sched adj cost.  */
1991   arm_default_branch_cost,
1992   &arm_default_vec_cost,
1993   1,						/* Constant limit.  */
1994   5,						/* Max cond insns.  */
1995   8,						/* Memset max inline.  */
1996   2,						/* Issue rate.  */
1997   ARM_PREFETCH_NOT_BENEFICIAL,
1998   tune_params::PREF_CONST_POOL_FALSE,
1999   tune_params::PREF_LDRD_FALSE,
2000   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2001   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2002   tune_params::DISPARAGE_FLAGS_NEITHER,
2003   tune_params::PREF_NEON_64_FALSE,
2004   tune_params::PREF_NEON_STRINGOPS_TRUE,
2005   tune_params::FUSE_NOTHING,
2006   tune_params::SCHED_AUTOPREF_OFF
2007 };
2008 
2009 const struct tune_params arm_cortex_a7_tune =
2010 {
2011   &cortexa7_extra_costs,
2012   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2013   NULL,					/* Sched adj cost.  */
2014   arm_default_branch_cost,
2015   &arm_default_vec_cost,
2016   1,						/* Constant limit.  */
2017   5,						/* Max cond insns.  */
2018   8,						/* Memset max inline.  */
2019   2,						/* Issue rate.  */
2020   ARM_PREFETCH_NOT_BENEFICIAL,
2021   tune_params::PREF_CONST_POOL_FALSE,
2022   tune_params::PREF_LDRD_FALSE,
2023   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2024   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2025   tune_params::DISPARAGE_FLAGS_NEITHER,
2026   tune_params::PREF_NEON_64_FALSE,
2027   tune_params::PREF_NEON_STRINGOPS_TRUE,
2028   tune_params::FUSE_NOTHING,
2029   tune_params::SCHED_AUTOPREF_OFF
2030 };
2031 
2032 const struct tune_params arm_cortex_a15_tune =
2033 {
2034   &cortexa15_extra_costs,
2035   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2036   NULL,					/* Sched adj cost.  */
2037   arm_default_branch_cost,
2038   &arm_default_vec_cost,
2039   1,						/* Constant limit.  */
2040   2,						/* Max cond insns.  */
2041   8,						/* Memset max inline.  */
2042   3,						/* Issue rate.  */
2043   ARM_PREFETCH_NOT_BENEFICIAL,
2044   tune_params::PREF_CONST_POOL_FALSE,
2045   tune_params::PREF_LDRD_TRUE,
2046   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2047   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2048   tune_params::DISPARAGE_FLAGS_ALL,
2049   tune_params::PREF_NEON_64_FALSE,
2050   tune_params::PREF_NEON_STRINGOPS_TRUE,
2051   tune_params::FUSE_NOTHING,
2052   tune_params::SCHED_AUTOPREF_FULL
2053 };
2054 
2055 const struct tune_params arm_cortex_a35_tune =
2056 {
2057   &cortexa53_extra_costs,
2058   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2059   NULL,					/* Sched adj cost.  */
2060   arm_default_branch_cost,
2061   &arm_default_vec_cost,
2062   1,						/* Constant limit.  */
2063   5,						/* Max cond insns.  */
2064   8,						/* Memset max inline.  */
2065   1,						/* Issue rate.  */
2066   ARM_PREFETCH_NOT_BENEFICIAL,
2067   tune_params::PREF_CONST_POOL_FALSE,
2068   tune_params::PREF_LDRD_FALSE,
2069   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2070   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2071   tune_params::DISPARAGE_FLAGS_NEITHER,
2072   tune_params::PREF_NEON_64_FALSE,
2073   tune_params::PREF_NEON_STRINGOPS_TRUE,
2074   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075   tune_params::SCHED_AUTOPREF_OFF
2076 };
2077 
2078 const struct tune_params arm_cortex_a53_tune =
2079 {
2080   &cortexa53_extra_costs,
2081   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2082   NULL,					/* Sched adj cost.  */
2083   arm_default_branch_cost,
2084   &arm_default_vec_cost,
2085   1,						/* Constant limit.  */
2086   5,						/* Max cond insns.  */
2087   8,						/* Memset max inline.  */
2088   2,						/* Issue rate.  */
2089   ARM_PREFETCH_NOT_BENEFICIAL,
2090   tune_params::PREF_CONST_POOL_FALSE,
2091   tune_params::PREF_LDRD_FALSE,
2092   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2093   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2094   tune_params::DISPARAGE_FLAGS_NEITHER,
2095   tune_params::PREF_NEON_64_FALSE,
2096   tune_params::PREF_NEON_STRINGOPS_TRUE,
2097   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098   tune_params::SCHED_AUTOPREF_OFF
2099 };
2100 
2101 const struct tune_params arm_cortex_a57_tune =
2102 {
2103   &cortexa57_extra_costs,
2104   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2105   NULL,					/* Sched adj cost.  */
2106   arm_default_branch_cost,
2107   &arm_default_vec_cost,
2108   1,						/* Constant limit.  */
2109   2,						/* Max cond insns.  */
2110   8,						/* Memset max inline.  */
2111   3,						/* Issue rate.  */
2112   ARM_PREFETCH_NOT_BENEFICIAL,
2113   tune_params::PREF_CONST_POOL_FALSE,
2114   tune_params::PREF_LDRD_TRUE,
2115   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2116   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2117   tune_params::DISPARAGE_FLAGS_ALL,
2118   tune_params::PREF_NEON_64_FALSE,
2119   tune_params::PREF_NEON_STRINGOPS_TRUE,
2120   FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121   tune_params::SCHED_AUTOPREF_FULL
2122 };
2123 
2124 const struct tune_params arm_exynosm1_tune =
2125 {
2126   &exynosm1_extra_costs,
2127   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2128   NULL,						/* Sched adj cost.  */
2129   arm_default_branch_cost,
2130   &arm_default_vec_cost,
2131   1,						/* Constant limit.  */
2132   2,						/* Max cond insns.  */
2133   8,						/* Memset max inline.  */
2134   3,						/* Issue rate.  */
2135   ARM_PREFETCH_NOT_BENEFICIAL,
2136   tune_params::PREF_CONST_POOL_FALSE,
2137   tune_params::PREF_LDRD_TRUE,
2138   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
2139   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
2140   tune_params::DISPARAGE_FLAGS_ALL,
2141   tune_params::PREF_NEON_64_FALSE,
2142   tune_params::PREF_NEON_STRINGOPS_TRUE,
2143   tune_params::FUSE_NOTHING,
2144   tune_params::SCHED_AUTOPREF_OFF
2145 };
2146 
2147 const struct tune_params arm_xgene1_tune =
2148 {
2149   &xgene1_extra_costs,
2150   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2151   NULL,					/* Sched adj cost.  */
2152   arm_default_branch_cost,
2153   &arm_default_vec_cost,
2154   1,						/* Constant limit.  */
2155   2,						/* Max cond insns.  */
2156   32,						/* Memset max inline.  */
2157   4,						/* Issue rate.  */
2158   ARM_PREFETCH_NOT_BENEFICIAL,
2159   tune_params::PREF_CONST_POOL_FALSE,
2160   tune_params::PREF_LDRD_TRUE,
2161   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2162   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2163   tune_params::DISPARAGE_FLAGS_ALL,
2164   tune_params::PREF_NEON_64_FALSE,
2165   tune_params::PREF_NEON_STRINGOPS_FALSE,
2166   tune_params::FUSE_NOTHING,
2167   tune_params::SCHED_AUTOPREF_OFF
2168 };
2169 
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171    less appealing.  Set max_insns_skipped to a low value.  */
2172 
2173 const struct tune_params arm_cortex_a5_tune =
2174 {
2175   &cortexa5_extra_costs,
2176   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2177   NULL,					/* Sched adj cost.  */
2178   arm_cortex_a5_branch_cost,
2179   &arm_default_vec_cost,
2180   1,						/* Constant limit.  */
2181   1,						/* Max cond insns.  */
2182   8,						/* Memset max inline.  */
2183   2,						/* Issue rate.  */
2184   ARM_PREFETCH_NOT_BENEFICIAL,
2185   tune_params::PREF_CONST_POOL_FALSE,
2186   tune_params::PREF_LDRD_FALSE,
2187   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2188   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2189   tune_params::DISPARAGE_FLAGS_NEITHER,
2190   tune_params::PREF_NEON_64_FALSE,
2191   tune_params::PREF_NEON_STRINGOPS_TRUE,
2192   tune_params::FUSE_NOTHING,
2193   tune_params::SCHED_AUTOPREF_OFF
2194 };
2195 
2196 const struct tune_params arm_cortex_a9_tune =
2197 {
2198   &cortexa9_extra_costs,
2199   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2200   cortex_a9_sched_adjust_cost,
2201   arm_default_branch_cost,
2202   &arm_default_vec_cost,
2203   1,						/* Constant limit.  */
2204   5,						/* Max cond insns.  */
2205   8,						/* Memset max inline.  */
2206   2,						/* Issue rate.  */
2207   ARM_PREFETCH_BENEFICIAL(4,32,32),
2208   tune_params::PREF_CONST_POOL_FALSE,
2209   tune_params::PREF_LDRD_FALSE,
2210   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2211   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2212   tune_params::DISPARAGE_FLAGS_NEITHER,
2213   tune_params::PREF_NEON_64_FALSE,
2214   tune_params::PREF_NEON_STRINGOPS_FALSE,
2215   tune_params::FUSE_NOTHING,
2216   tune_params::SCHED_AUTOPREF_OFF
2217 };
2218 
2219 const struct tune_params arm_cortex_a12_tune =
2220 {
2221   &cortexa12_extra_costs,
2222   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2223   NULL,					/* Sched adj cost.  */
2224   arm_default_branch_cost,
2225   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2226   1,						/* Constant limit.  */
2227   2,						/* Max cond insns.  */
2228   8,						/* Memset max inline.  */
2229   2,						/* Issue rate.  */
2230   ARM_PREFETCH_NOT_BENEFICIAL,
2231   tune_params::PREF_CONST_POOL_FALSE,
2232   tune_params::PREF_LDRD_TRUE,
2233   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2234   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2235   tune_params::DISPARAGE_FLAGS_ALL,
2236   tune_params::PREF_NEON_64_FALSE,
2237   tune_params::PREF_NEON_STRINGOPS_TRUE,
2238   FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239   tune_params::SCHED_AUTOPREF_OFF
2240 };
2241 
2242 const struct tune_params arm_cortex_a73_tune =
2243 {
2244   &cortexa57_extra_costs,
2245   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2246   NULL,						/* Sched adj cost.  */
2247   arm_default_branch_cost,
2248   &arm_default_vec_cost,			/* Vectorizer costs.  */
2249   1,						/* Constant limit.  */
2250   2,						/* Max cond insns.  */
2251   8,						/* Memset max inline.  */
2252   2,						/* Issue rate.  */
2253   ARM_PREFETCH_NOT_BENEFICIAL,
2254   tune_params::PREF_CONST_POOL_FALSE,
2255   tune_params::PREF_LDRD_TRUE,
2256   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2257   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2258   tune_params::DISPARAGE_FLAGS_ALL,
2259   tune_params::PREF_NEON_64_FALSE,
2260   tune_params::PREF_NEON_STRINGOPS_TRUE,
2261   FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262   tune_params::SCHED_AUTOPREF_FULL
2263 };
2264 
2265 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
2266    cycle to execute each.  An LDR from the constant pool also takes two cycles
2267    to execute, but mildly increases pipelining opportunity (consecutive
2268    loads/stores can be pipelined together, saving one cycle), and may also
2269    improve icache utilisation.  Hence we prefer the constant pool for such
2270    processors.  */
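/* As an illustrative sketch (not part of the original comment; the label
   name is hypothetical), the two ways of materialising a 32-bit constant
   discussed above are roughly:

     movw  r0, #:lower16:CONST   @ 1 cycle
     movt  r0, #:upper16:CONST   @ 1 cycle

   versus a literal-pool load:

     ldr   r0, .Lpool_entry      @ ~2 cycles, but can pipeline with
                                 @ neighbouring loads/stores

   which is why PREF_CONST_POOL_TRUE is selected in the tuning below.  */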
2271 
2272 const struct tune_params arm_v7m_tune =
2273 {
2274   &v7m_extra_costs,
2275   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2276   NULL,					/* Sched adj cost.  */
2277   arm_cortex_m_branch_cost,
2278   &arm_default_vec_cost,
2279   1,						/* Constant limit.  */
2280   2,						/* Max cond insns.  */
2281   8,						/* Memset max inline.  */
2282   1,						/* Issue rate.  */
2283   ARM_PREFETCH_NOT_BENEFICIAL,
2284   tune_params::PREF_CONST_POOL_TRUE,
2285   tune_params::PREF_LDRD_FALSE,
2286   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2287   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2288   tune_params::DISPARAGE_FLAGS_NEITHER,
2289   tune_params::PREF_NEON_64_FALSE,
2290   tune_params::PREF_NEON_STRINGOPS_FALSE,
2291   tune_params::FUSE_NOTHING,
2292   tune_params::SCHED_AUTOPREF_OFF
2293 };
2294 
2295 /* Cortex-M7 tuning.  */
2296 
2297 const struct tune_params arm_cortex_m7_tune =
2298 {
2299   &v7m_extra_costs,
2300   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2301   NULL,					/* Sched adj cost.  */
2302   arm_cortex_m7_branch_cost,
2303   &arm_default_vec_cost,
2304   0,						/* Constant limit.  */
2305   1,						/* Max cond insns.  */
2306   8,						/* Memset max inline.  */
2307   2,						/* Issue rate.  */
2308   ARM_PREFETCH_NOT_BENEFICIAL,
2309   tune_params::PREF_CONST_POOL_TRUE,
2310   tune_params::PREF_LDRD_FALSE,
2311   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2312   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2313   tune_params::DISPARAGE_FLAGS_NEITHER,
2314   tune_params::PREF_NEON_64_FALSE,
2315   tune_params::PREF_NEON_STRINGOPS_FALSE,
2316   tune_params::FUSE_NOTHING,
2317   tune_params::SCHED_AUTOPREF_OFF
2318 };
2319 
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321    arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322    cortex-m23.  */
2323 const struct tune_params arm_v6m_tune =
2324 {
2325   &generic_extra_costs,			/* Insn extra costs.  */
2326   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2327   NULL,					/* Sched adj cost.  */
2328   arm_default_branch_cost,
2329   &arm_default_vec_cost,                        /* Vectorizer costs.  */
2330   1,						/* Constant limit.  */
2331   5,						/* Max cond insns.  */
2332   8,						/* Memset max inline.  */
2333   1,						/* Issue rate.  */
2334   ARM_PREFETCH_NOT_BENEFICIAL,
2335   tune_params::PREF_CONST_POOL_FALSE,
2336   tune_params::PREF_LDRD_FALSE,
2337   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
2338   tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
2339   tune_params::DISPARAGE_FLAGS_NEITHER,
2340   tune_params::PREF_NEON_64_FALSE,
2341   tune_params::PREF_NEON_STRINGOPS_FALSE,
2342   tune_params::FUSE_NOTHING,
2343   tune_params::SCHED_AUTOPREF_OFF
2344 };
2345 
2346 const struct tune_params arm_fa726te_tune =
2347 {
2348   &generic_extra_costs,				/* Insn extra costs.  */
2349   &generic_addr_mode_costs,			/* Addressing mode costs.  */
2350   fa726te_sched_adjust_cost,
2351   arm_default_branch_cost,
2352   &arm_default_vec_cost,
2353   1,						/* Constant limit.  */
2354   5,						/* Max cond insns.  */
2355   8,						/* Memset max inline.  */
2356   2,						/* Issue rate.  */
2357   ARM_PREFETCH_NOT_BENEFICIAL,
2358   tune_params::PREF_CONST_POOL_TRUE,
2359   tune_params::PREF_LDRD_FALSE,
2360   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
2361   tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
2362   tune_params::DISPARAGE_FLAGS_NEITHER,
2363   tune_params::PREF_NEON_64_FALSE,
2364   tune_params::PREF_NEON_STRINGOPS_FALSE,
2365   tune_params::FUSE_NOTHING,
2366   tune_params::SCHED_AUTOPREF_OFF
2367 };
2368 
2369 /* Auto-generated CPU, FPU and architecture tables.  */
2370 #include "arm-cpu-data.h"
2371 
2372 /* The name of the preprocessor macro to define for this architecture.  PROFILE
2373    is replaced by the architecture name (eg. 8A) in arm_option_override () and
2374    is thus chosen to be big enough to hold the longest architecture name.  */
2375 
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2377 
2378 /* Supported TLS relocations.  */
2379 
2380 enum tls_reloc {
2381   TLS_GD32,
2382   TLS_LDM32,
2383   TLS_LDO32,
2384   TLS_IE32,
2385   TLS_LE32,
2386   TLS_DESCSEQ	/* GNU scheme */
2387 };
2388 
2389 /* The maximum number of insns to be used when loading a constant.  */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2392 {
2393   return size_p ? 1 : current_tune->constant_limit;
2394 }
2395 
2396 /* Emit an insn that's a simple single-set.  Both the operands must be known
2397    to be valid.  */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2400 {
2401   return emit_insn (gen_rtx_SET (x, y));
2402 }
2403 
2404 /* Return the number of bits set in VALUE.  */
2405 static unsigned
2406 bit_count (unsigned long value)
2407 {
2408   unsigned long count = 0;
2409 
2410   while (value)
2411     {
2412       count++;
2413       value &= value - 1;  /* Clear the least-significant set bit.  */
2414     }
2415 
2416   return count;
2417 }
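/* Illustrative trace (not in the original source): for VALUE = 0b101100 the
   loop clears one set bit per iteration,

     0b101100 -> 0b101000 -> 0b100000 -> 0b000000

   so bit_count returns 3 after three iterations.  */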
2418 
2419 /* Return the number of bits set in BMAP.  */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2422 {
2423   unsigned int count = 0;
2424   unsigned int n = 0;
2425   sbitmap_iterator sbi;
2426 
2427   EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428     count++;
2429   return count;
2430 }
2431 
2432 typedef struct
2433 {
2434   machine_mode mode;
2435   const char *name;
2436 } arm_fixed_mode_set;
2437 
2438 /* A small helper for setting fixed-point library libfuncs.  */
2439 
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 			     const char *funcname, const char *modename,
2443 			     int num_suffix)
2444 {
2445   char buffer[50];
2446 
2447   if (num_suffix == 0)
2448     sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449   else
2450     sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2451 
2452   set_optab_libfunc (optable, mode, buffer);
2453 }
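/* For example (illustrative arguments only), the call

     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);

   registers the libcall name "__gnu_addsq3" for SQmode addition, while a
   NUM_SUFFIX of 0 would drop the trailing digit.  */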
2454 
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 			    machine_mode from, const char *funcname,
2458 			    const char *toname, const char *fromname)
2459 {
2460   char buffer[50];
2461   const char *maybe_suffix_2 = "";
2462 
2463   /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
2464   if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465       && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466       && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467     maybe_suffix_2 = "2";
2468 
2469   sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 	   maybe_suffix_2);
2471 
2472   set_conv_libfunc (optable, to, from, buffer);
2473 }
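/* For example (illustrative): a conversion between two signed fractional
   modes, say from SQmode ("sq") to DQmode ("dq"), satisfies the fixed-bit
   rule above and gets the "2" suffix, producing "__gnu_fractsqdq2"; a
   conversion from SImode ("si") to SQmode ("sq") does not, producing
   "__gnu_fractsisq".  */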
2474 
2475 /* Set up library functions unique to ARM.  */
2476 
2477 static void
2478 arm_init_libfuncs (void)
2479 {
2480   /* For Linux, we have access to kernel support for atomic operations.  */
2481   if (arm_abi == ARM_ABI_AAPCS_LINUX)
2482     init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2483 
2484   /* There are no special library functions unless we are using the
2485      ARM BPABI.  */
2486   if (!TARGET_BPABI)
2487     return;
2488 
2489   /* The functions below are described in Section 4 of the "Run-Time
2490      ABI for the ARM architecture", Version 1.0.  */
2491 
2492   /* Double-precision floating-point arithmetic.  Table 2.  */
2493   set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2494   set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2495   set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2496   set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2497   set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2498 
2499   /* Double-precision comparisons.  Table 3.  */
2500   set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2501   set_optab_libfunc (ne_optab, DFmode, NULL);
2502   set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2503   set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2504   set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2505   set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2506   set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2507 
2508   /* Single-precision floating-point arithmetic.  Table 4.  */
2509   set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2510   set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2511   set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2512   set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2513   set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2514 
2515   /* Single-precision comparisons.  Table 5.  */
2516   set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2517   set_optab_libfunc (ne_optab, SFmode, NULL);
2518   set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2519   set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2520   set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2521   set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2522   set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2523 
2524   /* Floating-point to integer conversions.  Table 6.  */
2525   set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2526   set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2527   set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2528   set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2529   set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2530   set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2531   set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2532   set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2533 
2534   /* Conversions between floating types.  Table 7.  */
2535   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2536   set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2537 
2538   /* Integer to floating-point conversions.  Table 8.  */
2539   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2540   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2541   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2542   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2543   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2544   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2545   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2546   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2547 
2548   /* Long long.  Table 9.  */
2549   set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2550   set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2551   set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2552   set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2553   set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2554   set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2555   set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2556   set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2557 
2558   /* Integer (32/32->32) division.  \S 4.3.1.  */
2559   set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2560   set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2561 
2562   /* The divmod functions are designed so that they can be used for
2563      plain division, even though they return both the quotient and the
2564      remainder.  The quotient is returned in the usual location (i.e.,
2565      r0 for SImode, {r0, r1} for DImode), just as would be expected
2566      for an ordinary division routine.  Because the AAPCS calling
2567      conventions specify that all of { r0, r1, r2, r3 } are
2568      call-clobbered registers, there is no need to tell the compiler
2569      explicitly that those registers are clobbered by these
2570      routines.  */
2571   set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2572   set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2573 
2574   /* For SImode division the ABI provides div-without-mod routines,
2575      which are faster.  */
2576   set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2577   set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2578 
2579   /* We don't have mod libcalls.  Fortunately gcc knows how to use the
2580      divmod libcalls instead.  */
2581   set_optab_libfunc (smod_optab, DImode, NULL);
2582   set_optab_libfunc (umod_optab, DImode, NULL);
2583   set_optab_libfunc (smod_optab, SImode, NULL);
2584   set_optab_libfunc (umod_optab, SImode, NULL);
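  /* Illustrative mapping (not from the original comments): on a target
     without a hardware divide, with the setup above a 32-bit

       q = a / b;    expands to a call to __aeabi_idiv (quotient only),
       r = a % b;    expands to a call to __aeabi_idivmod, with the
                     remainder taken from r1,

     since the SImode smod/umod optabs are NULLed and GCC falls back to
     the divmod libcall for the remainder.  */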
2585 
2586   /* Half-precision float operations.  The compiler handles all operations
2587      with NULL libfuncs by converting to SFmode.  */
2588   switch (arm_fp16_format)
2589     {
2590     case ARM_FP16_FORMAT_IEEE:
2591     case ARM_FP16_FORMAT_ALTERNATIVE:
2592 
2593       /* Conversions.  */
2594       set_conv_libfunc (trunc_optab, HFmode, SFmode,
2595 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2596 			 ? "__gnu_f2h_ieee"
2597 			 : "__gnu_f2h_alternative"));
2598       set_conv_libfunc (sext_optab, SFmode, HFmode,
2599 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2600 			 ? "__gnu_h2f_ieee"
2601 			 : "__gnu_h2f_alternative"));
2602 
2603       set_conv_libfunc (trunc_optab, HFmode, DFmode,
2604 			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
2605 			 ? "__gnu_d2h_ieee"
2606 			 : "__gnu_d2h_alternative"));
2607 
2608       /* Arithmetic.  */
2609       set_optab_libfunc (add_optab, HFmode, NULL);
2610       set_optab_libfunc (sdiv_optab, HFmode, NULL);
2611       set_optab_libfunc (smul_optab, HFmode, NULL);
2612       set_optab_libfunc (neg_optab, HFmode, NULL);
2613       set_optab_libfunc (sub_optab, HFmode, NULL);
2614 
2615       /* Comparisons.  */
2616       set_optab_libfunc (eq_optab, HFmode, NULL);
2617       set_optab_libfunc (ne_optab, HFmode, NULL);
2618       set_optab_libfunc (lt_optab, HFmode, NULL);
2619       set_optab_libfunc (le_optab, HFmode, NULL);
2620       set_optab_libfunc (ge_optab, HFmode, NULL);
2621       set_optab_libfunc (gt_optab, HFmode, NULL);
2622       set_optab_libfunc (unord_optab, HFmode, NULL);
2623       break;
2624 
2625     default:
2626       break;
2627     }
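  /* Roughly speaking (illustrative, for __fp16 operands): with every HFmode
     arithmetic and comparison optab NULLed above, an expression such as
     z = x + y is expanded as

       z = (__fp16) ((float) x + (float) y);

     using the __gnu_h2f_... and __gnu_f2h_... conversion helpers registered
     here together with an ordinary SFmode addition.  */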
2628 
2629   /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
2630   {
2631     const arm_fixed_mode_set fixed_arith_modes[] =
2632       {
2633 	{ E_QQmode, "qq" },
2634 	{ E_UQQmode, "uqq" },
2635 	{ E_HQmode, "hq" },
2636 	{ E_UHQmode, "uhq" },
2637 	{ E_SQmode, "sq" },
2638 	{ E_USQmode, "usq" },
2639 	{ E_DQmode, "dq" },
2640 	{ E_UDQmode, "udq" },
2641 	{ E_TQmode, "tq" },
2642 	{ E_UTQmode, "utq" },
2643 	{ E_HAmode, "ha" },
2644 	{ E_UHAmode, "uha" },
2645 	{ E_SAmode, "sa" },
2646 	{ E_USAmode, "usa" },
2647 	{ E_DAmode, "da" },
2648 	{ E_UDAmode, "uda" },
2649 	{ E_TAmode, "ta" },
2650 	{ E_UTAmode, "uta" }
2651       };
2652     const arm_fixed_mode_set fixed_conv_modes[] =
2653       {
2654 	{ E_QQmode, "qq" },
2655 	{ E_UQQmode, "uqq" },
2656 	{ E_HQmode, "hq" },
2657 	{ E_UHQmode, "uhq" },
2658 	{ E_SQmode, "sq" },
2659 	{ E_USQmode, "usq" },
2660 	{ E_DQmode, "dq" },
2661 	{ E_UDQmode, "udq" },
2662 	{ E_TQmode, "tq" },
2663 	{ E_UTQmode, "utq" },
2664 	{ E_HAmode, "ha" },
2665 	{ E_UHAmode, "uha" },
2666 	{ E_SAmode, "sa" },
2667 	{ E_USAmode, "usa" },
2668 	{ E_DAmode, "da" },
2669 	{ E_UDAmode, "uda" },
2670 	{ E_TAmode, "ta" },
2671 	{ E_UTAmode, "uta" },
2672 	{ E_QImode, "qi" },
2673 	{ E_HImode, "hi" },
2674 	{ E_SImode, "si" },
2675 	{ E_DImode, "di" },
2676 	{ E_TImode, "ti" },
2677 	{ E_SFmode, "sf" },
2678 	{ E_DFmode, "df" }
2679       };
2680     unsigned int i, j;
2681 
2682     for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2683       {
2684 	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2685 				     "add", fixed_arith_modes[i].name, 3);
2686 	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2687 				     "ssadd", fixed_arith_modes[i].name, 3);
2688 	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2689 				     "usadd", fixed_arith_modes[i].name, 3);
2690 	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2691 				     "sub", fixed_arith_modes[i].name, 3);
2692 	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2693 				     "sssub", fixed_arith_modes[i].name, 3);
2694 	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2695 				     "ussub", fixed_arith_modes[i].name, 3);
2696 	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2697 				     "mul", fixed_arith_modes[i].name, 3);
2698 	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2699 				     "ssmul", fixed_arith_modes[i].name, 3);
2700 	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2701 				     "usmul", fixed_arith_modes[i].name, 3);
2702 	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2703 				     "div", fixed_arith_modes[i].name, 3);
2704 	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2705 				     "udiv", fixed_arith_modes[i].name, 3);
2706 	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2707 				     "ssdiv", fixed_arith_modes[i].name, 3);
2708 	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2709 				     "usdiv", fixed_arith_modes[i].name, 3);
2710 	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2711 				     "neg", fixed_arith_modes[i].name, 2);
2712 	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2713 				     "ssneg", fixed_arith_modes[i].name, 2);
2714 	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2715 				     "usneg", fixed_arith_modes[i].name, 2);
2716 	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2717 				     "ashl", fixed_arith_modes[i].name, 3);
2718 	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2719 				     "ashr", fixed_arith_modes[i].name, 3);
2720 	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2721 				     "lshr", fixed_arith_modes[i].name, 3);
2722 	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2723 				     "ssashl", fixed_arith_modes[i].name, 3);
2724 	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2725 				     "usashl", fixed_arith_modes[i].name, 3);
2726 	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2727 				     "cmp", fixed_arith_modes[i].name, 2);
2728       }
2729 
2730     for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2731       for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2732 	{
2733 	  if (i == j
2734 	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2735 		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2736 	    continue;
2737 
2738 	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2739 				      fixed_conv_modes[j].mode, "fract",
2740 				      fixed_conv_modes[i].name,
2741 				      fixed_conv_modes[j].name);
2742 	  arm_set_fixed_conv_libfunc (satfract_optab,
2743 				      fixed_conv_modes[i].mode,
2744 				      fixed_conv_modes[j].mode, "satfract",
2745 				      fixed_conv_modes[i].name,
2746 				      fixed_conv_modes[j].name);
2747 	  arm_set_fixed_conv_libfunc (fractuns_optab,
2748 				      fixed_conv_modes[i].mode,
2749 				      fixed_conv_modes[j].mode, "fractuns",
2750 				      fixed_conv_modes[i].name,
2751 				      fixed_conv_modes[j].name);
2752 	  arm_set_fixed_conv_libfunc (satfractuns_optab,
2753 				      fixed_conv_modes[i].mode,
2754 				      fixed_conv_modes[j].mode, "satfractuns",
2755 				      fixed_conv_modes[i].name,
2756 				      fixed_conv_modes[j].name);
2757 	}
2758   }
2759 
2760   if (TARGET_AAPCS_BASED)
2761     synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2762 }
2763 
2764 /* On AAPCS systems, this is the "struct __va_list".  */
2765 static GTY(()) tree va_list_type;
2766 
2767 /* Return the type to use as __builtin_va_list.  */
2768 static tree
2769 arm_build_builtin_va_list (void)
2770 {
2771   tree va_list_name;
2772   tree ap_field;
2773 
2774   if (!TARGET_AAPCS_BASED)
2775     return std_build_builtin_va_list ();
2776 
2777   /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2778      defined as:
2779 
2780        struct __va_list
2781        {
2782 	 void *__ap;
2783        };
2784 
2785      The C Library ABI further reinforces this definition in \S
2786      4.1.
2787 
2788      We must follow this definition exactly.  The structure tag
2789      name is visible in C++ mangled names, and thus forms a part
2790      of the ABI.  The field name may be used by people who
2791      #include <stdarg.h>.  */
2792   /* Create the type.  */
2793   va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2794   /* Give it the required name.  */
2795   va_list_name = build_decl (BUILTINS_LOCATION,
2796 			     TYPE_DECL,
2797 			     get_identifier ("__va_list"),
2798 			     va_list_type);
2799   DECL_ARTIFICIAL (va_list_name) = 1;
2800   TYPE_NAME (va_list_type) = va_list_name;
2801   TYPE_STUB_DECL (va_list_type) = va_list_name;
2802   /* Create the __ap field.  */
2803   ap_field = build_decl (BUILTINS_LOCATION,
2804 			 FIELD_DECL,
2805 			 get_identifier ("__ap"),
2806 			 ptr_type_node);
2807   DECL_ARTIFICIAL (ap_field) = 1;
2808   DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2809   TYPE_FIELDS (va_list_type) = ap_field;
2810   /* Compute its layout.  */
2811   layout_type (va_list_type);
2812 
2813   return va_list_type;
2814 }
2815 
2816 /* Return an expression of type "void *" pointing to the next
2817    available argument in a variable-argument list.  VALIST is the
2818    user-level va_list object, of type __builtin_va_list.  */
2819 static tree
2820 arm_extract_valist_ptr (tree valist)
2821 {
2822   if (TREE_TYPE (valist) == error_mark_node)
2823     return error_mark_node;
2824 
2825   /* On an AAPCS target, the pointer is stored within "struct
2826      va_list".  */
2827   if (TARGET_AAPCS_BASED)
2828     {
2829       tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2830       valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2831 		       valist, ap_field, NULL_TREE);
2832     }
2833 
2834   return valist;
2835 }
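/* Illustrative effect (not in the original source): given the AAPCS
   definition of struct __va_list above, for a user-level object "ap" this
   returns the tree equivalent of "ap.__ap", i.e. the raw void * cursor that
   the standard va_arg machinery then advances.  */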
2836 
2837 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2838 static void
2839 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2840 {
2841   valist = arm_extract_valist_ptr (valist);
2842   std_expand_builtin_va_start (valist, nextarg);
2843 }
2844 
2845 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
2846 static tree
2847 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2848 			  gimple_seq *post_p)
2849 {
2850   valist = arm_extract_valist_ptr (valist);
2851   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2852 }
2853 
2854 /* Check any incompatible options that the user has specified.  */
2855 static void
2856 arm_option_check_internal (struct gcc_options *opts)
2857 {
2858   int flags = opts->x_target_flags;
2859 
2860   /* iWMMXt and NEON are incompatible.  */
2861   if (TARGET_IWMMXT
2862       && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2863     error ("iWMMXt and NEON are incompatible");
2864 
2865   /* Make sure that the processor choice does not conflict with any of the
2866      other command line choices.  */
2867   if (TARGET_ARM_P (flags)
2868       && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2869     error ("target CPU does not support ARM mode");
2870 
2871   /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
2872   if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2873     warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2874 
2875   if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2876     warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2877 
2878   /* If this target is normally configured to use APCS frames, warn if they
2879      are turned off and debugging is turned on.  */
2880   if (TARGET_ARM_P (flags)
2881       && write_symbols != NO_DEBUG
2882       && !TARGET_APCS_FRAME
2883       && (TARGET_DEFAULT & MASK_APCS_FRAME))
2884     warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2885 
2886   /* iWMMXt unsupported under Thumb mode.  */
2887   if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2888     error ("iWMMXt unsupported under Thumb mode");
2889 
2890   if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2891     error ("can not use -mtp=cp15 with 16-bit Thumb");
2892 
2893   if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2894     {
2895       error ("RTP PIC is incompatible with Thumb");
2896       flag_pic = 0;
2897     }
2898 
2899   /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2900      with MOVT.  */
2901   if ((target_pure_code || target_slow_flash_data)
2902       && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2903     {
2904       const char *flag = (target_pure_code ? "-mpure-code" :
2905 					     "-mslow-flash-data");
2906       error ("%s only supports non-pic code on M-profile targets with the "
2907 	     "MOVT instruction", flag);
2908     }
2909 
2910 }
2911 
2912 /* Recompute the global settings depending on target attribute options.  */
2913 
2914 static void
2915 arm_option_params_internal (void)
2916 {
2917   /* If we are not using the default (ARM mode) section anchor offset
2918      ranges, then set the correct ranges now.  */
2919   if (TARGET_THUMB1)
2920     {
2921       /* Thumb-1 LDR instructions cannot have negative offsets.
2922          Permissible positive offset ranges are 5-bit (for byte loads),
2923          6-bit (for halfword loads), or 7-bit (for word loads).
2924          Empirical results suggest a 7-bit anchor range gives the best
2925          overall code size.  */
2926       targetm.min_anchor_offset = 0;
2927       targetm.max_anchor_offset = 127;
2928     }
2929   else if (TARGET_THUMB2)
2930     {
2931       /* The minimum is set such that the total size of the block
2932          for a particular anchor is 248 + 1 + 4095 bytes, which is
2933          divisible by eight, ensuring natural spacing of anchors.  */
2934       targetm.min_anchor_offset = -248;
2935       targetm.max_anchor_offset = 4095;
2936     }
2937   else
2938     {
2939       targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2940       targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2941     }
2942 
2943   /* Increase the number of conditional instructions with -Os.  */
2944   max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2945 
2946   /* For THUMB2, we limit the conditional sequence to one IT block.  */
2947   if (TARGET_THUMB2)
2948     max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2949 }
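/* Illustrative effect of the anchor ranges chosen above (editor's note):
   two globals whose offsets from a section anchor both fall inside
   [min_anchor_offset, max_anchor_offset] can share one address load,
   e.g. in Thumb-1 (range 0..127):

	ldr	r3, .Lanchor	@ load the anchor address once
	ldr	r0, [r3, #4]	@ first global, anchor + 4
	ldr	r1, [r3, #8]	@ second global, anchor + 8

   Globals outside the range need a separate anchor or address load.  */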
2950 
2951 /* True if -mflip-thumb should next add an attribute for the default
2952    mode, false if it should next add an attribute for the opposite mode.  */
2953 static GTY(()) bool thumb_flipper;
2954 
2955 /* Options after initial target override.  */
2956 static GTY(()) tree init_optimize;
2957 
2958 static void
2959 arm_override_options_after_change_1 (struct gcc_options *opts)
2960 {
2961   if (opts->x_align_functions <= 0)
2962     opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2963       && opts->x_optimize_size ? 2 : 4;
2964 }
2965 
2966 /* Implement targetm.override_options_after_change.  */
2967 
2968 static void
2969 arm_override_options_after_change (void)
2970 {
2971   arm_configure_build_target (&arm_active_target,
2972 			      TREE_TARGET_OPTION (target_option_default_node),
2973 			      &global_options_set, false);
2974 
2975   arm_override_options_after_change_1 (&global_options);
2976 }
2977 
2978 /* Implement TARGET_OPTION_SAVE.  */
2979 static void
2980 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2981 {
2982   ptr->x_arm_arch_string = opts->x_arm_arch_string;
2983   ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2984   ptr->x_arm_tune_string = opts->x_arm_tune_string;
2985 }
2986 
2987 /* Implement TARGET_OPTION_RESTORE.  */
2988 static void
2989 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2990 {
2991   opts->x_arm_arch_string = ptr->x_arm_arch_string;
2992   opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2993   opts->x_arm_tune_string = ptr->x_arm_tune_string;
2994   arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2995 			      false);
2996 }
2997 
2998 /* Reset options between modes that the user has specified.  */
2999 static void
3000 arm_option_override_internal (struct gcc_options *opts,
3001 			      struct gcc_options *opts_set)
3002 {
3003   arm_override_options_after_change_1 (opts);
3004 
3005   if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3006     {
3007       /* The default is to enable interworking, so this warning message would
3008 	 be confusing to users who have just compiled with, eg, -march=armv3.  */
3009       /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3010       opts->x_target_flags &= ~MASK_INTERWORK;
3011     }
3012 
3013   if (TARGET_THUMB_P (opts->x_target_flags)
3014       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3015     {
3016       warning (0, "target CPU does not support THUMB instructions");
3017       opts->x_target_flags &= ~MASK_THUMB;
3018     }
3019 
3020   if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3021     {
3022       /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3023       opts->x_target_flags &= ~MASK_APCS_FRAME;
3024     }
3025 
3026   /* Callee super interworking implies thumb interworking.  Adding
3027      this to the flags here simplifies the logic elsewhere.  */
3028   if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3029     opts->x_target_flags |= MASK_INTERWORK;
3030 
3031   /* Need to remember initial values so combinations of options like
3032      -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3033   cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3034 
3035   if (! opts_set->x_arm_restrict_it)
3036     opts->x_arm_restrict_it = arm_arch8;
3037 
3038   /* ARM execution state and M profile don't have [restrict] IT.  */
3039   if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3040     opts->x_arm_restrict_it = 0;
3041 
3042   /* Enable -munaligned-access by default for
3043      - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3044      i.e. Thumb2 and ARM state only.
3045      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3046      - ARMv8 architecture-based processors.
3047 
3048      Disable -munaligned-access by default for
3049      - all pre-ARMv6 architecture-based processors
3050      - ARMv6-M architecture-based processors
3051      - ARMv8-M Baseline processors.  */
3052 
3053   if (! opts_set->x_unaligned_access)
3054     {
3055       opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3056 			  && arm_arch6 && (arm_arch_notm || arm_arch7));
3057     }
3058   else if (opts->x_unaligned_access == 1
3059 	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3060     {
3061       warning (0, "target CPU does not support unaligned accesses");
3062       opts->x_unaligned_access = 0;
3063     }
3064 
3065   /* Don't warn since it's on by default in -O2.  */
3066   if (TARGET_THUMB1_P (opts->x_target_flags))
3067     opts->x_flag_schedule_insns = 0;
3068   else
3069     opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3070 
3071   /* Disable shrink-wrap when optimizing function for size, since it tends to
3072      generate additional returns.  */
3073   if (optimize_function_for_size_p (cfun)
3074       && TARGET_THUMB2_P (opts->x_target_flags))
3075     opts->x_flag_shrink_wrap = false;
3076   else
3077     opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3078 
3079   /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3080      - epilogue_insns - does not accurately model the corresponding insns
3081      emitted in the asm file.  In particular, see the comment in thumb_exit
3082      'Find out how many of the (return) argument registers we can corrupt'.
3083      As a consequence, the epilogue may clobber registers without fipa-ra
3084      finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
3085      TODO: Accurately model clobbers for epilogue_insns and reenable
3086      fipa-ra.  */
3087   if (TARGET_THUMB1_P (opts->x_target_flags))
3088     opts->x_flag_ipa_ra = 0;
3089   else
3090     opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3091 
3092   /* Thumb2 inline assembly code should always use unified syntax.
3093      This will apply to ARM and Thumb1 eventually.  */
3094   if (TARGET_THUMB2_P (opts->x_target_flags))
3095     opts->x_inline_asm_unified = true;
3096 
3097 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3098   SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3099 #endif
3100 }
3101 
3102 static sbitmap isa_all_fpubits;
3103 static sbitmap isa_quirkbits;
3104 
3105 /* Configure a build target TARGET from the user-specified options OPTS and
3106    OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3107    architecture have been specified, but the two are not identical.  */
3108 void
3109 arm_configure_build_target (struct arm_build_target *target,
3110 			    struct cl_target_option *opts,
3111 			    struct gcc_options *opts_set,
3112 			    bool warn_compatible)
3113 {
3114   const cpu_option *arm_selected_tune = NULL;
3115   const arch_option *arm_selected_arch = NULL;
3116   const cpu_option *arm_selected_cpu = NULL;
3117   const arm_fpu_desc *arm_selected_fpu = NULL;
3118   const char *tune_opts = NULL;
3119   const char *arch_opts = NULL;
3120   const char *cpu_opts = NULL;
3121 
3122   bitmap_clear (target->isa);
3123   target->core_name = NULL;
3124   target->arch_name = NULL;
3125 
3126   if (opts_set->x_arm_arch_string)
3127     {
3128       arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3129 						      "-march",
3130 						      opts->x_arm_arch_string);
3131       arch_opts = strchr (opts->x_arm_arch_string, '+');
3132     }
3133 
3134   if (opts_set->x_arm_cpu_string)
3135     {
3136       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3137 						    opts->x_arm_cpu_string);
3138       cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3139       arm_selected_tune = arm_selected_cpu;
3140       /* If taking the tuning from -mcpu, we don't need to rescan the
3141 	 options for tuning.  */
3142     }
3143 
3144   if (opts_set->x_arm_tune_string)
3145     {
3146       arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3147 						     opts->x_arm_tune_string);
3148       tune_opts = strchr (opts->x_arm_tune_string, '+');
3149     }
3150 
3151   if (arm_selected_arch)
3152     {
3153       arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3154       arm_parse_option_features (target->isa, &arm_selected_arch->common,
3155 				 arch_opts);
3156 
3157       if (arm_selected_cpu)
3158 	{
3159 	  auto_sbitmap cpu_isa (isa_num_bits);
3160 	  auto_sbitmap isa_delta (isa_num_bits);
3161 
3162 	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3163 	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3164 				     cpu_opts);
3165 	  bitmap_xor (isa_delta, cpu_isa, target->isa);
3166 	  /* Ignore any bits that are quirk bits.  */
3167 	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3168 	  /* Ignore (for now) any bits that might be set by -mfpu.  */
3169 	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3170 
3171 	  if (!bitmap_empty_p (isa_delta))
3172 	    {
3173 	      if (warn_compatible)
3174 		warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3175 			 arm_selected_cpu->common.name,
3176 			 arm_selected_arch->common.name);
3177 	      /* -march wins for code generation.
3178 		 -mcpu wins for default tuning.  */
3179 	      if (!arm_selected_tune)
3180 		arm_selected_tune = arm_selected_cpu;
3181 
3182 	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3183 	      target->arch_name = arm_selected_arch->common.name;
3184 	    }
3185 	  else
3186 	    {
3187 	      /* Architecture and CPU are essentially the same.
3188 		 Prefer the CPU setting.  */
3189 	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3190 	      target->core_name = arm_selected_cpu->common.name;
3191 	      /* Copy the CPU's capabilities, so that we inherit the
3192 		 appropriate extensions and quirks.  */
3193 	      bitmap_copy (target->isa, cpu_isa);
3194 	    }
3195 	}
3196       else
3197 	{
3198 	  /* Pick a CPU based on the architecture.  */
3199 	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3200 	  target->arch_name = arm_selected_arch->common.name;
3201 	  /* Note: target->core_name is left unset in this path.  */
3202 	}
3203     }
3204   else if (arm_selected_cpu)
3205     {
3206       target->core_name = arm_selected_cpu->common.name;
3207       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3208       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3209 				 cpu_opts);
3210       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3211     }
3212   /* If the user did not specify a processor or architecture, choose
3213      one for them.  */
3214   else
3215     {
3216       const cpu_option *sel;
3217       auto_sbitmap sought_isa (isa_num_bits);
3218       bitmap_clear (sought_isa);
3219       auto_sbitmap default_isa (isa_num_bits);
3220 
3221       arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3222 						    TARGET_CPU_DEFAULT);
3223       cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3224       gcc_assert (arm_selected_cpu->common.name);
3225 
3226       /* RWE: All of the selection logic below (to the end of this
3227 	 'if' clause) looks somewhat suspect.  It appears to be mostly
3228 	 there to support forcing thumb support when the default CPU
3229 	 does not have thumb (somewhat dubious in terms of what the
3230 	 user might be expecting).  I think it should be removed once
3231 	 support for the pre-thumb era cores is removed.  */
3232       sel = arm_selected_cpu;
3233       arm_initialize_isa (default_isa, sel->common.isa_bits);
3234       arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3235 				 cpu_opts);
3236 
3237       /* Now check to see if the user has specified any command line
3238 	 switches that require certain abilities from the cpu.  */
3239 
3240       if (TARGET_INTERWORK || TARGET_THUMB)
3241 	{
3242 	  bitmap_set_bit (sought_isa, isa_bit_thumb);
3243 	  bitmap_set_bit (sought_isa, isa_bit_mode32);
3244 
3245 	  /* There are no ARM processors that support both APCS-26 and
3246 	     interworking.  Therefore we forcibly remove MODE26 from
3247 	     the isa features here (if it was set), so that the
3248 	     search below will always be able to find a compatible
3249 	     processor.  */
3250 	  bitmap_clear_bit (default_isa, isa_bit_mode26);
3251 	}
3252 
3253       /* If there are such requirements and the default CPU does not
3254 	 satisfy them, we need to run over the complete list of
3255 	 cores looking for one that is satisfactory.  */
3256       if (!bitmap_empty_p (sought_isa)
3257 	  && !bitmap_subset_p (sought_isa, default_isa))
3258 	{
3259 	  auto_sbitmap candidate_isa (isa_num_bits);
3260 	  /* We're only interested in a CPU with at least the
3261 	     capabilities of the default CPU and the required
3262 	     additional features.  */
3263 	  bitmap_ior (default_isa, default_isa, sought_isa);
3264 
3265 	  /* Try to locate a CPU type that supports all of the abilities
3266 	     of the default CPU, plus the extra abilities requested by
3267 	     the user.  */
3268 	  for (sel = all_cores; sel->common.name != NULL; sel++)
3269 	    {
3270 	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3271 	      /* An exact match?  */
3272 	      if (bitmap_equal_p (default_isa, candidate_isa))
3273 		break;
3274 	    }
3275 
3276 	  if (sel->common.name == NULL)
3277 	    {
3278 	      unsigned current_bit_count = isa_num_bits;
3279 	      const cpu_option *best_fit = NULL;
3280 
3281 	      /* Ideally we would like to issue an error message here
3282 		 saying that it was not possible to find a CPU compatible
3283 		 with the default CPU, but which also supports the command
3284 		 line options specified by the programmer, and so they
3285 		 ought to use the -mcpu=<name> command line option to
3286 		 override the default CPU type.
3287 
3288 		 If we cannot find a CPU that has exactly the
3289 		 characteristics of the default CPU and the given
3290 		 command line options we scan the array again looking
3291 		 for a best match.  The best match must have at least
3292 		 the capabilities of the perfect match.  */
3293 	      for (sel = all_cores; sel->common.name != NULL; sel++)
3294 		{
3295 		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3296 
3297 		  if (bitmap_subset_p (default_isa, candidate_isa))
3298 		    {
3299 		      unsigned count;
3300 
3301 		      bitmap_and_compl (candidate_isa, candidate_isa,
3302 					default_isa);
3303 		      count = bitmap_popcount (candidate_isa);
3304 
3305 		      if (count < current_bit_count)
3306 			{
3307 			  best_fit = sel;
3308 			  current_bit_count = count;
3309 			}
3310 		    }
3311 
3312 		  gcc_assert (best_fit);
3313 		  sel = best_fit;
3314 		}
3315 	    }
3316 	  arm_selected_cpu = sel;
3317 	}
3318 
3319       /* Now we know the CPU, we can finally initialize the target
3320 	 structure.  */
3321       target->core_name = arm_selected_cpu->common.name;
3322       arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3323       arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3324 				 cpu_opts);
3325       arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3326     }
3327 
3328   gcc_assert (arm_selected_cpu);
3329   gcc_assert (arm_selected_arch);
3330 
3331   if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3332     {
3333       arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3334       auto_sbitmap fpu_bits (isa_num_bits);
3335 
3336       arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3337       bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3338       bitmap_ior (target->isa, target->isa, fpu_bits);
3339     }
3340 
3341   if (!arm_selected_tune)
3342     arm_selected_tune = arm_selected_cpu;
3343   else /* Validate the features passed to -mtune.  */
3344     arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3345 
3346   const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3347 
3348   /* Finish initializing the target structure.  */
3349   target->arch_pp_name = arm_selected_arch->arch;
3350   target->base_arch = arm_selected_arch->base_arch;
3351   target->profile = arm_selected_arch->profile;
3352 
3353   target->tune_flags = tune_data->tune_flags;
3354   target->tune = tune_data->tune;
3355   target->tune_core = tune_data->scheduler;
3356   arm_option_reconfigure_globals ();
3357 }
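/* Example of the -mcpu/-march interaction resolved above (editor's note,
   illustrative command line):

     arm-none-eabi-gcc -march=armv7-a -mcpu=cortex-m4 ...

   The ISA bits of cortex-m4 and armv7-a differ (beyond the quirk and FPU
   bits that are masked out), so with WARN_COMPATIBLE set this emits
   "switch -mcpu=cortex-m4 conflicts with -march=armv7-a switch"; the
   -march value then drives code generation while -mcpu only supplies the
   default tuning.  */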
3358 
3359 /* Fix up any incompatible options that the user has specified.  */
3360 static void
3361 arm_option_override (void)
3362 {
3363   static const enum isa_feature fpu_bitlist[]
3364     = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3365   static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3366   cl_target_option opts;
3367 
3368   isa_quirkbits = sbitmap_alloc (isa_num_bits);
3369   arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3370 
3371   isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3372   arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3373 
3374   arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3375 
3376   if (!global_options_set.x_arm_fpu_index)
3377     {
3378       bool ok;
3379       int fpu_index;
3380 
3381       ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3382 				  CL_TARGET);
3383       gcc_assert (ok);
3384       arm_fpu_index = (enum fpu_type) fpu_index;
3385     }
3386 
3387   cl_target_option_save (&opts, &global_options);
3388   arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3389 			      true);
3390 
3391 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3392   SUBTARGET_OVERRIDE_OPTIONS;
3393 #endif
3394 
3395   /* Initialize boolean versions of the architectural flags, for use
3396      in the arm.md file and for enabling feature flags.  */
3397   arm_option_reconfigure_globals ();
3398 
3399   arm_tune = arm_active_target.tune_core;
3400   tune_flags = arm_active_target.tune_flags;
3401   current_tune = arm_active_target.tune;
3402 
3403   /* TBD: Dwarf info for apcs frame is not handled yet.  */
3404   if (TARGET_APCS_FRAME)
3405     flag_shrink_wrap = false;
3406 
3407   if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3408     {
3409       warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3410       target_flags |= MASK_APCS_FRAME;
3411     }
3412 
3413   if (TARGET_POKE_FUNCTION_NAME)
3414     target_flags |= MASK_APCS_FRAME;
3415 
3416   if (TARGET_APCS_REENT && flag_pic)
3417     error ("-fpic and -mapcs-reent are incompatible");
3418 
3419   if (TARGET_APCS_REENT)
3420     warning (0, "APCS reentrant code not supported.  Ignored");
3421 
3422   /* Set up some tuning parameters.  */
3423   arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3424   arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3425   arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3426   arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3427   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3428   arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3429 
3430   /* For arm2/3 there is no need to do any scheduling if we are doing
3431      software floating-point.  */
3432   if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3433     flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3434 
3435   /* Override the default structure alignment for AAPCS ABI.  */
3436   if (!global_options_set.x_arm_structure_size_boundary)
3437     {
3438       if (TARGET_AAPCS_BASED)
3439 	arm_structure_size_boundary = 8;
3440     }
3441   else
3442     {
3443       warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3444 
3445       if (arm_structure_size_boundary != 8
3446 	  && arm_structure_size_boundary != 32
3447 	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3448 	{
3449 	  if (ARM_DOUBLEWORD_ALIGN)
3450 	    warning (0,
3451 		     "structure size boundary can only be set to 8, 32 or 64");
3452 	  else
3453 	    warning (0, "structure size boundary can only be set to 8 or 32");
3454 	  arm_structure_size_boundary
3455 	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3456 	}
3457     }
3458 
3459   if (TARGET_VXWORKS_RTP)
3460     {
3461       if (!global_options_set.x_arm_pic_data_is_text_relative)
3462 	arm_pic_data_is_text_relative = 0;
3463     }
3464   else if (flag_pic
3465 	   && !arm_pic_data_is_text_relative
3466 	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3467     /* When text & data segments don't have a fixed displacement, the
3468        intended use is with a single, read only, pic base register.
3469        Unless the user explicitly requested not to do that, set
3470        it.  */
3471     target_flags |= MASK_SINGLE_PIC_BASE;
3472 
3473   /* If stack checking is disabled, we can use r10 as the PIC register,
3474      which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
3475   if (flag_pic && TARGET_SINGLE_PIC_BASE)
3476     {
3477       if (TARGET_VXWORKS_RTP)
3478 	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3479       arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3480     }
3481 
3482   if (flag_pic && TARGET_VXWORKS_RTP)
3483     arm_pic_register = 9;
3484 
3485   if (arm_pic_register_string != NULL)
3486     {
3487       int pic_register = decode_reg_name (arm_pic_register_string);
3488 
3489       if (!flag_pic)
3490 	warning (0, "-mpic-register= is useless without -fpic");
3491 
3492       /* Prevent the user from choosing an obviously stupid PIC register.  */
3493       else if (pic_register < 0 || call_used_regs[pic_register]
3494 	       || pic_register == HARD_FRAME_POINTER_REGNUM
3495 	       || pic_register == STACK_POINTER_REGNUM
3496 	       || pic_register >= PC_REGNUM
3497 	       || (TARGET_VXWORKS_RTP
3498 		   && (unsigned int) pic_register != arm_pic_register))
3499 	error ("unable to use '%s' for PIC register", arm_pic_register_string);
3500       else
3501 	arm_pic_register = pic_register;
3502     }
3503 
3504   /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
3505   if (fix_cm3_ldrd == 2)
3506     {
3507       if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3508 	fix_cm3_ldrd = 1;
3509       else
3510 	fix_cm3_ldrd = 0;
3511     }
3512 
3513   /* Hot/Cold partitioning is not currently supported, since we can't
3514      handle literal pool placement in that case.  */
3515   if (flag_reorder_blocks_and_partition)
3516     {
3517       inform (input_location,
3518 	      "-freorder-blocks-and-partition not supported on this architecture");
3519       flag_reorder_blocks_and_partition = 0;
3520       flag_reorder_blocks = 1;
3521     }
3522 
3523   if (flag_pic)
3524     /* Hoisting PIC address calculations more aggressively provides a small,
3525        but measurable, size reduction for PIC code.  Therefore, we decrease
3526        the bar for unrestricted expression hoisting to the cost of PIC address
3527        calculation, which is 2 instructions.  */
3528     maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3529 			   global_options.x_param_values,
3530 			   global_options_set.x_param_values);
3531 
3532   /* ARM EABI defaults to strict volatile bitfields.  */
3533   if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3534       && abi_version_at_least(2))
3535     flag_strict_volatile_bitfields = 1;
3536 
3537   /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3538      have deemed it beneficial (signified by setting
3539      prefetch.num_slots to 1 or more).  */
3540   if (flag_prefetch_loop_arrays < 0
3541       && HAVE_prefetch
3542       && optimize >= 3
3543       && current_tune->prefetch.num_slots > 0)
3544     flag_prefetch_loop_arrays = 1;
3545 
3546   /* Set up parameters to be used in prefetching algorithm.  Do not
3547      override the defaults unless we are tuning for a core we have
3548      researched values for.  */
3549   if (current_tune->prefetch.num_slots > 0)
3550     maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3551 			   current_tune->prefetch.num_slots,
3552 			   global_options.x_param_values,
3553 			   global_options_set.x_param_values);
3554   if (current_tune->prefetch.l1_cache_line_size >= 0)
3555     maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3556 			   current_tune->prefetch.l1_cache_line_size,
3557 			   global_options.x_param_values,
3558 			   global_options_set.x_param_values);
3559   if (current_tune->prefetch.l1_cache_size >= 0)
3560     maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3561 			   current_tune->prefetch.l1_cache_size,
3562 			   global_options.x_param_values,
3563 			   global_options_set.x_param_values);
3564 
3565   /* Use Neon rather than core registers to perform 64-bit
3566      operations.  */
3567   prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3568   if (use_neon_for_64bits == 1)
3569      prefer_neon_for_64bits = true;
3570 
3571   /* Use the alternative scheduling-pressure algorithm by default.  */
3572   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3573 			 global_options.x_param_values,
3574 			 global_options_set.x_param_values);
3575 
3576   /* Look through ready list and all of queue for instructions
3577      relevant for L2 auto-prefetcher.  */
3578   int param_sched_autopref_queue_depth;
3579 
3580   switch (current_tune->sched_autopref)
3581     {
3582     case tune_params::SCHED_AUTOPREF_OFF:
3583       param_sched_autopref_queue_depth = -1;
3584       break;
3585 
3586     case tune_params::SCHED_AUTOPREF_RANK:
3587       param_sched_autopref_queue_depth = 0;
3588       break;
3589 
3590     case tune_params::SCHED_AUTOPREF_FULL:
3591       param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3592       break;
3593 
3594     default:
3595       gcc_unreachable ();
3596     }
3597 
3598   maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3599 			 param_sched_autopref_queue_depth,
3600 			 global_options.x_param_values,
3601 			 global_options_set.x_param_values);
3602 
3603   /* Currently, for slow flash data, we just disable literal pools.  We also
3604      disable them for pure-code.  */
3605   if (target_slow_flash_data || target_pure_code)
3606     arm_disable_literal_pool = true;
3607 
3608   /* Disable scheduling fusion by default if the target is not an armv7
3609      processor or does not prefer ldrd/strd.  */
3610   if (flag_schedule_fusion == 2
3611       && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3612     flag_schedule_fusion = 0;
3613 
3614   /* Need to remember initial options before they are overridden.  */
3615   init_optimize = build_optimization_node (&global_options);
3616 
3617   arm_options_perform_arch_sanity_checks ();
3618   arm_option_override_internal (&global_options, &global_options_set);
3619   arm_option_check_internal (&global_options);
3620   arm_option_params_internal ();
3621 
3622   /* Create the default target_options structure.  */
3623   target_option_default_node = target_option_current_node
3624     = build_target_option_node (&global_options);
3625 
3626   /* Register global variables with the garbage collector.  */
3627   arm_add_gc_roots ();
3628 
3629   /* Init initial mode for testing.  */
3630   thumb_flipper = TARGET_THUMB;
3631 }
3632 
3633 
3634 /* Reconfigure global status flags from the active_target.isa.  */
3635 void
3636 arm_option_reconfigure_globals (void)
3637 {
3638   sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3639   arm_base_arch = arm_active_target.base_arch;
3640 
3641   /* Initialize boolean versions of the architectural flags, for use
3642      in the arm.md file.  */
3643   arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3644   arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3645   arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3646   arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3647   arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3648   arm_arch5te = arm_arch5e
3649     && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3650   arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3651   arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3652   arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3653   arm_arch6m = arm_arch6 && !arm_arch_notm;
3654   arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3655   arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3656   arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3657   arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3658   arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3659   arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3660   arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3661   arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3662   arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3663   arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3664   arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3665   arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3666   arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3667   arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3668   arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3669   arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3670   if (arm_fp16_inst)
3671     {
3672       if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3673 	error ("selected fp16 options are incompatible");
3674       arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3675     }
3676 
3677   /* And finally, set up some quirks.  */
3678   arm_arch_no_volatile_ce
3679     = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3680   arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3681 					    isa_bit_quirk_armv6kz);
3682 
3683   /* Use the cp15 method if it is available.  */
3684   if (target_thread_pointer == TP_AUTO)
3685     {
3686       if (arm_arch6k && !TARGET_THUMB1)
3687 	target_thread_pointer = TP_CP15;
3688       else
3689 	target_thread_pointer = TP_SOFT;
3690     }
3691 }
3692 
3693 /* Perform some validation between the desired architecture and the rest of the
3694    options.  */
3695 void
3696 arm_options_perform_arch_sanity_checks (void)
3697 {
3698   /* V5 code we generate is completely interworking capable, so we turn off
3699      TARGET_INTERWORK here to avoid many tests later on.  */
3700 
3701   /* XXX However, we must pass the right pre-processor defines to CPP
3702      or GLD can get confused.  This is a hack.  */
3703   if (TARGET_INTERWORK)
3704     arm_cpp_interwork = 1;
3705 
3706   if (arm_arch5)
3707     target_flags &= ~MASK_INTERWORK;
3708 
3709   if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3710     error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3711 
3712   if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3713     error ("iwmmxt abi requires an iwmmxt capable cpu");
3714 
3715   /* BPABI targets use linker tricks to allow interworking on cores
3716      without thumb support.  */
3717   if (TARGET_INTERWORK
3718       && !TARGET_BPABI
3719       && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3720     {
3721       warning (0, "target CPU does not support interworking" );
3722       target_flags &= ~MASK_INTERWORK;
3723     }
3724 
3725   /* If soft-float is specified then don't use FPU.  */
3726   if (TARGET_SOFT_FLOAT)
3727     arm_fpu_attr = FPU_NONE;
3728   else
3729     arm_fpu_attr = FPU_VFP;
3730 
3731   if (TARGET_AAPCS_BASED)
3732     {
3733       if (TARGET_CALLER_INTERWORKING)
3734 	error ("AAPCS does not support -mcaller-super-interworking");
3735       else
3736 	if (TARGET_CALLEE_INTERWORKING)
3737 	  error ("AAPCS does not support -mcallee-super-interworking");
3738     }
3739 
3740   /* __fp16 support currently assumes the core has ldrh.  */
3741   if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3742     sorry ("__fp16 and no ldrh");
3743 
3744   if (use_cmse && !arm_arch_cmse)
3745     error ("target CPU does not support ARMv8-M Security Extensions");
3746 
3747   /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3748      and ARMv8-M Baseline and Mainline do not allow such configuration.  */
3749   if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3750     error ("ARMv8-M Security Extensions incompatible with selected FPU");
3751 
3752 
3753   if (TARGET_AAPCS_BASED)
3754     {
3755       if (arm_abi == ARM_ABI_IWMMXT)
3756 	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3757       else if (TARGET_HARD_FLOAT_ABI)
3758 	{
3759 	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
3760 	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3761 	    error ("-mfloat-abi=hard: selected processor lacks an FPU");
3762 	}
3763       else
3764 	arm_pcs_default = ARM_PCS_AAPCS;
3765     }
3766   else
3767     {
3768       if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3769 	sorry ("-mfloat-abi=hard and VFP");
3770 
3771       if (arm_abi == ARM_ABI_APCS)
3772 	arm_pcs_default = ARM_PCS_APCS;
3773       else
3774 	arm_pcs_default = ARM_PCS_ATPCS;
3775     }
3776 }
3777 
3778 static void
3779 arm_add_gc_roots (void)
3780 {
3781   gcc_obstack_init(&minipool_obstack);
3782   minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3783 }
3784 
3785 /* A table of known ARM exception types.
3786    For use with the interrupt function attribute.  */
3787 
3788 typedef struct
3789 {
3790   const char *const arg;
3791   const unsigned long return_value;
3792 }
3793 isr_attribute_arg;
3794 
3795 static const isr_attribute_arg isr_attribute_args [] =
3796 {
3797   { "IRQ",   ARM_FT_ISR },
3798   { "irq",   ARM_FT_ISR },
3799   { "FIQ",   ARM_FT_FIQ },
3800   { "fiq",   ARM_FT_FIQ },
3801   { "ABORT", ARM_FT_ISR },
3802   { "abort", ARM_FT_ISR },
3803   { "ABORT", ARM_FT_ISR },
3804   { "abort", ARM_FT_ISR },
3805   { "UNDEF", ARM_FT_EXCEPTION },
3806   { "undef", ARM_FT_EXCEPTION },
3807   { "SWI",   ARM_FT_EXCEPTION },
3808   { "swi",   ARM_FT_EXCEPTION },
3809   { NULL,    ARM_FT_NORMAL }
3810 };
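/* Example use of the attribute values tabulated above (editor's note,
   illustrative):

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   The string argument is matched case-sensitively by arm_isr_value below,
   which is why both upper- and lower-case spellings are listed; an
   unrecognized string yields ARM_FT_UNKNOWN.  */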
3811 
3812 /* Returns the (interrupt) function type of the current
3813    function, or ARM_FT_UNKNOWN if the type cannot be determined.  */
3814 
3815 static unsigned long
3816 arm_isr_value (tree argument)
3817 {
3818   const isr_attribute_arg * ptr;
3819   const char *              arg;
3820 
3821   if (!arm_arch_notm)
3822     return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3823 
3824   /* No argument - default to IRQ.  */
3825   if (argument == NULL_TREE)
3826     return ARM_FT_ISR;
3827 
3828   /* Get the value of the argument.  */
3829   if (TREE_VALUE (argument) == NULL_TREE
3830       || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3831     return ARM_FT_UNKNOWN;
3832 
3833   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3834 
3835   /* Check it against the list of known arguments.  */
3836   for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3837     if (streq (arg, ptr->arg))
3838       return ptr->return_value;
3839 
3840   /* An unrecognized interrupt type.  */
3841   return ARM_FT_UNKNOWN;
3842 }
3843 
3844 /* Computes the type of the current function.  */
3845 
3846 static unsigned long
3847 arm_compute_func_type (void)
3848 {
3849   unsigned long type = ARM_FT_UNKNOWN;
3850   tree a;
3851   tree attr;
3852 
3853   gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3854 
3855   /* Decide if the current function is volatile.  Such functions
3856      never return, and many memory cycles can be saved by not storing
3857      register values that will never be needed again.  This optimization
3858      was added to speed up context switching in a kernel application.  */
3859   if (optimize > 0
3860       && (TREE_NOTHROW (current_function_decl)
3861           || !(flag_unwind_tables
3862                || (flag_exceptions
3863 		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3864       && TREE_THIS_VOLATILE (current_function_decl))
3865     type |= ARM_FT_VOLATILE;
3866 
3867   if (cfun->static_chain_decl != NULL)
3868     type |= ARM_FT_NESTED;
3869 
3870   attr = DECL_ATTRIBUTES (current_function_decl);
3871 
3872   a = lookup_attribute ("naked", attr);
3873   if (a != NULL_TREE)
3874     type |= ARM_FT_NAKED;
3875 
3876   a = lookup_attribute ("isr", attr);
3877   if (a == NULL_TREE)
3878     a = lookup_attribute ("interrupt", attr);
3879 
3880   if (a == NULL_TREE)
3881     type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3882   else
3883     type |= arm_isr_value (TREE_VALUE (a));
3884 
3885   if (lookup_attribute ("cmse_nonsecure_entry", attr))
3886     type |= ARM_FT_CMSE_ENTRY;
3887 
3888   return type;
3889 }
3890 
3891 /* Returns the type of the current function.  */
3892 
3893 unsigned long
3894 arm_current_func_type (void)
3895 {
3896   if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3897     cfun->machine->func_type = arm_compute_func_type ();
3898 
3899   return cfun->machine->func_type;
3900 }
3901 
3902 bool
3903 arm_allocate_stack_slots_for_args (void)
3904 {
3905   /* Naked functions should not allocate stack slots for arguments.  */
3906   return !IS_NAKED (arm_current_func_type ());
3907 }
3908 
3909 static bool
3910 arm_warn_func_return (tree decl)
3911 {
3912   /* Naked functions are implemented entirely in assembly, including the
3913      return sequence, so suppress warnings about this.  */
3914   return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3915 }
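/* Example (editor's note, illustrative): a function implemented entirely
   in inline assembly,

     void __attribute__ ((naked)) reset_handler (void);

   provides its own return sequence, so arm_warn_func_return above keeps
   the middle end from warning about the missing compiler-generated
   return.  */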
3916 
3917 
3918 /* Output assembler code for a block containing the constant parts
3919    of a trampoline, leaving space for the variable parts.
3920 
3921    On the ARM, (if r8 is the static chain regnum, and remembering that
3922    referencing pc adds an offset of 8) the trampoline looks like:
3923 	   ldr 		r8, [pc, #0]
3924 	   ldr		pc, [pc]
3925 	   .word	static chain value
3926 	   .word	function's address
3927    XXX FIXME: When the trampoline returns, r8 will be clobbered.  */
3928 
3929 static void
3930 arm_asm_trampoline_template (FILE *f)
3931 {
3932   fprintf (f, "\t.syntax unified\n");
3933 
3934   if (TARGET_ARM)
3935     {
3936       fprintf (f, "\t.arm\n");
3937       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3938       asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3939     }
3940   else if (TARGET_THUMB2)
3941     {
3942       fprintf (f, "\t.thumb\n");
3943       /* The Thumb-2 trampoline is similar to the arm implementation.
3944 	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
3945       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3946 		   STATIC_CHAIN_REGNUM, PC_REGNUM);
3947       asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3948     }
3949   else
3950     {
3951       ASM_OUTPUT_ALIGN (f, 2);
3952       fprintf (f, "\t.code\t16\n");
3953       fprintf (f, ".Ltrampoline_start:\n");
3954       asm_fprintf (f, "\tpush\t{r0, r1}\n");
3955       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3956       asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3957       asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3958       asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3959       asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3960     }
3961   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3962   assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3963 }
3964 
3965 /* Emit RTL insns to initialize the variable parts of a trampoline.  */
3966 
3967 static void
3968 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3969 {
3970   rtx fnaddr, mem, a_tramp;
3971 
3972   emit_block_move (m_tramp, assemble_trampoline_template (),
3973 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3974 
3975   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3976   emit_move_insn (mem, chain_value);
3977 
3978   mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3979   fnaddr = XEXP (DECL_RTL (fndecl), 0);
3980   emit_move_insn (mem, fnaddr);
3981 
3982   a_tramp = XEXP (m_tramp, 0);
3983   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3984 		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3985 		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3986 }
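/* Illustrative layout of an initialized 32-bit (ARM/Thumb-2) trampoline
   produced by the two functions above (editor's note):

	offset  0:  ldr  <static chain reg>, [pc, ...]	(template insn)
	offset  4:  ldr  pc, [pc, ...]			(template insn)
	offset  8:  static chain value
	offset 12:  address of the nested function

   __clear_cache is then invoked over [a_tramp, a_tramp + TRAMPOLINE_SIZE)
   so that the copied instructions are visible to instruction fetch.  */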
3987 
3988 /* Thumb trampolines should be entered in thumb mode, so set
3989    the bottom bit of the address.  */
3990 
3991 static rtx
3992 arm_trampoline_adjust_address (rtx addr)
3993 {
3994   if (TARGET_THUMB)
3995     addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3996 				NULL, 0, OPTAB_LIB_WIDEN);
3997   return addr;
3998 }
3999 
4000 /* Return 1 if it is possible to return using a single instruction.
4001    If SIBLING is non-null, this is a test for a return before a sibling
4002    call.  SIBLING is the call insn, so we can examine its register usage.  */
4003 
4004 int
4005 use_return_insn (int iscond, rtx sibling)
4006 {
4007   int regno;
4008   unsigned int func_type;
4009   unsigned long saved_int_regs;
4010   unsigned HOST_WIDE_INT stack_adjust;
4011   arm_stack_offsets *offsets;
4012 
4013   /* Never use a return instruction before reload has run.  */
4014   if (!reload_completed)
4015     return 0;
4016 
4017   func_type = arm_current_func_type ();
4018 
4019   /* Naked, volatile and stack alignment functions need special
4020      consideration.  */
4021   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4022     return 0;
4023 
4024   /* So do interrupt functions that use the frame pointer and Thumb
4025      interrupt functions.  */
4026   if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4027     return 0;
4028 
4029   if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4030       && !optimize_function_for_size_p (cfun))
4031     return 0;
4032 
4033   offsets = arm_get_frame_offsets ();
4034   stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4035 
4036   /* As do variadic functions.  */
4037   if (crtl->args.pretend_args_size
4038       || cfun->machine->uses_anonymous_args
4039       /* Or if the function calls __builtin_eh_return () */
4040       || crtl->calls_eh_return
4041       /* Or if the function calls alloca */
4042       || cfun->calls_alloca
4043       /* Or if there is a stack adjustment.  However, if the stack pointer
4044 	 is saved on the stack, we can use a pre-incrementing stack load.  */
4045       || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4046 				 && stack_adjust == 4))
4047       /* Or if the static chain register was saved above the frame, under the
4048 	 assumption that the stack pointer isn't saved on the stack.  */
4049       || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4050           && arm_compute_static_chain_stack_bytes() != 0))
4051     return 0;
4052 
4053   saved_int_regs = offsets->saved_regs_mask;
4054 
4055   /* Unfortunately, the insn
4056 
4057        ldmib sp, {..., sp, ...}
4058 
4059      triggers a bug on most SA-110 based devices, such that the stack
4060      pointer won't be correctly restored if the instruction takes a
4061      page fault.  We work around this problem by popping r3 along with
4062      the other registers, since that is never slower than executing
4063      another instruction.
4064 
4065      We test for !arm_arch5 here, because code for any architecture
4066      less than this could potentially be run on one of the buggy
4067      chips.  */
4068   if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4069     {
4070       /* Validate that r3 is a call-clobbered register (always true in
4071 	 the default abi) ...  */
4072       if (!call_used_regs[3])
4073 	return 0;
4074 
4075       /* ... that it isn't being used for a return value ... */
4076       if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4077 	return 0;
4078 
4079       /* ... or for a tail-call argument ...  */
4080       if (sibling)
4081 	{
4082 	  gcc_assert (CALL_P (sibling));
4083 
4084 	  if (find_regno_fusage (sibling, USE, 3))
4085 	    return 0;
4086 	}
4087 
4088       /* ... and that there are no call-saved registers in r0-r2
4089 	 (always true in the default ABI).  */
4090       if (saved_int_regs & 0x7)
4091 	return 0;
4092     }
4093 
4094   /* Can't be done if interworking with Thumb, and any registers have been
4095      stacked.  */
4096   if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4097     return 0;
4098 
4099   /* On StrongARM, conditional returns are expensive if they aren't
4100      taken and multiple registers have been stacked.  */
4101   if (iscond && arm_tune_strongarm)
4102     {
4103       /* Conditional return when just the LR is stored is a simple
4104 	 conditional-load instruction, that's not expensive.  */
4105       if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4106 	return 0;
4107 
4108       if (flag_pic
4109 	  && arm_pic_register != INVALID_REGNUM
4110 	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4111 	return 0;
4112     }
4113 
4114   /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4115      several instructions if anything needs to be popped.  */
4116   if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4117     return 0;
4118 
4119   /* If there are saved registers but the LR isn't saved, then we need
4120      two instructions for the return.  */
4121   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4122     return 0;
4123 
4124   /* Can't be done if any of the VFP regs are pushed,
4125      since this also requires an insn.  */
4126   if (TARGET_HARD_FLOAT)
4127     for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4128       if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4129 	return 0;
4130 
4131   if (TARGET_REALLY_IWMMXT)
4132     for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4133       if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4134 	return 0;
4135 
4136   return 1;
4137 }
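/* Illustrative single-instruction epilogues that the checks above permit
   (editor's note): with nothing to restore, a plain

	bx	lr

   suffices; when callee-saved registers and the link register were pushed,

	ldmfd	sp!, {r4, r5, pc}

   restores them and returns in one instruction, which is why a saved set
   that excludes LR forces the two-instruction path.  */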
4138 
4139 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4140    shrink-wrapping if possible.  This is the case if we need to emit a
4141    prologue, which we can test by looking at the offsets.  */
4142 bool
4143 use_simple_return_p (void)
4144 {
4145   arm_stack_offsets *offsets;
4146 
4147   /* Note this function can be called before or after reload.  */
4148   if (!reload_completed)
4149     arm_compute_frame_layout ();
4150 
4151   offsets = arm_get_frame_offsets ();
4152   return offsets->outgoing_args != 0;
4153 }
4154 
4155 /* Return TRUE if int I is a valid immediate ARM constant.  */
4156 
4157 int
4158 const_ok_for_arm (HOST_WIDE_INT i)
4159 {
4160   int lowbit;
4161 
4162   /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4163      be all zero, or all one.  */
4164   if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4165       && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4166 	  != ((~(unsigned HOST_WIDE_INT) 0)
4167 	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4168     return FALSE;
4169 
4170   i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4171 
4172   /* Fast return for 0 and small values.  We must do this for zero, since
4173      the code below can't handle that one case.  */
4174   if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4175     return TRUE;
4176 
4177   /* Get the number of trailing zeros.  */
4178   lowbit = ffs((int) i) - 1;
4179 
4180   /* Only even shifts are allowed in ARM mode so round down to the
4181      nearest even number.  */
4182   if (TARGET_ARM)
4183     lowbit &= ~1;
4184 
4185   if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4186     return TRUE;
4187 
4188   if (TARGET_ARM)
4189     {
4190       /* Allow rotated constants in ARM mode.  */
4191       if (lowbit <= 4
4192 	   && ((i & ~0xc000003f) == 0
4193 	       || (i & ~0xf000000f) == 0
4194 	       || (i & ~0xfc000003) == 0))
4195 	return TRUE;
4196     }
4197   else if (TARGET_THUMB2)
4198     {
4199       HOST_WIDE_INT v;
4200 
4201       /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
4202       v = i & 0xff;
4203       v |= v << 16;
4204       if (i == v || i == (v | (v << 8)))
4205 	return TRUE;
4206 
4207       /* Allow repeated pattern 0xXY00XY00.  */
4208       v = i & 0xff00;
4209       v |= v << 16;
4210       if (i == v)
4211 	return TRUE;
4212     }
4213   else if (TARGET_HAVE_MOVT)
4214     {
4215       /* Thumb-1 Targets with MOVT.  */
4216       if (i > 0xffff)
4217 	return FALSE;
4218       else
4219 	return TRUE;
4220     }
4221 
4222   return FALSE;
4223 }
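/* Worked examples for const_ok_for_arm (editor's note, illustrative).
   An ARM-state immediate is an 8-bit value rotated right by an even
   amount, so:

     0x000000ff  valid    (8-bit value, no rotation)
     0x0000ff00  valid    (0xff shifted into bits 8..15)
     0xff000000  valid    (0xff rotated right by 8)
     0x00000101  invalid  (needs two separate 8-bit chunks)

   Thumb-2 additionally accepts the replicated patterns tested above,
   e.g. 0x00ff00ff, 0xff00ff00 and 0x01010101.  */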
4224 
4225 /* Return true if I is a valid constant for the operation CODE.  */
4226 int
4227 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4228 {
4229   if (const_ok_for_arm (i))
4230     return 1;
4231 
4232   switch (code)
4233     {
4234     case SET:
4235       /* See if we can use movw.  */
4236       if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4237 	return 1;
4238       else
4239 	/* Otherwise, try mvn.  */
4240 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4241 
4242     case PLUS:
4243       /* See if we can use addw or subw.  */
4244       if (TARGET_THUMB2
4245 	  && ((i & 0xfffff000) == 0
4246 	      || ((-i) & 0xfffff000) == 0))
4247 	return 1;
4248       /* Fall through.  */
4249     case COMPARE:
4250     case EQ:
4251     case NE:
4252     case GT:
4253     case LE:
4254     case LT:
4255     case GE:
4256     case GEU:
4257     case LTU:
4258     case GTU:
4259     case LEU:
4260     case UNORDERED:
4261     case ORDERED:
4262     case UNEQ:
4263     case UNGE:
4264     case UNLT:
4265     case UNGT:
4266     case UNLE:
4267       return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4268 
4269     case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
4270     case XOR:
4271       return 0;
4272 
4273     case IOR:
4274       if (TARGET_THUMB2)
4275 	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4276       return 0;
4277 
4278     case AND:
4279       return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4280 
4281     default:
4282       gcc_unreachable ();
4283     }
4284 }
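/* Example of the rewrites const_ok_for_op allows (editor's note,
   illustrative): 0xffffff00 is not a valid ARM immediate, but its
   bitwise complement 0xff is, so a SET can be emitted as

	mvn	r0, #0xff

   and a PLUS of -0x100 can become a subtraction of the valid
   immediate 0x100.  */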
4285 
4286 /* Return true if I is a valid di mode constant for the operation CODE.  */
4287 int
4288 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4289 {
4290   HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4291   HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4292   rtx hi = GEN_INT (hi_val);
4293   rtx lo = GEN_INT (lo_val);
4294 
4295   if (TARGET_THUMB1)
4296     return 0;
4297 
4298   switch (code)
4299     {
4300     case AND:
4301     case IOR:
4302     case XOR:
4303       return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4304               && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4305     case PLUS:
4306       return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4307 
4308     default:
4309       return 0;
4310     }
4311 }
4312 
4313 /* Emit a sequence of insns to handle a large constant.
4314    CODE is the code of the operation required, it can be any of SET, PLUS,
4315    IOR, AND, XOR, MINUS;
4316    MODE is the mode in which the operation is being performed;
4317    VAL is the integer to operate on;
4318    SOURCE is the other operand (a register, or a null-pointer for SET);
4319    SUBTARGETS means it is safe to create scratch registers if that will
4320    either produce a simpler sequence, or we will want to cse the values.
4321    Return value is the number of insns emitted.  */
4322 
4323 /* ??? Tweak this for thumb2.  */
4324 int
4325 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4326 		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4327 {
4328   rtx cond;
4329 
4330   if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4331     cond = COND_EXEC_TEST (PATTERN (insn));
4332   else
4333     cond = NULL_RTX;
4334 
4335   if (subtargets || code == SET
4336       || (REG_P (target) && REG_P (source)
4337 	  && REGNO (target) != REGNO (source)))
4338     {
4339       /* After arm_reorg has been called, we can't fix up expensive
4340 	 constants by pushing them into memory so we must synthesize
4341 	 them in-line, regardless of the cost.  This is only likely to
4342 	 be more costly on chips that have load delay slots and we are
4343 	 compiling without running the scheduler (so no splitting
4344 	 occurred before the final instruction emission).
4345 
4346 	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4347       */
4348       if (!cfun->machine->after_arm_reorg
4349 	  && !cond
4350 	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4351 				1, 0)
4352 	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
4353 		 + (code != SET))))
4354 	{
4355 	  if (code == SET)
4356 	    {
4357 	      /* Currently SET is the only monadic value for CODE, all
4358 		 the rest are dyadic.  */
4359 	      if (TARGET_USE_MOVT)
4360 		arm_emit_movpair (target, GEN_INT (val));
4361 	      else
4362 		emit_set_insn (target, GEN_INT (val));
4363 
4364 	      return 1;
4365 	    }
4366 	  else
4367 	    {
4368 	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4369 
4370 	      if (TARGET_USE_MOVT)
4371 		arm_emit_movpair (temp, GEN_INT (val));
4372 	      else
4373 		emit_set_insn (temp, GEN_INT (val));
4374 
4375 	      /* For MINUS, the value is subtracted from, since we never
4376 		 have subtraction of a constant.  */
4377 	      if (code == MINUS)
4378 		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4379 	      else
4380 		emit_set_insn (target,
4381 			       gen_rtx_fmt_ee (code, mode, source, temp));
4382 	      return 2;
4383 	    }
4384 	}
4385     }
4386 
4387   return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4388 			   1);
4389 }
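/* Illustrative synthesis of a constant that is not a valid immediate
   (editor's note, not actual compiler output): 0x0000ff0f can be built
   from two valid ARM immediates,

	mov	r0, #0xff00
	orr	r0, r0, #0x0f

   whereas targets with MOVT use arm_emit_movpair to emit a movw (and,
   for values above 16 bits, a movw/movt pair) instead.  */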
4390 
4391 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4392    ARM/THUMB2 immediates, and add up to VAL.
4393    The function return value gives the number of insns required.  */
4394 static int
4395 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 			    struct four_ints *return_sequence)
4397 {
4398   int best_consecutive_zeros = 0;
4399   int i;
4400   int best_start = 0;
4401   int insns1, insns2;
4402   struct four_ints tmp_sequence;
4403 
4404   /* If we aren't targeting ARM, the best place to start is always at
4405      the bottom, otherwise look more closely.  */
4406   if (TARGET_ARM)
4407     {
4408       for (i = 0; i < 32; i += 2)
4409 	{
4410 	  int consecutive_zeros = 0;
4411 
4412 	  if (!(val & (3 << i)))
4413 	    {
4414 	      while ((i < 32) && !(val & (3 << i)))
4415 		{
4416 		  consecutive_zeros += 2;
4417 		  i += 2;
4418 		}
4419 	      if (consecutive_zeros > best_consecutive_zeros)
4420 		{
4421 		  best_consecutive_zeros = consecutive_zeros;
4422 		  best_start = i - consecutive_zeros;
4423 		}
4424 	      i -= 2;
4425 	    }
4426 	}
4427     }
4428 
4429   /* So long as it won't require any more insns to do so, it's
4430      desirable to emit a small constant (in bits 0...9) in the last
4431      insn.  This way there is more chance that it can be combined with
4432      a later addressing insn to form a pre-indexed load or store
4433      operation.  Consider:
4434 
4435 	   *((volatile int *)0xe0000100) = 1;
4436 	   *((volatile int *)0xe0000110) = 2;
4437 
4438      We want this to wind up as:
4439 
4440 	    mov rA, #0xe0000000
4441 	    mov rB, #1
4442 	    str rB, [rA, #0x100]
4443 	    mov rB, #2
4444 	    str rB, [rA, #0x110]
4445 
4446      rather than having to synthesize both large constants from scratch.
4447 
4448      Therefore, we calculate how many insns would be required to emit
4449      the constant starting from `best_start', and also starting from
4450      zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
4451      yield a shorter sequence, we may as well use zero.  */
4452   insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4453   if (best_start != 0
4454       && ((HOST_WIDE_INT_1U << best_start) < val))
4455     {
4456       insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4457       if (insns2 <= insns1)
4458 	{
4459 	  *return_sequence = tmp_sequence;
4460 	  insns1 = insns2;
4461 	}
4462     }
4463 
4464   return insns1;
4465 }
4466 
4467 /* As for optimal_immediate_sequence, but starting at bit-position I.  */
4468 static int
4469 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4470 			     struct four_ints *return_sequence, int i)
4471 {
4472   int remainder = val & 0xffffffff;
4473   int insns = 0;
4474 
4475   /* Try and find a way of doing the job in either two or three
4476      instructions.
4477 
4478      In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4479      location.  We start at position I.  This may be the MSB, or
4480      optimal_immediate_sequence may have positioned it at the largest block
4481      of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4482      wrapping around to the top of the word when we drop off the bottom.
4483      In the worst case this code should produce no more than four insns.
4484 
4485      In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4486      constants, shifted to any arbitrary location.  We should always start
4487      at the MSB.  */
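  /* As a concrete illustration: 0x0ff000ff cannot be encoded as a single
     8-bit rotated immediate, so in ARM mode the loop below would split it
     into the two valid immediates 0x000000ff and 0x0ff00000, giving a
     two-insn sequence (e.g. a MOV of one part followed by an ADD or ORR of
     the other, depending on CODE).  */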
4488   do
4489     {
4490       int end;
4491       unsigned int b1, b2, b3, b4;
4492       unsigned HOST_WIDE_INT result;
4493       int loc;
4494 
4495       gcc_assert (insns < 4);
4496 
4497       if (i <= 0)
4498 	i += 32;
4499 
4500       /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
4501       if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4502 	{
4503 	  loc = i;
4504 	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4505 	    /* We can use addw/subw for the last 12 bits.  */
4506 	    result = remainder;
4507 	  else
4508 	    {
4509 	      /* Use an 8-bit shifted/rotated immediate.  */
4510 	      end = i - 8;
4511 	      if (end < 0)
4512 		end += 32;
4513 	      result = remainder & ((0x0ff << end)
4514 				   | ((i < end) ? (0xff >> (32 - end))
4515 						: 0));
4516 	      i -= 8;
4517 	    }
4518 	}
4519       else
4520 	{
4521 	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
4522 	     arbitrary shifts.  */
4523 	  i -= TARGET_ARM ? 2 : 1;
4524 	  continue;
4525 	}
4526 
4527       /* Next, see if we can do a better job with a thumb2 replicated
4528 	 constant.
4529 
4530          We do it this way around to catch the cases like 0x01F001E0 where
4531 	 two 8-bit immediates would work, but a replicated constant would
4532 	 make it worse.
4533 
4534          TODO: 16-bit constants that don't clear all the bits, but still win.
4535          TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
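      /* As an illustration: for 0x3333333f the replicated constant
	 0x33333333 clears everything except the low nibble, leaving only
	 0x0000000c for one further immediate, whereas building the value
	 from plain 8-bit shifted immediates would need four insns.  */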
4536       if (TARGET_THUMB2)
4537 	{
4538 	  b1 = (remainder & 0xff000000) >> 24;
4539 	  b2 = (remainder & 0x00ff0000) >> 16;
4540 	  b3 = (remainder & 0x0000ff00) >> 8;
4541 	  b4 = remainder & 0xff;
4542 
4543 	  if (loc > 24)
4544 	    {
4545 	      /* The 8-bit immediate already found clears b1 (and maybe b2),
4546 		 but must leave b3 and b4 alone.  */
4547 
4548 	      /* First try to find a 32-bit replicated constant that clears
4549 		 almost everything.  We can assume that we can't do it in one,
4550 		 or else we wouldn't be here.  */
4551 	      unsigned int tmp = b1 & b2 & b3 & b4;
4552 	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4553 				  + (tmp << 24);
4554 	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4555 					    + (tmp == b3) + (tmp == b4);
4556 	      if (tmp
4557 		  && (matching_bytes >= 3
4558 		      || (matching_bytes == 2
4559 			  && const_ok_for_op (remainder & ~tmp2, code))))
4560 		{
4561 		  /* At least 3 of the bytes match, and the fourth has at
4562 		     least as many bits set, or two of the bytes match
4563 		     and it will only require one more insn to finish.  */
4564 		  result = tmp2;
4565 		  i = tmp != b1 ? 32
4566 		      : tmp != b2 ? 24
4567 		      : tmp != b3 ? 16
4568 		      : 8;
4569 		}
4570 
4571 	      /* Second, try to find a 16-bit replicated constant that can
4572 		 leave three of the bytes clear.  If b2 or b4 is already
4573 		 zero, then we can.  If the 8-bit from above would not
4574 		 clear b2 anyway, then we still win.  */
4575 	      else if (b1 == b3 && (!b2 || !b4
4576 			       || (remainder & 0x00ff0000 & ~result)))
4577 		{
4578 		  result = remainder & 0xff00ff00;
4579 		  i = 24;
4580 		}
4581 	    }
4582 	  else if (loc > 16)
4583 	    {
4584 	      /* The 8-bit immediate already found clears b2 (and maybe b3)
4585 		 and we don't get here unless b1 is already clear, but it will
4586 		 leave b4 unchanged.  */
4587 
4588 	      /* If we can clear b2 and b4 at once, then we win, since the
4589 		 8-bits couldn't possibly reach that far.  */
4590 	      if (b2 == b4)
4591 		{
4592 		  result = remainder & 0x00ff00ff;
4593 		  i = 16;
4594 		}
4595 	    }
4596 	}
4597 
4598       return_sequence->i[insns++] = result;
4599       remainder &= ~result;
4600 
4601       if (code == SET || code == MINUS)
4602 	code = PLUS;
4603     }
4604   while (remainder);
4605 
4606   return insns;
4607 }
4608 
4609 /* Emit an instruction with the indicated PATTERN.  If COND is
4610    non-NULL, conditionalize the execution of the instruction on COND
4611    being true.  */
4612 
4613 static void
4614 emit_constant_insn (rtx cond, rtx pattern)
4615 {
4616   if (cond)
4617     pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4618   emit_insn (pattern);
4619 }
4620 
4621 /* As above, but extra parameter GENERATE which, if clear, suppresses
4622    RTL generation.  */
4623 
4624 static int
4625 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4626 		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
4627 		  int subtargets, int generate)
4628 {
4629   int can_invert = 0;
4630   int can_negate = 0;
4631   int final_invert = 0;
4632   int i;
4633   int set_sign_bit_copies = 0;
4634   int clear_sign_bit_copies = 0;
4635   int clear_zero_bit_copies = 0;
4636   int set_zero_bit_copies = 0;
4637   int insns = 0, neg_insns, inv_insns;
4638   unsigned HOST_WIDE_INT temp1, temp2;
4639   unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4640   struct four_ints *immediates;
4641   struct four_ints pos_immediates, neg_immediates, inv_immediates;
4642 
4643   /* Find out which operations are safe for a given CODE.  Also do a quick
4644      check for degenerate cases; these can occur when DImode operations
4645      are split.  */
4646   switch (code)
4647     {
4648     case SET:
4649       can_invert = 1;
4650       break;
4651 
4652     case PLUS:
4653       can_negate = 1;
4654       break;
4655 
4656     case IOR:
4657       if (remainder == 0xffffffff)
4658 	{
4659 	  if (generate)
4660 	    emit_constant_insn (cond,
4661 				gen_rtx_SET (target,
4662 					     GEN_INT (ARM_SIGN_EXTEND (val))));
4663 	  return 1;
4664 	}
4665 
4666       if (remainder == 0)
4667 	{
4668 	  if (reload_completed && rtx_equal_p (target, source))
4669 	    return 0;
4670 
4671 	  if (generate)
4672 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4673 	  return 1;
4674 	}
4675       break;
4676 
4677     case AND:
4678       if (remainder == 0)
4679 	{
4680 	  if (generate)
4681 	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4682 	  return 1;
4683 	}
4684       if (remainder == 0xffffffff)
4685 	{
4686 	  if (reload_completed && rtx_equal_p (target, source))
4687 	    return 0;
4688 	  if (generate)
4689 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4690 	  return 1;
4691 	}
4692       can_invert = 1;
4693       break;
4694 
4695     case XOR:
4696       if (remainder == 0)
4697 	{
4698 	  if (reload_completed && rtx_equal_p (target, source))
4699 	    return 0;
4700 	  if (generate)
4701 	    emit_constant_insn (cond, gen_rtx_SET (target, source));
4702 	  return 1;
4703 	}
4704 
4705       if (remainder == 0xffffffff)
4706 	{
4707 	  if (generate)
4708 	    emit_constant_insn (cond,
4709 				gen_rtx_SET (target,
4710 					     gen_rtx_NOT (mode, source)));
4711 	  return 1;
4712 	}
4713       final_invert = 1;
4714       break;
4715 
4716     case MINUS:
4717       /* We treat MINUS as (val - source), since (source - val) is always
4718 	 passed as (source + (-val)).  */
4719       if (remainder == 0)
4720 	{
4721 	  if (generate)
4722 	    emit_constant_insn (cond,
4723 				gen_rtx_SET (target,
4724 					     gen_rtx_NEG (mode, source)));
4725 	  return 1;
4726 	}
4727       if (const_ok_for_arm (val))
4728 	{
4729 	  if (generate)
4730 	    emit_constant_insn (cond,
4731 				gen_rtx_SET (target,
4732 					     gen_rtx_MINUS (mode, GEN_INT (val),
4733 							    source)));
4734 	  return 1;
4735 	}
4736 
4737       break;
4738 
4739     default:
4740       gcc_unreachable ();
4741     }
4742 
4743   /* If we can do it in one insn get out quickly.  */
4744   if (const_ok_for_op (val, code))
4745     {
4746       if (generate)
4747 	emit_constant_insn (cond,
4748 			    gen_rtx_SET (target,
4749 					 (source
4750 					  ? gen_rtx_fmt_ee (code, mode, source,
4751 							    GEN_INT (val))
4752 					  : GEN_INT (val))));
4753       return 1;
4754     }
4755 
4756   /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4757      insn.  */
4758   if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4759       && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4760     {
4761       if (generate)
4762 	{
4763 	  if (mode == SImode && i == 16)
4764 	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4765 	       smaller insn.  */
4766 	    emit_constant_insn (cond,
4767 				gen_zero_extendhisi2
4768 				(target, gen_lowpart (HImode, source)));
4769 	  else
4770 	    /* Extzv only supports SImode, but we can coerce the operands
4771 	       into that mode.  */
4772 	    emit_constant_insn (cond,
4773 				gen_extzv_t2 (gen_lowpart (SImode, target),
4774 					      gen_lowpart (SImode, source),
4775 					      GEN_INT (i), const0_rtx));
4776 	}
4777 
4778       return 1;
4779     }
4780 
4781   /* Calculate a few attributes that may be useful for specific
4782      optimizations.  */
4783   /* Count number of leading zeros.  */
4784   for (i = 31; i >= 0; i--)
4785     {
4786       if ((remainder & (1 << i)) == 0)
4787 	clear_sign_bit_copies++;
4788       else
4789 	break;
4790     }
4791 
4792   /* Count number of leading 1's.  */
4793   for (i = 31; i >= 0; i--)
4794     {
4795       if ((remainder & (1 << i)) != 0)
4796 	set_sign_bit_copies++;
4797       else
4798 	break;
4799     }
4800 
4801   /* Count number of trailing zeros.  */
4802   for (i = 0; i <= 31; i++)
4803     {
4804       if ((remainder & (1 << i)) == 0)
4805 	clear_zero_bit_copies++;
4806       else
4807 	break;
4808     }
4809 
4810   /* Count number of trailing 1's.  */
4811   for (i = 0; i <= 31; i++)
4812     {
4813       if ((remainder & (1 << i)) != 0)
4814 	set_zero_bit_copies++;
4815       else
4816 	break;
4817     }
4818 
4819   switch (code)
4820     {
4821     case SET:
4822       /* See if we can do this by sign_extending a constant that is known
4823 	 to be negative.  This is a good way of doing it, since the shift
4824 	 may well merge into a subsequent insn.  */
4825       if (set_sign_bit_copies > 1)
4826 	{
4827 	  if (const_ok_for_arm
4828 	      (temp1 = ARM_SIGN_EXTEND (remainder
4829 					<< (set_sign_bit_copies - 1))))
4830 	    {
4831 	      if (generate)
4832 		{
4833 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4834 		  emit_constant_insn (cond,
4835 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4836 		  emit_constant_insn (cond,
4837 				      gen_ashrsi3 (target, new_src,
4838 						   GEN_INT (set_sign_bit_copies - 1)));
4839 		}
4840 	      return 2;
4841 	    }
4842 	  /* For an inverted constant, we will need to set the low bits;
4843 	     these will be shifted out of harm's way.  */
4844 	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4845 	  if (const_ok_for_arm (~temp1))
4846 	    {
4847 	      if (generate)
4848 		{
4849 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4850 		  emit_constant_insn (cond,
4851 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4852 		  emit_constant_insn (cond,
4853 				      gen_ashrsi3 (target, new_src,
4854 						   GEN_INT (set_sign_bit_copies - 1)));
4855 		}
4856 	      return 2;
4857 	    }
4858 	}
4859 
4860       /* See if we can calculate the value as the difference between two
4861 	 valid immediates.  */
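      /* For example, 0x000fffff is not itself a valid immediate, but it is
	 the difference of two that are (0x00100000 and 1); the code below
	 would emit a MOV of 0x00100000 followed by an ADD of -1.  */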
4862       if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4863 	{
4864 	  int topshift = clear_sign_bit_copies & ~1;
4865 
4866 	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4867 				   & (0xff000000 >> topshift));
4868 
4869 	  /* If temp1 is zero, then that means the 9 most significant
4870 	     bits of remainder were 1 and we've caused it to overflow.
4871 	     When topshift is 0 we don't need to do anything since we
4872 	     can borrow from 'bit 32'.  */
4873 	  if (temp1 == 0 && topshift != 0)
4874 	    temp1 = 0x80000000 >> (topshift - 1);
4875 
4876 	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4877 
4878 	  if (const_ok_for_arm (temp2))
4879 	    {
4880 	      if (generate)
4881 		{
4882 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4883 		  emit_constant_insn (cond,
4884 				      gen_rtx_SET (new_src, GEN_INT (temp1)));
4885 		  emit_constant_insn (cond,
4886 				      gen_addsi3 (target, new_src,
4887 						  GEN_INT (-temp2)));
4888 		}
4889 
4890 	      return 2;
4891 	    }
4892 	}
4893 
4894       /* See if we can generate this by setting the bottom (or the top)
4895 	 16 bits, and then shifting these into the other half of the
4896 	 word.  We only look for the simplest cases, to do more would cost
4897 	 too much.  Be careful, however, not to generate this when the
4898 	 alternative would take fewer insns.  */
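      /* For instance, 0x01230123 would be handled by first synthesizing
	 0x00000123 (itself not a single valid immediate) and then ORing
	 that value with a copy of itself shifted left by 16.  */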
4899       if (val & 0xffff0000)
4900 	{
4901 	  temp1 = remainder & 0xffff0000;
4902 	  temp2 = remainder & 0x0000ffff;
4903 
4904 	  /* Overlaps outside this range are best done using other methods.  */
4905 	  for (i = 9; i < 24; i++)
4906 	    {
4907 	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4908 		  && !const_ok_for_arm (temp2))
4909 		{
4910 		  rtx new_src = (subtargets
4911 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4912 				 : target);
4913 		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4914 					    source, subtargets, generate);
4915 		  source = new_src;
4916 		  if (generate)
4917 		    emit_constant_insn
4918 		      (cond,
4919 		       gen_rtx_SET
4920 		       (target,
4921 			gen_rtx_IOR (mode,
4922 				     gen_rtx_ASHIFT (mode, source,
4923 						     GEN_INT (i)),
4924 				     source)));
4925 		  return insns + 1;
4926 		}
4927 	    }
4928 
4929 	  /* Don't duplicate cases already considered.  */
4930 	  for (i = 17; i < 24; i++)
4931 	    {
4932 	      if (((temp1 | (temp1 >> i)) == remainder)
4933 		  && !const_ok_for_arm (temp1))
4934 		{
4935 		  rtx new_src = (subtargets
4936 				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4937 				 : target);
4938 		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4939 					    source, subtargets, generate);
4940 		  source = new_src;
4941 		  if (generate)
4942 		    emit_constant_insn
4943 		      (cond,
4944 		       gen_rtx_SET (target,
4945 				    gen_rtx_IOR
4946 				    (mode,
4947 				     gen_rtx_LSHIFTRT (mode, source,
4948 						       GEN_INT (i)),
4949 				     source)));
4950 		  return insns + 1;
4951 		}
4952 	    }
4953 	}
4954       break;
4955 
4956     case IOR:
4957     case XOR:
4958       /* If we have IOR or XOR, and the constant can be loaded in a
4959 	 single instruction, and we can find a temporary to put it in,
4960 	 then this can be done in two instructions instead of 3-4.  */
4961       if (subtargets
4962 	  /* TARGET can't be NULL if SUBTARGETS is 0.  */
4963 	  || (reload_completed && !reg_mentioned_p (target, source)))
4964 	{
4965 	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4966 	    {
4967 	      if (generate)
4968 		{
4969 		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4970 
4971 		  emit_constant_insn (cond,
4972 				      gen_rtx_SET (sub, GEN_INT (val)));
4973 		  emit_constant_insn (cond,
4974 				      gen_rtx_SET (target,
4975 						   gen_rtx_fmt_ee (code, mode,
4976 								   source, sub)));
4977 		}
4978 	      return 2;
4979 	    }
4980 	}
4981 
4982       if (code == XOR)
4983 	break;
4984 
4985       /* Convert
4986 	  x = y | constant (which is composed of set_sign_bit_copies leading 1s
4987 	                    followed by 0s, e.g. 0xfff00000) to
4988 	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4989 
4990 	  This can be done in 2 instructions by using shifts with mov or mvn.
4991 	  e.g. for
4992 	  x = x | 0xfff00000;
4993 	  we generate.
4994 	  mvn	r0, r0, asl #12
4995 	  mvn	r0, r0, lsr #12  */
4996       if (set_sign_bit_copies > 8
4997 	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4998 	{
4999 	  if (generate)
5000 	    {
5001 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5002 	      rtx shift = GEN_INT (set_sign_bit_copies);
5003 
5004 	      emit_constant_insn
5005 		(cond,
5006 		 gen_rtx_SET (sub,
5007 			      gen_rtx_NOT (mode,
5008 					   gen_rtx_ASHIFT (mode,
5009 							   source,
5010 							   shift))));
5011 	      emit_constant_insn
5012 		(cond,
5013 		 gen_rtx_SET (target,
5014 			      gen_rtx_NOT (mode,
5015 					   gen_rtx_LSHIFTRT (mode, sub,
5016 							     shift))));
5017 	    }
5018 	  return 2;
5019 	}
5020 
5021       /* Convert
5022 	  x = y | constant (which has set_zero_bit_copies trailing ones)
5023 	  to
5024 	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5025 
5026 	  E.g. for r0 = r0 | 0xfff
5027 	       mvn	r0, r0, lsr #12
5028 	       mvn	r0, r0, asl #12
5029 
5030       */
5031       if (set_zero_bit_copies > 8
5032 	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5033 	{
5034 	  if (generate)
5035 	    {
5036 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5037 	      rtx shift = GEN_INT (set_zero_bit_copies);
5038 
5039 	      emit_constant_insn
5040 		(cond,
5041 		 gen_rtx_SET (sub,
5042 			      gen_rtx_NOT (mode,
5043 					   gen_rtx_LSHIFTRT (mode,
5044 							     source,
5045 							     shift))));
5046 	      emit_constant_insn
5047 		(cond,
5048 		 gen_rtx_SET (target,
5049 			      gen_rtx_NOT (mode,
5050 					   gen_rtx_ASHIFT (mode, sub,
5051 							   shift))));
5052 	    }
5053 	  return 2;
5054 	}
5055 
5056       /* This will never be reached for Thumb2 because orn is a valid
5057 	 instruction.  This is for Thumb1 and the 32-bit ARM case.
5058 
5059 	 x = y | constant (such that ~constant is a valid constant)
5060 	 Transform this to
5061 	 x = ~(~y & ~constant).
5062       */
5063       if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5064 	{
5065 	  if (generate)
5066 	    {
5067 	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5068 	      emit_constant_insn (cond,
5069 				  gen_rtx_SET (sub,
5070 					       gen_rtx_NOT (mode, source)));
5071 	      source = sub;
5072 	      if (subtargets)
5073 		sub = gen_reg_rtx (mode);
5074 	      emit_constant_insn (cond,
5075 				  gen_rtx_SET (sub,
5076 					       gen_rtx_AND (mode, source,
5077 							    GEN_INT (temp1))));
5078 	      emit_constant_insn (cond,
5079 				  gen_rtx_SET (target,
5080 					       gen_rtx_NOT (mode, sub)));
5081 	    }
5082 	  return 3;
5083 	}
5084       break;
5085 
5086     case AND:
5087       /* See if two shifts will do 2 or more insns' worth of work.  */
5088       if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5089 	{
5090 	  HOST_WIDE_INT shift_mask = ((0xffffffff
5091 				       << (32 - clear_sign_bit_copies))
5092 				      & 0xffffffff);
5093 
5094 	  if ((remainder | shift_mask) != 0xffffffff)
5095 	    {
5096 	      HOST_WIDE_INT new_val
5097 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5098 
5099 	      if (generate)
5100 		{
5101 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5102 		  insns = arm_gen_constant (AND, SImode, cond, new_val,
5103 					    new_src, source, subtargets, 1);
5104 		  source = new_src;
5105 		}
5106 	      else
5107 		{
5108 		  rtx targ = subtargets ? NULL_RTX : target;
5109 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5110 					    targ, source, subtargets, 0);
5111 		}
5112 	    }
5113 
5114 	  if (generate)
5115 	    {
5116 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5117 	      rtx shift = GEN_INT (clear_sign_bit_copies);
5118 
5119 	      emit_insn (gen_ashlsi3 (new_src, source, shift));
5120 	      emit_insn (gen_lshrsi3 (target, new_src, shift));
5121 	    }
5122 
5123 	  return insns + 2;
5124 	}
5125 
5126       if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5127 	{
5128 	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5129 
5130 	  if ((remainder | shift_mask) != 0xffffffff)
5131 	    {
5132 	      HOST_WIDE_INT new_val
5133 	        = ARM_SIGN_EXTEND (remainder | shift_mask);
5134 	      if (generate)
5135 		{
5136 		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5137 
5138 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5139 					    new_src, source, subtargets, 1);
5140 		  source = new_src;
5141 		}
5142 	      else
5143 		{
5144 		  rtx targ = subtargets ? NULL_RTX : target;
5145 
5146 		  insns = arm_gen_constant (AND, mode, cond, new_val,
5147 					    targ, source, subtargets, 0);
5148 		}
5149 	    }
5150 
5151 	  if (generate)
5152 	    {
5153 	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5154 	      rtx shift = GEN_INT (clear_zero_bit_copies);
5155 
5156 	      emit_insn (gen_lshrsi3 (new_src, source, shift));
5157 	      emit_insn (gen_ashlsi3 (target, new_src, shift));
5158 	    }
5159 
5160 	  return insns + 2;
5161 	}
5162 
5163       break;
5164 
5165     default:
5166       break;
5167     }
5168 
5169   /* Calculate what the instruction sequences would be if we generated it
5170      normally, negated, or inverted.  */
5171   if (code == AND)
5172     /* AND cannot be split into multiple insns, so invert and use BIC.  */
5173     insns = 99;
5174   else
5175     insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5176 
5177   if (can_negate)
5178     neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5179 					    &neg_immediates);
5180   else
5181     neg_insns = 99;
5182 
5183   if (can_invert || final_invert)
5184     inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5185 					    &inv_immediates);
5186   else
5187     inv_insns = 99;
5188 
5189   immediates = &pos_immediates;
5190 
5191   /* Is the negated immediate sequence more efficient?  */
5192   if (neg_insns < insns && neg_insns <= inv_insns)
5193     {
5194       insns = neg_insns;
5195       immediates = &neg_immediates;
5196     }
5197   else
5198     can_negate = 0;
5199 
5200   /* Is the inverted immediate sequence more efficient?
5201      We must allow for an extra NOT instruction for XOR operations, although
5202      there is some chance that the final 'mvn' will get optimized later.  */
5203   if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5204     {
5205       insns = inv_insns;
5206       immediates = &inv_immediates;
5207     }
5208   else
5209     {
5210       can_invert = 0;
5211       final_invert = 0;
5212     }
5213 
5214   /* Now output the chosen sequence as instructions.  */
5215   if (generate)
5216     {
5217       for (i = 0; i < insns; i++)
5218 	{
5219 	  rtx new_src, temp1_rtx;
5220 
5221 	  temp1 = immediates->i[i];
5222 
5223 	  if (code == SET || code == MINUS)
5224 	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
5225 	  else if ((final_invert || i < (insns - 1)) && subtargets)
5226 	    new_src = gen_reg_rtx (mode);
5227 	  else
5228 	    new_src = target;
5229 
5230 	  if (can_invert)
5231 	    temp1 = ~temp1;
5232 	  else if (can_negate)
5233 	    temp1 = -temp1;
5234 
5235 	  temp1 = trunc_int_for_mode (temp1, mode);
5236 	  temp1_rtx = GEN_INT (temp1);
5237 
5238 	  if (code == SET)
5239 	    ;
5240 	  else if (code == MINUS)
5241 	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5242 	  else
5243 	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5244 
5245 	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5246 	  source = new_src;
5247 
5248 	  if (code == SET)
5249 	    {
5250 	      can_negate = can_invert;
5251 	      can_invert = 0;
5252 	      code = PLUS;
5253 	    }
5254 	  else if (code == MINUS)
5255 	    code = PLUS;
5256 	}
5257     }
5258 
5259   if (final_invert)
5260     {
5261       if (generate)
5262 	emit_constant_insn (cond, gen_rtx_SET (target,
5263 					       gen_rtx_NOT (mode, source)));
5264       insns++;
5265     }
5266 
5267   return insns;
5268 }
5269 
5270 /* Canonicalize a comparison so that we are more likely to recognize it.
5271    This can be done for a few constant compares, where we can make the
5272    immediate value easier to load.  */
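/* For example, a comparison such as (x > 1023) is rewritten below as
   (x >= 1024), since 1023 is not a valid ARM immediate but 1024 is, and
   the result of the comparison is unchanged.  */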
5273 
5274 static void
5275 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5276 			     bool op0_preserve_value)
5277 {
5278   machine_mode mode;
5279   unsigned HOST_WIDE_INT i, maxval;
5280 
5281   mode = GET_MODE (*op0);
5282   if (mode == VOIDmode)
5283     mode = GET_MODE (*op1);
5284 
5285   maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5286 
5287   /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
5288      we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
5289      reversed or (for constant OP1) adjusted to GE/LT.  Similarly
5290      for GTU/LEU in Thumb mode.  */
5291   if (mode == DImode)
5292     {
5293 
5294       if (*code == GT || *code == LE
5295 	  || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5296 	{
5297 	  /* Missing comparison.  First try to use an available
5298 	     comparison.  */
5299 	  if (CONST_INT_P (*op1))
5300 	    {
5301 	      i = INTVAL (*op1);
5302 	      switch (*code)
5303 		{
5304 		case GT:
5305 		case LE:
5306 		  if (i != maxval
5307 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
5308 		    {
5309 		      *op1 = GEN_INT (i + 1);
5310 		      *code = *code == GT ? GE : LT;
5311 		      return;
5312 		    }
5313 		  break;
5314 		case GTU:
5315 		case LEU:
5316 		  if (i != ~((unsigned HOST_WIDE_INT) 0)
5317 		      && arm_const_double_by_immediates (GEN_INT (i + 1)))
5318 		    {
5319 		      *op1 = GEN_INT (i + 1);
5320 		      *code = *code == GTU ? GEU : LTU;
5321 		      return;
5322 		    }
5323 		  break;
5324 		default:
5325 		  gcc_unreachable ();
5326 		}
5327 	    }
5328 
5329 	  /* If that did not work, reverse the condition.  */
5330 	  if (!op0_preserve_value)
5331 	    {
5332 	      std::swap (*op0, *op1);
5333 	      *code = (int)swap_condition ((enum rtx_code)*code);
5334 	    }
5335 	}
5336       return;
5337     }
5338 
5339   /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5340      with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5341      to facilitate possible combining with a cmp into 'ands'.  */
5342   if (mode == SImode
5343       && GET_CODE (*op0) == ZERO_EXTEND
5344       && GET_CODE (XEXP (*op0, 0)) == SUBREG
5345       && GET_MODE (XEXP (*op0, 0)) == QImode
5346       && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5347       && subreg_lowpart_p (XEXP (*op0, 0))
5348       && *op1 == const0_rtx)
5349     *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5350 			GEN_INT (255));
5351 
5352   /* Comparisons smaller than DImode.  Only adjust comparisons against
5353      an out-of-range constant.  */
5354   if (!CONST_INT_P (*op1)
5355       || const_ok_for_arm (INTVAL (*op1))
5356       || const_ok_for_arm (- INTVAL (*op1)))
5357     return;
5358 
5359   i = INTVAL (*op1);
5360 
5361   switch (*code)
5362     {
5363     case EQ:
5364     case NE:
5365       return;
5366 
5367     case GT:
5368     case LE:
5369       if (i != maxval
5370 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5371 	{
5372 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5373 	  *code = *code == GT ? GE : LT;
5374 	  return;
5375 	}
5376       break;
5377 
5378     case GE:
5379     case LT:
5380       if (i != ~maxval
5381 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5382 	{
5383 	  *op1 = GEN_INT (i - 1);
5384 	  *code = *code == GE ? GT : LE;
5385 	  return;
5386 	}
5387       break;
5388 
5389     case GTU:
5390     case LEU:
5391       if (i != ~((unsigned HOST_WIDE_INT) 0)
5392 	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5393 	{
5394 	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5395 	  *code = *code == GTU ? GEU : LTU;
5396 	  return;
5397 	}
5398       break;
5399 
5400     case GEU:
5401     case LTU:
5402       if (i != 0
5403 	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5404 	{
5405 	  *op1 = GEN_INT (i - 1);
5406 	  *code = *code == GEU ? GTU : LEU;
5407 	  return;
5408 	}
5409       break;
5410 
5411     default:
5412       gcc_unreachable ();
5413     }
5414 }
5415 
5416 
5417 /* Define how to find the value returned by a function.  */
5418 
5419 static rtx
5420 arm_function_value(const_tree type, const_tree func,
5421 		   bool outgoing ATTRIBUTE_UNUSED)
5422 {
5423   machine_mode mode;
5424   int unsignedp ATTRIBUTE_UNUSED;
5425   rtx r ATTRIBUTE_UNUSED;
5426 
5427   mode = TYPE_MODE (type);
5428 
5429   if (TARGET_AAPCS_BASED)
5430     return aapcs_allocate_return_reg (mode, type, func);
5431 
5432   /* Promote integer types.  */
5433   if (INTEGRAL_TYPE_P (type))
5434     mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5435 
5436   /* Promote small structs returned in a register to full-word size
5437      for big-endian AAPCS.  */
5438   if (arm_return_in_msb (type))
5439     {
5440       HOST_WIDE_INT size = int_size_in_bytes (type);
5441       if (size % UNITS_PER_WORD != 0)
5442 	{
5443 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5444 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5445 	}
5446     }
5447 
5448   return arm_libcall_value_1 (mode);
5449 }
5450 
5451 /* libcall hashtable helpers.  */
5452 
5453 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5454 {
5455   static inline hashval_t hash (const rtx_def *);
5456   static inline bool equal (const rtx_def *, const rtx_def *);
5457   static inline void remove (rtx_def *);
5458 };
5459 
5460 inline bool
5461 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5462 {
5463   return rtx_equal_p (p1, p2);
5464 }
5465 
5466 inline hashval_t
5467 libcall_hasher::hash (const rtx_def *p1)
5468 {
5469   return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5470 }
5471 
5472 typedef hash_table<libcall_hasher> libcall_table_type;
5473 
5474 static void
5475 add_libcall (libcall_table_type *htab, rtx libcall)
5476 {
5477   *htab->find_slot (libcall, INSERT) = libcall;
5478 }
5479 
5480 static bool
5481 arm_libcall_uses_aapcs_base (const_rtx libcall)
5482 {
5483   static bool init_done = false;
5484   static libcall_table_type *libcall_htab = NULL;
5485 
5486   if (!init_done)
5487     {
5488       init_done = true;
5489 
5490       libcall_htab = new libcall_table_type (31);
5491       add_libcall (libcall_htab,
5492 		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5493       add_libcall (libcall_htab,
5494 		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5495       add_libcall (libcall_htab,
5496 		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5497       add_libcall (libcall_htab,
5498 		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5499 
5500       add_libcall (libcall_htab,
5501 		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5502       add_libcall (libcall_htab,
5503 		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5504       add_libcall (libcall_htab,
5505 		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5506       add_libcall (libcall_htab,
5507 		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5508 
5509       add_libcall (libcall_htab,
5510 		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
5511       add_libcall (libcall_htab,
5512 		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5513       add_libcall (libcall_htab,
5514 		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
5515       add_libcall (libcall_htab,
5516 		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
5517       add_libcall (libcall_htab,
5518 		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
5519       add_libcall (libcall_htab,
5520 		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
5521       add_libcall (libcall_htab,
5522 		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
5523       add_libcall (libcall_htab,
5524 		   convert_optab_libfunc (ufix_optab, DImode, SFmode));
5525 
5526       /* Values from double-precision helper functions are returned in core
5527 	 registers if the selected core only supports single-precision
5528 	 arithmetic, even if we are using the hard-float ABI.  The same is
5529 	 true for single-precision helpers, but we will never be using the
5530 	 hard-float ABI on a CPU which doesn't support single-precision
5531 	 operations in hardware.  */
5532       add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5533       add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5534       add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5535       add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5536       add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5537       add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5538       add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5539       add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5540       add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5541       add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5542       add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5543       add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5544 							SFmode));
5545       add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5546 							DFmode));
5547       add_libcall (libcall_htab,
5548 		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5549     }
5550 
5551   return libcall && libcall_htab->find (libcall) != NULL;
5552 }
5553 
5554 static rtx
5555 arm_libcall_value_1 (machine_mode mode)
5556 {
5557   if (TARGET_AAPCS_BASED)
5558     return aapcs_libcall_value (mode);
5559   else if (TARGET_IWMMXT_ABI
5560 	   && arm_vector_mode_supported_p (mode))
5561     return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5562   else
5563     return gen_rtx_REG (mode, ARG_REGISTER (1));
5564 }
5565 
5566 /* Define how to find the value returned by a library function
5567    assuming the value has mode MODE.  */
5568 
5569 static rtx
5570 arm_libcall_value (machine_mode mode, const_rtx libcall)
5571 {
5572   if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5573       && GET_MODE_CLASS (mode) == MODE_FLOAT)
5574     {
5575       /* The following libcalls return their result in integer registers,
5576 	 even though they return a floating point value.  */
5577       if (arm_libcall_uses_aapcs_base (libcall))
5578 	return gen_rtx_REG (mode, ARG_REGISTER(1));
5579 
5580     }
5581 
5582   return arm_libcall_value_1 (mode);
5583 }
5584 
5585 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
5586 
5587 static bool
5588 arm_function_value_regno_p (const unsigned int regno)
5589 {
5590   if (regno == ARG_REGISTER (1)
5591       || (TARGET_32BIT
5592 	  && TARGET_AAPCS_BASED
5593 	  && TARGET_HARD_FLOAT
5594 	  && regno == FIRST_VFP_REGNUM)
5595       || (TARGET_IWMMXT_ABI
5596 	  && regno == FIRST_IWMMXT_REGNUM))
5597     return true;
5598 
5599   return false;
5600 }
5601 
5602 /* Determine the amount of memory needed to store the possible return
5603    registers of an untyped call.  */
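/* The base figure of 16 bytes below covers r0-r3; the hard-float ABI
   presumably also has to allow for the VFP return registers (32 more
   bytes) and the iWMMXt ABI for one 64-bit coprocessor register (8 more
   bytes).  */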
5604 int
5605 arm_apply_result_size (void)
5606 {
5607   int size = 16;
5608 
5609   if (TARGET_32BIT)
5610     {
5611       if (TARGET_HARD_FLOAT_ABI)
5612 	size += 32;
5613       if (TARGET_IWMMXT_ABI)
5614 	size += 8;
5615     }
5616 
5617   return size;
5618 }
5619 
5620 /* Decide whether TYPE should be returned in memory (true)
5621    or in a register (false).  FNTYPE is the type of the function making
5622    the call.  */
5623 static bool
5624 arm_return_in_memory (const_tree type, const_tree fntype)
5625 {
5626   HOST_WIDE_INT size;
5627 
5628   size = int_size_in_bytes (type);  /* Negative if not fixed size.  */
5629 
5630   if (TARGET_AAPCS_BASED)
5631     {
5632       /* Simple, non-aggregate types (i.e. not including vectors and
5633 	 complex) are always returned in a register (or registers).
5634 	 We don't care about which register here, so we can short-cut
5635 	 some of the detail.  */
5636       if (!AGGREGATE_TYPE_P (type)
5637 	  && TREE_CODE (type) != VECTOR_TYPE
5638 	  && TREE_CODE (type) != COMPLEX_TYPE)
5639 	return false;
5640 
5641       /* Any return value that is no larger than one word can be
5642 	 returned in r0.  */
5643       if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5644 	return false;
5645 
5646       /* Check any available co-processors to see if they accept the
5647 	 type as a register candidate (VFP, for example, can return
5648 	 some aggregates in consecutive registers).  These aren't
5649 	 available if the call is variadic.  */
5650       if (aapcs_select_return_coproc (type, fntype) >= 0)
5651 	return false;
5652 
5653       /* Vector values should be returned using ARM registers, not
5654 	 memory (unless they're over 16 bytes, which will break since
5655 	 we only have four call-clobbered registers to play with).  */
5656       if (TREE_CODE (type) == VECTOR_TYPE)
5657 	return (size < 0 || size > (4 * UNITS_PER_WORD));
5658 
5659       /* The rest go in memory.  */
5660       return true;
5661     }
5662 
5663   if (TREE_CODE (type) == VECTOR_TYPE)
5664     return (size < 0 || size > (4 * UNITS_PER_WORD));
5665 
5666   if (!AGGREGATE_TYPE_P (type) &&
5667       (TREE_CODE (type) != VECTOR_TYPE))
5668     /* All simple types are returned in registers.  */
5669     return false;
5670 
5671   if (arm_abi != ARM_ABI_APCS)
5672     {
5673       /* ATPCS and later return aggregate types in memory only if they are
5674 	 larger than a word (or are variable size).  */
5675       return (size < 0 || size > UNITS_PER_WORD);
5676     }
5677 
5678   /* For the arm-wince targets we choose to be compatible with Microsoft's
5679      ARM and Thumb compilers, which always return aggregates in memory.  */
5680 #ifndef ARM_WINCE
5681   /* All structures/unions bigger than one word are returned in memory.
5682      Also catch the case where int_size_in_bytes returns -1.  In this case
5683      the aggregate is either huge or of variable size, and in either case
5684      we will want to return it via memory and not in a register.  */
5685   if (size < 0 || size > UNITS_PER_WORD)
5686     return true;
5687 
5688   if (TREE_CODE (type) == RECORD_TYPE)
5689     {
5690       tree field;
5691 
5692       /* For a struct the APCS says that we only return in a register
5693 	 if the type is 'integer like' and every addressable element
5694 	 has an offset of zero.  For practical purposes this means
5695 	 that the structure can have at most one non bit-field element
5696 	 and that this element must be the first one in the structure.  */
5697 
5698       /* Find the first field, ignoring non FIELD_DECL things which will
5699 	 have been created by C++.  */
5700       for (field = TYPE_FIELDS (type);
5701 	   field && TREE_CODE (field) != FIELD_DECL;
5702 	   field = DECL_CHAIN (field))
5703 	continue;
5704 
5705       if (field == NULL)
5706 	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */
5707 
5708       /* Check that the first field is valid for returning in a register.  */
5709 
5710       /* ... Floats are not allowed */
5711       if (FLOAT_TYPE_P (TREE_TYPE (field)))
5712 	return true;
5713 
5714       /* ... Aggregates that are not themselves valid for returning in
5715 	 a register are not allowed.  */
5716       if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5717 	return true;
5718 
5719       /* Now check the remaining fields, if any.  Only bitfields are allowed,
5720 	 since they are not addressable.  */
5721       for (field = DECL_CHAIN (field);
5722 	   field;
5723 	   field = DECL_CHAIN (field))
5724 	{
5725 	  if (TREE_CODE (field) != FIELD_DECL)
5726 	    continue;
5727 
5728 	  if (!DECL_BIT_FIELD_TYPE (field))
5729 	    return true;
5730 	}
5731 
5732       return false;
5733     }
5734 
5735   if (TREE_CODE (type) == UNION_TYPE)
5736     {
5737       tree field;
5738 
5739       /* Unions can be returned in registers if every element is
5740 	 integral, or can be returned in an integer register.  */
5741       for (field = TYPE_FIELDS (type);
5742 	   field;
5743 	   field = DECL_CHAIN (field))
5744 	{
5745 	  if (TREE_CODE (field) != FIELD_DECL)
5746 	    continue;
5747 
5748 	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
5749 	    return true;
5750 
5751 	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5752 	    return true;
5753 	}
5754 
5755       return false;
5756     }
5757 #endif /* not ARM_WINCE */
5758 
5759   /* Return all other types in memory.  */
5760   return true;
5761 }
5762 
5763 const struct pcs_attribute_arg
5764 {
5765   const char *arg;
5766   enum arm_pcs value;
5767 } pcs_attribute_args[] =
5768   {
5769     {"aapcs", ARM_PCS_AAPCS},
5770     {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5771 #if 0
5772     /* We could recognize these, but changes would be needed elsewhere
5773      * to implement them.  */
5774     {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5775     {"atpcs", ARM_PCS_ATPCS},
5776     {"apcs", ARM_PCS_APCS},
5777 #endif
5778     {NULL, ARM_PCS_UNKNOWN}
5779   };
5780 
5781 static enum arm_pcs
5782 arm_pcs_from_attribute (tree attr)
5783 {
5784   const struct pcs_attribute_arg *ptr;
5785   const char *arg;
5786 
5787   /* Get the value of the argument.  */
5788   if (TREE_VALUE (attr) == NULL_TREE
5789       || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5790     return ARM_PCS_UNKNOWN;
5791 
5792   arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5793 
5794   /* Check it against the list of known arguments.  */
5795   for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5796     if (streq (arg, ptr->arg))
5797       return ptr->value;
5798 
5799   /* An unrecognized PCS variant.  */
5800   return ARM_PCS_UNKNOWN;
5801 }
5802 
5803 /* Get the PCS variant to use for this call.  TYPE is the function's type
5804    specification, DECL is the specific declaration.  DECL may be null if
5805    the call could be indirect or if this is a library call.  */
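/* For example, a function declared with __attribute__((pcs("aapcs-vfp")))
   explicitly requests the VFP variant; in the absence of an attribute the
   result normally falls back to arm_pcs_default.  */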
5806 static enum arm_pcs
5807 arm_get_pcs_model (const_tree type, const_tree decl)
5808 {
5809   bool user_convention = false;
5810   enum arm_pcs user_pcs = arm_pcs_default;
5811   tree attr;
5812 
5813   gcc_assert (type);
5814 
5815   attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5816   if (attr)
5817     {
5818       user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5819       user_convention = true;
5820     }
5821 
5822   if (TARGET_AAPCS_BASED)
5823     {
5824       /* Detect varargs functions.  These always use the base rules
5825 	 (no argument is ever a candidate for a co-processor
5826 	 register).  */
5827       bool base_rules = stdarg_p (type);
5828 
5829       if (user_convention)
5830 	{
5831 	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5832 	    sorry ("non-AAPCS derived PCS variant");
5833 	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5834 	    error ("variadic functions must use the base AAPCS variant");
5835 	}
5836 
5837       if (base_rules)
5838 	return ARM_PCS_AAPCS;
5839       else if (user_convention)
5840 	return user_pcs;
5841       else if (decl && flag_unit_at_a_time)
5842 	{
5843 	  /* Local functions never leak outside this compilation unit,
5844 	     so we are free to use whatever conventions are
5845 	     appropriate.  */
5846 	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
5847 	  cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5848 	  if (i && i->local)
5849 	    return ARM_PCS_AAPCS_LOCAL;
5850 	}
5851     }
5852   else if (user_convention && user_pcs != arm_pcs_default)
5853     sorry ("PCS variant");
5854 
5855   /* For everything else we use the target's default.  */
5856   return arm_pcs_default;
5857 }
5858 
5859 
5860 static void
5861 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
5862 		    const_tree fntype ATTRIBUTE_UNUSED,
5863 		    rtx libcall ATTRIBUTE_UNUSED,
5864 		    const_tree fndecl ATTRIBUTE_UNUSED)
5865 {
5866   /* Record the unallocated VFP registers.  */
5867   pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5868   pcum->aapcs_vfp_reg_alloc = 0;
5869 }
5870 
5871 /* Walk down the type tree of TYPE counting consecutive base elements.
5872    If *MODEP is VOIDmode, then set it to the first valid floating point
5873    type.  If a non-floating point type is found, or if a floating point
5874    type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5875    otherwise return the count in the sub-tree.  */
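/* For example, struct { double re, im; } yields 2 with *MODEP == DFmode,
   and float v[4] yields 4 with *MODEP == SFmode, whereas struct { float f;
   double d; } yields -1 because the element modes differ.  */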
5876 static int
5877 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5878 {
5879   machine_mode mode;
5880   HOST_WIDE_INT size;
5881 
5882   switch (TREE_CODE (type))
5883     {
5884     case REAL_TYPE:
5885       mode = TYPE_MODE (type);
5886       if (mode != DFmode && mode != SFmode && mode != HFmode)
5887 	return -1;
5888 
5889       if (*modep == VOIDmode)
5890 	*modep = mode;
5891 
5892       if (*modep == mode)
5893 	return 1;
5894 
5895       break;
5896 
5897     case COMPLEX_TYPE:
5898       mode = TYPE_MODE (TREE_TYPE (type));
5899       if (mode != DFmode && mode != SFmode)
5900 	return -1;
5901 
5902       if (*modep == VOIDmode)
5903 	*modep = mode;
5904 
5905       if (*modep == mode)
5906 	return 2;
5907 
5908       break;
5909 
5910     case VECTOR_TYPE:
5911       /* Use V2SImode and V4SImode as representatives of all 64-bit
5912 	 and 128-bit vector types, whether or not those modes are
5913 	 supported with the present options.  */
5914       size = int_size_in_bytes (type);
5915       switch (size)
5916 	{
5917 	case 8:
5918 	  mode = V2SImode;
5919 	  break;
5920 	case 16:
5921 	  mode = V4SImode;
5922 	  break;
5923 	default:
5924 	  return -1;
5925 	}
5926 
5927       if (*modep == VOIDmode)
5928 	*modep = mode;
5929 
5930       /* Vector modes are considered to be opaque: two vectors are
5931 	 equivalent for the purposes of being homogeneous aggregates
5932 	 if they are the same size.  */
5933       if (*modep == mode)
5934 	return 1;
5935 
5936       break;
5937 
5938     case ARRAY_TYPE:
5939       {
5940 	int count;
5941 	tree index = TYPE_DOMAIN (type);
5942 
5943 	/* Can't handle incomplete types nor sizes that are not
5944 	   fixed.  */
5945 	if (!COMPLETE_TYPE_P (type)
5946 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5947 	  return -1;
5948 
5949 	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5950 	if (count == -1
5951 	    || !index
5952 	    || !TYPE_MAX_VALUE (index)
5953 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5954 	    || !TYPE_MIN_VALUE (index)
5955 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5956 	    || count < 0)
5957 	  return -1;
5958 
5959 	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5960 		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5961 
5962 	/* There must be no padding.  */
5963 	if (wi::to_wide (TYPE_SIZE (type))
5964 	    != count * GET_MODE_BITSIZE (*modep))
5965 	  return -1;
5966 
5967 	return count;
5968       }
5969 
5970     case RECORD_TYPE:
5971       {
5972 	int count = 0;
5973 	int sub_count;
5974 	tree field;
5975 
5976 	/* Can't handle incomplete types nor sizes that are not
5977 	   fixed.  */
5978 	if (!COMPLETE_TYPE_P (type)
5979 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5980 	  return -1;
5981 
5982 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5983 	  {
5984 	    if (TREE_CODE (field) != FIELD_DECL)
5985 	      continue;
5986 
5987 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5988 	    if (sub_count < 0)
5989 	      return -1;
5990 	    count += sub_count;
5991 	  }
5992 
5993 	/* There must be no padding.  */
5994 	if (wi::to_wide (TYPE_SIZE (type))
5995 	    != count * GET_MODE_BITSIZE (*modep))
5996 	  return -1;
5997 
5998 	return count;
5999       }
6000 
6001     case UNION_TYPE:
6002     case QUAL_UNION_TYPE:
6003       {
6004 	/* These aren't very interesting except in a degenerate case.  */
6005 	int count = 0;
6006 	int sub_count;
6007 	tree field;
6008 
6009 	/* Can't handle incomplete types nor sizes that are not
6010 	   fixed.  */
6011 	if (!COMPLETE_TYPE_P (type)
6012 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6013 	  return -1;
6014 
6015 	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6016 	  {
6017 	    if (TREE_CODE (field) != FIELD_DECL)
6018 	      continue;
6019 
6020 	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6021 	    if (sub_count < 0)
6022 	      return -1;
6023 	    count = count > sub_count ? count : sub_count;
6024 	  }
6025 
6026 	/* There must be no padding.  */
6027 	if (wi::to_wide (TYPE_SIZE (type))
6028 	    != count * GET_MODE_BITSIZE (*modep))
6029 	  return -1;
6030 
6031 	return count;
6032       }
6033 
6034     default:
6035       break;
6036     }
6037 
6038   return -1;
6039 }
6040 
6041 /* Return true if PCS_VARIANT should use VFP registers.  */
6042 static bool
6043 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6044 {
6045   if (pcs_variant == ARM_PCS_AAPCS_VFP)
6046     {
6047       static bool seen_thumb1_vfp = false;
6048 
6049       if (TARGET_THUMB1 && !seen_thumb1_vfp)
6050 	{
6051 	  sorry ("Thumb-1 hard-float VFP ABI");
6052 	  /* sorry() is not immediately fatal, so only display this once.  */
6053 	  seen_thumb1_vfp = true;
6054 	}
6055 
6056       return true;
6057     }
6058 
6059   if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6060     return false;
6061 
6062   return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6063 	  (TARGET_VFP_DOUBLE || !is_double));
6064 }
6065 
6066 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6067    suitable for passing or returning in VFP registers for the PCS
6068    variant selected.  If it is, then *BASE_MODE is updated to contain
6069    a machine mode describing each element of the argument's type and
6070    *COUNT to hold the number of such elements.  */
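/* For instance, a homogeneous aggregate of four floats gives *BASE_MODE ==
   SFmode and *COUNT == 4, while a _Complex double gives *BASE_MODE ==
   DFmode and *COUNT == 2 (assuming the PCS variant permits VFP at all).  */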
6071 static bool
6072 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6073 				       machine_mode mode, const_tree type,
6074 				       machine_mode *base_mode, int *count)
6075 {
6076   machine_mode new_mode = VOIDmode;
6077 
6078   /* If we have the type information, prefer that to working things
6079      out from the mode.  */
6080   if (type)
6081     {
6082       int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6083 
6084       if (ag_count > 0 && ag_count <= 4)
6085 	*count = ag_count;
6086       else
6087 	return false;
6088     }
6089   else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6090 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6091 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6092     {
6093       *count = 1;
6094       new_mode = mode;
6095     }
6096   else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6097     {
6098       *count = 2;
6099       new_mode = (mode == DCmode ? DFmode : SFmode);
6100     }
6101   else
6102     return false;
6103 
6104 
6105   if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6106     return false;
6107 
6108   *base_mode = new_mode;
6109   return true;
6110 }
6111 
6112 static bool
6113 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6114 			       machine_mode mode, const_tree type)
6115 {
6116   int count ATTRIBUTE_UNUSED;
6117   machine_mode ag_mode ATTRIBUTE_UNUSED;
6118 
6119   if (!use_vfp_abi (pcs_variant, false))
6120     return false;
6121   return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6122 						&ag_mode, &count);
6123 }
6124 
6125 static bool
6126 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6127 			     const_tree type)
6128 {
6129   if (!use_vfp_abi (pcum->pcs_variant, false))
6130     return false;
6131 
6132   return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6133 						&pcum->aapcs_vfp_rmode,
6134 						&pcum->aapcs_vfp_rcount);
6135 }
6136 
6137 /* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
6138    for the behaviour of this function.  */
6139 
6140 static bool
6141 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6142 		    const_tree type  ATTRIBUTE_UNUSED)
6143 {
6144   int rmode_size
6145     = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6146   int shift = rmode_size / GET_MODE_SIZE (SFmode);
6147   unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6148   int regno;
6149 
6150   for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6151     if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6152       {
6153 	pcum->aapcs_vfp_reg_alloc = mask << regno;
6154 	if (mode == BLKmode
6155 	    || (mode == TImode && ! TARGET_NEON)
6156 	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6157 	  {
6158 	    int i;
6159 	    int rcount = pcum->aapcs_vfp_rcount;
6160 	    int rshift = shift;
6161 	    machine_mode rmode = pcum->aapcs_vfp_rmode;
6162 	    rtx par;
6163 	    if (!TARGET_NEON)
6164 	      {
6165 		/* Avoid using unsupported vector modes.  */
6166 		if (rmode == V2SImode)
6167 		  rmode = DImode;
6168 		else if (rmode == V4SImode)
6169 		  {
6170 		    rmode = DImode;
6171 		    rcount *= 2;
6172 		    rshift /= 2;
6173 		  }
6174 	      }
6175 	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6176 	    for (i = 0; i < rcount; i++)
6177 	      {
6178 		rtx tmp = gen_rtx_REG (rmode,
6179 				       FIRST_VFP_REGNUM + regno + i * rshift);
6180 		tmp = gen_rtx_EXPR_LIST
6181 		  (VOIDmode, tmp,
6182 		   GEN_INT (i * GET_MODE_SIZE (rmode)));
6183 		XVECEXP (par, 0, i) = tmp;
6184 	      }
6185 
6186 	    pcum->aapcs_reg = par;
6187 	  }
6188 	else
6189 	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6190 	return true;
6191       }
6192   return false;
6193 }
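
/* Worked example (illustrative): for a candidate with
   pcum->aapcs_vfp_rmode == DFmode and pcum->aapcs_vfp_rcount == 2,
   RMODE_SIZE is 8, SHIFT is 2 and MASK is 0xf, so the loop above searches
   for four consecutive free single-precision registers starting at an even
   register number (s0-s3, s2-s5, ...), i.e. a free d0/d1-style pair.  */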
6194 
6195 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
6196    comment there for the behaviour of this function.  */
6197 
6198 static rtx
6199 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6200 			       machine_mode mode,
6201 			       const_tree type ATTRIBUTE_UNUSED)
6202 {
6203   if (!use_vfp_abi (pcs_variant, false))
6204     return NULL;
6205 
6206   if (mode == BLKmode
6207       || (GET_MODE_CLASS (mode) == MODE_INT
6208 	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6209 	  && !TARGET_NEON))
6210     {
6211       int count;
6212       machine_mode ag_mode;
6213       int i;
6214       rtx par;
6215       int shift;
6216 
6217       aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6218 					     &ag_mode, &count);
6219 
6220       if (!TARGET_NEON)
6221 	{
6222 	  if (ag_mode == V2SImode)
6223 	    ag_mode = DImode;
6224 	  else if (ag_mode == V4SImode)
6225 	    {
6226 	      ag_mode = DImode;
6227 	      count *= 2;
6228 	    }
6229 	}
6230       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6231       par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6232       for (i = 0; i < count; i++)
6233 	{
6234 	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6235 	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6236 				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6237 	  XVECEXP (par, 0, i) = tmp;
6238 	}
6239 
6240       return par;
6241     }
6242 
6243   return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6244 }
6245 
6246 static void
6247 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
6248 		   machine_mode mode  ATTRIBUTE_UNUSED,
6249 		   const_tree type  ATTRIBUTE_UNUSED)
6250 {
6251   pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6252   pcum->aapcs_vfp_reg_alloc = 0;
6253   return;
6254 }
6255 
6256 #define AAPCS_CP(X)				\
6257   {						\
6258     aapcs_ ## X ## _cum_init,			\
6259     aapcs_ ## X ## _is_call_candidate,		\
6260     aapcs_ ## X ## _allocate,			\
6261     aapcs_ ## X ## _is_return_candidate,	\
6262     aapcs_ ## X ## _allocate_return_reg,	\
6263     aapcs_ ## X ## _advance			\
6264   }
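
/* For reference, AAPCS_CP(vfp) below expands to

     { aapcs_vfp_cum_init,
       aapcs_vfp_is_call_candidate,
       aapcs_vfp_allocate,
       aapcs_vfp_is_return_candidate,
       aapcs_vfp_allocate_return_reg,
       aapcs_vfp_advance }

   wiring the VFP routines defined above into the slot table.  */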
6265 
6266 /* Table of co-processors that can be used to pass arguments in
6267    registers.  Ideally no argument should be a candidate for more than
6268    one co-processor table entry, but the table is processed in order
6269    and stops after the first match.  If that entry then fails to put
6270    the argument into a co-processor register, the argument will go on
6271    the stack.  */
6272 static struct
6273 {
6274   /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
6275   void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6276 
6277   /* Return true if an argument of mode MODE (or type TYPE if MODE is
6278      BLKmode) is a candidate for this co-processor's registers; this
6279      function should ignore any position-dependent state in
6280      CUMULATIVE_ARGS and only use call-type dependent information.  */
6281   bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282 
6283   /* Return true if the argument does get a co-processor register; it
6284      should set aapcs_reg to the RTX of the allocated register, in the
6285      form required for a return from FUNCTION_ARG.  */
6286   bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6287 
6288   /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6289      be returned in this co-processor's registers.  */
6290   bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6291 
6292   /* Allocate and return an RTX element to hold the return value of a call.  This
6293      routine must not fail and will only be called if is_return_candidate
6294      returned true with the same parameters.  */
6295   rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6296 
6297   /* Finish processing this argument and prepare to start processing
6298      the next one.  */
6299   void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6300 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6301   {
6302     AAPCS_CP(vfp)
6303   };
6304 
6305 #undef AAPCS_CP
6306 
6307 static int
6308 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6309 			  const_tree type)
6310 {
6311   int i;
6312 
6313   for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6314     if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6315       return i;
6316 
6317   return -1;
6318 }
6319 
6320 static int
6321 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6322 {
6323   /* We aren't passed a decl, so we can't check that a call is local.
6324      However, it isn't clear that that would be a win anyway, since it
6325      might limit some tail-calling opportunities.  */
6326   enum arm_pcs pcs_variant;
6327 
6328   if (fntype)
6329     {
6330       const_tree fndecl = NULL_TREE;
6331 
6332       if (TREE_CODE (fntype) == FUNCTION_DECL)
6333 	{
6334 	  fndecl = fntype;
6335 	  fntype = TREE_TYPE (fntype);
6336 	}
6337 
6338       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6339     }
6340   else
6341     pcs_variant = arm_pcs_default;
6342 
6343   if (pcs_variant != ARM_PCS_AAPCS)
6344     {
6345       int i;
6346 
6347       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6348 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6349 							TYPE_MODE (type),
6350 							type))
6351 	  return i;
6352     }
6353   return -1;
6354 }
6355 
6356 static rtx
6357 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6358 			   const_tree fntype)
6359 {
6360   /* We aren't passed a decl, so we can't check that a call is local.
6361      However, it isn't clear that that would be a win anyway, since it
6362      might limit some tail-calling opportunities.  */
6363   enum arm_pcs pcs_variant;
6364   int unsignedp ATTRIBUTE_UNUSED;
6365 
6366   if (fntype)
6367     {
6368       const_tree fndecl = NULL_TREE;
6369 
6370       if (TREE_CODE (fntype) == FUNCTION_DECL)
6371 	{
6372 	  fndecl = fntype;
6373 	  fntype = TREE_TYPE (fntype);
6374 	}
6375 
6376       pcs_variant = arm_get_pcs_model (fntype, fndecl);
6377     }
6378   else
6379     pcs_variant = arm_pcs_default;
6380 
6381   /* Promote integer types.  */
6382   if (type && INTEGRAL_TYPE_P (type))
6383     mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6384 
6385   if (pcs_variant != ARM_PCS_AAPCS)
6386     {
6387       int i;
6388 
6389       for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6390 	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6391 							type))
6392 	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6393 							     mode, type);
6394     }
6395 
6396   /* Promote small structs returned in a register to full-word size
6397      for big-endian AAPCS.  */
6398   if (type && arm_return_in_msb (type))
6399     {
6400       HOST_WIDE_INT size = int_size_in_bytes (type);
6401       if (size % UNITS_PER_WORD != 0)
6402 	{
6403 	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6404 	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6405 	}
6406     }
6407 
6408   return gen_rtx_REG (mode, R0_REGNUM);
6409 }
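
/* Example (illustrative) of the big-endian promotion above: a 3-byte

     struct rgb { unsigned char r, g, b; };

   returned in core registers has its size rounded up to 4 and MODE
   switched to SImode, so that the padding occupies the low-order part of
   r0 and the value sits in the most significant bytes, as
   arm_return_in_msb expects.  */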
6410 
6411 static rtx
6412 aapcs_libcall_value (machine_mode mode)
6413 {
6414   if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6415       && GET_MODE_SIZE (mode) <= 4)
6416     mode = SImode;
6417 
6418   return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6419 }
6420 
6421 /* Lay out a function argument using the AAPCS rules.  The rule
6422    numbers referred to here are those in the AAPCS.  */
6423 static void
6424 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6425 		  const_tree type, bool named)
6426 {
6427   int nregs, nregs2;
6428   int ncrn;
6429 
6430   /* We only need to do this once per argument.  */
6431   if (pcum->aapcs_arg_processed)
6432     return;
6433 
6434   pcum->aapcs_arg_processed = true;
6435 
6436   /* Special case: if named is false then we are handling an incoming
6437      anonymous argument which is on the stack.  */
6438   if (!named)
6439     return;
6440 
6441   /* Is this a potential co-processor register candidate?  */
6442   if (pcum->pcs_variant != ARM_PCS_AAPCS)
6443     {
6444       int slot = aapcs_select_call_coproc (pcum, mode, type);
6445       pcum->aapcs_cprc_slot = slot;
6446 
6447       /* We don't have to apply any of the rules from part B of the
6448 	 preparation phase, these are handled elsewhere in the
6449 	 compiler.  */
6450 
6451       if (slot >= 0)
6452 	{
6453 	  /* A Co-processor register candidate goes either in its own
6454 	     class of registers or on the stack.  */
6455 	  if (!pcum->aapcs_cprc_failed[slot])
6456 	    {
6457 	      /* C1.cp - Try to allocate the argument to co-processor
6458 		 registers.  */
6459 	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6460 		return;
6461 
6462 	      /* C2.cp - Put the argument on the stack and note that we
6463 		 can't assign any more candidates in this slot.  We also
6464 		 need to note that we have allocated stack space, so that
6465 		 we won't later try to split a non-cprc candidate between
6466 		 core registers and the stack.  */
6467 	      pcum->aapcs_cprc_failed[slot] = true;
6468 	      pcum->can_split = false;
6469 	    }
6470 
6471 	  /* We didn't get a register, so this argument goes on the
6472 	     stack.  */
6473 	  gcc_assert (pcum->can_split == false);
6474 	  return;
6475 	}
6476     }
6477 
6478   /* C3 - For double-word aligned arguments, round the NCRN up to the
6479      next even number.  */
6480   ncrn = pcum->aapcs_ncrn;
6481   if (ncrn & 1)
6482     {
6483       int res = arm_needs_doubleword_align (mode, type);
6484       /* Only warn during RTL expansion of call stmts, otherwise we would
6485 	 warn e.g. during gimplification even on functions that will be
6486 	 always inlined, and we'd warn multiple times.  Don't warn when
6487 	 called in expand_function_start either, as we warn instead in
6488 	 arm_function_arg_boundary in that case.  */
6489       if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6490 	inform (input_location, "parameter passing for argument of type "
6491 		"%qT changed in GCC 7.1", type);
6492       else if (res > 0)
6493 	ncrn++;
6494     }
6495 
6496   nregs = ARM_NUM_REGS2 (mode, type);
6497 
6498   /* Sigh, this test should really assert that nregs > 0, but a GCC
6499      extension allows empty structs and then gives them empty size; it
6500      then allows such a structure to be passed by value.  For some of
6501      the code below we have to pretend that such an argument has
6502      non-zero size so that we 'locate' it correctly either in
6503      registers or on the stack.  */
6504   gcc_assert (nregs >= 0);
6505 
6506   nregs2 = nregs ? nregs : 1;
6507 
6508   /* C4 - Argument fits entirely in core registers.  */
6509   if (ncrn + nregs2 <= NUM_ARG_REGS)
6510     {
6511       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6512       pcum->aapcs_next_ncrn = ncrn + nregs;
6513       return;
6514     }
6515 
6516   /* C5 - Some core registers left and there are no arguments already
6517      on the stack: split this argument between the remaining core
6518      registers and the stack.  */
6519   if (ncrn < NUM_ARG_REGS && pcum->can_split)
6520     {
6521       pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6522       pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6523       pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6524       return;
6525     }
6526 
6527   /* C6 - NCRN is set to 4.  */
6528   pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6529 
6530   /* C7,C8 - Argument goes on the stack.  We have nothing to do here.  */
6531   return;
6532 }
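
/* Worked example (illustrative) of the rules above: for

     void f (int a, struct { int v[4]; } b);

   A is allocated r0 under rule C4.  B needs four words but only r1-r3 are
   left, so rule C5 splits it: r1-r3 carry the first 12 bytes
   (aapcs_partial == 12) and the remaining word goes on the stack.  Any
   further argument then finds all core registers exhausted (C6) and is
   placed entirely on the stack (C7, C8).  */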
6533 
6534 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6535    for a call to a function whose data type is FNTYPE.
6536    For a library call, FNTYPE is NULL.  */
6537 void
6538 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6539 			  rtx libname,
6540 			  tree fndecl ATTRIBUTE_UNUSED)
6541 {
6542   /* Long call handling.  */
6543   if (fntype)
6544     pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6545   else
6546     pcum->pcs_variant = arm_pcs_default;
6547 
6548   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6549     {
6550       if (arm_libcall_uses_aapcs_base (libname))
6551 	pcum->pcs_variant = ARM_PCS_AAPCS;
6552 
6553       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6554       pcum->aapcs_reg = NULL_RTX;
6555       pcum->aapcs_partial = 0;
6556       pcum->aapcs_arg_processed = false;
6557       pcum->aapcs_cprc_slot = -1;
6558       pcum->can_split = true;
6559 
6560       if (pcum->pcs_variant != ARM_PCS_AAPCS)
6561 	{
6562 	  int i;
6563 
6564 	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6565 	    {
6566 	      pcum->aapcs_cprc_failed[i] = false;
6567 	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6568 	    }
6569 	}
6570       return;
6571     }
6572 
6573   /* Legacy ABIs */
6574 
6575   /* On the ARM, the offset starts at 0.  */
6576   pcum->nregs = 0;
6577   pcum->iwmmxt_nregs = 0;
6578   pcum->can_split = true;
6579 
6580   /* Varargs vectors are treated the same as long long.
6581      named_count avoids having to change the way arm handles 'named' */
6582   pcum->named_count = 0;
6583   pcum->nargs = 0;
6584 
6585   if (TARGET_REALLY_IWMMXT && fntype)
6586     {
6587       tree fn_arg;
6588 
6589       for (fn_arg = TYPE_ARG_TYPES (fntype);
6590 	   fn_arg;
6591 	   fn_arg = TREE_CHAIN (fn_arg))
6592 	pcum->named_count += 1;
6593 
6594       if (! pcum->named_count)
6595 	pcum->named_count = INT_MAX;
6596     }
6597 }
6598 
6599 /* Return 1 if double word alignment is required for argument passing.
6600    Return -1 if double word alignment used to be required for argument
6601    passing before PR77728 ABI fix, but is not required anymore.
6602    Return 0 if double word alignment is not required and wasn't required
6603    before either.  */
6604 static int
6605 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6606 {
6607   if (!type)
6608     return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6609 
6610   /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
6611   if (!AGGREGATE_TYPE_P (type))
6612     return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6613 
6614   /* Array types: Use member alignment of element type.  */
6615   if (TREE_CODE (type) == ARRAY_TYPE)
6616     return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6617 
6618   int ret = 0;
6619   /* Record/aggregate types: Use greatest member alignment of any member.  */
6620   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6621     if (DECL_ALIGN (field) > PARM_BOUNDARY)
6622       {
6623 	if (TREE_CODE (field) == FIELD_DECL)
6624 	  return 1;
6625 	else
6626 	  /* Before PR77728 fix, we were incorrectly considering also
6627 	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6628 	     Make sure we can warn about that with -Wpsabi.  */
6629 	  ret = -1;
6630       }
6631 
6632   return ret;
6633 }
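
/* Illustrative examples (not from the original sources), assuming the
   32-bit PARM_BOUNDARY of the AAPCS:

     long long ll;                  -- returns 1 (64-bit natural alignment)
     struct s1 { long long x; };    -- returns 1 (FIELD_DECL aligned to 64)
     struct s2 { int x; };          -- returns 0

   A C++ class whose only over-aligned member is a static data member
   (a VAR_DECL rather than a FIELD_DECL) returns -1: it was doubleword
   aligned before the PR77728 fix but no longer is, and -Wpsabi can warn
   about the change.  */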
6634 
6635 
6636 /* Determine where to put an argument to a function.
6637    Value is zero to push the argument on the stack,
6638    or a hard register in which to store the argument.
6639 
6640    MODE is the argument's machine mode.
6641    TYPE is the data type of the argument (as a tree).
6642     This is null for libcalls where that information may
6643     not be available.
6644    CUM is a variable of type CUMULATIVE_ARGS which gives info about
6645     the preceding args and about the function being called.
6646    NAMED is nonzero if this argument is a named parameter
6647     (otherwise it is an extra parameter matching an ellipsis).
6648 
6649    On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6650    other arguments are passed on the stack.  If (NAMED == 0) (which happens
6651    only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6652    defined), say it is passed on the stack (function_prologue will
6653    indeed make it pass on the stack if necessary).  */
6654 
6655 static rtx
6656 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6657 		  const_tree type, bool named)
6658 {
6659   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6660   int nregs;
6661 
6662   /* Handle the special case quickly.  Pick an arbitrary value for op2 of
6663      a call insn (op3 of a call_value insn).  */
6664   if (mode == VOIDmode)
6665     return const0_rtx;
6666 
6667   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668     {
6669       aapcs_layout_arg (pcum, mode, type, named);
6670       return pcum->aapcs_reg;
6671     }
6672 
6673   /* Varargs vectors are treated the same as long long.
6674      named_count avoids having to change the way arm handles 'named' */
6675   if (TARGET_IWMMXT_ABI
6676       && arm_vector_mode_supported_p (mode)
6677       && pcum->named_count > pcum->nargs + 1)
6678     {
6679       if (pcum->iwmmxt_nregs <= 9)
6680 	return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6681       else
6682 	{
6683 	  pcum->can_split = false;
6684 	  return NULL_RTX;
6685 	}
6686     }
6687 
6688   /* Put doubleword aligned quantities in even register pairs.  */
6689   if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6690     {
6691       int res = arm_needs_doubleword_align (mode, type);
6692       if (res < 0 && warn_psabi)
6693 	inform (input_location, "parameter passing for argument of type "
6694 		"%qT changed in GCC 7.1", type);
6695       else if (res > 0)
6696 	pcum->nregs++;
6697     }
6698 
6699   /* Only allow splitting an arg between regs and memory if all preceding
6700      args were allocated to regs.  For args passed by reference we only count
6701      the reference pointer.  */
6702   if (pcum->can_split)
6703     nregs = 1;
6704   else
6705     nregs = ARM_NUM_REGS2 (mode, type);
6706 
6707   if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6708     return NULL_RTX;
6709 
6710   return gen_rtx_REG (mode, pcum->nregs);
6711 }
6712 
6713 static unsigned int
6714 arm_function_arg_boundary (machine_mode mode, const_tree type)
6715 {
6716   if (!ARM_DOUBLEWORD_ALIGN)
6717     return PARM_BOUNDARY;
6718 
6719   int res = arm_needs_doubleword_align (mode, type);
6720   if (res < 0 && warn_psabi)
6721     inform (input_location, "parameter passing for argument of type %qT "
6722 	    "changed in GCC 7.1", type);
6723 
6724   return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6725 }
6726 
6727 static int
6728 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6729 		       tree type, bool named)
6730 {
6731   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6732   int nregs = pcum->nregs;
6733 
6734   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6735     {
6736       aapcs_layout_arg (pcum, mode, type, named);
6737       return pcum->aapcs_partial;
6738     }
6739 
6740   if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6741     return 0;
6742 
6743   if (NUM_ARG_REGS > nregs
6744       && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6745       && pcum->can_split)
6746     return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6747 
6748   return 0;
6749 }
6750 
6751 /* Update the data in PCUM to advance over an argument
6752    of mode MODE and data type TYPE.
6753    (TYPE is null for libcalls where that information may not be available.)  */
6754 
6755 static void
6756 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6757 			  const_tree type, bool named)
6758 {
6759   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6760 
6761   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6762     {
6763       aapcs_layout_arg (pcum, mode, type, named);
6764 
6765       if (pcum->aapcs_cprc_slot >= 0)
6766 	{
6767 	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6768 							      type);
6769 	  pcum->aapcs_cprc_slot = -1;
6770 	}
6771 
6772       /* Generic stuff.  */
6773       pcum->aapcs_arg_processed = false;
6774       pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6775       pcum->aapcs_reg = NULL_RTX;
6776       pcum->aapcs_partial = 0;
6777     }
6778   else
6779     {
6780       pcum->nargs += 1;
6781       if (arm_vector_mode_supported_p (mode)
6782 	  && pcum->named_count > pcum->nargs
6783 	  && TARGET_IWMMXT_ABI)
6784 	pcum->iwmmxt_nregs += 1;
6785       else
6786 	pcum->nregs += ARM_NUM_REGS2 (mode, type);
6787     }
6788 }
6789 
6790 /* Variable sized types are passed by reference.  This is a GCC
6791    extension to the ARM ABI.  */
6792 
6793 static bool
6794 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6795 		       machine_mode mode ATTRIBUTE_UNUSED,
6796 		       const_tree type, bool named ATTRIBUTE_UNUSED)
6797 {
6798   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6799 }
6800 
6801 /* Encode the current state of the #pragma [no_]long_calls.  */
6802 typedef enum
6803 {
6804   OFF,		/* No #pragma [no_]long_calls is in effect.  */
6805   LONG,		/* #pragma long_calls is in effect.  */
6806   SHORT		/* #pragma no_long_calls is in effect.  */
6807 } arm_pragma_enum;
6808 
6809 static arm_pragma_enum arm_pragma_long_calls = OFF;
6810 
6811 void
6812 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6813 {
6814   arm_pragma_long_calls = LONG;
6815 }
6816 
6817 void
6818 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6819 {
6820   arm_pragma_long_calls = SHORT;
6821 }
6822 
6823 void
6824 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6825 {
6826   arm_pragma_long_calls = OFF;
6827 }
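
/* Example usage of the pragmas handled above (illustrative):

     #pragma long_calls
     void far_away (void);    -- will get an implicit long_call attribute
     #pragma no_long_calls
     void near_by (void);     -- will get an implicit short_call attribute
     #pragma long_calls_off
     void normal (void);      -- reverts to the command-line default

   The attributes themselves are attached by
   arm_set_default_type_attributes below.  */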
6828 
6829 /* Handle an attribute requiring a FUNCTION_DECL;
6830    arguments as in struct attribute_spec.handler.  */
6831 static tree
6832 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6833 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6834 {
6835   if (TREE_CODE (*node) != FUNCTION_DECL)
6836     {
6837       warning (OPT_Wattributes, "%qE attribute only applies to functions",
6838 	       name);
6839       *no_add_attrs = true;
6840     }
6841 
6842   return NULL_TREE;
6843 }
6844 
6845 /* Handle an "interrupt" or "isr" attribute;
6846    arguments as in struct attribute_spec.handler.  */
6847 static tree
6848 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6849 			  bool *no_add_attrs)
6850 {
6851   if (DECL_P (*node))
6852     {
6853       if (TREE_CODE (*node) != FUNCTION_DECL)
6854 	{
6855 	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
6856 		   name);
6857 	  *no_add_attrs = true;
6858 	}
6859       /* FIXME: the argument if any is checked for type attributes;
6860 	 should it be checked for decl ones?  */
6861     }
6862   else
6863     {
6864       if (TREE_CODE (*node) == FUNCTION_TYPE
6865 	  || TREE_CODE (*node) == METHOD_TYPE)
6866 	{
6867 	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6868 	    {
6869 	      warning (OPT_Wattributes, "%qE attribute ignored",
6870 		       name);
6871 	      *no_add_attrs = true;
6872 	    }
6873 	}
6874       else if (TREE_CODE (*node) == POINTER_TYPE
6875 	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6876 		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6877 	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
6878 	{
6879 	  *node = build_variant_type_copy (*node);
6880 	  TREE_TYPE (*node) = build_type_attribute_variant
6881 	    (TREE_TYPE (*node),
6882 	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6883 	  *no_add_attrs = true;
6884 	}
6885       else
6886 	{
6887 	  /* Possibly pass this attribute on from the type to a decl.  */
6888 	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
6889 		       | (int) ATTR_FLAG_FUNCTION_NEXT
6890 		       | (int) ATTR_FLAG_ARRAY_NEXT))
6891 	    {
6892 	      *no_add_attrs = true;
6893 	      return tree_cons (name, args, NULL_TREE);
6894 	    }
6895 	  else
6896 	    {
6897 	      warning (OPT_Wattributes, "%qE attribute ignored",
6898 		       name);
6899 	    }
6900 	}
6901     }
6902 
6903   return NULL_TREE;
6904 }
6905 
6906 /* Handle a "pcs" attribute; arguments as in struct
6907    attribute_spec.handler.  */
6908 static tree
6909 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6910 			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6911 {
6912   if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6913     {
6914       warning (OPT_Wattributes, "%qE attribute ignored", name);
6915       *no_add_attrs = true;
6916     }
6917   return NULL_TREE;
6918 }
6919 
6920 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6921 /* Handle the "notshared" attribute.  This attribute is another way of
6922    requesting hidden visibility.  ARM's compiler supports
6923    "__declspec(notshared)"; we support the same thing via an
6924    attribute.  */
6925 
6926 static tree
6927 arm_handle_notshared_attribute (tree *node,
6928 				tree name ATTRIBUTE_UNUSED,
6929 				tree args ATTRIBUTE_UNUSED,
6930 				int flags ATTRIBUTE_UNUSED,
6931 				bool *no_add_attrs)
6932 {
6933   tree decl = TYPE_NAME (*node);
6934 
6935   if (decl)
6936     {
6937       DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6938       DECL_VISIBILITY_SPECIFIED (decl) = 1;
6939       *no_add_attrs = false;
6940     }
6941   return NULL_TREE;
6942 }
6943 #endif
6944 
6945 /* This function returns true if a function with declaration FNDECL and type
6946    FNTYPE uses the stack to pass arguments or return variables and false
6947    otherwise.  This is used for functions with the attributes
6948    'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6949    diagnostic messages if the stack is used.  NAME is the name of the attribute
6950    used.  */
6951 
6952 static bool
6953 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6954 {
6955   function_args_iterator args_iter;
6956   CUMULATIVE_ARGS args_so_far_v;
6957   cumulative_args_t args_so_far;
6958   bool first_param = true;
6959   tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6960 
6961   /* Error out if any argument is passed on the stack.  */
6962   arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6963   args_so_far = pack_cumulative_args (&args_so_far_v);
6964   FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6965     {
6966       rtx arg_rtx;
6967       machine_mode arg_mode = TYPE_MODE (arg_type);
6968 
6969       prev_arg_type = arg_type;
6970       if (VOID_TYPE_P (arg_type))
6971 	continue;
6972 
6973       if (!first_param)
6974 	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6975       arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6976       if (!arg_rtx
6977 	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6978 	{
6979 	  error ("%qE attribute not available to functions with arguments "
6980 		 "passed on the stack", name);
6981 	  return true;
6982 	}
6983       first_param = false;
6984     }
6985 
6986   /* Error out for variadic functions since we cannot control how many
6987      arguments will be passed and thus the stack could be used.  stdarg_p ()
6988      is not used for this check, to avoid walking the argument list twice.  */
6989   if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6990     {
6991       error ("%qE attribute not available to functions with variable number "
6992 	     "of arguments", name);
6993       return true;
6994     }
6995 
6996   /* Error out if return value is passed on the stack.  */
6997   ret_type = TREE_TYPE (fntype);
6998   if (arm_return_in_memory (ret_type, fntype))
6999     {
7000       error ("%qE attribute not available to functions that return value on "
7001 	     "the stack", name);
7002       return true;
7003     }
7004   return false;
7005 }
7006 
7007 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7008    function will check whether the attribute is allowed here and will add the
7009    attribute to the function declaration tree or otherwise issue a warning.  */
7010 
7011 static tree
7012 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7013 				 tree /* args */,
7014 				 int /* flags */,
7015 				 bool *no_add_attrs)
7016 {
7017   tree fndecl;
7018 
7019   if (!use_cmse)
7020     {
7021       *no_add_attrs = true;
7022       warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7023 	       name);
7024       return NULL_TREE;
7025     }
7026 
7027   /* Ignore attribute for function types.  */
7028   if (TREE_CODE (*node) != FUNCTION_DECL)
7029     {
7030       warning (OPT_Wattributes, "%qE attribute only applies to functions",
7031 	       name);
7032       *no_add_attrs = true;
7033       return NULL_TREE;
7034     }
7035 
7036   fndecl = *node;
7037 
7038   /* Warn for static linkage functions.  */
7039   if (!TREE_PUBLIC (fndecl))
7040     {
7041       warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7042 	       "with static linkage", name);
7043       *no_add_attrs = true;
7044       return NULL_TREE;
7045     }
7046 
7047   *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7048 						TREE_TYPE (fndecl));
7049   return NULL_TREE;
7050 }
7051 
7052 
7053 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7054    function will check whether the attribute is allowed here and will add the
7055    attribute to the function type tree or otherwise issue a diagnostic.  The
7056    reason we check this at declaration time is to only allow the use of the
7057    attribute with declarations of function pointers and not function
7058    declarations.  This function checks that NODE is of the expected type and
7059    issues diagnostics using NAME otherwise.  If it is not of the expected type,
7060    *NO_ADD_ATTRS will be set to true.  */
7061 
7062 static tree
7063 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7064 				 tree /* args */,
7065 				 int /* flags */,
7066 				 bool *no_add_attrs)
7067 {
7068   tree decl = NULL_TREE, fntype = NULL_TREE;
7069   tree type;
7070 
7071   if (!use_cmse)
7072     {
7073       *no_add_attrs = true;
7074       warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7075 	       name);
7076       return NULL_TREE;
7077     }
7078 
7079   if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7080     {
7081       decl = *node;
7082       fntype = TREE_TYPE (decl);
7083     }
7084 
7085   while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7086     fntype = TREE_TYPE (fntype);
7087 
7088   if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7089     {
7090 	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7091 		 "function pointer", name);
7092 	*no_add_attrs = true;
7093 	return NULL_TREE;
7094     }
7095 
7096   *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7097 
7098   if (*no_add_attrs)
7099     return NULL_TREE;
7100 
7101   /* Prevent trees being shared among function types with and without
7102      cmse_nonsecure_call attribute.  */
7103   type = TREE_TYPE (decl);
7104 
7105   type = build_distinct_type_copy (type);
7106   TREE_TYPE (decl) = type;
7107   fntype = type;
7108 
7109   while (TREE_CODE (fntype) != FUNCTION_TYPE)
7110     {
7111       type = fntype;
7112       fntype = TREE_TYPE (fntype);
7113       fntype = build_distinct_type_copy (fntype);
7114       TREE_TYPE (type) = fntype;
7115     }
7116 
7117   /* Construct a type attribute and add it to the function type.  */
7118   tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7119 			  TYPE_ATTRIBUTES (fntype));
7120   TYPE_ATTRIBUTES (fntype) = attrs;
7121   return NULL_TREE;
7122 }
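
/* Illustrative usage of the two CMSE attributes handled above (both
   require -mcmse):

     int __attribute__ ((cmse_nonsecure_entry)) get_key (int idx);

     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);

   The first marks a secure-state entry function that may be called from
   non-secure code; the second may only be applied via a function-pointer
   declaration, which is why the handler above insists on a VAR_DECL or
   TYPE_DECL.  */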
7123 
7124 /* Return 0 if the attributes for two types are incompatible, 1 if they
7125    are compatible, and 2 if they are nearly compatible (which causes a
7126    warning to be generated).  */
7127 static int
7128 arm_comp_type_attributes (const_tree type1, const_tree type2)
7129 {
7130   int l1, l2, s1, s2;
7131 
7132   /* Check for mismatch of non-default calling convention.  */
7133   if (TREE_CODE (type1) != FUNCTION_TYPE)
7134     return 1;
7135 
7136   /* Check for mismatched call attributes.  */
7137   l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7138   l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7139   s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7140   s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7141 
7142   /* Only bother to check if an attribute is defined.  */
7143   if (l1 | l2 | s1 | s2)
7144     {
7145       /* If one type has an attribute, the other must have the same attribute.  */
7146       if ((l1 != l2) || (s1 != s2))
7147 	return 0;
7148 
7149       /* Disallow mixed attributes.  */
7150       if ((l1 & s2) || (l2 & s1))
7151 	return 0;
7152     }
7153 
7154   /* Check for mismatched ISR attribute.  */
7155   l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7156   if (! l1)
7157     l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7158   l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7159   if (! l2)
7160     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7161   if (l1 != l2)
7162     return 0;
7163 
7164   l1 = lookup_attribute ("cmse_nonsecure_call",
7165 			 TYPE_ATTRIBUTES (type1)) != NULL;
7166   l2 = lookup_attribute ("cmse_nonsecure_call",
7167 			 TYPE_ATTRIBUTES (type2)) != NULL;
7168 
7169   if (l1 != l2)
7170     return 0;
7171 
7172   return 1;
7173 }
7174 
7175 /*  Assigns default attributes to newly defined type.  This is used to
7176     set short_call/long_call attributes for function types of
7177     functions defined inside corresponding #pragma scopes.  */
7178 static void
7179 arm_set_default_type_attributes (tree type)
7180 {
7181   /* Add __attribute__ ((long_call)) to all functions, when
7182      inside #pragma long_calls or __attribute__ ((short_call)),
7183      when inside #pragma no_long_calls.  */
7184   if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7185     {
7186       tree type_attr_list, attr_name;
7187       type_attr_list = TYPE_ATTRIBUTES (type);
7188 
7189       if (arm_pragma_long_calls == LONG)
7190  	attr_name = get_identifier ("long_call");
7191       else if (arm_pragma_long_calls == SHORT)
7192  	attr_name = get_identifier ("short_call");
7193       else
7194  	return;
7195 
7196       type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7197       TYPE_ATTRIBUTES (type) = type_attr_list;
7198     }
7199 }
7200 
7201 /* Return true if DECL is known to be linked into section SECTION.  */
7202 
7203 static bool
7204 arm_function_in_section_p (tree decl, section *section)
7205 {
7206   /* We can only be certain about the prevailing symbol definition.  */
7207   if (!decl_binds_to_current_def_p (decl))
7208     return false;
7209 
7210   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
7211   if (!DECL_SECTION_NAME (decl))
7212     {
7213       /* Make sure that we will not create a unique section for DECL.  */
7214       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7215 	return false;
7216     }
7217 
7218   return function_section (decl) == section;
7219 }
7220 
7221 /* Return nonzero if a 32-bit "long_call" should be generated for
7222    a call from the current function to DECL.  We generate a long_call
7223    if the function:
7224 
7225         a.  has an __attribute__ ((long_call))
7226      or b.  is within the scope of a #pragma long_calls
7227      or c.  the -mlong-calls command line switch has been specified
7228 
7229    However we do not generate a long call if the function:
7230 
7231         d.  has an __attribute__ ((short_call))
7232      or e.  is inside the scope of a #pragma no_long_calls
7233      or f.  is defined in the same section as the current function.  */
7234 
7235 bool
7236 arm_is_long_call_p (tree decl)
7237 {
7238   tree attrs;
7239 
7240   if (!decl)
7241     return TARGET_LONG_CALLS;
7242 
7243   attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7244   if (lookup_attribute ("short_call", attrs))
7245     return false;
7246 
7247   /* For "f", be conservative, and only cater for cases in which the
7248      whole of the current function is placed in the same section.  */
7249   if (!flag_reorder_blocks_and_partition
7250       && TREE_CODE (decl) == FUNCTION_DECL
7251       && arm_function_in_section_p (decl, current_function_section ()))
7252     return false;
7253 
7254   if (lookup_attribute ("long_call", attrs))
7255     return true;
7256 
7257   return TARGET_LONG_CALLS;
7258 }
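
/* For example (illustrative):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   A call to FAR_FUNC is typically emitted as a load of its address into a
   register followed by an indirect call, whereas a call to NEAR_FUNC
   always uses a plain BL, regardless of -mlong-calls.  */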
7259 
7260 /* Return nonzero if it is ok to make a tail-call to DECL.  */
7261 static bool
7262 arm_function_ok_for_sibcall (tree decl, tree exp)
7263 {
7264   unsigned long func_type;
7265 
7266   if (cfun->machine->sibcall_blocked)
7267     return false;
7268 
7269   /* Never tailcall something if we are generating code for Thumb-1.  */
7270   if (TARGET_THUMB1)
7271     return false;
7272 
7273   /* The PIC register is live on entry to VxWorks PLT entries, so we
7274      must make the call before restoring the PIC register.  */
7275   if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7276     return false;
7277 
7278   /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7279      may be used both as target of the call and base register for restoring
7280      the VFP registers  */
7281   if (TARGET_APCS_FRAME && TARGET_ARM
7282       && TARGET_HARD_FLOAT
7283       && decl && arm_is_long_call_p (decl))
7284     return false;
7285 
7286   /* If we are interworking and the function is not declared static
7287      then we can't tail-call it unless we know that it exists in this
7288      compilation unit (since it might be a Thumb routine).  */
7289   if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7290       && !TREE_ASM_WRITTEN (decl))
7291     return false;
7292 
7293   func_type = arm_current_func_type ();
7294   /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
7295   if (IS_INTERRUPT (func_type))
7296     return false;
7297 
7298   /* ARMv8-M non-secure entry functions need to return with bxns which is only
7299      generated for entry functions themselves.  */
7300   if (IS_CMSE_ENTRY (arm_current_func_type ()))
7301     return false;
7302 
7303   /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7304      this would complicate matters for later code generation.  */
7305   if (TREE_CODE (exp) == CALL_EXPR)
7306     {
7307       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7308       if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7309 	return false;
7310     }
7311 
7312   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7313     {
7314       /* Check that the return value locations are the same.  For
7315 	 example that we aren't returning a value from the sibling in
7316 	 a VFP register but then need to transfer it to a core
7317 	 register.  */
7318       rtx a, b;
7319       tree decl_or_type = decl;
7320 
7321       /* If it is an indirect function pointer, get the function type.  */
7322       if (!decl)
7323 	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7324 
7325       a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7326       b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7327 			      cfun->decl, false);
7328       if (!rtx_equal_p (a, b))
7329 	return false;
7330     }
7331 
7332   /* Never tailcall if function may be called with a misaligned SP.  */
7333   if (IS_STACKALIGN (func_type))
7334     return false;
7335 
7336   /* The AAPCS says that, on bare-metal, calls to unresolved weak
7337      references should become a NOP.  Don't convert such calls into
7338      sibling calls.  */
7339   if (TARGET_AAPCS_BASED
7340       && arm_abi == ARM_ABI_AAPCS
7341       && decl
7342       && DECL_WEAK (decl))
7343     return false;
7344 
7345   /* We cannot do a tailcall for an indirect call by descriptor if all the
7346      argument registers are used because the only register left to load the
7347      address is IP and it will already contain the static chain.  */
7348   if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7349     {
7350       tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7351       CUMULATIVE_ARGS cum;
7352       cumulative_args_t cum_v;
7353 
7354       arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7355       cum_v = pack_cumulative_args (&cum);
7356 
7357       for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7358 	{
7359 	  tree type = TREE_VALUE (t);
7360 	  if (!VOID_TYPE_P (type))
7361 	    arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7362 	}
7363 
7364       if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7365 	return false;
7366     }
7367 
7368   /* Everything else is ok.  */
7369   return true;
7370 }
7371 
7372 
7373 /* Addressing mode support functions.  */
7374 
7375 /* Return nonzero if X is a legitimate immediate operand when compiling
7376    for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
7377 int
7378 legitimate_pic_operand_p (rtx x)
7379 {
7380   if (GET_CODE (x) == SYMBOL_REF
7381       || (GET_CODE (x) == CONST
7382 	  && GET_CODE (XEXP (x, 0)) == PLUS
7383 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7384     return 0;
7385 
7386   return 1;
7387 }
7388 
7389 /* Record that the current function needs a PIC register.  Initialize
7390    cfun->machine->pic_reg if we have not already done so.  */
7391 
7392 static void
7393 require_pic_register (void)
7394 {
7395   /* A lot of the logic here is made obscure by the fact that this
7396      routine gets called as part of the rtx cost estimation process.
7397      We don't want those calls to affect any assumptions about the real
7398      function; and further, we can't call entry_of_function() until we
7399      start the real expansion process.  */
7400   if (!crtl->uses_pic_offset_table)
7401     {
7402       gcc_assert (can_create_pseudo_p ());
7403       if (arm_pic_register != INVALID_REGNUM
7404 	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7405 	{
7406 	  if (!cfun->machine->pic_reg)
7407 	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7408 
7409 	  /* Play games to avoid marking the function as needing pic
7410 	     if we are being called as part of the cost-estimation
7411 	     process.  */
7412 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7413 	    crtl->uses_pic_offset_table = 1;
7414 	}
7415       else
7416 	{
7417 	  rtx_insn *seq, *insn;
7418 
7419 	  if (!cfun->machine->pic_reg)
7420 	    cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7421 
7422 	  /* Play games to avoid marking the function as needing pic
7423 	     if we are being called as part of the cost-estimation
7424 	     process.  */
7425 	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7426 	    {
7427 	      crtl->uses_pic_offset_table = 1;
7428 	      start_sequence ();
7429 
7430 	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7431 		  && arm_pic_register > LAST_LO_REGNUM)
7432 		emit_move_insn (cfun->machine->pic_reg,
7433 				gen_rtx_REG (Pmode, arm_pic_register));
7434 	      else
7435 		arm_load_pic_register (0UL);
7436 
7437 	      seq = get_insns ();
7438 	      end_sequence ();
7439 
7440 	      for (insn = seq; insn; insn = NEXT_INSN (insn))
7441 		if (INSN_P (insn))
7442 		  INSN_LOCATION (insn) = prologue_location;
7443 
7444 	      /* We can be called during expansion of PHI nodes, where
7445 	         we can't yet emit instructions directly in the final
7446 		 insn stream.  Queue the insns on the entry edge, they will
7447 		 be committed after everything else is expanded.  */
7448 	      insert_insn_on_edge (seq,
7449 				   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7450 	    }
7451 	}
7452     }
7453 }
7454 
7455 rtx
7456 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7457 {
7458   if (GET_CODE (orig) == SYMBOL_REF
7459       || GET_CODE (orig) == LABEL_REF)
7460     {
7461       if (reg == 0)
7462 	{
7463 	  gcc_assert (can_create_pseudo_p ());
7464 	  reg = gen_reg_rtx (Pmode);
7465 	}
7466 
7467       /* VxWorks does not impose a fixed gap between segments; the run-time
7468 	 gap can be different from the object-file gap.  We therefore can't
7469 	 use GOTOFF unless we are absolutely sure that the symbol is in the
7470 	 same segment as the GOT.  Unfortunately, the flexibility of linker
7471 	 scripts means that we can't be sure of that in general, so assume
7472 	 that GOTOFF is never valid on VxWorks.  */
7473       /* References to weak symbols cannot be resolved locally: they
7474 	 may be overridden by a non-weak definition at link time.  */
7475       rtx_insn *insn;
7476       if ((GET_CODE (orig) == LABEL_REF
7477 	   || (GET_CODE (orig) == SYMBOL_REF
7478 	       && SYMBOL_REF_LOCAL_P (orig)
7479 	       && (SYMBOL_REF_DECL (orig)
7480 		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7481 	  && NEED_GOT_RELOC
7482 	  && arm_pic_data_is_text_relative)
7483 	insn = arm_pic_static_addr (orig, reg);
7484       else
7485 	{
7486 	  rtx pat;
7487 	  rtx mem;
7488 
7489 	  /* If this function doesn't have a pic register, create one now.  */
7490 	  require_pic_register ();
7491 
7492 	  pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7493 
7494 	  /* Make the MEM as close to a constant as possible.  */
7495 	  mem = SET_SRC (pat);
7496 	  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7497 	  MEM_READONLY_P (mem) = 1;
7498 	  MEM_NOTRAP_P (mem) = 1;
7499 
7500 	  insn = emit_insn (pat);
7501 	}
7502 
7503       /* Put a REG_EQUAL note on this insn, so that it can be optimized
7504 	 by loop.  */
7505       set_unique_reg_note (insn, REG_EQUAL, orig);
7506 
7507       return reg;
7508     }
7509   else if (GET_CODE (orig) == CONST)
7510     {
7511       rtx base, offset;
7512 
7513       if (GET_CODE (XEXP (orig, 0)) == PLUS
7514 	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7515 	return orig;
7516 
7517       /* Handle the case where we have: const (UNSPEC_TLS).  */
7518       if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7519 	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7520 	return orig;
7521 
7522       /* Handle the case where we have:
7523          const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
7524          CONST_INT.  */
7525       if (GET_CODE (XEXP (orig, 0)) == PLUS
7526           && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7527           && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7528         {
7529 	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7530 	  return orig;
7531 	}
7532 
7533       if (reg == 0)
7534 	{
7535 	  gcc_assert (can_create_pseudo_p ());
7536 	  reg = gen_reg_rtx (Pmode);
7537 	}
7538 
7539       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7540 
7541       base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7542       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7543 				       base == reg ? 0 : reg);
7544 
7545       if (CONST_INT_P (offset))
7546 	{
7547 	  /* The base register doesn't really matter, we only want to
7548 	     test the index for the appropriate mode.  */
7549 	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
7550 	    {
7551 	      gcc_assert (can_create_pseudo_p ());
7552 	      offset = force_reg (Pmode, offset);
7553 	    }
7554 
7555 	  if (CONST_INT_P (offset))
7556 	    return plus_constant (Pmode, base, INTVAL (offset));
7557 	}
7558 
7559       if (GET_MODE_SIZE (mode) > 4
7560 	  && (GET_MODE_CLASS (mode) == MODE_INT
7561 	      || TARGET_SOFT_FLOAT))
7562 	{
7563 	  emit_insn (gen_addsi3 (reg, base, offset));
7564 	  return reg;
7565 	}
7566 
7567       return gen_rtx_PLUS (Pmode, base, offset);
7568     }
7569 
7570   return orig;
7571 }
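
/* Sketch (illustrative, details vary with the target options): with -fPIC
   a reference to a preemptible global symbol FOO is rewritten by the code
   above into roughly

     (set (reg Rn) (mem/u (plus (reg PIC) <GOT offset of FOO>)))

   i.e. a load of FOO's address from its GOT slot, annotated with a
   REG_EQUAL note, while a local symbol on a text-relative target goes
   through the cheaper PC-relative sequence in arm_pic_static_addr.  */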
7572 
7573 
7574 /* Find a spare register to use during the prolog of a function.  */
7575 
7576 static int
7577 thumb_find_work_register (unsigned long pushed_regs_mask)
7578 {
7579   int reg;
7580 
7581   /* Check the argument registers first as these are call-used.  The
7582      register allocation order means that sometimes r3 might be used
7583      but earlier argument registers might not, so check them all.  */
7584   for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7585     if (!df_regs_ever_live_p (reg))
7586       return reg;
7587 
7588   /* Before going on to check the call-saved registers we can try a couple
7589      more ways of deducing that r3 is available.  The first is when we are
7590      pushing anonymous arguments onto the stack and we have less than 4
7591      registers worth of fixed arguments(*).  In this case r3 will be part of
7592      the variable argument list and so we can be sure that it will be
7593      pushed right at the start of the function.  Hence it will be available
7594      for the rest of the prologue.
7595      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
7596   if (cfun->machine->uses_anonymous_args
7597       && crtl->args.pretend_args_size > 0)
7598     return LAST_ARG_REGNUM;
7599 
7600   /* The other case is when we have fixed arguments but less than 4 registers
7601      worth.  In this case r3 might be used in the body of the function, but
7602      it is not being used to convey an argument into the function.  In theory
7603      we could just check crtl->args.size to see how many bytes are
7604      being passed in argument registers, but it seems that it is unreliable.
7605      Sometimes it will have the value 0 when in fact arguments are being
7606      passed.  (See testcase execute/20021111-1.c for an example).  So we also
7607      check the args_info.nregs field as well.  The problem with this field is
7608      that it makes no allowances for arguments that are passed to the
7609      function but which are not used.  Hence we could miss an opportunity
7610      when a function has an unused argument in r3.  But it is better to be
7611      safe than to be sorry.  */
7612   if (! cfun->machine->uses_anonymous_args
7613       && crtl->args.size >= 0
7614       && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7615       && (TARGET_AAPCS_BASED
7616 	  ? crtl->args.info.aapcs_ncrn < 4
7617 	  : crtl->args.info.nregs < 4))
7618     return LAST_ARG_REGNUM;
7619 
7620   /* Otherwise look for a call-saved register that is going to be pushed.  */
7621   for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7622     if (pushed_regs_mask & (1 << reg))
7623       return reg;
7624 
7625   if (TARGET_THUMB2)
7626     {
7627       /* Thumb-2 can use high regs.  */
7628       for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7629 	if (pushed_regs_mask & (1 << reg))
7630 	  return reg;
7631     }
7632   /* Something went wrong - thumb_compute_save_reg_mask()
7633      should have arranged for a suitable register to be pushed.  */
7634   gcc_unreachable ();
7635 }
7636 
7637 static GTY(()) int pic_labelno;
7638 
7639 /* Generate code to load the PIC register.  In thumb mode SCRATCH is a
7640    low register.  */
7641 
7642 void
7643 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7644 {
7645   rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7646 
7647   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7648     return;
7649 
7650   gcc_assert (flag_pic);
7651 
7652   pic_reg = cfun->machine->pic_reg;
7653   if (TARGET_VXWORKS_RTP)
7654     {
7655       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7656       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7657       emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7658 
7659       emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7660 
7661       pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7662       emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7663     }
7664   else
7665     {
7666       /* We use an UNSPEC rather than a LABEL_REF because this label
7667 	 never appears in the code stream.  */
7668 
7669       labelno = GEN_INT (pic_labelno++);
7670       l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7671       l1 = gen_rtx_CONST (VOIDmode, l1);
7672 
7673       /* On the ARM the PC register contains 'dot + 8' at the time of the
7674 	 addition; on the Thumb it is 'dot + 4'.  */
7675       pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7676       pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7677 				UNSPEC_GOTSYM_OFF);
7678       pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7679 
7680       if (TARGET_32BIT)
7681 	{
7682 	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7683 	}
7684       else /* TARGET_THUMB1 */
7685 	{
7686 	  if (arm_pic_register != INVALID_REGNUM
7687 	      && REGNO (pic_reg) > LAST_LO_REGNUM)
7688 	    {
7689 	      /* We will have pushed the pic register, so we should always be
7690 		 able to find a work register.  */
7691 	      pic_tmp = gen_rtx_REG (SImode,
7692 				     thumb_find_work_register (saved_regs));
7693 	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7694 	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7695 	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7696 	    }
7697 	  else if (arm_pic_register != INVALID_REGNUM
7698 		   && arm_pic_register > LAST_LO_REGNUM
7699 		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
7700 	    {
7701 	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7702 	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7703 	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7704 	    }
7705 	  else
7706 	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7707 	}
7708     }
7709 
7710   /* Need to emit this whether or not we obey regdecls,
7711      since setjmp/longjmp can cause life info to screw up.  */
7712   emit_use (pic_reg);
7713 }
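
/* Informally (a sketch, not the exact expansion): the value loaded above is
   &_GLOBAL_OFFSET_TABLE_ - (.LPICn + bias), where .LPICn is the label hidden
   in the UNSPEC and bias is 8 in ARM state or 4 in Thumb state (the PC
   read-ahead).  Adding the PC as read at .LPICn therefore gives

     pic_reg = (GOT - (.LPICn + bias)) + (.LPICn + bias) = &_GLOBAL_OFFSET_TABLE_

   which is the base that subsequent GOT-relative accesses expect.  */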
7714 
7715 /* Generate code to load the address of a static var when flag_pic is set.  */
7716 static rtx_insn *
7717 arm_pic_static_addr (rtx orig, rtx reg)
7718 {
7719   rtx l1, labelno, offset_rtx;
7720 
7721   gcc_assert (flag_pic);
7722 
7723   /* We use an UNSPEC rather than a LABEL_REF because this label
7724      never appears in the code stream.  */
7725   labelno = GEN_INT (pic_labelno++);
7726   l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7727   l1 = gen_rtx_CONST (VOIDmode, l1);
7728 
7729   /* On the ARM the PC register contains 'dot + 8' at the time of the
7730      addition; on the Thumb it is 'dot + 4'.  */
7731   offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7732   offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7733                                UNSPEC_SYMBOL_OFFSET);
7734   offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7735 
7736   return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7737 }
7738 
7739 /* Return nonzero if X is valid as an ARM state addressing register.  */
7740 static int
7741 arm_address_register_rtx_p (rtx x, int strict_p)
7742 {
7743   int regno;
7744 
7745   if (!REG_P (x))
7746     return 0;
7747 
7748   regno = REGNO (x);
7749 
7750   if (strict_p)
7751     return ARM_REGNO_OK_FOR_BASE_P (regno);
7752 
7753   return (regno <= LAST_ARM_REGNUM
7754 	  || regno >= FIRST_PSEUDO_REGISTER
7755 	  || regno == FRAME_POINTER_REGNUM
7756 	  || regno == ARG_POINTER_REGNUM);
7757 }
7758 
7759 /* Return TRUE if this rtx is the difference of a symbol and a label,
7760    and will reduce to a PC-relative relocation in the object file.
7761    Expressions like this can be left alone when generating PIC, rather
7762    than forced through the GOT.  */
7763 static int
7764 pcrel_constant_p (rtx x)
7765 {
7766   if (GET_CODE (x) == MINUS)
7767     return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7768 
7769   return FALSE;
7770 }
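
/* For example, an rtx of the form

     (minus (symbol_ref "foo") (label_ref L42))

   reduces to a PC-relative value that the assembler can resolve directly,
   so it need not be forced through the GOT even when generating PIC.  */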
7771 
7772 /* Return true if X will surely end up in an index register after next
7773    splitting pass.  */
7774 static bool
7775 will_be_in_index_register (const_rtx x)
7776 {
7777   /* arm.md: calculate_pic_address will split this into a register.  */
7778   return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7779 }
7780 
7781 /* Return nonzero if X is a valid ARM state address operand.  */
7782 int
7783 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7784 			        int strict_p)
7785 {
7786   bool use_ldrd;
7787   enum rtx_code code = GET_CODE (x);
7788 
7789   if (arm_address_register_rtx_p (x, strict_p))
7790     return 1;
7791 
7792   use_ldrd = (TARGET_LDRD
7793 	      && (mode == DImode || mode == DFmode));
7794 
7795   if (code == POST_INC || code == PRE_DEC
7796       || ((code == PRE_INC || code == POST_DEC)
7797 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7798     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7799 
7800   else if ((code == POST_MODIFY || code == PRE_MODIFY)
7801 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7802 	   && GET_CODE (XEXP (x, 1)) == PLUS
7803 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7804     {
7805       rtx addend = XEXP (XEXP (x, 1), 1);
7806 
7807       /* Don't allow ldrd post increment by register because it's hard
7808 	 to fixup invalid register choices.  */
7809       if (use_ldrd
7810 	  && GET_CODE (x) == POST_MODIFY
7811 	  && REG_P (addend))
7812 	return 0;
7813 
7814       return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7815 	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
7816     }
7817 
7818   /* After reload constants split into minipools will have addresses
7819      from a LABEL_REF.  */
7820   else if (reload_completed
7821 	   && (code == LABEL_REF
7822 	       || (code == CONST
7823 		   && GET_CODE (XEXP (x, 0)) == PLUS
7824 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7825 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7826     return 1;
7827 
7828   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7829     return 0;
7830 
7831   else if (code == PLUS)
7832     {
7833       rtx xop0 = XEXP (x, 0);
7834       rtx xop1 = XEXP (x, 1);
7835 
7836       return ((arm_address_register_rtx_p (xop0, strict_p)
7837 	       && ((CONST_INT_P (xop1)
7838 		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7839 		   || (!strict_p && will_be_in_index_register (xop1))))
7840 	      || (arm_address_register_rtx_p (xop1, strict_p)
7841 		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7842     }
7843 
7844 #if 0
7845   /* Reload currently can't handle MINUS, so disable this for now */
7846   else if (GET_CODE (x) == MINUS)
7847     {
7848       rtx xop0 = XEXP (x, 0);
7849       rtx xop1 = XEXP (x, 1);
7850 
7851       return (arm_address_register_rtx_p (xop0, strict_p)
7852 	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7853     }
7854 #endif
7855 
7856   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7857 	   && code == SYMBOL_REF
7858 	   && CONSTANT_POOL_ADDRESS_P (x)
7859 	   && ! (flag_pic
7860 		 && symbol_mentioned_p (get_pool_constant (x))
7861 		 && ! pcrel_constant_p (get_pool_constant (x))))
7862     return 1;
7863 
7864   return 0;
7865 }
7866 
7867 /* Return true if we can avoid creating a constant pool entry for x.  */
7868 static bool
7869 can_avoid_literal_pool_for_label_p (rtx x)
7870 {
7871   /* Normally we can assign constant values to target registers without
7872      the help of the constant pool.  But there are cases where we have to use
7873      the constant pool, for example:
7874      1) assigning a label to a register;
7875      2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7876 
7877      A constant pool access of the form:
7878      (set (reg r0) (mem (symbol_ref (".LC0"))))
7879      will cause the use of the literal pool (later, in function arm_reorg).
7880      So here we mark such a form as invalid; the compiler will then
7881      adjust it into:
7882      (set (reg r0) (symbol_ref (".LC0")))
7883      (set (reg r0) (mem (reg r0))).
7884      No extra register is required, and (mem (reg r0)) won't cause the use
7885      of literal pools.  */
7886   if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7887       && CONSTANT_POOL_ADDRESS_P (x))
7888     return 1;
7889   return 0;
7890 }
7891 
7892 
7893 /* Return nonzero if X is a valid Thumb-2 address operand.  */
7894 static int
7895 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7896 {
7897   bool use_ldrd;
7898   enum rtx_code code = GET_CODE (x);
7899 
7900   if (arm_address_register_rtx_p (x, strict_p))
7901     return 1;
7902 
7903   use_ldrd = (TARGET_LDRD
7904 	      && (mode == DImode || mode == DFmode));
7905 
7906   if (code == POST_INC || code == PRE_DEC
7907       || ((code == PRE_INC || code == POST_DEC)
7908 	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7909     return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7910 
7911   else if ((code == POST_MODIFY || code == PRE_MODIFY)
7912 	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7913 	   && GET_CODE (XEXP (x, 1)) == PLUS
7914 	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7915     {
7916       /* Thumb-2 only has autoincrement by constant.  */
7917       rtx addend = XEXP (XEXP (x, 1), 1);
7918       HOST_WIDE_INT offset;
7919 
7920       if (!CONST_INT_P (addend))
7921 	return 0;
7922 
7923       offset = INTVAL(addend);
7924       if (GET_MODE_SIZE (mode) <= 4)
7925 	return (offset > -256 && offset < 256);
7926 
7927       return (use_ldrd && offset > -1024 && offset < 1024
7928 	      && (offset & 3) == 0);
7929     }
7930 
7931   /* After reload constants split into minipools will have addresses
7932      from a LABEL_REF.  */
7933   else if (reload_completed
7934 	   && (code == LABEL_REF
7935 	       || (code == CONST
7936 		   && GET_CODE (XEXP (x, 0)) == PLUS
7937 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7938 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7939     return 1;
7940 
7941   else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7942     return 0;
7943 
7944   else if (code == PLUS)
7945     {
7946       rtx xop0 = XEXP (x, 0);
7947       rtx xop1 = XEXP (x, 1);
7948 
7949       return ((arm_address_register_rtx_p (xop0, strict_p)
7950 	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7951 		   || (!strict_p && will_be_in_index_register (xop1))))
7952 	      || (arm_address_register_rtx_p (xop1, strict_p)
7953 		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7954     }
7955 
7956   else if (can_avoid_literal_pool_for_label_p (x))
7957     return 0;
7958 
7959   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7960 	   && code == SYMBOL_REF
7961 	   && CONSTANT_POOL_ADDRESS_P (x)
7962 	   && ! (flag_pic
7963 		 && symbol_mentioned_p (get_pool_constant (x))
7964 		 && ! pcrel_constant_p (get_pool_constant (x))))
7965     return 1;
7966 
7967   return 0;
7968 }
7969 
7970 /* Return nonzero if INDEX is valid for an address index operand in
7971    ARM state.  */
7972 static int
7973 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7974 			int strict_p)
7975 {
7976   HOST_WIDE_INT range;
7977   enum rtx_code code = GET_CODE (index);
7978 
7979   /* Standard coprocessor addressing modes.  */
7980   if (TARGET_HARD_FLOAT
7981       && (mode == SFmode || mode == DFmode))
7982     return (code == CONST_INT && INTVAL (index) < 1024
7983 	    && INTVAL (index) > -1024
7984 	    && (INTVAL (index) & 3) == 0);
7985 
7986   /* For quad modes, we restrict the constant offset to be slightly less
7987      than what the instruction format permits.  We do this because for
7988      quad mode moves, we will actually decompose them into two separate
7989      double-mode reads or writes.  INDEX must therefore be a valid
7990      (double-mode) offset and so should INDEX+8.  */
7991   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7992     return (code == CONST_INT
7993 	    && INTVAL (index) < 1016
7994 	    && INTVAL (index) > -1024
7995 	    && (INTVAL (index) & 3) == 0);
7996 
7997   /* We have no such constraint on double mode offsets, so we permit the
7998      full range of the instruction format.  */
7999   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8000     return (code == CONST_INT
8001 	    && INTVAL (index) < 1024
8002 	    && INTVAL (index) > -1024
8003 	    && (INTVAL (index) & 3) == 0);
8004 
8005   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8006     return (code == CONST_INT
8007 	    && INTVAL (index) < 1024
8008 	    && INTVAL (index) > -1024
8009 	    && (INTVAL (index) & 3) == 0);
8010 
8011   if (arm_address_register_rtx_p (index, strict_p)
8012       && (GET_MODE_SIZE (mode) <= 4))
8013     return 1;
8014 
8015   if (mode == DImode || mode == DFmode)
8016     {
8017       if (code == CONST_INT)
8018 	{
8019 	  HOST_WIDE_INT val = INTVAL (index);
8020 
8021 	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8022 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8023 	  if (TARGET_LDRD)
8024 	    return val > -256 && val < 256;
8025 	  else
8026 	    return val > -4096 && val < 4092;
8027 	}
8028 
8029       return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8030     }
8031 
8032   if (GET_MODE_SIZE (mode) <= 4
8033       && ! (arm_arch4
8034 	    && (mode == HImode
8035 		|| mode == HFmode
8036 		|| (mode == QImode && outer == SIGN_EXTEND))))
8037     {
8038       if (code == MULT)
8039 	{
8040 	  rtx xiop0 = XEXP (index, 0);
8041 	  rtx xiop1 = XEXP (index, 1);
8042 
8043 	  return ((arm_address_register_rtx_p (xiop0, strict_p)
8044 		   && power_of_two_operand (xiop1, SImode))
8045 		  || (arm_address_register_rtx_p (xiop1, strict_p)
8046 		      && power_of_two_operand (xiop0, SImode)));
8047 	}
8048       else if (code == LSHIFTRT || code == ASHIFTRT
8049 	       || code == ASHIFT || code == ROTATERT)
8050 	{
8051 	  rtx op = XEXP (index, 1);
8052 
8053 	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8054 		  && CONST_INT_P (op)
8055 		  && INTVAL (op) > 0
8056 		  && INTVAL (op) <= 31);
8057 	}
8058     }
8059 
8060   /* For ARM v4 we may be doing a sign-extend operation during the
8061      load.  */
8062   if (arm_arch4)
8063     {
8064       if (mode == HImode
8065 	  || mode == HFmode
8066 	  || (outer == SIGN_EXTEND && mode == QImode))
8067 	range = 256;
8068       else
8069 	range = 4096;
8070     }
8071   else
8072     range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8073 
8074   return (code == CONST_INT
8075 	  && INTVAL (index) < range
8076 	  && INTVAL (index) > -range);
8077 }
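
/* For illustration, the ranges accepted above for ARM state work out to
   roughly the following (the code above is authoritative):
     word and unsigned byte accesses (ldr/ldrb):   offsets in (-4096, 4096)
     ldrh/ldrsh/ldrsb on ARMv4 and later:          offsets in (-256, 256)
     64-bit integer accesses with TARGET_LDRD:     offsets in (-256, 256)
     VFP SF/DF accesses:                           multiples of 4 in (-1024, 1024).  */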
8078 
8079 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8080    index operand, i.e. 1, 2, 4 or 8.  */
8081 static bool
8082 thumb2_index_mul_operand (rtx op)
8083 {
8084   HOST_WIDE_INT val;
8085 
8086   if (!CONST_INT_P (op))
8087     return false;
8088 
8089   val = INTVAL(op);
8090   return (val == 1 || val == 2 || val == 4 || val == 8);
8091 }
8092 
8093 /* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
8094 static int
8095 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8096 {
8097   enum rtx_code code = GET_CODE (index);
8098 
8099   /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
8100   /* Standard coprocessor addressing modes.  */
8101   if (TARGET_HARD_FLOAT
8102       && (mode == SFmode || mode == DFmode))
8103     return (code == CONST_INT && INTVAL (index) < 1024
8104 	    /* Thumb-2 allows only a > -256 index range for its core register
8105 	       loads/stores.  Since we allow SF/DF in core registers, we have
8106 	       to use the intersection of -256..4096 (core) and -1024..1024
8107 	       (coprocessor).  */
8108 	    && INTVAL (index) > -256
8109 	    && (INTVAL (index) & 3) == 0);
8110 
8111   if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8112     {
8113       /* For DImode assume values will usually live in core regs
8114 	 and only allow LDRD addressing modes.  */
8115       if (!TARGET_LDRD || mode != DImode)
8116 	return (code == CONST_INT
8117 		&& INTVAL (index) < 1024
8118 		&& INTVAL (index) > -1024
8119 		&& (INTVAL (index) & 3) == 0);
8120     }
8121 
8122   /* For quad modes, we restrict the constant offset to be slightly less
8123      than what the instruction format permits.  We do this because for
8124      quad mode moves, we will actually decompose them into two separate
8125      double-mode reads or writes.  INDEX must therefore be a valid
8126      (double-mode) offset and so should INDEX+8.  */
8127   if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8128     return (code == CONST_INT
8129 	    && INTVAL (index) < 1016
8130 	    && INTVAL (index) > -1024
8131 	    && (INTVAL (index) & 3) == 0);
8132 
8133   /* We have no such constraint on double mode offsets, so we permit the
8134      full range of the instruction format.  */
8135   if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8136     return (code == CONST_INT
8137 	    && INTVAL (index) < 1024
8138 	    && INTVAL (index) > -1024
8139 	    && (INTVAL (index) & 3) == 0);
8140 
8141   if (arm_address_register_rtx_p (index, strict_p)
8142       && (GET_MODE_SIZE (mode) <= 4))
8143     return 1;
8144 
8145   if (mode == DImode || mode == DFmode)
8146     {
8147       if (code == CONST_INT)
8148 	{
8149 	  HOST_WIDE_INT val = INTVAL (index);
8150 	  /* Thumb-2 ldrd only has reg+const addressing modes.
8151 	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8152 	     If vldr is selected it uses arm_coproc_mem_operand.  */
8153 	  if (TARGET_LDRD)
8154 	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8155 	  else
8156 	    return IN_RANGE (val, -255, 4095 - 4);
8157 	}
8158       else
8159 	return 0;
8160     }
8161 
8162   if (code == MULT)
8163     {
8164       rtx xiop0 = XEXP (index, 0);
8165       rtx xiop1 = XEXP (index, 1);
8166 
8167       return ((arm_address_register_rtx_p (xiop0, strict_p)
8168 	       && thumb2_index_mul_operand (xiop1))
8169 	      || (arm_address_register_rtx_p (xiop1, strict_p)
8170 		  && thumb2_index_mul_operand (xiop0)));
8171     }
8172   else if (code == ASHIFT)
8173     {
8174       rtx op = XEXP (index, 1);
8175 
8176       return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8177 	      && CONST_INT_P (op)
8178 	      && INTVAL (op) > 0
8179 	      && INTVAL (op) <= 3);
8180     }
8181 
8182   return (code == CONST_INT
8183 	  && INTVAL (index) < 4096
8184 	  && INTVAL (index) > -256);
8185 }
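
/* The MULT and ASHIFT forms accepted above correspond to Thumb-2's scaled
   register offset addressing mode, e.g.

     ldr r0, [r1, r2, lsl #2]

   for indexing an array of words; the shift amount is limited to 0..3.  */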
8186 
8187 /* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
8188 static int
8189 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8190 {
8191   int regno;
8192 
8193   if (!REG_P (x))
8194     return 0;
8195 
8196   regno = REGNO (x);
8197 
8198   if (strict_p)
8199     return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8200 
8201   return (regno <= LAST_LO_REGNUM
8202 	  || regno > LAST_VIRTUAL_REGISTER
8203 	  || regno == FRAME_POINTER_REGNUM
8204 	  || (GET_MODE_SIZE (mode) >= 4
8205 	      && (regno == STACK_POINTER_REGNUM
8206 		  || regno >= FIRST_PSEUDO_REGISTER
8207 		  || x == hard_frame_pointer_rtx
8208 		  || x == arg_pointer_rtx)));
8209 }
8210 
8211 /* Return nonzero if x is a legitimate index register.  This is the case
8212    for any base register that can access a QImode object.  */
8213 inline static int
8214 thumb1_index_register_rtx_p (rtx x, int strict_p)
8215 {
8216   return thumb1_base_register_rtx_p (x, QImode, strict_p);
8217 }
8218 
8219 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8220 
8221    The AP may be eliminated to either the SP or the FP, so we use the
8222    least common denominator, e.g. SImode, and offsets from 0 to 64.
8223 
8224    ??? Verify whether the above is the right approach.
8225 
8226    ??? Also, the FP may be eliminated to the SP, so perhaps that
8227    needs special handling also.
8228 
8229    ??? Look at how the mips16 port solves this problem.  It probably uses
8230    better ways to solve some of these problems.
8231 
8232    Although it is not incorrect, we don't accept QImode and HImode
8233    addresses based on the frame pointer or arg pointer until the
8234    reload pass starts.  This is so that eliminating such addresses
8235    into stack based ones won't produce impossible code.  */
8236 int
8237 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8238 {
8239   if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8240     return 0;
8241 
8242   /* ??? Not clear if this is right.  Experiment.  */
8243   if (GET_MODE_SIZE (mode) < 4
8244       && !(reload_in_progress || reload_completed)
8245       && (reg_mentioned_p (frame_pointer_rtx, x)
8246 	  || reg_mentioned_p (arg_pointer_rtx, x)
8247 	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
8248 	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8249 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8250 	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8251     return 0;
8252 
8253   /* Accept any base register.  SP only in SImode or larger.  */
8254   else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8255     return 1;
8256 
8257   /* This is PC relative data before arm_reorg runs.  */
8258   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8259 	   && GET_CODE (x) == SYMBOL_REF
8260            && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8261     return 1;
8262 
8263   /* This is PC relative data after arm_reorg runs.  */
8264   else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8265 	   && reload_completed
8266 	   && (GET_CODE (x) == LABEL_REF
8267 	       || (GET_CODE (x) == CONST
8268 		   && GET_CODE (XEXP (x, 0)) == PLUS
8269 		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8270 		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8271     return 1;
8272 
8273   /* Post-inc indexing only supported for SImode and larger.  */
8274   else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8275 	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8276     return 1;
8277 
8278   else if (GET_CODE (x) == PLUS)
8279     {
8280       /* REG+REG address can be any two index registers.  */
8281       /* We disallow FRAME+REG addressing since we know that FRAME
8282 	 will be replaced with STACK, and SP relative addressing only
8283 	 permits SP+OFFSET.  */
8284       if (GET_MODE_SIZE (mode) <= 4
8285 	  && XEXP (x, 0) != frame_pointer_rtx
8286 	  && XEXP (x, 1) != frame_pointer_rtx
8287 	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8288 	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8289 	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8290 	return 1;
8291 
8292       /* REG+const has 5-7 bit offset for non-SP registers.  */
8293       else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8294 		|| XEXP (x, 0) == arg_pointer_rtx)
8295 	       && CONST_INT_P (XEXP (x, 1))
8296 	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8297 	return 1;
8298 
8299       /* REG+const has 10-bit offset for SP, but only SImode and
8300 	 larger is supported.  */
8301       /* ??? Should probably check for DI/DFmode overflow here
8302 	 just like GO_IF_LEGITIMATE_OFFSET does.  */
8303       else if (REG_P (XEXP (x, 0))
8304 	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8305 	       && GET_MODE_SIZE (mode) >= 4
8306 	       && CONST_INT_P (XEXP (x, 1))
8307 	       && INTVAL (XEXP (x, 1)) >= 0
8308 	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8309 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8310 	return 1;
8311 
8312       else if (REG_P (XEXP (x, 0))
8313 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8314 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8315 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8316 		       && REGNO (XEXP (x, 0))
8317 			  <= LAST_VIRTUAL_POINTER_REGISTER))
8318 	       && GET_MODE_SIZE (mode) >= 4
8319 	       && CONST_INT_P (XEXP (x, 1))
8320 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
8321 	return 1;
8322     }
8323 
8324   else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8325 	   && GET_MODE_SIZE (mode) == 4
8326 	   && GET_CODE (x) == SYMBOL_REF
8327 	   && CONSTANT_POOL_ADDRESS_P (x)
8328 	   && ! (flag_pic
8329 		 && symbol_mentioned_p (get_pool_constant (x))
8330 		 && ! pcrel_constant_p (get_pool_constant (x))))
8331     return 1;
8332 
8333   return 0;
8334 }
8335 
8336 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8337    instruction of mode MODE.  */
8338 int
8339 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8340 {
8341   switch (GET_MODE_SIZE (mode))
8342     {
8343     case 1:
8344       return val >= 0 && val < 32;
8345 
8346     case 2:
8347       return val >= 0 && val < 64 && (val & 1) == 0;
8348 
8349     default:
8350       return (val >= 0
8351 	      && (val + GET_MODE_SIZE (mode)) <= 128
8352 	      && (val & 3) == 0);
8353     }
8354 }
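
/* For illustration: under the rules above a byte access may use offsets
   0..31, a halfword access even offsets 0..62, and a word access offsets
   0, 4, ..., 124; larger modes shrink the upper bound further, e.g. a
   DImode access may use offsets 0, 4, ..., 120.  */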
8355 
8356 bool
8357 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8358 {
8359   if (TARGET_ARM)
8360     return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8361   else if (TARGET_THUMB2)
8362     return thumb2_legitimate_address_p (mode, x, strict_p);
8363   else /* if (TARGET_THUMB1) */
8364     return thumb1_legitimate_address_p (mode, x, strict_p);
8365 }
8366 
8367 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8368 
8369    Given an rtx X being reloaded into a reg required to be
8370    in class CLASS, return the class of reg to actually use.
8371    In general this is just CLASS, but for the Thumb core registers and
8372    immediate constants we prefer a LO_REGS class or a subset.  */
8373 
8374 static reg_class_t
8375 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8376 {
8377   if (TARGET_32BIT)
8378     return rclass;
8379   else
8380     {
8381       if (rclass == GENERAL_REGS)
8382 	return LO_REGS;
8383       else
8384 	return rclass;
8385     }
8386 }
8387 
8388 /* Build the SYMBOL_REF for __tls_get_addr.  */
8389 
8390 static GTY(()) rtx tls_get_addr_libfunc;
8391 
8392 static rtx
8393 get_tls_get_addr (void)
8394 {
8395   if (!tls_get_addr_libfunc)
8396     tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8397   return tls_get_addr_libfunc;
8398 }
8399 
8400 rtx
8401 arm_load_tp (rtx target)
8402 {
8403   if (!target)
8404     target = gen_reg_rtx (SImode);
8405 
8406   if (TARGET_HARD_TP)
8407     {
8408       /* Can return in any reg.  */
8409       emit_insn (gen_load_tp_hard (target));
8410     }
8411   else
8412     {
8413       /* Always returned in r0.  Immediately copy the result into a pseudo,
8414 	 otherwise other uses of r0 (e.g. setting up function arguments) may
8415 	 clobber the value.  */
8416 
8417       rtx tmp;
8418 
8419       emit_insn (gen_load_tp_soft ());
8420 
8421       tmp = gen_rtx_REG (SImode, R0_REGNUM);
8422       emit_move_insn (target, tmp);
8423     }
8424   return target;
8425 }
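
/* Note: with TARGET_HARD_TP the thread pointer is typically read straight
   from the CP15 thread-ID register (TPIDRURO), while the soft variant calls
   the __aeabi_read_tp helper, which returns its result in r0 -- hence the
   copy into a fresh register above.  */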
8426 
8427 static rtx
8428 load_tls_operand (rtx x, rtx reg)
8429 {
8430   rtx tmp;
8431 
8432   if (reg == NULL_RTX)
8433     reg = gen_reg_rtx (SImode);
8434 
8435   tmp = gen_rtx_CONST (SImode, x);
8436 
8437   emit_move_insn (reg, tmp);
8438 
8439   return reg;
8440 }
8441 
8442 static rtx_insn *
8443 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8444 {
8445   rtx label, labelno, sum;
8446 
8447   gcc_assert (reloc != TLS_DESCSEQ);
8448   start_sequence ();
8449 
8450   labelno = GEN_INT (pic_labelno++);
8451   label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8452   label = gen_rtx_CONST (VOIDmode, label);
8453 
8454   sum = gen_rtx_UNSPEC (Pmode,
8455 			gen_rtvec (4, x, GEN_INT (reloc), label,
8456 				   GEN_INT (TARGET_ARM ? 8 : 4)),
8457 			UNSPEC_TLS);
8458   reg = load_tls_operand (sum, reg);
8459 
8460   if (TARGET_ARM)
8461     emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8462   else
8463     emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8464 
8465   *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8466 				     LCT_PURE, /* LCT_CONST?  */
8467 				     Pmode, reg, Pmode);
8468 
8469   rtx_insn *insns = get_insns ();
8470   end_sequence ();
8471 
8472   return insns;
8473 }
8474 
8475 static rtx
8476 arm_tls_descseq_addr (rtx x, rtx reg)
8477 {
8478   rtx labelno = GEN_INT (pic_labelno++);
8479   rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8480   rtx sum = gen_rtx_UNSPEC (Pmode,
8481 			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8482 				       gen_rtx_CONST (VOIDmode, label),
8483 				       GEN_INT (!TARGET_ARM)),
8484 			    UNSPEC_TLS);
8485   rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8486 
8487   emit_insn (gen_tlscall (x, labelno));
8488   if (!reg)
8489     reg = gen_reg_rtx (SImode);
8490   else
8491     gcc_assert (REGNO (reg) != R0_REGNUM);
8492 
8493   emit_move_insn (reg, reg0);
8494 
8495   return reg;
8496 }
8497 
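/* Legitimize a reference to the TLS symbol X, using REG (which may be NULL)
   as a scratch or result register where convenient.  The code emitted depends
   on the access model recorded on the SYMBOL_REF: global- and local-dynamic
   go through __tls_get_addr (or the GNU2 descriptor sequence), while
   initial-exec and local-exec compute an offset from the thread pointer.  */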
8498 rtx
8499 legitimize_tls_address (rtx x, rtx reg)
8500 {
8501   rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8502   rtx_insn *insns;
8503   unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8504 
8505   switch (model)
8506     {
8507     case TLS_MODEL_GLOBAL_DYNAMIC:
8508       if (TARGET_GNU2_TLS)
8509 	{
8510 	  reg = arm_tls_descseq_addr (x, reg);
8511 
8512 	  tp = arm_load_tp (NULL_RTX);
8513 
8514 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
8515 	}
8516       else
8517 	{
8518 	  /* Original scheme */
8519 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8520 	  dest = gen_reg_rtx (Pmode);
8521 	  emit_libcall_block (insns, dest, ret, x);
8522 	}
8523       return dest;
8524 
8525     case TLS_MODEL_LOCAL_DYNAMIC:
8526       if (TARGET_GNU2_TLS)
8527 	{
8528 	  reg = arm_tls_descseq_addr (x, reg);
8529 
8530 	  tp = arm_load_tp (NULL_RTX);
8531 
8532 	  dest = gen_rtx_PLUS (Pmode, tp, reg);
8533 	}
8534       else
8535 	{
8536 	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8537 
8538 	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8539 	     share the LDM result with other LD model accesses.  */
8540 	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8541 				UNSPEC_TLS);
8542 	  dest = gen_reg_rtx (Pmode);
8543 	  emit_libcall_block (insns, dest, ret, eqv);
8544 
8545 	  /* Load the addend.  */
8546 	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8547 						     GEN_INT (TLS_LDO32)),
8548 				   UNSPEC_TLS);
8549 	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8550 	  dest = gen_rtx_PLUS (Pmode, dest, addend);
8551 	}
8552       return dest;
8553 
8554     case TLS_MODEL_INITIAL_EXEC:
8555       labelno = GEN_INT (pic_labelno++);
8556       label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8557       label = gen_rtx_CONST (VOIDmode, label);
8558       sum = gen_rtx_UNSPEC (Pmode,
8559 			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8560 				       GEN_INT (TARGET_ARM ? 8 : 4)),
8561 			    UNSPEC_TLS);
8562       reg = load_tls_operand (sum, reg);
8563 
8564       if (TARGET_ARM)
8565 	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8566       else if (TARGET_THUMB2)
8567 	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8568       else
8569 	{
8570 	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8571 	  emit_move_insn (reg, gen_const_mem (SImode, reg));
8572 	}
8573 
8574       tp = arm_load_tp (NULL_RTX);
8575 
8576       return gen_rtx_PLUS (Pmode, tp, reg);
8577 
8578     case TLS_MODEL_LOCAL_EXEC:
8579       tp = arm_load_tp (NULL_RTX);
8580 
8581       reg = gen_rtx_UNSPEC (Pmode,
8582 			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8583 			    UNSPEC_TLS);
8584       reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8585 
8586       return gen_rtx_PLUS (Pmode, tp, reg);
8587 
8588     default:
8589       abort ();
8590     }
8591 }
8592 
8593 /* Try machine-dependent ways of modifying an illegitimate address
8594    to be legitimate.  If we find one, return the new, valid address.  */
8595 rtx
8596 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8597 {
8598   if (arm_tls_referenced_p (x))
8599     {
8600       rtx addend = NULL;
8601 
8602       if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8603 	{
8604 	  addend = XEXP (XEXP (x, 0), 1);
8605 	  x = XEXP (XEXP (x, 0), 0);
8606 	}
8607 
8608       if (GET_CODE (x) != SYMBOL_REF)
8609 	return x;
8610 
8611       gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8612 
8613       x = legitimize_tls_address (x, NULL_RTX);
8614 
8615       if (addend)
8616 	{
8617 	  x = gen_rtx_PLUS (SImode, x, addend);
8618 	  orig_x = x;
8619 	}
8620       else
8621 	return x;
8622     }
8623 
8624   if (!TARGET_ARM)
8625     {
8626       /* TODO: legitimize_address for Thumb2.  */
8627       if (TARGET_THUMB2)
8628         return x;
8629       return thumb_legitimize_address (x, orig_x, mode);
8630     }
8631 
8632   if (GET_CODE (x) == PLUS)
8633     {
8634       rtx xop0 = XEXP (x, 0);
8635       rtx xop1 = XEXP (x, 1);
8636 
8637       if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8638 	xop0 = force_reg (SImode, xop0);
8639 
8640       if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8641 	  && !symbol_mentioned_p (xop1))
8642 	xop1 = force_reg (SImode, xop1);
8643 
8644       if (ARM_BASE_REGISTER_RTX_P (xop0)
8645 	  && CONST_INT_P (xop1))
8646 	{
8647 	  HOST_WIDE_INT n, low_n;
8648 	  rtx base_reg, val;
8649 	  n = INTVAL (xop1);
8650 
8651 	  /* VFP addressing modes actually allow greater offsets, but for
8652 	     now we just stick with the lowest common denominator.  */
8653 	  if (mode == DImode || mode == DFmode)
8654 	    {
8655 	      low_n = n & 0x0f;
8656 	      n &= ~0x0f;
8657 	      if (low_n > 4)
8658 		{
8659 		  n += 16;
8660 		  low_n -= 16;
8661 		}
8662 	    }
8663 	  else
8664 	    {
8665 	      low_n = ((mode) == TImode ? 0
8666 		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8667 	      n -= low_n;
8668 	    }
8669 
8670 	  base_reg = gen_reg_rtx (SImode);
8671 	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8672 	  emit_move_insn (base_reg, val);
8673 	  x = plus_constant (Pmode, base_reg, low_n);
8674 	}
8675       else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8676 	x = gen_rtx_PLUS (SImode, xop0, xop1);
8677     }
8678 
8679   /* XXX We don't allow MINUS any more -- see comment in
8680      arm_legitimate_address_outer_p ().  */
8681   else if (GET_CODE (x) == MINUS)
8682     {
8683       rtx xop0 = XEXP (x, 0);
8684       rtx xop1 = XEXP (x, 1);
8685 
8686       if (CONSTANT_P (xop0))
8687 	xop0 = force_reg (SImode, xop0);
8688 
8689       if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8690 	xop1 = force_reg (SImode, xop1);
8691 
8692       if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8693 	x = gen_rtx_MINUS (SImode, xop0, xop1);
8694     }
8695 
8696   /* Make sure to take full advantage of the pre-indexed addressing mode
8697      with absolute addresses, which often allows the base register to be
8698      factorized across multiple adjacent memory references, and may even
8699      allow the minipool to be avoided entirely.  */
8700   else if (CONST_INT_P (x) && optimize > 0)
8701     {
8702       unsigned int bits;
8703       HOST_WIDE_INT mask, base, index;
8704       rtx base_reg;
8705 
8706       /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8707          use an 8-bit index.  So let's use a 12-bit index for SImode only and
8708          hope that arm_gen_constant will enable ldrb to use more bits. */
8709       bits = (mode == SImode) ? 12 : 8;
8710       mask = (1 << bits) - 1;
8711       base = INTVAL (x) & ~mask;
8712       index = INTVAL (x) & mask;
8713       if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8714         {
8715 	  /* It'll most probably be more efficient to generate the base
8716 	     with more bits set and use a negative index instead. */
8717 	  base |= mask;
8718 	  index -= mask;
8719 	}
8720       base_reg = force_reg (SImode, GEN_INT (base));
8721       x = plus_constant (Pmode, base_reg, index);
8722     }
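
  /* A sketch of the transformation above: for an SImode access to absolute
     address 0x1234, bits = 12, so base = 0x1000 and index = 0x234; the base
     is loaded into a register once and the access becomes [reg, #0x234],
     letting neighbouring accesses in 0x1000..0x1fff reuse the same base.  */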
8723 
8724   if (flag_pic)
8725     {
8726       /* We need to find and carefully transform any SYMBOL and LABEL
8727 	 references; so go back to the original address expression.  */
8728       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8729 
8730       if (new_x != orig_x)
8731 	x = new_x;
8732     }
8733 
8734   return x;
8735 }
8736 
8737 
8738 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8739    to be legitimate.  If we find one, return the new, valid address.  */
8740 rtx
8741 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8742 {
8743   if (GET_CODE (x) == PLUS
8744       && CONST_INT_P (XEXP (x, 1))
8745       && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8746 	  || INTVAL (XEXP (x, 1)) < 0))
8747     {
8748       rtx xop0 = XEXP (x, 0);
8749       rtx xop1 = XEXP (x, 1);
8750       HOST_WIDE_INT offset = INTVAL (xop1);
8751 
8752       /* Try and fold the offset into a biasing of the base register and
8753 	 then offsetting that.  Don't do this when optimizing for space
8754 	 since it can cause too many CSEs.  */
8755       if (optimize_size && offset >= 0
8756 	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
8757 	{
8758 	  HOST_WIDE_INT delta;
8759 
8760 	  if (offset >= 256)
8761 	    delta = offset - (256 - GET_MODE_SIZE (mode));
8762 	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8763 	    delta = 31 * GET_MODE_SIZE (mode);
8764 	  else
8765 	    delta = offset & (~31 * GET_MODE_SIZE (mode));
8766 
8767 	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8768 				NULL_RTX);
8769 	  x = plus_constant (Pmode, xop0, delta);
8770 	}
8771       else if (offset < 0 && offset > -256)
8772 	/* Small negative offsets are best done with a subtract before the
8773 	   dereference, since forcing these into a register normally takes
8774 	   two instructions.  */
8775 	x = force_operand (x, NULL_RTX);
8776       else
8777 	{
8778 	  /* For the remaining cases, force the constant into a register.  */
8779 	  xop1 = force_reg (SImode, xop1);
8780 	  x = gen_rtx_PLUS (SImode, xop0, xop1);
8781 	}
8782     }
8783   else if (GET_CODE (x) == PLUS
8784 	   && s_register_operand (XEXP (x, 1), SImode)
8785 	   && !s_register_operand (XEXP (x, 0), SImode))
8786     {
8787       rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8788 
8789       x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8790     }
8791 
8792   if (flag_pic)
8793     {
8794       /* We need to find and carefully transform any SYMBOL and LABEL
8795 	 references; so go back to the original address expression.  */
8796       rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8797 
8798       if (new_x != orig_x)
8799 	x = new_x;
8800     }
8801 
8802   return x;
8803 }
8804 
8805 /* Return TRUE if X contains any TLS symbol references.  */
8806 
8807 bool
8808 arm_tls_referenced_p (rtx x)
8809 {
8810   if (! TARGET_HAVE_TLS)
8811     return false;
8812 
8813   subrtx_iterator::array_type array;
8814   FOR_EACH_SUBRTX (iter, array, x, ALL)
8815     {
8816       const_rtx x = *iter;
8817       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8818 	{
8819 	  /* ARM currently does not provide relocations to encode TLS variables
8820 	     into AArch32 instructions, only into data, so there is currently
8821 	     no way to implement these if the literal pool is disabled.  */
8822 	  if (arm_disable_literal_pool)
8823 	    sorry ("accessing thread-local storage is not currently supported "
8824 		   "with -mpure-code or -mslow-flash-data");
8825 
8826 	  return true;
8827 	}
8828 
8829       /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8830 	 TLS offsets, not real symbol references.  */
8831       if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8832 	iter.skip_subrtxes ();
8833     }
8834   return false;
8835 }
8836 
8837 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8838 
8839    On the ARM, allow any integer (invalid ones are removed later by insn
8840    patterns), nice doubles and symbol_refs which refer to the function's
8841    constant pool XXX.
8842 
8843    When generating PIC, allow anything.  */
8844 
8845 static bool
8846 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8847 {
8848   return flag_pic || !label_mentioned_p (x);
8849 }
8850 
8851 static bool
8852 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8853 {
8854   /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8855      RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
8856      for ARMv8-M Baseline or later the result is valid.  */
8857   if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8858     x = XEXP (x, 0);
8859 
8860   return (CONST_INT_P (x)
8861 	  || CONST_DOUBLE_P (x)
8862 	  || CONSTANT_ADDRESS_P (x)
8863 	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8864 	  || flag_pic);
8865 }
8866 
8867 static bool
8868 arm_legitimate_constant_p (machine_mode mode, rtx x)
8869 {
8870   return (!arm_cannot_force_const_mem (mode, x)
8871 	  && (TARGET_32BIT
8872 	      ? arm_legitimate_constant_p_1 (mode, x)
8873 	      : thumb_legitimate_constant_p (mode, x)));
8874 }
8875 
8876 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
8877 
8878 static bool
8879 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8880 {
8881   rtx base, offset;
8882 
8883   if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8884     {
8885       split_const (x, &base, &offset);
8886       if (GET_CODE (base) == SYMBOL_REF
8887 	  && !offset_within_block_p (base, INTVAL (offset)))
8888 	return true;
8889     }
8890   return arm_tls_referenced_p (x);
8891 }
8892 
8893 #define REG_OR_SUBREG_REG(X)						\
8894   (REG_P (X)							\
8895    || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8896 
8897 #define REG_OR_SUBREG_RTX(X)			\
8898    (REG_P (X) ? (X) : SUBREG_REG (X))
8899 
8900 static inline int
8901 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8902 {
8903   machine_mode mode = GET_MODE (x);
8904   int total, words;
8905 
8906   switch (code)
8907     {
8908     case ASHIFT:
8909     case ASHIFTRT:
8910     case LSHIFTRT:
8911     case ROTATERT:
8912       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8913 
8914     case PLUS:
8915     case MINUS:
8916     case COMPARE:
8917     case NEG:
8918     case NOT:
8919       return COSTS_N_INSNS (1);
8920 
8921     case MULT:
8922       if (arm_arch6m && arm_m_profile_small_mul)
8923 	return COSTS_N_INSNS (32);
8924 
8925       if (CONST_INT_P (XEXP (x, 1)))
8926 	{
8927 	  int cycles = 0;
8928 	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8929 
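	  /* Roughly one extra cycle per two bits of the constant, modelling an
	     early-terminating multiplier; e.g. a constant of 100 (7 significant
	     bits) adds 4 cycles.  */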
8930 	  while (i)
8931 	    {
8932 	      i >>= 2;
8933 	      cycles++;
8934 	    }
8935 	  return COSTS_N_INSNS (2) + cycles;
8936 	}
8937       return COSTS_N_INSNS (1) + 16;
8938 
8939     case SET:
8940       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8941 	 the mode.  */
8942       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8943       return (COSTS_N_INSNS (words)
8944 	      + 4 * ((MEM_P (SET_SRC (x)))
8945 		     + MEM_P (SET_DEST (x))));
8946 
8947     case CONST_INT:
8948       if (outer == SET)
8949 	{
8950 	  if (UINTVAL (x) < 256
8951 	      /* 16-bit constant.  */
8952 	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8953 	    return 0;
8954 	  if (thumb_shiftable_const (INTVAL (x)))
8955 	    return COSTS_N_INSNS (2);
8956 	  return COSTS_N_INSNS (3);
8957 	}
8958       else if ((outer == PLUS || outer == COMPARE)
8959 	       && INTVAL (x) < 256 && INTVAL (x) > -256)
8960 	return 0;
8961       else if ((outer == IOR || outer == XOR || outer == AND)
8962 	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
8963 	return COSTS_N_INSNS (1);
8964       else if (outer == AND)
8965 	{
8966 	  int i;
8967 	  /* This duplicates the tests in the andsi3 expander.  */
8968 	  for (i = 9; i <= 31; i++)
8969 	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8970 		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8971 	      return COSTS_N_INSNS (2);
8972 	}
8973       else if (outer == ASHIFT || outer == ASHIFTRT
8974 	       || outer == LSHIFTRT)
8975 	return 0;
8976       return COSTS_N_INSNS (2);
8977 
8978     case CONST:
8979     case CONST_DOUBLE:
8980     case LABEL_REF:
8981     case SYMBOL_REF:
8982       return COSTS_N_INSNS (3);
8983 
8984     case UDIV:
8985     case UMOD:
8986     case DIV:
8987     case MOD:
8988       return 100;
8989 
8990     case TRUNCATE:
8991       return 99;
8992 
8993     case AND:
8994     case XOR:
8995     case IOR:
8996       /* XXX guess.  */
8997       return 8;
8998 
8999     case MEM:
9000       /* XXX another guess.  */
9001       /* Memory costs quite a lot for the first word, but subsequent words
9002 	 load at the equivalent of a single insn each.  */
9003       return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9004 	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9005 		 ? 4 : 0));
9006 
9007     case IF_THEN_ELSE:
9008       /* XXX a guess.  */
9009       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9010 	return 14;
9011       return 2;
9012 
9013     case SIGN_EXTEND:
9014     case ZERO_EXTEND:
9015       total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9016       total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9017 
9018       if (mode == SImode)
9019 	return total;
9020 
9021       if (arm_arch6)
9022 	return total + COSTS_N_INSNS (1);
9023 
9024       /* Assume a two-shift sequence.  Increase the cost slightly so
9025 	 we prefer actual shifts over an extend operation.  */
9026       return total + 1 + COSTS_N_INSNS (2);
9027 
9028     default:
9029       return 99;
9030     }
9031 }
9032 
9033 /* Estimate the size cost of Thumb-1 instructions.
9034    For now most of the code is copied from thumb1_rtx_costs.  We need more
9035    fine-grained tuning when we have more related test cases.  */
9036 static inline int
9037 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9038 {
9039   machine_mode mode = GET_MODE (x);
9040   int words, cost;
9041 
9042   switch (code)
9043     {
9044     case ASHIFT:
9045     case ASHIFTRT:
9046     case LSHIFTRT:
9047     case ROTATERT:
9048       return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9049 
9050     case PLUS:
9051     case MINUS:
9052       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9053 	 shiftsub1 patterns produced by RTL expansion, especially during the
9054 	 expansion of multiplication.  */
9055       if ((GET_CODE (XEXP (x, 0)) == MULT
9056 	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9057 	  || (GET_CODE (XEXP (x, 1)) == MULT
9058 	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9059 	return COSTS_N_INSNS (2);
9060       /* Fall through.  */
9061     case COMPARE:
9062     case NEG:
9063     case NOT:
9064       return COSTS_N_INSNS (1);
9065 
9066     case MULT:
9067       if (CONST_INT_P (XEXP (x, 1)))
9068         {
9069           /* The Thumb-1 mul instruction can't operate on a constant; we must
9070              load it into a register first.  */
9071           int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9072 	  /* For the targets which have a very small and high-latency multiply
9073 	     unit, we prefer to synthesize the mult with up to 5 instructions,
9074 	     giving a good balance between size and performance.  */
9075 	  if (arm_arch6m && arm_m_profile_small_mul)
9076 	    return COSTS_N_INSNS (5);
9077 	  else
9078 	    return COSTS_N_INSNS (1) + const_size;
9079         }
9080       return COSTS_N_INSNS (1);
9081 
9082     case SET:
9083       /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9084 	 the mode.  */
9085       words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9086       cost = COSTS_N_INSNS (words);
9087       if (satisfies_constraint_J (SET_SRC (x))
9088 	  || satisfies_constraint_K (SET_SRC (x))
9089 	     /* Too big an immediate for a 2-byte mov; MOVT is used instead.  */
9090 	  || (CONST_INT_P (SET_SRC (x))
9091 	      && UINTVAL (SET_SRC (x)) >= 256
9092 	      && TARGET_HAVE_MOVT
9093 	      && satisfies_constraint_j (SET_SRC (x)))
9094 	     /* thumb1_movdi_insn.  */
9095 	  || ((words > 1) && MEM_P (SET_SRC (x))))
9096 	cost += COSTS_N_INSNS (1);
9097       return cost;
9098 
9099     case CONST_INT:
9100       if (outer == SET)
9101         {
9102           if (UINTVAL (x) < 256)
9103             return COSTS_N_INSNS (1);
9104 	  /* movw is 4 bytes long.  */
9105 	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9106 	    return COSTS_N_INSNS (2);
9107 	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
9108 	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9109             return COSTS_N_INSNS (2);
9110 	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
9111           if (thumb_shiftable_const (INTVAL (x)))
9112             return COSTS_N_INSNS (2);
9113           return COSTS_N_INSNS (3);
9114         }
9115       else if ((outer == PLUS || outer == COMPARE)
9116                && INTVAL (x) < 256 && INTVAL (x) > -256)
9117         return 0;
9118       else if ((outer == IOR || outer == XOR || outer == AND)
9119                && INTVAL (x) < 256 && INTVAL (x) >= -256)
9120         return COSTS_N_INSNS (1);
9121       else if (outer == AND)
9122         {
9123           int i;
9124           /* This duplicates the tests in the andsi3 expander.  */
9125           for (i = 9; i <= 31; i++)
9126             if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9127                 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9128               return COSTS_N_INSNS (2);
9129         }
9130       else if (outer == ASHIFT || outer == ASHIFTRT
9131                || outer == LSHIFTRT)
9132         return 0;
9133       return COSTS_N_INSNS (2);
9134 
9135     case CONST:
9136     case CONST_DOUBLE:
9137     case LABEL_REF:
9138     case SYMBOL_REF:
9139       return COSTS_N_INSNS (3);
9140 
9141     case UDIV:
9142     case UMOD:
9143     case DIV:
9144     case MOD:
9145       return 100;
9146 
9147     case TRUNCATE:
9148       return 99;
9149 
9150     case AND:
9151     case XOR:
9152     case IOR:
9153       return COSTS_N_INSNS (1);
9154 
9155     case MEM:
9156       return (COSTS_N_INSNS (1)
9157 	      + COSTS_N_INSNS (1)
9158 		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9159               + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9160                  ? COSTS_N_INSNS (1) : 0));
9161 
9162     case IF_THEN_ELSE:
9163       /* XXX a guess.  */
9164       if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9165         return 14;
9166       return 2;
9167 
9168     case ZERO_EXTEND:
9169       /* XXX still guessing.  */
9170       switch (GET_MODE (XEXP (x, 0)))
9171         {
9172           case E_QImode:
9173             return (1 + (mode == DImode ? 4 : 0)
9174                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9175 
9176           case E_HImode:
9177             return (4 + (mode == DImode ? 4 : 0)
9178                     + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9179 
9180           case E_SImode:
9181             return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9182 
9183           default:
9184             return 99;
9185         }
9186 
9187     default:
9188       return 99;
9189     }
9190 }
9191 
9192 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9193    operand, then return the operand that is being shifted.  If the shift
9194    is not by a constant, then set SHIFT_REG to point to the operand.
9195    Return NULL if OP is not a shifter operand.  */
9196 static rtx
9197 shifter_op_p (rtx op, rtx *shift_reg)
9198 {
9199   enum rtx_code code = GET_CODE (op);
9200 
9201   if (code == MULT && CONST_INT_P (XEXP (op, 1))
9202       && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9203     return XEXP (op, 0);
9204   else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9205     return XEXP (op, 0);
9206   else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9207 	   || code == ASHIFTRT)
9208     {
9209       if (!CONST_INT_P (XEXP (op, 1)))
9210 	*shift_reg = XEXP (op, 1);
9211       return XEXP (op, 0);
9212     }
9213 
9214   return NULL;
9215 }
9216 
9217 static bool
9218 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9219 {
9220   const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9221   rtx_code code = GET_CODE (x);
9222   gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9223 
9224   switch (XINT (x, 1))
9225     {
9226     case UNSPEC_UNALIGNED_LOAD:
9227       /* We can only do unaligned loads into the integer unit, and we can't
9228 	 use LDM or LDRD.  */
9229       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9230       if (speed_p)
9231 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9232 		  + extra_cost->ldst.load_unaligned);
9233 
9234 #ifdef NOT_YET
9235       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9236 				 ADDR_SPACE_GENERIC, speed_p);
9237 #endif
9238       return true;
9239 
9240     case UNSPEC_UNALIGNED_STORE:
9241       *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9242       if (speed_p)
9243 	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9244 		  + extra_cost->ldst.store_unaligned);
9245 
9246       *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9247 #ifdef NOT_YET
9248       *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9249 				 ADDR_SPACE_GENERIC, speed_p);
9250 #endif
9251       return true;
9252 
9253     case UNSPEC_VRINTZ:
9254     case UNSPEC_VRINTP:
9255     case UNSPEC_VRINTM:
9256     case UNSPEC_VRINTR:
9257     case UNSPEC_VRINTX:
9258     case UNSPEC_VRINTA:
9259       if (speed_p)
9260         *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9261 
9262       return true;
9263     default:
9264       *cost = COSTS_N_INSNS (2);
9265       break;
9266     }
9267   return true;
9268 }
9269 
9270 /* Cost of a libcall.  We assume one insn per argument, an amount for the
9271    call (one insn for -Os) and then one for processing the result.  */
9272 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9273 
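/* Helper for the narrow-mode PLUS/MINUS cost cases: if operand IDX of X is a
   left shift (or an equivalent MULT by a power of two), cost the expression
   as a combined arithmetic-plus-shift operation, adding the register-shift
   penalty when the shift amount is not a constant, and return true to stop
   the cost recursion.  */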
9274 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
9275 	do								\
9276 	  {								\
9277 	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
9278 	    if (shift_op != NULL					\
9279 	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
9280 	      {								\
9281 	        if (shift_reg)						\
9282 		  {							\
9283 		    if (speed_p)					\
9284 		      *cost += extra_cost->alu.arith_shift_reg;		\
9285 		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
9286 				       ASHIFT, 1, speed_p);		\
9287 		  }							\
9288 	        else if (speed_p)					\
9289 		  *cost += extra_cost->alu.arith_shift;			\
9290 									\
9291 		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
9292 				    ASHIFT, 0, speed_p)			\
9293 			  + rtx_cost (XEXP (x, 1 - IDX),		\
9294 				      GET_MODE (shift_op),		\
9295 			              OP, 1, speed_p));			\
9296 	        return true;						\
9297 	      }								\
9298 	  }								\
9299 	while (0)
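/* HANDLE_NARROW_SHIFT_ARITH is only usable inside arm_rtx_costs_internal
   below, where it is invoked as, e.g., HANDLE_NARROW_SHIFT_ARITH (PLUS, 0):
   if operand IDX of X is a left-shift shifter operand, it adds the costs of
   the shifted operand, the shift amount and the other operand, then returns
   TRUE from the enclosing function.  */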
9300 
9301 /* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
9302    considering the costs of the addressing mode and memory access
9303    separately.  */
9304 static bool
9305 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9306 	       int *cost, bool speed_p)
9307 {
9308   machine_mode mode = GET_MODE (x);
9309 
9310   *cost = COSTS_N_INSNS (1);
9311 
9312   if (flag_pic
9313       && GET_CODE (XEXP (x, 0)) == PLUS
9314       && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9315     /* This will be split into two instructions.  Add the cost of the
9316        additional instruction here.  The cost of the memory access is computed
9317        below.  See arm.md:calculate_pic_address.  */
9318     *cost += COSTS_N_INSNS (1);
9319 
9320   /* Calculate cost of the addressing mode.  */
9321   if (speed_p)
9322     {
9323       arm_addr_mode_op op_type;
9324       switch (GET_CODE (XEXP (x, 0)))
9325 	{
9326 	default:
9327 	case REG:
9328 	  op_type = AMO_DEFAULT;
9329 	  break;
9330 	case MINUS:
9331 	  /* MINUS does not appear in RTL, but the architecture supports it,
9332 	     so handle this case defensively.  */
9333 	  /* fall through */
9334 	case PLUS:
9335 	  op_type = AMO_NO_WB;
9336 	  break;
9337 	case PRE_INC:
9338 	case PRE_DEC:
9339 	case POST_INC:
9340 	case POST_DEC:
9341 	case PRE_MODIFY:
9342 	case POST_MODIFY:
9343 	  op_type = AMO_WB;
9344 	  break;
9345 	}
9346 
9347       if (VECTOR_MODE_P (mode))
9348 	  *cost += current_tune->addr_mode_costs->vector[op_type];
9349       else if (FLOAT_MODE_P (mode))
9350 	  *cost += current_tune->addr_mode_costs->fp[op_type];
9351       else
9352 	  *cost += current_tune->addr_mode_costs->integer[op_type];
9353     }
9354 
9355   /* Calculate cost of memory access.  */
9356   if (speed_p)
9357     {
9358       if (FLOAT_MODE_P (mode))
9359 	{
9360 	  if (GET_MODE_SIZE (mode) == 8)
9361 	    *cost += extra_cost->ldst.loadd;
9362 	  else
9363 	    *cost += extra_cost->ldst.loadf;
9364 	}
9365       else if (VECTOR_MODE_P (mode))
9366 	*cost += extra_cost->ldst.loadv;
9367       else
9368 	{
9369 	  /* Integer modes */
9370 	  if (GET_MODE_SIZE (mode) == 8)
9371 	    *cost += extra_cost->ldst.ldrd;
9372 	  else
9373 	    *cost += extra_cost->ldst.load;
9374 	}
9375     }
9376 
9377   return true;
9378 }
9379 
9380 /* RTX costs.  Make an estimate of the cost of executing the operation
9381    X, which is contained within an operation with code OUTER_CODE.
9382    SPEED_P indicates whether the cost desired is the performance cost,
9383    or the size cost.  The estimate is stored in COST and the return
9384    value is TRUE if the cost calculation is final, or FALSE if the
9385    caller should recurse through the operands of X to add additional
9386    costs.
9387 
9388    We currently make no attempt to model the size savings of Thumb-2
9389    16-bit instructions.  At the normal points in compilation where
9390    this code is called we have no measure of whether the condition
9391    flags are live or not, and thus no realistic way to determine what
9392    the size will eventually be.  */
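/* For example, the MEM case below returns TRUE because arm_mem_costs fully
   accounts for the address expression, whereas the CLZ case returns FALSE
   so that the caller adds the cost of the operand itself.  */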
9393 static bool
9394 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9395 		   const struct cpu_cost_table *extra_cost,
9396 		   int *cost, bool speed_p)
9397 {
9398   machine_mode mode = GET_MODE (x);
9399 
9400   *cost = COSTS_N_INSNS (1);
9401 
9402   if (TARGET_THUMB1)
9403     {
9404       if (speed_p)
9405 	*cost = thumb1_rtx_costs (x, code, outer_code);
9406       else
9407 	*cost = thumb1_size_rtx_costs (x, code, outer_code);
9408       return true;
9409     }
9410 
9411   switch (code)
9412     {
9413     case SET:
9414       *cost = 0;
9415       /* SET RTXs don't have a mode so we get it from the destination.  */
9416       mode = GET_MODE (SET_DEST (x));
9417 
9418       if (REG_P (SET_SRC (x))
9419 	  && REG_P (SET_DEST (x)))
9420 	{
9421 	  /* Assume that most copies can be done with a single insn,
9422 	     unless we don't have HW FP, in which case everything
9423 	     larger than word mode will require two insns.  */
9424 	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9425 				   && GET_MODE_SIZE (mode) > 4)
9426 				  || mode == DImode)
9427 				 ? 2 : 1);
9428 	  /* Conditional register moves can be encoded
9429 	     in 16 bits in Thumb mode.  */
9430 	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9431 	    *cost >>= 1;
9432 
9433 	  return true;
9434 	}
9435 
9436       if (CONST_INT_P (SET_SRC (x)))
9437 	{
9438 	  /* Handle CONST_INT here, since the value doesn't have a mode
9439 	     and we would otherwise be unable to work out the true cost.  */
9440 	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9441 			    0, speed_p);
9442 	  outer_code = SET;
9443 	  /* Slightly lower the cost of setting a core reg to a constant.
9444 	     This helps break up chains and allows for better scheduling.  */
9445 	  if (REG_P (SET_DEST (x))
9446 	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
9447 	    *cost -= 1;
9448 	  x = SET_SRC (x);
9449 	  /* Immediate moves with an immediate in the range [0, 255] can be
9450 	     encoded in 16 bits in Thumb mode.  */
9451 	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9452 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9453 	    *cost >>= 1;
9454 	  goto const_int_cost;
9455 	}
9456 
9457       return false;
9458 
9459     case MEM:
9460       return arm_mem_costs (x, extra_cost, cost, speed_p);
9461 
9462     case PARALLEL:
9463     {
9464    /* Calculations of LDM costs are complex.  We assume an initial cost
9465    (ldm_1st) which will load the number of registers mentioned in
9466    ldm_regs_per_insn_1st registers; then each additional
9467    ldm_regs_per_insn_subsequent registers cost one more insn.  The
9468    formula for N regs is thus:
9469 
9470    ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9471 			     + ldm_regs_per_insn_subsequent - 1)
9472 			    / ldm_regs_per_insn_subsequent).
9473 
9474    Additional costs may also be added for addressing.  A similar
9475    formula is used for STM.  */
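      /* For instance, with ldm_regs_per_insn_1st == 3 and
	 ldm_regs_per_insn_subsequent == 2 (figures chosen purely for
	 illustration), a 7-register LDM costs
	 ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
	 = ldm_1st + COSTS_N_INSNS (2).  */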
9476 
9477       bool is_ldm = load_multiple_operation (x, SImode);
9478       bool is_stm = store_multiple_operation (x, SImode);
9479 
9480       if (is_ldm || is_stm)
9481         {
9482 	  if (speed_p)
9483 	    {
9484 	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
9485 	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
9486 	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
9487 	                              : extra_cost->ldst.stm_regs_per_insn_1st;
9488 	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
9489 	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9490 	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;
9491 
9492 	      *cost += regs_per_insn_1st
9493 	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9494 					    + regs_per_insn_sub - 1)
9495 					  / regs_per_insn_sub);
9496 	      return true;
9497 	    }
9498 
9499         }
9500       return false;
9501     }
9502     case DIV:
9503     case UDIV:
9504       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9505 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9506 	*cost += COSTS_N_INSNS (speed_p
9507 			       ? extra_cost->fp[mode != SFmode].div : 0);
9508       else if (mode == SImode && TARGET_IDIV)
9509 	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9510       else
9511 	*cost = LIBCALL_COST (2);
9512 
9513       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9514 	 are possible, udiv is preferred.  */
9515       *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9516       return false;	/* All arguments must be in registers.  */
9517 
9518     case MOD:
9519       /* MOD by a power of 2 can be expanded as:
9520 	 rsbs    r1, r0, #0
9521 	 and     r0, r0, #(n - 1)
9522 	 and     r1, r1, #(n - 1)
9523 	 rsbpl   r0, r1, #0.  */
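      /* That expansion is four instructions in total, which matches the
	 initial COSTS_N_INSNS (1) plus the COSTS_N_INSNS (3) added below.  */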
9524       if (CONST_INT_P (XEXP (x, 1))
9525 	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9526 	  && mode == SImode)
9527 	{
9528 	  *cost += COSTS_N_INSNS (3);
9529 
9530 	  if (speed_p)
9531 	    *cost += 2 * extra_cost->alu.logical
9532 		     + extra_cost->alu.arith;
9533 	  return true;
9534 	}
9535 
9536     /* Fall-through.  */
9537     case UMOD:
9538       /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9539 	 are possible, udiv is preferred.  */
9540       *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9541       return false;	/* All arguments must be in registers.  */
9542 
9543     case ROTATE:
9544       if (mode == SImode && REG_P (XEXP (x, 1)))
9545 	{
9546 	  *cost += (COSTS_N_INSNS (1)
9547 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9548 	  if (speed_p)
9549 	    *cost += extra_cost->alu.shift_reg;
9550 	  return true;
9551 	}
9552       /* Fall through */
9553     case ROTATERT:
9554     case ASHIFT:
9555     case LSHIFTRT:
9556     case ASHIFTRT:
9557       if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9558 	{
9559 	  *cost += (COSTS_N_INSNS (2)
9560 		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9561 	  if (speed_p)
9562 	    *cost += 2 * extra_cost->alu.shift;
9563 	  /* Slightly disparage left shift by 1 so that we prefer adddi3.  */
9564 	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9565 	    *cost += 1;
9566 	  return true;
9567 	}
9568       else if (mode == SImode)
9569 	{
9570 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9571 	  /* Slightly disparage register shifts at -Os, but not by much.  */
9572 	  if (!CONST_INT_P (XEXP (x, 1)))
9573 	    *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9574 		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9575 	  return true;
9576 	}
9577       else if (GET_MODE_CLASS (mode) == MODE_INT
9578 	       && GET_MODE_SIZE (mode) < 4)
9579 	{
9580 	  if (code == ASHIFT)
9581 	    {
9582 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9583 	      /* Slightly disparage register shifts at -Os, but not by
9584 	         much.  */
9585 	      if (!CONST_INT_P (XEXP (x, 1)))
9586 		*cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9587 			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9588 	    }
9589 	  else if (code == LSHIFTRT || code == ASHIFTRT)
9590 	    {
9591 	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9592 		{
9593 		  /* Can use SBFX/UBFX.  */
9594 		  if (speed_p)
9595 		    *cost += extra_cost->alu.bfx;
9596 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9597 		}
9598 	      else
9599 		{
9600 		  *cost += COSTS_N_INSNS (1);
9601 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9602 		  if (speed_p)
9603 		    {
9604 		      if (CONST_INT_P (XEXP (x, 1)))
9605 			*cost += 2 * extra_cost->alu.shift;
9606 		      else
9607 			*cost += (extra_cost->alu.shift
9608 				  + extra_cost->alu.shift_reg);
9609 		    }
9610 		  else
9611 		    /* Slightly disparage register shifts.  */
9612 		    *cost += !CONST_INT_P (XEXP (x, 1));
9613 		}
9614 	    }
9615 	  else /* Rotates.  */
9616 	    {
9617 	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9618 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9619 	      if (speed_p)
9620 		{
9621 		  if (CONST_INT_P (XEXP (x, 1)))
9622 		    *cost += (2 * extra_cost->alu.shift
9623 			      + extra_cost->alu.log_shift);
9624 		  else
9625 		    *cost += (extra_cost->alu.shift
9626 			      + extra_cost->alu.shift_reg
9627 			      + extra_cost->alu.log_shift_reg);
9628 		}
9629 	    }
9630 	  return true;
9631 	}
9632 
9633       *cost = LIBCALL_COST (2);
9634       return false;
9635 
9636     case BSWAP:
9637       if (arm_arch6)
9638         {
9639           if (mode == SImode)
9640             {
9641               if (speed_p)
9642                 *cost += extra_cost->alu.rev;
9643 
9644               return false;
9645             }
9646         }
9647       else
9648         {
9649           /* No rev instruction available.  Look at arm_legacy_rev
9650              and thumb_legacy_rev for the form of RTL used then.  */
9651           if (TARGET_THUMB)
9652             {
9653               *cost += COSTS_N_INSNS (9);
9654 
9655               if (speed_p)
9656                 {
9657                   *cost += 6 * extra_cost->alu.shift;
9658                   *cost += 3 * extra_cost->alu.logical;
9659                 }
9660             }
9661           else
9662             {
9663               *cost += COSTS_N_INSNS (4);
9664 
9665               if (speed_p)
9666                 {
9667                   *cost += 2 * extra_cost->alu.shift;
9668                   *cost += extra_cost->alu.arith_shift;
9669                   *cost += 2 * extra_cost->alu.logical;
9670                 }
9671             }
9672           return true;
9673         }
9674       return false;
9675 
9676     case MINUS:
9677       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9678 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9679 	{
9680 	  if (GET_CODE (XEXP (x, 0)) == MULT
9681 	      || GET_CODE (XEXP (x, 1)) == MULT)
9682 	    {
9683 	      rtx mul_op0, mul_op1, sub_op;
9684 
9685 	      if (speed_p)
9686 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9687 
9688 	      if (GET_CODE (XEXP (x, 0)) == MULT)
9689 		{
9690 		  mul_op0 = XEXP (XEXP (x, 0), 0);
9691 		  mul_op1 = XEXP (XEXP (x, 0), 1);
9692 		  sub_op = XEXP (x, 1);
9693 		}
9694 	      else
9695 		{
9696 		  mul_op0 = XEXP (XEXP (x, 1), 0);
9697 		  mul_op1 = XEXP (XEXP (x, 1), 1);
9698 		  sub_op = XEXP (x, 0);
9699 		}
9700 
9701 	      /* The first operand of the multiply may be optionally
9702 		 negated.  */
9703 	      if (GET_CODE (mul_op0) == NEG)
9704 		mul_op0 = XEXP (mul_op0, 0);
9705 
9706 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9707 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
9708 			+ rtx_cost (sub_op, mode, code, 0, speed_p));
9709 
9710 	      return true;
9711 	    }
9712 
9713 	  if (speed_p)
9714 	    *cost += extra_cost->fp[mode != SFmode].addsub;
9715 	  return false;
9716 	}
9717 
9718       if (mode == SImode)
9719 	{
9720 	  rtx shift_by_reg = NULL;
9721 	  rtx shift_op;
9722 	  rtx non_shift_op;
9723 
9724 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9725 	  if (shift_op == NULL)
9726 	    {
9727 	      shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9728 	      non_shift_op = XEXP (x, 0);
9729 	    }
9730 	  else
9731 	    non_shift_op = XEXP (x, 1);
9732 
9733 	  if (shift_op != NULL)
9734 	    {
9735 	      if (shift_by_reg != NULL)
9736 		{
9737 		  if (speed_p)
9738 		    *cost += extra_cost->alu.arith_shift_reg;
9739 		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9740 		}
9741 	      else if (speed_p)
9742 		*cost += extra_cost->alu.arith_shift;
9743 
9744 	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9745 	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9746 	      return true;
9747 	    }
9748 
9749 	  if (arm_arch_thumb2
9750 	      && GET_CODE (XEXP (x, 1)) == MULT)
9751 	    {
9752 	      /* MLS.  */
9753 	      if (speed_p)
9754 		*cost += extra_cost->mult[0].add;
9755 	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9756 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9757 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9758 	      return true;
9759 	    }
9760 
9761 	  if (CONST_INT_P (XEXP (x, 0)))
9762 	    {
9763 	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9764 					    INTVAL (XEXP (x, 0)), NULL_RTX,
9765 					    NULL_RTX, 1, 0);
9766 	      *cost = COSTS_N_INSNS (insns);
9767 	      if (speed_p)
9768 		*cost += insns * extra_cost->alu.arith;
9769 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9770 	      return true;
9771 	    }
9772 	  else if (speed_p)
9773 	    *cost += extra_cost->alu.arith;
9774 
9775 	  return false;
9776 	}
9777 
9778       if (GET_MODE_CLASS (mode) == MODE_INT
9779 	  && GET_MODE_SIZE (mode) < 4)
9780 	{
9781 	  rtx shift_op, shift_reg;
9782 	  shift_reg = NULL;
9783 
9784 	  /* We check both sides of the MINUS for shifter operands since,
9785 	     unlike PLUS, it's not commutative.  */
9786 
9787 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9788 	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9789 
9790 	  /* Slightly disparage, as we might need to widen the result.  */
9791 	  *cost += 1;
9792 	  if (speed_p)
9793 	    *cost += extra_cost->alu.arith;
9794 
9795 	  if (CONST_INT_P (XEXP (x, 0)))
9796 	    {
9797 	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9798 	      return true;
9799 	    }
9800 
9801 	  return false;
9802 	}
9803 
9804       if (mode == DImode)
9805 	{
9806 	  *cost += COSTS_N_INSNS (1);
9807 
9808 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9809 	    {
9810 	      rtx op1 = XEXP (x, 1);
9811 
9812 	      if (speed_p)
9813 		*cost += 2 * extra_cost->alu.arith;
9814 
9815 	      if (GET_CODE (op1) == ZERO_EXTEND)
9816 		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9817 				   0, speed_p);
9818 	      else
9819 		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9820 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9821 				 0, speed_p);
9822 	      return true;
9823 	    }
9824 	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9825 	    {
9826 	      if (speed_p)
9827 		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9828 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9829 				  0, speed_p)
9830 			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9831 	      return true;
9832 	    }
9833 	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9834 		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9835 	    {
9836 	      if (speed_p)
9837 		*cost += (extra_cost->alu.arith
9838 			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9839 			     ? extra_cost->alu.arith
9840 			     : extra_cost->alu.arith_shift));
9841 	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9842 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9843 				    GET_CODE (XEXP (x, 1)), 0, speed_p));
9844 	      return true;
9845 	    }
9846 
9847 	  if (speed_p)
9848 	    *cost += 2 * extra_cost->alu.arith;
9849 	  return false;
9850 	}
9851 
9852       /* Vector mode?  */
9853 
9854       *cost = LIBCALL_COST (2);
9855       return false;
9856 
9857     case PLUS:
9858       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9859 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
9860 	{
9861 	  if (GET_CODE (XEXP (x, 0)) == MULT)
9862 	    {
9863 	      rtx mul_op0, mul_op1, add_op;
9864 
9865 	      if (speed_p)
9866 		*cost += extra_cost->fp[mode != SFmode].mult_addsub;
9867 
9868 	      mul_op0 = XEXP (XEXP (x, 0), 0);
9869 	      mul_op1 = XEXP (XEXP (x, 0), 1);
9870 	      add_op = XEXP (x, 1);
9871 
9872 	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9873 			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
9874 			+ rtx_cost (add_op, mode, code, 0, speed_p));
9875 
9876 	      return true;
9877 	    }
9878 
9879 	  if (speed_p)
9880 	    *cost += extra_cost->fp[mode != SFmode].addsub;
9881 	  return false;
9882 	}
9883       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9884 	{
9885 	  *cost = LIBCALL_COST (2);
9886 	  return false;
9887 	}
9888 
9889       /* Narrow modes can be synthesized in SImode, but the range
9890 	 of useful sub-operations is limited.  Check for shift operations
9891 	 on one of the operands.  Only left shifts can be used in the
9892 	 narrow modes.  */
9893       if (GET_MODE_CLASS (mode) == MODE_INT
9894 	  && GET_MODE_SIZE (mode) < 4)
9895 	{
9896 	  rtx shift_op, shift_reg;
9897 	  shift_reg = NULL;
9898 
9899 	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9900 
9901 	  if (CONST_INT_P (XEXP (x, 1)))
9902 	    {
9903 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9904 					    INTVAL (XEXP (x, 1)), NULL_RTX,
9905 					    NULL_RTX, 1, 0);
9906 	      *cost = COSTS_N_INSNS (insns);
9907 	      if (speed_p)
9908 		*cost += insns * extra_cost->alu.arith;
9909 	      /* Slightly penalize a narrow operation as the result may
9910 		 need widening.  */
9911 	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9912 	      return true;
9913 	    }
9914 
9915 	  /* Slightly penalize a narrow operation as the result may
9916 	     need widening.  */
9917 	  *cost += 1;
9918 	  if (speed_p)
9919 	    *cost += extra_cost->alu.arith;
9920 
9921 	  return false;
9922 	}
9923 
9924       if (mode == SImode)
9925 	{
9926 	  rtx shift_op, shift_reg;
9927 
9928 	  if (TARGET_INT_SIMD
9929 	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9930 		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9931 	    {
9932 	      /* UXTA[BH] or SXTA[BH].  */
9933 	      if (speed_p)
9934 		*cost += extra_cost->alu.extend_arith;
9935 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9936 				  0, speed_p)
9937 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9938 	      return true;
9939 	    }
9940 
9941 	  shift_reg = NULL;
9942 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9943 	  if (shift_op != NULL)
9944 	    {
9945 	      if (shift_reg)
9946 		{
9947 		  if (speed_p)
9948 		    *cost += extra_cost->alu.arith_shift_reg;
9949 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9950 		}
9951 	      else if (speed_p)
9952 		*cost += extra_cost->alu.arith_shift;
9953 
9954 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9955 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9956 	      return true;
9957 	    }
9958 	  if (GET_CODE (XEXP (x, 0)) == MULT)
9959 	    {
9960 	      rtx mul_op = XEXP (x, 0);
9961 
9962 	      if (TARGET_DSP_MULTIPLY
9963 		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9964 		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9968 		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9969 			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9970 			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9971 			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9972 			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9973 				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9974 				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9975 				      == 16))))))
9976 		{
9977 		  /* SMLA[BT][BT].  */
9978 		  if (speed_p)
9979 		    *cost += extra_cost->mult[0].extend_add;
9980 		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9981 				      SIGN_EXTEND, 0, speed_p)
9982 			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9983 					SIGN_EXTEND, 0, speed_p)
9984 			    + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9985 		  return true;
9986 		}
9987 
9988 	      if (speed_p)
9989 		*cost += extra_cost->mult[0].add;
9990 	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9991 			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9992 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9993 	      return true;
9994 	    }
9995 	  if (CONST_INT_P (XEXP (x, 1)))
9996 	    {
9997 	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9998 					    INTVAL (XEXP (x, 1)), NULL_RTX,
9999 					    NULL_RTX, 1, 0);
10000 	      *cost = COSTS_N_INSNS (insns);
10001 	      if (speed_p)
10002 		*cost += insns * extra_cost->alu.arith;
10003 	      *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10004 	      return true;
10005 	    }
10006 	  else if (speed_p)
10007 	    *cost += extra_cost->alu.arith;
10008 
10009 	  return false;
10010 	}
10011 
10012       if (mode == DImode)
10013 	{
10014 	  if (arm_arch3m
10015 	      && GET_CODE (XEXP (x, 0)) == MULT
10016 	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10017 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10018 		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10019 		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10020 	    {
10021 	      if (speed_p)
10022 		*cost += extra_cost->mult[1].extend_add;
10023 	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10024 				  ZERO_EXTEND, 0, speed_p)
10025 			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10026 				    ZERO_EXTEND, 0, speed_p)
10027 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10028 	      return true;
10029 	    }
10030 
10031 	  *cost += COSTS_N_INSNS (1);
10032 
10033 	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10034 	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10035 	    {
10036 	      if (speed_p)
10037 		*cost += (extra_cost->alu.arith
10038 			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10039 			     ? extra_cost->alu.arith
10040 			     : extra_cost->alu.arith_shift));
10041 
10042 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10043 				  0, speed_p)
10044 			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10045 	      return true;
10046 	    }
10047 
10048 	  if (speed_p)
10049 	    *cost += 2 * extra_cost->alu.arith;
10050 	  return false;
10051 	}
10052 
10053       /* Vector mode?  */
10054       *cost = LIBCALL_COST (2);
10055       return false;
10056     case IOR:
10057       if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10058         {
10059           if (speed_p)
10060             *cost += extra_cost->alu.rev;
10061 
10062           return true;
10063         }
10064     /* Fall through.  */
10065     case AND: case XOR:
10066       if (mode == SImode)
10067 	{
10068 	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10069 	  rtx op0 = XEXP (x, 0);
10070 	  rtx shift_op, shift_reg;
10071 
10072 	  if (subcode == NOT
10073 	      && (code == AND
10074 		  || (code == IOR && TARGET_THUMB2)))
10075 	    op0 = XEXP (op0, 0);
10076 
10077 	  shift_reg = NULL;
10078 	  shift_op = shifter_op_p (op0, &shift_reg);
10079 	  if (shift_op != NULL)
10080 	    {
10081 	      if (shift_reg)
10082 		{
10083 		  if (speed_p)
10084 		    *cost += extra_cost->alu.log_shift_reg;
10085 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10086 		}
10087 	      else if (speed_p)
10088 		*cost += extra_cost->alu.log_shift;
10089 
10090 	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10091 			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10092 	      return true;
10093 	    }
10094 
10095 	  if (CONST_INT_P (XEXP (x, 1)))
10096 	    {
10097 	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
10098 					    INTVAL (XEXP (x, 1)), NULL_RTX,
10099 					    NULL_RTX, 1, 0);
10100 
10101 	      *cost = COSTS_N_INSNS (insns);
10102 	      if (speed_p)
10103 		*cost += insns * extra_cost->alu.logical;
10104 	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
10105 	      return true;
10106 	    }
10107 
10108 	  if (speed_p)
10109 	    *cost += extra_cost->alu.logical;
10110 	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10111 		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10112 	  return true;
10113 	}
10114 
10115       if (mode == DImode)
10116 	{
10117 	  rtx op0 = XEXP (x, 0);
10118 	  enum rtx_code subcode = GET_CODE (op0);
10119 
10120 	  *cost += COSTS_N_INSNS (1);
10121 
10122 	  if (subcode == NOT
10123 	      && (code == AND
10124 		  || (code == IOR && TARGET_THUMB2)))
10125 	    op0 = XEXP (op0, 0);
10126 
10127 	  if (GET_CODE (op0) == ZERO_EXTEND)
10128 	    {
10129 	      if (speed_p)
10130 		*cost += 2 * extra_cost->alu.logical;
10131 
10132 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10133 				  0, speed_p)
10134 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10135 	      return true;
10136 	    }
10137 	  else if (GET_CODE (op0) == SIGN_EXTEND)
10138 	    {
10139 	      if (speed_p)
10140 		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10141 
10142 	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10143 				  0, speed_p)
10144 			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10145 	      return true;
10146 	    }
10147 
10148 	  if (speed_p)
10149 	    *cost += 2 * extra_cost->alu.logical;
10150 
10151 	  return true;
10152 	}
10153       /* Vector mode?  */
10154 
10155       *cost = LIBCALL_COST (2);
10156       return false;
10157 
10158     case MULT:
10159       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10160 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10161 	{
10162 	  rtx op0 = XEXP (x, 0);
10163 
10164 	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
10165 	    op0 = XEXP (op0, 0);
10166 
10167 	  if (speed_p)
10168 	    *cost += extra_cost->fp[mode != SFmode].mult;
10169 
10170 	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10171 		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10172 	  return true;
10173 	}
10174       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10175 	{
10176 	  *cost = LIBCALL_COST (2);
10177 	  return false;
10178 	}
10179 
10180       if (mode == SImode)
10181 	{
10182 	  if (TARGET_DSP_MULTIPLY
10183 	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10184 		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10185 		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10186 			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10187 			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10188 		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10189 		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10190 		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10191 		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10192 			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10193 			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10194 			      && (INTVAL (XEXP (XEXP (x, 1), 1))
10195 				  == 16))))))
10196 	    {
10197 	      /* SMUL[TB][TB].  */
10198 	      if (speed_p)
10199 		*cost += extra_cost->mult[0].extend;
10200 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10201 				 SIGN_EXTEND, 0, speed_p);
10202 	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10203 				 SIGN_EXTEND, 1, speed_p);
10204 	      return true;
10205 	    }
10206 	  if (speed_p)
10207 	    *cost += extra_cost->mult[0].simple;
10208 	  return false;
10209 	}
10210 
10211       if (mode == DImode)
10212 	{
10213 	  if (arm_arch3m
10214 	      && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10215 		   && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10216 		  || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10217 		      && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10218 	    {
10219 	      if (speed_p)
10220 		*cost += extra_cost->mult[1].extend;
10221 	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10222 				  ZERO_EXTEND, 0, speed_p)
10223 			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10224 				    ZERO_EXTEND, 0, speed_p));
10225 	      return true;
10226 	    }
10227 
10228 	  *cost = LIBCALL_COST (2);
10229 	  return false;
10230 	}
10231 
10232       /* Vector mode?  */
10233       *cost = LIBCALL_COST (2);
10234       return false;
10235 
10236     case NEG:
10237       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10238 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10239 	{
10240 	  if (GET_CODE (XEXP (x, 0)) == MULT)
10241 	    {
10242 	      /* VNMUL.  */
10243 	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10244 	      return true;
10245 	    }
10246 
10247 	  if (speed_p)
10248 	    *cost += extra_cost->fp[mode != SFmode].neg;
10249 
10250 	  return false;
10251 	}
10252       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10253 	{
10254 	  *cost = LIBCALL_COST (1);
10255 	  return false;
10256 	}
10257 
10258       if (mode == SImode)
10259 	{
10260 	  if (GET_CODE (XEXP (x, 0)) == ABS)
10261 	    {
10262 	      *cost += COSTS_N_INSNS (1);
10263 	      /* Assume the non-flag-changing variant.  */
10264 	      if (speed_p)
10265 		*cost += (extra_cost->alu.log_shift
10266 			  + extra_cost->alu.arith_shift);
10267 	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10268 	      return true;
10269 	    }
10270 
10271 	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10272 	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10273 	    {
10274 	      *cost += COSTS_N_INSNS (1);
10275 	      /* No extra cost for MOV imm and MVN imm.  */
10276 	      /* If the comparison op is using the flags, there's no further
10277 		 cost, otherwise we need to add the cost of the comparison.  */
10278 	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
10279 		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10280 		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
10281 		{
10282 		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10283 		  *cost += (COSTS_N_INSNS (1)
10284 			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10285 					0, speed_p)
10286 			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10287 					1, speed_p));
10288 		  if (speed_p)
10289 		    *cost += extra_cost->alu.arith;
10290 		}
10291 	      return true;
10292 	    }
10293 
10294 	  if (speed_p)
10295 	    *cost += extra_cost->alu.arith;
10296 	  return false;
10297 	}
10298 
10299       if (GET_MODE_CLASS (mode) == MODE_INT
10300 	  && GET_MODE_SIZE (mode) < 4)
10301 	{
10302 	  /* Slightly disparage, as we might need an extend operation.  */
10303 	  *cost += 1;
10304 	  if (speed_p)
10305 	    *cost += extra_cost->alu.arith;
10306 	  return false;
10307 	}
10308 
10309       if (mode == DImode)
10310 	{
10311 	  *cost += COSTS_N_INSNS (1);
10312 	  if (speed_p)
10313 	    *cost += 2 * extra_cost->alu.arith;
10314 	  return false;
10315 	}
10316 
10317       /* Vector mode?  */
10318       *cost = LIBCALL_COST (1);
10319       return false;
10320 
10321     case NOT:
10322       if (mode == SImode)
10323 	{
10324 	  rtx shift_op;
10325 	  rtx shift_reg = NULL;
10326 
10327 	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10328 
10329 	  if (shift_op)
10330 	    {
10331 	      if (shift_reg != NULL)
10332 		{
10333 		  if (speed_p)
10334 		    *cost += extra_cost->alu.log_shift_reg;
10335 		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10336 		}
10337 	      else if (speed_p)
10338 		*cost += extra_cost->alu.log_shift;
10339 	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10340 	      return true;
10341 	    }
10342 
10343 	  if (speed_p)
10344 	    *cost += extra_cost->alu.logical;
10345 	  return false;
10346 	}
10347       if (mode == DImode)
10348 	{
10349 	  *cost += COSTS_N_INSNS (1);
10350 	  return false;
10351 	}
10352 
10353       /* Vector mode?  */
10354 
10355       *cost += LIBCALL_COST (1);
10356       return false;
10357 
10358     case IF_THEN_ELSE:
10359       {
10360         if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10361 	  {
10362 	    *cost += COSTS_N_INSNS (3);
10363 	    return true;
10364 	  }
10365 	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10366 	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10367 
10368 	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10369 	/* Assume that if one arm of the if_then_else is a register,
10370 	   it will be tied with the result, eliminating the
10371 	   conditional insn.  */
10372 	if (REG_P (XEXP (x, 1)))
10373 	  *cost += op2cost;
10374 	else if (REG_P (XEXP (x, 2)))
10375 	  *cost += op1cost;
10376 	else
10377 	  {
10378 	    if (speed_p)
10379 	      {
10380 		if (extra_cost->alu.non_exec_costs_exec)
10381 		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10382 		else
10383 		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10384 	      }
10385 	    else
10386 	      *cost += op1cost + op2cost;
10387 	  }
10388       }
10389       return true;
10390 
10391     case COMPARE:
10392       if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10393 	*cost = 0;
10394       else
10395 	{
10396 	  machine_mode op0mode;
10397 	  /* We'll mostly assume that the cost of a compare is the cost of the
10398 	     LHS.  However, there are some notable exceptions.  */
10399 
10400 	  /* Floating point compares are never done as side-effects.  */
10401 	  op0mode = GET_MODE (XEXP (x, 0));
10402 	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10403 	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10404 	    {
10405 	      if (speed_p)
10406 		*cost += extra_cost->fp[op0mode != SFmode].compare;
10407 
10408 	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
10409 		{
10410 		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10411 		  return true;
10412 		}
10413 
10414 	      return false;
10415 	    }
10416 	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10417 	    {
10418 	      *cost = LIBCALL_COST (2);
10419 	      return false;
10420 	    }
10421 
10422 	  /* DImode compares normally take two insns.  */
10423 	  if (op0mode == DImode)
10424 	    {
10425 	      *cost += COSTS_N_INSNS (1);
10426 	      if (speed_p)
10427 		*cost += 2 * extra_cost->alu.arith;
10428 	      return false;
10429 	    }
10430 
10431 	  if (op0mode == SImode)
10432 	    {
10433 	      rtx shift_op;
10434 	      rtx shift_reg;
10435 
10436 	      if (XEXP (x, 1) == const0_rtx
10437 		  && !(REG_P (XEXP (x, 0))
10438 		       || (GET_CODE (XEXP (x, 0)) == SUBREG
10439 			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
10440 		{
10441 		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10442 
10443 		  /* Multiply operations that set the flags are often
10444 		     significantly more expensive.  */
10445 		  if (speed_p
10446 		      && GET_CODE (XEXP (x, 0)) == MULT
10447 		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10448 		    *cost += extra_cost->mult[0].flag_setting;
10449 
10450 		  if (speed_p
10451 		      && GET_CODE (XEXP (x, 0)) == PLUS
10452 		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10453 		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10454 							    0), 1), mode))
10455 		    *cost += extra_cost->mult[0].flag_setting;
10456 		  return true;
10457 		}
10458 
10459 	      shift_reg = NULL;
10460 	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10461 	      if (shift_op != NULL)
10462 		{
10463 		  if (shift_reg != NULL)
10464 		    {
10465 		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10466 					 1, speed_p);
10467 		      if (speed_p)
10468 			*cost += extra_cost->alu.arith_shift_reg;
10469 		    }
10470 		  else if (speed_p)
10471 		    *cost += extra_cost->alu.arith_shift;
10472 		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10473 		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10474 		  return true;
10475 		}
10476 
10477 	      if (speed_p)
10478 		*cost += extra_cost->alu.arith;
10479 	      if (CONST_INT_P (XEXP (x, 1))
10480 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10481 		{
10482 		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10483 		  return true;
10484 		}
10485 	      return false;
10486 	    }
10487 
10488 	  /* Vector mode?  */
10489 
10490 	  *cost = LIBCALL_COST (2);
10491 	  return false;
10492 	}
10493       return true;
10494 
10495     case EQ:
10496     case NE:
10497     case LT:
10498     case LE:
10499     case GT:
10500     case GE:
10501     case LTU:
10502     case LEU:
10503     case GEU:
10504     case GTU:
10505     case ORDERED:
10506     case UNORDERED:
10507     case UNEQ:
10508     case UNLE:
10509     case UNLT:
10510     case UNGE:
10511     case UNGT:
10512     case LTGT:
10513       if (outer_code == SET)
10514 	{
10515 	  /* Is it a store-flag operation?  */
10516 	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10517 	      && XEXP (x, 1) == const0_rtx)
10518 	    {
10519 	      /* Thumb also needs an IT insn.  */
10520 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10521 	      return true;
10522 	    }
10523 	  if (XEXP (x, 1) == const0_rtx)
10524 	    {
10525 	      switch (code)
10526 		{
10527 		case LT:
10528 		  /* LSR Rd, Rn, #31.  */
10529 		  if (speed_p)
10530 		    *cost += extra_cost->alu.shift;
10531 		  break;
10532 
10533 		case EQ:
10534 		  /* RSBS T1, Rn, #0
10535 		     ADC  Rd, Rn, T1.  */
10536 
10537 		case NE:
10538 		  /* SUBS T1, Rn, #1
10539 		     SBC  Rd, Rn, T1.  */
10540 		  *cost += COSTS_N_INSNS (1);
10541 		  break;
10542 
10543 		case LE:
10544 		  /* RSBS T1, Rn, Rn, LSR #31
10545 		     ADC  Rd, Rn, T1. */
10546 		  *cost += COSTS_N_INSNS (1);
10547 		  if (speed_p)
10548 		    *cost += extra_cost->alu.arith_shift;
10549 		  break;
10550 
10551 		case GT:
10552 		  /* RSB  Rd, Rn, Rn, ASR #1
10553 		     LSR  Rd, Rd, #31.  */
10554 		  *cost += COSTS_N_INSNS (1);
10555 		  if (speed_p)
10556 		    *cost += (extra_cost->alu.arith_shift
10557 			      + extra_cost->alu.shift);
10558 		  break;
10559 
10560 		case GE:
10561 		  /* ASR  Rd, Rn, #31
10562 		     ADD  Rd, Rn, #1.  */
10563 		  *cost += COSTS_N_INSNS (1);
10564 		  if (speed_p)
10565 		    *cost += extra_cost->alu.shift;
10566 		  break;
10567 
10568 		default:
10569 		  /* Remaining cases are either meaningless or would take
10570 		     three insns anyway.  */
10571 		  *cost = COSTS_N_INSNS (3);
10572 		  break;
10573 		}
10574 	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10575 	      return true;
10576 	    }
10577 	  else
10578 	    {
10579 	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10580 	      if (CONST_INT_P (XEXP (x, 1))
10581 		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10582 		{
10583 		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10584 		  return true;
10585 		}
10586 
10587 	      return false;
10588 	    }
10589 	}
10590       /* Not directly inside a set.  If it involves the condition code
10591 	 register it must be the condition for a branch, cond_exec or
10592 	 I_T_E operation.  Since the comparison is performed elsewhere
10593 	 this is just the control part which has no additional
10594 	 cost.  */
10595       else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10596 	       && XEXP (x, 1) == const0_rtx)
10597 	{
10598 	  *cost = 0;
10599 	  return true;
10600 	}
10601       return false;
10602 
10603     case ABS:
10604       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10605 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10606 	{
10607 	  if (speed_p)
10608 	    *cost += extra_cost->fp[mode != SFmode].neg;
10609 
10610 	  return false;
10611 	}
10612       else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10613 	{
10614 	  *cost = LIBCALL_COST (1);
10615 	  return false;
10616 	}
10617 
10618       if (mode == SImode)
10619 	{
10620 	  if (speed_p)
10621 	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10622 	  return false;
10623 	}
10624       /* Vector mode?  */
10625       *cost = LIBCALL_COST (1);
10626       return false;
10627 
10628     case SIGN_EXTEND:
10629       if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10630 	  && MEM_P (XEXP (x, 0)))
10631 	{
10632 	  if (mode == DImode)
10633 	    *cost += COSTS_N_INSNS (1);
10634 
10635 	  if (!speed_p)
10636 	    return true;
10637 
10638 	  if (GET_MODE (XEXP (x, 0)) == SImode)
10639 	    *cost += extra_cost->ldst.load;
10640 	  else
10641 	    *cost += extra_cost->ldst.load_sign_extend;
10642 
10643 	  if (mode == DImode)
10644 	    *cost += extra_cost->alu.shift;
10645 
10646 	  return true;
10647 	}
10648 
10649       /* Widening from less than 32 bits requires an extend operation.  */
10650       if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10651 	{
10652 	  /* We have SXTB/SXTH.  */
10653 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10654 	  if (speed_p)
10655 	    *cost += extra_cost->alu.extend;
10656 	}
10657       else if (GET_MODE (XEXP (x, 0)) != SImode)
10658 	{
10659 	  /* Needs two shifts.  */
10660 	  *cost += COSTS_N_INSNS (1);
10661 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10662 	  if (speed_p)
10663 	    *cost += 2 * extra_cost->alu.shift;
10664 	}
10665 
10666       /* Widening beyond 32 bits requires one more insn.  */
10667       if (mode == DImode)
10668 	{
10669 	  *cost += COSTS_N_INSNS (1);
10670 	  if (speed_p)
10671 	    *cost += extra_cost->alu.shift;
10672 	}
10673 
10674       return true;
10675 
10676     case ZERO_EXTEND:
10677       if ((arm_arch4
10678 	   || GET_MODE (XEXP (x, 0)) == SImode
10679 	   || GET_MODE (XEXP (x, 0)) == QImode)
10680 	  && MEM_P (XEXP (x, 0)))
10681 	{
10682 	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10683 
10684 	  if (mode == DImode)
10685 	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */
10686 
10687 	  return true;
10688 	}
10689 
10690       /* Widening from less than 32 bits requires an extend operation.  */
10691       if (GET_MODE (XEXP (x, 0)) == QImode)
10692 	{
10693 	  /* UXTB can be a shorter instruction in Thumb2, but it might
10694 	     be slower than the AND Rd, Rn, #255 alternative.  When
10695 	     optimizing for speed it should never be slower to use
10696 	     AND, and we don't really model 16-bit vs 32-bit insns
10697 	     here.  */
10698 	  if (speed_p)
10699 	    *cost += extra_cost->alu.logical;
10700 	}
10701       else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10702 	{
10703 	  /* We have UXTB/UXTH.  */
10704 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10705 	  if (speed_p)
10706 	    *cost += extra_cost->alu.extend;
10707 	}
10708       else if (GET_MODE (XEXP (x, 0)) != SImode)
10709 	{
10710 	  /* Needs two shifts.  It's marginally preferable to use
10711 	     shifts rather than two BIC instructions as the second
10712 	     shift may merge with a subsequent insn as a shifter
10713 	     op.  */
10714 	  *cost = COSTS_N_INSNS (2);
10715 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10716 	  if (speed_p)
10717 	    *cost += 2 * extra_cost->alu.shift;
10718 	}
10719 
10720       /* Widening beyond 32 bits requires one more insn.  */
10721       if (mode == DImode)
10722 	{
10723 	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
10724 	}
10725 
10726       return true;
10727 
10728     case CONST_INT:
10729       *cost = 0;
10730       /* CONST_INT has no mode, so we cannot tell for sure how many
10731 	 insns are really going to be needed.  The best we can do is
10732 	 look at the value passed.  If it fits in SImode, then assume
10733 	 that's the mode it will be used for.  Otherwise assume it
10734 	 will be used in DImode.  */
10735       if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10736 	mode = SImode;
10737       else
10738 	mode = DImode;
10739 
10740       /* Avoid blowing up in arm_gen_constant ().  */
10741       if (!(outer_code == PLUS
10742 	    || outer_code == AND
10743 	    || outer_code == IOR
10744 	    || outer_code == XOR
10745 	    || outer_code == MINUS))
10746 	outer_code = SET;
10747 
10748     const_int_cost:
10749       if (mode == SImode)
10750 	{
10751 	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10752 						    INTVAL (x), NULL, NULL,
10753 						    0, 0));
10754 	  /* Extra costs?  */
10755 	}
10756       else
10757 	{
10758 	  *cost += COSTS_N_INSNS (arm_gen_constant
10759 				  (outer_code, SImode, NULL,
10760 				   trunc_int_for_mode (INTVAL (x), SImode),
10761 				   NULL, NULL, 0, 0)
10762 				  + arm_gen_constant (outer_code, SImode, NULL,
10763 						      INTVAL (x) >> 32, NULL,
10764 						      NULL, 0, 0));
10765 	  /* Extra costs?  */
10766 	}
10767 
10768       return true;
10769 
10770     case CONST:
10771     case LABEL_REF:
10772     case SYMBOL_REF:
10773       if (speed_p)
10774 	{
10775 	  if (arm_arch_thumb2 && !flag_pic)
10776 	    *cost += COSTS_N_INSNS (1);
10777 	  else
10778 	    *cost += extra_cost->ldst.load;
10779 	}
10780       else
10781 	*cost += COSTS_N_INSNS (1);
10782 
10783       if (flag_pic)
10784 	{
10785 	  *cost += COSTS_N_INSNS (1);
10786 	  if (speed_p)
10787 	    *cost += extra_cost->alu.arith;
10788 	}
10789 
10790       return true;
10791 
10792     case CONST_FIXED:
10793       *cost = COSTS_N_INSNS (4);
10794       /* Fixme.  */
10795       return true;
10796 
10797     case CONST_DOUBLE:
10798       if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10799 	  && (mode == SFmode || !TARGET_VFP_SINGLE))
10800 	{
10801 	  if (vfp3_const_double_rtx (x))
10802 	    {
10803 	      if (speed_p)
10804 		*cost += extra_cost->fp[mode == DFmode].fpconst;
10805 	      return true;
10806 	    }
10807 
10808 	  if (speed_p)
10809 	    {
10810 	      if (mode == DFmode)
10811 		*cost += extra_cost->ldst.loadd;
10812 	      else
10813 		*cost += extra_cost->ldst.loadf;
10814 	    }
10815 	  else
10816 	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10817 
10818 	  return true;
10819 	}
10820       *cost = COSTS_N_INSNS (4);
10821       return true;
10822 
10823     case CONST_VECTOR:
10824       /* Fixme.  */
10825       if (TARGET_NEON
10826 	  && TARGET_HARD_FLOAT
10827 	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10828 	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10829 	*cost = COSTS_N_INSNS (1);
10830       else
10831 	*cost = COSTS_N_INSNS (4);
10832       return true;
10833 
10834     case HIGH:
10835     case LO_SUM:
10836       /* When optimizing for size, we prefer constant pool entries to
10837 	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
10838       if (!speed_p)
10839 	*cost += 1;
10840       return true;
10841 
10842     case CLZ:
10843       if (speed_p)
10844 	*cost += extra_cost->alu.clz;
10845       return false;
10846 
10847     case SMIN:
10848       if (XEXP (x, 1) == const0_rtx)
10849 	{
10850 	  if (speed_p)
10851 	    *cost += extra_cost->alu.log_shift;
10852 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10853 	  return true;
10854 	}
10855       /* Fall through.  */
10856     case SMAX:
10857     case UMIN:
10858     case UMAX:
10859       *cost += COSTS_N_INSNS (1);
10860       return false;
10861 
10862     case TRUNCATE:
10863       if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10864 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10865 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10866 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10867 	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10868 	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10869 	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10870 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10871 		      == ZERO_EXTEND))))
10872 	{
10873 	  if (speed_p)
10874 	    *cost += extra_cost->mult[1].extend;
10875 	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10876 			      ZERO_EXTEND, 0, speed_p)
10877 		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10878 				ZERO_EXTEND, 0, speed_p));
10879 	  return true;
10880 	}
10881       *cost = LIBCALL_COST (1);
10882       return false;
10883 
10884     case UNSPEC_VOLATILE:
10885     case UNSPEC:
10886       return arm_unspec_cost (x, outer_code, speed_p, cost);
10887 
10888     case PC:
10889       /* Reading the PC is like reading any other register.  Writing it
10890 	 is more expensive, but we take that into account elsewhere.  */
10891       *cost = 0;
10892       return true;
10893 
10894     case ZERO_EXTRACT:
10895       /* TODO: Simple zero_extract of bottom bits using AND.  */
10896       /* Fall through.  */
10897     case SIGN_EXTRACT:
10898       if (arm_arch6
10899 	  && mode == SImode
10900 	  && CONST_INT_P (XEXP (x, 1))
10901 	  && CONST_INT_P (XEXP (x, 2)))
10902 	{
10903 	  if (speed_p)
10904 	    *cost += extra_cost->alu.bfx;
10905 	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10906 	  return true;
10907 	}
10908       /* Without UBFX/SBFX, need to resort to shift operations.  */
10909       *cost += COSTS_N_INSNS (1);
10910       if (speed_p)
10911 	*cost += 2 * extra_cost->alu.shift;
10912       *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10913       return true;
10914 
10915     case FLOAT_EXTEND:
10916       if (TARGET_HARD_FLOAT)
10917 	{
10918 	  if (speed_p)
10919 	    *cost += extra_cost->fp[mode == DFmode].widen;
10920 	  if (!TARGET_VFP5
10921 	      && GET_MODE (XEXP (x, 0)) == HFmode)
10922 	    {
10923 	      /* Pre v8, widening HF->DF is a two-step process, first
10924 	         widening to SFmode.  */
10925 	      *cost += COSTS_N_INSNS (1);
10926 	      if (speed_p)
10927 		*cost += extra_cost->fp[0].widen;
10928 	    }
10929 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10930 	  return true;
10931 	}
10932 
10933       *cost = LIBCALL_COST (1);
10934       return false;
10935 
10936     case FLOAT_TRUNCATE:
10937       if (TARGET_HARD_FLOAT)
10938 	{
10939 	  if (speed_p)
10940 	    *cost += extra_cost->fp[mode == DFmode].narrow;
10941 	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10942 	  return true;
10943 	  /* Vector modes?  */
10944 	}
10945       *cost = LIBCALL_COST (1);
10946       return false;
10947 
10948     case FMA:
10949       if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10950         {
10951           rtx op0 = XEXP (x, 0);
10952           rtx op1 = XEXP (x, 1);
10953           rtx op2 = XEXP (x, 2);
10954 
10955 
10956           /* vfms or vfnma.  */
10957           if (GET_CODE (op0) == NEG)
10958             op0 = XEXP (op0, 0);
10959 
10960           /* vfnms or vfnma.  */
10961           if (GET_CODE (op2) == NEG)
10962             op2 = XEXP (op2, 0);
10963 
10964           *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10965           *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10966           *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10967 
10968           if (speed_p)
10969             *cost += extra_cost->fp[mode == DFmode].fma;
10970 
10971           return true;
10972         }
10973 
10974       *cost = LIBCALL_COST (3);
10975       return false;
10976 
10977     case FIX:
10978     case UNSIGNED_FIX:
10979       if (TARGET_HARD_FLOAT)
10980 	{
10981 	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
10982 	     a vcvt fixed-point conversion.  */
10983 	  if (code == FIX && mode == SImode
10984 	      && GET_CODE (XEXP (x, 0)) == FIX
10985 	      && GET_MODE (XEXP (x, 0)) == SFmode
10986 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10987 	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10988 		 > 0)
10989 	    {
10990 	      if (speed_p)
10991 		*cost += extra_cost->fp[0].toint;
10992 
10993 	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10994 				 code, 0, speed_p);
10995 	      return true;
10996 	    }
10997 
10998 	  if (GET_MODE_CLASS (mode) == MODE_INT)
10999 	    {
11000 	      mode = GET_MODE (XEXP (x, 0));
11001 	      if (speed_p)
11002 		*cost += extra_cost->fp[mode == DFmode].toint;
11003 	      /* Strip off the 'cost' of rounding towards zero.  */
11004 	      if (GET_CODE (XEXP (x, 0)) == FIX)
11005 		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11006 				   0, speed_p);
11007 	      else
11008 		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11009 	      /* ??? Increase the cost to deal with transferring from
11010 		 FP -> CORE registers?  */
11011 	      return true;
11012 	    }
11013 	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11014 		   && TARGET_VFP5)
11015 	    {
11016 	      if (speed_p)
11017 		*cost += extra_cost->fp[mode == DFmode].roundint;
11018 	      return false;
11019 	    }
11020 	  /* Vector costs? */
11021 	}
11022       *cost = LIBCALL_COST (1);
11023       return false;
11024 
11025     case FLOAT:
11026     case UNSIGNED_FLOAT:
11027       if (TARGET_HARD_FLOAT)
11028 	{
11029 	  /* ??? Increase the cost to deal with transferring from CORE
11030 	     -> FP registers?  */
11031 	  if (speed_p)
11032 	    *cost += extra_cost->fp[mode == DFmode].fromint;
11033 	  return false;
11034 	}
11035       *cost = LIBCALL_COST (1);
11036       return false;
11037 
11038     case CALL:
11039       return true;
11040 
11041     case ASM_OPERANDS:
11042       {
11043         /* Just a guess.  Guess number of instructions in the asm
11044            plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
11045            though (see PR60663).  */
11046         int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11047         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11048 
11049         *cost = COSTS_N_INSNS (asm_length + num_operands);
11050         return true;
11051       }
11052     default:
11053       if (mode != VOIDmode)
11054 	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11055       else
11056 	*cost = COSTS_N_INSNS (4); /* Who knows?  */
11057       return false;
11058     }
11059 }
11060 
11061 #undef HANDLE_NARROW_SHIFT_ARITH
11062 
11063 /* RTX costs entry point.  */
11064 
11065 static bool
11066 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11067 	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11068 {
11069   bool result;
11070   int code = GET_CODE (x);
11071   gcc_assert (current_tune->insn_extra_cost);
11072 
11073   result =  arm_rtx_costs_internal (x, (enum rtx_code) code,
11074 				(enum rtx_code) outer_code,
11075 				current_tune->insn_extra_cost,
11076 				total, speed);
11077 
11078   if (dump_file && arm_verbose_cost)
11079     {
11080       print_rtl_single (dump_file, x);
11081       fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11082 	       *total, result ? "final" : "partial");
11083     }
11084   return result;
11085 }
11086 
11087 /* All address computations that can be done are free, but rtx cost returns
11088    the same for practically all of them.  So we weight the different types
11089    of address here in the order (most pref first):
11090    PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
11091 static inline int
11092 arm_arm_address_cost (rtx x)
11093 {
11094   enum rtx_code c  = GET_CODE (x);
11095 
11096   if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11097     return 0;
11098   if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11099     return 10;
11100 
11101   if (c == PLUS)
11102     {
11103       if (CONST_INT_P (XEXP (x, 1)))
11104 	return 2;
11105 
11106       if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11107 	return 3;
11108 
11109       return 4;
11110     }
11111 
11112   return 6;
11113 }
11114 
11115 static inline int
11116 arm_thumb_address_cost (rtx x)
11117 {
11118   enum rtx_code c  = GET_CODE (x);
11119 
11120   if (c == REG)
11121     return 1;
11122   if (c == PLUS
11123       && REG_P (XEXP (x, 0))
11124       && CONST_INT_P (XEXP (x, 1)))
11125     return 1;
11126 
11127   return 2;
11128 }
11129 
11130 static int
11131 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11132 		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11133 {
11134   return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11135 }
11136 
11137 /* Adjust cost hook for XScale.  */
11138 static bool
11139 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11140 			  int * cost)
11141 {
11142   /* Some true dependencies can have a higher cost depending
11143      on precisely how certain input operands are used.  */
11144   if (dep_type == 0
11145       && recog_memoized (insn) >= 0
11146       && recog_memoized (dep) >= 0)
11147     {
11148       int shift_opnum = get_attr_shift (insn);
11149       enum attr_type attr_type = get_attr_type (dep);
11150 
11151       /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11152 	 operand for INSN.  If we have a shifted input operand and the
11153 	 instruction we depend on is another ALU instruction, then we may
11154 	 have to account for an additional stall.  */
11155       if (shift_opnum != 0
11156 	  && (attr_type == TYPE_ALU_SHIFT_IMM
11157 	      || attr_type == TYPE_ALUS_SHIFT_IMM
11158 	      || attr_type == TYPE_LOGIC_SHIFT_IMM
11159 	      || attr_type == TYPE_LOGICS_SHIFT_IMM
11160 	      || attr_type == TYPE_ALU_SHIFT_REG
11161 	      || attr_type == TYPE_ALUS_SHIFT_REG
11162 	      || attr_type == TYPE_LOGIC_SHIFT_REG
11163 	      || attr_type == TYPE_LOGICS_SHIFT_REG
11164 	      || attr_type == TYPE_MOV_SHIFT
11165 	      || attr_type == TYPE_MVN_SHIFT
11166 	      || attr_type == TYPE_MOV_SHIFT_REG
11167 	      || attr_type == TYPE_MVN_SHIFT_REG))
11168 	{
11169 	  rtx shifted_operand;
11170 	  int opno;
11171 
11172 	  /* Get the shifted operand.  */
11173 	  extract_insn (insn);
11174 	  shifted_operand = recog_data.operand[shift_opnum];
11175 
11176 	  /* Iterate over all the operands in DEP.  If we write an operand
11177 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11178 	     cost of this dependency.  */
11179 	  extract_insn (dep);
11180 	  preprocess_constraints (dep);
11181 	  for (opno = 0; opno < recog_data.n_operands; opno++)
11182 	    {
11183 	      /* We can ignore strict inputs.  */
11184 	      if (recog_data.operand_type[opno] == OP_IN)
11185 		continue;
11186 
11187 	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
11188 					   shifted_operand))
11189 		{
11190 		  *cost = 2;
11191 		  return false;
11192 		}
11193 	    }
11194 	}
11195     }
11196   return true;
11197 }
11198 
11199 /* Adjust cost hook for Cortex A9.  */
11200 static bool
11201 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11202 			     int * cost)
11203 {
11204   switch (dep_type)
11205     {
11206     case REG_DEP_ANTI:
11207       *cost = 0;
11208       return false;
11209 
11210     case REG_DEP_TRUE:
11211     case REG_DEP_OUTPUT:
11212 	if (recog_memoized (insn) >= 0
11213 	    && recog_memoized (dep) >= 0)
11214 	  {
11215 	    if (GET_CODE (PATTERN (insn)) == SET)
11216 	      {
11217 		if (GET_MODE_CLASS
11218 		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11219 		  || GET_MODE_CLASS
11220 		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11221 		  {
11222 		    enum attr_type attr_type_insn = get_attr_type (insn);
11223 		    enum attr_type attr_type_dep = get_attr_type (dep);
11224 
11225 		    /* By default all dependencies of the form
11226 		       s0 = s0 <op> s1
11227 		       s0 = s0 <op> s2
11228 		       have an extra latency of 1 cycle because
11229 		       of the input and output dependency in this
11230 		       case.  However this gets modeled as a true
11231 		       dependency and hence all these checks.  */
11232 		    if (REG_P (SET_DEST (PATTERN (insn)))
11233 			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
11234 		      {
11235 			/* FMACS is a special case where the dependent
11236 			   instruction can be issued 3 cycles before
11237 			   the normal latency in case of an output
11238 			   dependency.  */
11239 			if ((attr_type_insn == TYPE_FMACS
11240 			     || attr_type_insn == TYPE_FMACD)
11241 			    && (attr_type_dep == TYPE_FMACS
11242 				|| attr_type_dep == TYPE_FMACD))
11243 			  {
11244 			    if (dep_type == REG_DEP_OUTPUT)
11245 			      *cost = insn_default_latency (dep) - 3;
11246 			    else
11247 			      *cost = insn_default_latency (dep);
11248 			    return false;
11249 			  }
11250 			else
11251 			  {
11252 			    if (dep_type == REG_DEP_OUTPUT)
11253 			      *cost = insn_default_latency (dep) + 1;
11254 			    else
11255 			      *cost = insn_default_latency (dep);
11256 			  }
11257 			return false;
11258 		      }
11259 		  }
11260 	      }
11261 	  }
11262 	break;
11263 
11264     default:
11265       gcc_unreachable ();
11266     }
11267 
11268   return true;
11269 }
11270 
11271 /* Adjust cost hook for FA726TE.  */
11272 static bool
11273 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11274 			   int * cost)
11275 {
11276   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11277      by a predicated one) has a penalty of 3.  */
11278   if (dep_type == REG_DEP_TRUE
11279       && recog_memoized (insn) >= 0
11280       && recog_memoized (dep) >= 0
11281       && get_attr_conds (dep) == CONDS_SET)
11282     {
11283       /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
11284       if (get_attr_conds (insn) == CONDS_USE
11285           && get_attr_type (insn) != TYPE_BRANCH)
11286         {
11287           *cost = 3;
11288           return false;
11289         }
11290 
11291       if (GET_CODE (PATTERN (insn)) == COND_EXEC
11292           || get_attr_conds (insn) == CONDS_USE)
11293         {
11294           *cost = 0;
11295           return false;
11296         }
11297     }
11298 
11299   return true;
11300 }
11301 
11302 /* Implement TARGET_REGISTER_MOVE_COST.
11303 
11304    Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
11305    move is typically more expensive than a single memory access.  We set
11306    the cost to less than two memory accesses so that floating
11307    point to integer conversion does not go through memory.  */
11308 
11309 int
11310 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11311 			reg_class_t from, reg_class_t to)
11312 {
11313   if (TARGET_32BIT)
11314     {
11315       if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11316 	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11317 	return 15;
11318       else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11319 	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11320 	return 4;
11321       else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11322 	return 20;
11323       else
11324 	return 2;
11325     }
11326   else
11327     {
11328       if (from == HI_REGS || to == HI_REGS)
11329 	return 4;
11330       else
11331 	return 2;
11332     }
11333 }
11334 
11335 /* Implement TARGET_MEMORY_MOVE_COST.  */
11336 
11337 int
11338 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11339 		      bool in ATTRIBUTE_UNUSED)
11340 {
11341   if (TARGET_32BIT)
11342     return 10;
11343   else
11344     {
11345       if (GET_MODE_SIZE (mode) < 4)
11346 	return 8;
11347       else
11348 	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11349     }
11350 }
11351 
11352 /* Vectorizer cost model implementation.  */
11353 
11354 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
11355 static int
11356 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11357 				tree vectype,
11358 				int misalign ATTRIBUTE_UNUSED)
11359 {
11360   unsigned elements;
11361 
11362   switch (type_of_cost)
11363     {
11364       case scalar_stmt:
11365         return current_tune->vec_costs->scalar_stmt_cost;
11366 
11367       case scalar_load:
11368         return current_tune->vec_costs->scalar_load_cost;
11369 
11370       case scalar_store:
11371         return current_tune->vec_costs->scalar_store_cost;
11372 
11373       case vector_stmt:
11374         return current_tune->vec_costs->vec_stmt_cost;
11375 
11376       case vector_load:
11377         return current_tune->vec_costs->vec_align_load_cost;
11378 
11379       case vector_store:
11380         return current_tune->vec_costs->vec_store_cost;
11381 
11382       case vec_to_scalar:
11383         return current_tune->vec_costs->vec_to_scalar_cost;
11384 
11385       case scalar_to_vec:
11386         return current_tune->vec_costs->scalar_to_vec_cost;
11387 
11388       case unaligned_load:
11389       case vector_gather_load:
11390         return current_tune->vec_costs->vec_unalign_load_cost;
11391 
11392       case unaligned_store:
11393       case vector_scatter_store:
11394         return current_tune->vec_costs->vec_unalign_store_cost;
11395 
11396       case cond_branch_taken:
11397         return current_tune->vec_costs->cond_taken_branch_cost;
11398 
11399       case cond_branch_not_taken:
11400         return current_tune->vec_costs->cond_not_taken_branch_cost;
11401 
11402       case vec_perm:
11403       case vec_promote_demote:
11404         return current_tune->vec_costs->vec_stmt_cost;
11405 
11406       case vec_construct:
11407 	elements = TYPE_VECTOR_SUBPARTS (vectype);
11408 	return elements / 2 + 1;
11409 
11410       default:
11411         gcc_unreachable ();
11412     }
11413 }
11414 
11415 /* Implement targetm.vectorize.add_stmt_cost.  */
11416 
11417 static unsigned
11418 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11419 		   struct _stmt_vec_info *stmt_info, int misalign,
11420 		   enum vect_cost_model_location where)
11421 {
11422   unsigned *cost = (unsigned *) data;
11423   unsigned retval = 0;
11424 
11425   if (flag_vect_cost_model)
11426     {
11427       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11428       int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11429 
11430       /* Statements in an inner loop relative to the loop being
11431 	 vectorized are weighted more heavily.  The value here is
11432 	 arbitrary and could potentially be improved with analysis.  */
11433       if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11434 	count *= 50;  /* FIXME.  */
11435 
11436       retval = (unsigned) (count * stmt_cost);
11437       cost[where] += retval;
11438     }
11439 
11440   return retval;
11441 }
11442 
11443 /* Return true if and only if this insn can dual-issue only as older.  */
11444 static bool
11445 cortexa7_older_only (rtx_insn *insn)
11446 {
11447   if (recog_memoized (insn) < 0)
11448     return false;
11449 
11450   switch (get_attr_type (insn))
11451     {
11452     case TYPE_ALU_DSP_REG:
11453     case TYPE_ALU_SREG:
11454     case TYPE_ALUS_SREG:
11455     case TYPE_LOGIC_REG:
11456     case TYPE_LOGICS_REG:
11457     case TYPE_ADC_REG:
11458     case TYPE_ADCS_REG:
11459     case TYPE_ADR:
11460     case TYPE_BFM:
11461     case TYPE_REV:
11462     case TYPE_MVN_REG:
11463     case TYPE_SHIFT_IMM:
11464     case TYPE_SHIFT_REG:
11465     case TYPE_LOAD_BYTE:
11466     case TYPE_LOAD_4:
11467     case TYPE_STORE_4:
11468     case TYPE_FFARITHS:
11469     case TYPE_FADDS:
11470     case TYPE_FFARITHD:
11471     case TYPE_FADDD:
11472     case TYPE_FMOV:
11473     case TYPE_F_CVT:
11474     case TYPE_FCMPS:
11475     case TYPE_FCMPD:
11476     case TYPE_FCONSTS:
11477     case TYPE_FCONSTD:
11478     case TYPE_FMULS:
11479     case TYPE_FMACS:
11480     case TYPE_FMULD:
11481     case TYPE_FMACD:
11482     case TYPE_FDIVS:
11483     case TYPE_FDIVD:
11484     case TYPE_F_MRC:
11485     case TYPE_F_MRRC:
11486     case TYPE_F_FLAG:
11487     case TYPE_F_LOADS:
11488     case TYPE_F_STORES:
11489       return true;
11490     default:
11491       return false;
11492     }
11493 }
11494 
11495 /* Return true if and only if this insn can dual-issue as younger.  */
11496 static bool
11497 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11498 {
11499   if (recog_memoized (insn) < 0)
11500     {
11501       if (verbose > 5)
11502         fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11503       return false;
11504     }
11505 
11506   switch (get_attr_type (insn))
11507     {
11508     case TYPE_ALU_IMM:
11509     case TYPE_ALUS_IMM:
11510     case TYPE_LOGIC_IMM:
11511     case TYPE_LOGICS_IMM:
11512     case TYPE_EXTEND:
11513     case TYPE_MVN_IMM:
11514     case TYPE_MOV_IMM:
11515     case TYPE_MOV_REG:
11516     case TYPE_MOV_SHIFT:
11517     case TYPE_MOV_SHIFT_REG:
11518     case TYPE_BRANCH:
11519     case TYPE_CALL:
11520       return true;
11521     default:
11522       return false;
11523     }
11524 }
11525 
11526 
11527 /* Look for an instruction that can dual issue only as an older
11528    instruction, and move it in front of any instructions that can
11529    dual-issue as younger, while preserving the relative order of all
11530    other instructions in the ready list.  This is a heuristic to help
11531    dual-issue in later cycles, by postponing issue of more flexible
11532    instructions.  This heuristic may affect dual issue opportunities
11533    in the current cycle.  */
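/* Illustrative example (not taken from a real schedule): if the ready list
   in issue order is { younger0, younger1, older_only, younger2 }, the
   reordering below moves the older-only insn to the front, giving
   { older_only, younger0, younger1, younger2 }, so the more flexible
   younger insns remain available for pairing in later cycles.  */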
11534 static void
11535 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11536 			int *n_readyp, int clock)
11537 {
11538   int i;
11539   int first_older_only = -1, first_younger = -1;
11540 
11541   if (verbose > 5)
11542     fprintf (file,
11543              ";; sched_reorder for cycle %d with %d insns in ready list\n",
11544              clock,
11545              *n_readyp);
11546 
11547   /* Traverse the ready list from the head (the instruction to issue
11548      first), looking for the first instruction that can issue as
11549      younger and the first instruction that can dual-issue only as
11550      older.  */
11551   for (i = *n_readyp - 1; i >= 0; i--)
11552     {
11553       rtx_insn *insn = ready[i];
11554       if (cortexa7_older_only (insn))
11555         {
11556           first_older_only = i;
11557           if (verbose > 5)
11558             fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11559           break;
11560         }
11561       else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11562         first_younger = i;
11563     }
11564 
11565   /* Nothing to reorder because either no younger insn found or insn
11566      that can dual-issue only as older appears before any insn that
11567      can dual-issue as younger.  */
11568   if (first_younger == -1)
11569     {
11570       if (verbose > 5)
11571         fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11572       return;
11573     }
11574 
11575   /* Nothing to reorder because no older-only insn in the ready list.  */
11576   if (first_older_only == -1)
11577     {
11578       if (verbose > 5)
11579         fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11580       return;
11581     }
11582 
11583   /* Move first_older_only insn before first_younger.  */
11584   if (verbose > 5)
11585     fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11586              INSN_UID(ready [first_older_only]),
11587              INSN_UID(ready [first_younger]));
11588   rtx_insn *first_older_only_insn = ready [first_older_only];
11589   for (i = first_older_only; i < first_younger; i++)
11590     {
11591       ready[i] = ready[i+1];
11592     }
11593 
11594   ready[i] = first_older_only_insn;
11595   return;
11596 }
11597 
11598 /* Implement TARGET_SCHED_REORDER. */
11599 static int
11600 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11601                    int clock)
11602 {
11603   switch (arm_tune)
11604     {
11605     case TARGET_CPU_cortexa7:
11606       cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11607       break;
11608     default:
11609       /* Do nothing for other cores.  */
11610       break;
11611     }
11612 
11613   return arm_issue_rate ();
11614 }
11615 
11616 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11617    It corrects the value of COST based on the relationship between
11618    INSN and DEP through the dependence LINK.  It returns the new
11619    value. There is a per-core adjust_cost hook to adjust scheduler costs
11620    and the per-core hook can choose to completely override the generic
11621    adjust_cost function. Only put bits of code into arm_adjust_cost that
11622    are common across all cores.  */
11623 static int
11624 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11625 		 unsigned int)
11626 {
11627   rtx i_pat, d_pat;
11628 
11629  /* When generating Thumb-1 code, we want to place flag-setting operations
11630     close to a conditional branch which depends on them, so that we can
11631     omit the comparison. */
11632   if (TARGET_THUMB1
11633       && dep_type == 0
11634       && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11635       && recog_memoized (dep) >= 0
11636       && get_attr_conds (dep) == CONDS_SET)
11637     return 0;
11638 
11639   if (current_tune->sched_adjust_cost != NULL)
11640     {
11641       if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11642 	return cost;
11643     }
11644 
11645   /* XXX Is this strictly true?  */
11646   if (dep_type == REG_DEP_ANTI
11647       || dep_type == REG_DEP_OUTPUT)
11648     return 0;
11649 
11650   /* Call insns don't incur a stall, even if they follow a load.  */
11651   if (dep_type == 0
11652       && CALL_P (insn))
11653     return 1;
11654 
11655   if ((i_pat = single_set (insn)) != NULL
11656       && MEM_P (SET_SRC (i_pat))
11657       && (d_pat = single_set (dep)) != NULL
11658       && MEM_P (SET_DEST (d_pat)))
11659     {
11660       rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11661       /* This is a load after a store; there is no conflict if the load reads
11662 	 from a cached area.  Assume that loads from the stack, and from the
11663 	 constant pool are cached, and that others will miss.  This is a
11664 	 hack.  */
11665 
11666       if ((GET_CODE (src_mem) == SYMBOL_REF
11667 	   && CONSTANT_POOL_ADDRESS_P (src_mem))
11668 	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
11669 	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
11670 	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11671 	return 1;
11672     }
11673 
11674   return cost;
11675 }
11676 
11677 int
11678 arm_max_conditional_execute (void)
11679 {
11680   return max_insns_skipped;
11681 }
11682 
11683 static int
11684 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11685 {
11686   if (TARGET_32BIT)
11687     return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11688   else
11689     return (optimize > 0) ? 2 : 0;
11690 }
11691 
11692 static int
11693 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11694 {
11695   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11696 }
11697 
11698 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11699    on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11700    sequences of non-executed instructions in IT blocks probably take the same
11701    amount of time as executed instructions (and the IT instruction itself takes
11702    space in icache).  This function was experimentally determined to give good
11703    results on a popular embedded benchmark.  */
11704 
11705 static int
11706 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11707 {
11708   return (TARGET_32BIT && speed_p) ? 1
11709          : arm_default_branch_cost (speed_p, predictable_p);
11710 }
11711 
11712 static int
11713 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11714 {
11715   return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11716 }
11717 
11718 static bool fp_consts_inited = false;
11719 
11720 static REAL_VALUE_TYPE value_fp0;
11721 
11722 static void
11723 init_fp_table (void)
11724 {
11725   REAL_VALUE_TYPE r;
11726 
11727   r = REAL_VALUE_ATOF ("0", DFmode);
11728   value_fp0 = r;
11729   fp_consts_inited = true;
11730 }
11731 
11732 /* Return TRUE if rtx X is a valid immediate FP constant.  */
11733 int
11734 arm_const_double_rtx (rtx x)
11735 {
11736   const REAL_VALUE_TYPE *r;
11737 
11738   if (!fp_consts_inited)
11739     init_fp_table ();
11740 
11741   r = CONST_DOUBLE_REAL_VALUE (x);
11742   if (REAL_VALUE_MINUS_ZERO (*r))
11743     return 0;
11744 
11745   if (real_equal (r, &value_fp0))
11746     return 1;
11747 
11748   return 0;
11749 }
11750 
11751 /* VFPv3 has a fairly wide range of representable immediates, formed from
11752    "quarter-precision" floating-point values. These can be evaluated using this
11753    formula (with ^ for exponentiation):
11754 
11755      -1^s * n * 2^-r
11756 
11757    Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11758    16 <= n <= 31 and 0 <= r <= 7.
11759 
11760    These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11761 
11762      - A (most-significant) is the sign bit.
11763      - BCD are the exponent (encoded as r XOR 3).
11764      - EFGH are the mantissa (encoded as n - 16).
11765 */
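/* As a worked example of the encoding above (illustrative only): 1.0 can be
   written as -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes as
   A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = (16 - 16) = 0b0000, giving the
   8-bit index 0b01110000 (0x70).  Likewise 0.5 = 16 * 2^-5 encodes as 0x60.
   The authoritative computation is vfp3_const_double_index below.  */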
11766 
11767 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11768    fconst[sd] instruction, or -1 if X isn't suitable.  */
11769 static int
11770 vfp3_const_double_index (rtx x)
11771 {
11772   REAL_VALUE_TYPE r, m;
11773   int sign, exponent;
11774   unsigned HOST_WIDE_INT mantissa, mant_hi;
11775   unsigned HOST_WIDE_INT mask;
11776   int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11777   bool fail;
11778 
11779   if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11780     return -1;
11781 
11782   r = *CONST_DOUBLE_REAL_VALUE (x);
11783 
11784   /* We can't represent these things, so detect them first.  */
11785   if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11786     return -1;
11787 
11788   /* Extract sign, exponent and mantissa.  */
11789   sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11790   r = real_value_abs (&r);
11791   exponent = REAL_EXP (&r);
11792   /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11793      highest (sign) bit, with a fixed binary point at bit point_pos.
11794      WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11795      bits for the mantissa, this may fail (low bits would be lost).  */
11796   real_ldexp (&m, &r, point_pos - exponent);
11797   wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11798   mantissa = w.elt (0);
11799   mant_hi = w.elt (1);
11800 
11801   /* If there are bits set in the low part of the mantissa, we can't
11802      represent this value.  */
11803   if (mantissa != 0)
11804     return -1;
11805 
11806   /* Now make it so that mantissa contains the most-significant bits, and move
11807      the point_pos to indicate that the least-significant bits have been
11808      discarded.  */
11809   point_pos -= HOST_BITS_PER_WIDE_INT;
11810   mantissa = mant_hi;
11811 
11812   /* We can permit four significant bits of mantissa only, plus a high bit
11813      which is always 1.  */
11814   mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11815   if ((mantissa & mask) != 0)
11816     return -1;
11817 
11818   /* Now we know the mantissa is in range, chop off the unneeded bits.  */
11819   mantissa >>= point_pos - 5;
11820 
11821   /* The mantissa may be zero. Disallow that case. (It's possible to load the
11822      floating-point immediate zero with Neon using an integer-zero load, but
11823      that case is handled elsewhere.)  */
11824   if (mantissa == 0)
11825     return -1;
11826 
11827   gcc_assert (mantissa >= 16 && mantissa <= 31);
11828 
11829   /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11830      normalized significands are in the range [1, 2). (Our mantissa is shifted
11831      left 4 places at this point relative to normalized IEEE754 values).  GCC
11832      internally uses [0.5, 1) (see real.c), so the exponent returned from
11833      REAL_EXP must be altered.  */
11834   exponent = 5 - exponent;
11835 
11836   if (exponent < 0 || exponent > 7)
11837     return -1;
11838 
11839   /* Sign, mantissa and exponent are now in the correct form to plug into the
11840      formula described in the comment above.  */
11841   return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11842 }
11843 
11844 /* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
11845 int
11846 vfp3_const_double_rtx (rtx x)
11847 {
11848   if (!TARGET_VFP3)
11849     return 0;
11850 
11851   return vfp3_const_double_index (x) != -1;
11852 }
11853 
11854 /* Recognize immediates which can be used in various Neon instructions. Legal
11855    immediates are described by the following table (for VMVN variants, the
11856    bitwise inverse of the constant shown is recognized. In either case, VMOV
11857    is output and the correct instruction to use for a given constant is chosen
11858    by the assembler). The constant shown is replicated across all elements of
11859    the destination vector.
11860 
11861    insn elems variant constant (binary)
11862    ---- ----- ------- -----------------
11863    vmov  i32     0    00000000 00000000 00000000 abcdefgh
11864    vmov  i32     1    00000000 00000000 abcdefgh 00000000
11865    vmov  i32     2    00000000 abcdefgh 00000000 00000000
11866    vmov  i32     3    abcdefgh 00000000 00000000 00000000
11867    vmov  i16     4    00000000 abcdefgh
11868    vmov  i16     5    abcdefgh 00000000
11869    vmvn  i32     6    00000000 00000000 00000000 abcdefgh
11870    vmvn  i32     7    00000000 00000000 abcdefgh 00000000
11871    vmvn  i32     8    00000000 abcdefgh 00000000 00000000
11872    vmvn  i32     9    abcdefgh 00000000 00000000 00000000
11873    vmvn  i16    10    00000000 abcdefgh
11874    vmvn  i16    11    abcdefgh 00000000
11875    vmov  i32    12    00000000 00000000 abcdefgh 11111111
11876    vmvn  i32    13    00000000 00000000 abcdefgh 11111111
11877    vmov  i32    14    00000000 abcdefgh 11111111 11111111
11878    vmvn  i32    15    00000000 abcdefgh 11111111 11111111
11879    vmov   i8    16    abcdefgh
11880    vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
11881                       eeeeeeee ffffffff gggggggg hhhhhhhh
11882    vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
11883    vmov  f32    19    00000000 00000000 00000000 00000000
11884 
11885    For case 18, B = !b. Representable values are exactly those accepted by
11886    vfp3_const_double_index, but are output as floating-point numbers rather
11887    than indices.
11888 
11889    For case 19, we will change it to vmov.i32 when assembling.
11890 
11891    Variants 0-5 (inclusive) may also be used as immediates for the second
11892    operand of VORR/VBIC instructions.
11893 
11894    The INVERSE argument causes the bitwise inverse of the given operand to be
11895    recognized instead (used for recognizing legal immediates for the VAND/VORN
11896    pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11897    *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11898    output, rather than the real insns vbic/vorr).
11899 
11900    INVERSE makes no difference to the recognition of float vectors.
11901 
11902    The return value is the variant of immediate as shown in the above table, or
11903    -1 if the given value doesn't match any of the listed patterns.
11904 */
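/* For example (illustrative only): a V4SImode constant with every element
   equal to 0x0000007f matches variant 0 above, while one with every element
   equal to 0xffffff80 matches variant 6, since its bitwise inverse fits the
   abcdefgh pattern shown for that variant.  */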
11905 static int
11906 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11907 		      rtx *modconst, int *elementwidth)
11908 {
11909 #define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
11910   matches = 1;					\
11911   for (i = 0; i < idx; i += (STRIDE))		\
11912     if (!(TEST))				\
11913       matches = 0;				\
11914   if (matches)					\
11915     {						\
11916       immtype = (CLASS);			\
11917       elsize = (ELSIZE);			\
11918       break;					\
11919     }
11920 
11921   unsigned int i, elsize = 0, idx = 0, n_elts;
11922   unsigned int innersize;
11923   unsigned char bytes[16];
11924   int immtype = -1, matches;
11925   unsigned int invmask = inverse ? 0xff : 0;
11926   bool vector = GET_CODE (op) == CONST_VECTOR;
11927 
11928   if (vector)
11929     n_elts = CONST_VECTOR_NUNITS (op);
11930   else
11931     {
11932       n_elts = 1;
11933       if (mode == VOIDmode)
11934 	mode = DImode;
11935     }
11936 
11937   innersize = GET_MODE_UNIT_SIZE (mode);
11938 
11939   /* Vectors of float constants.  */
11940   if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11941     {
11942       rtx el0 = CONST_VECTOR_ELT (op, 0);
11943 
11944       if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11945         return -1;
11946 
11947       /* FP16 vectors cannot be represented.  */
11948       if (GET_MODE_INNER (mode) == HFmode)
11949 	return -1;
11950 
11951       /* All elements in the vector must be the same.  Note that 0.0 and -0.0
11952 	 are distinct in this context.  */
11953       if (!const_vec_duplicate_p (op))
11954 	return -1;
11955 
11956       if (modconst)
11957         *modconst = CONST_VECTOR_ELT (op, 0);
11958 
11959       if (elementwidth)
11960         *elementwidth = 0;
11961 
11962       if (el0 == CONST0_RTX (GET_MODE (el0)))
11963 	return 19;
11964       else
11965 	return 18;
11966     }
11967 
11968   /* The tricks done in the code below apply for little-endian vector layout.
11969      For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11970      FIXME: Implement logic for big-endian vectors.  */
11971   if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11972     return -1;
11973 
11974   /* Splat vector constant out into a byte vector.  */
11975   for (i = 0; i < n_elts; i++)
11976     {
11977       rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11978       unsigned HOST_WIDE_INT elpart;
11979 
11980       gcc_assert (CONST_INT_P (el));
11981       elpart = INTVAL (el);
11982 
11983       for (unsigned int byte = 0; byte < innersize; byte++)
11984 	{
11985 	  bytes[idx++] = (elpart & 0xff) ^ invmask;
11986 	  elpart >>= BITS_PER_UNIT;
11987 	}
11988     }
11989 
11990   /* Sanity check.  */
11991   gcc_assert (idx == GET_MODE_SIZE (mode));
11992 
11993   do
11994     {
11995       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11996 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11997 
11998       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11999 		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12000 
12001       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12002 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12003 
12004       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12005 		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12006 
12007       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12008 
12009       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12010 
12011       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12012 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12013 
12014       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12015 		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12016 
12017       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12018 		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12019 
12020       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12021 		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12022 
12023       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12024 
12025       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12026 
12027       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12028 			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);
12029 
12030       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12031 			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12032 
12033       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12034 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12035 
12036       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12037 			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12038 
12039       CHECK (1, 8, 16, bytes[i] == bytes[0]);
12040 
12041       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12042 			&& bytes[i] == bytes[(i + 8) % idx]);
12043     }
12044   while (0);
12045 
12046   if (immtype == -1)
12047     return -1;
12048 
12049   if (elementwidth)
12050     *elementwidth = elsize;
12051 
12052   if (modconst)
12053     {
12054       unsigned HOST_WIDE_INT imm = 0;
12055 
12056       /* Un-invert bytes of recognized vector, if necessary.  */
12057       if (invmask != 0)
12058         for (i = 0; i < idx; i++)
12059           bytes[i] ^= invmask;
12060 
12061       if (immtype == 17)
12062         {
12063           /* FIXME: Broken on 32-bit H_W_I hosts.  */
12064           gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12065 
12066           for (i = 0; i < 8; i++)
12067             imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12068                    << (i * BITS_PER_UNIT);
12069 
12070           *modconst = GEN_INT (imm);
12071         }
12072       else
12073         {
12074           unsigned HOST_WIDE_INT imm = 0;
12075 
12076           for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12077             imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12078 
12079           *modconst = GEN_INT (imm);
12080         }
12081     }
12082 
12083   return immtype;
12084 #undef CHECK
12085 }
12086 
12087 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12088    VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12089    float elements), and a modified constant (whatever should be output for a
12090    VMOV) in *MODCONST.  */
12091 
12092 int
12093 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12094 			       rtx *modconst, int *elementwidth)
12095 {
12096   rtx tmpconst;
12097   int tmpwidth;
12098   int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12099 
12100   if (retval == -1)
12101     return 0;
12102 
12103   if (modconst)
12104     *modconst = tmpconst;
12105 
12106   if (elementwidth)
12107     *elementwidth = tmpwidth;
12108 
12109   return 1;
12110 }
12111 
12112 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
12113    the immediate is valid, write a constant suitable for using as an operand
12114    to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12115    *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */
12116 
12117 int
12118 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12119 				rtx *modconst, int *elementwidth)
12120 {
12121   rtx tmpconst;
12122   int tmpwidth;
12123   int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12124 
12125   if (retval < 0 || retval > 5)
12126     return 0;
12127 
12128   if (modconst)
12129     *modconst = tmpconst;
12130 
12131   if (elementwidth)
12132     *elementwidth = tmpwidth;
12133 
12134   return 1;
12135 }
12136 
12137 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
12138    the immediate is valid, write a constant suitable for using as an operand
12139    to VSHR/VSHL to *MODCONST and the corresponding element width to
12140    *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12141    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left or a right
12142    shift, since the two have different immediate ranges.  */
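/* For instance (illustrative only): with 8-bit elements, a VSHL immediate
   must lie in the range 0-7, whereas a VSHR immediate must lie in the range
   1-8; both bounds are checked against MAXSHIFT below.  */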
12143 int
12144 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12145 				rtx *modconst, int *elementwidth,
12146 				bool isleftshift)
12147 {
12148   unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12149   unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12150   unsigned HOST_WIDE_INT last_elt = 0;
12151   unsigned HOST_WIDE_INT maxshift;
12152 
12153   /* Check that all elements of the vector constant are the same.  */
12154   for (i = 0; i < n_elts; i++)
12155     {
12156       rtx el = CONST_VECTOR_ELT (op, i);
12157       unsigned HOST_WIDE_INT elpart;
12158 
12159       if (CONST_INT_P (el))
12160         elpart = INTVAL (el);
12161       else if (CONST_DOUBLE_P (el))
12162         return 0;
12163       else
12164         gcc_unreachable ();
12165 
12166       if (i != 0 && elpart != last_elt)
12167         return 0;
12168 
12169       last_elt = elpart;
12170     }
12171 
12172   /* Shift less than element size.  */
12173   maxshift = innersize * 8;
12174 
12175   if (isleftshift)
12176     {
12177       /* Left shift immediate value can be from 0 to <size>-1.  */
12178       if (last_elt >= maxshift)
12179         return 0;
12180     }
12181   else
12182     {
12183       /* Right shift immediate value can be from 1 to <size>.  */
12184       if (last_elt == 0 || last_elt > maxshift)
12185 	return 0;
12186     }
12187 
12188   if (elementwidth)
12189     *elementwidth = innersize * 8;
12190 
12191   if (modconst)
12192     *modconst = CONST_VECTOR_ELT (op, 0);
12193 
12194   return 1;
12195 }
12196 
12197 /* Return a string suitable for output of Neon immediate logic operation
12198    MNEM.  */
12199 
12200 char *
12201 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12202 			     int inverse, int quad)
12203 {
12204   int width, is_valid;
12205   static char templ[40];
12206 
12207   is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12208 
12209   gcc_assert (is_valid != 0);
12210 
12211   if (quad)
12212     sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12213   else
12214     sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12215 
12216   return templ;
12217 }
12218 
12219 /* Return a string suitable for output of Neon immediate shift operation
12220    (VSHR or VSHL) MNEM.  */
12221 
12222 char *
12223 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12224 			     machine_mode mode, int quad,
12225 			     bool isleftshift)
12226 {
12227   int width, is_valid;
12228   static char templ[40];
12229 
12230   is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12231   gcc_assert (is_valid != 0);
12232 
12233   if (quad)
12234     sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12235   else
12236     sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12237 
12238   return templ;
12239 }
12240 
12241 /* Output a sequence of pairwise operations to implement a reduction.
12242    NOTE: We do "too much work" here, because pairwise operations work on two
12243    registers-worth of operands in one go. Unfortunately we can't exploit those
12244    extra calculations to do the full operation in fewer steps, I don't think.
12245    Although all vector elements of the result but the first are ignored, we
12246    actually calculate the same result in each of the elements. An alternative
12247    such as initially loading a vector with zero to use as each of the second
12248    operands would use up an additional register and take an extra instruction,
12249    for no particular gain.  */
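/* For a four-element vector { a, b, c, d } this takes two pairwise steps
   (illustrative only): the first produces { a+b, c+d, a+b, c+d } and the
   second { a+b+c+d, ... }; only element 0 of the final result is used.  */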
12250 
12251 void
12252 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12253 		      rtx (*reduc) (rtx, rtx, rtx))
12254 {
12255   unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12256   rtx tmpsum = op1;
12257 
12258   for (i = parts / 2; i >= 1; i /= 2)
12259     {
12260       rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12261       emit_insn (reduc (dest, tmpsum, tmpsum));
12262       tmpsum = dest;
12263     }
12264 }
12265 
12266 /* If VALS is a vector constant that can be loaded into a register
12267    using VDUP, generate instructions to do so and return an RTX to
12268    assign to the register.  Otherwise return NULL_RTX.  */
12269 
12270 static rtx
12271 neon_vdup_constant (rtx vals)
12272 {
12273   machine_mode mode = GET_MODE (vals);
12274   machine_mode inner_mode = GET_MODE_INNER (mode);
12275   rtx x;
12276 
12277   if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12278     return NULL_RTX;
12279 
12280   if (!const_vec_duplicate_p (vals, &x))
12281     /* The elements are not all the same.  We could handle repeating
12282        patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12283        {0, C, 0, C, 0, C, 0, C} which can be loaded using
12284        vdup.i16).  */
12285     return NULL_RTX;
12286 
12287   /* We can load this constant by using VDUP and a constant in a
12288      single ARM register.  This will be cheaper than a vector
12289      load.  */
12290 
12291   x = copy_to_mode_reg (inner_mode, x);
12292   return gen_vec_duplicate (mode, x);
12293 }
12294 
12295 /* Generate code to load VALS, which is a PARALLEL containing only
12296    constants (for vec_init) or CONST_VECTOR, efficiently into a
12297    register.  Returns an RTX to copy into the register, or NULL_RTX
12298    for a PARALLEL that can not be converted into a CONST_VECTOR.  */
12299 
12300 rtx
12301 neon_make_constant (rtx vals)
12302 {
12303   machine_mode mode = GET_MODE (vals);
12304   rtx target;
12305   rtx const_vec = NULL_RTX;
12306   int n_elts = GET_MODE_NUNITS (mode);
12307   int n_const = 0;
12308   int i;
12309 
12310   if (GET_CODE (vals) == CONST_VECTOR)
12311     const_vec = vals;
12312   else if (GET_CODE (vals) == PARALLEL)
12313     {
12314       /* A CONST_VECTOR must contain only CONST_INTs and
12315 	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12316 	 Only store valid constants in a CONST_VECTOR.  */
12317       for (i = 0; i < n_elts; ++i)
12318 	{
12319 	  rtx x = XVECEXP (vals, 0, i);
12320 	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12321 	    n_const++;
12322 	}
12323       if (n_const == n_elts)
12324 	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12325     }
12326   else
12327     gcc_unreachable ();
12328 
12329   if (const_vec != NULL
12330       && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12331     /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
12332     return const_vec;
12333   else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12334     /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
12335        pipeline cycle; creating the constant takes one or two ARM
12336        pipeline cycles.  */
12337     return target;
12338   else if (const_vec != NULL_RTX)
12339     /* Load from constant pool.  On Cortex-A8 this takes two cycles
12340        (for either double or quad vectors).  We can not take advantage
12341        of single-cycle VLD1 because we need a PC-relative addressing
12342        mode.  */
12343     return const_vec;
12344   else
12345     /* A PARALLEL containing something not valid inside CONST_VECTOR.
12346        We can not construct an initializer.  */
12347     return NULL_RTX;
12348 }
12349 
12350 /* Initialize vector TARGET to VALS.  */
12351 
12352 void
12353 neon_expand_vector_init (rtx target, rtx vals)
12354 {
12355   machine_mode mode = GET_MODE (target);
12356   machine_mode inner_mode = GET_MODE_INNER (mode);
12357   int n_elts = GET_MODE_NUNITS (mode);
12358   int n_var = 0, one_var = -1;
12359   bool all_same = true;
12360   rtx x, mem;
12361   int i;
12362 
12363   for (i = 0; i < n_elts; ++i)
12364     {
12365       x = XVECEXP (vals, 0, i);
12366       if (!CONSTANT_P (x))
12367 	++n_var, one_var = i;
12368 
12369       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12370 	all_same = false;
12371     }
12372 
12373   if (n_var == 0)
12374     {
12375       rtx constant = neon_make_constant (vals);
12376       if (constant != NULL_RTX)
12377 	{
12378 	  emit_move_insn (target, constant);
12379 	  return;
12380 	}
12381     }
12382 
12383   /* Splat a single non-constant element if we can.  */
12384   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12385     {
12386       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12387       emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12388       return;
12389     }
12390 
12391   /* One field is non-constant.  Load constant then overwrite varying
12392      field.  This is more efficient than using the stack.  */
12393   if (n_var == 1)
12394     {
12395       rtx copy = copy_rtx (vals);
12396       rtx index = GEN_INT (one_var);
12397 
12398       /* Load constant part of vector, substitute neighboring value for
12399 	 varying element.  */
12400       XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12401       neon_expand_vector_init (target, copy);
12402 
12403       /* Insert variable.  */
12404       x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12405       switch (mode)
12406 	{
12407 	case E_V8QImode:
12408 	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12409 	  break;
12410 	case E_V16QImode:
12411 	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12412 	  break;
12413 	case E_V4HImode:
12414 	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12415 	  break;
12416 	case E_V8HImode:
12417 	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12418 	  break;
12419 	case E_V2SImode:
12420 	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12421 	  break;
12422 	case E_V4SImode:
12423 	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12424 	  break;
12425 	case E_V2SFmode:
12426 	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12427 	  break;
12428 	case E_V4SFmode:
12429 	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12430 	  break;
12431 	case E_V2DImode:
12432 	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12433 	  break;
12434 	default:
12435 	  gcc_unreachable ();
12436 	}
12437       return;
12438     }
12439 
12440   /* Construct the vector in memory one field at a time
12441      and load the whole vector.  */
12442   mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12443   for (i = 0; i < n_elts; i++)
12444     emit_move_insn (adjust_address_nv (mem, inner_mode,
12445 				    i * GET_MODE_SIZE (inner_mode)),
12446 		    XVECEXP (vals, 0, i));
12447   emit_move_insn (target, mem);
12448 }
12449 
12450 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
12451    an error if it doesn't.  EXP indicates the source location, which includes the
12452    inlining history for intrinsics.  */
12453 
12454 static void
12455 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12456 	      const_tree exp, const char *desc)
12457 {
12458   HOST_WIDE_INT lane;
12459 
12460   gcc_assert (CONST_INT_P (operand));
12461 
12462   lane = INTVAL (operand);
12463 
12464   if (lane < low || lane >= high)
12465     {
12466       if (exp)
12467 	error ("%K%s %wd out of range %wd - %wd",
12468 	       exp, desc, lane, low, high - 1);
12469       else
12470 	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12471     }
12472 }
12473 
12474 /* Bounds-check lanes.  */
12475 
12476 void
12477 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12478 		  const_tree exp)
12479 {
12480   bounds_check (operand, low, high, exp, "lane");
12481 }
12482 
12483 /* Bounds-check constants.  */
12484 
12485 void
12486 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12487 {
12488   bounds_check (operand, low, high, NULL_TREE, "constant");
12489 }
12490 
12491 HOST_WIDE_INT
12492 neon_element_bits (machine_mode mode)
12493 {
12494   return GET_MODE_UNIT_BITSIZE (mode);
12495 }
12496 
12497 
12498 /* Predicates for `match_operand' and `match_operator'.  */
12499 
12500 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12501    WB is true if full writeback address modes are allowed and is false
12502    if limited writeback address modes (POST_INC and PRE_DEC) are
12503    allowed.  */
12504 
12505 int
12506 arm_coproc_mem_operand (rtx op, bool wb)
12507 {
12508   rtx ind;
12509 
12510   /* Reject eliminable registers.  */
12511   if (! (reload_in_progress || reload_completed || lra_in_progress)
12512       && (   reg_mentioned_p (frame_pointer_rtx, op)
12513 	  || reg_mentioned_p (arg_pointer_rtx, op)
12514 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12515 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12516 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12517 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12518     return FALSE;
12519 
12520   /* Constants are converted into offsets from labels.  */
12521   if (!MEM_P (op))
12522     return FALSE;
12523 
12524   ind = XEXP (op, 0);
12525 
12526   if (reload_completed
12527       && (GET_CODE (ind) == LABEL_REF
12528 	  || (GET_CODE (ind) == CONST
12529 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12530 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12531 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12532     return TRUE;
12533 
12534   /* Match: (mem (reg)).  */
12535   if (REG_P (ind))
12536     return arm_address_register_rtx_p (ind, 0);
12537 
12538   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12539      acceptable in any case (subject to verification by
12540      arm_address_register_rtx_p).  We need WB to be true to accept
12541      PRE_INC and POST_DEC.  */
12542   if (GET_CODE (ind) == POST_INC
12543       || GET_CODE (ind) == PRE_DEC
12544       || (wb
12545 	  && (GET_CODE (ind) == PRE_INC
12546 	      || GET_CODE (ind) == POST_DEC)))
12547     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12548 
12549   if (wb
12550       && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12551       && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12552       && GET_CODE (XEXP (ind, 1)) == PLUS
12553       && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12554     ind = XEXP (ind, 1);
12555 
12556   /* Match:
12557      (plus (reg)
12558 	   (const)).  */
12559   if (GET_CODE (ind) == PLUS
12560       && REG_P (XEXP (ind, 0))
12561       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12562       && CONST_INT_P (XEXP (ind, 1))
12563       && INTVAL (XEXP (ind, 1)) > -1024
12564       && INTVAL (XEXP (ind, 1)) <  1024
12565       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12566     return TRUE;
12567 
12568   return FALSE;
12569 }
12570 
12571 /* Return TRUE if OP is a memory operand which we can load or store a vector
12572    to/from. TYPE is one of the following values:
12573     0 - Vector load/store (vldr)
12574     1 - Core registers (ldm)
12575     2 - Element/structure loads (vld1)
12576  */
12577 int
12578 neon_vector_mem_operand (rtx op, int type, bool strict)
12579 {
12580   rtx ind;
12581 
12582   /* Reject eliminable registers.  */
12583   if (strict && ! (reload_in_progress || reload_completed)
12584       && (reg_mentioned_p (frame_pointer_rtx, op)
12585 	  || reg_mentioned_p (arg_pointer_rtx, op)
12586 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12587 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12588 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12589 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12590     return FALSE;
12591 
12592   /* Constants are converted into offsets from labels.  */
12593   if (!MEM_P (op))
12594     return FALSE;
12595 
12596   ind = XEXP (op, 0);
12597 
12598   if (reload_completed
12599       && (GET_CODE (ind) == LABEL_REF
12600 	  || (GET_CODE (ind) == CONST
12601 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12602 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12603 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12604     return TRUE;
12605 
12606   /* Match: (mem (reg)).  */
12607   if (REG_P (ind))
12608     return arm_address_register_rtx_p (ind, 0);
12609 
12610   /* Allow post-increment with Neon registers.  */
12611   if ((type != 1 && GET_CODE (ind) == POST_INC)
12612       || (type == 0 && GET_CODE (ind) == PRE_DEC))
12613     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12614 
12615   /* Allow post-increment by register for VLDn.  */
12616   if (type == 2 && GET_CODE (ind) == POST_MODIFY
12617       && GET_CODE (XEXP (ind, 1)) == PLUS
12618       && REG_P (XEXP (XEXP (ind, 1), 1)))
12619      return true;
12620 
12621   /* Match:
12622      (plus (reg)
12623           (const)).  */
12624   if (type == 0
12625       && GET_CODE (ind) == PLUS
12626       && REG_P (XEXP (ind, 0))
12627       && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12628       && CONST_INT_P (XEXP (ind, 1))
12629       && INTVAL (XEXP (ind, 1)) > -1024
12630       /* For quad modes, we restrict the constant offset to be slightly less
12631 	 than what the instruction format permits.  We have no such constraint
12632 	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
12633       && (INTVAL (XEXP (ind, 1))
12634 	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12635       && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12636     return TRUE;
12637 
12638   return FALSE;
12639 }
12640 
12641 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12642    type.  */
12643 int
12644 neon_struct_mem_operand (rtx op)
12645 {
12646   rtx ind;
12647 
12648   /* Reject eliminable registers.  */
12649   if (! (reload_in_progress || reload_completed)
12650       && (   reg_mentioned_p (frame_pointer_rtx, op)
12651 	  || reg_mentioned_p (arg_pointer_rtx, op)
12652 	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
12653 	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12654 	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12655 	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12656     return FALSE;
12657 
12658   /* Constants are converted into offsets from labels.  */
12659   if (!MEM_P (op))
12660     return FALSE;
12661 
12662   ind = XEXP (op, 0);
12663 
12664   if (reload_completed
12665       && (GET_CODE (ind) == LABEL_REF
12666 	  || (GET_CODE (ind) == CONST
12667 	      && GET_CODE (XEXP (ind, 0)) == PLUS
12668 	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12669 	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12670     return TRUE;
12671 
12672   /* Match: (mem (reg)).  */
12673   if (REG_P (ind))
12674     return arm_address_register_rtx_p (ind, 0);
12675 
12676   /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
12677   if (GET_CODE (ind) == POST_INC
12678       || GET_CODE (ind) == PRE_DEC)
12679     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12680 
12681   return FALSE;
12682 }
12683 
12684 /* Return true if X is a register that will be eliminated later on.  */
12685 int
12686 arm_eliminable_register (rtx x)
12687 {
12688   return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12689 		       || REGNO (x) == ARG_POINTER_REGNUM
12690 		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12691 			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12692 }
12693 
12694 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12695    coprocessor registers.  Otherwise return NO_REGS.  */
12696 
12697 enum reg_class
12698 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12699 {
12700   if (mode == HFmode)
12701     {
12702       if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12703 	return GENERAL_REGS;
12704       if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12705 	return NO_REGS;
12706       return GENERAL_REGS;
12707     }
12708 
12709   /* The neon move patterns handle all legitimate vector and struct
12710      addresses.  */
12711   if (TARGET_NEON
12712       && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12713       && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12714 	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12715 	  || VALID_NEON_STRUCT_MODE (mode)))
12716     return NO_REGS;
12717 
12718   if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12719     return NO_REGS;
12720 
12721   return GENERAL_REGS;
12722 }
12723 
12724 /* Values which must be returned in the most-significant end of the return
12725    register.  */
12726 
12727 static bool
12728 arm_return_in_msb (const_tree valtype)
12729 {
12730   return (TARGET_AAPCS_BASED
12731           && BYTES_BIG_ENDIAN
12732 	  && (AGGREGATE_TYPE_P (valtype)
12733 	      || TREE_CODE (valtype) == COMPLEX_TYPE
12734 	      || FIXED_POINT_TYPE_P (valtype)));
12735 }
12736 
12737 /* Return TRUE if X references a SYMBOL_REF.  */
12738 int
12739 symbol_mentioned_p (rtx x)
12740 {
12741   const char * fmt;
12742   int i;
12743 
12744   if (GET_CODE (x) == SYMBOL_REF)
12745     return 1;
12746 
12747   /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12748      are constant offsets, not symbols.  */
12749   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12750     return 0;
12751 
12752   fmt = GET_RTX_FORMAT (GET_CODE (x));
12753 
12754   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12755     {
12756       if (fmt[i] == 'E')
12757 	{
12758 	  int j;
12759 
12760 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12761 	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
12762 	      return 1;
12763 	}
12764       else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12765 	return 1;
12766     }
12767 
12768   return 0;
12769 }
12770 
12771 /* Return TRUE if X references a LABEL_REF.  */
12772 int
12773 label_mentioned_p (rtx x)
12774 {
12775   const char * fmt;
12776   int i;
12777 
12778   if (GET_CODE (x) == LABEL_REF)
12779     return 1;
12780 
12781   /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12782      instruction, but they are constant offsets, not symbols.  */
12783   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12784     return 0;
12785 
12786   fmt = GET_RTX_FORMAT (GET_CODE (x));
12787   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12788     {
12789       if (fmt[i] == 'E')
12790 	{
12791 	  int j;
12792 
12793 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12794 	    if (label_mentioned_p (XVECEXP (x, i, j)))
12795 	      return 1;
12796 	}
12797       else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12798 	return 1;
12799     }
12800 
12801   return 0;
12802 }
12803 
12804 int
12805 tls_mentioned_p (rtx x)
12806 {
12807   switch (GET_CODE (x))
12808     {
12809     case CONST:
12810       return tls_mentioned_p (XEXP (x, 0));
12811 
12812     case UNSPEC:
12813       if (XINT (x, 1) == UNSPEC_TLS)
12814 	return 1;
12815 
12816     /* Fall through.  */
12817     default:
12818       return 0;
12819     }
12820 }
12821 
12822 /* Must not copy any rtx that uses a pc-relative address.
12823    Also, disallow copying of load-exclusive instructions that
12824    may appear after splitting of compare-and-swap-style operations
12825    so as to prevent those loops from being transformed away from their
12826    canonical forms (see PR 69904).  */
12827 
12828 static bool
12829 arm_cannot_copy_insn_p (rtx_insn *insn)
12830 {
12831   /* The tls call insn cannot be copied, as it is paired with a data
12832      word.  */
12833   if (recog_memoized (insn) == CODE_FOR_tlscall)
12834     return true;
12835 
12836   subrtx_iterator::array_type array;
12837   FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12838     {
12839       const_rtx x = *iter;
12840       if (GET_CODE (x) == UNSPEC
12841 	  && (XINT (x, 1) == UNSPEC_PIC_BASE
12842 	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12843 	return true;
12844     }
12845 
12846   rtx set = single_set (insn);
12847   if (set)
12848     {
12849       rtx src = SET_SRC (set);
12850       if (GET_CODE (src) == ZERO_EXTEND)
12851 	src = XEXP (src, 0);
12852 
12853       /* Catch the load-exclusive and load-acquire operations.  */
12854       if (GET_CODE (src) == UNSPEC_VOLATILE
12855 	  && (XINT (src, 1) == VUNSPEC_LL
12856 	      || XINT (src, 1) == VUNSPEC_LAX))
12857 	return true;
12858     }
12859   return false;
12860 }
12861 
12862 enum rtx_code
12863 minmax_code (rtx x)
12864 {
12865   enum rtx_code code = GET_CODE (x);
12866 
12867   switch (code)
12868     {
12869     case SMAX:
12870       return GE;
12871     case SMIN:
12872       return LE;
12873     case UMIN:
12874       return LEU;
12875     case UMAX:
12876       return GEU;
12877     default:
12878       gcc_unreachable ();
12879     }
12880 }
12881 
12882 /* Match pair of min/max operators that can be implemented via usat/ssat.  */
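/* For instance (illustrative): a USAT of width 8 corresponds to
   LO_BOUND == 0 and HI_BOUND == 255, giving *MASK == 8, while an SSAT of
   width 8 corresponds to LO_BOUND == -128 and HI_BOUND == 127, giving
   *MASK == exact_log2 (128) + 1 == 8.  */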
12883 
12884 bool
12885 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12886 			int *mask, bool *signed_sat)
12887 {
12888   /* The high bound must be a power of two minus one.  */
12889   int log = exact_log2 (INTVAL (hi_bound) + 1);
12890   if (log == -1)
12891     return false;
12892 
12893   /* The low bound is either zero (for usat) or one less than the
12894      negation of the high bound (for ssat).  */
12895   if (INTVAL (lo_bound) == 0)
12896     {
12897       if (mask)
12898         *mask = log;
12899       if (signed_sat)
12900         *signed_sat = false;
12901 
12902       return true;
12903     }
12904 
12905   if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12906     {
12907       if (mask)
12908         *mask = log + 1;
12909       if (signed_sat)
12910         *signed_sat = true;
12911 
12912       return true;
12913     }
12914 
12915   return false;
12916 }
12917 
12918 /* Return 1 if memory locations are adjacent.  */
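/* For example (illustrative): (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent, since they share a
   base register and their offsets differ by exactly 4 (additional
   restrictions apply when the target has load delay slots, as handled
   below).  */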
12919 int
12920 adjacent_mem_locations (rtx a, rtx b)
12921 {
12922   /* We don't guarantee to preserve the order of these memory refs.  */
12923   if (volatile_refs_p (a) || volatile_refs_p (b))
12924     return 0;
12925 
12926   if ((REG_P (XEXP (a, 0))
12927        || (GET_CODE (XEXP (a, 0)) == PLUS
12928 	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12929       && (REG_P (XEXP (b, 0))
12930 	  || (GET_CODE (XEXP (b, 0)) == PLUS
12931 	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12932     {
12933       HOST_WIDE_INT val0 = 0, val1 = 0;
12934       rtx reg0, reg1;
12935       int val_diff;
12936 
12937       if (GET_CODE (XEXP (a, 0)) == PLUS)
12938         {
12939 	  reg0 = XEXP (XEXP (a, 0), 0);
12940 	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12941         }
12942       else
12943 	reg0 = XEXP (a, 0);
12944 
12945       if (GET_CODE (XEXP (b, 0)) == PLUS)
12946         {
12947 	  reg1 = XEXP (XEXP (b, 0), 0);
12948 	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12949         }
12950       else
12951 	reg1 = XEXP (b, 0);
12952 
12953       /* Don't accept any offset that will require multiple
12954 	 instructions to handle, since this would cause the
12955 	 arith_adjacentmem pattern to output an overlong sequence.  */
12956       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12957 	return 0;
12958 
12959       /* Don't allow an eliminable register: register elimination can make
12960 	 the offset too large.  */
12961       if (arm_eliminable_register (reg0))
12962 	return 0;
12963 
12964       val_diff = val1 - val0;
12965 
12966       if (arm_ld_sched)
12967 	{
12968 	  /* If the target has load delay slots, then there's no benefit
12969 	     to using an ldm instruction unless the offset is zero and
12970 	     we are optimizing for size.  */
12971 	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12972 		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12973 		  && (val_diff == 4 || val_diff == -4));
12974 	}
12975 
12976       return ((REGNO (reg0) == REGNO (reg1))
12977 	      && (val_diff == 4 || val_diff == -4));
12978     }
12979 
12980   return 0;
12981 }
12982 
12983 /* Return true if OP is a valid load or store multiple operation.  LOAD is true
12984    for load operations, false for store operations.  CONSECUTIVE is true
12985    if the register numbers in the operation must be consecutive in the register
12986    bank. RETURN_PC is true if value is to be loaded in PC.
12987    The pattern we are trying to match for load is:
12988      [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12989       (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12990        :
12991        :
12992       (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12993      ]
12994      where
12995      1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12996      2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12997      3.  If consecutive is TRUE, then for kth register being loaded,
12998          REGNO (R_dk) = REGNO (R_d0) + k.
12999    The pattern for store is similar.  */
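/* With write-back, the first element of the PARALLEL is instead the base
   register update, e.g. (illustrative):
     [(SET (addr) (PLUS (addr) (n * <reg_increment>)))
      (SET (R_d0) (MEM (addr)))
      (SET (R_d1) (MEM (PLUS (addr) (<reg_increment>))))
       :
     ]
   where n is the number of registers transferred; this is what the
   write-back check below looks for.  */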
13000 bool
13001 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13002                      bool consecutive, bool return_pc)
13003 {
13004   HOST_WIDE_INT count = XVECLEN (op, 0);
13005   rtx reg, mem, addr;
13006   unsigned regno;
13007   unsigned first_regno;
13008   HOST_WIDE_INT i = 1, base = 0, offset = 0;
13009   rtx elt;
13010   bool addr_reg_in_reglist = false;
13011   bool update = false;
13012   int reg_increment;
13013   int offset_adj;
13014   int regs_per_val;
13015 
13016   /* If not in SImode, then registers must be consecutive
13017      (e.g., VLDM instructions for DFmode).  */
13018   gcc_assert ((mode == SImode) || consecutive);
13019   /* Setting return_pc for stores is illegal.  */
13020   gcc_assert (!return_pc || load);
13021 
13022   /* Set up the increments and the regs per val based on the mode.  */
13023   reg_increment = GET_MODE_SIZE (mode);
13024   regs_per_val = reg_increment / 4;
13025   offset_adj = return_pc ? 1 : 0;
13026 
13027   if (count <= 1
13028       || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13029       || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13030     return false;
13031 
13032   /* Check if this is a write-back.  */
13033   elt = XVECEXP (op, 0, offset_adj);
13034   if (GET_CODE (SET_SRC (elt)) == PLUS)
13035     {
13036       i++;
13037       base = 1;
13038       update = true;
13039 
13040       /* The offset adjustment must be the number of registers being
13041          popped times the size of a single register.  */
13042       if (!REG_P (SET_DEST (elt))
13043           || !REG_P (XEXP (SET_SRC (elt), 0))
13044           || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13045           || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13046           || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13047              ((count - 1 - offset_adj) * reg_increment))
13048         return false;
13049     }
13050 
13051   i = i + offset_adj;
13052   base = base + offset_adj;
13053   /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13054      success depends on the type: VLDM can do just one reg,
13055      LDM must do at least two.  */
13056   if ((count <= i) && (mode == SImode))
13057       return false;
13058 
13059   elt = XVECEXP (op, 0, i - 1);
13060   if (GET_CODE (elt) != SET)
13061     return false;
13062 
13063   if (load)
13064     {
13065       reg = SET_DEST (elt);
13066       mem = SET_SRC (elt);
13067     }
13068   else
13069     {
13070       reg = SET_SRC (elt);
13071       mem = SET_DEST (elt);
13072     }
13073 
13074   if (!REG_P (reg) || !MEM_P (mem))
13075     return false;
13076 
13077   regno = REGNO (reg);
13078   first_regno = regno;
13079   addr = XEXP (mem, 0);
13080   if (GET_CODE (addr) == PLUS)
13081     {
13082       if (!CONST_INT_P (XEXP (addr, 1)))
13083 	return false;
13084 
13085       offset = INTVAL (XEXP (addr, 1));
13086       addr = XEXP (addr, 0);
13087     }
13088 
13089   if (!REG_P (addr))
13090     return false;
13091 
13092   /* Don't allow SP to be loaded unless it is also the base register. It
13093      guarantees that SP is reset correctly when an LDM instruction
13094      is interrupted. Otherwise, we might end up with a corrupt stack.  */
13095   if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13096     return false;
13097 
13098   for (; i < count; i++)
13099     {
13100       elt = XVECEXP (op, 0, i);
13101       if (GET_CODE (elt) != SET)
13102         return false;
13103 
13104       if (load)
13105         {
13106           reg = SET_DEST (elt);
13107           mem = SET_SRC (elt);
13108         }
13109       else
13110         {
13111           reg = SET_SRC (elt);
13112           mem = SET_DEST (elt);
13113         }
13114 
13115       if (!REG_P (reg)
13116           || GET_MODE (reg) != mode
13117           || REGNO (reg) <= regno
13118           || (consecutive
13119               && (REGNO (reg) !=
13120                   (unsigned int) (first_regno + regs_per_val * (i - base))))
13121           /* Don't allow SP to be loaded unless it is also the base register. It
13122              guarantees that SP is reset correctly when an LDM instruction
13123              is interrupted. Otherwise, we might end up with a corrupt stack.  */
13124           || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13125           || !MEM_P (mem)
13126           || GET_MODE (mem) != mode
13127           || ((GET_CODE (XEXP (mem, 0)) != PLUS
13128 	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13129 	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13130 	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13131                    offset + (i - base) * reg_increment))
13132 	      && (!REG_P (XEXP (mem, 0))
13133 		  || offset + (i - base) * reg_increment != 0)))
13134         return false;
13135 
13136       regno = REGNO (reg);
13137       if (regno == REGNO (addr))
13138         addr_reg_in_reglist = true;
13139     }
13140 
13141   if (load)
13142     {
13143       if (update && addr_reg_in_reglist)
13144         return false;
13145 
13146       /* For Thumb-1, the address register is always modified - either by write-back
13147          or by an explicit load.  If the pattern does not describe an update,
13148          then the address register must be in the list of loaded registers.  */
13149       if (TARGET_THUMB1)
13150         return update || addr_reg_in_reglist;
13151     }
13152 
13153   return true;
13154 }
13155 
13156 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13157    or stores (depending on IS_STORE) into a load-multiple or store-multiple
13158    instruction.  ADD_OFFSET is nonzero if the base address register needs
13159    to be modified with an add instruction before we can use it.  */
13160 
13161 static bool
13162 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13163 				 int nops, HOST_WIDE_INT add_offset)
13164  {
13165   /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13166      if the offset isn't small enough.  The reason 2 ldrs are faster
13167      is because these ARMs are able to do more than one cache access
13168      in a single cycle.  The ARM9 and StrongARM have Harvard caches,
13169      whilst the ARM8 has a double bandwidth cache.  This means that
13170      these cores can do both an instruction fetch and a data fetch in
13171      a single cycle, so the trick of calculating the address into a
13172      scratch register (one of the result regs) and then doing a load
13173      multiple actually becomes slower (and no smaller in code size).
13174      That is the transformation
13175 
13176  	ldr	rd1, [rbase + offset]
13177  	ldr	rd2, [rbase + offset + 4]
13178 
13179      to
13180 
13181  	add	rd1, rbase, offset
13182  	ldmia	rd1, {rd1, rd2}
13183 
13184      produces worse code -- '3 cycles + any stalls on rd2' instead of
13185      '2 cycles + any stalls on rd2'.  On ARMs with only one cache
13186      access per cycle, the first sequence could never complete in less
13187      than 6 cycles, whereas the ldm sequence would only take 5 and
13188      would make better use of sequential accesses if not hitting the
13189      cache.
13190 
13191      We cheat here and test 'arm_ld_sched' which we currently know to
13192      only be true for the ARM8, ARM9 and StrongARM.  If this ever
13193      changes, then the test below needs to be reworked.  */
13194   if (nops == 2 && arm_ld_sched && add_offset != 0)
13195     return false;
13196 
13197   /* XScale has load-store double instructions, but they have stricter
13198      alignment requirements than load-store multiple, so we cannot
13199      use them.
13200 
13201      For XScale ldm requires 2 + NREGS cycles to complete and blocks
13202      the pipeline until completion.
13203 
13204 	NREGS		CYCLES
13205 	  1		  3
13206 	  2		  4
13207 	  3		  5
13208 	  4		  6
13209 
13210      An ldr instruction takes 1-3 cycles, but does not block the
13211      pipeline.
13212 
13213 	NREGS		CYCLES
13214 	  1		 1-3
13215 	  2		 2-6
13216 	  3		 3-9
13217 	  4		 4-12
13218 
13219      Best case ldr will always win.  However, the more ldr instructions
13220      we issue, the less likely we are to be able to schedule them well.
13221      Using ldr instructions also increases code size.
13222 
13223      As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13224      for counts of 3 or 4 regs.  */
13225   if (nops <= 2 && arm_tune_xscale && !optimize_size)
13226     return false;
13227   return true;
13228 }
13229 
13230 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13231    Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13232    an array ORDER describing the sequence in which to access the offsets
13233    so that they come out in ascending order.  In this sequence, each
13234    offset must be larger by exactly 4 than the previous one.  ORDER[0]
13235    must have been filled in with the lowest offset by the caller.
13236    If UNSORTED_REGS is nonnull, it is an array of register numbers that
13237    we use to verify that ORDER produces an ascending order of registers.
13238    Return true if it was possible to construct such an order, false if
13239    not.  */
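/* For instance (illustrative): with NOPS == 4,
   UNSORTED_OFFSETS == {4, 12, 0, 8} and ORDER[0] == 2 (the index of the
   lowest offset), the loop below fills in ORDER == {2, 0, 3, 1}.  */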
13240 
13241 static bool
13242 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13243 		      int *unsorted_regs)
13244 {
13245   int i;
13246   for (i = 1; i < nops; i++)
13247     {
13248       int j;
13249 
13250       order[i] = order[i - 1];
13251       for (j = 0; j < nops; j++)
13252 	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13253 	  {
13254 	    /* We must find exactly one offset that is higher than the
13255 	       previous one by 4.  */
13256 	    if (order[i] != order[i - 1])
13257 	      return false;
13258 	    order[i] = j;
13259 	  }
13260       if (order[i] == order[i - 1])
13261 	return false;
13262       /* The register numbers must be ascending.  */
13263       if (unsorted_regs != NULL
13264 	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13265 	return false;
13266     }
13267   return true;
13268 }
13269 
13270 /* Used to determine in a peephole whether a sequence of load
13271    instructions can be changed into a load-multiple instruction.
13272    NOPS is the number of separate load instructions we are examining.  The
13273    first NOPS entries in OPERANDS are the destination registers, the
13274    next NOPS entries are memory operands.  If this function is
13275    successful, *BASE is set to the common base register of the memory
13276    accesses; *LOAD_OFFSET is set to the first memory location's offset
13277    from that base register.
13278    REGS is an array filled in with the destination register numbers.
13279    SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13280    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13281    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13282    locations, and the function verifies that the register numbers are
13283    themselves ascending.  If CHECK_REGS is false, the register numbers
13284    are stored in the order they are found in the operands.  */
13285 static int
13286 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13287 			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13288 {
13289   int unsorted_regs[MAX_LDM_STM_OPS];
13290   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13291   int order[MAX_LDM_STM_OPS];
13292   rtx base_reg_rtx = NULL;
13293   int base_reg = -1;
13294   int i, ldm_case;
13295 
13296   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13297      easily extended if required.  */
13298   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13299 
13300   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13301 
13302   /* Loop over the operands and check that the memory references are
13303      suitable (i.e. immediate offsets from the same base register).  At
13304      the same time, extract the target register, and the memory
13305      offsets.  */
13306   for (i = 0; i < nops; i++)
13307     {
13308       rtx reg;
13309       rtx offset;
13310 
13311       /* Convert a subreg of a mem into the mem itself.  */
13312       if (GET_CODE (operands[nops + i]) == SUBREG)
13313 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13314 
13315       gcc_assert (MEM_P (operands[nops + i]));
13316 
13317       /* Don't reorder volatile memory references; it doesn't seem worth
13318 	 looking for the case where the order is ok anyway.  */
13319       if (MEM_VOLATILE_P (operands[nops + i]))
13320 	return 0;
13321 
13322       offset = const0_rtx;
13323 
13324       if ((REG_P (reg = XEXP (operands[nops + i], 0))
13325 	   || (GET_CODE (reg) == SUBREG
13326 	       && REG_P (reg = SUBREG_REG (reg))))
13327 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13328 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13329 		  || (GET_CODE (reg) == SUBREG
13330 		      && REG_P (reg = SUBREG_REG (reg))))
13331 	      && (CONST_INT_P (offset
13332 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13333 	{
13334 	  if (i == 0)
13335 	    {
13336 	      base_reg = REGNO (reg);
13337 	      base_reg_rtx = reg;
13338 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13339 		return 0;
13340 	    }
13341 	  else if (base_reg != (int) REGNO (reg))
13342 	    /* Not addressed from the same base register.  */
13343 	    return 0;
13344 
13345 	  unsorted_regs[i] = (REG_P (operands[i])
13346 			      ? REGNO (operands[i])
13347 			      : REGNO (SUBREG_REG (operands[i])));
13348 
13349 	  /* If it isn't an integer register, or if it overwrites the
13350 	     base register but isn't the last insn in the list, then
13351 	     we can't do this.  */
13352 	  if (unsorted_regs[i] < 0
13353 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13354 	      || unsorted_regs[i] > 14
13355 	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
13356 	    return 0;
13357 
13358           /* Don't allow SP to be loaded unless it is also the base
13359              register.  It guarantees that SP is reset correctly when
13360              an LDM instruction is interrupted.  Otherwise, we might
13361              end up with a corrupt stack.  */
13362           if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13363             return 0;
13364 
13365 	  unsorted_offsets[i] = INTVAL (offset);
13366 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13367 	    order[0] = i;
13368 	}
13369       else
13370 	/* Not a suitable memory address.  */
13371 	return 0;
13372     }
13373 
13374   /* All the useful information has now been extracted from the
13375      operands into unsorted_regs and unsorted_offsets; additionally,
13376      order[0] has been set to the lowest offset in the list.  Sort
13377      the offsets into order, verifying that they are adjacent, and
13378      check that the register numbers are ascending.  */
13379   if (!compute_offset_order (nops, unsorted_offsets, order,
13380 			     check_regs ? unsorted_regs : NULL))
13381     return 0;
13382 
13383   if (saved_order)
13384     memcpy (saved_order, order, sizeof order);
13385 
13386   if (base)
13387     {
13388       *base = base_reg;
13389 
13390       for (i = 0; i < nops; i++)
13391 	regs[i] = unsorted_regs[check_regs ? order[i] : i];
13392 
13393       *load_offset = unsorted_offsets[order[0]];
13394     }
13395 
13396   if (TARGET_THUMB1
13397       && !peep2_reg_dead_p (nops, base_reg_rtx))
13398     return 0;
13399 
13400   if (unsorted_offsets[order[0]] == 0)
13401     ldm_case = 1; /* ldmia */
13402   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13403     ldm_case = 2; /* ldmib */
13404   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13405     ldm_case = 3; /* ldmda */
13406   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13407     ldm_case = 4; /* ldmdb */
13408   else if (const_ok_for_arm (unsorted_offsets[order[0]])
13409 	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
13410     ldm_case = 5;
13411   else
13412     return 0;
13413 
13414   if (!multiple_operation_profitable_p (false, nops,
13415 					ldm_case == 5
13416 					? unsorted_offsets[order[0]] : 0))
13417     return 0;
13418 
13419   return ldm_case;
13420 }
13421 
13422 /* Used to determine in a peephole whether a sequence of store instructions can
13423    be changed into a store-multiple instruction.
13424    NOPS is the number of separate store instructions we are examining.
13425    NOPS_TOTAL is the total number of instructions recognized by the peephole
13426    pattern.
13427    The first NOPS entries in OPERANDS are the source registers, the next
13428    NOPS entries are memory operands.  If this function is successful, *BASE is
13429    set to the common base register of the memory accesses; *LOAD_OFFSET is set
13430    to the first memory location's offset from that base register.  REGS is an
13431    array filled in with the source register numbers, REG_RTXS (if nonnull) is
13432    likewise filled with the corresponding rtx's.
13433    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13434    numbers to an ascending order of stores.
13435    If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13436    from ascending memory locations, and the function verifies that the register
13437    numbers are themselves ascending.  If CHECK_REGS is false, the register
13438    numbers are stored in the order they are found in the operands.  */
13439 static int
13440 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13441 			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13442 			 HOST_WIDE_INT *load_offset, bool check_regs)
13443 {
13444   int unsorted_regs[MAX_LDM_STM_OPS];
13445   rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13446   HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13447   int order[MAX_LDM_STM_OPS];
13448   int base_reg = -1;
13449   rtx base_reg_rtx = NULL;
13450   int i, stm_case;
13451 
13452   /* Write back of base register is currently only supported for Thumb 1.  */
13453   int base_writeback = TARGET_THUMB1;
13454 
13455   /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13456      easily extended if required.  */
13457   gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13458 
13459   memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13460 
13461   /* Loop over the operands and check that the memory references are
13462      suitable (i.e. immediate offsets from the same base register).  At
13463      the same time, extract the target register, and the memory
13464      offsets.  */
13465   for (i = 0; i < nops; i++)
13466     {
13467       rtx reg;
13468       rtx offset;
13469 
13470       /* Convert a subreg of a mem into the mem itself.  */
13471       if (GET_CODE (operands[nops + i]) == SUBREG)
13472 	operands[nops + i] = alter_subreg (operands + (nops + i), true);
13473 
13474       gcc_assert (MEM_P (operands[nops + i]));
13475 
13476       /* Don't reorder volatile memory references; it doesn't seem worth
13477 	 looking for the case where the order is ok anyway.  */
13478       if (MEM_VOLATILE_P (operands[nops + i]))
13479 	return 0;
13480 
13481       offset = const0_rtx;
13482 
13483       if ((REG_P (reg = XEXP (operands[nops + i], 0))
13484 	   || (GET_CODE (reg) == SUBREG
13485 	       && REG_P (reg = SUBREG_REG (reg))))
13486 	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13487 	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13488 		  || (GET_CODE (reg) == SUBREG
13489 		      && REG_P (reg = SUBREG_REG (reg))))
13490 	      && (CONST_INT_P (offset
13491 		  = XEXP (XEXP (operands[nops + i], 0), 1)))))
13492 	{
13493 	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
13494 				  ? operands[i] : SUBREG_REG (operands[i]));
13495 	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13496 
13497 	  if (i == 0)
13498 	    {
13499 	      base_reg = REGNO (reg);
13500 	      base_reg_rtx = reg;
13501 	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13502 		return 0;
13503 	    }
13504 	  else if (base_reg != (int) REGNO (reg))
13505 	    /* Not addressed from the same base register.  */
13506 	    return 0;
13507 
13508 	  /* If it isn't an integer register, then we can't do this.  */
13509 	  if (unsorted_regs[i] < 0
13510 	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13511 	      /* The effects are unpredictable if the base register is
13512 		 both updated and stored.  */
13513 	      || (base_writeback && unsorted_regs[i] == base_reg)
13514 	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13515 	      || unsorted_regs[i] > 14)
13516 	    return 0;
13517 
13518 	  unsorted_offsets[i] = INTVAL (offset);
13519 	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13520 	    order[0] = i;
13521 	}
13522       else
13523 	/* Not a suitable memory address.  */
13524 	return 0;
13525     }
13526 
13527   /* All the useful information has now been extracted from the
13528      operands into unsorted_regs and unsorted_offsets; additionally,
13529      order[0] has been set to the lowest offset in the list.  Sort
13530      the offsets into order, verifying that they are adjacent, and
13531      check that the register numbers are ascending.  */
13532   if (!compute_offset_order (nops, unsorted_offsets, order,
13533 			     check_regs ? unsorted_regs : NULL))
13534     return 0;
13535 
13536   if (saved_order)
13537     memcpy (saved_order, order, sizeof order);
13538 
13539   if (base)
13540     {
13541       *base = base_reg;
13542 
13543       for (i = 0; i < nops; i++)
13544 	{
13545 	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
13546 	  if (reg_rtxs)
13547 	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13548 	}
13549 
13550       *load_offset = unsorted_offsets[order[0]];
13551     }
13552 
13553   if (TARGET_THUMB1
13554       && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13555     return 0;
13556 
13557   if (unsorted_offsets[order[0]] == 0)
13558     stm_case = 1; /* stmia */
13559   else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13560     stm_case = 2; /* stmib */
13561   else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13562     stm_case = 3; /* stmda */
13563   else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13564     stm_case = 4; /* stmdb */
13565   else
13566     return 0;
13567 
13568   if (!multiple_operation_profitable_p (false, nops, 0))
13569     return 0;
13570 
13571   return stm_case;
13572 }
13573 
13574 /* Routines for use in generating RTL.  */
13575 
13576 /* Generate a load-multiple instruction.  COUNT is the number of loads in
13577    the instruction; REGS and MEMS are arrays containing the operands.
13578    BASEREG is the base register to be used in addressing the memory operands.
13579    WBACK_OFFSET is nonzero if the instruction should update the base
13580    register.  */
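/* For example (illustrative): COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8 would produce
     (parallel [(set (reg rbase) (plus (reg rbase) (const_int 8)))
		(set (reg 4) (mems[0]))
		(set (reg 5) (mems[1]))])
   when a multiple operation is profitable; otherwise the loads are
   emitted as a plain sequence of single moves.  */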
13581 
13582 static rtx
13583 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13584 			 HOST_WIDE_INT wback_offset)
13585 {
13586   int i = 0, j;
13587   rtx result;
13588 
13589   if (!multiple_operation_profitable_p (false, count, 0))
13590     {
13591       rtx seq;
13592 
13593       start_sequence ();
13594 
13595       for (i = 0; i < count; i++)
13596 	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13597 
13598       if (wback_offset != 0)
13599 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13600 
13601       seq = get_insns ();
13602       end_sequence ();
13603 
13604       return seq;
13605     }
13606 
13607   result = gen_rtx_PARALLEL (VOIDmode,
13608 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13609   if (wback_offset != 0)
13610     {
13611       XVECEXP (result, 0, 0)
13612 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13613       i = 1;
13614       count++;
13615     }
13616 
13617   for (j = 0; i < count; i++, j++)
13618     XVECEXP (result, 0, i)
13619       = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13620 
13621   return result;
13622 }
13623 
13624 /* Generate a store-multiple instruction.  COUNT is the number of stores in
13625    the instruction; REGS and MEMS are arrays containing the operands.
13626    BASEREG is the base register to be used in addressing the memory operands.
13627    WBACK_OFFSET is nonzero if the instruction should update the base
13628    register.  */
13629 
13630 static rtx
13631 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13632 			  HOST_WIDE_INT wback_offset)
13633 {
13634   int i = 0, j;
13635   rtx result;
13636 
13637   if (GET_CODE (basereg) == PLUS)
13638     basereg = XEXP (basereg, 0);
13639 
13640   if (!multiple_operation_profitable_p (false, count, 0))
13641     {
13642       rtx seq;
13643 
13644       start_sequence ();
13645 
13646       for (i = 0; i < count; i++)
13647 	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13648 
13649       if (wback_offset != 0)
13650 	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13651 
13652       seq = get_insns ();
13653       end_sequence ();
13654 
13655       return seq;
13656     }
13657 
13658   result = gen_rtx_PARALLEL (VOIDmode,
13659 			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13660   if (wback_offset != 0)
13661     {
13662       XVECEXP (result, 0, 0)
13663 	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13664       i = 1;
13665       count++;
13666     }
13667 
13668   for (j = 0; i < count; i++, j++)
13669     XVECEXP (result, 0, i)
13670       = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13671 
13672   return result;
13673 }
13674 
13675 /* Generate either a load-multiple or a store-multiple instruction.  This
13676    function can be used in situations where we can start with a single MEM
13677    rtx and adjust its address upwards.
13678    COUNT is the number of operations in the instruction, not counting a
13679    possible update of the base register.  REGS is an array containing the
13680    register operands.
13681    BASEREG is the base register to be used in addressing the memory operands,
13682    which are constructed from BASEMEM.
13683    WRITE_BACK specifies whether the generated instruction should include an
13684    update of the base register.
13685    OFFSETP is used to pass an offset to and from this function; this offset
13686    is not used when constructing the address (instead BASEMEM should have an
13687    appropriate offset in its address), it is used only for setting
13688    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13689 
13690 static rtx
13691 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13692 		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13693 {
13694   rtx mems[MAX_LDM_STM_OPS];
13695   HOST_WIDE_INT offset = *offsetp;
13696   int i;
13697 
13698   gcc_assert (count <= MAX_LDM_STM_OPS);
13699 
13700   if (GET_CODE (basereg) == PLUS)
13701     basereg = XEXP (basereg, 0);
13702 
13703   for (i = 0; i < count; i++)
13704     {
13705       rtx addr = plus_constant (Pmode, basereg, i * 4);
13706       mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13707       offset += 4;
13708     }
13709 
13710   if (write_back)
13711     *offsetp = offset;
13712 
13713   if (is_load)
13714     return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13715 				    write_back ? 4 * count : 0);
13716   else
13717     return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13718 				     write_back ? 4 * count : 0);
13719 }
13720 
13721 rtx
13722 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13723 		       rtx basemem, HOST_WIDE_INT *offsetp)
13724 {
13725   return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13726 			      offsetp);
13727 }
13728 
13729 rtx
13730 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13731 			rtx basemem, HOST_WIDE_INT *offsetp)
13732 {
13733   return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13734 			      offsetp);
13735 }
13736 
13737 /* Called from a peephole2 expander to turn a sequence of loads into an
13738    LDM instruction.  OPERANDS are the operands found by the peephole matcher;
13739    NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
13740    is true if we can reorder the registers because they are used commutatively
13741    subsequently.
13742    Returns true iff we could generate a new instruction.  */
13743 
13744 bool
13745 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13746 {
13747   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13748   rtx mems[MAX_LDM_STM_OPS];
13749   int i, j, base_reg;
13750   rtx base_reg_rtx;
13751   HOST_WIDE_INT offset;
13752   int write_back = FALSE;
13753   int ldm_case;
13754   rtx addr;
13755 
13756   ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13757 				     &base_reg, &offset, !sort_regs);
13758 
13759   if (ldm_case == 0)
13760     return false;
13761 
13762   if (sort_regs)
13763     for (i = 0; i < nops - 1; i++)
13764       for (j = i + 1; j < nops; j++)
13765 	if (regs[i] > regs[j])
13766 	  {
13767 	    int t = regs[i];
13768 	    regs[i] = regs[j];
13769 	    regs[j] = t;
13770 	  }
13771   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13772 
13773   if (TARGET_THUMB1)
13774     {
13775       gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13776       gcc_assert (ldm_case == 1 || ldm_case == 5);
13777       write_back = TRUE;
13778     }
13779 
13780   if (ldm_case == 5)
13781     {
13782       rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13783       emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13784       offset = 0;
13785       if (!TARGET_THUMB1)
13786 	base_reg_rtx = newbase;
13787     }
13788 
13789   for (i = 0; i < nops; i++)
13790     {
13791       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13792       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13793 					      SImode, addr, 0);
13794     }
13795   emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13796 				      write_back ? offset + i * 4 : 0));
13797   return true;
13798 }
13799 
13800 /* Called from a peephole2 expander to turn a sequence of stores into an
13801    STM instruction.  OPERANDS are the operands found by the peephole matcher;
13802    NOPS indicates how many separate stores we are trying to combine.
13803    Returns true iff we could generate a new instruction.  */
13804 
13805 bool
13806 gen_stm_seq (rtx *operands, int nops)
13807 {
13808   int i;
13809   int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13810   rtx mems[MAX_LDM_STM_OPS];
13811   int base_reg;
13812   rtx base_reg_rtx;
13813   HOST_WIDE_INT offset;
13814   int write_back = FALSE;
13815   int stm_case;
13816   rtx addr;
13817   bool base_reg_dies;
13818 
13819   stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13820 				      mem_order, &base_reg, &offset, true);
13821 
13822   if (stm_case == 0)
13823     return false;
13824 
13825   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13826 
13827   base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13828   if (TARGET_THUMB1)
13829     {
13830       gcc_assert (base_reg_dies);
13831       write_back = TRUE;
13832     }
13833 
13834   if (stm_case == 5)
13835     {
13836       gcc_assert (base_reg_dies);
13837       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13838       offset = 0;
13839     }
13840 
13841   addr = plus_constant (Pmode, base_reg_rtx, offset);
13842 
13843   for (i = 0; i < nops; i++)
13844     {
13845       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13846       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13847 					      SImode, addr, 0);
13848     }
13849   emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13850 				       write_back ? offset + i * 4 : 0));
13851   return true;
13852 }
13853 
13854 /* Called from a peephole2 expander to turn a sequence of stores that are
13855    preceded by constant loads into an STM instruction.  OPERANDS are the
13856    operands found by the peephole matcher; NOPS indicates how many
13857    separate stores we are trying to combine; there are 2 * NOPS
13858    instructions in the peephole.
13859    Returns true iff we could generate a new instruction.  */
13860 
13861 bool
13862 gen_const_stm_seq (rtx *operands, int nops)
13863 {
13864   int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13865   int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13866   rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13867   rtx mems[MAX_LDM_STM_OPS];
13868   int base_reg;
13869   rtx base_reg_rtx;
13870   HOST_WIDE_INT offset;
13871   int write_back = FALSE;
13872   int stm_case;
13873   rtx addr;
13874   bool base_reg_dies;
13875   int i, j;
13876   HARD_REG_SET allocated;
13877 
13878   stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13879 				      mem_order, &base_reg, &offset, false);
13880 
13881   if (stm_case == 0)
13882     return false;
13883 
13884   memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13885 
13886   /* If the same register is used more than once, try to find a free
13887      register.  */
13888   CLEAR_HARD_REG_SET (allocated);
13889   for (i = 0; i < nops; i++)
13890     {
13891       for (j = i + 1; j < nops; j++)
13892 	if (regs[i] == regs[j])
13893 	  {
13894 	    rtx t = peep2_find_free_register (0, nops * 2,
13895 					      TARGET_THUMB1 ? "l" : "r",
13896 					      SImode, &allocated);
13897 	    if (t == NULL_RTX)
13898 	      return false;
13899 	    reg_rtxs[i] = t;
13900 	    regs[i] = REGNO (t);
13901 	  }
13902     }
13903 
13904   /* Compute an ordering that maps the register numbers to an ascending
13905      sequence.  */
13906   reg_order[0] = 0;
13907   for (i = 0; i < nops; i++)
13908     if (regs[i] < regs[reg_order[0]])
13909       reg_order[0] = i;
13910 
13911   for (i = 1; i < nops; i++)
13912     {
13913       int this_order = reg_order[i - 1];
13914       for (j = 0; j < nops; j++)
13915 	if (regs[j] > regs[reg_order[i - 1]]
13916 	    && (this_order == reg_order[i - 1]
13917 		|| regs[j] < regs[this_order]))
13918 	  this_order = j;
13919       reg_order[i] = this_order;
13920     }
13921 
13922   /* Ensure that registers that must be live after the instruction end
13923      up with the correct value.  */
13924   for (i = 0; i < nops; i++)
13925     {
13926       int this_order = reg_order[i];
13927       if ((this_order != mem_order[i]
13928 	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13929 	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13930 	return false;
13931     }
13932 
13933   /* Load the constants.  */
13934   for (i = 0; i < nops; i++)
13935     {
13936       rtx op = operands[2 * nops + mem_order[i]];
13937       sorted_regs[i] = regs[reg_order[i]];
13938       emit_move_insn (reg_rtxs[reg_order[i]], op);
13939     }
13940 
13941   base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13942 
13943   base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13944   if (TARGET_THUMB1)
13945     {
13946       gcc_assert (base_reg_dies);
13947       write_back = TRUE;
13948     }
13949 
13950   if (stm_case == 5)
13951     {
13952       gcc_assert (base_reg_dies);
13953       emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13954       offset = 0;
13955     }
13956 
13957   addr = plus_constant (Pmode, base_reg_rtx, offset);
13958 
13959   for (i = 0; i < nops; i++)
13960     {
13961       addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13962       mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13963 					      SImode, addr, 0);
13964     }
13965   emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13966 				       write_back ? offset + i * 4 : 0));
13967   return true;
13968 }
13969 
13970 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13971    unaligned copies on processors which support unaligned semantics for those
13972    instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
13973    (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13974    An interleave factor of 1 (the minimum) will perform no interleaving.
13975    Load/store multiple are used for aligned addresses where possible.  */
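/* For instance (illustrative): a 10-byte copy with INTERLEAVE_FACTOR == 1
   and unaligned operands is emitted as two unaligned word loads/stores
   followed by one unaligned halfword load/store; with
   INTERLEAVE_FACTOR == 2 and word-aligned operands the word part becomes
   ldmia/stmia of two registers per iteration.  */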
13976 
13977 static void
13978 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13979 				   HOST_WIDE_INT length,
13980 				   unsigned int interleave_factor)
13981 {
13982   rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13983   int *regnos = XALLOCAVEC (int, interleave_factor);
13984   HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13985   HOST_WIDE_INT i, j;
13986   HOST_WIDE_INT remaining = length, words;
13987   rtx halfword_tmp = NULL, byte_tmp = NULL;
13988   rtx dst, src;
13989   bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13990   bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13991   HOST_WIDE_INT srcoffset, dstoffset;
13992   HOST_WIDE_INT src_autoinc, dst_autoinc;
13993   rtx mem, addr;
13994 
13995   gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13996 
13997   /* Use hard registers if we have aligned source or destination so we can use
13998      load/store multiple with contiguous registers.  */
13999   if (dst_aligned || src_aligned)
14000     for (i = 0; i < interleave_factor; i++)
14001       regs[i] = gen_rtx_REG (SImode, i);
14002   else
14003     for (i = 0; i < interleave_factor; i++)
14004       regs[i] = gen_reg_rtx (SImode);
14005 
14006   dst = copy_addr_to_reg (XEXP (dstbase, 0));
14007   src = copy_addr_to_reg (XEXP (srcbase, 0));
14008 
14009   srcoffset = dstoffset = 0;
14010 
14011   /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14012      For copying the last bytes we want to subtract this offset again.  */
14013   src_autoinc = dst_autoinc = 0;
14014 
14015   for (i = 0; i < interleave_factor; i++)
14016     regnos[i] = i;
14017 
14018   /* Copy BLOCK_SIZE_BYTES chunks.  */
14019 
14020   for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14021     {
14022       /* Load words.  */
14023       if (src_aligned && interleave_factor > 1)
14024 	{
14025 	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14026 					    TRUE, srcbase, &srcoffset));
14027 	  src_autoinc += UNITS_PER_WORD * interleave_factor;
14028 	}
14029       else
14030 	{
14031 	  for (j = 0; j < interleave_factor; j++)
14032 	    {
14033 	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14034 						 - src_autoinc));
14035 	      mem = adjust_automodify_address (srcbase, SImode, addr,
14036 					       srcoffset + j * UNITS_PER_WORD);
14037 	      emit_insn (gen_unaligned_loadsi (regs[j], mem));
14038 	    }
14039 	  srcoffset += block_size_bytes;
14040 	}
14041 
14042       /* Store words.  */
14043       if (dst_aligned && interleave_factor > 1)
14044 	{
14045 	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14046 					     TRUE, dstbase, &dstoffset));
14047 	  dst_autoinc += UNITS_PER_WORD * interleave_factor;
14048 	}
14049       else
14050 	{
14051 	  for (j = 0; j < interleave_factor; j++)
14052 	    {
14053 	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14054 						 - dst_autoinc));
14055 	      mem = adjust_automodify_address (dstbase, SImode, addr,
14056 					       dstoffset + j * UNITS_PER_WORD);
14057 	      emit_insn (gen_unaligned_storesi (mem, regs[j]));
14058 	    }
14059 	  dstoffset += block_size_bytes;
14060 	}
14061 
14062       remaining -= block_size_bytes;
14063     }
14064 
14065   /* Copy any whole words left (note these aren't interleaved with any
14066      subsequent halfword/byte load/stores in the interests of simplicity).  */
14067 
14068   words = remaining / UNITS_PER_WORD;
14069 
14070   gcc_assert (words < interleave_factor);
14071 
14072   if (src_aligned && words > 1)
14073     {
14074       emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14075 					&srcoffset));
14076       src_autoinc += UNITS_PER_WORD * words;
14077     }
14078   else
14079     {
14080       for (j = 0; j < words; j++)
14081 	{
14082 	  addr = plus_constant (Pmode, src,
14083 				srcoffset + j * UNITS_PER_WORD - src_autoinc);
14084 	  mem = adjust_automodify_address (srcbase, SImode, addr,
14085 					   srcoffset + j * UNITS_PER_WORD);
14086 	  if (src_aligned)
14087 	    emit_move_insn (regs[j], mem);
14088 	  else
14089 	    emit_insn (gen_unaligned_loadsi (regs[j], mem));
14090 	}
14091       srcoffset += words * UNITS_PER_WORD;
14092     }
14093 
14094   if (dst_aligned && words > 1)
14095     {
14096       emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14097 					 &dstoffset));
14098       dst_autoinc += words * UNITS_PER_WORD;
14099     }
14100   else
14101     {
14102       for (j = 0; j < words; j++)
14103 	{
14104 	  addr = plus_constant (Pmode, dst,
14105 				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14106 	  mem = adjust_automodify_address (dstbase, SImode, addr,
14107 					   dstoffset + j * UNITS_PER_WORD);
14108 	  if (dst_aligned)
14109 	    emit_move_insn (mem, regs[j]);
14110 	  else
14111 	    emit_insn (gen_unaligned_storesi (mem, regs[j]));
14112 	}
14113       dstoffset += words * UNITS_PER_WORD;
14114     }
14115 
14116   remaining -= words * UNITS_PER_WORD;
14117 
14118   gcc_assert (remaining < 4);
14119 
14120   /* Copy a halfword if necessary.  */
14121 
14122   if (remaining >= 2)
14123     {
14124       halfword_tmp = gen_reg_rtx (SImode);
14125 
14126       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14127       mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14128       emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14129 
14130       /* Either write out immediately, or delay until we've loaded the last
14131 	 byte, depending on interleave factor.  */
14132       if (interleave_factor == 1)
14133 	{
14134 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14135 	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14136 	  emit_insn (gen_unaligned_storehi (mem,
14137 		       gen_lowpart (HImode, halfword_tmp)));
14138 	  halfword_tmp = NULL;
14139 	  dstoffset += 2;
14140 	}
14141 
14142       remaining -= 2;
14143       srcoffset += 2;
14144     }
14145 
14146   gcc_assert (remaining < 2);
14147 
14148   /* Copy last byte.  */
14149 
14150   if ((remaining & 1) != 0)
14151     {
14152       byte_tmp = gen_reg_rtx (SImode);
14153 
14154       addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14155       mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14156       emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14157 
14158       if (interleave_factor == 1)
14159 	{
14160 	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14161 	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14162 	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14163 	  byte_tmp = NULL;
14164 	  dstoffset++;
14165 	}
14166 
14167       remaining--;
14168       srcoffset++;
14169     }
14170 
14171   /* Store last halfword if we haven't done so already.  */
14172 
14173   if (halfword_tmp)
14174     {
14175       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14176       mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14177       emit_insn (gen_unaligned_storehi (mem,
14178 		   gen_lowpart (HImode, halfword_tmp)));
14179       dstoffset += 2;
14180     }
14181 
14182   /* Likewise for last byte.  */
14183 
14184   if (byte_tmp)
14185     {
14186       addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14187       mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14188       emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14189       dstoffset++;
14190     }
14191 
14192   gcc_assert (remaining == 0 && srcoffset == dstoffset);
14193 }
14194 
14195 /* From mips_adjust_block_mem:
14196 
14197    Helper function for doing a loop-based block operation on memory
14198    reference MEM.  Each iteration of the loop will operate on LENGTH
14199    bytes of MEM.
14200 
14201    Create a new base register for use within the loop and point it to
14202    the start of MEM.  Create a new memory reference that uses this
14203    register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
14204 
14205 static void
14206 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14207 		      rtx *loop_mem)
14208 {
14209   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14210 
14211   /* Although the new mem does not refer to a known location,
14212      it does keep up to LENGTH bytes of alignment.  */
14213   *loop_mem = change_address (mem, BLKmode, *loop_reg);
14214   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14215 }
14216 
14217 /* From mips_block_move_loop:
14218 
14219    Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14220    bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
14221    the memory regions do not overlap.  */
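
/* Illustrative sketch only: with BYTES_PER_ITER == 16 the emitted RTL
   corresponds roughly to

       final_src = src + (length - length % 16);
     loop:
       copy 16 bytes straight-line from *src to *dest;
       src += 16;  dest += 16;
       if (src != final_src) goto loop;
       copy the remaining length % 16 bytes straight-line;

   where the straight-line copies are emitted by
   arm_block_move_unaligned_straight.  */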
14222 
14223 static void
14224 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14225 			       unsigned int interleave_factor,
14226 			       HOST_WIDE_INT bytes_per_iter)
14227 {
14228   rtx src_reg, dest_reg, final_src, test;
14229   HOST_WIDE_INT leftover;
14230 
14231   leftover = length % bytes_per_iter;
14232   length -= leftover;
14233 
14234   /* Create registers and memory references for use within the loop.  */
14235   arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14236   arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14237 
14238   /* Calculate the value that SRC_REG should have after the last iteration of
14239      the loop.  */
14240   final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14241 				   0, 0, OPTAB_WIDEN);
14242 
14243   /* Emit the start of the loop.  */
14244   rtx_code_label *label = gen_label_rtx ();
14245   emit_label (label);
14246 
14247   /* Emit the loop body.  */
14248   arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14249 				     interleave_factor);
14250 
14251   /* Move on to the next block.  */
14252   emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14253   emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14254 
14255   /* Emit the loop condition.  */
14256   test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14257   emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14258 
14259   /* Mop up any left-over bytes.  */
14260   if (leftover)
14261     arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14262 }
14263 
14264 /* Emit a block move when either the source or destination is unaligned (not
14265    aligned to a four-byte boundary).  This may need further tuning depending on
14266    core type, optimize_size setting, etc.  */
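
/* As a rough illustration of the heuristics below: when not optimizing
   for size, a 40-byte copy is expected to go through the
   16-bytes-per-iteration loop, while an 8-byte copy is expanded
   straight-line; with -Os the loop is used once the length exceeds 12
   bytes.  */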
14267 
14268 static int
14269 arm_movmemqi_unaligned (rtx *operands)
14270 {
14271   HOST_WIDE_INT length = INTVAL (operands[2]);
14272 
14273   if (optimize_size)
14274     {
14275       bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14276       bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14277       /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14278 	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
14279 	 or dst_aligned though: allow more interleaving in those cases since the
14280 	 resulting code can be smaller.  */
14281       unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14282       HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14283 
14284       if (length > 12)
14285 	arm_block_move_unaligned_loop (operands[0], operands[1], length,
14286 				       interleave_factor, bytes_per_iter);
14287       else
14288 	arm_block_move_unaligned_straight (operands[0], operands[1], length,
14289 					   interleave_factor);
14290     }
14291   else
14292     {
14293       /* Note that the loop created by arm_block_move_unaligned_loop may be
14294 	 subject to loop unrolling, which makes tuning this condition a little
14295 	 redundant.  */
14296       if (length > 32)
14297 	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14298       else
14299 	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14300     }
14301 
14302   return 1;
14303 }
14304 
14305 int
14306 arm_gen_movmemqi (rtx *operands)
14307 {
14308   HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14309   HOST_WIDE_INT srcoffset, dstoffset;
14310   rtx src, dst, srcbase, dstbase;
14311   rtx part_bytes_reg = NULL;
14312   rtx mem;
14313 
14314   if (!CONST_INT_P (operands[2])
14315       || !CONST_INT_P (operands[3])
14316       || INTVAL (operands[2]) > 64)
14317     return 0;
14318 
14319   if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14320     return arm_movmemqi_unaligned (operands);
14321 
14322   if (INTVAL (operands[3]) & 3)
14323     return 0;
14324 
14325   dstbase = operands[0];
14326   srcbase = operands[1];
14327 
14328   dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14329   src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14330 
14331   in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14332   out_words_to_go = INTVAL (operands[2]) / 4;
14333   last_bytes = INTVAL (operands[2]) & 3;
14334   dstoffset = srcoffset = 0;
14335 
14336   if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14337     part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14338 
14339   while (in_words_to_go >= 2)
14340     {
14341       if (in_words_to_go > 4)
14342 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14343 					  TRUE, srcbase, &srcoffset));
14344       else
14345 	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14346 					  src, FALSE, srcbase,
14347 					  &srcoffset));
14348 
14349       if (out_words_to_go)
14350 	{
14351 	  if (out_words_to_go > 4)
14352 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14353 					       TRUE, dstbase, &dstoffset));
14354 	  else if (out_words_to_go != 1)
14355 	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14356 					       out_words_to_go, dst,
14357 					       (last_bytes == 0
14358 						? FALSE : TRUE),
14359 					       dstbase, &dstoffset));
14360 	  else
14361 	    {
14362 	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14363 	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14364 	      if (last_bytes != 0)
14365 		{
14366 		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14367 		  dstoffset += 4;
14368 		}
14369 	    }
14370 	}
14371 
14372       in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14373       out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14374     }
14375 
14376   /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
14377   if (out_words_to_go)
14378     {
14379       rtx sreg;
14380 
14381       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14382       sreg = copy_to_reg (mem);
14383 
14384       mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14385       emit_move_insn (mem, sreg);
14386       in_words_to_go--;
14387 
14388       gcc_assert (!in_words_to_go);	/* Sanity check */
14389     }
14390 
14391   if (in_words_to_go)
14392     {
14393       gcc_assert (in_words_to_go > 0);
14394 
14395       mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14396       part_bytes_reg = copy_to_mode_reg (SImode, mem);
14397     }
14398 
14399   gcc_assert (!last_bytes || part_bytes_reg);
14400 
14401   if (BYTES_BIG_ENDIAN && last_bytes)
14402     {
14403       rtx tmp = gen_reg_rtx (SImode);
14404 
14405       /* The bytes we want are in the top end of the word.  */
14406       emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14407 			      GEN_INT (8 * (4 - last_bytes))));
14408       part_bytes_reg = tmp;
14409 
14410       while (last_bytes)
14411 	{
14412 	  mem = adjust_automodify_address (dstbase, QImode,
14413 					   plus_constant (Pmode, dst,
14414 							  last_bytes - 1),
14415 					   dstoffset + last_bytes - 1);
14416 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14417 
14418 	  if (--last_bytes)
14419 	    {
14420 	      tmp = gen_reg_rtx (SImode);
14421 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14422 	      part_bytes_reg = tmp;
14423 	    }
14424 	}
14425 
14426     }
14427   else
14428     {
14429       if (last_bytes > 1)
14430 	{
14431 	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14432 	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14433 	  last_bytes -= 2;
14434 	  if (last_bytes)
14435 	    {
14436 	      rtx tmp = gen_reg_rtx (SImode);
14437 	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14438 	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14439 	      part_bytes_reg = tmp;
14440 	      dstoffset += 2;
14441 	    }
14442 	}
14443 
14444       if (last_bytes)
14445 	{
14446 	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14447 	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14448 	}
14449     }
14450 
14451   return 1;
14452 }
14453 
14454 /* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx
14455    by mode size.  */
14456 inline static rtx
14457 next_consecutive_mem (rtx mem)
14458 {
14459   machine_mode mode = GET_MODE (mem);
14460   HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14461   rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14462 
14463   return adjust_automodify_address (mem, mode, addr, offset);
14464 }
14465 
14466 /* Copy using LDRD/STRD instructions whenever possible.
14467    Returns true upon success. */
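
/* As an illustration (not a literal quote of the output), an aligned
   8-byte copy is expected to end up as

       ldrd    rX, rY, [r_src]
       strd    rX, rY, [r_dst]

   after register allocation, while an unaligned side is accessed as a
   pair of unaligned SImode loads/stores of the low and high halves of
   the same DImode temporary.  */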
14468 bool
14469 gen_movmem_ldrd_strd (rtx *operands)
14470 {
14471   unsigned HOST_WIDE_INT len;
14472   HOST_WIDE_INT align;
14473   rtx src, dst, base;
14474   rtx reg0;
14475   bool src_aligned, dst_aligned;
14476   bool src_volatile, dst_volatile;
14477 
14478   gcc_assert (CONST_INT_P (operands[2]));
14479   gcc_assert (CONST_INT_P (operands[3]));
14480 
14481   len = UINTVAL (operands[2]);
14482   if (len > 64)
14483     return false;
14484 
14485   /* Maximum alignment we can assume for both src and dst buffers.  */
14486   align = INTVAL (operands[3]);
14487 
14488   if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14489     return false;
14490 
14491   /* Place src and dst addresses in registers
14492      and update the corresponding mem rtx.  */
14493   dst = operands[0];
14494   dst_volatile = MEM_VOLATILE_P (dst);
14495   dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14496   base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14497   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14498 
14499   src = operands[1];
14500   src_volatile = MEM_VOLATILE_P (src);
14501   src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14502   base = copy_to_mode_reg (SImode, XEXP (src, 0));
14503   src = adjust_automodify_address (src, VOIDmode, base, 0);
14504 
14505   if (!unaligned_access && !(src_aligned && dst_aligned))
14506     return false;
14507 
14508   if (src_volatile || dst_volatile)
14509     return false;
14510 
14511   /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
14512   if (!(dst_aligned || src_aligned))
14513     return arm_gen_movmemqi (operands);
14514 
14515   /* If either src or dst is unaligned we'll be accessing it as pairs
14516      of unaligned SImode accesses.  Otherwise we can generate DImode
14517      ldrd/strd instructions.  */
14518   src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14519   dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14520 
14521   while (len >= 8)
14522     {
14523       len -= 8;
14524       reg0 = gen_reg_rtx (DImode);
14525       rtx low_reg = NULL_RTX;
14526       rtx hi_reg = NULL_RTX;
14527 
14528       if (!src_aligned || !dst_aligned)
14529 	{
14530 	  low_reg = gen_lowpart (SImode, reg0);
14531 	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14532 	}
14533       if (src_aligned)
14534         emit_move_insn (reg0, src);
14535       else
14536 	{
14537 	  emit_insn (gen_unaligned_loadsi (low_reg, src));
14538 	  src = next_consecutive_mem (src);
14539 	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
14540 	}
14541 
14542       if (dst_aligned)
14543         emit_move_insn (dst, reg0);
14544       else
14545 	{
14546 	  emit_insn (gen_unaligned_storesi (dst, low_reg));
14547 	  dst = next_consecutive_mem (dst);
14548 	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
14549 	}
14550 
14551       src = next_consecutive_mem (src);
14552       dst = next_consecutive_mem (dst);
14553     }
14554 
14555   gcc_assert (len < 8);
14556   if (len >= 4)
14557     {
14558       /* At least a word but less than a double-word left to copy; copy a word.  */
14559       reg0 = gen_reg_rtx (SImode);
14560       src = adjust_address (src, SImode, 0);
14561       dst = adjust_address (dst, SImode, 0);
14562       if (src_aligned)
14563         emit_move_insn (reg0, src);
14564       else
14565         emit_insn (gen_unaligned_loadsi (reg0, src));
14566 
14567       if (dst_aligned)
14568         emit_move_insn (dst, reg0);
14569       else
14570         emit_insn (gen_unaligned_storesi (dst, reg0));
14571 
14572       src = next_consecutive_mem (src);
14573       dst = next_consecutive_mem (dst);
14574       len -= 4;
14575     }
14576 
14577   if (len == 0)
14578     return true;
14579 
14580   /* Copy the remaining bytes.  */
14581   if (len >= 2)
14582     {
14583       dst = adjust_address (dst, HImode, 0);
14584       src = adjust_address (src, HImode, 0);
14585       reg0 = gen_reg_rtx (SImode);
14586       if (src_aligned)
14587         emit_insn (gen_zero_extendhisi2 (reg0, src));
14588       else
14589         emit_insn (gen_unaligned_loadhiu (reg0, src));
14590 
14591       if (dst_aligned)
14592         emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14593       else
14594         emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14595 
14596       src = next_consecutive_mem (src);
14597       dst = next_consecutive_mem (dst);
14598       if (len == 2)
14599         return true;
14600     }
14601 
14602   dst = adjust_address (dst, QImode, 0);
14603   src = adjust_address (src, QImode, 0);
14604   reg0 = gen_reg_rtx (QImode);
14605   emit_move_insn (reg0, src);
14606   emit_move_insn (dst, reg0);
14607   return true;
14608 }
14609 
14610 /* Select a dominance comparison mode if possible for a test of the general
14611    form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
14612    COND_OR == DOM_CC_X_AND_Y => (X && Y)
14613    COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14614    COND_OR == DOM_CC_X_OR_Y => (X || Y)
14615    In all cases OP will be either EQ or NE, but we don't need to know which
14616    here.  If we are unable to support a dominance comparison we return
14617    CC mode.  This will then fail to match for the RTL expressions that
14618    generate this call.  */
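
/* A few illustrative cases (assuming both comparisons are simple enough
   that arm_select_cc_mode gives CCmode for each): EQ combined with LE
   under DOM_CC_X_OR_Y yields CC_DLEmode, because EQ dominates LE; LT
   combined with LT under DOM_CC_X_AND_Y yields CC_DLTmode; LT combined
   with GT yields CCmode, because neither comparison dominates the
   other.  */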
14619 machine_mode
14620 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14621 {
14622   enum rtx_code cond1, cond2;
14623   int swapped = 0;
14624 
14625   /* Currently we will probably get the wrong result if the individual
14626      comparisons are not simple.  This also ensures that it is safe to
14627      reverse a comparison if necessary.  */
14628   if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14629        != CCmode)
14630       || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14631 	  != CCmode))
14632     return CCmode;
14633 
14634   /* The if_then_else variant of this tests the second condition if the
14635      first passes, but is true if the first fails.  Reverse the first
14636      condition to get a true "inclusive-or" expression.  */
14637   if (cond_or == DOM_CC_NX_OR_Y)
14638     cond1 = reverse_condition (cond1);
14639 
14640   /* If the comparisons are not equal, and one doesn't dominate the other,
14641      then we can't do this.  */
14642   if (cond1 != cond2
14643       && !comparison_dominates_p (cond1, cond2)
14644       && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14645     return CCmode;
14646 
14647   if (swapped)
14648     std::swap (cond1, cond2);
14649 
14650   switch (cond1)
14651     {
14652     case EQ:
14653       if (cond_or == DOM_CC_X_AND_Y)
14654 	return CC_DEQmode;
14655 
14656       switch (cond2)
14657 	{
14658 	case EQ: return CC_DEQmode;
14659 	case LE: return CC_DLEmode;
14660 	case LEU: return CC_DLEUmode;
14661 	case GE: return CC_DGEmode;
14662 	case GEU: return CC_DGEUmode;
14663 	default: gcc_unreachable ();
14664 	}
14665 
14666     case LT:
14667       if (cond_or == DOM_CC_X_AND_Y)
14668 	return CC_DLTmode;
14669 
14670       switch (cond2)
14671 	{
14672 	case LT:
14673 	  return CC_DLTmode;
14674 	case LE:
14675 	  return CC_DLEmode;
14676 	case NE:
14677 	  return CC_DNEmode;
14678 	default:
14679 	  gcc_unreachable ();
14680 	}
14681 
14682     case GT:
14683       if (cond_or == DOM_CC_X_AND_Y)
14684 	return CC_DGTmode;
14685 
14686       switch (cond2)
14687 	{
14688 	case GT:
14689 	  return CC_DGTmode;
14690 	case GE:
14691 	  return CC_DGEmode;
14692 	case NE:
14693 	  return CC_DNEmode;
14694 	default:
14695 	  gcc_unreachable ();
14696 	}
14697 
14698     case LTU:
14699       if (cond_or == DOM_CC_X_AND_Y)
14700 	return CC_DLTUmode;
14701 
14702       switch (cond2)
14703 	{
14704 	case LTU:
14705 	  return CC_DLTUmode;
14706 	case LEU:
14707 	  return CC_DLEUmode;
14708 	case NE:
14709 	  return CC_DNEmode;
14710 	default:
14711 	  gcc_unreachable ();
14712 	}
14713 
14714     case GTU:
14715       if (cond_or == DOM_CC_X_AND_Y)
14716 	return CC_DGTUmode;
14717 
14718       switch (cond2)
14719 	{
14720 	case GTU:
14721 	  return CC_DGTUmode;
14722 	case GEU:
14723 	  return CC_DGEUmode;
14724 	case NE:
14725 	  return CC_DNEmode;
14726 	default:
14727 	  gcc_unreachable ();
14728 	}
14729 
14730     /* The remaining cases only occur when both comparisons are the
14731        same.  */
14732     case NE:
14733       gcc_assert (cond1 == cond2);
14734       return CC_DNEmode;
14735 
14736     case LE:
14737       gcc_assert (cond1 == cond2);
14738       return CC_DLEmode;
14739 
14740     case GE:
14741       gcc_assert (cond1 == cond2);
14742       return CC_DGEmode;
14743 
14744     case LEU:
14745       gcc_assert (cond1 == cond2);
14746       return CC_DLEUmode;
14747 
14748     case GEU:
14749       gcc_assert (cond1 == cond2);
14750       return CC_DGEUmode;
14751 
14752     default:
14753       gcc_unreachable ();
14754     }
14755 }
14756 
14757 machine_mode
14758 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14759 {
14760   /* All floating point compares return CCFP if it is an equality
14761      comparison, and CCFPE otherwise.  */
14762   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14763     {
14764       switch (op)
14765 	{
14766 	case EQ:
14767 	case NE:
14768 	case UNORDERED:
14769 	case ORDERED:
14770 	case UNLT:
14771 	case UNLE:
14772 	case UNGT:
14773 	case UNGE:
14774 	case UNEQ:
14775 	case LTGT:
14776 	  return CCFPmode;
14777 
14778 	case LT:
14779 	case LE:
14780 	case GT:
14781 	case GE:
14782 	  return CCFPEmode;
14783 
14784 	default:
14785 	  gcc_unreachable ();
14786 	}
14787     }
14788 
14789   /* A compare with a shifted operand.  Because of canonicalization, the
14790      comparison will have to be swapped when we emit the assembler.  */
14791   if (GET_MODE (y) == SImode
14792       && (REG_P (y) || (GET_CODE (y) == SUBREG))
14793       && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14794 	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14795 	  || GET_CODE (x) == ROTATERT))
14796     return CC_SWPmode;
14797 
14798   /* This operation is performed swapped, but since we only rely on the Z
14799      flag we don't need an additional mode.  */
14800   if (GET_MODE (y) == SImode
14801       && (REG_P (y) || (GET_CODE (y) == SUBREG))
14802       && GET_CODE (x) == NEG
14803       && (op == EQ || op == NE))
14804     return CC_Zmode;
14805 
14806   /* This is a special case that is used by combine to allow a
14807      comparison of a shifted byte load to be split into a zero-extend
14808      followed by a comparison of the shifted integer (only valid for
14809      equalities and unsigned inequalities).  */
14810   if (GET_MODE (x) == SImode
14811       && GET_CODE (x) == ASHIFT
14812       && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14813       && GET_CODE (XEXP (x, 0)) == SUBREG
14814       && MEM_P (SUBREG_REG (XEXP (x, 0)))
14815       && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14816       && (op == EQ || op == NE
14817 	  || op == GEU || op == GTU || op == LTU || op == LEU)
14818       && CONST_INT_P (y))
14819     return CC_Zmode;
14820 
14821   /* A construct for a conditional compare, if the false arm contains
14822      0, then both conditions must be true, otherwise either condition
14823      must be true.  Not all conditions are possible, so CCmode is
14824      returned if it can't be done.  */
14825   if (GET_CODE (x) == IF_THEN_ELSE
14826       && (XEXP (x, 2) == const0_rtx
14827 	  || XEXP (x, 2) == const1_rtx)
14828       && COMPARISON_P (XEXP (x, 0))
14829       && COMPARISON_P (XEXP (x, 1)))
14830     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14831 					 INTVAL (XEXP (x, 2)));
14832 
14833   /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
14834   if (GET_CODE (x) == AND
14835       && (op == EQ || op == NE)
14836       && COMPARISON_P (XEXP (x, 0))
14837       && COMPARISON_P (XEXP (x, 1)))
14838     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14839 					 DOM_CC_X_AND_Y);
14840 
14841   if (GET_CODE (x) == IOR
14842       && (op == EQ || op == NE)
14843       && COMPARISON_P (XEXP (x, 0))
14844       && COMPARISON_P (XEXP (x, 1)))
14845     return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14846 					 DOM_CC_X_OR_Y);
14847 
14848   /* An operation (on Thumb) where we want to test for a single bit.
14849      This is done by shifting that bit up into the top bit of a
14850      scratch register; we can then branch on the sign bit.  */
14851   if (TARGET_THUMB1
14852       && GET_MODE (x) == SImode
14853       && (op == EQ || op == NE)
14854       && GET_CODE (x) == ZERO_EXTRACT
14855       && XEXP (x, 1) == const1_rtx)
14856     return CC_Nmode;
14857 
14858   /* An operation that sets the condition codes as a side-effect, the
14859      V flag is not set correctly, so we can only use comparisons where
14860      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
14861      instead.)  */
14862   /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
14863   if (GET_MODE (x) == SImode
14864       && y == const0_rtx
14865       && (op == EQ || op == NE || op == LT || op == GE)
14866       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14867 	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
14868 	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14869 	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14870 	  || GET_CODE (x) == LSHIFTRT
14871 	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14872 	  || GET_CODE (x) == ROTATERT
14873 	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14874     return CC_NOOVmode;
14875 
14876   if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14877     return CC_Zmode;
14878 
14879   if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14880       && GET_CODE (x) == PLUS
14881       && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14882     return CC_Cmode;
14883 
14884   if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14885     {
14886       switch (op)
14887 	{
14888 	case EQ:
14889 	case NE:
14890 	  /* A DImode comparison against zero can be implemented by
14891 	     or'ing the two halves together.  */
14892 	  if (y == const0_rtx)
14893 	    return CC_Zmode;
14894 
14895 	  /* We can do an equality test in three Thumb instructions.  */
14896 	  if (!TARGET_32BIT)
14897 	    return CC_Zmode;
14898 
14899 	  /* FALLTHROUGH */
14900 
14901 	case LTU:
14902 	case LEU:
14903 	case GTU:
14904 	case GEU:
14905 	  /* DImode unsigned comparisons can be implemented by cmp +
14906 	     cmpeq without a scratch register.  Not worth doing in
14907 	     Thumb-2.  */
14908 	  if (TARGET_32BIT)
14909 	    return CC_CZmode;
14910 
14911 	  /* FALLTHROUGH */
14912 
14913 	case LT:
14914 	case LE:
14915 	case GT:
14916 	case GE:
14917 	  /* DImode signed and unsigned comparisons can be implemented
14918 	     by cmp + sbcs with a scratch register, but that does not
14919 	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
14920 	  gcc_assert (op != EQ && op != NE);
14921 	  return CC_NCVmode;
14922 
14923 	default:
14924 	  gcc_unreachable ();
14925 	}
14926     }
14927 
14928   if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14929     return GET_MODE (x);
14930 
14931   return CCmode;
14932 }
14933 
14934 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14935    return the rtx for register 0 in the proper mode.  FP means this is a
14936    floating point compare: I don't think that it is needed on the arm.  */
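
/* A typical caller (sketch only, not a literal quote from the expanders)
   does something like

     rtx cc_reg = arm_gen_compare_reg (code, x, y, NULL_RTX);
     rtx cond = gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);

   and then uses COND as the test of a conditional branch or conditional
   move.  */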
14937 rtx
14938 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14939 {
14940   machine_mode mode;
14941   rtx cc_reg;
14942   int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14943 
14944   /* We might have X as a constant, Y as a register because of the predicates
14945      used for cmpdi.  If so, force X to a register here.  */
14946   if (dimode_comparison && !REG_P (x))
14947     x = force_reg (DImode, x);
14948 
14949   mode = SELECT_CC_MODE (code, x, y);
14950   cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14951 
14952   if (dimode_comparison
14953       && mode != CC_CZmode)
14954     {
14955       rtx clobber, set;
14956 
14957       /* To compare two non-zero values for equality, XOR them and
14958 	 then compare against zero.  Not used for ARM mode; there
14959 	 CC_CZmode is cheaper.  */
14960       if (mode == CC_Zmode && y != const0_rtx)
14961 	{
14962 	  gcc_assert (!reload_completed);
14963 	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14964 	  y = const0_rtx;
14965 	}
14966 
14967       /* A scratch register is required.  */
14968       if (reload_completed)
14969 	gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14970       else
14971 	scratch = gen_rtx_SCRATCH (SImode);
14972 
14973       clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14974       set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14975       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14976     }
14977   else
14978     emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14979 
14980   return cc_reg;
14981 }
14982 
14983 /* Generate a sequence of insns that will generate the correct return
14984    address mask depending on the physical architecture that the program
14985    is running on.  */
14986 rtx
14987 arm_gen_return_addr_mask (void)
14988 {
14989   rtx reg = gen_reg_rtx (Pmode);
14990 
14991   emit_insn (gen_return_addr_mask (reg));
14992   return reg;
14993 }
14994 
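/* Handle loading a half-word from memory during reload by synthesizing
   it as two byte loads: the bytes are loaded (zero-extended) into the
   output register and a scratch taken from the DImode register in
   OPERANDS[2], then combined with a shift and IOR according to the
   target endianness.  */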
14995 void
14996 arm_reload_in_hi (rtx *operands)
14997 {
14998   rtx ref = operands[1];
14999   rtx base, scratch;
15000   HOST_WIDE_INT offset = 0;
15001 
15002   if (GET_CODE (ref) == SUBREG)
15003     {
15004       offset = SUBREG_BYTE (ref);
15005       ref = SUBREG_REG (ref);
15006     }
15007 
15008   if (REG_P (ref))
15009     {
15010       /* We have a pseudo which has been spilt onto the stack; there
15011 	 are two cases here: the first where there is a simple
15012 	 stack-slot replacement and a second where the stack-slot is
15013 	 out of range, or is used as a subreg.  */
15014       if (reg_equiv_mem (REGNO (ref)))
15015 	{
15016 	  ref = reg_equiv_mem (REGNO (ref));
15017 	  base = find_replacement (&XEXP (ref, 0));
15018 	}
15019       else
15020 	/* The slot is out of range, or was dressed up in a SUBREG.  */
15021 	base = reg_equiv_address (REGNO (ref));
15022 
15023       /* PR 62554: If there is no equivalent memory location then just move
15024 	 the value as an SImode register move.  This happens when the target
15025 	 architecture variant does not have an HImode register move.  */
15026       if (base == NULL)
15027 	{
15028 	  gcc_assert (REG_P (operands[0]));
15029 	  emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15030 				gen_rtx_SUBREG (SImode, ref, 0)));
15031 	  return;
15032 	}
15033     }
15034   else
15035     base = find_replacement (&XEXP (ref, 0));
15036 
15037   /* Handle the case where the address is too complex to be offset by 1.  */
15038   if (GET_CODE (base) == MINUS
15039       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15040     {
15041       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15042 
15043       emit_set_insn (base_plus, base);
15044       base = base_plus;
15045     }
15046   else if (GET_CODE (base) == PLUS)
15047     {
15048       /* The addend must be CONST_INT, or we would have dealt with it above.  */
15049       HOST_WIDE_INT hi, lo;
15050 
15051       offset += INTVAL (XEXP (base, 1));
15052       base = XEXP (base, 0);
15053 
15054       /* Rework the address into a legal sequence of insns.  */
15055       /* Valid range for lo is -4095 -> 4095 */
15056       lo = (offset >= 0
15057 	    ? (offset & 0xfff)
15058 	    : -((-offset) & 0xfff));
15059 
15060       /* Corner case, if lo is the max offset then we would be out of range
15061 	 once we have added the additional 1 below, so bump the msb into the
15062 	 pre-loading insn(s).  */
15063       if (lo == 4095)
15064 	lo &= 0x7ff;
15065 
15066       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15067 	     ^ (HOST_WIDE_INT) 0x80000000)
15068 	    - (HOST_WIDE_INT) 0x80000000);
15069 
15070       gcc_assert (hi + lo == offset);
15071 
15072       if (hi != 0)
15073 	{
15074 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15075 
15076 	  /* Get the base address; addsi3 knows how to handle constants
15077 	     that require more than one insn.  */
15078 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15079 	  base = base_plus;
15080 	  offset = lo;
15081 	}
15082     }
15083 
15084   /* Operands[2] may overlap operands[0] (though it won't overlap
15085      operands[1]); that's why we asked for a DImode reg -- so we can
15086      use the bit that does not overlap.  */
15087   if (REGNO (operands[2]) == REGNO (operands[0]))
15088     scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15089   else
15090     scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15091 
15092   emit_insn (gen_zero_extendqisi2 (scratch,
15093 				   gen_rtx_MEM (QImode,
15094 						plus_constant (Pmode, base,
15095 							       offset))));
15096   emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15097 				   gen_rtx_MEM (QImode,
15098 						plus_constant (Pmode, base,
15099 							       offset + 1))));
15100   if (!BYTES_BIG_ENDIAN)
15101     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15102 		   gen_rtx_IOR (SImode,
15103 				gen_rtx_ASHIFT
15104 				(SImode,
15105 				 gen_rtx_SUBREG (SImode, operands[0], 0),
15106 				 GEN_INT (8)),
15107 				scratch));
15108   else
15109     emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15110 		   gen_rtx_IOR (SImode,
15111 				gen_rtx_ASHIFT (SImode, scratch,
15112 						GEN_INT (8)),
15113 				gen_rtx_SUBREG (SImode, operands[0], 0)));
15114 }
15115 
15116 /* Handle storing a half-word to memory during reload by synthesizing as two
15117    byte stores.  Take care not to clobber the input values until after we
15118    have moved them somewhere safe.  This code assumes that if the DImode
15119    scratch in operands[2] overlaps either the input value or output address
15120    in some way, then that value must die in this insn (we absolutely need
15121    two scratch registers for some corner cases).  */
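
/* For a little-endian target the synthesized sequence corresponds
   roughly to (illustrative only)

       strb    outval, [base, #offset]
       mov     scratch, outval, lsr #8
       strb    scratch, [base, #offset + 1]

   with the byte order reversed for big-endian.  */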
15122 void
15123 arm_reload_out_hi (rtx *operands)
15124 {
15125   rtx ref = operands[0];
15126   rtx outval = operands[1];
15127   rtx base, scratch;
15128   HOST_WIDE_INT offset = 0;
15129 
15130   if (GET_CODE (ref) == SUBREG)
15131     {
15132       offset = SUBREG_BYTE (ref);
15133       ref = SUBREG_REG (ref);
15134     }
15135 
15136   if (REG_P (ref))
15137     {
15138       /* We have a pseudo which has been spilt onto the stack; there
15139 	 are two cases here: the first where there is a simple
15140 	 stack-slot replacement and a second where the stack-slot is
15141 	 out of range, or is used as a subreg.  */
15142       if (reg_equiv_mem (REGNO (ref)))
15143 	{
15144 	  ref = reg_equiv_mem (REGNO (ref));
15145 	  base = find_replacement (&XEXP (ref, 0));
15146 	}
15147       else
15148 	/* The slot is out of range, or was dressed up in a SUBREG.  */
15149 	base = reg_equiv_address (REGNO (ref));
15150 
15151       /* PR 62254: If there is no equivalent memory location then just move
15152 	 the value as an SImode register move.  This happens when the target
15153 	 architecture variant does not have an HImode register move.  */
15154       if (base == NULL)
15155 	{
15156 	  gcc_assert (REG_P (outval) || SUBREG_P (outval));
15157 
15158 	  if (REG_P (outval))
15159 	    {
15160 	      emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15161 				    gen_rtx_SUBREG (SImode, outval, 0)));
15162 	    }
15163 	  else /* SUBREG_P (outval)  */
15164 	    {
15165 	      if (GET_MODE (SUBREG_REG (outval)) == SImode)
15166 		emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15167 				      SUBREG_REG (outval)));
15168 	      else
15169 		/* FIXME: Handle other cases ?  */
15170 		gcc_unreachable ();
15171 	    }
15172 	  return;
15173 	}
15174     }
15175   else
15176     base = find_replacement (&XEXP (ref, 0));
15177 
15178   scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15179 
15180   /* Handle the case where the address is too complex to be offset by 1.  */
15181   if (GET_CODE (base) == MINUS
15182       || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15183     {
15184       rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15185 
15186       /* Be careful not to destroy OUTVAL.  */
15187       if (reg_overlap_mentioned_p (base_plus, outval))
15188 	{
15189 	  /* Updating base_plus might destroy outval, see if we can
15190 	     swap the scratch and base_plus.  */
15191 	  if (!reg_overlap_mentioned_p (scratch, outval))
15192 	    std::swap (scratch, base_plus);
15193 	  else
15194 	    {
15195 	      rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15196 
15197 	      /* Be conservative and copy OUTVAL into the scratch now,
15198 		 this should only be necessary if outval is a subreg
15199 		 of something larger than a word.  */
15200 	      /* XXX Might this clobber base?  I can't see how it can,
15201 		 since scratch is known to overlap with OUTVAL, and
15202 		 must be wider than a word.  */
15203 	      emit_insn (gen_movhi (scratch_hi, outval));
15204 	      outval = scratch_hi;
15205 	    }
15206 	}
15207 
15208       emit_set_insn (base_plus, base);
15209       base = base_plus;
15210     }
15211   else if (GET_CODE (base) == PLUS)
15212     {
15213       /* The addend must be CONST_INT, or we would have dealt with it above.  */
15214       HOST_WIDE_INT hi, lo;
15215 
15216       offset += INTVAL (XEXP (base, 1));
15217       base = XEXP (base, 0);
15218 
15219       /* Rework the address into a legal sequence of insns.  */
15220       /* Valid range for lo is -4095 -> 4095 */
15221       lo = (offset >= 0
15222 	    ? (offset & 0xfff)
15223 	    : -((-offset) & 0xfff));
15224 
15225       /* Corner case, if lo is the max offset then we would be out of range
15226 	 once we have added the additional 1 below, so bump the msb into the
15227 	 pre-loading insn(s).  */
15228       if (lo == 4095)
15229 	lo &= 0x7ff;
15230 
15231       hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15232 	     ^ (HOST_WIDE_INT) 0x80000000)
15233 	    - (HOST_WIDE_INT) 0x80000000);
15234 
15235       gcc_assert (hi + lo == offset);
15236 
15237       if (hi != 0)
15238 	{
15239 	  rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15240 
15241 	  /* Be careful not to destroy OUTVAL.  */
15242 	  if (reg_overlap_mentioned_p (base_plus, outval))
15243 	    {
15244 	      /* Updating base_plus might destroy outval, see if we
15245 		 can swap the scratch and base_plus.  */
15246 	      if (!reg_overlap_mentioned_p (scratch, outval))
15247 	        std::swap (scratch, base_plus);
15248 	      else
15249 		{
15250 		  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15251 
15252 		  /* Be conservative and copy outval into scratch now,
15253 		     this should only be necessary if outval is a
15254 		     subreg of something larger than a word.  */
15255 		  /* XXX Might this clobber base?  I can't see how it
15256 		     can, since scratch is known to overlap with
15257 		     outval.  */
15258 		  emit_insn (gen_movhi (scratch_hi, outval));
15259 		  outval = scratch_hi;
15260 		}
15261 	    }
15262 
15263 	  /* Get the base address; addsi3 knows how to handle constants
15264 	     that require more than one insn.  */
15265 	  emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15266 	  base = base_plus;
15267 	  offset = lo;
15268 	}
15269     }
15270 
15271   if (BYTES_BIG_ENDIAN)
15272     {
15273       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15274 					 plus_constant (Pmode, base,
15275 							offset + 1)),
15276 			    gen_lowpart (QImode, outval)));
15277       emit_insn (gen_lshrsi3 (scratch,
15278 			      gen_rtx_SUBREG (SImode, outval, 0),
15279 			      GEN_INT (8)));
15280       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15281 								offset)),
15282 			    gen_lowpart (QImode, scratch)));
15283     }
15284   else
15285     {
15286       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15287 								offset)),
15288 			    gen_lowpart (QImode, outval)));
15289       emit_insn (gen_lshrsi3 (scratch,
15290 			      gen_rtx_SUBREG (SImode, outval, 0),
15291 			      GEN_INT (8)));
15292       emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15293 					 plus_constant (Pmode, base,
15294 							offset + 1)),
15295 			    gen_lowpart (QImode, scratch)));
15296     }
15297 }
15298 
15299 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15300    (padded to the size of a word) should be passed in a register.  */
15301 
15302 static bool
15303 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15304 {
15305   if (TARGET_AAPCS_BASED)
15306     return must_pass_in_stack_var_size (mode, type);
15307   else
15308     return must_pass_in_stack_var_size_or_pad (mode, type);
15309 }
15310 
15311 
15312 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15313    byte of a stack argument has useful data.  For legacy APCS ABIs we use
15314    the default.  For AAPCS based ABIs small aggregate types are placed
15315    in the lowest memory address.  */
15316 
15317 static pad_direction
15318 arm_function_arg_padding (machine_mode mode, const_tree type)
15319 {
15320   if (!TARGET_AAPCS_BASED)
15321     return default_function_arg_padding (mode, type);
15322 
15323   if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15324     return PAD_DOWNWARD;
15325 
15326   return PAD_UPWARD;
15327 }
15328 
15329 
15330 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15331    Return !BYTES_BIG_ENDIAN if the least significant byte of the
15332    register has useful data, and return the opposite if the most
15333    significant byte does.  */
15334 
15335 bool
15336 arm_pad_reg_upward (machine_mode mode,
15337                     tree type, int first ATTRIBUTE_UNUSED)
15338 {
15339   if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15340     {
15341       /* For AAPCS, small aggregates, small fixed-point types,
15342 	 and small complex types are always padded upwards.  */
15343       if (type)
15344 	{
15345 	  if ((AGGREGATE_TYPE_P (type)
15346 	       || TREE_CODE (type) == COMPLEX_TYPE
15347 	       || FIXED_POINT_TYPE_P (type))
15348 	      && int_size_in_bytes (type) <= 4)
15349 	    return true;
15350 	}
15351       else
15352 	{
15353 	  if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15354 	      && GET_MODE_SIZE (mode) <= 4)
15355 	    return true;
15356 	}
15357     }
15358 
15359   /* Otherwise, use default padding.  */
15360   return !BYTES_BIG_ENDIAN;
15361 }
15362 
15363 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15364    assuming that the address in the base register is word aligned.  */
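
/* For example, an offset of 1020 passes this test in Thumb-2 (a multiple
   of 4 within +/-1020) but not in ARM state (outside +/-255), while an
   offset of 2 fails in Thumb-2 (not a multiple of 4) but passes in ARM
   state.  */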
15365 bool
15366 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15367 {
15368   HOST_WIDE_INT max_offset;
15369 
15370   /* Offset must be a multiple of 4 in Thumb mode.  */
15371   if (TARGET_THUMB2 && ((offset & 3) != 0))
15372     return false;
15373 
15374   if (TARGET_THUMB2)
15375     max_offset = 1020;
15376   else if (TARGET_ARM)
15377     max_offset = 255;
15378   else
15379     return false;
15380 
15381   return ((offset <= max_offset) && (offset >= -max_offset));
15382 }
15383 
15384 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15385    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
15386    Assumes that the address in the base register RN is word aligned.  Pattern
15387    guarantees that both memory accesses use the same base register,
15388    the offsets are constants within the range, and the gap between the offsets is 4.
15389    If reload is complete, check that the registers are legal.  WBACK indicates whether
15390    address is updated.  LOAD indicates whether memory access is load or store.  */
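
/* Illustrative examples: in ARM state "ldrd r4, r5, [r6]" satisfies
   these checks (even first register, consecutive pair), whereas
   "ldrd r5, r6, [r7]" is rejected because the first destination register
   is odd; in Thumb-2 a load with identical destinations such as
   "ldrd r4, r4, [r6]" is rejected.  */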
15391 bool
15392 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15393                        bool wback, bool load)
15394 {
15395   unsigned int t, t2, n;
15396 
15397   if (!reload_completed)
15398     return true;
15399 
15400   if (!offset_ok_for_ldrd_strd (offset))
15401     return false;
15402 
15403   t = REGNO (rt);
15404   t2 = REGNO (rt2);
15405   n = REGNO (rn);
15406 
15407   if ((TARGET_THUMB2)
15408       && ((wback && (n == t || n == t2))
15409           || (t == SP_REGNUM)
15410           || (t == PC_REGNUM)
15411           || (t2 == SP_REGNUM)
15412           || (t2 == PC_REGNUM)
15413           || (!load && (n == PC_REGNUM))
15414           || (load && (t == t2))
15415           /* Triggers Cortex-M3 LDRD errata.  */
15416           || (!wback && load && fix_cm3_ldrd && (n == t))))
15417     return false;
15418 
15419   if ((TARGET_ARM)
15420       && ((wback && (n == t || n == t2))
15421           || (t2 == PC_REGNUM)
15422           || (t % 2 != 0)   /* First destination register is not even.  */
15423           || (t2 != t + 1)
15424           /* PC can be used as base register (for offset addressing only),
15425              but it is deprecated.  */
15426           || (n == PC_REGNUM)))
15427     return false;
15428 
15429   return true;
15430 }
15431 
15432 /* Return true if a 64-bit access with alignment ALIGN and with a
15433    constant offset OFFSET from the base pointer is permitted on this
15434    architecture.  */
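
/* For instance, a 32-bit-aligned access at offset 4 is allowed only when
   unaligned accesses are enabled; with them disabled the access must be
   64-bit aligned and the offset a multiple of 8.  */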
15435 static bool
15436 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15437 {
15438   return (unaligned_access
15439 	  ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15440 	  : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15441 }
15442 
15443 /* Helper for gen_operands_ldrd_strd.  Returns true iff the memory
15444    operand MEM's address contains an immediate offset from the base
15445    register and has no side effects, in which case it sets BASE,
15446    OFFSET and ALIGN accordingly.  */
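
/* For example (illustrative), (mem (reg r4)) gives *BASE = r4 with
   *OFFSET = (const_int 0); (mem (plus (reg r4) (const_int 8))) gives
   *BASE = r4 and *OFFSET = (const_int 8); an auto-modified address such
   as (mem (post_inc (reg r4))) is rejected because of its side effect.  */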
15447 static bool
15448 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15449 {
15450   rtx addr;
15451 
15452   gcc_assert (base != NULL && offset != NULL);
15453 
15454   /* TODO: Handle more general memory operand patterns, such as
15455      PRE_DEC and PRE_INC.  */
15456 
15457   if (side_effects_p (mem))
15458     return false;
15459 
15460   /* Can't deal with subregs.  */
15461   if (GET_CODE (mem) == SUBREG)
15462     return false;
15463 
15464   gcc_assert (MEM_P (mem));
15465 
15466   *offset = const0_rtx;
15467   *align = MEM_ALIGN (mem);
15468 
15469   addr = XEXP (mem, 0);
15470 
15471   /* If addr isn't valid for DImode, then we can't handle it.  */
15472   if (!arm_legitimate_address_p (DImode, addr,
15473 				 reload_in_progress || reload_completed))
15474     return false;
15475 
15476   if (REG_P (addr))
15477     {
15478       *base = addr;
15479       return true;
15480     }
15481   else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15482     {
15483       *base = XEXP (addr, 0);
15484       *offset = XEXP (addr, 1);
15485       return (REG_P (*base) && CONST_INT_P (*offset));
15486     }
15487 
15488   return false;
15489 }
15490 
15491 /* Called from a peephole2 to replace two word-size accesses with a
15492    single LDRD/STRD instruction.  Returns true iff we can generate a
15493    new instruction sequence.  That is, both accesses use the same base
15494    register and the gap between constant offsets is 4.  This function
15495    may reorder its operands to match ldrd/strd RTL templates.
15496    OPERANDS are the operands found by the peephole matcher;
15497    OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15498    corresponding memory operands.  LOAD indicates whether the access
15499    is load or store.  CONST_STORE indicates a store of constant
15500    integer values held in OPERANDS[4,5] and assumes that the pattern
15501    is of length 4 insn, for the purpose of checking dead registers.
15502    COMMUTE indicates that register operands may be reordered.  */
15503 bool
15504 gen_operands_ldrd_strd (rtx *operands, bool load,
15505                         bool const_store, bool commute)
15506 {
15507   int nops = 2;
15508   HOST_WIDE_INT offsets[2], offset, align[2];
15509   rtx base = NULL_RTX;
15510   rtx cur_base, cur_offset, tmp;
15511   int i, gap;
15512   HARD_REG_SET regset;
15513 
15514   gcc_assert (!const_store || !load);
15515   /* Check that the memory references are immediate offsets from the
15516      same base register.  Extract the base register, the destination
15517      registers, and the corresponding memory offsets.  */
15518   for (i = 0; i < nops; i++)
15519     {
15520       if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15521 				 &align[i]))
15522         return false;
15523 
15524       if (i == 0)
15525         base = cur_base;
15526       else if (REGNO (base) != REGNO (cur_base))
15527         return false;
15528 
15529       offsets[i] = INTVAL (cur_offset);
15530       if (GET_CODE (operands[i]) == SUBREG)
15531         {
15532           tmp = SUBREG_REG (operands[i]);
15533           gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15534           operands[i] = tmp;
15535         }
15536     }
15537 
15538   /* Make sure there is no dependency between the individual loads.  */
15539   if (load && REGNO (operands[0]) == REGNO (base))
15540     return false; /* RAW */
15541 
15542   if (load && REGNO (operands[0]) == REGNO (operands[1]))
15543     return false; /* WAW */
15544 
15545   /* If the same input register is used in both stores
15546      when storing different constants, try to find a free register.
15547      For example, the code
15548 	mov r0, 0
15549 	str r0, [r2]
15550 	mov r0, 1
15551 	str r0, [r2, #4]
15552      can be transformed into
15553 	mov r1, 0
15554 	mov r0, 1
15555 	strd r1, r0, [r2]
15556      in Thumb mode assuming that r1 is free.
15557      For ARM mode do the same but only if the starting register
15558      can be made to be even.  */
15559   if (const_store
15560       && REGNO (operands[0]) == REGNO (operands[1])
15561       && INTVAL (operands[4]) != INTVAL (operands[5]))
15562     {
15563     if (TARGET_THUMB2)
15564       {
15565         CLEAR_HARD_REG_SET (regset);
15566         tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15567         if (tmp == NULL_RTX)
15568           return false;
15569 
15570         /* Use the new register in the first load to ensure that
15571            if the original input register is not dead after peephole,
15572            then it will have the correct constant value.  */
15573         operands[0] = tmp;
15574       }
15575     else if (TARGET_ARM)
15576       {
15577         int regno = REGNO (operands[0]);
15578         if (!peep2_reg_dead_p (4, operands[0]))
15579           {
15580             /* When the input register is even and is not dead after the
15581                pattern, it has to hold the second constant but we cannot
15582                form a legal STRD in ARM mode with this register as the second
15583                register.  */
15584             if (regno % 2 == 0)
15585               return false;
15586 
15587             /* Is regno-1 free? */
15588             SET_HARD_REG_SET (regset);
15589             CLEAR_HARD_REG_BIT(regset, regno - 1);
15590             tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15591             if (tmp == NULL_RTX)
15592               return false;
15593 
15594             operands[0] = tmp;
15595           }
15596         else
15597           {
15598             /* Find a DImode register.  */
15599             CLEAR_HARD_REG_SET (regset);
15600             tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15601             if (tmp != NULL_RTX)
15602               {
15603                 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15604                 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15605               }
15606             else
15607               {
15608                 /* Can we use the input register to form a DI register?  */
15609                 SET_HARD_REG_SET (regset);
15610                 CLEAR_HARD_REG_BIT(regset,
15611                                    regno % 2 == 0 ? regno + 1 : regno - 1);
15612                 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15613                 if (tmp == NULL_RTX)
15614                   return false;
15615                 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15616               }
15617           }
15618 
15619         gcc_assert (operands[0] != NULL_RTX);
15620         gcc_assert (operands[1] != NULL_RTX);
15621         gcc_assert (REGNO (operands[0]) % 2 == 0);
15622         gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15623       }
15624     }
15625 
15626   /* Make sure the instructions are ordered with lower memory access first.  */
15627   if (offsets[0] > offsets[1])
15628     {
15629       gap = offsets[0] - offsets[1];
15630       offset = offsets[1];
15631 
15632       /* Swap the instructions such that lower memory is accessed first.  */
15633       std::swap (operands[0], operands[1]);
15634       std::swap (operands[2], operands[3]);
15635       std::swap (align[0], align[1]);
15636       if (const_store)
15637         std::swap (operands[4], operands[5]);
15638     }
15639   else
15640     {
15641       gap = offsets[1] - offsets[0];
15642       offset = offsets[0];
15643     }
15644 
15645   /* Make sure accesses are to consecutive memory locations.  */
15646   if (gap != 4)
15647     return false;
15648 
15649   if (!align_ok_ldrd_strd (align[0], offset))
15650     return false;
15651 
15652   /* Make sure we generate legal instructions.  */
15653   if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15654                              false, load))
15655     return true;
15656 
15657   /* In Thumb state, where the registers used by LDRD/STRD are almost
15658      unconstrained, there is little hope of fixing this by renaming.  */
15659   if (TARGET_THUMB2)
15660     return false;
15661 
15662   if (load && commute)
15663     {
15664       /* Try reordering registers.  */
15665       std::swap (operands[0], operands[1]);
15666       if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15667                                  false, load))
15668         return true;
15669     }
15670 
15671   if (const_store)
15672     {
15673       /* If input registers are dead after this pattern, they can be
15674          reordered or replaced by other registers that are free in the
15675          current pattern.  */
15676       if (!peep2_reg_dead_p (4, operands[0])
15677           || !peep2_reg_dead_p (4, operands[1]))
15678         return false;
15679 
15680       /* Try to reorder the input registers.  */
15681       /* For example, the code
15682            mov r0, 0
15683            mov r1, 1
15684            str r1, [r2]
15685            str r0, [r2, #4]
15686          can be transformed into
15687            mov r1, 0
15688            mov r0, 1
15689            strd r0, r1, [r2]
15690       */
15691       if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15692                                   false, false))
15693         {
15694           std::swap (operands[0], operands[1]);
15695           return true;
15696         }
15697 
15698       /* Try to find a free DI register.  */
15699       CLEAR_HARD_REG_SET (regset);
15700       add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15701       add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15702       while (true)
15703         {
15704           tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15705           if (tmp == NULL_RTX)
15706             return false;
15707 
15708           /* DREG must be an even-numbered register in DImode.
15709              Split it into SI registers.  */
15710           operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15711           operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15712           gcc_assert (operands[0] != NULL_RTX);
15713           gcc_assert (operands[1] != NULL_RTX);
15714           gcc_assert (REGNO (operands[0]) % 2 == 0);
15715           gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15716 
15717           return (operands_ok_ldrd_strd (operands[0], operands[1],
15718                                          base, offset,
15719                                          false, load));
15720         }
15721     }
15722 
15723   return false;
15724 }
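
/* Illustrative sketch only (not from the original sources): in ARM state the
   peephole handled above must end up with an even/odd consecutive register
   pair, e.g.

	mov	r4, #0
	mov	r5, #1
	strd	r4, r5, [r2]

   whereas the pairs r5/r4 or r4/r6 do not form a valid ARM-state STRD and
   must be rejected or repaired by the code above.  */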
15725 
15726 
15727 
15728 
15729 /* Print a symbolic form of X to the debug file, F.  */
15730 static void
15731 arm_print_value (FILE *f, rtx x)
15732 {
15733   switch (GET_CODE (x))
15734     {
15735     case CONST_INT:
15736       fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15737       return;
15738 
15739     case CONST_DOUBLE:
15740       fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15741       return;
15742 
15743     case CONST_VECTOR:
15744       {
15745 	int i;
15746 
15747 	fprintf (f, "<");
15748 	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15749 	  {
15750 	    fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15751 	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
15752 	      fputc (',', f);
15753 	  }
15754 	fprintf (f, ">");
15755       }
15756       return;
15757 
15758     case CONST_STRING:
15759       fprintf (f, "\"%s\"", XSTR (x, 0));
15760       return;
15761 
15762     case SYMBOL_REF:
15763       fprintf (f, "`%s'", XSTR (x, 0));
15764       return;
15765 
15766     case LABEL_REF:
15767       fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15768       return;
15769 
15770     case CONST:
15771       arm_print_value (f, XEXP (x, 0));
15772       return;
15773 
15774     case PLUS:
15775       arm_print_value (f, XEXP (x, 0));
15776       fprintf (f, "+");
15777       arm_print_value (f, XEXP (x, 1));
15778       return;
15779 
15780     case PC:
15781       fprintf (f, "pc");
15782       return;
15783 
15784     default:
15785       fprintf (f, "????");
15786       return;
15787     }
15788 }
15789 
15790 /* Routines for manipulation of the constant pool.  */
15791 
15792 /* Arm instructions cannot load a large constant directly into a
15793    register; they have to come from a pc relative load.  The constant
15794    must therefore be placed in the addressable range of the pc
15795    relative load.  Depending on the precise pc relative load
15796    instruction the range is somewhere between 256 bytes and 4k.  This
15797    means that we often have to dump a constant inside a function, and
15798    generate code to branch around it.
15799 
15800    It is important to minimize this, since the branches will slow
15801    things down and make the code larger.
15802 
15803    Normally we can hide the table after an existing unconditional
15804    branch so that there is no interruption of the flow, but in the
15805    worst case the code looks like this:
15806 
15807 	ldr	rn, L1
15808 	...
15809 	b	L2
15810 	align
15811 	L1:	.long value
15812 	L2:
15813 	...
15814 
15815 	ldr	rn, L3
15816 	...
15817 	b	L4
15818 	align
15819 	L3:	.long value
15820 	L4:
15821 	...
15822 
15823    We fix this by performing a scan after scheduling, which notices
15824    which instructions need to have their operands fetched from the
15825    constant table and builds the table.
15826 
15827    The algorithm starts by building a table of all the constants that
15828    need fixing up and all the natural barriers in the function (places
15829    where a constant table can be dropped without breaking the flow).
15830    For each fixup we note how far the pc-relative replacement will be
15831    able to reach and the offset of the instruction into the function.
15832 
15833    Having built the table we then group the fixes together to form
15834    tables that are as large as possible (subject to addressing
15835    constraints) and emit each table of constants after the last
15836    barrier that is within range of all the instructions in the group.
15837    If a group does not contain a barrier, then we forcibly create one
15838    by inserting a jump instruction into the flow.  Once the table has
15839    been inserted, the insns are then modified to reference the
15840    relevant entry in the pool.
15841 
15842    Possible enhancements to the algorithm (not implemented) are:
15843 
15844    1) For some processors and object formats, there may be benefit in
15845    aligning the pools to the start of cache lines; this alignment
15846    would need to be taken into account when calculating addressability
15847    of a pool.  */
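
/* Rough sketch of how the routines below fit together (summary only; the
   authoritative driver is the machine-dependent reorg pass):

     push_minipool_fix (via note_invalid_constants) / push_minipool_barrier
     add_minipool_forward_ref / add_minipool_backward_ref
     assign_minipool_offsets
     dump_minipool

   i.e. fixes and barriers are collected first, grouped into pools, given
   offsets, and finally emitted into the insn stream.  */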
15848 
15849 /* These typedefs are located at the start of this file, so that
15850    they can be used in the prototypes there.  This comment is to
15851    remind readers of that fact so that the following structures
15852    can be understood more easily.
15853 
15854      typedef struct minipool_node    Mnode;
15855      typedef struct minipool_fixup   Mfix;  */
15856 
15857 struct minipool_node
15858 {
15859   /* Doubly linked chain of entries.  */
15860   Mnode * next;
15861   Mnode * prev;
15862   /* The maximum offset into the code at which this entry can be placed.  While
15863      pushing fixes for forward references, all entries are sorted in order
15864      of increasing max_address.  */
15865   HOST_WIDE_INT max_address;
15866   /* Similarly for an entry inserted for a backwards ref.  */
15867   HOST_WIDE_INT min_address;
15868   /* The number of fixes referencing this entry.  This can become zero
15869      if we "unpush" an entry.  In this case we ignore the entry when we
15870      come to emit the code.  */
15871   int refcount;
15872   /* The offset from the start of the minipool.  */
15873   HOST_WIDE_INT offset;
15874   /* The value in the table.  */
15875   rtx value;
15876   /* The mode of value.  */
15877   machine_mode mode;
15878   /* The size of the value.  With iWMMXt enabled
15879      sizes > 4 also imply an alignment of 8 bytes.  */
15880   int fix_size;
15881 };
15882 
15883 struct minipool_fixup
15884 {
15885   Mfix *            next;
15886   rtx_insn *        insn;
15887   HOST_WIDE_INT     address;
15888   rtx *             loc;
15889   machine_mode mode;
15890   int               fix_size;
15891   rtx               value;
15892   Mnode *           minipool;
15893   HOST_WIDE_INT     forwards;
15894   HOST_WIDE_INT     backwards;
15895 };
15896 
15897 /* Fixes less than a word need padding out to a word boundary.  */
15898 #define MINIPOOL_FIX_SIZE(mode) \
15899   (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
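
/* For example (illustration only): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, while
   MINIPOOL_FIX_SIZE (DImode) is 8, so sub-word constants occupy a full
   word slot in the pool.  */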
15900 
15901 static Mnode *	minipool_vector_head;
15902 static Mnode *	minipool_vector_tail;
15903 static rtx_code_label	*minipool_vector_label;
15904 static int	minipool_pad;
15905 
15906 /* The linked list of all minipool fixes required for this function.  */
15907 Mfix * 		minipool_fix_head;
15908 Mfix * 		minipool_fix_tail;
15909 /* The fix entry for the current minipool, once it has been placed.  */
15910 Mfix *		minipool_barrier;
15911 
15912 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15913 #define JUMP_TABLES_IN_TEXT_SECTION 0
15914 #endif
15915 
15916 static HOST_WIDE_INT
15917 get_jump_table_size (rtx_jump_table_data *insn)
15918 {
15919   /* ADDR_VECs only take room if read-only data goes into the text
15920      section.  */
15921   if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15922     {
15923       rtx body = PATTERN (insn);
15924       int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15925       HOST_WIDE_INT size;
15926       HOST_WIDE_INT modesize;
15927 
15928       modesize = GET_MODE_SIZE (GET_MODE (body));
15929       size = modesize * XVECLEN (body, elt);
15930       switch (modesize)
15931 	{
15932 	case 1:
15933 	  /* Round up size  of TBB table to a halfword boundary.  */
15934 	  size = (size + 1) & ~HOST_WIDE_INT_1;
15935 	  break;
15936 	case 2:
15937 	  /* No padding necessary for TBH.  */
15938 	  break;
15939 	case 4:
15940 	  /* Add two bytes for alignment on Thumb.  */
15941 	  if (TARGET_THUMB)
15942 	    size += 2;
15943 	  break;
15944 	default:
15945 	  gcc_unreachable ();
15946 	}
15947       return size;
15948     }
15949 
15950   return 0;
15951 }
15952 
15953 /* Return the maximum amount of padding that will be inserted before
15954    label LABEL.  */
15955 
15956 static HOST_WIDE_INT
15957 get_label_padding (rtx label)
15958 {
15959   HOST_WIDE_INT align, min_insn_size;
15960 
15961   align = 1 << label_to_alignment (label);
15962   min_insn_size = TARGET_THUMB ? 2 : 4;
15963   return align > min_insn_size ? align - min_insn_size : 0;
15964 }
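
/* For instance (illustration only): with a label aligned to an 8-byte
   boundary on a Thumb target the minimum insn size is 2, so up to
   8 - 2 = 6 bytes of padding may be inserted before the label.  */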
15965 
15966 /* Move a minipool fix MP from its current location to before MAX_MP.
15967    If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15968    constraints may need updating.  */
15969 static Mnode *
15970 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15971 			       HOST_WIDE_INT max_address)
15972 {
15973   /* The code below assumes these are different.  */
15974   gcc_assert (mp != max_mp);
15975 
15976   if (max_mp == NULL)
15977     {
15978       if (max_address < mp->max_address)
15979 	mp->max_address = max_address;
15980     }
15981   else
15982     {
15983       if (max_address > max_mp->max_address - mp->fix_size)
15984 	mp->max_address = max_mp->max_address - mp->fix_size;
15985       else
15986 	mp->max_address = max_address;
15987 
15988       /* Unlink MP from its current position.  Since max_mp is non-null,
15989        mp->prev must be non-null.  */
15990       mp->prev->next = mp->next;
15991       if (mp->next != NULL)
15992 	mp->next->prev = mp->prev;
15993       else
15994 	minipool_vector_tail = mp->prev;
15995 
15996       /* Re-insert it before MAX_MP.  */
15997       mp->next = max_mp;
15998       mp->prev = max_mp->prev;
15999       max_mp->prev = mp;
16000 
16001       if (mp->prev != NULL)
16002 	mp->prev->next = mp;
16003       else
16004 	minipool_vector_head = mp;
16005     }
16006 
16007   /* Save the new entry.  */
16008   max_mp = mp;
16009 
16010   /* Scan over the preceding entries and adjust their addresses as
16011      required.  */
16012   while (mp->prev != NULL
16013 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16014     {
16015       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16016       mp = mp->prev;
16017     }
16018 
16019   return max_mp;
16020 }
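
/* Example (illustration only): if MP->fix_size is 8 and MAX_MP->max_address
   is 1024, then after the move MP->max_address is at most 1016 (1024 - 8),
   and the loop above tightens the max_address of any earlier entries in the
   same way.  */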
16021 
16022 /* Add a constant to the minipool for a forward reference.  Returns the
16023    node added or NULL if the constant will not fit in this pool.  */
16024 static Mnode *
16025 add_minipool_forward_ref (Mfix *fix)
16026 {
16027   /* If set, max_mp is the first pool_entry that has a lower
16028      constraint than the one we are trying to add.  */
16029   Mnode *       max_mp = NULL;
16030   HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16031   Mnode *       mp;
16032 
16033   /* If the minipool starts before the end of FIX->INSN then this FIX
16034      cannot be placed into the current pool.  Furthermore, adding the
16035      new constant pool entry may cause the pool to start FIX_SIZE bytes
16036      earlier.  */
16037   if (minipool_vector_head &&
16038       (fix->address + get_attr_length (fix->insn)
16039        >= minipool_vector_head->max_address - fix->fix_size))
16040     return NULL;
16041 
16042   /* Scan the pool to see if a constant with the same value has
16043      already been added.  While we are doing this, also note the
16044      location where we must insert the constant if it doesn't already
16045      exist.  */
16046   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16047     {
16048       if (GET_CODE (fix->value) == GET_CODE (mp->value)
16049 	  && fix->mode == mp->mode
16050 	  && (!LABEL_P (fix->value)
16051 	      || (CODE_LABEL_NUMBER (fix->value)
16052 		  == CODE_LABEL_NUMBER (mp->value)))
16053 	  && rtx_equal_p (fix->value, mp->value))
16054 	{
16055 	  /* More than one fix references this entry.  */
16056 	  mp->refcount++;
16057 	  return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16058 	}
16059 
16060       /* Note the insertion point if necessary.  */
16061       if (max_mp == NULL
16062 	  && mp->max_address > max_address)
16063 	max_mp = mp;
16064 
16065       /* If we are inserting an 8-byte aligned quantity and
16066 	 we have not already found an insertion point, then
16067 	 make sure that all such 8-byte aligned quantities are
16068 	 placed at the start of the pool.  */
16069       if (ARM_DOUBLEWORD_ALIGN
16070 	  && max_mp == NULL
16071 	  && fix->fix_size >= 8
16072 	  && mp->fix_size < 8)
16073 	{
16074 	  max_mp = mp;
16075 	  max_address = mp->max_address;
16076 	}
16077     }
16078 
16079   /* The value is not currently in the minipool, so we need to create
16080      a new entry for it.  If MAX_MP is NULL, the entry will be put on
16081      the end of the list since the placement is less constrained than
16082      any existing entry.  Otherwise, we insert the new fix before
16083      MAX_MP and, if necessary, adjust the constraints on the other
16084      entries.  */
16085   mp = XNEW (Mnode);
16086   mp->fix_size = fix->fix_size;
16087   mp->mode = fix->mode;
16088   mp->value = fix->value;
16089   mp->refcount = 1;
16090   /* Not yet required for a backwards ref.  */
16091   mp->min_address = -65536;
16092 
16093   if (max_mp == NULL)
16094     {
16095       mp->max_address = max_address;
16096       mp->next = NULL;
16097       mp->prev = minipool_vector_tail;
16098 
16099       if (mp->prev == NULL)
16100 	{
16101 	  minipool_vector_head = mp;
16102 	  minipool_vector_label = gen_label_rtx ();
16103 	}
16104       else
16105 	mp->prev->next = mp;
16106 
16107       minipool_vector_tail = mp;
16108     }
16109   else
16110     {
16111       if (max_address > max_mp->max_address - mp->fix_size)
16112 	mp->max_address = max_mp->max_address - mp->fix_size;
16113       else
16114 	mp->max_address = max_address;
16115 
16116       mp->next = max_mp;
16117       mp->prev = max_mp->prev;
16118       max_mp->prev = mp;
16119       if (mp->prev != NULL)
16120 	mp->prev->next = mp;
16121       else
16122 	minipool_vector_head = mp;
16123     }
16124 
16125   /* Save the new entry.  */
16126   max_mp = mp;
16127 
16128   /* Scan over the preceding entries and adjust their addresses as
16129      required.  */
16130   while (mp->prev != NULL
16131 	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16132     {
16133       mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16134       mp = mp->prev;
16135     }
16136 
16137   return max_mp;
16138 }
16139 
16140 static Mnode *
16141 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16142 				HOST_WIDE_INT  min_address)
16143 {
16144   HOST_WIDE_INT offset;
16145 
16146   /* The code below assumes these are different.  */
16147   gcc_assert (mp != min_mp);
16148 
16149   if (min_mp == NULL)
16150     {
16151       if (min_address > mp->min_address)
16152 	mp->min_address = min_address;
16153     }
16154   else
16155     {
16156       /* We will adjust this below if it is too loose.  */
16157       mp->min_address = min_address;
16158 
16159       /* Unlink MP from its current position.  Since min_mp is non-null,
16160 	 mp->next must be non-null.  */
16161       mp->next->prev = mp->prev;
16162       if (mp->prev != NULL)
16163 	mp->prev->next = mp->next;
16164       else
16165 	minipool_vector_head = mp->next;
16166 
16167       /* Reinsert it after MIN_MP.  */
16168       mp->prev = min_mp;
16169       mp->next = min_mp->next;
16170       min_mp->next = mp;
16171       if (mp->next != NULL)
16172 	mp->next->prev = mp;
16173       else
16174 	minipool_vector_tail = mp;
16175     }
16176 
16177   min_mp = mp;
16178 
16179   offset = 0;
16180   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16181     {
16182       mp->offset = offset;
16183       if (mp->refcount > 0)
16184 	offset += mp->fix_size;
16185 
16186       if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16187 	mp->next->min_address = mp->min_address + mp->fix_size;
16188     }
16189 
16190   return min_mp;
16191 }
16192 
16193 /* Add a constant to the minipool for a backward reference.  Returns the
16194    node added or NULL if the constant will not fit in this pool.
16195 
16196    Note that the code for insertion for a backwards reference can be
16197    somewhat confusing because the calculated offsets for each fix do
16198    not take into account the size of the pool (which is still under
16199    construction).  */
16200 static Mnode *
16201 add_minipool_backward_ref (Mfix *fix)
16202 {
16203   /* If set, min_mp is the last pool_entry that has a lower constraint
16204      than the one we are trying to add.  */
16205   Mnode *min_mp = NULL;
16206   /* This can be negative, since it is only a constraint.  */
16207   HOST_WIDE_INT  min_address = fix->address - fix->backwards;
16208   Mnode *mp;
16209 
16210   /* If we can't reach the current pool from this insn, or if we can't
16211      insert this entry at the end of the pool without pushing other
16212      fixes out of range, then we don't try.  This ensures that we
16213      can't fail later on.  */
16214   if (min_address >= minipool_barrier->address
16215       || (minipool_vector_tail->min_address + fix->fix_size
16216 	  >= minipool_barrier->address))
16217     return NULL;
16218 
16219   /* Scan the pool to see if a constant with the same value has
16220      already been added.  While we are doing this, also note the
16221      location where we must insert the constant if it doesn't already
16222      exist.  */
16223   for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16224     {
16225       if (GET_CODE (fix->value) == GET_CODE (mp->value)
16226 	  && fix->mode == mp->mode
16227 	  && (!LABEL_P (fix->value)
16228 	      || (CODE_LABEL_NUMBER (fix->value)
16229 		  == CODE_LABEL_NUMBER (mp->value)))
16230 	  && rtx_equal_p (fix->value, mp->value)
16231 	  /* Check that there is enough slack to move this entry to the
16232 	     end of the table (this is conservative).  */
16233 	  && (mp->max_address
16234 	      > (minipool_barrier->address
16235 		 + minipool_vector_tail->offset
16236 		 + minipool_vector_tail->fix_size)))
16237 	{
16238 	  mp->refcount++;
16239 	  return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16240 	}
16241 
16242       if (min_mp != NULL)
16243 	mp->min_address += fix->fix_size;
16244       else
16245 	{
16246 	  /* Note the insertion point if necessary.  */
16247 	  if (mp->min_address < min_address)
16248 	    {
16249 	      /* For now, we do not allow the insertion of 8-byte alignment
16250 		 requiring nodes anywhere but at the start of the pool.  */
16251 	      if (ARM_DOUBLEWORD_ALIGN
16252 		  && fix->fix_size >= 8 && mp->fix_size < 8)
16253 		return NULL;
16254 	      else
16255 		min_mp = mp;
16256 	    }
16257 	  else if (mp->max_address
16258 		   < minipool_barrier->address + mp->offset + fix->fix_size)
16259 	    {
16260 	      /* Inserting before this entry would push the fix beyond
16261 		 its maximum address (which can happen if we have
16262 		 re-located a forwards fix); force the new fix to come
16263 		 after it.  */
16264 	      if (ARM_DOUBLEWORD_ALIGN
16265 		  && fix->fix_size >= 8 && mp->fix_size < 8)
16266 		return NULL;
16267 	      else
16268 		{
16269 		  min_mp = mp;
16270 		  min_address = mp->min_address + fix->fix_size;
16271 		}
16272 	    }
16273 	  /* Do not insert a non-8-byte aligned quantity before 8-byte
16274 	     aligned quantities.  */
16275 	  else if (ARM_DOUBLEWORD_ALIGN
16276 		   && fix->fix_size < 8
16277 		   && mp->fix_size >= 8)
16278 	    {
16279 	      min_mp = mp;
16280 	      min_address = mp->min_address + fix->fix_size;
16281 	    }
16282 	}
16283     }
16284 
16285   /* We need to create a new entry.  */
16286   mp = XNEW (Mnode);
16287   mp->fix_size = fix->fix_size;
16288   mp->mode = fix->mode;
16289   mp->value = fix->value;
16290   mp->refcount = 1;
16291   mp->max_address = minipool_barrier->address + 65536;
16292 
16293   mp->min_address = min_address;
16294 
16295   if (min_mp == NULL)
16296     {
16297       mp->prev = NULL;
16298       mp->next = minipool_vector_head;
16299 
16300       if (mp->next == NULL)
16301 	{
16302 	  minipool_vector_tail = mp;
16303 	  minipool_vector_label = gen_label_rtx ();
16304 	}
16305       else
16306 	mp->next->prev = mp;
16307 
16308       minipool_vector_head = mp;
16309     }
16310   else
16311     {
16312       mp->next = min_mp->next;
16313       mp->prev = min_mp;
16314       min_mp->next = mp;
16315 
16316       if (mp->next != NULL)
16317 	mp->next->prev = mp;
16318       else
16319 	minipool_vector_tail = mp;
16320     }
16321 
16322   /* Save the new entry.  */
16323   min_mp = mp;
16324 
16325   if (mp->prev)
16326     mp = mp->prev;
16327   else
16328     mp->offset = 0;
16329 
16330   /* Scan over the following entries and adjust their offsets.  */
16331   while (mp->next != NULL)
16332     {
16333       if (mp->next->min_address < mp->min_address + mp->fix_size)
16334 	mp->next->min_address = mp->min_address + mp->fix_size;
16335 
16336       if (mp->refcount)
16337 	mp->next->offset = mp->offset + mp->fix_size;
16338       else
16339 	mp->next->offset = mp->offset;
16340 
16341       mp = mp->next;
16342     }
16343 
16344   return min_mp;
16345 }
16346 
16347 static void
16348 assign_minipool_offsets (Mfix *barrier)
16349 {
16350   HOST_WIDE_INT offset = 0;
16351   Mnode *mp;
16352 
16353   minipool_barrier = barrier;
16354 
16355   for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16356     {
16357       mp->offset = offset;
16358 
16359       if (mp->refcount > 0)
16360 	offset += mp->fix_size;
16361     }
16362 }
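
/* Example (illustration only): for three live entries of sizes 8, 4 and 4
   the assigned offsets are 0, 8 and 12; an entry whose refcount has dropped
   to zero keeps the running offset but contributes no size of its own.  */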
16363 
16364 /* Output the literal table.  */
16365 static void
16366 dump_minipool (rtx_insn *scan)
16367 {
16368   Mnode * mp;
16369   Mnode * nmp;
16370   int align64 = 0;
16371 
16372   if (ARM_DOUBLEWORD_ALIGN)
16373     for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16374       if (mp->refcount > 0 && mp->fix_size >= 8)
16375 	{
16376 	  align64 = 1;
16377 	  break;
16378 	}
16379 
16380   if (dump_file)
16381     fprintf (dump_file,
16382 	     ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16383 	     INSN_UID (scan), (long) minipool_barrier->address, align64 ? 8 : 4);
16384 
16385   scan = emit_label_after (gen_label_rtx (), scan);
16386   scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16387   scan = emit_label_after (minipool_vector_label, scan);
16388 
16389   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16390     {
16391       if (mp->refcount > 0)
16392 	{
16393 	  if (dump_file)
16394 	    {
16395 	      fprintf (dump_file,
16396 		       ";;  Offset %u, min %ld, max %ld ",
16397 		       (unsigned) mp->offset, (long) mp->min_address,
16398 		       (long) mp->max_address);
16399 	      arm_print_value (dump_file, mp->value);
16400 	      fputc ('\n', dump_file);
16401 	    }
16402 
16403 	  rtx val = copy_rtx (mp->value);
16404 
16405 	  switch (GET_MODE_SIZE (mp->mode))
16406 	    {
16407 #ifdef HAVE_consttable_1
16408 	    case 1:
16409 	      scan = emit_insn_after (gen_consttable_1 (val), scan);
16410 	      break;
16411 
16412 #endif
16413 #ifdef HAVE_consttable_2
16414 	    case 2:
16415 	      scan = emit_insn_after (gen_consttable_2 (val), scan);
16416 	      break;
16417 
16418 #endif
16419 #ifdef HAVE_consttable_4
16420 	    case 4:
16421 	      scan = emit_insn_after (gen_consttable_4 (val), scan);
16422 	      break;
16423 
16424 #endif
16425 #ifdef HAVE_consttable_8
16426 	    case 8:
16427 	      scan = emit_insn_after (gen_consttable_8 (val), scan);
16428 	      break;
16429 
16430 #endif
16431 #ifdef HAVE_consttable_16
16432 	    case 16:
16433               scan = emit_insn_after (gen_consttable_16 (val), scan);
16434               break;
16435 
16436 #endif
16437 	    default:
16438 	      gcc_unreachable ();
16439 	    }
16440 	}
16441 
16442       nmp = mp->next;
16443       free (mp);
16444     }
16445 
16446   minipool_vector_head = minipool_vector_tail = NULL;
16447   scan = emit_insn_after (gen_consttable_end (), scan);
16448   scan = emit_barrier_after (scan);
16449 }
16450 
16451 /* Return the cost of forcibly inserting a barrier after INSN.  */
16452 static int
16453 arm_barrier_cost (rtx_insn *insn)
16454 {
16455   /* Basing the location of the pool on the loop depth is preferable,
16456      but at the moment, the basic block information seems to be
16457      corrupt by this stage of the compilation.  */
16458   int base_cost = 50;
16459   rtx_insn *next = next_nonnote_insn (insn);
16460 
16461   if (next != NULL && LABEL_P (next))
16462     base_cost -= 20;
16463 
16464   switch (GET_CODE (insn))
16465     {
16466     case CODE_LABEL:
16467       /* It will always be better to place the table before the label, rather
16468 	 than after it.  */
16469       return 50;
16470 
16471     case INSN:
16472     case CALL_INSN:
16473       return base_cost;
16474 
16475     case JUMP_INSN:
16476       return base_cost - 10;
16477 
16478     default:
16479       return base_cost + 10;
16480     }
16481 }
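
/* Example (illustration only): a JUMP_INSN followed by a CODE_LABEL costs
   50 - 20 - 10 = 20, whereas an ordinary INSN with no following label costs
   50; since create_fix_barrier prefers the lowest cost, the former is a much
   better place to force a barrier.  */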
16482 
16483 /* Find the best place in the insn stream in the range
16484    (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16485    Create the barrier by inserting a jump and add a new fix entry for
16486    it.  */
16487 static Mfix *
16488 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16489 {
16490   HOST_WIDE_INT count = 0;
16491   rtx_barrier *barrier;
16492   rtx_insn *from = fix->insn;
16493   /* The instruction after which we will insert the jump.  */
16494   rtx_insn *selected = NULL;
16495   int selected_cost;
16496   /* The address at which the jump instruction will be placed.  */
16497   HOST_WIDE_INT selected_address;
16498   Mfix * new_fix;
16499   HOST_WIDE_INT max_count = max_address - fix->address;
16500   rtx_code_label *label = gen_label_rtx ();
16501 
16502   selected_cost = arm_barrier_cost (from);
16503   selected_address = fix->address;
16504 
16505   while (from && count < max_count)
16506     {
16507       rtx_jump_table_data *tmp;
16508       int new_cost;
16509 
16510       /* This code shouldn't have been called if there was a natural barrier
16511 	 within range.  */
16512       gcc_assert (!BARRIER_P (from));
16513 
16514       /* Count the length of this insn.  This must stay in sync with the
16515 	 code that pushes minipool fixes.  */
16516       if (LABEL_P (from))
16517 	count += get_label_padding (from);
16518       else
16519 	count += get_attr_length (from);
16520 
16521       /* If there is a jump table, add its length.  */
16522       if (tablejump_p (from, NULL, &tmp))
16523 	{
16524 	  count += get_jump_table_size (tmp);
16525 
16526 	  /* Jump tables aren't in a basic block, so base the cost on
16527 	     the dispatch insn.  If we select this location, we will
16528 	     still put the pool after the table.  */
16529 	  new_cost = arm_barrier_cost (from);
16530 
16531 	  if (count < max_count
16532 	      && (!selected || new_cost <= selected_cost))
16533 	    {
16534 	      selected = tmp;
16535 	      selected_cost = new_cost;
16536 	      selected_address = fix->address + count;
16537 	    }
16538 
16539 	  /* Continue after the dispatch table.  */
16540 	  from = NEXT_INSN (tmp);
16541 	  continue;
16542 	}
16543 
16544       new_cost = arm_barrier_cost (from);
16545 
16546       if (count < max_count
16547 	  && (!selected || new_cost <= selected_cost))
16548 	{
16549 	  selected = from;
16550 	  selected_cost = new_cost;
16551 	  selected_address = fix->address + count;
16552 	}
16553 
16554       from = NEXT_INSN (from);
16555     }
16556 
16557   /* Make sure that we found a place to insert the jump.  */
16558   gcc_assert (selected);
16559 
16560   /* Create a new JUMP_INSN that branches around a barrier.  */
16561   from = emit_jump_insn_after (gen_jump (label), selected);
16562   JUMP_LABEL (from) = label;
16563   barrier = emit_barrier_after (from);
16564   emit_label_after (label, barrier);
16565 
16566   /* Create a minipool barrier entry for the new barrier.  */
16567   new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16568   new_fix->insn = barrier;
16569   new_fix->address = selected_address;
16570   new_fix->next = fix->next;
16571   fix->next = new_fix;
16572 
16573   return new_fix;
16574 }
16575 
16576 /* Record that there is a natural barrier in the insn stream at
16577    ADDRESS.  */
16578 static void
16579 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16580 {
16581   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16582 
16583   fix->insn = insn;
16584   fix->address = address;
16585 
16586   fix->next = NULL;
16587   if (minipool_fix_head != NULL)
16588     minipool_fix_tail->next = fix;
16589   else
16590     minipool_fix_head = fix;
16591 
16592   minipool_fix_tail = fix;
16593 }
16594 
16595 /* Record INSN, which will need fixing up to load a value from the
16596    minipool.  ADDRESS is the offset of the insn since the start of the
16597    function; LOC is a pointer to the part of the insn which requires
16598    fixing; VALUE is the constant that must be loaded, which is of type
16599    MODE.  */
16600 static void
16601 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16602 		   machine_mode mode, rtx value)
16603 {
16604   gcc_assert (!arm_disable_literal_pool);
16605   Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16606 
16607   fix->insn = insn;
16608   fix->address = address;
16609   fix->loc = loc;
16610   fix->mode = mode;
16611   fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16612   fix->value = value;
16613   fix->forwards = get_attr_pool_range (insn);
16614   fix->backwards = get_attr_neg_pool_range (insn);
16615   fix->minipool = NULL;
16616 
16617   /* If an insn doesn't have a range defined for it, then it isn't
16618      expecting to be reworked by this code.  Better to stop now than
16619      to generate duff assembly code.  */
16620   gcc_assert (fix->forwards || fix->backwards);
16621 
16622   /* If an entry requires 8-byte alignment then assume all constant pools
16623      require 4 bytes of padding.  Trying to do this later on a per-pool
16624      basis is awkward because existing pool entries have to be modified.  */
16625   if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16626     minipool_pad = 4;
16627 
16628   if (dump_file)
16629     {
16630       fprintf (dump_file,
16631 	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16632 	       GET_MODE_NAME (mode),
16633 	       INSN_UID (insn), (unsigned long) address,
16634 	       -1 * (long)fix->backwards, (long)fix->forwards);
16635       arm_print_value (dump_file, fix->value);
16636       fprintf (dump_file, "\n");
16637     }
16638 
16639   /* Add it to the chain of fixes.  */
16640   fix->next = NULL;
16641 
16642   if (minipool_fix_head != NULL)
16643     minipool_fix_tail->next = fix;
16644   else
16645     minipool_fix_head = fix;
16646 
16647   minipool_fix_tail = fix;
16648 }
16649 
16650 /* Return the maximum allowed cost (in insns) of synthesizing a 64-bit
16651    constant inline.  Constants whose synthesis would cost more than this
16652    are not synthesized inline.  */
16653 int
16654 arm_max_const_double_inline_cost ()
16655 {
16656   return ((optimize_size || arm_ld_sched) ? 3 : 4);
16657 }
16658 
16659 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16660    Returns the number of insns needed, or 99 if we don't know how to
16661    do it.  */
16662 int
16663 arm_const_double_inline_cost (rtx val)
16664 {
16665   rtx lowpart, highpart;
16666   machine_mode mode;
16667 
16668   mode = GET_MODE (val);
16669 
16670   if (mode == VOIDmode)
16671     mode = DImode;
16672 
16673   gcc_assert (GET_MODE_SIZE (mode) == 8);
16674 
16675   lowpart = gen_lowpart (SImode, val);
16676   highpart = gen_highpart_mode (SImode, mode, val);
16677 
16678   gcc_assert (CONST_INT_P (lowpart));
16679   gcc_assert (CONST_INT_P (highpart));
16680 
16681   return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16682 			    NULL_RTX, NULL_RTX, 0, 0)
16683 	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16684 			      NULL_RTX, NULL_RTX, 0, 0));
16685 }
16686 
16687 /* Cost of loading a SImode constant.  */
16688 static inline int
16689 arm_const_inline_cost (enum rtx_code code, rtx val)
16690 {
16691   return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16692                            NULL_RTX, NULL_RTX, 1, 0);
16693 }
16694 
16695 /* Return true if it is worthwhile to split a 64-bit constant into two
16696    32-bit operations.  This is the case if optimizing for size, or
16697    if we have load delay slots, or if one 32-bit part can be done with
16698    a single data operation.  */
16699 bool
16700 arm_const_double_by_parts (rtx val)
16701 {
16702   machine_mode mode = GET_MODE (val);
16703   rtx part;
16704 
16705   if (optimize_size || arm_ld_sched)
16706     return true;
16707 
16708   if (mode == VOIDmode)
16709     mode = DImode;
16710 
16711   part = gen_highpart_mode (SImode, mode, val);
16712 
16713   gcc_assert (CONST_INT_P (part));
16714 
16715   if (const_ok_for_arm (INTVAL (part))
16716       || const_ok_for_arm (~INTVAL (part)))
16717     return true;
16718 
16719   part = gen_lowpart (SImode, val);
16720 
16721   gcc_assert (CONST_INT_P (part));
16722 
16723   if (const_ok_for_arm (INTVAL (part))
16724       || const_ok_for_arm (~INTVAL (part)))
16725     return true;
16726 
16727   return false;
16728 }
16729 
16730 /* Return true if it is possible to inline both the high and low parts
16731    of a 64-bit constant into 32-bit data processing instructions.  */
16732 bool
16733 arm_const_double_by_immediates (rtx val)
16734 {
16735   machine_mode mode = GET_MODE (val);
16736   rtx part;
16737 
16738   if (mode == VOIDmode)
16739     mode = DImode;
16740 
16741   part = gen_highpart_mode (SImode, mode, val);
16742 
16743   gcc_assert (CONST_INT_P (part));
16744 
16745   if (!const_ok_for_arm (INTVAL (part)))
16746     return false;
16747 
16748   part = gen_lowpart (SImode, val);
16749 
16750   gcc_assert (CONST_INT_P (part));
16751 
16752   if (!const_ok_for_arm (INTVAL (part)))
16753     return false;
16754 
16755   return true;
16756 }
16757 
16758 /* Scan INSN and note any of its operands that need fixing.
16759    If DO_PUSHES is false we do not actually push any of the fixups
16760    needed.  */
16761 static void
16762 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16763 {
16764   int opno;
16765 
16766   extract_constrain_insn (insn);
16767 
16768   if (recog_data.n_alternatives == 0)
16769     return;
16770 
16771   /* Fill in recog_op_alt with information about the constraints of
16772      this insn.  */
16773   preprocess_constraints (insn);
16774 
16775   const operand_alternative *op_alt = which_op_alt ();
16776   for (opno = 0; opno < recog_data.n_operands; opno++)
16777     {
16778       /* Things we need to fix can only occur in inputs.  */
16779       if (recog_data.operand_type[opno] != OP_IN)
16780 	continue;
16781 
16782       /* If this alternative is a memory reference, then any mention
16783 	 of constants in this alternative is really to fool reload
16784 	 into allowing us to accept one there.  We need to fix them up
16785 	 now so that we output the right code.  */
16786       if (op_alt[opno].memory_ok)
16787 	{
16788 	  rtx op = recog_data.operand[opno];
16789 
16790 	  if (CONSTANT_P (op))
16791 	    {
16792 	      if (do_pushes)
16793 		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16794 				   recog_data.operand_mode[opno], op);
16795 	    }
16796 	  else if (MEM_P (op)
16797 		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16798 		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16799 	    {
16800 	      if (do_pushes)
16801 		{
16802 		  rtx cop = avoid_constant_pool_reference (op);
16803 
16804 		  /* Casting the address of something to a mode narrower
16805 		     than a word can cause avoid_constant_pool_reference()
16806 		     to return the pool reference itself.  That's no good to
16807 		     us here.  Let's just hope that we can use the
16808 		     constant pool value directly.  */
16809 		  if (op == cop)
16810 		    cop = get_pool_constant (XEXP (op, 0));
16811 
16812 		  push_minipool_fix (insn, address,
16813 				     recog_data.operand_loc[opno],
16814 				     recog_data.operand_mode[opno], cop);
16815 		}
16816 
16817 	    }
16818 	}
16819     }
16820 
16821   return;
16822 }
16823 
16824 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16825    and unions in the context of ARMv8-M Security Extensions.  It is used as a
16826    helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16827    functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16828    or four masks, depending on whether it is being computed for a
16829    'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16830    respectively.  The tree for the type of the argument or a field within an
16831    argument is passed in ARG_TYPE, the current register this argument or field
16832    starts in is kept in the pointer REGNO and updated accordingly, the bit this
16833    argument or field starts at is passed in STARTING_BIT and the last used bit
16834    is kept in LAST_USED_BIT which is also updated accordingly.  */
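
/* Worked example (illustration only): for an argument of type
   struct { char c; int i; } passed starting in r0, the char occupies bits
   0-7 of r0 and the int is placed in r1, so bits 8-31 of r0 are padding:
   padding_bits_to_clear[0] ends up as 0xffffff00, and r0 and r1 both end up
   marked as in use in the not-to-clear mask computed via
   compute_not_to_clear_mask below.  */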
16835 
16836 static unsigned HOST_WIDE_INT
16837 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16838 			       uint32_t * padding_bits_to_clear,
16839 			       unsigned starting_bit, int * last_used_bit)
16840 
16841 {
16842   unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16843 
16844   if (TREE_CODE (arg_type) == RECORD_TYPE)
16845     {
16846       unsigned current_bit = starting_bit;
16847       tree field;
16848       long int offset, size;
16849 
16850 
16851       field = TYPE_FIELDS (arg_type);
16852       while (field)
16853 	{
16854 	  /* The offset within a structure is always an offset from
16855 	     the start of that structure.  Make sure we take that into account
16856 	     in the calculation of the register-based offset that we use here.  */
16857 	  offset = starting_bit;
16858 	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16859 	  offset %= 32;
16860 
16861 	  /* This is the actual size of the field, for bitfields this is the
16862 	     bitfield width and not the container size.  */
16863 	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16864 
16865 	  if (*last_used_bit != offset)
16866 	    {
16867 	      if (offset < *last_used_bit)
16868 		{
16869 		  /* This field's offset is before the 'last_used_bit', that
16870 		     means this field goes on the next register.  So we need to
16871 		     pad the rest of the current register and increase the
16872 		     register number.  */
16873 		  uint32_t mask;
16874 		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16875 		  mask++;
16876 
16877 		  padding_bits_to_clear[*regno] |= mask;
16878 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16879 		  (*regno)++;
16880 		}
16881 	      else
16882 		{
16883 		  /* Otherwise we pad the bits between the last field's end and
16884 		     the start of the new field.  */
16885 		  uint32_t mask;
16886 
16887 		  mask = ((uint32_t)-1) >> (32 - offset);
16888 		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16889 		  padding_bits_to_clear[*regno] |= mask;
16890 		}
16891 	      current_bit = offset;
16892 	    }
16893 
16894 	  /* Calculate further padding bits for inner structs/unions too.  */
16895 	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16896 	    {
16897 	      *last_used_bit = current_bit;
16898 	      not_to_clear_reg_mask
16899 		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16900 						  padding_bits_to_clear, offset,
16901 						  last_used_bit);
16902 	    }
16903 	  else
16904 	    {
16905 	      /* Update 'current_bit' with this field's size.  If the
16906 		 'current_bit' lies in a subsequent register, update 'regno' and
16907 		 reset 'current_bit' to point to the current bit in that new
16908 		 register.  */
16909 	      current_bit += size;
16910 	      while (current_bit >= 32)
16911 		{
16912 		  current_bit-=32;
16913 		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16914 		  (*regno)++;
16915 		}
16916 	      *last_used_bit = current_bit;
16917 	    }
16918 
16919 	  field = TREE_CHAIN (field);
16920 	}
16921       not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16922     }
16923   else if (TREE_CODE (arg_type) == UNION_TYPE)
16924     {
16925       tree field, field_t;
16926       int i, regno_t, field_size;
16927       int max_reg = -1;
16928       int max_bit = -1;
16929       uint32_t mask;
16930       uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16931 	= {-1, -1, -1, -1};
16932 
16933       /* To compute the padding bits in a union we only consider bits as
16934 	 padding bits if they are always either a padding bit or fall outside a
16935 	 field's size for all fields in the union.  */
16936       field = TYPE_FIELDS (arg_type);
16937       while (field)
16938 	{
16939 	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16940 	    = {0U, 0U, 0U, 0U};
16941 	  int last_used_bit_t = *last_used_bit;
16942 	  regno_t = *regno;
16943 	  field_t = TREE_TYPE (field);
16944 
16945 	  /* If the field's type is either a record or a union make sure to
16946 	     compute their padding bits too.  */
16947 	  if (RECORD_OR_UNION_TYPE_P (field_t))
16948 	    not_to_clear_reg_mask
16949 	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16950 						&padding_bits_to_clear_t[0],
16951 						starting_bit, &last_used_bit_t);
16952 	  else
16953 	    {
16954 	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16955 	      regno_t = (field_size / 32) + *regno;
16956 	      last_used_bit_t = (starting_bit + field_size) % 32;
16957 	    }
16958 
16959 	  for (i = *regno; i < regno_t; i++)
16960 	    {
16961 	      /* For all but the last register used by this field only keep the
16962 		 padding bits that were padding bits in this field.  */
16963 	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16964 	    }
16965 
16966 	  /* For the last register, keep all padding bits that were padding
16967 	     bits in this field and any padding bits that are still valid
16968 	     as padding bits but fall outside of this field's size.  */
16969 	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16970 	  padding_bits_to_clear_res[regno_t]
16971 	    &= padding_bits_to_clear_t[regno_t] | mask;
16972 
16973 	  /* Update the maximum size of the fields in terms of registers used
16974 	     ('max_reg') and the 'last_used_bit' in said register.  */
16975 	  if (max_reg < regno_t)
16976 	    {
16977 	      max_reg = regno_t;
16978 	      max_bit = last_used_bit_t;
16979 	    }
16980 	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
16981 	    max_bit = last_used_bit_t;
16982 
16983 	  field = TREE_CHAIN (field);
16984 	}
16985 
16986       /* Update the current padding_bits_to_clear using the intersection of the
16987 	 padding bits of all the fields.  */
16988       for (i=*regno; i < max_reg; i++)
16989 	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16990 
16991       /* Do not keep trailing padding bits, we do not know yet whether this
16992 	 is the end of the argument.  */
16993       mask = ((uint32_t) 1 << max_bit) - 1;
16994       padding_bits_to_clear[max_reg]
16995 	|= padding_bits_to_clear_res[max_reg] & mask;
16996 
16997       *regno = max_reg;
16998       *last_used_bit = max_bit;
16999     }
17000   else
17001     /* This function should only be used for structs and unions.  */
17002     gcc_unreachable ();
17003 
17004   return not_to_clear_reg_mask;
17005 }
17006 
17007 /* In the context of ARMv8-M Security Extensions, this function is used for both
17008    'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17009    registers are used when returning or passing arguments, which is then
17010    returned as a mask.  It will also compute a mask to indicate padding/unused
17011    bits for each of these registers, and passes this through the
17012    PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
17013    ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17014    the starting register used to pass this argument or return value is passed
17015    in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17016    for struct and union types.  */
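
/* Example (illustration only): a double passed in d0 under the hard-float
   ABI occupies two consecutive VFP registers (s0 and s1), so both register
   numbers are set in the returned mask and neither is cleared before the
   call.  */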
17017 
17018 static unsigned HOST_WIDE_INT
17019 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17020 			     uint32_t * padding_bits_to_clear)
17021 
17022 {
17023   int last_used_bit = 0;
17024   unsigned HOST_WIDE_INT not_to_clear_mask;
17025 
17026   if (RECORD_OR_UNION_TYPE_P (arg_type))
17027     {
17028       not_to_clear_mask
17029 	= comp_not_to_clear_mask_str_un (arg_type, &regno,
17030 					 padding_bits_to_clear, 0,
17031 					 &last_used_bit);
17032 
17033 
17034       /* If the 'last_used_bit' is not zero, that means we are still using a
17035 	 part of the last 'regno'.  In such cases we must clear the trailing
17036 	 bits.  Otherwise we are not using regno and we should mark it to be
17037 	 cleared.  */
17038       if (last_used_bit != 0)
17039 	padding_bits_to_clear[regno]
17040 	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17041       else
17042 	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17043     }
17044   else
17045     {
17046       not_to_clear_mask = 0;
17047       /* We are not dealing with structs nor unions.  So these arguments may be
17048 	 passed in floating point registers too.  In some cases a BLKmode is
17049 	 used when returning or passing arguments in multiple VFP registers.  */
17050       if (GET_MODE (arg_rtx) == BLKmode)
17051 	{
17052 	  int i, arg_regs;
17053 	  rtx reg;
17054 
17055 	  /* This should really only occur when dealing with the hard-float
17056 	     ABI.  */
17057 	  gcc_assert (TARGET_HARD_FLOAT_ABI);
17058 
17059 	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17060 	    {
17061 	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17062 	      gcc_assert (REG_P (reg));
17063 
17064 	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17065 
17066 	      /* If we are dealing with DF mode, make sure we don't
17067 		 clear either of the registers it addresses.  */
17068 	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17069 	      if (arg_regs > 1)
17070 		{
17071 		  unsigned HOST_WIDE_INT mask;
17072 		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17073 		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
17074 		  not_to_clear_mask |= mask;
17075 		}
17076 	    }
17077 	}
17078       else
17079 	{
17080 	  /* Otherwise we can rely on the MODE to determine how many registers
17081 	     are being used by this argument.  */
17082 	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17083 	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17084 	  if (arg_regs > 1)
17085 	    {
17086 	      unsigned HOST_WIDE_INT
17087 	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17088 	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17089 	      not_to_clear_mask |= mask;
17090 	    }
17091 	}
17092     }
17093 
17094   return not_to_clear_mask;
17095 }
17096 
17097 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call
17098    or returning from a cmse_nonsecure_entry function.  TO_CLEAR_BITMAP
17099    indicates which registers are to be fully cleared, using the value in
17100    register CLEARING_REG if more efficient.  The PADDING_BITS_TO_CLEAR array,
17101    of PADDING_BITS_LEN entries, gives the bits that need to be cleared in
17102    caller-saved core registers, with SCRATCH_REG used for that clearing.
17103 
17104    NOTE: one of the following three conditions must hold:
17105    - SCRATCH_REG is a low register
17106    - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17107      in TO_CLEAR_BITMAP)
17108    - CLEARING_REG is a low register.  */
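
/* Sketch of the padding-clearing sequence for one argument register
   (illustration only): with ~padding_bits_to_clear[i] == 0x0000ff00 the
   code below emits roughly

	mov	scratch, #0xff00	@ lower half of the inverted mask
	and	rN, rN, scratch

   the insertion of the upper half into scratch being skipped because it
   is zero.  */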
17109 
17110 static void
17111 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17112 		      int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17113 {
17114   bool saved_clearing = false;
17115   rtx saved_clearing_reg = NULL_RTX;
17116   int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17117 
17118   gcc_assert (arm_arch_cmse);
17119 
17120   if (!bitmap_empty_p (to_clear_bitmap))
17121     {
17122       minregno = bitmap_first_set_bit (to_clear_bitmap);
17123       maxregno = bitmap_last_set_bit (to_clear_bitmap);
17124     }
17125   clearing_regno = REGNO (clearing_reg);
17126 
17127   /* Clear padding bits.  */
17128   gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17129   for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17130     {
17131       uint64_t mask;
17132       rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17133 
17134       if (padding_bits_to_clear[i] == 0)
17135 	continue;
17136 
17137       /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17138 	 CLEARING_REG as scratch.  */
17139       if (TARGET_THUMB1
17140 	  && REGNO (scratch_reg) > LAST_LO_REGNUM)
17141 	{
17142 	  /* clearing_reg is not to be cleared, copy its value into scratch_reg
17143 	     such that we can use clearing_reg to clear the unused bits in the
17144 	     arguments.  */
17145 	  if ((clearing_regno > maxregno
17146 	       || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17147 	      && !saved_clearing)
17148 	    {
17149 	      gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17150 	      emit_move_insn (scratch_reg, clearing_reg);
17151 	      saved_clearing = true;
17152 	      saved_clearing_reg = scratch_reg;
17153 	    }
17154 	  scratch_reg = clearing_reg;
17155 	}
17156 
17157       /* Fill the lower half of the negated padding_bits_to_clear[i].  */
17158       mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17159       emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17160 
17161       /* Fill the top half of the negated padding_bits_to_clear[i].  */
17162       mask = (~padding_bits_to_clear[i]) >> 16;
17163       rtx16 = gen_int_mode (16, SImode);
17164       dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17165       if (mask)
17166 	emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17167 
17168       emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17169     }
17170   if (saved_clearing)
17171     emit_move_insn (clearing_reg, saved_clearing_reg);
17172 
17173 
17174   /* Clear full registers.  */
17175 
17176   /* If not marked for clearing, clearing_reg already does not contain
17177      any secret.  */
17178   if (clearing_regno <= maxregno
17179       && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17180     {
17181       emit_move_insn (clearing_reg, const0_rtx);
17182       emit_use (clearing_reg);
17183       bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17184     }
17185 
17186   for (regno = minregno; regno <= maxregno; regno++)
17187     {
17188       if (!bitmap_bit_p (to_clear_bitmap, regno))
17189 	continue;
17190 
17191       if (IS_VFP_REGNUM (regno))
17192 	{
17193 	  /* If regno is an even vfp register and its successor is also to
17194 	     be cleared, use vmov.  */
17195 	  if (TARGET_VFP_DOUBLE
17196 	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
17197 	      && bitmap_bit_p (to_clear_bitmap, regno + 1))
17198 	    {
17199 	      emit_move_insn (gen_rtx_REG (DFmode, regno),
17200 			      CONST1_RTX (DFmode));
17201 	      emit_use (gen_rtx_REG (DFmode, regno));
17202 	      regno++;
17203 	    }
17204 	  else
17205 	    {
17206 	      emit_move_insn (gen_rtx_REG (SFmode, regno),
17207 			      CONST1_RTX (SFmode));
17208 	      emit_use (gen_rtx_REG (SFmode, regno));
17209 	    }
17210 	}
17211       else
17212 	{
17213 	  emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17214 	  emit_use (gen_rtx_REG (SImode, regno));
17215 	}
17216     }
17217 }
17218 
17219 /* Clears caller saved registers not used to pass arguments before a
17220    cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
17221    registers is done in __gnu_cmse_nonsecure_call libcall.
17222    See libgcc/config/arm/cmse_nonsecure_call.S.  */
17223 
17224 static void
17225 cmse_nonsecure_call_clear_caller_saved (void)
17226 {
17227   basic_block bb;
17228 
17229   FOR_EACH_BB_FN (bb, cfun)
17230     {
17231       rtx_insn *insn;
17232 
17233       FOR_BB_INSNS (bb, insn)
17234 	{
17235 	  unsigned address_regnum, regno, maxregno =
17236 	    TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17237 	  auto_sbitmap to_clear_bitmap (maxregno + 1);
17238 	  rtx_insn *seq;
17239 	  rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17240 	  rtx address;
17241 	  CUMULATIVE_ARGS args_so_far_v;
17242 	  cumulative_args_t args_so_far;
17243 	  tree arg_type, fntype;
17244 	  bool first_param = true;
17245 	  function_args_iterator args_iter;
17246 	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17247 
17248 	  if (!NONDEBUG_INSN_P (insn))
17249 	    continue;
17250 
17251 	  if (!CALL_P (insn))
17252 	    continue;
17253 
17254 	  pat = PATTERN (insn);
17255 	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17256 	  call = XVECEXP (pat, 0, 0);
17257 
17258 	  /* Get the real call RTX if the insn sets a value, i.e. returns.  */
17259 	  if (GET_CODE (call) == SET)
17260 	      call = SET_SRC (call);
17261 
17262 	  /* Check if it is a cmse_nonsecure_call.  */
17263 	  unspec = XEXP (call, 0);
17264 	  if (GET_CODE (unspec) != UNSPEC
17265 	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17266 	    continue;
17267 
17268 	  /* Determine the caller-saved registers we need to clear.  */
17269 	  bitmap_clear (to_clear_bitmap);
17270 	  bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17271 
17272 	  /* Only look at the caller-saved floating point registers in case of
17273 	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
17274 	     lazy store and loads which clear both caller- and callee-saved
17275 	     registers.  */
17276 	  if (TARGET_HARD_FLOAT_ABI)
17277 	    {
17278 	      auto_sbitmap float_bitmap (maxregno + 1);
17279 
17280 	      bitmap_clear (float_bitmap);
17281 	      bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17282 				D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17283 	      bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17284 	    }
17285 
17286 	  /* Make sure the register used to hold the function address is not
17287 	     cleared.  */
17288 	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
17289 	  gcc_assert (MEM_P (address));
17290 	  gcc_assert (REG_P (XEXP (address, 0)));
17291 	  address_regnum = REGNO (XEXP (address, 0));
17292 	  if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17293 	    bitmap_clear_bit (to_clear_bitmap, address_regnum);
17294 
17295 	  /* Set basic block of call insn so that df rescan is performed on
17296 	     insns inserted here.  */
17297 	  set_block_for_insn (insn, bb);
17298 	  df_set_flags (DF_DEFER_INSN_RESCAN);
17299 	  start_sequence ();
17300 
17301 	  /* Make sure the scheduler doesn't schedule other insns beyond
17302 	     here.  */
17303 	  emit_insn (gen_blockage ());
17304 
17305 	  /* Walk through all arguments and clear registers
17306 	     appropriately.  */
17307 	  fntype = TREE_TYPE (MEM_EXPR (address));
17308 	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17309 				    NULL_TREE);
17310 	  args_so_far = pack_cumulative_args (&args_so_far_v);
17311 	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17312 	    {
17313 	      rtx arg_rtx;
17314 	      uint64_t to_clear_args_mask;
17315 	      machine_mode arg_mode = TYPE_MODE (arg_type);
17316 
17317 	      if (VOID_TYPE_P (arg_type))
17318 		continue;
17319 
17320 	      if (!first_param)
17321 		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17322 					  true);
17323 
17324 	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17325 					  true);
17326 	      gcc_assert (REG_P (arg_rtx));
17327 	      to_clear_args_mask
17328 		= compute_not_to_clear_mask (arg_type, arg_rtx,
17329 					     REGNO (arg_rtx),
17330 					     &padding_bits_to_clear[0]);
17331 	      if (to_clear_args_mask)
17332 		{
17333 		  for (regno = R0_REGNUM; regno <= maxregno; regno++)
17334 		    {
17335 		      if (to_clear_args_mask & (1ULL << regno))
17336 			bitmap_clear_bit (to_clear_bitmap, regno);
17337 		    }
17338 		}
17339 
17340 	      first_param = false;
17341 	    }
17342 
17343 	  /* We use right shift and left shift to clear the LSB of the address
17344 	     we jump to instead of using bic, to avoid having to use an extra
17345 	     register on Thumb-1.  */
17346 	  clearing_reg = XEXP (address, 0);
17347 	  shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17348 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
17349 	  shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17350 	  emit_insn (gen_rtx_SET (clearing_reg, shift));
17351 
17352 	  /* Clear caller-saved registers that leak before doing a non-secure
17353 	     call.  */
17354 	  ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17355 	  cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17356 				NUM_ARG_REGS, ip_reg, clearing_reg);
17357 
17358 	  seq = get_insns ();
17359 	  end_sequence ();
17360 	  emit_insn_before (seq, insn);
17361 	}
17362     }
17363 }
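
/* Illustrative sketch (not the exact output) of what this pass inserts
   before a cmse_nonsecure_call through a hypothetical address register r4:

	lsrs	r4, r4, #1	@ clear the LSB of the call address...
	lsls	r4, r4, #1	@ ...without needing an extra register
	mov	r1, r4		@ overwrite caller-saved registers that do
	mov	r2, r4		@ not pass arguments with a value that is
	mov	r3, r4		@ known not to contain a secret

   The callee-saved registers are saved, cleared and restored later, in
   the __gnu_cmse_nonsecure_call library routine.  */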
17364 
17365 /* Rewrite move insn into subtract of 0 if the condition codes will
17366    be useful in next conditional jump insn.  */
17367 
17368 static void
17369 thumb1_reorg (void)
17370 {
17371   basic_block bb;
17372 
17373   FOR_EACH_BB_FN (bb, cfun)
17374     {
17375       rtx dest, src;
17376       rtx cmp, op0, op1, set = NULL;
17377       rtx_insn *prev, *insn = BB_END (bb);
17378       bool insn_clobbered = false;
17379 
17380       while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17381 	insn = PREV_INSN (insn);
17382 
17383       /* Find the last cbranchsi4_insn in basic block BB.  */
17384       if (insn == BB_HEAD (bb)
17385 	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17386 	continue;
17387 
17388       /* Get the register with which we are comparing.  */
17389       cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17390       op0 = XEXP (cmp, 0);
17391       op1 = XEXP (cmp, 1);
17392 
17393       /* Check that comparison is against ZERO.  */
17394       if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17395 	continue;
17396 
17397       /* Find the first flag setting insn before INSN in basic block BB.  */
17398       gcc_assert (insn != BB_HEAD (bb));
17399       for (prev = PREV_INSN (insn);
17400 	   (!insn_clobbered
17401 	    && prev != BB_HEAD (bb)
17402 	    && (NOTE_P (prev)
17403 		|| DEBUG_INSN_P (prev)
17404 		|| ((set = single_set (prev)) != NULL
17405 		    && get_attr_conds (prev) == CONDS_NOCOND)));
17406 	   prev = PREV_INSN (prev))
17407 	{
17408 	  if (reg_set_p (op0, prev))
17409 	    insn_clobbered = true;
17410 	}
17411 
17412       /* Skip if op0 is clobbered by insn other than prev. */
17413       if (insn_clobbered)
17414 	continue;
17415 
17416       if (!set)
17417 	continue;
17418 
17419       dest = SET_DEST (set);
17420       src = SET_SRC (set);
17421       if (!low_register_operand (dest, SImode)
17422 	  || !low_register_operand (src, SImode))
17423 	continue;
17424 
17425       /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17426 	 in INSN.  Both src and dest of the move insn are checked.  */
17427       if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17428 	{
17429 	  dest = copy_rtx (dest);
17430 	  src = copy_rtx (src);
17431 	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
17432 	  PATTERN (prev) = gen_rtx_SET (dest, src);
17433 	  INSN_CODE (prev) = -1;
17434 	  /* Set test register in INSN to dest.  */
17435 	  XEXP (cmp, 0) = copy_rtx (dest);
17436 	  INSN_CODE (insn) = -1;
17437 	}
17438     }
17439 }
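
/* For instance (illustrative only), in a Thumb-1 sequence such as

	movs	r2, r3
	...
	cmp	r2, #0
	bne	.L1

   the move is rewritten as "subs r2, r3, #0", which sets the condition
   codes itself, so the cbranchsi4 pattern can reuse the flags instead of
   emitting a separate compare.  */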
17440 
17441 /* Convert instructions to their cc-clobbering variant if possible, since
17442    that allows us to use smaller encodings.  */
17443 
17444 static void
17445 thumb2_reorg (void)
17446 {
17447   basic_block bb;
17448   regset_head live;
17449 
17450   INIT_REG_SET (&live);
17451 
17452   /* We are freeing block_for_insn in the toplev to keep compatibility
17453      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
17454   compute_bb_for_insn ();
17455   df_analyze ();
17456 
17457   enum Convert_Action {SKIP, CONV, SWAP_CONV};
17458 
17459   FOR_EACH_BB_FN (bb, cfun)
17460     {
17461       if ((current_tune->disparage_flag_setting_t16_encodings
17462 	   == tune_params::DISPARAGE_FLAGS_ALL)
17463 	  && optimize_bb_for_speed_p (bb))
17464 	continue;
17465 
17466       rtx_insn *insn;
17467       Convert_Action action = SKIP;
17468       Convert_Action action_for_partial_flag_setting
17469 	= ((current_tune->disparage_flag_setting_t16_encodings
17470 	    != tune_params::DISPARAGE_FLAGS_NEITHER)
17471 	   && optimize_bb_for_speed_p (bb))
17472 	  ? SKIP : CONV;
17473 
17474       COPY_REG_SET (&live, DF_LR_OUT (bb));
17475       df_simulate_initialize_backwards (bb, &live);
17476       FOR_BB_INSNS_REVERSE (bb, insn)
17477 	{
17478 	  if (NONJUMP_INSN_P (insn)
17479 	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
17480 	      && GET_CODE (PATTERN (insn)) == SET)
17481 	    {
17482 	      action = SKIP;
17483 	      rtx pat = PATTERN (insn);
17484 	      rtx dst = XEXP (pat, 0);
17485 	      rtx src = XEXP (pat, 1);
17486 	      rtx op0 = NULL_RTX, op1 = NULL_RTX;
17487 
17488 	      if (UNARY_P (src) || BINARY_P (src))
17489 		  op0 = XEXP (src, 0);
17490 
17491 	      if (BINARY_P (src))
17492 		  op1 = XEXP (src, 1);
17493 
17494 	      if (low_register_operand (dst, SImode))
17495 		{
17496 		  switch (GET_CODE (src))
17497 		    {
17498 		    case PLUS:
17499 		      /* Adding two registers and storing the result
17500 			 in the first source is already a 16-bit
17501 			 operation.  */
17502 		      if (rtx_equal_p (dst, op0)
17503 			  && register_operand (op1, SImode))
17504 			break;
17505 
17506 		      if (low_register_operand (op0, SImode))
17507 			{
17508 			  /* ADDS <Rd>,<Rn>,<Rm>  */
17509 			  if (low_register_operand (op1, SImode))
17510 			    action = CONV;
17511 			  /* ADDS <Rdn>,#<imm8>  */
17512 			  /* SUBS <Rdn>,#<imm8>  */
17513 			  else if (rtx_equal_p (dst, op0)
17514 				   && CONST_INT_P (op1)
17515 				   && IN_RANGE (INTVAL (op1), -255, 255))
17516 			    action = CONV;
17517 			  /* ADDS <Rd>,<Rn>,#<imm3>  */
17518 			  /* SUBS <Rd>,<Rn>,#<imm3>  */
17519 			  else if (CONST_INT_P (op1)
17520 				   && IN_RANGE (INTVAL (op1), -7, 7))
17521 			    action = CONV;
17522 			}
17523 		      /* ADCS <Rd>, <Rn>  */
17524 		      else if (GET_CODE (XEXP (src, 0)) == PLUS
17525 			      && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17526 			      && low_register_operand (XEXP (XEXP (src, 0), 1),
17527 						       SImode)
17528 			      && COMPARISON_P (op1)
17529 			      && cc_register (XEXP (op1, 0), VOIDmode)
17530 			      && maybe_get_arm_condition_code (op1) == ARM_CS
17531 			      && XEXP (op1, 1) == const0_rtx)
17532 		        action = CONV;
17533 		      break;
17534 
17535 		    case MINUS:
17536 		      /* RSBS <Rd>,<Rn>,#0
17537 			 Not handled here: see NEG below.  */
17538 		      /* SUBS <Rd>,<Rn>,#<imm3>
17539 			 SUBS <Rdn>,#<imm8>
17540 			 Not handled here: see PLUS above.  */
17541 		      /* SUBS <Rd>,<Rn>,<Rm>  */
17542 		      if (low_register_operand (op0, SImode)
17543 			  && low_register_operand (op1, SImode))
17544 			    action = CONV;
17545 		      break;
17546 
17547 		    case MULT:
17548 		      /* MULS <Rdm>,<Rn>,<Rdm>
17549 			 As an exception to the rule, this is only used
17550 			 when optimizing for size since MULS is slow on all
17551 			 known implementations.  We do not even want to use
17552 			 MULS in cold code, if optimizing for speed, so we
17553 			 test the global flag here.  */
17554 		      if (!optimize_size)
17555 			break;
17556 		      /* Fall through.  */
17557 		    case AND:
17558 		    case IOR:
17559 		    case XOR:
17560 		      /* ANDS <Rdn>,<Rm>  */
17561 		      if (rtx_equal_p (dst, op0)
17562 			  && low_register_operand (op1, SImode))
17563 			action = action_for_partial_flag_setting;
17564 		      else if (rtx_equal_p (dst, op1)
17565 			       && low_register_operand (op0, SImode))
17566 			action = action_for_partial_flag_setting == SKIP
17567 				 ? SKIP : SWAP_CONV;
17568 		      break;
17569 
17570 		    case ASHIFTRT:
17571 		    case ASHIFT:
17572 		    case LSHIFTRT:
17573 		      /* ASRS <Rdn>,<Rm> */
17574 		      /* LSRS <Rdn>,<Rm> */
17575 		      /* LSLS <Rdn>,<Rm> */
17576 		      if (rtx_equal_p (dst, op0)
17577 			  && low_register_operand (op1, SImode))
17578 			action = action_for_partial_flag_setting;
17579 		      /* ASRS <Rd>,<Rm>,#<imm5> */
17580 		      /* LSRS <Rd>,<Rm>,#<imm5> */
17581 		      /* LSLS <Rd>,<Rm>,#<imm5> */
17582 		      else if (low_register_operand (op0, SImode)
17583 			       && CONST_INT_P (op1)
17584 			       && IN_RANGE (INTVAL (op1), 0, 31))
17585 			action = action_for_partial_flag_setting;
17586 		      break;
17587 
17588 		    case ROTATERT:
17589 		      /* RORS <Rdn>,<Rm>  */
17590 		      if (rtx_equal_p (dst, op0)
17591 			  && low_register_operand (op1, SImode))
17592 			action = action_for_partial_flag_setting;
17593 		      break;
17594 
17595 		    case NOT:
17596 		      /* MVNS <Rd>,<Rm>  */
17597 		      if (low_register_operand (op0, SImode))
17598 			action = action_for_partial_flag_setting;
17599 		      break;
17600 
17601 		    case NEG:
17602 		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
17603 		      if (low_register_operand (op0, SImode))
17604 			action = CONV;
17605 		      break;
17606 
17607 		    case CONST_INT:
17608 		      /* MOVS <Rd>,#<imm8>  */
17609 		      if (CONST_INT_P (src)
17610 			  && IN_RANGE (INTVAL (src), 0, 255))
17611 			action = action_for_partial_flag_setting;
17612 		      break;
17613 
17614 		    case REG:
17615 		      /* MOVS and MOV<c> with registers have different
17616 			 encodings, so are not relevant here.  */
17617 		      break;
17618 
17619 		    default:
17620 		      break;
17621 		    }
17622 		}
17623 
17624 	      if (action != SKIP)
17625 		{
17626 		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17627 		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17628 		  rtvec vec;
17629 
17630 		  if (action == SWAP_CONV)
17631 		    {
17632 		      src = copy_rtx (src);
17633 		      XEXP (src, 0) = op1;
17634 		      XEXP (src, 1) = op0;
17635 		      pat = gen_rtx_SET (dst, src);
17636 		      vec = gen_rtvec (2, pat, clobber);
17637 		    }
17638 		  else /* action == CONV */
17639 		    vec = gen_rtvec (2, pat, clobber);
17640 
17641 		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17642 		  INSN_CODE (insn) = -1;
17643 		}
17644 	    }
17645 
17646 	  if (NONDEBUG_INSN_P (insn))
17647 	    df_simulate_one_insn_backwards (bb, insn, &live);
17648 	}
17649     }
17650 
17651   CLEAR_REG_SET (&live);
17652 }
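
/* Illustrative example of the conversion: when the condition codes are
   dead after the insn, a 32-bit Thumb-2 instruction such as

	add	r0, r1, r2

   is rewritten into its flag-setting form

	adds	r0, r1, r2

   which has a 16-bit encoding.  For commutative operations (AND, IOR,
   XOR) the operands may also be swapped (SWAP_CONV) to reach a 16-bit
   encoding.  */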
17653 
17654 /* GCC puts the pool in the wrong place for ARM, since we can only
17655    load addresses a limited distance around the pc.  We do some
17656    special munging to move the constant pool values to the correct
17657    point in the code.  */
17658 static void
17659 arm_reorg (void)
17660 {
17661   rtx_insn *insn;
17662   HOST_WIDE_INT address = 0;
17663   Mfix * fix;
17664 
17665   if (use_cmse)
17666     cmse_nonsecure_call_clear_caller_saved ();
17667 
17668   /* We cannot run the Thumb passes for thunks because there is no CFG.  */
17669   if (cfun->is_thunk)
17670     ;
17671   else if (TARGET_THUMB1)
17672     thumb1_reorg ();
17673   else if (TARGET_THUMB2)
17674     thumb2_reorg ();
17675 
17676   /* Ensure all insns that must be split have been split at this point.
17677      Otherwise, the pool placement code below may compute incorrect
17678      insn lengths.  Note that when optimizing, all insns have already
17679      been split at this point.  */
17680   if (!optimize)
17681     split_all_insns_noflow ();
17682 
17683   /* Make sure we do not attempt to create a literal pool even though it should
17684      no longer be necessary to create any.  */
17685   if (arm_disable_literal_pool)
17686     return ;
17687 
17688   minipool_fix_head = minipool_fix_tail = NULL;
17689 
17690   /* The first insn must always be a note, or the code below won't
17691      scan it properly.  */
17692   insn = get_insns ();
17693   gcc_assert (NOTE_P (insn));
17694   minipool_pad = 0;
17695 
17696   /* Scan all the insns and record the operands that will need fixing.  */
17697   for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17698     {
17699       if (BARRIER_P (insn))
17700 	push_minipool_barrier (insn, address);
17701       else if (INSN_P (insn))
17702 	{
17703 	  rtx_jump_table_data *table;
17704 
17705 	  note_invalid_constants (insn, address, true);
17706 	  address += get_attr_length (insn);
17707 
17708 	  /* If the insn is a vector jump, add the size of the table
17709 	     and skip the table.  */
17710 	  if (tablejump_p (insn, NULL, &table))
17711 	    {
17712 	      address += get_jump_table_size (table);
17713 	      insn = table;
17714 	    }
17715 	}
17716       else if (LABEL_P (insn))
17717 	/* Add the worst-case padding due to alignment.  We don't add
17718 	   the _current_ padding because the minipool insertions
17719 	   themselves might change it.  */
17720 	address += get_label_padding (insn);
17721     }
17722 
17723   fix = minipool_fix_head;
17724 
17725   /* Now scan the fixups and perform the required changes.  */
17726   while (fix)
17727     {
17728       Mfix * ftmp;
17729       Mfix * fdel;
17730       Mfix *  last_added_fix;
17731       Mfix * last_barrier = NULL;
17732       Mfix * this_fix;
17733 
17734       /* Skip any further barriers before the next fix.  */
17735       while (fix && BARRIER_P (fix->insn))
17736 	fix = fix->next;
17737 
17738       /* No more fixes.  */
17739       if (fix == NULL)
17740 	break;
17741 
17742       last_added_fix = NULL;
17743 
17744       for (ftmp = fix; ftmp; ftmp = ftmp->next)
17745 	{
17746 	  if (BARRIER_P (ftmp->insn))
17747 	    {
17748 	      if (ftmp->address >= minipool_vector_head->max_address)
17749 		break;
17750 
17751 	      last_barrier = ftmp;
17752 	    }
17753 	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17754 	    break;
17755 
17756 	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
17757 	}
17758 
17759       /* If we found a barrier, drop back to that; any fixes that we
17760 	 could have reached but come after the barrier will now go in
17761 	 the next mini-pool.  */
17762       if (last_barrier != NULL)
17763 	{
17764 	  /* Reduce the refcount for those fixes that won't go into this
17765 	     pool after all.  */
17766 	  for (fdel = last_barrier->next;
17767 	       fdel && fdel != ftmp;
17768 	       fdel = fdel->next)
17769 	    {
17770 	      fdel->minipool->refcount--;
17771 	      fdel->minipool = NULL;
17772 	    }
17773 
17774 	  ftmp = last_barrier;
17775 	}
17776       else
17777         {
17778 	  /* ftmp is the first fix that we can't fit into this pool and
17779 	     there are no natural barriers that we could use.  Insert a
17780 	     new barrier in the code somewhere between the previous
17781 	     fix and this one, and arrange to jump around it.  */
17782 	  HOST_WIDE_INT max_address;
17783 
17784 	  /* The last item on the list of fixes must be a barrier, so
17785 	     we can never run off the end of the list of fixes without
17786 	     last_barrier being set.  */
17787 	  gcc_assert (ftmp);
17788 
17789 	  max_address = minipool_vector_head->max_address;
17790 	  /* Check that there isn't another fix that is in range that
17791 	     we couldn't fit into this pool because the pool was
17792 	     already too large: we need to put the pool before such an
17793 	     instruction.  The pool itself may come just after the
17794 	     fix because create_fix_barrier also allows space for a
17795 	     jump instruction.  */
17796 	  if (ftmp->address < max_address)
17797 	    max_address = ftmp->address + 1;
17798 
17799 	  last_barrier = create_fix_barrier (last_added_fix, max_address);
17800 	}
17801 
17802       assign_minipool_offsets (last_barrier);
17803 
17804       while (ftmp)
17805 	{
17806 	  if (!BARRIER_P (ftmp->insn)
17807 	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17808 		  == NULL))
17809 	    break;
17810 
17811 	  ftmp = ftmp->next;
17812 	}
17813 
17814       /* Scan over the fixes we have identified for this pool, fixing them
17815 	 up and adding the constants to the pool itself.  */
17816       for (this_fix = fix; this_fix && ftmp != this_fix;
17817 	   this_fix = this_fix->next)
17818 	if (!BARRIER_P (this_fix->insn))
17819 	  {
17820 	    rtx addr
17821 	      = plus_constant (Pmode,
17822 			       gen_rtx_LABEL_REF (VOIDmode,
17823 						  minipool_vector_label),
17824 			       this_fix->minipool->offset);
17825 	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17826 	  }
17827 
17828       dump_minipool (last_barrier->insn);
17829       fix = ftmp;
17830     }
17831 
17832   /* From now on we must synthesize any constants that we can't handle
17833      directly.  This can happen if the RTL gets split during final
17834      instruction generation.  */
17835   cfun->machine->after_arm_reorg = 1;
17836 
17837   /* Free the minipool memory.  */
17838   obstack_free (&minipool_obstack, minipool_startobj);
17839 }
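
/* A simplified, purely illustrative picture of the result: a constant
   that cannot be encoded as an immediate stays as a pc-relative load,
   and the pool is dumped after a barrier within the load's range:

	ldr	r0, .LCP0	@ fixed-up reference into the minipool
	...
	b	.Lskip		@ barrier (possibly from create_fix_barrier)
   .LCP0:
	.word	0x12345678	@ minipool entry
   .Lskip:
	...  */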
17840 
17841 /* Routines to output assembly language.  */
17842 
17843 /* Return string representation of passed in real value.  */
17844 static const char *
17845 fp_const_from_val (REAL_VALUE_TYPE *r)
17846 {
17847   if (!fp_consts_inited)
17848     init_fp_table ();
17849 
17850   gcc_assert (real_equal (r, &value_fp0));
17851   return "0";
17852 }
17853 
17854 /* OPERANDS[0] is the entire list of insns that constitute pop,
17855    OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17856    is in the list, UPDATE is true iff the list contains explicit
17857    update of base register.  */
17858 void
17859 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17860                          bool update)
17861 {
17862   int i;
17863   char pattern[100];
17864   int offset;
17865   const char *conditional;
17866   int num_saves = XVECLEN (operands[0], 0);
17867   unsigned int regno;
17868   unsigned int regno_base = REGNO (operands[1]);
17869   bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17870 
17871   offset = 0;
17872   offset += update ? 1 : 0;
17873   offset += return_pc ? 1 : 0;
17874 
17875   /* Is the base register in the list?  */
17876   for (i = offset; i < num_saves; i++)
17877     {
17878       regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17879       /* If SP is in the list, then the base register must be SP.  */
17880       gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17881       /* If base register is in the list, there must be no explicit update.  */
17882       if (regno == regno_base)
17883         gcc_assert (!update);
17884     }
17885 
17886   conditional = reverse ? "%?%D0" : "%?%d0";
17887   /* Can't use POP if returning from an interrupt.  */
17888   if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17889     sprintf (pattern, "pop%s\t{", conditional);
17890   else
17891     {
17892       /* Output ldmfd when the base register is SP, otherwise output ldmia.
17893          It's just a convention, their semantics are identical.  */
17894       if (regno_base == SP_REGNUM)
17895 	sprintf (pattern, "ldmfd%s\t", conditional);
17896       else if (update)
17897 	sprintf (pattern, "ldmia%s\t", conditional);
17898       else
17899 	sprintf (pattern, "ldm%s\t", conditional);
17900 
17901       strcat (pattern, reg_names[regno_base]);
17902       if (update)
17903         strcat (pattern, "!, {");
17904       else
17905         strcat (pattern, ", {");
17906     }
17907 
17908   /* Output the first destination register.  */
17909   strcat (pattern,
17910           reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17911 
17912   /* Output the rest of the destination registers.  */
17913   for (i = offset + 1; i < num_saves; i++)
17914     {
17915       strcat (pattern, ", ");
17916       strcat (pattern,
17917               reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17918     }
17919 
17920   strcat (pattern, "}");
17921 
17922   if (interrupt_p && return_pc)
17923     strcat (pattern, "^");
17924 
17925   output_asm_insn (pattern, &cond);
17926 }
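
/* Example outputs (illustrative): a pop of {r4, r5, pc} with base
   register SP and an explicit update prints

	pop	{r4, r5, pc}

   whereas the same list when returning from an interrupt handler prints

	ldmfd	sp!, {r4, r5, pc}^

   where "^" requests the SPSR-to-CPSR copy of an exception return.  */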
17927 
17928 
17929 /* Output the assembly for a store multiple.  */
17930 
17931 const char *
17932 vfp_output_vstmd (rtx * operands)
17933 {
17934   char pattern[100];
17935   int p;
17936   int base;
17937   int i;
17938   rtx addr_reg = REG_P (XEXP (operands[0], 0))
17939 		   ? XEXP (operands[0], 0)
17940 		   : XEXP (XEXP (operands[0], 0), 0);
17941   bool push_p =  REGNO (addr_reg) == SP_REGNUM;
17942 
17943   if (push_p)
17944     strcpy (pattern, "vpush%?.64\t{%P1");
17945   else
17946     strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17947 
17948   p = strlen (pattern);
17949 
17950   gcc_assert (REG_P (operands[1]));
17951 
17952   base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17953   for (i = 1; i < XVECLEN (operands[2], 0); i++)
17954     {
17955       p += sprintf (&pattern[p], ", d%d", base + i);
17956     }
17957   strcpy (&pattern[p], "}");
17958 
17959   output_asm_insn (pattern, operands);
17960   return "";
17961 }
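
/* For example (illustrative), storing the register pair d8/d9 through SP
   prints

	vpush.64	{d8, d9}

   while the same store through another base register prints

	vstmdb.64	r4!, {d8, d9}  */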
17962 
17963 
17964 /* Emit RTL to save block of VFP register pairs to the stack.  Returns the
17965    number of bytes pushed.  */
17966 
17967 static int
17968 vfp_emit_fstmd (int base_reg, int count)
17969 {
17970   rtx par;
17971   rtx dwarf;
17972   rtx tmp, reg;
17973   int i;
17974 
17975   /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17976      register pairs are stored by a store multiple insn.  We avoid this
17977      by pushing an extra pair.  */
17978   if (count == 2 && !arm_arch6)
17979     {
17980       if (base_reg == LAST_VFP_REGNUM - 3)
17981 	base_reg -= 2;
17982       count++;
17983     }
17984 
17985   /* FSTMD may not store more than 16 doubleword registers at once.  Split
17986      larger stores into multiple parts (up to a maximum of two, in
17987      practice).  */
17988   if (count > 16)
17989     {
17990       int saved;
17991       /* NOTE: base_reg is an internal register number, so each D register
17992          counts as 2.  */
17993       saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17994       saved += vfp_emit_fstmd (base_reg, 16);
17995       return saved;
17996     }
17997 
17998   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17999   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18000 
18001   reg = gen_rtx_REG (DFmode, base_reg);
18002   base_reg += 2;
18003 
18004   XVECEXP (par, 0, 0)
18005     = gen_rtx_SET (gen_frame_mem
18006 		   (BLKmode,
18007 		    gen_rtx_PRE_MODIFY (Pmode,
18008 					stack_pointer_rtx,
18009 					plus_constant
18010 					(Pmode, stack_pointer_rtx,
18011 					 - (count * 8)))
18012 		    ),
18013 		   gen_rtx_UNSPEC (BLKmode,
18014 				   gen_rtvec (1, reg),
18015 				   UNSPEC_PUSH_MULT));
18016 
18017   tmp = gen_rtx_SET (stack_pointer_rtx,
18018 		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18019   RTX_FRAME_RELATED_P (tmp) = 1;
18020   XVECEXP (dwarf, 0, 0) = tmp;
18021 
18022   tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18023   RTX_FRAME_RELATED_P (tmp) = 1;
18024   XVECEXP (dwarf, 0, 1) = tmp;
18025 
18026   for (i = 1; i < count; i++)
18027     {
18028       reg = gen_rtx_REG (DFmode, base_reg);
18029       base_reg += 2;
18030       XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18031 
18032       tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18033 					plus_constant (Pmode,
18034 						       stack_pointer_rtx,
18035 						       i * 8)),
18036 			 reg);
18037       RTX_FRAME_RELATED_P (tmp) = 1;
18038       XVECEXP (dwarf, 0, i + 1) = tmp;
18039     }
18040 
18041   par = emit_insn (par);
18042   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18043   RTX_FRAME_RELATED_P (par) = 1;
18044 
18045   return count * 8;
18046 }
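
/* Rough sketch (illustrative register numbers) of the RTL emitted by a
   two-pair call of vfp_emit_fstmd when the ARM10 VFPr1 workaround does
   not apply: a single PARALLEL whose first element stores the block and
   pre-modifies SP,

	(parallel [(set (mem:BLK (pre_modify (reg sp)
					     (plus (reg sp) (const_int -16))))
			(unspec:BLK [(reg:DF d8)] UNSPEC_PUSH_MULT))
		   (use (reg:DF d9))])

   together with a REG_FRAME_RELATED_EXPR note that describes the SP
   adjustment and each individual store for the unwinder.  */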
18047 
18048 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18049    has the cmse_nonsecure_call attribute and returns false otherwise.  */
18050 
18051 bool
18052 detect_cmse_nonsecure_call (tree addr)
18053 {
18054   if (!addr)
18055     return FALSE;
18056 
18057   tree fntype = TREE_TYPE (addr);
18058   if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18059 				    TYPE_ATTRIBUTES (fntype)))
18060     return TRUE;
18061   return FALSE;
18062 }
18063 
18064 
18065 /* Emit a call instruction with pattern PAT.  ADDR is the address of
18066    the call target.  */
18067 
18068 void
18069 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18070 {
18071   rtx insn;
18072 
18073   insn = emit_call_insn (pat);
18074 
18075   /* The PIC register is live on entry to VxWorks PIC PLT entries.
18076      If the call might use such an entry, add a use of the PIC register
18077      to the instruction's CALL_INSN_FUNCTION_USAGE.  */
18078   if (TARGET_VXWORKS_RTP
18079       && flag_pic
18080       && !sibcall
18081       && GET_CODE (addr) == SYMBOL_REF
18082       && (SYMBOL_REF_DECL (addr)
18083 	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18084 	  : !SYMBOL_REF_LOCAL_P (addr)))
18085     {
18086       require_pic_register ();
18087       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18088     }
18089 
18090   if (TARGET_AAPCS_BASED)
18091     {
18092       /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18093 	 linker.  We need to add an IP clobber to allow setting
18094 	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
18095 	 is not needed since it's a fixed register.  */
18096       rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18097       clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18098     }
18099 }
18100 
18101 /* Output a 'call' insn.  */
18102 const char *
18103 output_call (rtx *operands)
18104 {
18105   gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */
18106 
18107   /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
18108   if (REGNO (operands[0]) == LR_REGNUM)
18109     {
18110       operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18111       output_asm_insn ("mov%?\t%0, %|lr", operands);
18112     }
18113 
18114   output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18115 
18116   if (TARGET_INTERWORK || arm_arch4t)
18117     output_asm_insn ("bx%?\t%0", operands);
18118   else
18119     output_asm_insn ("mov%?\t%|pc, %0", operands);
18120 
18121   return "";
18122 }
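
/* Illustrative output for a pre-ARMv5 indirect call through r2:

	mov	lr, pc
	bx	r2		@ or "mov pc, r2" without bx support

   A call through LR itself is first moved into IP, since LR must be
   overwritten with the return address.  */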
18123 
18124 /* Output a move from arm registers to arm registers of a long double
18125    OPERANDS[0] is the destination.
18126    OPERANDS[1] is the source.  */
18127 const char *
18128 output_mov_long_double_arm_from_arm (rtx *operands)
18129 {
18130   /* We have to be careful here because the two might overlap.  */
18131   int dest_start = REGNO (operands[0]);
18132   int src_start = REGNO (operands[1]);
18133   rtx ops[2];
18134   int i;
18135 
18136   if (dest_start < src_start)
18137     {
18138       for (i = 0; i < 3; i++)
18139 	{
18140 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
18141 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
18142 	  output_asm_insn ("mov%?\t%0, %1", ops);
18143 	}
18144     }
18145   else
18146     {
18147       for (i = 2; i >= 0; i--)
18148 	{
18149 	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
18150 	  ops[1] = gen_rtx_REG (SImode, src_start + i);
18151 	  output_asm_insn ("mov%?\t%0, %1", ops);
18152 	}
18153     }
18154 
18155   return "";
18156 }
18157 
18158 void
18159 arm_emit_movpair (rtx dest, rtx src)
18160  {
18161   /* If the src is an immediate, simplify it.  */
18162   if (CONST_INT_P (src))
18163     {
18164       HOST_WIDE_INT val = INTVAL (src);
18165       emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18166       if ((val >> 16) & 0x0000ffff)
18167 	{
18168 	  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18169 					       GEN_INT (16)),
18170 			 GEN_INT ((val >> 16) & 0x0000ffff));
18171 	  rtx_insn *insn = get_last_insn ();
18172 	  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18173 	}
18174       return;
18175     }
18176    emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18177    emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18178    rtx_insn *insn = get_last_insn ();
18179    set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18180  }
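
/* Illustrative expansion, assuming a target with movw/movt: loading the
   constant 0x12345678 into r0 becomes

	movw	r0, #0x5678	@ low halfword
	movt	r0, #0x1234	@ high halfword via the zero_extract SET

   For a symbolic SRC the HIGH/LO_SUM pair is emitted instead, and a
   REG_EQUAL note records the original value on the last insn.  */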
18181 
18182 /* Output a move between double words.  It must be REG<-MEM
18183    or MEM<-REG.  */
18184 const char *
18185 output_move_double (rtx *operands, bool emit, int *count)
18186 {
18187   enum rtx_code code0 = GET_CODE (operands[0]);
18188   enum rtx_code code1 = GET_CODE (operands[1]);
18189   rtx otherops[3];
18190   if (count)
18191     *count = 1;
18192 
18193   /* The only case when this might happen is when
18194      you are looking at the length of a DImode instruction
18195      that has an invalid constant in it.  */
18196   if (code0 == REG && code1 != MEM)
18197     {
18198       gcc_assert (!emit);
18199       *count = 2;
18200       return "";
18201     }
18202 
18203   if (code0 == REG)
18204     {
18205       unsigned int reg0 = REGNO (operands[0]);
18206 
18207       otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18208 
18209       gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */
18210 
18211       switch (GET_CODE (XEXP (operands[1], 0)))
18212 	{
18213 	case REG:
18214 
18215 	  if (emit)
18216 	    {
18217 	      if (TARGET_LDRD
18218 		  && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18219 		output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18220 	      else
18221 		output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18222 	    }
18223 	  break;
18224 
18225 	case PRE_INC:
18226 	  gcc_assert (TARGET_LDRD);
18227 	  if (emit)
18228 	    output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18229 	  break;
18230 
18231 	case PRE_DEC:
18232 	  if (emit)
18233 	    {
18234 	      if (TARGET_LDRD)
18235 		output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18236 	      else
18237 		output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18238 	    }
18239 	  break;
18240 
18241 	case POST_INC:
18242 	  if (emit)
18243 	    {
18244 	      if (TARGET_LDRD)
18245 		output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18246 	      else
18247 		output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18248 	    }
18249 	  break;
18250 
18251 	case POST_DEC:
18252 	  gcc_assert (TARGET_LDRD);
18253 	  if (emit)
18254 	    output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18255 	  break;
18256 
18257 	case PRE_MODIFY:
18258 	case POST_MODIFY:
18259 	  /* Autoincrement addressing modes should never have overlapping
18260 	     base and destination registers, and overlapping index registers
18261 	     are already prohibited, so this doesn't need to worry about
18262 	     fix_cm3_ldrd.  */
18263 	  otherops[0] = operands[0];
18264 	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18265 	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18266 
18267 	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18268 	    {
18269 	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18270 		{
18271 		  /* Registers overlap so split out the increment.  */
18272 		  if (emit)
18273 		    {
18274 		      output_asm_insn ("add%?\t%1, %1, %2", otherops);
18275 		      output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18276 		    }
18277 		  if (count)
18278 		    *count = 2;
18279 		}
18280 	      else
18281 		{
18282 		  /* Use a single insn if we can.
18283 		     FIXME: IWMMXT allows offsets larger than ldrd can
18284 		     handle, fix these up with a pair of ldr.  */
18285 		  if (TARGET_THUMB2
18286 		      || !CONST_INT_P (otherops[2])
18287 		      || (INTVAL (otherops[2]) > -256
18288 			  && INTVAL (otherops[2]) < 256))
18289 		    {
18290 		      if (emit)
18291 			output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18292 		    }
18293 		  else
18294 		    {
18295 		      if (emit)
18296 			{
18297 			  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18298 			  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18299 			}
18300 		      if (count)
18301 			*count = 2;
18302 
18303 		    }
18304 		}
18305 	    }
18306 	  else
18307 	    {
18308 	      /* Use a single insn if we can.
18309 		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18310 		 fix these up with a pair of ldr.  */
18311 	      if (TARGET_THUMB2
18312 		  || !CONST_INT_P (otherops[2])
18313 		  || (INTVAL (otherops[2]) > -256
18314 		      && INTVAL (otherops[2]) < 256))
18315 		{
18316 		  if (emit)
18317 		    output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18318 		}
18319 	      else
18320 		{
18321 		  if (emit)
18322 		    {
18323 		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18324 		      output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18325 		    }
18326 		  if (count)
18327 		    *count = 2;
18328 		}
18329 	    }
18330 	  break;
18331 
18332 	case LABEL_REF:
18333 	case CONST:
18334 	  /* We might be able to use ldrd %0, %1 here.  However the range is
18335 	     different to ldr/adr, and it is broken on some ARMv7-M
18336 	     implementations.  */
18337 	  /* Use the second register of the pair to avoid problematic
18338 	     overlap.  */
18339 	  otherops[1] = operands[1];
18340 	  if (emit)
18341 	    output_asm_insn ("adr%?\t%0, %1", otherops);
18342 	  operands[1] = otherops[0];
18343 	  if (emit)
18344 	    {
18345 	      if (TARGET_LDRD)
18346 		output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18347 	      else
18348 		output_asm_insn ("ldmia%?\t%1, %M0", operands);
18349 	    }
18350 
18351 	  if (count)
18352 	    *count = 2;
18353 	  break;
18354 
18355 	  /* ??? This needs checking for thumb2.  */
18356 	default:
18357 	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18358 			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18359 	    {
18360 	      otherops[0] = operands[0];
18361 	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18362 	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18363 
18364 	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18365 		{
18366 		  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18367 		    {
18368 		      switch ((int) INTVAL (otherops[2]))
18369 			{
18370 			case -8:
18371 			  if (emit)
18372 			    output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18373 			  return "";
18374 			case -4:
18375 			  if (TARGET_THUMB2)
18376 			    break;
18377 			  if (emit)
18378 			    output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18379 			  return "";
18380 			case 4:
18381 			  if (TARGET_THUMB2)
18382 			    break;
18383 			  if (emit)
18384 			    output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18385 			  return "";
18386 			}
18387 		    }
18388 		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18389 		  operands[1] = otherops[0];
18390 		  if (TARGET_LDRD
18391 		      && (REG_P (otherops[2])
18392 			  || TARGET_THUMB2
18393 			  || (CONST_INT_P (otherops[2])
18394 			      && INTVAL (otherops[2]) > -256
18395 			      && INTVAL (otherops[2]) < 256)))
18396 		    {
18397 		      if (reg_overlap_mentioned_p (operands[0],
18398 						   otherops[2]))
18399 			{
18400 			  /* Swap base and index registers over to
18401 			     avoid a conflict.  */
18402 			  std::swap (otherops[1], otherops[2]);
18403 			}
18404 		      /* If both registers conflict, it will usually
18405 			 have been fixed by a splitter.  */
18406 		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
18407 			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18408 			{
18409 			  if (emit)
18410 			    {
18411 			      output_asm_insn ("add%?\t%0, %1, %2", otherops);
18412 			      output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18413 			    }
18414 			  if (count)
18415 			    *count = 2;
18416 			}
18417 		      else
18418 			{
18419 			  otherops[0] = operands[0];
18420 			  if (emit)
18421 			    output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18422 			}
18423 		      return "";
18424 		    }
18425 
18426 		  if (CONST_INT_P (otherops[2]))
18427 		    {
18428 		      if (emit)
18429 			{
18430 			  if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18431 			    output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18432 			  else
18433 			    output_asm_insn ("add%?\t%0, %1, %2", otherops);
18434 			}
18435 		    }
18436 		  else
18437 		    {
18438 		      if (emit)
18439 			output_asm_insn ("add%?\t%0, %1, %2", otherops);
18440 		    }
18441 		}
18442 	      else
18443 		{
18444 		  if (emit)
18445 		    output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18446 		}
18447 
18448 	      if (count)
18449 		*count = 2;
18450 
18451 	      if (TARGET_LDRD)
18452 		return "ldrd%?\t%0, [%1]";
18453 
18454 	      return "ldmia%?\t%1, %M0";
18455 	    }
18456 	  else
18457 	    {
18458 	      otherops[1] = adjust_address (operands[1], SImode, 4);
18459 	      /* Take care of overlapping base/data reg.  */
18460 	      if (reg_mentioned_p (operands[0], operands[1]))
18461 		{
18462 		  if (emit)
18463 		    {
18464 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18465 		      output_asm_insn ("ldr%?\t%0, %1", operands);
18466 		    }
18467 		  if (count)
18468 		    *count = 2;
18469 
18470 		}
18471 	      else
18472 		{
18473 		  if (emit)
18474 		    {
18475 		      output_asm_insn ("ldr%?\t%0, %1", operands);
18476 		      output_asm_insn ("ldr%?\t%0, %1", otherops);
18477 		    }
18478 		  if (count)
18479 		    *count = 2;
18480 		}
18481 	    }
18482 	}
18483     }
18484   else
18485     {
18486       /* Constraints should ensure this.  */
18487       gcc_assert (code0 == MEM && code1 == REG);
18488       gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18489                   || (TARGET_ARM && TARGET_LDRD));
18490 
18491       /* For TARGET_ARM the first source register of an STRD
18492 	 must be even.  This is usually the case for double-word
18493 	 values but user assembly constraints can force an odd
18494 	 starting register.  */
18495       bool allow_strd = TARGET_LDRD
18496 			 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18497       switch (GET_CODE (XEXP (operands[0], 0)))
18498         {
18499 	case REG:
18500 	  if (emit)
18501 	    {
18502 	      if (allow_strd)
18503 		output_asm_insn ("strd%?\t%1, [%m0]", operands);
18504 	      else
18505 		output_asm_insn ("stm%?\t%m0, %M1", operands);
18506 	    }
18507 	  break;
18508 
18509         case PRE_INC:
18510 	  gcc_assert (allow_strd);
18511 	  if (emit)
18512 	    output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18513 	  break;
18514 
18515         case PRE_DEC:
18516 	  if (emit)
18517 	    {
18518 	      if (allow_strd)
18519 		output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18520 	      else
18521 		output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18522 	    }
18523 	  break;
18524 
18525         case POST_INC:
18526 	  if (emit)
18527 	    {
18528 	      if (allow_strd)
18529 		output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18530 	      else
18531 		output_asm_insn ("stm%?\t%m0!, %M1", operands);
18532 	    }
18533 	  break;
18534 
18535         case POST_DEC:
18536 	  gcc_assert (allow_strd);
18537 	  if (emit)
18538 	    output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18539 	  break;
18540 
18541 	case PRE_MODIFY:
18542 	case POST_MODIFY:
18543 	  otherops[0] = operands[1];
18544 	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18545 	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18546 
18547 	  /* IWMMXT allows offsets larger than strd can handle,
18548 	     fix these up with a pair of str.  */
18549 	  if (!TARGET_THUMB2
18550 	      && CONST_INT_P (otherops[2])
18551 	      && (INTVAL(otherops[2]) <= -256
18552 		  || INTVAL(otherops[2]) >= 256))
18553 	    {
18554 	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18555 		{
18556 		  if (emit)
18557 		    {
18558 		      output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18559 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18560 		    }
18561 		  if (count)
18562 		    *count = 2;
18563 		}
18564 	      else
18565 		{
18566 		  if (emit)
18567 		    {
18568 		      output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18569 		      output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18570 		    }
18571 		  if (count)
18572 		    *count = 2;
18573 		}
18574 	    }
18575 	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18576 	    {
18577 	      if (emit)
18578 		output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18579 	    }
18580 	  else
18581 	    {
18582 	      if (emit)
18583 		output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18584 	    }
18585 	  break;
18586 
18587 	case PLUS:
18588 	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18589 	  if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18590 	    {
18591 	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18592 		{
18593 		case -8:
18594 		  if (emit)
18595 		    output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18596 		  return "";
18597 
18598 		case -4:
18599 		  if (TARGET_THUMB2)
18600 		    break;
18601 		  if (emit)
18602 		    output_asm_insn ("stmda%?\t%m0, %M1", operands);
18603 		  return "";
18604 
18605 		case 4:
18606 		  if (TARGET_THUMB2)
18607 		    break;
18608 		  if (emit)
18609 		    output_asm_insn ("stmib%?\t%m0, %M1", operands);
18610 		  return "";
18611 		}
18612 	    }
18613 	  if (allow_strd
18614 	      && (REG_P (otherops[2])
18615 		  || TARGET_THUMB2
18616 		  || (CONST_INT_P (otherops[2])
18617 		      && INTVAL (otherops[2]) > -256
18618 		      && INTVAL (otherops[2]) < 256)))
18619 	    {
18620 	      otherops[0] = operands[1];
18621 	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18622 	      if (emit)
18623 		output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18624 	      return "";
18625 	    }
18626 	  /* Fall through */
18627 
18628         default:
18629 	  otherops[0] = adjust_address (operands[0], SImode, 4);
18630 	  otherops[1] = operands[1];
18631 	  if (emit)
18632 	    {
18633 	      output_asm_insn ("str%?\t%1, %0", operands);
18634 	      output_asm_insn ("str%?\t%H1, %0", otherops);
18635 	    }
18636 	  if (count)
18637 	    *count = 2;
18638 	}
18639     }
18640 
18641   return "";
18642 }
18643 
18644 /* Output a move, load or store for quad-word vectors in ARM registers.  Only
18645    handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
18646 
18647 const char *
18648 output_move_quad (rtx *operands)
18649 {
18650   if (REG_P (operands[0]))
18651     {
18652       /* Load, or reg->reg move.  */
18653 
18654       if (MEM_P (operands[1]))
18655         {
18656           switch (GET_CODE (XEXP (operands[1], 0)))
18657             {
18658             case REG:
18659               output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18660               break;
18661 
18662             case LABEL_REF:
18663             case CONST:
18664               output_asm_insn ("adr%?\t%0, %1", operands);
18665               output_asm_insn ("ldmia%?\t%0, %M0", operands);
18666               break;
18667 
18668             default:
18669               gcc_unreachable ();
18670             }
18671         }
18672       else
18673         {
18674           rtx ops[2];
18675           int dest, src, i;
18676 
18677           gcc_assert (REG_P (operands[1]));
18678 
18679           dest = REGNO (operands[0]);
18680           src = REGNO (operands[1]);
18681 
18682           /* This seems pretty dumb, but hopefully GCC won't try to do it
18683              very often.  */
18684           if (dest < src)
18685             for (i = 0; i < 4; i++)
18686               {
18687                 ops[0] = gen_rtx_REG (SImode, dest + i);
18688                 ops[1] = gen_rtx_REG (SImode, src + i);
18689                 output_asm_insn ("mov%?\t%0, %1", ops);
18690               }
18691           else
18692             for (i = 3; i >= 0; i--)
18693               {
18694                 ops[0] = gen_rtx_REG (SImode, dest + i);
18695                 ops[1] = gen_rtx_REG (SImode, src + i);
18696                 output_asm_insn ("mov%?\t%0, %1", ops);
18697               }
18698         }
18699     }
18700   else
18701     {
18702       gcc_assert (MEM_P (operands[0]));
18703       gcc_assert (REG_P (operands[1]));
18704       gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18705 
18706       switch (GET_CODE (XEXP (operands[0], 0)))
18707         {
18708         case REG:
18709           output_asm_insn ("stm%?\t%m0, %M1", operands);
18710           break;
18711 
18712         default:
18713           gcc_unreachable ();
18714         }
18715     }
18716 
18717   return "";
18718 }
18719 
18720 /* Output a VFP load or store instruction.  */
18721 
18722 const char *
18723 output_move_vfp (rtx *operands)
18724 {
18725   rtx reg, mem, addr, ops[2];
18726   int load = REG_P (operands[0]);
18727   int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18728   int sp = (!TARGET_VFP_FP16INST
18729 	    || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18730   int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18731   const char *templ;
18732   char buff[50];
18733   machine_mode mode;
18734 
18735   reg = operands[!load];
18736   mem = operands[load];
18737 
18738   mode = GET_MODE (reg);
18739 
18740   gcc_assert (REG_P (reg));
18741   gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18742   gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18743 	      || mode == SFmode
18744 	      || mode == DFmode
18745 	      || mode == HImode
18746 	      || mode == SImode
18747 	      || mode == DImode
18748               || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18749   gcc_assert (MEM_P (mem));
18750 
18751   addr = XEXP (mem, 0);
18752 
18753   switch (GET_CODE (addr))
18754     {
18755     case PRE_DEC:
18756       templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18757       ops[0] = XEXP (addr, 0);
18758       ops[1] = reg;
18759       break;
18760 
18761     case POST_INC:
18762       templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18763       ops[0] = XEXP (addr, 0);
18764       ops[1] = reg;
18765       break;
18766 
18767     default:
18768       templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18769       ops[0] = reg;
18770       ops[1] = mem;
18771       break;
18772     }
18773 
18774   sprintf (buff, templ,
18775 	   load ? "ld" : "st",
18776 	   dp ? "64" : sp ? "32" : "16",
18777 	   dp ? "P" : "",
18778 	   integer_p ? "\t%@ int" : "");
18779   output_asm_insn (buff, ops);
18780 
18781   return "";
18782 }
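
/* Example outputs (illustrative):

	vldr.64	d0, [r1]	@ DFmode load, default address form
	vstr.32	s0, [r2, #8]	@ SFmode store
	vldmia.64	r3!, {d1}	@ POST_INC address

   A trailing "@ int" comment is appended when an integer mode is moved
   through a VFP register.  */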
18783 
18784 /* Output a Neon double-word or quad-word load or store, or a load
18785    or store for larger structure modes.
18786 
18787    WARNING: The ordering of elements is weird in big-endian mode,
18788    because the EABI requires that vectors stored in memory appear
18789    as though they were stored by a VSTM instruction.
18790    GCC RTL defines element ordering based on in-memory order.
18791    This can be different from the architectural ordering of elements
18792    within a NEON register. The intrinsics defined in arm_neon.h use the
18793    NEON register element ordering, not the GCC RTL element ordering.
18794 
18795    For example, the in-memory ordering of a big-endian quadword
18796    vector with 16-bit elements when stored from register pair {d0,d1}
18797    will be (lowest address first, d0[N] is NEON register element N):
18798 
18799      [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18800 
18801    When necessary, quadword registers (dN, dN+1) are moved to ARM
18802    registers from rN in the order:
18803 
18804      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18805 
18806    So that STM/LDM can be used on vectors in ARM registers, and the
18807    same memory layout will result as if VSTM/VLDM were used.
18808 
18809    Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18810    possible, which allows use of appropriate alignment tags.
18811    Note that the choice of "64" is independent of the actual vector
18812    element size; this size simply ensures that the behavior is
18813    equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18814 
18815    Due to limitations of those instructions, use of VST1.64/VLD1.64
18816    is not possible if:
18817     - the address contains PRE_DEC, or
18818     - the mode refers to more than 4 double-word registers
18819 
18820    In those cases, it would be possible to replace VSTM/VLDM by a
18821    sequence of instructions; this is not currently implemented since
18822    this is not certain to actually improve performance.  */
18823 
18824 const char *
18825 output_move_neon (rtx *operands)
18826 {
18827   rtx reg, mem, addr, ops[2];
18828   int regno, nregs, load = REG_P (operands[0]);
18829   const char *templ;
18830   char buff[50];
18831   machine_mode mode;
18832 
18833   reg = operands[!load];
18834   mem = operands[load];
18835 
18836   mode = GET_MODE (reg);
18837 
18838   gcc_assert (REG_P (reg));
18839   regno = REGNO (reg);
18840   nregs = REG_NREGS (reg) / 2;
18841   gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18842 	      || NEON_REGNO_OK_FOR_QUAD (regno));
18843   gcc_assert (VALID_NEON_DREG_MODE (mode)
18844 	      || VALID_NEON_QREG_MODE (mode)
18845 	      || VALID_NEON_STRUCT_MODE (mode));
18846   gcc_assert (MEM_P (mem));
18847 
18848   addr = XEXP (mem, 0);
18849 
18850   /* Strip off const from addresses like (const (plus (...))).  */
18851   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18852     addr = XEXP (addr, 0);
18853 
18854   switch (GET_CODE (addr))
18855     {
18856     case POST_INC:
18857       /* We have to use vldm / vstm for too-large modes.  */
18858       if (nregs > 4)
18859 	{
18860 	  templ = "v%smia%%?\t%%0!, %%h1";
18861 	  ops[0] = XEXP (addr, 0);
18862 	}
18863       else
18864 	{
18865 	  templ = "v%s1.64\t%%h1, %%A0";
18866 	  ops[0] = mem;
18867 	}
18868       ops[1] = reg;
18869       break;
18870 
18871     case PRE_DEC:
18872       /* We have to use vldm / vstm in this case, since there is no
18873 	 pre-decrement form of the vld1 / vst1 instructions.  */
18874       templ = "v%smdb%%?\t%%0!, %%h1";
18875       ops[0] = XEXP (addr, 0);
18876       ops[1] = reg;
18877       break;
18878 
18879     case POST_MODIFY:
18880       /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
18881       gcc_unreachable ();
18882 
18883     case REG:
18884       /* We have to use vldm / vstm for too-large modes.  */
18885       if (nregs > 1)
18886 	{
18887 	  if (nregs > 4)
18888 	    templ = "v%smia%%?\t%%m0, %%h1";
18889 	  else
18890 	    templ = "v%s1.64\t%%h1, %%A0";
18891 
18892 	  ops[0] = mem;
18893 	  ops[1] = reg;
18894 	  break;
18895 	}
18896       /* Fall through.  */
18897     case LABEL_REF:
18898     case PLUS:
18899       {
18900 	int i;
18901 	int overlap = -1;
18902 	for (i = 0; i < nregs; i++)
18903 	  {
18904 	    /* We're only using DImode here because it's a convenient size.  */
18905 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18906 	    ops[1] = adjust_address (mem, DImode, 8 * i);
18907 	    if (reg_overlap_mentioned_p (ops[0], mem))
18908 	      {
18909 		gcc_assert (overlap == -1);
18910 		overlap = i;
18911 	      }
18912 	    else
18913 	      {
18914 		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18915 		output_asm_insn (buff, ops);
18916 	      }
18917 	  }
18918 	if (overlap != -1)
18919 	  {
18920 	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18921 	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
18922 	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18923 	    output_asm_insn (buff, ops);
18924 	  }
18925 
18926         return "";
18927       }
18928 
18929     default:
18930       gcc_unreachable ();
18931     }
18932 
18933   sprintf (buff, templ, load ? "ld" : "st");
18934   output_asm_insn (buff, ops);
18935 
18936   return "";
18937 }
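
/* Example outputs (illustrative, approximate syntax) for a quad-word
   (two D-register) move:

	vld1.64	{d0-d1}, [r2:64]	@ REG or PLUS address, <= 4 D regs
	vstmdb	r2!, {d0-d1}		@ PRE_DEC: no vst1 form exists
	vldmia	r2!, {d0-d7}		@ POST_INC with more than 4 D regs  */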
18938 
18939 /* Compute and return the length of neon_mov<mode>, where <mode> is
18940    one of VSTRUCT modes: EI, OI, CI or XI.  */
18941 int
18942 arm_attr_length_move_neon (rtx_insn *insn)
18943 {
18944   rtx reg, mem, addr;
18945   int load;
18946   machine_mode mode;
18947 
18948   extract_insn_cached (insn);
18949 
18950   if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18951     {
18952       mode = GET_MODE (recog_data.operand[0]);
18953       switch (mode)
18954 	{
18955 	case E_EImode:
18956 	case E_OImode:
18957 	  return 8;
18958 	case E_CImode:
18959 	  return 12;
18960 	case E_XImode:
18961 	  return 16;
18962 	default:
18963 	  gcc_unreachable ();
18964 	}
18965     }
18966 
18967   load = REG_P (recog_data.operand[0]);
18968   reg = recog_data.operand[!load];
18969   mem = recog_data.operand[load];
18970 
18971   gcc_assert (MEM_P (mem));
18972 
18973   addr = XEXP (mem, 0);
18974 
18975   /* Strip off const from addresses like (const (plus (...))).  */
18976   if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18977     addr = XEXP (addr, 0);
18978 
18979   if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18980     {
18981       int insns = REG_NREGS (reg) / 2;
18982       return insns * 4;
18983     }
18984   else
18985     return 4;
18986 }
18987 
18988 /* Return nonzero if the offset in the address is an immediate.  Otherwise,
18989    return zero.  */
18990 
18991 int
18992 arm_address_offset_is_imm (rtx_insn *insn)
18993 {
18994   rtx mem, addr;
18995 
18996   extract_insn_cached (insn);
18997 
18998   if (REG_P (recog_data.operand[0]))
18999     return 0;
19000 
19001   mem = recog_data.operand[0];
19002 
19003   gcc_assert (MEM_P (mem));
19004 
19005   addr = XEXP (mem, 0);
19006 
19007   if (REG_P (addr)
19008       || (GET_CODE (addr) == PLUS
19009 	  && REG_P (XEXP (addr, 0))
19010 	  && CONST_INT_P (XEXP (addr, 1))))
19011     return 1;
19012   else
19013     return 0;
19014 }
19015 
19016 /* Output an ADD r, s, #n where n may be too big for one instruction.
19017    If N is zero and the destination equals the source, output nothing.  */
19018 const char *
19019 output_add_immediate (rtx *operands)
19020 {
19021   HOST_WIDE_INT n = INTVAL (operands[2]);
19022 
19023   if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19024     {
19025       if (n < 0)
19026 	output_multi_immediate (operands,
19027 				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19028 				-n);
19029       else
19030 	output_multi_immediate (operands,
19031 				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19032 				n);
19033     }
19034 
19035   return "";
19036 }
19037 
19038 /* Output a multiple immediate operation.
19039    OPERANDS is the vector of operands referred to in the output patterns.
19040    INSTR1 is the output pattern to use for the first constant.
19041    INSTR2 is the output pattern to use for subsequent constants.
19042    IMMED_OP is the index of the constant slot in OPERANDS.
19043    N is the constant value.  */
19044 static const char *
19045 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19046 			int immed_op, HOST_WIDE_INT n)
19047 {
19048 #if HOST_BITS_PER_WIDE_INT > 32
19049   n &= 0xffffffff;
19050 #endif
19051 
19052   if (n == 0)
19053     {
19054       /* Quick and easy output.  */
19055       operands[immed_op] = const0_rtx;
19056       output_asm_insn (instr1, operands);
19057     }
19058   else
19059     {
19060       int i;
19061       const char * instr = instr1;
19062 
19063       /* Note that n is never zero here (which would give no output).  */
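      /* Worked example (a sketch): for n == 0x00ff0ff0 the scan finds set
	 bits at i == 4 and i == 16, so two instructions are emitted, e.g.
	 "add r0, r1, #4080" (0xff0) followed by "add r0, r0, #16711680"
	 (0xff0000).  Each chunk is an 8-bit value at an even bit position,
	 i.e. a valid ARM immediate.  */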
19064       for (i = 0; i < 32; i += 2)
19065 	{
19066 	  if (n & (3 << i))
19067 	    {
19068 	      operands[immed_op] = GEN_INT (n & (255 << i));
19069 	      output_asm_insn (instr, operands);
19070 	      instr = instr2;
19071 	      i += 6;
19072 	    }
19073 	}
19074     }
19075 
19076   return "";
19077 }
19078 
19079 /* Return the name of a shifter operation.  */
19080 static const char *
19081 arm_shift_nmem(enum rtx_code code)
19082 {
19083   switch (code)
19084     {
19085     case ASHIFT:
19086       return ARM_LSL_NAME;
19087 
19088     case ASHIFTRT:
19089       return "asr";
19090 
19091     case LSHIFTRT:
19092       return "lsr";
19093 
19094     case ROTATERT:
19095       return "ror";
19096 
19097     default:
19098       abort();
19099     }
19100 }
19101 
19102 /* Return the appropriate ARM instruction for the operation code.
19103    The returned result should not be overwritten.  OP is the rtx of the
19104    operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19105    was shifted.  */
19106 const char *
19107 arithmetic_instr (rtx op, int shift_first_arg)
19108 {
19109   switch (GET_CODE (op))
19110     {
19111     case PLUS:
19112       return "add";
19113 
19114     case MINUS:
19115       return shift_first_arg ? "rsb" : "sub";
19116 
19117     case IOR:
19118       return "orr";
19119 
19120     case XOR:
19121       return "eor";
19122 
19123     case AND:
19124       return "and";
19125 
19126     case ASHIFT:
19127     case ASHIFTRT:
19128     case LSHIFTRT:
19129     case ROTATERT:
19130       return arm_shift_nmem(GET_CODE(op));
19131 
19132     default:
19133       gcc_unreachable ();
19134     }
19135 }
19136 
19137 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19138    for the operation code.  The returned result should not be overwritten.
19139    OP is the rtx code of the shift.
19140    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19141    constant shift amount otherwise.  */
19142 static const char *
19143 shift_op (rtx op, HOST_WIDE_INT *amountp)
19144 {
19145   const char * mnem;
19146   enum rtx_code code = GET_CODE (op);
19147 
19148   switch (code)
19149     {
19150     case ROTATE:
19151       if (!CONST_INT_P (XEXP (op, 1)))
19152 	{
19153 	  output_operand_lossage ("invalid shift operand");
19154 	  return NULL;
19155 	}
19156 
19157       code = ROTATERT;
19158       *amountp = 32 - INTVAL (XEXP (op, 1));
19159       mnem = "ror";
19160       break;
19161 
19162     case ASHIFT:
19163     case ASHIFTRT:
19164     case LSHIFTRT:
19165     case ROTATERT:
19166       mnem = arm_shift_nmem(code);
19167       if (CONST_INT_P (XEXP (op, 1)))
19168 	{
19169 	  *amountp = INTVAL (XEXP (op, 1));
19170 	}
19171       else if (REG_P (XEXP (op, 1)))
19172 	{
19173 	  *amountp = -1;
19174 	  return mnem;
19175 	}
19176       else
19177 	{
19178 	  output_operand_lossage ("invalid shift operand");
19179 	  return NULL;
19180 	}
19181       break;
19182 
19183     case MULT:
19184       /* We never have to worry about the amount being other than a
19185 	 power of 2, since this case can never be reloaded from a reg.  */
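      /* For example (illustrative), a (mult x 8) operand is printed as an
	 "lsl" shift with *AMOUNTP set to exact_log2 (8) == 3.  */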
19186       if (!CONST_INT_P (XEXP (op, 1)))
19187 	{
19188 	  output_operand_lossage ("invalid shift operand");
19189 	  return NULL;
19190 	}
19191 
19192       *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19193 
19194       /* Amount must be a power of two.  */
19195       if (*amountp & (*amountp - 1))
19196 	{
19197 	  output_operand_lossage ("invalid shift operand");
19198 	  return NULL;
19199 	}
19200 
19201       *amountp = exact_log2 (*amountp);
19202       gcc_assert (IN_RANGE (*amountp, 0, 31));
19203       return ARM_LSL_NAME;
19204 
19205     default:
19206       output_operand_lossage ("invalid shift operand");
19207       return NULL;
19208     }
19209 
19210   /* This is not 100% correct, but follows from the desire to merge
19211      multiplication by a power of 2 with the recognizer for a
19212      shift.  >=32 is not a valid shift for "lsl", so we must try and
19213      output a shift that produces the correct arithmetical result.
19214      Using lsr #32 is identical except for the fact that the carry bit
19215      is not set correctly if we set the flags; but we never use the
19216      carry bit from such an operation, so we can ignore that.  */
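  /* For instance (a sketch): an ashift by 33 cannot be printed as
     "lsl #33", but "lsr #32" still yields the arithmetically correct
     result of zero in the destination.  */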
19217   if (code == ROTATERT)
19218     /* Rotate is just modulo 32.  */
19219     *amountp &= 31;
19220   else if (*amountp != (*amountp & 31))
19221     {
19222       if (code == ASHIFT)
19223 	mnem = "lsr";
19224       *amountp = 32;
19225     }
19226 
19227   /* Shifts of 0 are no-ops.  */
19228   if (*amountp == 0)
19229     return NULL;
19230 
19231   return mnem;
19232 }
19233 
19234 /* Output a .ascii pseudo-op, keeping track of lengths.  This is
19235    because /bin/as is horribly restrictive.  The judgement about
19236    whether or not each character is 'printable' (and can be output as
19237    is) or not (and must be printed with an octal escape) must be made
19238    with reference to the *host* character set -- the situation is
19239    similar to that discussed in the comments above pp_c_char in
19240    c-pretty-print.c.  */
19241 
19242 #define MAX_ASCII_LEN 51
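/* Illustrative example: for the bytes 'a', '"' and a newline the routine
   emits something like
	.ascii	"a\"\012"
   starting a fresh .ascii directive whenever MAX_ASCII_LEN characters
   have been written on the current one.  */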
19243 
19244 void
19245 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19246 {
19247   int i;
19248   int len_so_far = 0;
19249 
19250   fputs ("\t.ascii\t\"", stream);
19251 
19252   for (i = 0; i < len; i++)
19253     {
19254       int c = p[i];
19255 
19256       if (len_so_far >= MAX_ASCII_LEN)
19257 	{
19258 	  fputs ("\"\n\t.ascii\t\"", stream);
19259 	  len_so_far = 0;
19260 	}
19261 
19262       if (ISPRINT (c))
19263 	{
19264 	  if (c == '\\' || c == '\"')
19265 	    {
19266 	      putc ('\\', stream);
19267 	      len_so_far++;
19268 	    }
19269 	  putc (c, stream);
19270 	  len_so_far++;
19271 	}
19272       else
19273 	{
19274 	  fprintf (stream, "\\%03o", c);
19275 	  len_so_far += 4;
19276 	}
19277     }
19278 
19279   fputs ("\"\n", stream);
19280 }
19281 
19282 /* Whether a register is callee saved or not.  This is necessary because high
19283    registers are marked as caller saved when optimizing for size on Thumb-1
19284    targets despite being callee saved in order to avoid using them.  */
19285 #define callee_saved_reg_p(reg) \
19286   (!call_used_regs[reg] \
19287    || (TARGET_THUMB1 && optimize_size \
19288        && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19289 
19290 /* Compute the register save mask for registers 0 through 12
19291    inclusive.  This code is used by arm_compute_save_core_reg_mask ().  */
19292 
19293 static unsigned long
19294 arm_compute_save_reg0_reg12_mask (void)
19295 {
19296   unsigned long func_type = arm_current_func_type ();
19297   unsigned long save_reg_mask = 0;
19298   unsigned int reg;
19299 
19300   if (IS_INTERRUPT (func_type))
19301     {
19302       unsigned int max_reg;
19303       /* Interrupt functions must not corrupt any registers,
19304 	 even call clobbered ones.  If this is a leaf function
19305 	 we can just examine the registers used by the RTL, but
19306 	 otherwise we have to assume that whatever function is
19307 	 called might clobber anything, and so we have to save
19308 	 all the call-clobbered registers as well.  */
19309       if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19310 	/* FIQ handlers have registers r8 - r12 banked, so
19311 	   we only need to check r0 - r7.  Normal ISRs only
19312 	   bank r14 and r15, so we must check up to r12.
19313 	   r13 is the stack pointer which is always preserved,
19314 	   so we do not need to consider it here.  */
19315 	max_reg = 7;
19316       else
19317 	max_reg = 12;
19318 
19319       for (reg = 0; reg <= max_reg; reg++)
19320 	if (df_regs_ever_live_p (reg)
19321 	    || (! crtl->is_leaf && call_used_regs[reg]))
19322 	  save_reg_mask |= (1 << reg);
19323 
19324       /* Also save the pic base register if necessary.  */
19325       if (flag_pic
19326 	  && !TARGET_SINGLE_PIC_BASE
19327 	  && arm_pic_register != INVALID_REGNUM
19328 	  && crtl->uses_pic_offset_table)
19329 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19330     }
19331   else if (IS_VOLATILE(func_type))
19332     {
19333       /* For noreturn functions we historically omitted register saves
19334 	 altogether.  However this really messes up debugging.  As a
19335 	 compromise save just the frame pointers.  Combined with the link
19336 	 register saved elsewhere this should be sufficient to get
19337 	 a backtrace.  */
19338       if (frame_pointer_needed)
19339 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19340       if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19341 	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19342       if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19343 	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19344     }
19345   else
19346     {
19347       /* In the normal case we only need to save those registers
19348 	 which are call saved and which are used by this function.  */
19349       for (reg = 0; reg <= 11; reg++)
19350 	if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19351 	  save_reg_mask |= (1 << reg);
19352 
19353       /* Handle the frame pointer as a special case.  */
19354       if (frame_pointer_needed)
19355 	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19356 
19357       /* If we aren't loading the PIC register,
19358 	 don't stack it even though it may be live.  */
19359       if (flag_pic
19360 	  && !TARGET_SINGLE_PIC_BASE
19361 	  && arm_pic_register != INVALID_REGNUM
19362 	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19363 	      || crtl->uses_pic_offset_table))
19364 	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19365 
19366       /* The prologue will copy SP into R0, so save it.  */
19367       if (IS_STACKALIGN (func_type))
19368 	save_reg_mask |= 1;
19369     }
19370 
19371   /* Save registers so the exception handler can modify them.  */
19372   if (crtl->calls_eh_return)
19373     {
19374       unsigned int i;
19375 
19376       for (i = 0; ; i++)
19377 	{
19378 	  reg = EH_RETURN_DATA_REGNO (i);
19379 	  if (reg == INVALID_REGNUM)
19380 	    break;
19381 	  save_reg_mask |= 1 << reg;
19382 	}
19383     }
19384 
19385   return save_reg_mask;
19386 }
19387 
19388 /* Return true if r3 is live at the start of the function.  */
19389 
19390 static bool
19391 arm_r3_live_at_start_p (void)
19392 {
19393   /* Just look at cfg info, which is still close enough to correct at this
19394      point.  This gives false positives for broken functions that might use
19395      uninitialized data that happens to be allocated in r3, but who cares?  */
19396   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19397 }
19398 
19399 /* Compute the number of bytes used to store the static chain register on the
19400    stack, above the stack frame.  We need to know this accurately to get the
19401    alignment of the rest of the stack frame correct.  */
19402 
19403 static int
19404 arm_compute_static_chain_stack_bytes (void)
19405 {
19406   /* Once the value is updated from the init value of -1, do not
19407      re-compute.  */
19408   if (cfun->machine->static_chain_stack_bytes != -1)
19409     return cfun->machine->static_chain_stack_bytes;
19410 
19411   /* See the defining assertion in arm_expand_prologue.  */
19412   if (IS_NESTED (arm_current_func_type ())
19413       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19414 	  || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19415 	       || flag_stack_clash_protection)
19416 	      && !df_regs_ever_live_p (LR_REGNUM)))
19417       && arm_r3_live_at_start_p ()
19418       && crtl->args.pretend_args_size == 0)
19419     return 4;
19420 
19421   return 0;
19422 }
19423 
19424 /* Compute a bit mask of which core registers need to be
19425    saved on the stack for the current function.
19426    This is used by arm_compute_frame_layout, which may add extra registers.  */
19427 
19428 static unsigned long
19429 arm_compute_save_core_reg_mask (void)
19430 {
19431   unsigned int save_reg_mask = 0;
19432   unsigned long func_type = arm_current_func_type ();
19433   unsigned int reg;
19434 
19435   if (IS_NAKED (func_type))
19436     /* This should never really happen.  */
19437     return 0;
19438 
19439   /* If we are creating a stack frame, then we must save the frame pointer,
19440      IP (which will hold the old stack pointer), LR and the PC.  */
19441   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19442     save_reg_mask |=
19443       (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19444       | (1 << IP_REGNUM)
19445       | (1 << LR_REGNUM)
19446       | (1 << PC_REGNUM);
19447 
19448   save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19449 
19450   /* Decide if we need to save the link register.
19451      Interrupt routines have their own banked link register,
19452      so they never need to save it.
19453      Otherwise if we do not use the link register we do not need to save
19454      it.  If we are pushing other registers onto the stack however, we
19455      can save an instruction in the epilogue by pushing the link register
19456      now and then popping it back into the PC.  This incurs extra memory
19457      accesses though, so we only do it when optimizing for size, and only
19458      if we know that we will not need a fancy return sequence.  */
19459   if (df_regs_ever_live_p (LR_REGNUM)
19460       || (save_reg_mask
19461 	  && optimize_size
19462 	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19463 	  && !crtl->tail_call_emit
19464 	  && !crtl->calls_eh_return))
19465     save_reg_mask |= 1 << LR_REGNUM;
19466 
19467   if (cfun->machine->lr_save_eliminated)
19468     save_reg_mask &= ~ (1 << LR_REGNUM);
19469 
19470   if (TARGET_REALLY_IWMMXT
19471       && ((bit_count (save_reg_mask)
19472 	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
19473 			   arm_compute_static_chain_stack_bytes())
19474 	   ) % 2) != 0)
19475     {
19476       /* The total number of registers that are going to be pushed
19477 	 onto the stack is odd.  We need to ensure that the stack
19478 	 is 64-bit aligned before we start to save iWMMXt registers,
19479 	 and also before we start to create locals.  (A local variable
19480 	 might be a double or long long which we will load/store using
19481 	 an iWMMXt instruction).  Therefore we need to push another
19482 	 ARM register, so that the stack will be 64-bit aligned.  We
19483 	 try to avoid using the arg registers (r0 - r3) as they might be
19484 	 used to pass values in a tail call.  */
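      /* For example (a sketch): if only r4, r5 and LR are to be pushed
	 (three registers, 12 bytes), pushing r6 as well brings the push
	 back to a multiple of 8 bytes.  */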
19485       for (reg = 4; reg <= 12; reg++)
19486 	if ((save_reg_mask & (1 << reg)) == 0)
19487 	  break;
19488 
19489       if (reg <= 12)
19490 	save_reg_mask |= (1 << reg);
19491       else
19492 	{
19493 	  cfun->machine->sibcall_blocked = 1;
19494 	  save_reg_mask |= (1 << 3);
19495 	}
19496     }
19497 
19498   /* We may need to push an additional register for use initializing the
19499      PIC base register.  */
19500   if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19501       && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19502     {
19503       reg = thumb_find_work_register (1 << 4);
19504       if (!call_used_regs[reg])
19505 	save_reg_mask |= (1 << reg);
19506     }
19507 
19508   return save_reg_mask;
19509 }
19510 
19511 /* Compute a bit mask of which core registers need to be
19512    saved on the stack for the current function.  */
19513 static unsigned long
19514 thumb1_compute_save_core_reg_mask (void)
19515 {
19516   unsigned long mask;
19517   unsigned reg;
19518 
19519   mask = 0;
19520   for (reg = 0; reg < 12; reg ++)
19521     if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19522       mask |= 1 << reg;
19523 
19524   /* Handle the frame pointer as a special case.  */
19525   if (frame_pointer_needed)
19526     mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19527 
19528   if (flag_pic
19529       && !TARGET_SINGLE_PIC_BASE
19530       && arm_pic_register != INVALID_REGNUM
19531       && crtl->uses_pic_offset_table)
19532     mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19533 
19534   /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
19535   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19536     mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19537 
19538   /* LR will also be pushed if any lo regs are pushed.  */
19539   if (mask & 0xff || thumb_force_lr_save ())
19540     mask |= (1 << LR_REGNUM);
19541 
19542   /* Make sure we have a low work register if we need one.
19543      We will need one if we are going to push a high register,
19544      but we are not currently intending to push a low register.  */
19545   if ((mask & 0xff) == 0
19546       && ((mask & 0x0f00) || TARGET_BACKTRACE))
19547     {
19548       /* Use thumb_find_work_register to choose which register
19549 	 we will use.  If the register is live then we will
19550 	 have to push it.  Use LAST_LO_REGNUM as our fallback
19551 	 choice for the register to select.  */
19552       reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19553       /* Make sure the register returned by thumb_find_work_register is
19554 	 not part of the return value.  */
19555       if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19556 	reg = LAST_LO_REGNUM;
19557 
19558       if (callee_saved_reg_p (reg))
19559 	mask |= 1 << reg;
19560     }
19561 
19562   /* The 504 below is 8 bytes less than 512 because there are two possible
19563      alignment words.  We can't tell here if they will be present or not so we
19564      have to play it safe and assume that they are. */
19565   if ((CALLER_INTERWORKING_SLOT_SIZE +
19566        ROUND_UP_WORD (get_frame_size ()) +
19567        crtl->outgoing_args_size) >= 504)
19568     {
19569       /* This is the same as the code in thumb1_expand_prologue() which
19570 	 determines which register to use for stack decrement. */
19571       for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19572 	if (mask & (1 << reg))
19573 	  break;
19574 
19575       if (reg > LAST_LO_REGNUM)
19576 	{
19577 	  /* Make sure we have a register available for stack decrement. */
19578 	  mask |= 1 << LAST_LO_REGNUM;
19579 	}
19580     }
19581 
19582   return mask;
19583 }
19584 
19585 
19586 /* Return the number of bytes required to save VFP registers.  */
19587 static int
19588 arm_get_vfp_saved_size (void)
19589 {
19590   unsigned int regno;
19591   int count;
19592   int saved;
19593 
19594   saved = 0;
19595   /* Space for saved VFP registers.  */
19596   if (TARGET_HARD_FLOAT)
19597     {
19598       count = 0;
19599       for (regno = FIRST_VFP_REGNUM;
19600 	   regno < LAST_VFP_REGNUM;
19601 	   regno += 2)
19602 	{
19603 	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19604 	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19605 	    {
19606 	      if (count > 0)
19607 		{
19608 		  /* Workaround ARM10 VFPr1 bug.  */
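		  /* E.g. (illustrative) a run of exactly two live D
		     registers on a pre-ARMv6 core is counted as three,
		     reserving 24 rather than 16 bytes of save area.  */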
19609 		  if (count == 2 && !arm_arch6)
19610 		    count++;
19611 		  saved += count * 8;
19612 		}
19613 	      count = 0;
19614 	    }
19615 	  else
19616 	    count++;
19617 	}
19618       if (count > 0)
19619 	{
19620 	  if (count == 2 && !arm_arch6)
19621 	    count++;
19622 	  saved += count * 8;
19623 	}
19624     }
19625   return saved;
19626 }
19627 
19628 
19629 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
19630    everything bar the final return instruction.  If simple_return is true,
19631    then do not output epilogue, because it has already been emitted in RTL.
19632 
19633    Note: do not forget to update length attribute of corresponding insn pattern
19634    when changing assembly output (eg. length attribute of
19635    thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19636    register clearing sequences).  */
19637 const char *
19638 output_return_instruction (rtx operand, bool really_return, bool reverse,
19639                            bool simple_return)
19640 {
19641   char conditional[10];
19642   char instr[100];
19643   unsigned reg;
19644   unsigned long live_regs_mask;
19645   unsigned long func_type;
19646   arm_stack_offsets *offsets;
19647 
19648   func_type = arm_current_func_type ();
19649 
19650   if (IS_NAKED (func_type))
19651     return "";
19652 
19653   if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19654     {
19655       /* If this function was declared non-returning, and we have
19656 	 found a tail call, then we have to trust that the called
19657 	 function won't return.  */
19658       if (really_return)
19659 	{
19660 	  rtx ops[2];
19661 
19662 	  /* Otherwise, trap an attempted return by aborting.  */
19663 	  ops[0] = operand;
19664 	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19665 				       : "abort");
19666 	  assemble_external_libcall (ops[1]);
19667 	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19668 	}
19669 
19670       return "";
19671     }
19672 
19673   gcc_assert (!cfun->calls_alloca || really_return);
19674 
19675   sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19676 
19677   cfun->machine->return_used_this_function = 1;
19678 
19679   offsets = arm_get_frame_offsets ();
19680   live_regs_mask = offsets->saved_regs_mask;
19681 
19682   if (!simple_return && live_regs_mask)
19683     {
19684       const char * return_reg;
19685 
19686       /* If we do not have any special requirements for function exit
19687 	 (e.g. interworking) then we can load the return address
19688 	 directly into the PC.  Otherwise we must load it into LR.  */
19689       if (really_return
19690 	  && !IS_CMSE_ENTRY (func_type)
19691 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19692 	return_reg = reg_names[PC_REGNUM];
19693       else
19694 	return_reg = reg_names[LR_REGNUM];
19695 
19696       if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19697 	{
19698 	  /* There are three possible reasons for the IP register
19699 	     being saved: 1) a stack frame was created, in which case
19700 	     IP contains the old stack pointer, or 2) an ISR routine
19701 	     corrupted it, or 3) it was saved to align the stack on
19702 	     iWMMXt.  In case 1, restore IP into SP, otherwise just
19703 	     restore IP.  */
19704 	  if (frame_pointer_needed)
19705 	    {
19706 	      live_regs_mask &= ~ (1 << IP_REGNUM);
19707 	      live_regs_mask |=   (1 << SP_REGNUM);
19708 	    }
19709 	  else
19710 	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19711 	}
19712 
19713       /* On some ARM architectures it is faster to use LDR rather than
19714 	 LDM to load a single register.  On other architectures, the
19715 	 cost is the same.  In 26 bit mode, or for exception handlers,
19716 	 we have to use LDM to load the PC so that the CPSR is also
19717 	 restored.  */
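      /* Illustrative example: if LR is the only register saved and this is
	 a plain return with interworking disabled, the loop below picks it
	 out and we emit something like "ldr pc, [sp], #4" rather than
	 "pop {pc}".  */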
19718       for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19719 	if (live_regs_mask == (1U << reg))
19720 	  break;
19721 
19722       if (reg <= LAST_ARM_REGNUM
19723 	  && (reg != LR_REGNUM
19724 	      || ! really_return
19725 	      || ! IS_INTERRUPT (func_type)))
19726 	{
19727 	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19728 		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19729 	}
19730       else
19731 	{
19732 	  char *p;
19733 	  int first = 1;
19734 
19735 	  /* Generate the load multiple instruction to restore the
19736 	     registers.  Note we can get here, even if
19737 	     frame_pointer_needed is true, but only if sp already
19738 	     points to the base of the saved core registers.  */
19739 	  if (live_regs_mask & (1 << SP_REGNUM))
19740 	    {
19741 	      unsigned HOST_WIDE_INT stack_adjust;
19742 
19743 	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19744 	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19745 
19746 	      if (stack_adjust && arm_arch5 && TARGET_ARM)
19747 		  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19748 	      else
19749 		{
19750 		  /* If we can't use ldmib (SA110 bug),
19751 		     then try to pop r3 instead.  */
19752 		  if (stack_adjust)
19753 		    live_regs_mask |= 1 << 3;
19754 
19755 		  sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19756 		}
19757 	    }
19758 	  /* For interrupt returns we have to use an LDM rather than
19759 	     a POP so that we can use the exception return variant.  */
19760 	  else if (IS_INTERRUPT (func_type))
19761 	    sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19762 	  else
19763 	    sprintf (instr, "pop%s\t{", conditional);
19764 
19765 	  p = instr + strlen (instr);
19766 
19767 	  for (reg = 0; reg <= SP_REGNUM; reg++)
19768 	    if (live_regs_mask & (1 << reg))
19769 	      {
19770 		int l = strlen (reg_names[reg]);
19771 
19772 		if (first)
19773 		  first = 0;
19774 		else
19775 		  {
19776 		    memcpy (p, ", ", 2);
19777 		    p += 2;
19778 		  }
19779 
19780 		memcpy (p, "%|", 2);
19781 		memcpy (p + 2, reg_names[reg], l);
19782 		p += l + 2;
19783 	      }
19784 
19785 	  if (live_regs_mask & (1 << LR_REGNUM))
19786 	    {
19787 	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19788 	      /* If returning from an interrupt, restore the CPSR.  */
19789 	      if (IS_INTERRUPT (func_type))
19790 		strcat (p, "^");
19791 	    }
19792 	  else
19793 	    strcpy (p, "}");
19794 	}
19795 
19796       output_asm_insn (instr, & operand);
19797 
19798       /* See if we need to generate an extra instruction to
19799 	 perform the actual function return.  */
19800       if (really_return
19801 	  && func_type != ARM_FT_INTERWORKED
19802 	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19803 	{
19804 	  /* The return has already been handled
19805 	     by loading the LR into the PC.  */
19806           return "";
19807 	}
19808     }
19809 
19810   if (really_return)
19811     {
19812       switch ((int) ARM_FUNC_TYPE (func_type))
19813 	{
19814 	case ARM_FT_ISR:
19815 	case ARM_FT_FIQ:
19816 	  /* ??? This is wrong for unified assembly syntax.  */
19817 	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19818 	  break;
19819 
19820 	case ARM_FT_INTERWORKED:
19821 	  gcc_assert (arm_arch5 || arm_arch4t);
19822 	  sprintf (instr, "bx%s\t%%|lr", conditional);
19823 	  break;
19824 
19825 	case ARM_FT_EXCEPTION:
19826 	  /* ??? This is wrong for unified assembly syntax.  */
19827 	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19828 	  break;
19829 
19830 	default:
19831 	  if (IS_CMSE_ENTRY (func_type))
19832 	    {
19833 	      /* Check if we have to clear the 'GE bits' which is only used if
19834 		 parallel add and subtraction instructions are available.  */
19835 	      if (TARGET_INT_SIMD)
19836 		snprintf (instr, sizeof (instr),
19837 			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19838 	      else
19839 		snprintf (instr, sizeof (instr),
19840 			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19841 
19842 	      output_asm_insn (instr, & operand);
19843 	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19844 		{
19845 		  /* Clear the cumulative exception-status bits (0-4,7) and the
19846 		     condition code bits (28-31) of the FPSCR.  We need to
19847 		     remember to clear the first scratch register used (IP) and
19848 		     save and restore the second (r4).  */
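		  /* The movw/movt pair below builds the AND mask 0x0FFFFF60
		     in r4 (65376 == 0xFF60, 4095 == 0x0FFF), which keeps
		     every FPSCR bit except 0-4, 7 and 28-31.  */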
19849 		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19850 		  output_asm_insn (instr, & operand);
19851 		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19852 		  output_asm_insn (instr, & operand);
19853 		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19854 		  output_asm_insn (instr, & operand);
19855 		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19856 		  output_asm_insn (instr, & operand);
19857 		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19858 		  output_asm_insn (instr, & operand);
19859 		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19860 		  output_asm_insn (instr, & operand);
19861 		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19862 		  output_asm_insn (instr, & operand);
19863 		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19864 		  output_asm_insn (instr, & operand);
19865 		}
19866 	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19867 	    }
19868 	  /* Use bx if it's available.  */
19869 	  else if (arm_arch5 || arm_arch4t)
19870 	    sprintf (instr, "bx%s\t%%|lr", conditional);
19871 	  else
19872 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19873 	  break;
19874 	}
19875 
19876       output_asm_insn (instr, & operand);
19877     }
19878 
19879   return "";
19880 }
19881 
19882 /* Output in FILE asm statements needed to declare the NAME of the function
19883    defined by its DECL node.  */
19884 
19885 void
19886 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19887 {
19888   size_t cmse_name_len;
19889   char *cmse_name = 0;
19890   char cmse_prefix[] = "__acle_se_";
19891 
19892   /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19893      extra function label for each function with the 'cmse_nonsecure_entry'
19894      attribute.  This extra function label should be prepended with
19895      '__acle_se_', telling the linker that it needs to create secure gateway
19896      veneers for this function.  */
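  /* For a hypothetical entry function "foo" this results in both a
     "__acle_se_foo" and a "foo" label being emitted at the start of the
     function.  */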
19897   if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19898 				    DECL_ATTRIBUTES (decl)))
19899     {
19900       cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19901       cmse_name = XALLOCAVEC (char, cmse_name_len);
19902       snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19903       targetm.asm_out.globalize_label (file, cmse_name);
19904 
19905       ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19906       ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19907     }
19908 
19909   ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19910   ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19911   ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19912   ASM_OUTPUT_LABEL (file, name);
19913 
19914   if (cmse_name)
19915     ASM_OUTPUT_LABEL (file, cmse_name);
19916 
19917   ARM_OUTPUT_FN_UNWIND (file, TRUE);
19918 }
19919 
19920 /* Write the function name into the code section, directly preceding
19921    the function prologue.
19922 
19923    Code will be output similar to this:
19924      t0
19925 	 .ascii "arm_poke_function_name", 0
19926 	 .align
19927      t1
19928 	 .word 0xff000000 + (t1 - t0)
19929      arm_poke_function_name
19930 	 mov     ip, sp
19931 	 stmfd   sp!, {fp, ip, lr, pc}
19932 	 sub     fp, ip, #4
19933 
19934    When performing a stack backtrace, code can inspect the value
19935    of 'pc' stored at 'fp' + 0.  If the trace function then looks
19936    at location pc - 12 and the top 8 bits are set, then we know
19937    that there is a function name embedded immediately preceding this
19938    location, whose (word-aligned) length is (pc[-3] & ~0xff000000).
19939 
19940    We assume that pc is declared as a pointer to an unsigned long.
19941 
19942    It is of no benefit to output the function name if we are assembling
19943    a leaf function.  These function types will not contain a stack
19944    backtrace structure, therefore it is not possible to determine the
19945    function name.  */
19946 void
19947 arm_poke_function_name (FILE *stream, const char *name)
19948 {
19949   unsigned long alignlength;
19950   unsigned long length;
19951   rtx           x;
19952 
19953   length      = strlen (name) + 1;
19954   alignlength = ROUND_UP_WORD (length);
19955 
19956   ASM_OUTPUT_ASCII (stream, name, length);
19957   ASM_OUTPUT_ALIGN (stream, 2);
19958   x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19959   assemble_aligned_integer (UNITS_PER_WORD, x);
19960 }
19961 
19962 /* Place some comments into the assembler stream
19963    describing the current function.  */
19964 static void
19965 arm_output_function_prologue (FILE *f)
19966 {
19967   unsigned long func_type;
19968 
19969   /* Sanity check.  */
19970   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19971 
19972   func_type = arm_current_func_type ();
19973 
19974   switch ((int) ARM_FUNC_TYPE (func_type))
19975     {
19976     default:
19977     case ARM_FT_NORMAL:
19978       break;
19979     case ARM_FT_INTERWORKED:
19980       asm_fprintf (f, "\t%@ Function supports interworking.\n");
19981       break;
19982     case ARM_FT_ISR:
19983       asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19984       break;
19985     case ARM_FT_FIQ:
19986       asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19987       break;
19988     case ARM_FT_EXCEPTION:
19989       asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19990       break;
19991     }
19992 
19993   if (IS_NAKED (func_type))
19994     asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19995 
19996   if (IS_VOLATILE (func_type))
19997     asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19998 
19999   if (IS_NESTED (func_type))
20000     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20001   if (IS_STACKALIGN (func_type))
20002     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20003   if (IS_CMSE_ENTRY (func_type))
20004     asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20005 
20006   asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20007 	       (HOST_WIDE_INT) crtl->args.size,
20008 	       crtl->args.pretend_args_size,
20009 	       (HOST_WIDE_INT) get_frame_size ());
20010 
20011   asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20012 	       frame_pointer_needed,
20013 	       cfun->machine->uses_anonymous_args);
20014 
20015   if (cfun->machine->lr_save_eliminated)
20016     asm_fprintf (f, "\t%@ link register save eliminated.\n");
20017 
20018   if (crtl->calls_eh_return)
20019     asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20020 
20021 }
20022 
20023 static void
20024 arm_output_function_epilogue (FILE *)
20025 {
20026   arm_stack_offsets *offsets;
20027 
20028   if (TARGET_THUMB1)
20029     {
20030       int regno;
20031 
20032       /* Emit any call-via-reg trampolines that are needed for v4t support
20033 	 of call_reg and call_value_reg type insns.  */
20034       for (regno = 0; regno < LR_REGNUM; regno++)
20035 	{
20036 	  rtx label = cfun->machine->call_via[regno];
20037 
20038 	  if (label != NULL)
20039 	    {
20040 	      switch_to_section (function_section (current_function_decl));
20041 	      targetm.asm_out.internal_label (asm_out_file, "L",
20042 					      CODE_LABEL_NUMBER (label));
20043 	      asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20044 	    }
20045 	}
20046 
20047       /* ??? Probably not safe to set this here, since it assumes that a
20048 	 function will be emitted as assembly immediately after we generate
20049 	 RTL for it.  This does not happen for inline functions.  */
20050       cfun->machine->return_used_this_function = 0;
20051     }
20052   else /* TARGET_32BIT */
20053     {
20054       /* We need to take into account any stack-frame rounding.  */
20055       offsets = arm_get_frame_offsets ();
20056 
20057       gcc_assert (!use_return_insn (FALSE, NULL)
20058 		  || (cfun->machine->return_used_this_function != 0)
20059 		  || offsets->saved_regs == offsets->outgoing_args
20060 		  || frame_pointer_needed);
20061     }
20062 }
20063 
20064 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20065    STR and STRD.  If an even number of registers are being pushed, an
20066    STRD pattern is created for each register pair.  If an
20067    odd number of registers are pushed, emit an initial STR followed by
20068    as many STRD instructions as are needed.  This works best when the
20069    stack is initially 64-bit aligned (the normal case), since it
20070    ensures that each STRD is also 64-bit aligned.  */
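/* A rough sketch of the intended code shape: pushing {r4, r5, r6} (an odd
   count) would produce something like
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   whereas pushing {r4, r5, r6, r7} would start directly with a writeback
   STRD such as "strd r4, r5, [sp, #-16]!" followed by
   "strd r6, r7, [sp, #8]".  */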
20071 static void
20072 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20073 {
20074   int num_regs = 0;
20075   int i;
20076   int regno;
20077   rtx par = NULL_RTX;
20078   rtx dwarf = NULL_RTX;
20079   rtx tmp;
20080   bool first = true;
20081 
20082   num_regs = bit_count (saved_regs_mask);
20083 
20084   /* Must be at least one register to save, and can't save SP or PC.  */
20085   gcc_assert (num_regs > 0 && num_regs <= 14);
20086   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20087   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20088 
20089   /* Create sequence for DWARF info.  All the frame-related data for
20090      debugging is held in this wrapper.  */
20091   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20092 
20093   /* Describe the stack adjustment.  */
20094   tmp = gen_rtx_SET (stack_pointer_rtx,
20095 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20096   RTX_FRAME_RELATED_P (tmp) = 1;
20097   XVECEXP (dwarf, 0, 0) = tmp;
20098 
20099   /* Find the first register.  */
20100   for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20101     ;
20102 
20103   i = 0;
20104 
20105   /* If there's an odd number of registers to push, start off by
20106      pushing a single register.  This ensures that subsequent strd
20107      operations are dword aligned (assuming that SP was originally
20108      64-bit aligned).  */
20109   if ((num_regs & 1) != 0)
20110     {
20111       rtx reg, mem, insn;
20112 
20113       reg = gen_rtx_REG (SImode, regno);
20114       if (num_regs == 1)
20115 	mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20116 						     stack_pointer_rtx));
20117       else
20118 	mem = gen_frame_mem (Pmode,
20119 			     gen_rtx_PRE_MODIFY
20120 			     (Pmode, stack_pointer_rtx,
20121 			      plus_constant (Pmode, stack_pointer_rtx,
20122 					     -4 * num_regs)));
20123 
20124       tmp = gen_rtx_SET (mem, reg);
20125       RTX_FRAME_RELATED_P (tmp) = 1;
20126       insn = emit_insn (tmp);
20127       RTX_FRAME_RELATED_P (insn) = 1;
20128       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20129       tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20130       RTX_FRAME_RELATED_P (tmp) = 1;
20131       i++;
20132       regno++;
20133       XVECEXP (dwarf, 0, i) = tmp;
20134       first = false;
20135     }
20136 
20137   while (i < num_regs)
20138     if (saved_regs_mask & (1 << regno))
20139       {
20140 	rtx reg1, reg2, mem1, mem2;
20141 	rtx tmp0, tmp1, tmp2;
20142 	int regno2;
20143 
20144 	/* Find the register to pair with this one.  */
20145 	for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20146 	     regno2++)
20147 	  ;
20148 
20149 	reg1 = gen_rtx_REG (SImode, regno);
20150 	reg2 = gen_rtx_REG (SImode, regno2);
20151 
20152 	if (first)
20153 	  {
20154 	    rtx insn;
20155 
20156 	    first = false;
20157 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20158 							stack_pointer_rtx,
20159 							-4 * num_regs));
20160 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20161 							stack_pointer_rtx,
20162 							-4 * (num_regs - 1)));
20163 	    tmp0 = gen_rtx_SET (stack_pointer_rtx,
20164 				plus_constant (Pmode, stack_pointer_rtx,
20165 					       -4 * (num_regs)));
20166 	    tmp1 = gen_rtx_SET (mem1, reg1);
20167 	    tmp2 = gen_rtx_SET (mem2, reg2);
20168 	    RTX_FRAME_RELATED_P (tmp0) = 1;
20169 	    RTX_FRAME_RELATED_P (tmp1) = 1;
20170 	    RTX_FRAME_RELATED_P (tmp2) = 1;
20171 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20172 	    XVECEXP (par, 0, 0) = tmp0;
20173 	    XVECEXP (par, 0, 1) = tmp1;
20174 	    XVECEXP (par, 0, 2) = tmp2;
20175 	    insn = emit_insn (par);
20176 	    RTX_FRAME_RELATED_P (insn) = 1;
20177 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20178 	  }
20179 	else
20180 	  {
20181 	    mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20182 							stack_pointer_rtx,
20183 							4 * i));
20184 	    mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20185 							stack_pointer_rtx,
20186 							4 * (i + 1)));
20187 	    tmp1 = gen_rtx_SET (mem1, reg1);
20188 	    tmp2 = gen_rtx_SET (mem2, reg2);
20189 	    RTX_FRAME_RELATED_P (tmp1) = 1;
20190 	    RTX_FRAME_RELATED_P (tmp2) = 1;
20191 	    par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20192 	    XVECEXP (par, 0, 0) = tmp1;
20193 	    XVECEXP (par, 0, 1) = tmp2;
20194 	    emit_insn (par);
20195 	  }
20196 
20197 	/* Create unwind information.  This is an approximation.  */
20198 	tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20199 					   plus_constant (Pmode,
20200 							  stack_pointer_rtx,
20201 							  4 * i)),
20202 			    reg1);
20203 	tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20204 					   plus_constant (Pmode,
20205 							  stack_pointer_rtx,
20206 							  4 * (i + 1))),
20207 			    reg2);
20208 
20209 	RTX_FRAME_RELATED_P (tmp1) = 1;
20210 	RTX_FRAME_RELATED_P (tmp2) = 1;
20211 	XVECEXP (dwarf, 0, i + 1) = tmp1;
20212 	XVECEXP (dwarf, 0, i + 2) = tmp2;
20213 	i += 2;
20214 	regno = regno2 + 1;
20215       }
20216     else
20217       regno++;
20218 
20219   return;
20220 }
20221 
20222 /* STRD in ARM mode requires consecutive registers.  This function emits STRD
20223    whenever possible, otherwise it emits single-word stores.  The first store
20224    also allocates stack space for all saved registers, using pre-indexed
20225    addressing with writeback.  All other stores use offset addressing.  If no
20226    STRD can be emitted, this function emits a sequence of single-word stores
20227    rather than an STM, because single-word stores give the scheduler more
20228    freedom and can still be turned into an STM by peephole optimizations.  */
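/* Sketch of the expected output: for a mask covering {r4, r5, r7} the first
   (pair) store allocates all 12 bytes with writeback, roughly
	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]
   r7 is stored on its own because ARM STRD needs an even/odd consecutive
   register pair.  */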
20229 static void
20230 arm_emit_strd_push (unsigned long saved_regs_mask)
20231 {
20232   int num_regs = 0;
20233   int i, j, dwarf_index  = 0;
20234   int offset = 0;
20235   rtx dwarf = NULL_RTX;
20236   rtx insn = NULL_RTX;
20237   rtx tmp, mem;
20238 
20239   /* TODO: More efficient code could be emitted by changing the
20240      layout, e.g., first push all pairs that can use STRD to keep the
20241      stack aligned, and then push all other registers.  */
20242   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20243     if (saved_regs_mask & (1 << i))
20244       num_regs++;
20245 
20246   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20247   gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20248   gcc_assert (num_regs > 0);
20249 
20250   /* Create sequence for DWARF info.  */
20251   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20252 
20253   /* For dwarf info, we generate explicit stack update.  */
20254   tmp = gen_rtx_SET (stack_pointer_rtx,
20255                      plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20256   RTX_FRAME_RELATED_P (tmp) = 1;
20257   XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20258 
20259   /* Save registers.  */
20260   offset = - 4 * num_regs;
20261   j = 0;
20262   while (j <= LAST_ARM_REGNUM)
20263     if (saved_regs_mask & (1 << j))
20264       {
20265         if ((j % 2 == 0)
20266             && (saved_regs_mask & (1 << (j + 1))))
20267           {
20268             /* Current register and previous register form register pair for
20269                which STRD can be generated.  */
20270             if (offset < 0)
20271               {
20272                 /* Allocate stack space for all saved registers.  */
20273                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20274                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20275                 mem = gen_frame_mem (DImode, tmp);
20276                 offset = 0;
20277               }
20278             else if (offset > 0)
20279               mem = gen_frame_mem (DImode,
20280                                    plus_constant (Pmode,
20281                                                   stack_pointer_rtx,
20282                                                   offset));
20283             else
20284               mem = gen_frame_mem (DImode, stack_pointer_rtx);
20285 
20286             tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20287             RTX_FRAME_RELATED_P (tmp) = 1;
20288             tmp = emit_insn (tmp);
20289 
20290             /* Record the first store insn.  */
20291             if (dwarf_index == 1)
20292               insn = tmp;
20293 
20294             /* Generate dwarf info.  */
20295             mem = gen_frame_mem (SImode,
20296                                  plus_constant (Pmode,
20297                                                 stack_pointer_rtx,
20298                                                 offset));
20299             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20300             RTX_FRAME_RELATED_P (tmp) = 1;
20301             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20302 
20303             mem = gen_frame_mem (SImode,
20304                                  plus_constant (Pmode,
20305                                                 stack_pointer_rtx,
20306                                                 offset + 4));
20307             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20308             RTX_FRAME_RELATED_P (tmp) = 1;
20309             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20310 
20311             offset += 8;
20312             j += 2;
20313           }
20314         else
20315           {
20316             /* Emit a single word store.  */
20317             if (offset < 0)
20318               {
20319                 /* Allocate stack space for all saved registers.  */
20320                 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20321                 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20322                 mem = gen_frame_mem (SImode, tmp);
20323                 offset = 0;
20324               }
20325             else if (offset > 0)
20326               mem = gen_frame_mem (SImode,
20327                                    plus_constant (Pmode,
20328                                                   stack_pointer_rtx,
20329                                                   offset));
20330             else
20331               mem = gen_frame_mem (SImode, stack_pointer_rtx);
20332 
20333             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20334             RTX_FRAME_RELATED_P (tmp) = 1;
20335             tmp = emit_insn (tmp);
20336 
20337             /* Record the first store insn.  */
20338             if (dwarf_index == 1)
20339               insn = tmp;
20340 
20341             /* Generate dwarf info.  */
20342             mem = gen_frame_mem (SImode,
20343                                  plus_constant(Pmode,
20344                                                stack_pointer_rtx,
20345                                                offset));
20346             tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20347             RTX_FRAME_RELATED_P (tmp) = 1;
20348             XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20349 
20350             offset += 4;
20351             j += 1;
20352           }
20353       }
20354     else
20355       j++;
20356 
20357   /* Attach dwarf info to the first insn we generate.  */
20358   gcc_assert (insn != NULL_RTX);
20359   add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20360   RTX_FRAME_RELATED_P (insn) = 1;
20361 }
20362 
20363 /* Generate and emit an insn that we will recognize as a push_multi.
20364    Unfortunately, since this insn does not reflect very well the actual
20365    semantics of the operation, we need to annotate the insn for the benefit
20366    of DWARF2 frame unwind information.  DWARF_REGS_MASK is a subset of
20367    MASK for registers that should be annotated for DWARF2 frame unwind
20368    information.  */
20369 static rtx
20370 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20371 {
20372   int num_regs = 0;
20373   int num_dwarf_regs = 0;
20374   int i, j;
20375   rtx par;
20376   rtx dwarf;
20377   int dwarf_par_index;
20378   rtx tmp, reg;
20379 
20380   /* We don't record the PC in the dwarf frame information.  */
20381   dwarf_regs_mask &= ~(1 << PC_REGNUM);
20382 
20383   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20384     {
20385       if (mask & (1 << i))
20386 	num_regs++;
20387       if (dwarf_regs_mask & (1 << i))
20388 	num_dwarf_regs++;
20389     }
20390 
20391   gcc_assert (num_regs && num_regs <= 16);
20392   gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20393 
20394   /* For the body of the insn we are going to generate an UNSPEC in
20395      parallel with several USEs.  This allows the insn to be recognized
20396      by the push_multi pattern in the arm.md file.
20397 
20398      The body of the insn looks something like this:
20399 
20400        (parallel [
20401            (set (mem:BLK (pre_modify:SI (reg:SI sp)
20402 	                                (const_int:SI <num>)))
20403 	        (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20404            (use (reg:SI XX))
20405            (use (reg:SI YY))
20406 	   ...
20407         ])
20408 
20409      For the frame note however, we try to be more explicit and actually
20410      show each register being stored into the stack frame, plus a (single)
20411      decrement of the stack pointer.  We do it this way in order to be
20412      friendly to the stack unwinding code, which only wants to see a single
20413      stack decrement per instruction.  The RTL we generate for the note looks
20414      something like this:
20415 
20416       (sequence [
20417            (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20418            (set (mem:SI (reg:SI sp)) (reg:SI r4))
20419            (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20420            (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20421 	   ...
20422         ])
20423 
20424      FIXME:: In an ideal world the PRE_MODIFY would not exist and
20425      instead we'd have a parallel expression detailing all
20426      the stores to the various memory addresses so that debug
20427      information is more up-to-date. Remember however while writing
20428      this to take care of the constraints with the push instruction.
20429 
20430      Note also that this has to be taken care of for the VFP registers.
20431 
20432      For more see PR43399.  */
20433 
20434   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20435   dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20436   dwarf_par_index = 1;
20437 
20438   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20439     {
20440       if (mask & (1 << i))
20441 	{
20442 	  reg = gen_rtx_REG (SImode, i);
20443 
20444 	  XVECEXP (par, 0, 0)
20445 	    = gen_rtx_SET (gen_frame_mem
20446 			   (BLKmode,
20447 			    gen_rtx_PRE_MODIFY (Pmode,
20448 						stack_pointer_rtx,
20449 						plus_constant
20450 						(Pmode, stack_pointer_rtx,
20451 						 -4 * num_regs))
20452 			    ),
20453 			   gen_rtx_UNSPEC (BLKmode,
20454 					   gen_rtvec (1, reg),
20455 					   UNSPEC_PUSH_MULT));
20456 
20457 	  if (dwarf_regs_mask & (1 << i))
20458 	    {
20459 	      tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20460 				 reg);
20461 	      RTX_FRAME_RELATED_P (tmp) = 1;
20462 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20463 	    }
20464 
20465 	  break;
20466 	}
20467     }
20468 
20469   for (j = 1, i++; j < num_regs; i++)
20470     {
20471       if (mask & (1 << i))
20472 	{
20473 	  reg = gen_rtx_REG (SImode, i);
20474 
20475 	  XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20476 
20477 	  if (dwarf_regs_mask & (1 << i))
20478 	    {
20479 	      tmp
20480 		= gen_rtx_SET (gen_frame_mem
20481 			       (SImode,
20482 				plus_constant (Pmode, stack_pointer_rtx,
20483 					       4 * j)),
20484 			       reg);
20485 	      RTX_FRAME_RELATED_P (tmp) = 1;
20486 	      XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20487 	    }
20488 
20489 	  j++;
20490 	}
20491     }
20492 
20493   par = emit_insn (par);
20494 
20495   tmp = gen_rtx_SET (stack_pointer_rtx,
20496 		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20497   RTX_FRAME_RELATED_P (tmp) = 1;
20498   XVECEXP (dwarf, 0, 0) = tmp;
20499 
20500   add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20501 
20502   return par;
20503 }
20504 
20505 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20506    SIZE is the offset to be adjusted.
20507    DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */
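/* For example (illustrative of the callers below), after popping N core
   registers the epilogue calls this with SIZE = 4 * N and DEST = SRC =
   stack_pointer_rtx, which records "sp := sp + 4 * N" for the unwinder.  */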
20508 static void
20509 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20510 {
20511   rtx dwarf;
20512 
20513   RTX_FRAME_RELATED_P (insn) = 1;
20514   dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20515   add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20516 }
20517 
20518 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20519    SAVED_REGS_MASK shows which registers need to be restored.
20520 
20521    Unfortunately, since this insn does not reflect very well the actual
20522    semantics of the operation, we need to annotate the insn for the benefit
20523    of DWARF2 frame unwind information.  */
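/* As an illustrative sketch (the register numbers are only an example),
   popping {r4, r5, pc} produces a parallel of roughly this shape:

     (parallel [
         (return)
         (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
         (set (reg:SI r4) (mem:SI (reg:SI sp)))
         (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
         (set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))
       ])

   with a REG_CFA_RESTORE note recorded for every restored register
   except PC.  */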
20524 static void
20525 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20526 {
20527   int num_regs = 0;
20528   int i, j;
20529   rtx par;
20530   rtx dwarf = NULL_RTX;
20531   rtx tmp, reg;
20532   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20533   int offset_adj;
20534   int emit_update;
20535 
20536   offset_adj = return_in_pc ? 1 : 0;
20537   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20538     if (saved_regs_mask & (1 << i))
20539       num_regs++;
20540 
20541   gcc_assert (num_regs && num_regs <= 16);
20542 
20543   /* If SP is in the reglist, then we don't emit the SP update insn.  */
20544   emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20545 
20546   /* The parallel needs to hold num_regs SETs, plus one SET for the stack
20547      update when it is emitted, and a RETURN when returning via PC.  */
20548   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20549 
20550   if (return_in_pc)
20551     XVECEXP (par, 0, 0) = ret_rtx;
20552 
20553   if (emit_update)
20554     {
20555       /* Increment the stack pointer, based on there being
20556          num_regs 4-byte registers to restore.  */
20557       tmp = gen_rtx_SET (stack_pointer_rtx,
20558                          plus_constant (Pmode,
20559                                         stack_pointer_rtx,
20560                                         4 * num_regs));
20561       RTX_FRAME_RELATED_P (tmp) = 1;
20562       XVECEXP (par, 0, offset_adj) = tmp;
20563     }
20564 
20565   /* Now restore every reg, which may include PC.  */
20566   for (j = 0, i = 0; j < num_regs; i++)
20567     if (saved_regs_mask & (1 << i))
20568       {
20569         reg = gen_rtx_REG (SImode, i);
20570         if ((num_regs == 1) && emit_update && !return_in_pc)
20571           {
20572             /* Emit single load with writeback.  */
20573             tmp = gen_frame_mem (SImode,
20574                                  gen_rtx_POST_INC (Pmode,
20575                                                    stack_pointer_rtx));
20576             tmp = emit_insn (gen_rtx_SET (reg, tmp));
20577             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20578             return;
20579           }
20580 
20581         tmp = gen_rtx_SET (reg,
20582                            gen_frame_mem
20583                            (SImode,
20584                             plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20585         RTX_FRAME_RELATED_P (tmp) = 1;
20586         XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20587 
20588         /* We need to maintain a sequence for the DWARF info too.  As the
20589            DWARF info should not include PC, skip it.  */
20590         if (i != PC_REGNUM)
20591           dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20592 
20593         j++;
20594       }
20595 
20596   if (return_in_pc)
20597     par = emit_jump_insn (par);
20598   else
20599     par = emit_insn (par);
20600 
20601   REG_NOTES (par) = dwarf;
20602   if (!return_in_pc)
20603     arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20604 				 stack_pointer_rtx, stack_pointer_rtx);
20605 }
20606 
20607 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20608    of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20609 
20610    Unfortunately, since this insn does not reflect very well the actual
20611    semantics of the operation, we need to annotate the insn for the benefit
20612    of DWARF2 frame unwind information.  */
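/* As a sketch (register numbers purely illustrative), popping d8-d10 with
   BASE_REG = sp produces roughly:

     (parallel [
         (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 24)))
         (set (reg:DF d8)  (mem:DF (reg:SI sp)))
         (set (reg:DF d9)  (mem:DF (plus:SI (reg:SI sp) (const_int 8))))
         (set (reg:DF d10) (mem:DF (plus:SI (reg:SI sp) (const_int 16))))
       ])

   i.e. one base-register update plus one DFmode load per D-register.  */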
20613 static void
20614 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20615 {
20616   int i, j;
20617   rtx par;
20618   rtx dwarf = NULL_RTX;
20619   rtx tmp, reg;
20620 
20621   gcc_assert (num_regs && num_regs <= 32);
20622 
20623   /* Workaround ARM10 VFPr1 bug.  */
20624   if (num_regs == 2 && !arm_arch6)
20625     {
20626       if (first_reg == 15)
20627         first_reg--;
20628 
20629       num_regs++;
20630     }
20631 
20632   /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20633      there could be up to 32 D-registers to restore.
20634      If there are more than 16 D-registers, make two recursive calls,
20635      each of which emits one pop_multi instruction.  */
20636   if (num_regs > 16)
20637     {
20638       arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20639       arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20640       return;
20641     }
20642 
20643   /* The parallel needs to hold num_regs SETs
20644      and one SET for the stack update.  */
20645   par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20646 
20647   /* Increment the stack pointer, based on there being
20648      num_regs 8-byte registers to restore.  */
20649   tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20650   RTX_FRAME_RELATED_P (tmp) = 1;
20651   XVECEXP (par, 0, 0) = tmp;
20652 
20653   /* Now show every reg that will be restored, using a SET for each.  */
20654   for (j = 0, i=first_reg; j < num_regs; i += 2)
20655     {
20656       reg = gen_rtx_REG (DFmode, i);
20657 
20658       tmp = gen_rtx_SET (reg,
20659                          gen_frame_mem
20660                          (DFmode,
20661                           plus_constant (Pmode, base_reg, 8 * j)));
20662       RTX_FRAME_RELATED_P (tmp) = 1;
20663       XVECEXP (par, 0, j + 1) = tmp;
20664 
20665       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20666 
20667       j++;
20668     }
20669 
20670   par = emit_insn (par);
20671   REG_NOTES (par) = dwarf;
20672 
20673   /* Make sure the CFA doesn't stay with IP_REGNUM, to allow unwinding from FP.  */
20674   if (REGNO (base_reg) == IP_REGNUM)
20675     {
20676       RTX_FRAME_RELATED_P (par) = 1;
20677       add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20678     }
20679   else
20680     arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20681 				 base_reg, base_reg);
20682 }
20683 
20684 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
20685    an even number of registers is being popped, multiple LDRD patterns are
20686    created for all register pairs.  If an odd number of registers is popped,
20687    the last register is loaded using an LDR pattern.  */
20688 static void
20689 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20690 {
20691   int num_regs = 0;
20692   int i, j;
20693   rtx par = NULL_RTX;
20694   rtx dwarf = NULL_RTX;
20695   rtx tmp, reg, tmp1;
20696   bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20697 
20698   for (i = 0; i <= LAST_ARM_REGNUM; i++)
20699     if (saved_regs_mask & (1 << i))
20700       num_regs++;
20701 
20702   gcc_assert (num_regs && num_regs <= 16);
20703 
20704   /* We cannot generate an LDRD for PC.  Hence, reduce the count if PC is
20705      to be popped.  So, if num_regs was even it will now become odd,
20706      and we can generate a pop with PC.  If num_regs was odd it will now
20707      be even, and an LDR with return can be generated for PC.  */
20708   if (return_in_pc)
20709     num_regs--;
20710 
20711   gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20712 
20713   /* Var j iterates over all the registers to gather all the registers in
20714      saved_regs_mask.  Var i gives the index of the saved registers in the
20715      stack frame.  A PARALLEL RTX of a register pair is created here, so that
20716      the pattern for LDRD can be matched.  As PC is always the last register
20717      to be popped, and we have already decremented num_regs if PC is set,
20718      we don't have to worry about PC in this loop.  */
20719   for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20720     if (saved_regs_mask & (1 << j))
20721       {
20722         /* Create RTX for memory load.  */
20723         reg = gen_rtx_REG (SImode, j);
20724         tmp = gen_rtx_SET (reg,
20725                            gen_frame_mem (SImode,
20726                                plus_constant (Pmode,
20727                                               stack_pointer_rtx, 4 * i)));
20728         RTX_FRAME_RELATED_P (tmp) = 1;
20729 
20730         if (i % 2 == 0)
20731           {
20732             /* When saved-register index (i) is even, the RTX to be emitted is
20733                yet to be created.  Hence create it first.  The LDRD pattern we
20734                are generating is :
20735                [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20736                  (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20737                where target registers need not be consecutive.  */
20738             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20739             dwarf = NULL_RTX;
20740           }
20741 
20742         /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
20743            added as 0th element and if i is odd, reg_i is added as 1st element
20744            of LDRD pattern shown above.  */
20745         XVECEXP (par, 0, (i % 2)) = tmp;
20746         dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20747 
20748         if ((i % 2) == 1)
20749           {
20750             /* When saved-register index (i) is odd, RTXs for both the registers
20751                to be loaded are generated in above given LDRD pattern, and the
20752                pattern can be emitted now.  */
20753             par = emit_insn (par);
20754             REG_NOTES (par) = dwarf;
20755 	    RTX_FRAME_RELATED_P (par) = 1;
20756           }
20757 
20758         i++;
20759       }
20760 
20761   /* If the number of registers pushed is odd AND return_in_pc is false, OR
20762      the number of registers is even AND return_in_pc is true, the last
20763      register is popped using LDR.  It can be PC as well.  Hence, adjust the
20764      stack first and then do the LDR with post-increment.  */
20765 
20766   /* Increment the stack pointer, based on there being
20767      num_regs 4-byte registers to restore.  */
20768   tmp = gen_rtx_SET (stack_pointer_rtx,
20769                      plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20770   RTX_FRAME_RELATED_P (tmp) = 1;
20771   tmp = emit_insn (tmp);
20772   if (!return_in_pc)
20773     {
20774       arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20775 				   stack_pointer_rtx, stack_pointer_rtx);
20776     }
20777 
20778   dwarf = NULL_RTX;
20779 
20780   if (((num_regs % 2) == 1 && !return_in_pc)
20781       || ((num_regs % 2) == 0 && return_in_pc))
20782     {
20783       /* Scan for the single register to be popped.  Skip until the saved
20784          register is found.  */
20785       for (; (saved_regs_mask & (1 << j)) == 0; j++);
20786 
20787       /* Gen LDR with post increment here.  */
20788       tmp1 = gen_rtx_MEM (SImode,
20789                           gen_rtx_POST_INC (SImode,
20790                                             stack_pointer_rtx));
20791       set_mem_alias_set (tmp1, get_frame_alias_set ());
20792 
20793       reg = gen_rtx_REG (SImode, j);
20794       tmp = gen_rtx_SET (reg, tmp1);
20795       RTX_FRAME_RELATED_P (tmp) = 1;
20796       dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20797 
20798       if (return_in_pc)
20799         {
20800           /* If return_in_pc, j must be PC_REGNUM.  */
20801           gcc_assert (j == PC_REGNUM);
20802           par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20803           XVECEXP (par, 0, 0) = ret_rtx;
20804           XVECEXP (par, 0, 1) = tmp;
20805           par = emit_jump_insn (par);
20806         }
20807       else
20808         {
20809           par = emit_insn (tmp);
20810 	  REG_NOTES (par) = dwarf;
20811 	  arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20812 				       stack_pointer_rtx, stack_pointer_rtx);
20813         }
20814 
20815     }
20816   else if ((num_regs % 2) == 1 && return_in_pc)
20817     {
20818       /* There are 2 registers to be popped.  So, generate the pattern
20819          pop_multiple_with_stack_update_and_return to pop into PC.  */
20820       arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20821     }
20822 
20823   return;
20824 }
20825 
20826 /* LDRD in ARM mode needs consecutive registers as operands.  This function
20827    emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
20828    offset addressing and then generates one separate stack update.  This provides
20829    more scheduling freedom, compared to writeback on every load.  However,
20830    if the function returns using a load into PC directly
20831    (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20832    before the last load.  TODO: Add a peephole optimization to recognize
20833    the new epilogue sequence as an LDM instruction whenever possible.  TODO: Add
20834    a peephole optimization to merge the load at stack-offset zero
20835    with the stack update instruction, using a load with writeback
20836    in post-index addressing mode.  */
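/* For example (purely illustrative), with SAVED_REGS_MASK covering
   {r4, r5, r6, pc} this emits approximately:

     ldrd  r4, r5, [sp]        @ register pair via offset addressing
     ldr   r6, [sp, #8]        @ unpaired register via a single-word load
     add   sp, sp, #12         @ the one separate stack update
     ldr   pc, [sp], #4        @ return: PC popped with post-increment  */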
20837 static void
20838 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20839 {
20840   int j = 0;
20841   int offset = 0;
20842   rtx par = NULL_RTX;
20843   rtx dwarf = NULL_RTX;
20844   rtx tmp, mem;
20845 
20846   /* Restore saved registers.  */
20847   gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20848   j = 0;
20849   while (j <= LAST_ARM_REGNUM)
20850     if (saved_regs_mask & (1 << j))
20851       {
20852         if ((j % 2) == 0
20853             && (saved_regs_mask & (1 << (j + 1)))
20854             && (j + 1) != PC_REGNUM)
20855           {
20856             /* Current register and next register form register pair for which
20857                LDRD can be generated. PC is always the last register popped, and
20858                we handle it separately.  */
20859             if (offset > 0)
20860               mem = gen_frame_mem (DImode,
20861                                    plus_constant (Pmode,
20862                                                   stack_pointer_rtx,
20863                                                   offset));
20864             else
20865               mem = gen_frame_mem (DImode, stack_pointer_rtx);
20866 
20867             tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20868             tmp = emit_insn (tmp);
20869 	    RTX_FRAME_RELATED_P (tmp) = 1;
20870 
20871             /* Generate dwarf info.  */
20872 
20873             dwarf = alloc_reg_note (REG_CFA_RESTORE,
20874                                     gen_rtx_REG (SImode, j),
20875                                     NULL_RTX);
20876             dwarf = alloc_reg_note (REG_CFA_RESTORE,
20877                                     gen_rtx_REG (SImode, j + 1),
20878                                     dwarf);
20879 
20880             REG_NOTES (tmp) = dwarf;
20881 
20882             offset += 8;
20883             j += 2;
20884           }
20885         else if (j != PC_REGNUM)
20886           {
20887             /* Emit a single word load.  */
20888             if (offset > 0)
20889               mem = gen_frame_mem (SImode,
20890                                    plus_constant (Pmode,
20891                                                   stack_pointer_rtx,
20892                                                   offset));
20893             else
20894               mem = gen_frame_mem (SImode, stack_pointer_rtx);
20895 
20896             tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20897             tmp = emit_insn (tmp);
20898 	    RTX_FRAME_RELATED_P (tmp) = 1;
20899 
20900             /* Generate dwarf info.  */
20901             REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20902                                               gen_rtx_REG (SImode, j),
20903                                               NULL_RTX);
20904 
20905             offset += 4;
20906             j += 1;
20907           }
20908         else /* j == PC_REGNUM */
20909           j++;
20910       }
20911     else
20912       j++;
20913 
20914   /* Update the stack.  */
20915   if (offset > 0)
20916     {
20917       tmp = gen_rtx_SET (stack_pointer_rtx,
20918                          plus_constant (Pmode,
20919                                         stack_pointer_rtx,
20920                                         offset));
20921       tmp = emit_insn (tmp);
20922       arm_add_cfa_adjust_cfa_note (tmp, offset,
20923 				   stack_pointer_rtx, stack_pointer_rtx);
20924       offset = 0;
20925     }
20926 
20927   if (saved_regs_mask & (1 << PC_REGNUM))
20928     {
20929       /* Only PC is to be popped.  */
20930       par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20931       XVECEXP (par, 0, 0) = ret_rtx;
20932       tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20933                          gen_frame_mem (SImode,
20934                                         gen_rtx_POST_INC (SImode,
20935                                                           stack_pointer_rtx)));
20936       RTX_FRAME_RELATED_P (tmp) = 1;
20937       XVECEXP (par, 0, 1) = tmp;
20938       par = emit_jump_insn (par);
20939 
20940       /* Generate dwarf info.  */
20941       dwarf = alloc_reg_note (REG_CFA_RESTORE,
20942                               gen_rtx_REG (SImode, PC_REGNUM),
20943                               NULL_RTX);
20944       REG_NOTES (par) = dwarf;
20945       arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20946 				   stack_pointer_rtx, stack_pointer_rtx);
20947     }
20948 }
20949 
20950 /* Calculate the size of the return value that is passed in registers.  */
20951 static unsigned
20952 arm_size_return_regs (void)
20953 {
20954   machine_mode mode;
20955 
20956   if (crtl->return_rtx != 0)
20957     mode = GET_MODE (crtl->return_rtx);
20958   else
20959     mode = DECL_MODE (DECL_RESULT (current_function_decl));
20960 
20961   return GET_MODE_SIZE (mode);
20962 }
20963 
20964 /* Return true if the current function needs to save/restore LR.  */
20965 static bool
20966 thumb_force_lr_save (void)
20967 {
20968   return !cfun->machine->lr_save_eliminated
20969 	 && (!crtl->is_leaf
20970 	     || thumb_far_jump_used_p ()
20971 	     || df_regs_ever_live_p (LR_REGNUM));
20972 }
20973 
20974 /* We do not know whether r3 will be available, because
20975    an indirect tail call is happening in this
20976    particular case.  */
20977 static bool
20978 is_indirect_tailcall_p (rtx call)
20979 {
20980   rtx pat = PATTERN (call);
20981 
20982   /* Indirect tail call.  */
20983   pat = XVECEXP (pat, 0, 0);
20984   if (GET_CODE (pat) == SET)
20985     pat = SET_SRC (pat);
20986 
20987   pat = XEXP (XEXP (pat, 0), 0);
20988   return REG_P (pat);
20989 }
20990 
20991 /* Return true if r3 is used by any of the tail call insns in the
20992    current function.  */
20993 static bool
20994 any_sibcall_could_use_r3 (void)
20995 {
20996   edge_iterator ei;
20997   edge e;
20998 
20999   if (!crtl->tail_call_emit)
21000     return false;
21001   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21002     if (e->flags & EDGE_SIBCALL)
21003       {
21004 	rtx_insn *call = BB_END (e->src);
21005 	if (!CALL_P (call))
21006 	  call = prev_nonnote_nondebug_insn (call);
21007 	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21008 	if (find_regno_fusage (call, USE, 3)
21009 	    || is_indirect_tailcall_p (call))
21010 	  return true;
21011       }
21012   return false;
21013 }
21014 
21015 
21016 /* Compute the distance from register FROM to register TO.
21017    These can be the arg pointer (26), the soft frame pointer (25),
21018    the stack pointer (13) or the hard frame pointer (11).
21019    In thumb mode r7 is used as the soft frame pointer, if needed.
21020    Typical stack layout looks like this:
21021 
21022        old stack pointer -> |    |
21023                              ----
21024                             |    | \
21025                             |    |   saved arguments for
21026                             |    |   vararg functions
21027 			    |    | /
21028                               --
21029    hard FP & arg pointer -> |    | \
21030                             |    |   stack
21031                             |    |   frame
21032                             |    | /
21033                               --
21034                             |    | \
21035                             |    |   call saved
21036                             |    |   registers
21037       soft frame pointer -> |    | /
21038                               --
21039                             |    | \
21040                             |    |   local
21041                             |    |   variables
21042      locals base pointer -> |    | /
21043                               --
21044                             |    | \
21045                             |    |   outgoing
21046                             |    |   arguments
21047    current stack pointer -> |    | /
21048                               --
21049 
21050   For a given function some or all of these stack components
21051   may not be needed, giving rise to the possibility of
21052   eliminating some of the registers.
21053 
21054   The values returned by this function must reflect the behavior
21055   of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21056 
21057   The sign of the number returned reflects the direction of stack
21058   growth, so the values are positive for all eliminations except
21059   from the soft frame pointer to the hard frame pointer.
21060 
21061   SFP may point just inside the local variables block to ensure correct
21062   alignment.  */
21063 
21064 
21065 /* Return cached stack offsets.  */
21066 
21067 static arm_stack_offsets *
21068 arm_get_frame_offsets (void)
21069 {
21070   struct arm_stack_offsets *offsets;
21071 
21072   offsets = &cfun->machine->stack_offsets;
21073 
21074   return offsets;
21075 }
21076 
21077 
21078 /* Calculate stack offsets.  These are used to calculate register elimination
21079    offsets and in prologue/epilogue code.  Also calculates which registers
21080    should be saved.  */
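/* A rough worked example (assuming a 32-bit target with doubleword alignment,
   no pretend args, no static chain slot, no interworking slot and no VFP or
   iWMMXt saves): a non-leaf function saving {r4, r5, lr} with 8 bytes of
   locals and no outgoing arguments gets saved_regs = 12, soft_frame padded
   from 12 to 16 for alignment, locals_base = 24 and outgoing_args = 24.  */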
21081 
21082 static void
21083 arm_compute_frame_layout (void)
21084 {
21085   struct arm_stack_offsets *offsets;
21086   unsigned long func_type;
21087   int saved;
21088   int core_saved;
21089   HOST_WIDE_INT frame_size;
21090   int i;
21091 
21092   offsets = &cfun->machine->stack_offsets;
21093 
21094   /* Initially this is the size of the local variables.  It will be translated
21095      into an offset once we have determined the size of preceding data.  */
21096   frame_size = ROUND_UP_WORD (get_frame_size ());
21097 
21098   /* Space for variadic functions.  */
21099   offsets->saved_args = crtl->args.pretend_args_size;
21100 
21101   /* In Thumb mode this is incorrect, but never used.  */
21102   offsets->frame
21103     = (offsets->saved_args
21104        + arm_compute_static_chain_stack_bytes ()
21105        + (frame_pointer_needed ? 4 : 0));
21106 
21107   if (TARGET_32BIT)
21108     {
21109       unsigned int regno;
21110 
21111       offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21112       core_saved = bit_count (offsets->saved_regs_mask) * 4;
21113       saved = core_saved;
21114 
21115       /* We know that SP will be doubleword aligned on entry, and we must
21116 	 preserve that condition at any subroutine call.  We also require the
21117 	 soft frame pointer to be doubleword aligned.  */
21118 
21119       if (TARGET_REALLY_IWMMXT)
21120 	{
21121 	  /* Check for the call-saved iWMMXt registers.  */
21122 	  for (regno = FIRST_IWMMXT_REGNUM;
21123 	       regno <= LAST_IWMMXT_REGNUM;
21124 	       regno++)
21125 	    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21126 	      saved += 8;
21127 	}
21128 
21129       func_type = arm_current_func_type ();
21130       /* Space for saved VFP registers.  */
21131       if (! IS_VOLATILE (func_type)
21132 	  && TARGET_HARD_FLOAT)
21133 	saved += arm_get_vfp_saved_size ();
21134     }
21135   else /* TARGET_THUMB1 */
21136     {
21137       offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21138       core_saved = bit_count (offsets->saved_regs_mask) * 4;
21139       saved = core_saved;
21140       if (TARGET_BACKTRACE)
21141 	saved += 16;
21142     }
21143 
21144   /* Saved registers include the stack frame.  */
21145   offsets->saved_regs
21146     = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21147   offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21148 
21149   /* A leaf function does not need any stack alignment if it has nothing
21150      on the stack.  */
21151   if (crtl->is_leaf && frame_size == 0
21152       /* However if it calls alloca(), we have a dynamically allocated
21153 	 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment.  */
21154       && ! cfun->calls_alloca)
21155     {
21156       offsets->outgoing_args = offsets->soft_frame;
21157       offsets->locals_base = offsets->soft_frame;
21158       return;
21159     }
21160 
21161   /* Ensure SFP has the correct alignment.  */
21162   if (ARM_DOUBLEWORD_ALIGN
21163       && (offsets->soft_frame & 7))
21164     {
21165       offsets->soft_frame += 4;
21166       /* Try to align stack by pushing an extra reg.  Don't bother doing this
21167          when there is a stack frame as the alignment will be rolled into
21168 	 the normal stack adjustment.  */
21169       if (frame_size + crtl->outgoing_args_size == 0)
21170 	{
21171 	  int reg = -1;
21172 
21173 	  /* Register r3 is caller-saved.  Normally it does not need to be
21174 	     saved on entry by the prologue.  However if we choose to save
21175 	     it for padding then we may confuse the compiler into thinking
21176 	     a prologue sequence is required when in fact it is not.  This
21177 	     will occur when shrink-wrapping if r3 is used as a scratch
21178 	     register and there are no other callee-saved writes.
21179 
21180 	     This situation can be avoided when other callee-saved registers
21181 	     are available and r3 is not mandatory if we choose a callee-saved
21182 	     register for padding.  */
21183 	  bool prefer_callee_reg_p = false;
21184 
21185 	  /* If it is safe to use r3, then do so.  This sometimes
21186 	     generates better code on Thumb-2 by avoiding the need to
21187 	     use 32-bit push/pop instructions.  */
21188           if (! any_sibcall_could_use_r3 ()
21189 	      && arm_size_return_regs () <= 12
21190 	      && (offsets->saved_regs_mask & (1 << 3)) == 0
21191 	      && (TARGET_THUMB2
21192 		  || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21193 	    {
21194 	      reg = 3;
21195 	      if (!TARGET_THUMB2)
21196 		prefer_callee_reg_p = true;
21197 	    }
21198 	  if (reg == -1
21199 	      || prefer_callee_reg_p)
21200 	    {
21201 	      for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21202 		{
21203 		  /* Avoid fixed registers; they may be changed at
21204 		     arbitrary times so it's unsafe to restore them
21205 		     during the epilogue.  */
21206 		  if (!fixed_regs[i]
21207 		      && (offsets->saved_regs_mask & (1 << i)) == 0)
21208 		    {
21209 		      reg = i;
21210 		      break;
21211 		    }
21212 		}
21213 	    }
21214 
21215 	  if (reg != -1)
21216 	    {
21217 	      offsets->saved_regs += 4;
21218 	      offsets->saved_regs_mask |= (1 << reg);
21219 	    }
21220 	}
21221     }
21222 
21223   offsets->locals_base = offsets->soft_frame + frame_size;
21224   offsets->outgoing_args = (offsets->locals_base
21225 			    + crtl->outgoing_args_size);
21226 
21227   if (ARM_DOUBLEWORD_ALIGN)
21228     {
21229       /* Ensure SP remains doubleword aligned.  */
21230       if (offsets->outgoing_args & 7)
21231 	offsets->outgoing_args += 4;
21232       gcc_assert (!(offsets->outgoing_args & 7));
21233     }
21234 }
21235 
21236 
21237 /* Calculate the relative offsets for the different stack pointers.  Positive
21238    offsets are in the direction of stack growth.  */
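/* For instance (following the layout sketched above, and assuming no pretend
   args), if nothing at all has been pushed then eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields outgoing_args - (saved_args + 4) = -4,
   the corner case noted in the code below.  */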
21239 
21240 HOST_WIDE_INT
21241 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21242 {
21243   arm_stack_offsets *offsets;
21244 
21245   offsets = arm_get_frame_offsets ();
21246 
21247   /* OK, now we have enough information to compute the distances.
21248      There must be an entry in these switch tables for each pair
21249      of registers in ELIMINABLE_REGS, even if some of the entries
21250      seem to be redundant or useless.  */
21251   switch (from)
21252     {
21253     case ARG_POINTER_REGNUM:
21254       switch (to)
21255 	{
21256 	case THUMB_HARD_FRAME_POINTER_REGNUM:
21257 	  return 0;
21258 
21259 	case FRAME_POINTER_REGNUM:
21260 	  /* This is the reverse of the soft frame pointer
21261 	     to hard frame pointer elimination below.  */
21262 	  return offsets->soft_frame - offsets->saved_args;
21263 
21264 	case ARM_HARD_FRAME_POINTER_REGNUM:
21265 	  /* This is only non-zero in the case where the static chain register
21266 	     is stored above the frame.  */
21267 	  return offsets->frame - offsets->saved_args - 4;
21268 
21269 	case STACK_POINTER_REGNUM:
21270 	  /* If nothing has been pushed on the stack at all
21271 	     then this will return -4.  This *is* correct!  */
21272 	  return offsets->outgoing_args - (offsets->saved_args + 4);
21273 
21274 	default:
21275 	  gcc_unreachable ();
21276 	}
21277       gcc_unreachable ();
21278 
21279     case FRAME_POINTER_REGNUM:
21280       switch (to)
21281 	{
21282 	case THUMB_HARD_FRAME_POINTER_REGNUM:
21283 	  return 0;
21284 
21285 	case ARM_HARD_FRAME_POINTER_REGNUM:
21286 	  /* The hard frame pointer points to the top entry in the
21287 	     stack frame.  The soft frame pointer to the bottom entry
21288 	     in the stack frame.  If there is no stack frame at all,
21289 	     then they are identical.  */
21290 
21291 	  return offsets->frame - offsets->soft_frame;
21292 
21293 	case STACK_POINTER_REGNUM:
21294 	  return offsets->outgoing_args - offsets->soft_frame;
21295 
21296 	default:
21297 	  gcc_unreachable ();
21298 	}
21299       gcc_unreachable ();
21300 
21301     default:
21302       /* You cannot eliminate from the stack pointer.
21303 	 In theory you could eliminate from the hard frame
21304 	 pointer to the stack pointer, but this will never
21305 	 happen, since if a stack frame is not needed the
21306 	 hard frame pointer will never be used.  */
21307       gcc_unreachable ();
21308     }
21309 }
21310 
21311 /* Given FROM and TO register numbers, say whether this elimination is
21312    allowed.  Frame pointer elimination is automatically handled.
21313 
21314    All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
21315    HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
21316    pointer, we must eliminate FRAME_POINTER_REGNUM into
21317    HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21318    ARG_POINTER_REGNUM.  */
21319 
21320 bool
21321 arm_can_eliminate (const int from, const int to)
21322 {
21323   return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21324           (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21325           (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21326           (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21327            true);
21328 }
21329 
21330 /* Emit RTL to save coprocessor registers on function entry.  Returns the
21331    number of bytes pushed.  */
21332 
21333 static int
21334 arm_save_coproc_regs(void)
21335 {
21336   int saved_size = 0;
21337   unsigned reg;
21338   unsigned start_reg;
21339   rtx insn;
21340 
21341   for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21342     if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21343       {
21344 	insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21345 	insn = gen_rtx_MEM (V2SImode, insn);
21346 	insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21347 	RTX_FRAME_RELATED_P (insn) = 1;
21348 	saved_size += 8;
21349       }
21350 
21351   if (TARGET_HARD_FLOAT)
21352     {
21353       start_reg = FIRST_VFP_REGNUM;
21354 
21355       for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21356 	{
21357 	  if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21358 	      && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21359 	    {
21360 	      if (start_reg != reg)
21361 		saved_size += vfp_emit_fstmd (start_reg,
21362 					      (reg - start_reg) / 2);
21363 	      start_reg = reg + 2;
21364 	    }
21365 	}
21366       if (start_reg != reg)
21367 	saved_size += vfp_emit_fstmd (start_reg,
21368 				      (reg - start_reg) / 2);
21369     }
21370   return saved_size;
21371 }
21372 
21373 
21374 /* Set the Thumb frame pointer from the stack pointer.  */
21375 
21376 static void
21377 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21378 {
21379   HOST_WIDE_INT amount;
21380   rtx insn, dwarf;
21381 
21382   amount = offsets->outgoing_args - offsets->locals_base;
21383   if (amount < 1024)
21384     insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21385 				  stack_pointer_rtx, GEN_INT (amount)));
21386   else
21387     {
21388       emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21389       /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
21390          expects the first two operands to be the same.  */
21391       if (TARGET_THUMB2)
21392 	{
21393 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21394 					stack_pointer_rtx,
21395 					hard_frame_pointer_rtx));
21396 	}
21397       else
21398 	{
21399 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21400 					hard_frame_pointer_rtx,
21401 					stack_pointer_rtx));
21402 	}
21403       dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21404 			   plus_constant (Pmode, stack_pointer_rtx, amount));
21405       RTX_FRAME_RELATED_P (dwarf) = 1;
21406       add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21407     }
21408 
21409   RTX_FRAME_RELATED_P (insn) = 1;
21410 }
21411 
21412 struct scratch_reg {
21413   rtx reg;
21414   bool saved;
21415 };
21416 
21417 /* Return a short-lived scratch register for use as a 2nd scratch register on
21418    function entry after the registers are saved in the prologue.  This register
21419    must be released by means of release_scratch_register_on_entry.  IP is not
21420    considered since it is always used as the 1st scratch register if available.
21421 
21422    REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21423    mask of live registers.  */
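/* In outline (a descriptive summary of the code below): prefer LR if it is
   live and is not already the 1st scratch register; otherwise pick a live
   register among r4-r10; as a last resort fall back to r2 or r3 and, if that
   register is live on entry, spill it to the stack and mark it for restore
   by release_scratch_register_on_entry.  */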
21424 
21425 static void
21426 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21427 			       unsigned long live_regs)
21428 {
21429   int regno = -1;
21430 
21431   sr->saved = false;
21432 
21433   if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21434     regno = LR_REGNUM;
21435   else
21436     {
21437       unsigned int i;
21438 
21439       for (i = 4; i < 11; i++)
21440 	if (regno1 != i && (live_regs & (1 << i)) != 0)
21441 	  {
21442 	    regno = i;
21443 	    break;
21444 	  }
21445 
21446       if (regno < 0)
21447 	{
21448 	  /* If IP is used as the 1st scratch register for a nested function,
21449 	     then either r3 wasn't available or is used to preserve IP.  */
21450 	  if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21451 	    regno1 = 3;
21452 	  regno = (regno1 == 3 ? 2 : 3);
21453 	  sr->saved
21454 	    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21455 			       regno);
21456 	}
21457     }
21458 
21459   sr->reg = gen_rtx_REG (SImode, regno);
21460   if (sr->saved)
21461     {
21462       rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21463       rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21464       rtx x = gen_rtx_SET (stack_pointer_rtx,
21465 		           plus_constant (Pmode, stack_pointer_rtx, -4));
21466       RTX_FRAME_RELATED_P (insn) = 1;
21467       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21468     }
21469 }
21470 
21471 /* Release a scratch register obtained from the preceding function.  */
21472 
21473 static void
21474 release_scratch_register_on_entry (struct scratch_reg *sr)
21475 {
21476   if (sr->saved)
21477     {
21478       rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21479       rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21480       rtx x = gen_rtx_SET (stack_pointer_rtx,
21481 			   plus_constant (Pmode, stack_pointer_rtx, 4));
21482       RTX_FRAME_RELATED_P (insn) = 1;
21483       add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21484     }
21485 }
21486 
21487 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21488 
21489 #if PROBE_INTERVAL > 4096
21490 #error Cannot use indexed addressing mode for stack probing
21491 #endif
21492 
21493 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21494    inclusive.  These are offsets from the current stack pointer.  REGNO1
21495    is the index number of the 1st scratch register and LIVE_REGS is the
21496    mask of live registers.  */
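/* A rough sketch of the three strategies, assuming PROBE_INTERVAL == 4096:
   SIZE <= 4096 emits a single probe at FIRST + SIZE; SIZE <= 5 * 4096 emits
   an unrolled sequence of probes at FIRST + N * 4096 followed by one at
   FIRST + SIZE; anything larger materializes the last address in a scratch
   register and loops (see output_probe_stack_range).  */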
21497 
21498 static void
21499 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21500 			    unsigned int regno1, unsigned long live_regs)
21501 {
21502   rtx reg1 = gen_rtx_REG (Pmode, regno1);
21503 
21504   /* See if we have a constant small number of probes to generate.  If so,
21505      that's the easy case.  */
21506   if (size <= PROBE_INTERVAL)
21507     {
21508       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21509       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21510       emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21511     }
21512 
21513   /* The run-time loop is made up of 10 insns in the generic case while the
21514      compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
21515   else if (size <= 5 * PROBE_INTERVAL)
21516     {
21517       HOST_WIDE_INT i, rem;
21518 
21519       emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21520       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21521       emit_stack_probe (reg1);
21522 
21523       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21524 	 it exceeds SIZE.  If only two probes are needed, this will not
21525 	 generate any code.  Then probe at FIRST + SIZE.  */
21526       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21527 	{
21528 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21529 	  emit_stack_probe (reg1);
21530 	}
21531 
21532       rem = size - (i - PROBE_INTERVAL);
21533       if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21534 	{
21535 	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21536 	  emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21537 	}
21538       else
21539 	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21540     }
21541 
21542   /* Otherwise, do the same as above, but in a loop.  Note that we must be
21543      extra careful with variables wrapping around because we might be at
21544      the very top (or the very bottom) of the address space and we have
21545      to be able to handle this case properly; in particular, we use an
21546      equality test for the loop condition.  */
21547   else
21548     {
21549       HOST_WIDE_INT rounded_size;
21550       struct scratch_reg sr;
21551 
21552       get_scratch_register_on_entry (&sr, regno1, live_regs);
21553 
21554       emit_move_insn (reg1, GEN_INT (first));
21555 
21556 
21557       /* Step 1: round SIZE to the previous multiple of the interval.  */
21558 
21559       rounded_size = size & -PROBE_INTERVAL;
21560       emit_move_insn (sr.reg, GEN_INT (rounded_size));
21561 
21562 
21563       /* Step 2: compute initial and final value of the loop counter.  */
21564 
21565       /* TEST_ADDR = SP + FIRST.  */
21566       emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21567 
21568       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
21569       emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21570 
21571 
21572       /* Step 3: the loop
21573 
21574 	 do
21575 	   {
21576 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21577 	     probe at TEST_ADDR
21578 	   }
21579 	 while (TEST_ADDR != LAST_ADDR)
21580 
21581 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21582 	 until it is equal to ROUNDED_SIZE.  */
21583 
21584       emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21585 
21586 
21587       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21588 	 that SIZE is equal to ROUNDED_SIZE.  */
21589 
21590       if (size != rounded_size)
21591 	{
21592 	  HOST_WIDE_INT rem = size - rounded_size;
21593 
21594 	  if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21595 	    {
21596 	      emit_set_insn (sr.reg,
21597 			     plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21598 	      emit_stack_probe (plus_constant (Pmode, sr.reg,
21599 					       PROBE_INTERVAL - rem));
21600 	    }
21601 	  else
21602 	    emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21603 	}
21604 
21605       release_scratch_register_on_entry (&sr);
21606     }
21607 
21608   /* Make sure nothing is scheduled before we are done.  */
21609   emit_insn (gen_blockage ());
21610 }
21611 
21612 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
21613    absolute addresses.  */
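/* Illustrative output (register names depend on the operands picked by the
   caller; PROBE_INTERVAL is assumed to be 4096 here):

       .LPSRL0:
	       sub     r4, r4, #4096
	       str     r0, [r4, #0]
	       cmp     r4, r5
	       bne     .LPSRL0
*/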
21614 
21615 const char *
21616 output_probe_stack_range (rtx reg1, rtx reg2)
21617 {
21618   static int labelno = 0;
21619   char loop_lab[32];
21620   rtx xops[2];
21621 
21622   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21623 
21624   /* Loop.  */
21625   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21626 
21627   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
21628   xops[0] = reg1;
21629   xops[1] = GEN_INT (PROBE_INTERVAL);
21630   output_asm_insn ("sub\t%0, %0, %1", xops);
21631 
21632   /* Probe at TEST_ADDR.  */
21633   output_asm_insn ("str\tr0, [%0, #0]", xops);
21634 
21635   /* Test if TEST_ADDR == LAST_ADDR.  */
21636   xops[1] = reg2;
21637   output_asm_insn ("cmp\t%0, %1", xops);
21638 
21639   /* Branch.  */
21640   fputs ("\tbne\t", asm_out_file);
21641   assemble_name_raw (asm_out_file, loop_lab);
21642   fputc ('\n', asm_out_file);
21643 
21644   return "";
21645 }
21646 
21647 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21648    function.  */
21649 void
21650 arm_expand_prologue (void)
21651 {
21652   rtx amount;
21653   rtx insn;
21654   rtx ip_rtx;
21655   unsigned long live_regs_mask;
21656   unsigned long func_type;
21657   int fp_offset = 0;
21658   int saved_pretend_args = 0;
21659   int saved_regs = 0;
21660   unsigned HOST_WIDE_INT args_to_push;
21661   HOST_WIDE_INT size;
21662   arm_stack_offsets *offsets;
21663   bool clobber_ip;
21664 
21665   func_type = arm_current_func_type ();
21666 
21667   /* Naked functions don't have prologues.  */
21668   if (IS_NAKED (func_type))
21669     {
21670       if (flag_stack_usage_info)
21671 	current_function_static_stack_size = 0;
21672       return;
21673     }
21674 
21675   /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
21676   args_to_push = crtl->args.pretend_args_size;
21677 
21678   /* Compute which register we will have to save onto the stack.  */
21679   offsets = arm_get_frame_offsets ();
21680   live_regs_mask = offsets->saved_regs_mask;
21681 
21682   ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21683 
21684   if (IS_STACKALIGN (func_type))
21685     {
21686       rtx r0, r1;
21687 
21688       /* Handle a word-aligned stack pointer.  We generate the following:
21689 
21690 	  mov r0, sp
21691 	  bic r1, r0, #7
21692 	  mov sp, r1
21693 	  <save and restore r0 in normal prologue/epilogue>
21694 	  mov sp, r0
21695 	  bx lr
21696 
21697 	 The unwinder doesn't need to know about the stack realignment.
21698 	 Just tell it we saved SP in r0.  */
21699       gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21700 
21701       r0 = gen_rtx_REG (SImode, R0_REGNUM);
21702       r1 = gen_rtx_REG (SImode, R1_REGNUM);
21703 
21704       insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21705       RTX_FRAME_RELATED_P (insn) = 1;
21706       add_reg_note (insn, REG_CFA_REGISTER, NULL);
21707 
21708       emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21709 
21710       /* ??? The CFA changes here, which may cause GDB to conclude that it
21711 	 has entered a different function.  That said, the unwind info is
21712 	 correct, individually, before and after this instruction because
21713 	 we've described the save of SP, which will override the default
21714 	 handling of SP as restoring from the CFA.  */
21715       emit_insn (gen_movsi (stack_pointer_rtx, r1));
21716     }
21717 
21718   /* Let's compute the static_chain_stack_bytes required and store it.  Right
21719      now the value must be -1 as stored by arm_init_machine_status ().  */
21720   cfun->machine->static_chain_stack_bytes
21721     = arm_compute_static_chain_stack_bytes ();
21722 
21723   /* The static chain register is the same as the IP register.  If it is
21724      clobbered when creating the frame, we need to save and restore it.  */
21725   clobber_ip = IS_NESTED (func_type)
21726 	       && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21727 		   || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21728 			|| flag_stack_clash_protection)
21729 		       && !df_regs_ever_live_p (LR_REGNUM)
21730 		       && arm_r3_live_at_start_p ()));
21731 
21732   /* Find somewhere to store IP whilst the frame is being created.
21733      We try the following places in order:
21734 
21735        1. The last argument register r3 if it is available.
21736        2. A slot on the stack above the frame if there are no
21737 	  arguments to push onto the stack.
21738        3. Register r3 again, after pushing the argument registers
21739 	  onto the stack, if this is a varargs function.
21740        4. The last slot on the stack created for the arguments to
21741 	  push, if this isn't a varargs function.
21742 
21743      Note - we only need to tell the dwarf2 backend about the SP
21744      adjustment in the second variant; the static chain register
21745      doesn't need to be unwound, as it doesn't contain a value
21746      inherited from the caller.  */
21747   if (clobber_ip)
21748     {
21749       if (!arm_r3_live_at_start_p ())
21750 	insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21751       else if (args_to_push == 0)
21752 	{
21753 	  rtx addr, dwarf;
21754 
21755 	  gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21756 	  saved_regs += 4;
21757 
21758 	  addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21759 	  insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21760 	  fp_offset = 4;
21761 
21762 	  /* Just tell the dwarf backend that we adjusted SP.  */
21763 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
21764 			       plus_constant (Pmode, stack_pointer_rtx,
21765 					      -fp_offset));
21766 	  RTX_FRAME_RELATED_P (insn) = 1;
21767 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21768 	}
21769       else
21770 	{
21771 	  /* Store the args on the stack.  */
21772 	  if (cfun->machine->uses_anonymous_args)
21773 	    {
21774 	      insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21775 					  (0xf0 >> (args_to_push / 4)) & 0xf);
21776 	      emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21777 	      saved_pretend_args = 1;
21778 	    }
21779 	  else
21780 	    {
21781 	      rtx addr, dwarf;
21782 
21783 	      if (args_to_push == 4)
21784 		addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21785 	      else
21786 		addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21787 					   plus_constant (Pmode,
21788 							  stack_pointer_rtx,
21789 							  -args_to_push));
21790 
21791 	      insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21792 
21793 	      /* Just tell the dwarf backend that we adjusted SP.  */
21794 	      dwarf = gen_rtx_SET (stack_pointer_rtx,
21795 				   plus_constant (Pmode, stack_pointer_rtx,
21796 						  -args_to_push));
21797 	      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21798 	    }
21799 
21800 	  RTX_FRAME_RELATED_P (insn) = 1;
21801 	  fp_offset = args_to_push;
21802 	  args_to_push = 0;
21803 	}
21804     }
21805 
21806   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21807     {
21808       if (IS_INTERRUPT (func_type))
21809 	{
21810 	  /* Interrupt functions must not corrupt any registers.
21811 	     Creating a frame pointer however, corrupts the IP
21812 	     register, so we must push it first.  */
21813 	  emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21814 
21815 	  /* Do not set RTX_FRAME_RELATED_P on this insn.
21816 	     The dwarf stack unwinding code only wants to see one
21817 	     stack decrement per function, and this is not it.  If
21818 	     this instruction is labeled as being part of the frame
21819 	     creation sequence then dwarf2out_frame_debug_expr will
21820 	     die when it encounters the assignment of IP to FP
21821 	     later on, since the use of SP here establishes SP as
21822 	     the CFA register and not IP.
21823 
21824 	     Anyway this instruction is not really part of the stack
21825 	     frame creation although it is part of the prologue.  */
21826 	}
21827 
21828       insn = emit_set_insn (ip_rtx,
21829 			    plus_constant (Pmode, stack_pointer_rtx,
21830 					   fp_offset));
21831       RTX_FRAME_RELATED_P (insn) = 1;
21832     }
21833 
21834   if (args_to_push)
21835     {
21836       /* Push the argument registers, or reserve space for them.  */
21837       if (cfun->machine->uses_anonymous_args)
21838 	insn = emit_multi_reg_push
21839 	  ((0xf0 >> (args_to_push / 4)) & 0xf,
21840 	   (0xf0 >> (args_to_push / 4)) & 0xf);
21841       else
21842 	insn = emit_insn
21843 	  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21844 		       GEN_INT (- args_to_push)));
21845       RTX_FRAME_RELATED_P (insn) = 1;
21846     }
21847 
21848   /* If this is an interrupt service routine, and the link register
21849      is going to be pushed, and we're not generating the extra
21850      push of IP (needed when a frame is needed and the frame layout is APCS),
21851      then subtracting four from LR now will mean that the function return
21852      can be done with a single instruction.  */
21853   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21854       && (live_regs_mask & (1 << LR_REGNUM)) != 0
21855       && !(frame_pointer_needed && TARGET_APCS_FRAME)
21856       && TARGET_ARM)
21857     {
21858       rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21859 
21860       emit_set_insn (lr, plus_constant (SImode, lr, -4));
21861     }
21862 
21863   if (live_regs_mask)
21864     {
21865       unsigned long dwarf_regs_mask = live_regs_mask;
21866 
21867       saved_regs += bit_count (live_regs_mask) * 4;
21868       if (optimize_size && !frame_pointer_needed
21869 	  && saved_regs == offsets->saved_regs - offsets->saved_args)
21870 	{
21871 	  /* If no coprocessor registers are being pushed and we don't have
21872 	     to worry about a frame pointer then push extra registers to
21873 	     create the stack frame.  This is done in a way that does not
21874 	     alter the frame layout, so is independent of the epilogue.  */
21875 	  int n;
21876 	  int frame;
21877 	  n = 0;
21878 	  while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21879 	    n++;
21880 	  frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21881 	  if (frame && n * 4 >= frame)
21882 	    {
21883 	      n = frame / 4;
21884 	      live_regs_mask |= (1 << n) - 1;
21885 	      saved_regs += frame;
21886 	    }
21887 	}
21888 
21889       if (TARGET_LDRD
21890 	  && current_tune->prefer_ldrd_strd
21891           && !optimize_function_for_size_p (cfun))
21892         {
21893 	  gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21894           if (TARGET_THUMB2)
21895 	    thumb2_emit_strd_push (live_regs_mask);
21896           else if (TARGET_ARM
21897                    && !TARGET_APCS_FRAME
21898                    && !IS_INTERRUPT (func_type))
21899 	    arm_emit_strd_push (live_regs_mask);
21900           else
21901             {
21902 	      insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21903               RTX_FRAME_RELATED_P (insn) = 1;
21904             }
21905         }
21906       else
21907         {
21908 	  insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21909           RTX_FRAME_RELATED_P (insn) = 1;
21910         }
21911     }
21912 
21913   if (! IS_VOLATILE (func_type))
21914     saved_regs += arm_save_coproc_regs ();
21915 
21916   if (frame_pointer_needed && TARGET_ARM)
21917     {
21918       /* Create the new frame pointer.  */
21919       if (TARGET_APCS_FRAME)
21920 	{
21921 	  insn = GEN_INT (-(4 + args_to_push + fp_offset));
21922 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21923 	  RTX_FRAME_RELATED_P (insn) = 1;
21924 	}
21925       else
21926 	{
21927 	  insn = GEN_INT (saved_regs - (4 + fp_offset));
21928 	  insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21929 					stack_pointer_rtx, insn));
21930 	  RTX_FRAME_RELATED_P (insn) = 1;
21931 	}
21932     }
21933 
21934   size = offsets->outgoing_args - offsets->saved_args;
21935   if (flag_stack_usage_info)
21936     current_function_static_stack_size = size;
21937 
21938   /* If this isn't an interrupt service routine and we have a frame, then do
21939      stack checking.  We use IP as the first scratch register, except for the
21940      non-APCS nested functions if LR or r3 are available (see clobber_ip).  */
21941   if (!IS_INTERRUPT (func_type)
21942       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21943 	  || flag_stack_clash_protection))
21944     {
21945       unsigned int regno;
21946 
21947       if (!IS_NESTED (func_type) || clobber_ip)
21948 	regno = IP_REGNUM;
21949       else if (df_regs_ever_live_p (LR_REGNUM))
21950 	regno = LR_REGNUM;
21951       else
21952 	regno = 3;
21953 
21954       if (crtl->is_leaf && !cfun->calls_alloca)
21955 	{
21956 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21957 	    arm_emit_probe_stack_range (get_stack_check_protect (),
21958 					size - get_stack_check_protect (),
21959 					regno, live_regs_mask);
21960 	}
21961       else if (size > 0)
21962 	arm_emit_probe_stack_range (get_stack_check_protect (), size,
21963 				    regno, live_regs_mask);
21964     }
21965 
21966   /* Recover the static chain register.  */
21967   if (clobber_ip)
21968     {
21969       if (!arm_r3_live_at_start_p () || saved_pretend_args)
21970 	insn = gen_rtx_REG (SImode, 3);
21971       else
21972 	{
21973 	  insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21974 	  insn = gen_frame_mem (SImode, insn);
21975 	}
21976       emit_set_insn (ip_rtx, insn);
21977       emit_insn (gen_force_register_use (ip_rtx));
21978     }
21979 
21980   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21981     {
21982       /* This add can produce multiple insns for a large constant, so we
21983 	 need to get tricky.  */
21984       rtx_insn *last = get_last_insn ();
21985 
21986       amount = GEN_INT (offsets->saved_args + saved_regs
21987 			- offsets->outgoing_args);
21988 
21989       insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21990 				    amount));
21991       do
21992 	{
21993 	  last = last ? NEXT_INSN (last) : get_insns ();
21994 	  RTX_FRAME_RELATED_P (last) = 1;
21995 	}
21996       while (last != insn);
21997 
21998       /* If the frame pointer is needed, emit a special barrier that
21999 	 will prevent the scheduler from moving stores to the frame
22000 	 before the stack adjustment.  */
22001       if (frame_pointer_needed)
22002 	emit_insn (gen_stack_tie (stack_pointer_rtx,
22003 				  hard_frame_pointer_rtx));
22004     }
22005 
22006 
22007   if (frame_pointer_needed && TARGET_THUMB2)
22008     thumb_set_frame_pointer (offsets);
22009 
22010   if (flag_pic && arm_pic_register != INVALID_REGNUM)
22011     {
22012       unsigned long mask;
22013 
22014       mask = live_regs_mask;
22015       mask &= THUMB2_WORK_REGS;
22016       if (!IS_NESTED (func_type))
22017 	mask |= (1 << IP_REGNUM);
22018       arm_load_pic_register (mask);
22019     }
22020 
22021   /* If we are profiling, make sure no instructions are scheduled before
22022      the call to mcount.  Similarly if the user has requested no
22023      scheduling in the prolog.  Similarly if we want non-call exceptions
22024      using the EABI unwinder, to prevent faulting instructions from being
22025      swapped with a stack adjustment.  */
22026   if (crtl->profile || !TARGET_SCHED_PROLOG
22027       || (arm_except_unwind_info (&global_options) == UI_TARGET
22028 	  && cfun->can_throw_non_call_exceptions))
22029     emit_insn (gen_blockage ());
22030 
22031   /* If the link register is being kept alive, with the return address in it,
22032      then make sure that it does not get reused by the ce2 pass.  */
22033   if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22034     cfun->machine->lr_save_eliminated = 1;
22035 }
22036 
22037 /* Print condition code to STREAM.  Helper function for arm_print_operand.  */
22038 static void
22039 arm_print_condition (FILE *stream)
22040 {
22041   if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22042     {
22043       /* Branch conversion is not implemented for Thumb-2.  */
22044       if (TARGET_THUMB)
22045 	{
22046 	  output_operand_lossage ("predicated Thumb instruction");
22047 	  return;
22048 	}
22049       if (current_insn_predicate != NULL)
22050 	{
22051 	  output_operand_lossage
22052 	    ("predicated instruction in conditional sequence");
22053 	  return;
22054 	}
22055 
22056       fputs (arm_condition_codes[arm_current_cc], stream);
22057     }
22058   else if (current_insn_predicate)
22059     {
22060       enum arm_cond_code code;
22061 
22062       if (TARGET_THUMB1)
22063 	{
22064 	  output_operand_lossage ("predicated Thumb instruction");
22065 	  return;
22066 	}
22067 
22068       code = get_arm_condition_code (current_insn_predicate);
22069       fputs (arm_condition_codes[code], stream);
22070     }
22071 }
22072 
22073 
22074 /* Globally reserved letters: acln
22075    Punctuation letters currently used: @_|?().!#
22076    Lower case letters currently used: bcdefhimpqtvwxyz
22077    Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22078    Letters previously used, but now deprecated/obsolete: sVWXYZ.
22079 
22080    Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22081 
22082    If CODE is 'd', then X is a condition operand and the instruction
22083    should only be executed if the condition is true.
22084    If CODE is 'D', then X is a condition operand and the instruction
22085    should only be executed if the condition is false: however, if the mode
22086    of the comparison is CCFPEmode, then always execute the instruction -- we
22087    do this because in these circumstances !GE does not necessarily imply LT;
22088    in these cases the instruction pattern will take care to make sure that
22089    an instruction containing %d will follow, thereby undoing the effects of
22090    doing this instruction unconditionally.
22091    If CODE is 'N' then X is a floating point operand that must be negated
22092    before output.
22093    If CODE is 'B' then output a bitwise inverted value of X (a const int).
22094    If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
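/* Illustrative example (an assumption about typical use, not quoted from
   the machine description): an output template such as
   "add%?\t%0, %1, %2", expanded while the ccfsm has conditionalised the
   insn on EQ, prints as "addeq\tr0, r1, r2"; the '%?' is handled by
   arm_print_condition below and the operands by arm_print_operand.  */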
22095 static void
22096 arm_print_operand (FILE *stream, rtx x, int code)
22097 {
22098   switch (code)
22099     {
22100     case '@':
22101       fputs (ASM_COMMENT_START, stream);
22102       return;
22103 
22104     case '_':
22105       fputs (user_label_prefix, stream);
22106       return;
22107 
22108     case '|':
22109       fputs (REGISTER_PREFIX, stream);
22110       return;
22111 
22112     case '?':
22113       arm_print_condition (stream);
22114       return;
22115 
22116     case '.':
22117       /* The current condition code for a condition code setting instruction.
22118 	 Preceded by 's' in unified syntax, otherwise followed by 's'.  */
22119       fputc('s', stream);
22120       arm_print_condition (stream);
22121       return;
22122 
22123     case '!':
22124       /* If the instruction is conditionally executed then print
22125 	 the current condition code, otherwise print 's'.  */
22126       gcc_assert (TARGET_THUMB2);
22127       if (current_insn_predicate)
22128 	arm_print_condition (stream);
22129       else
22130 	fputc('s', stream);
22131       break;
22132 
22133     /* %# is a "break" sequence. It doesn't output anything, but is used to
22134        separate e.g. operand numbers from following text, if that text consists
22135        of further digits which we don't want to be part of the operand
22136        number.  */
22137     case '#':
22138       return;
22139 
22140     case 'N':
22141       {
22142 	REAL_VALUE_TYPE r;
22143 	r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22144 	fprintf (stream, "%s", fp_const_from_val (&r));
22145       }
22146       return;
22147 
22148     /* An integer or symbol address without a preceding # sign.  */
22149     case 'c':
22150       switch (GET_CODE (x))
22151 	{
22152 	case CONST_INT:
22153 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22154 	  break;
22155 
22156 	case SYMBOL_REF:
22157 	  output_addr_const (stream, x);
22158 	  break;
22159 
22160 	case CONST:
22161 	  if (GET_CODE (XEXP (x, 0)) == PLUS
22162 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22163 	    {
22164 	      output_addr_const (stream, x);
22165 	      break;
22166 	    }
22167 	  /* Fall through.  */
22168 
22169 	default:
22170 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22171 	}
22172       return;
22173 
22174     /* An integer that we want to print in HEX.  */
22175     case 'x':
22176       switch (GET_CODE (x))
22177 	{
22178 	case CONST_INT:
22179 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22180 	  break;
22181 
22182 	default:
22183 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22184 	}
22185       return;
22186 
22187     case 'B':
22188       if (CONST_INT_P (x))
22189 	{
22190 	  HOST_WIDE_INT val;
22191 	  val = ARM_SIGN_EXTEND (~INTVAL (x));
22192 	  fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22193 	}
22194       else
22195 	{
22196 	  putc ('~', stream);
22197 	  output_addr_const (stream, x);
22198 	}
22199       return;
22200 
22201     case 'b':
22202       /* Print the log2 of a CONST_INT.  */
22203       {
22204 	HOST_WIDE_INT val;
22205 
22206 	if (!CONST_INT_P (x)
22207 	    || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22208 	  output_operand_lossage ("Unsupported operand for code '%c'", code);
22209 	else
22210 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22211       }
22212       return;
22213 
22214     case 'L':
22215       /* The low 16 bits of an immediate constant.  */
22216       fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22217       return;
22218 
22219     case 'i':
22220       fprintf (stream, "%s", arithmetic_instr (x, 1));
22221       return;
22222 
22223     case 'I':
22224       fprintf (stream, "%s", arithmetic_instr (x, 0));
22225       return;
22226 
22227     case 'S':
22228       {
22229 	HOST_WIDE_INT val;
22230 	const char *shift;
22231 
22232 	shift = shift_op (x, &val);
22233 
22234 	if (shift)
22235 	  {
22236 	    fprintf (stream, ", %s ", shift);
22237 	    if (val == -1)
22238 	      arm_print_operand (stream, XEXP (x, 1), 0);
22239 	    else
22240 	      fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22241 	  }
22242       }
22243       return;
22244 
22245       /* An explanation of the 'Q', 'R' and 'H' register operands:
22246 
22247 	 In a pair of registers containing a DI or DF value the 'Q'
22248 	 operand returns the register number of the register containing
22249 	 the least significant part of the value.  The 'R' operand returns
22250 	 the register number of the register containing the most
22251 	 significant part of the value.
22252 
22253 	 The 'H' operand returns the higher of the two register numbers.
22254 	 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22255 	 same as the 'Q' operand, since the most significant part of the
22256 	 value is held in the lower number register.  The reverse is true
22257 	 on systems where WORDS_BIG_ENDIAN is false.
22258 
22259 	 The purpose of these operands is to distinguish between cases
22260 	 where the endian-ness of the values is important (for example
22261 	 when they are added together), and cases where the endian-ness
22262 	 is irrelevant, but the order of register operations is important.
22263 	 For example when loading a value from memory into a register
22264 	 pair, the endian-ness does not matter.  Provided that the value
22265 	 from the lower memory address is put into the lower numbered
22266 	 register, and the value from the higher address is put into the
22267 	 higher numbered register, the load will work regardless of whether
22268 	 the value being loaded is big-wordian or little-wordian.  The
22269 	 order of the two register loads can matter however, if the address
22270 	 of the memory location is actually held in one of the registers
22271 	 being overwritten by the load.
22272 
22273 	 The 'Q' and 'R' constraints are also available for 64-bit
22274 	 constants.  */
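      /* A worked example (illustrative register names): for a DImode value
	 held in the pair {r0, r1} on a little-endian target, "%Q0" prints
	 "r0" (least significant word), "%R0" prints "r1" (most significant
	 word) and "%H0" prints "r1" (the higher-numbered register).  With
	 WORDS_BIG_ENDIAN, %Q and %R swap while %H still prints "r1".  */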
22275     case 'Q':
22276       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22277 	{
22278 	  rtx part = gen_lowpart (SImode, x);
22279 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22280 	  return;
22281 	}
22282 
22283       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22284 	{
22285 	  output_operand_lossage ("invalid operand for code '%c'", code);
22286 	  return;
22287 	}
22288 
22289       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22290       return;
22291 
22292     case 'R':
22293       if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22294 	{
22295 	  machine_mode mode = GET_MODE (x);
22296 	  rtx part;
22297 
22298 	  if (mode == VOIDmode)
22299 	    mode = DImode;
22300 	  part = gen_highpart_mode (SImode, mode, x);
22301 	  fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22302 	  return;
22303 	}
22304 
22305       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22306 	{
22307 	  output_operand_lossage ("invalid operand for code '%c'", code);
22308 	  return;
22309 	}
22310 
22311       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22312       return;
22313 
22314     case 'H':
22315       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22316 	{
22317 	  output_operand_lossage ("invalid operand for code '%c'", code);
22318 	  return;
22319 	}
22320 
22321       asm_fprintf (stream, "%r", REGNO (x) + 1);
22322       return;
22323 
22324     case 'J':
22325       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22326 	{
22327 	  output_operand_lossage ("invalid operand for code '%c'", code);
22328 	  return;
22329 	}
22330 
22331       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22332       return;
22333 
22334     case 'K':
22335       if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22336 	{
22337 	  output_operand_lossage ("invalid operand for code '%c'", code);
22338 	  return;
22339 	}
22340 
22341       asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22342       return;
22343 
22344     case 'm':
22345       asm_fprintf (stream, "%r",
22346 		   REG_P (XEXP (x, 0))
22347 		   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22348       return;
22349 
22350     case 'M':
22351       asm_fprintf (stream, "{%r-%r}",
22352 		   REGNO (x),
22353 		   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22354       return;
22355 
22356     /* Like 'M', but writing doubleword vector registers, for use by Neon
22357        insns.  */
22358     case 'h':
22359       {
22360         int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22361         int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22362         if (numregs == 1)
22363           asm_fprintf (stream, "{d%d}", regno);
22364         else
22365           asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22366       }
22367       return;
22368 
22369     case 'd':
22370       /* CONST_TRUE_RTX means always -- that's the default.  */
22371       if (x == const_true_rtx)
22372 	return;
22373 
22374       if (!COMPARISON_P (x))
22375 	{
22376 	  output_operand_lossage ("invalid operand for code '%c'", code);
22377 	  return;
22378 	}
22379 
22380       fputs (arm_condition_codes[get_arm_condition_code (x)],
22381 	     stream);
22382       return;
22383 
22384     case 'D':
22385       /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
22386 	 want to do that.  */
22387       if (x == const_true_rtx)
22388 	{
22389 	  output_operand_lossage ("instruction never executed");
22390 	  return;
22391 	}
22392       if (!COMPARISON_P (x))
22393 	{
22394 	  output_operand_lossage ("invalid operand for code '%c'", code);
22395 	  return;
22396 	}
22397 
22398       fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22399 				 (get_arm_condition_code (x))],
22400 	     stream);
22401       return;
22402 
22403     case 's':
22404     case 'V':
22405     case 'W':
22406     case 'X':
22407     case 'Y':
22408     case 'Z':
22409       /* Former Maverick support, removed after GCC-4.7.  */
22410       output_operand_lossage ("obsolete Maverick format code '%c'", code);
22411       return;
22412 
22413     case 'U':
22414       if (!REG_P (x)
22415 	  || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22416 	  || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22417 	/* Bad value for wCG register number.  */
22418 	{
22419 	  output_operand_lossage ("invalid operand for code '%c'", code);
22420 	  return;
22421 	}
22422 
22423       else
22424 	fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22425       return;
22426 
22427       /* Print an iWMMXt control register name.  */
22428     case 'w':
22429       if (!CONST_INT_P (x)
22430 	  || INTVAL (x) < 0
22431 	  || INTVAL (x) >= 16)
22432 	/* Bad value for wC register number.  */
22433 	{
22434 	  output_operand_lossage ("invalid operand for code '%c'", code);
22435 	  return;
22436 	}
22437 
22438       else
22439 	{
22440 	  static const char * wc_reg_names [16] =
22441 	    {
22442 	      "wCID",  "wCon",  "wCSSF", "wCASF",
22443 	      "wC4",   "wC5",   "wC6",   "wC7",
22444 	      "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22445 	      "wC12",  "wC13",  "wC14",  "wC15"
22446 	    };
22447 
22448 	  fputs (wc_reg_names [INTVAL (x)], stream);
22449 	}
22450       return;
22451 
22452     /* Print the high single-precision register of a VFP double-precision
22453        register.  */
22454     case 'p':
22455       {
22456         machine_mode mode = GET_MODE (x);
22457         int regno;
22458 
22459         if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22460           {
22461 	    output_operand_lossage ("invalid operand for code '%c'", code);
22462 	    return;
22463           }
22464 
22465         regno = REGNO (x);
22466         if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22467           {
22468 	    output_operand_lossage ("invalid operand for code '%c'", code);
22469 	    return;
22470           }
22471 
22472 	fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22473       }
22474       return;
22475 
22476     /* Print a VFP/Neon double precision or quad precision register name.  */
22477     case 'P':
22478     case 'q':
22479       {
22480 	machine_mode mode = GET_MODE (x);
22481 	int is_quad = (code == 'q');
22482 	int regno;
22483 
22484 	if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22485 	  {
22486 	    output_operand_lossage ("invalid operand for code '%c'", code);
22487 	    return;
22488 	  }
22489 
22490 	if (!REG_P (x)
22491 	    || !IS_VFP_REGNUM (REGNO (x)))
22492 	  {
22493 	    output_operand_lossage ("invalid operand for code '%c'", code);
22494 	    return;
22495 	  }
22496 
22497 	regno = REGNO (x);
22498 	if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22499             || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22500 	  {
22501 	    output_operand_lossage ("invalid operand for code '%c'", code);
22502 	    return;
22503 	  }
22504 
22505 	fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22506 	  (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22507       }
22508       return;
22509 
22510     /* These two codes print the low/high doubleword register of a Neon quad
22511        register, respectively.  For pair-structure types, can also print
22512        low/high quadword registers.  */
22513     case 'e':
22514     case 'f':
22515       {
22516         machine_mode mode = GET_MODE (x);
22517         int regno;
22518 
22519         if ((GET_MODE_SIZE (mode) != 16
22520 	     && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22521           {
22522 	    output_operand_lossage ("invalid operand for code '%c'", code);
22523 	    return;
22524           }
22525 
22526         regno = REGNO (x);
22527         if (!NEON_REGNO_OK_FOR_QUAD (regno))
22528           {
22529 	    output_operand_lossage ("invalid operand for code '%c'", code);
22530 	    return;
22531           }
22532 
22533         if (GET_MODE_SIZE (mode) == 16)
22534           fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22535 				  + (code == 'f' ? 1 : 0));
22536         else
22537           fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22538 				  + (code == 'f' ? 1 : 0));
22539       }
22540       return;
22541 
22542     /* Print a VFPv3 floating-point constant, represented as an integer
22543        index.  */
22544     case 'G':
22545       {
22546         int index = vfp3_const_double_index (x);
22547 	gcc_assert (index != -1);
22548 	fprintf (stream, "%d", index);
22549       }
22550       return;
22551 
22552     /* Print bits representing opcode features for Neon.
22553 
22554        Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
22555        and polynomials as unsigned.
22556 
22557        Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22558 
22559        Bit 2 is 1 for rounding functions, 0 otherwise.  */
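    /* Worked examples (illustrative): an INTVAL of 1 (signed integer)
       prints 's' under %T and 'i' under %F; 2 (polynomial) prints 'p'
       under %T but 'u' under %t; 3 (float) prints 'f' under all three;
       and a value of 5 (signed + rounding) additionally prints 'r'
       under %O.  */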
22560 
22561     /* Identify the type as 's', 'u', 'p' or 'f'.  */
22562     case 'T':
22563       {
22564         HOST_WIDE_INT bits = INTVAL (x);
22565         fputc ("uspf"[bits & 3], stream);
22566       }
22567       return;
22568 
22569     /* Likewise, but signed and unsigned integers are both 'i'.  */
22570     case 'F':
22571       {
22572         HOST_WIDE_INT bits = INTVAL (x);
22573         fputc ("iipf"[bits & 3], stream);
22574       }
22575       return;
22576 
22577     /* As for 'T', but emit 'u' instead of 'p'.  */
22578     case 't':
22579       {
22580         HOST_WIDE_INT bits = INTVAL (x);
22581         fputc ("usuf"[bits & 3], stream);
22582       }
22583       return;
22584 
22585     /* Bit 2: rounding (vs none).  */
22586     case 'O':
22587       {
22588         HOST_WIDE_INT bits = INTVAL (x);
22589         fputs ((bits & 4) != 0 ? "r" : "", stream);
22590       }
22591       return;
22592 
22593     /* Memory operand for vld1/vst1 instruction.  */
22594     case 'A':
22595       {
22596 	rtx addr;
22597 	bool postinc = FALSE;
22598 	rtx postinc_reg = NULL;
22599 	unsigned align, memsize, align_bits;
22600 
22601 	gcc_assert (MEM_P (x));
22602 	addr = XEXP (x, 0);
22603 	if (GET_CODE (addr) == POST_INC)
22604 	  {
22605 	    postinc = 1;
22606 	    addr = XEXP (addr, 0);
22607 	  }
22608 	if (GET_CODE (addr) == POST_MODIFY)
22609 	  {
22610 	    postinc_reg = XEXP( XEXP (addr, 1), 1);
22611 	    addr = XEXP (addr, 0);
22612 	  }
22613 	asm_fprintf (stream, "[%r", REGNO (addr));
22614 
22615 	/* We know the alignment of this access, so we can emit a hint in the
22616 	   instruction (for some alignments) as an aid to the memory subsystem
22617 	   of the target.  */
22618 	align = MEM_ALIGN (x) >> 3;
22619 	memsize = MEM_SIZE (x);
22620 
22621 	/* Only certain alignment specifiers are supported by the hardware.  */
22622 	if (memsize == 32 && (align % 32) == 0)
22623 	  align_bits = 256;
22624 	else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22625 	  align_bits = 128;
22626 	else if (memsize >= 8 && (align % 8) == 0)
22627 	  align_bits = 64;
22628 	else
22629 	  align_bits = 0;
22630 
22631 	if (align_bits != 0)
22632 	  asm_fprintf (stream, ":%d", align_bits);
22633 
22634 	asm_fprintf (stream, "]");
22635 
22636 	if (postinc)
22637 	  fputs("!", stream);
22638 	if (postinc_reg)
22639 	  asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22640       }
22641       return;
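    /* For instance (an illustrative sketch): a 16-byte vld1/vst1 access
       through r0 whose MEM_ALIGN is at least 128 bits prints as
       "[r0:128]", and a post-incremented form as "[r0:128]!"; an access
       with no usable alignment guarantee prints plain "[r0]".  */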
22642 
22643     case 'C':
22644       {
22645 	rtx addr;
22646 
22647 	gcc_assert (MEM_P (x));
22648 	addr = XEXP (x, 0);
22649 	gcc_assert (REG_P (addr));
22650 	asm_fprintf (stream, "[%r]", REGNO (addr));
22651       }
22652       return;
22653 
22654     /* Translate an S register number into a D register number and element index.  */
22655     case 'y':
22656       {
22657         machine_mode mode = GET_MODE (x);
22658         int regno;
22659 
22660         if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22661           {
22662 	    output_operand_lossage ("invalid operand for code '%c'", code);
22663 	    return;
22664           }
22665 
22666         regno = REGNO (x);
22667         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22668           {
22669 	    output_operand_lossage ("invalid operand for code '%c'", code);
22670 	    return;
22671           }
22672 
22673 	regno = regno - FIRST_VFP_REGNUM;
22674 	fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22675       }
22676       return;
22677 
22678     case 'v':
22679 	gcc_assert (CONST_DOUBLE_P (x));
22680 	int result;
22681 	result = vfp3_const_double_for_fract_bits (x);
22682 	if (result == 0)
22683 	  result = vfp3_const_double_for_bits (x);
22684 	fprintf (stream, "#%d", result);
22685 	return;
22686 
22687     /* Register specifier for vld1.16/vst1.16.  Translate the S register
22688        number into a D register number and element index.  */
22689     case 'z':
22690       {
22691         machine_mode mode = GET_MODE (x);
22692         int regno;
22693 
22694         if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22695           {
22696 	    output_operand_lossage ("invalid operand for code '%c'", code);
22697 	    return;
22698           }
22699 
22700         regno = REGNO (x);
22701         if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22702           {
22703 	    output_operand_lossage ("invalid operand for code '%c'", code);
22704 	    return;
22705           }
22706 
22707 	regno = regno - FIRST_VFP_REGNUM;
22708 	fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22709       }
22710       return;
22711 
22712     default:
22713       if (x == 0)
22714 	{
22715 	  output_operand_lossage ("missing operand");
22716 	  return;
22717 	}
22718 
22719       switch (GET_CODE (x))
22720 	{
22721 	case REG:
22722 	  asm_fprintf (stream, "%r", REGNO (x));
22723 	  break;
22724 
22725 	case MEM:
22726 	  output_address (GET_MODE (x), XEXP (x, 0));
22727 	  break;
22728 
22729 	case CONST_DOUBLE:
22730 	  {
22731             char fpstr[20];
22732             real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22733 			      sizeof (fpstr), 0, 1);
22734             fprintf (stream, "#%s", fpstr);
22735 	  }
22736 	  break;
22737 
22738 	default:
22739 	  gcc_assert (GET_CODE (x) != NEG);
22740 	  fputc ('#', stream);
22741 	  if (GET_CODE (x) == HIGH)
22742 	    {
22743 	      fputs (":lower16:", stream);
22744 	      x = XEXP (x, 0);
22745 	    }
22746 
22747 	  output_addr_const (stream, x);
22748 	  break;
22749 	}
22750     }
22751 }
22752 
22753 /* Target hook for printing a memory address.  */
22754 static void
22755 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22756 {
22757   if (TARGET_32BIT)
22758     {
22759       int is_minus = GET_CODE (x) == MINUS;
22760 
22761       if (REG_P (x))
22762 	asm_fprintf (stream, "[%r]", REGNO (x));
22763       else if (GET_CODE (x) == PLUS || is_minus)
22764 	{
22765 	  rtx base = XEXP (x, 0);
22766 	  rtx index = XEXP (x, 1);
22767 	  HOST_WIDE_INT offset = 0;
22768 	  if (!REG_P (base)
22769 	      || (REG_P (index) && REGNO (index) == SP_REGNUM))
22770 	    {
22771 	      /* Ensure that BASE is a register (one of them
22772 		 must be).  Also ensure that SP is not used
22773 		 as an index register.  */
22774 	      std::swap (base, index);
22775 	    }
22776 	  switch (GET_CODE (index))
22777 	    {
22778 	    case CONST_INT:
22779 	      offset = INTVAL (index);
22780 	      if (is_minus)
22781 		offset = -offset;
22782 	      asm_fprintf (stream, "[%r, #%wd]",
22783 			   REGNO (base), offset);
22784 	      break;
22785 
22786 	    case REG:
22787 	      asm_fprintf (stream, "[%r, %s%r]",
22788 			   REGNO (base), is_minus ? "-" : "",
22789 			   REGNO (index));
22790 	      break;
22791 
22792 	    case MULT:
22793 	    case ASHIFTRT:
22794 	    case LSHIFTRT:
22795 	    case ASHIFT:
22796 	    case ROTATERT:
22797 	      {
22798 		asm_fprintf (stream, "[%r, %s%r",
22799 			     REGNO (base), is_minus ? "-" : "",
22800 			     REGNO (XEXP (index, 0)));
22801 		arm_print_operand (stream, index, 'S');
22802 		fputs ("]", stream);
22803 		break;
22804 	      }
22805 
22806 	    default:
22807 	      gcc_unreachable ();
22808 	    }
22809 	}
22810       else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22811 	       || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22812 	{
22813 	  gcc_assert (REG_P (XEXP (x, 0)));
22814 
22815 	  if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22816 	    asm_fprintf (stream, "[%r, #%s%d]!",
22817 			 REGNO (XEXP (x, 0)),
22818 			 GET_CODE (x) == PRE_DEC ? "-" : "",
22819 			 GET_MODE_SIZE (mode));
22820 	  else
22821 	    asm_fprintf (stream, "[%r], #%s%d",
22822 			 REGNO (XEXP (x, 0)),
22823 			 GET_CODE (x) == POST_DEC ? "-" : "",
22824 			 GET_MODE_SIZE (mode));
22825 	}
22826       else if (GET_CODE (x) == PRE_MODIFY)
22827 	{
22828 	  asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22829 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22830 	    asm_fprintf (stream, "#%wd]!",
22831 			 INTVAL (XEXP (XEXP (x, 1), 1)));
22832 	  else
22833 	    asm_fprintf (stream, "%r]!",
22834 			 REGNO (XEXP (XEXP (x, 1), 1)));
22835 	}
22836       else if (GET_CODE (x) == POST_MODIFY)
22837 	{
22838 	  asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22839 	  if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22840 	    asm_fprintf (stream, "#%wd",
22841 			 INTVAL (XEXP (XEXP (x, 1), 1)));
22842 	  else
22843 	    asm_fprintf (stream, "%r",
22844 			 REGNO (XEXP (XEXP (x, 1), 1)));
22845 	}
22846       else output_addr_const (stream, x);
22847     }
22848   else
22849     {
22850       if (REG_P (x))
22851 	asm_fprintf (stream, "[%r]", REGNO (x));
22852       else if (GET_CODE (x) == POST_INC)
22853 	asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22854       else if (GET_CODE (x) == PLUS)
22855 	{
22856 	  gcc_assert (REG_P (XEXP (x, 0)));
22857 	  if (CONST_INT_P (XEXP (x, 1)))
22858 	    asm_fprintf (stream, "[%r, #%wd]",
22859 			 REGNO (XEXP (x, 0)),
22860 			 INTVAL (XEXP (x, 1)));
22861 	  else
22862 	    asm_fprintf (stream, "[%r, %r]",
22863 			 REGNO (XEXP (x, 0)),
22864 			 REGNO (XEXP (x, 1)));
22865 	}
22866       else
22867 	output_addr_const (stream, x);
22868     }
22869 }
22870 
22871 /* Target hook for indicating whether a punctuation character for
22872    TARGET_PRINT_OPERAND is valid.  */
22873 static bool
22874 arm_print_operand_punct_valid_p (unsigned char code)
22875 {
22876   return (code == '@' || code == '|' || code == '.'
22877 	  || code == '(' || code == ')' || code == '#'
22878 	  || (TARGET_32BIT && (code == '?'))
22879 	  || (TARGET_THUMB2 && (code == '!'))
22880 	  || (TARGET_THUMB && (code == '_')));
22881 }
22882 
22883 /* Target hook for assembling integer objects.  The ARM version needs to
22884    handle word-sized values specially.  */
22885 static bool
22886 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22887 {
22888   machine_mode mode;
22889 
22890   if (size == UNITS_PER_WORD && aligned_p)
22891     {
22892       fputs ("\t.word\t", asm_out_file);
22893       output_addr_const (asm_out_file, x);
22894 
22895       /* Mark symbols as position independent.  We only do this in the
22896 	 .text segment, not in the .data segment.  */
22897       if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22898 	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22899 	{
22900 	  /* See legitimize_pic_address for an explanation of the
22901 	     TARGET_VXWORKS_RTP check.  */
22902 	  /* References to weak symbols cannot be resolved locally:
22903 	     they may be overridden by a non-weak definition at link
22904 	     time.  */
22905 	  if (!arm_pic_data_is_text_relative
22906 	      || (GET_CODE (x) == SYMBOL_REF
22907 		  && (!SYMBOL_REF_LOCAL_P (x)
22908 		      || (SYMBOL_REF_DECL (x)
22909 			  ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22910 	    fputs ("(GOT)", asm_out_file);
22911 	  else
22912 	    fputs ("(GOTOFF)", asm_out_file);
22913 	}
22914       fputc ('\n', asm_out_file);
22915       return true;
22916     }
22917 
22918   mode = GET_MODE (x);
22919 
22920   if (arm_vector_mode_supported_p (mode))
22921     {
22922       int i, units;
22923 
22924       gcc_assert (GET_CODE (x) == CONST_VECTOR);
22925 
22926       units = CONST_VECTOR_NUNITS (x);
22927       size = GET_MODE_UNIT_SIZE (mode);
22928 
22929       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22930         for (i = 0; i < units; i++)
22931 	  {
22932 	    rtx elt = CONST_VECTOR_ELT (x, i);
22933 	    assemble_integer
22934 	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22935 	  }
22936       else
22937         for (i = 0; i < units; i++)
22938           {
22939             rtx elt = CONST_VECTOR_ELT (x, i);
22940 	    assemble_real
22941 	      (*CONST_DOUBLE_REAL_VALUE (elt),
22942 	       as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22943 	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22944           }
22945 
22946       return true;
22947     }
22948 
22949   return default_assemble_integer (x, size, aligned_p);
22950 }
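/* As an illustrative example (assumed, not quoted from real output): when
   emitting a PIC constant table, a reference to a local, non-weak symbol
   with text-relative PIC data becomes ".word sym(GOTOFF)", while a
   reference to a global or weak symbol becomes ".word sym(GOT)".  */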
22951 
22952 static void
22953 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22954 {
22955   section *s;
22956 
22957   if (!TARGET_AAPCS_BASED)
22958     {
22959       (is_ctor ?
22960        default_named_section_asm_out_constructor
22961        : default_named_section_asm_out_destructor) (symbol, priority);
22962       return;
22963     }
22964 
22965   /* Put these in the .init_array section, using a special relocation.  */
22966   if (priority != DEFAULT_INIT_PRIORITY)
22967     {
22968       char buf[18];
22969       sprintf (buf, "%s.%.5u",
22970 	       is_ctor ? ".init_array" : ".fini_array",
22971 	       priority);
22972       s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22973     }
22974   else if (is_ctor)
22975     s = ctors_section;
22976   else
22977     s = dtors_section;
22978 
22979   switch_to_section (s);
22980   assemble_align (POINTER_SIZE);
22981   fputs ("\t.word\t", asm_out_file);
22982   output_addr_const (asm_out_file, symbol);
22983   fputs ("(target1)\n", asm_out_file);
22984 }
22985 
22986 /* Add a function to the list of static constructors.  */
22987 
22988 static void
22989 arm_elf_asm_constructor (rtx symbol, int priority)
22990 {
22991   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22992 }
22993 
22994 /* Add a function to the list of static destructors.  */
22995 
22996 static void
22997 arm_elf_asm_destructor (rtx symbol, int priority)
22998 {
22999   arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23000 }
23001 
23002 /* A finite state machine takes care of noticing whether or not instructions
23003    can be conditionally executed, and thus decrease execution time and code
23004    size by deleting branch instructions.  The fsm is controlled by
23005    final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */
23006 
23007 /* The states of the fsm controlling condition codes are:
23008    0: normal, do nothing special
23009    1: make ASM_OUTPUT_OPCODE not output this instruction
23010    2: make ASM_OUTPUT_OPCODE not output this instruction
23011    3: make instructions conditional
23012    4: make instructions conditional
23013 
23014    State transitions (state->state by whom under condition):
23015    0 -> 1 final_prescan_insn if the `target' is a label
23016    0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23017    1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23018    2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23019    3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23020           (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23021    4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23022           (the target insn is arm_target_insn).
23023 
23024    If the jump clobbers the conditions then we use states 2 and 4.
23025 
23026    A similar thing can be done with conditional return insns.
23027 
23028    XXX In case the `target' is an unconditional branch, this conditionalising
23029    of the instructions always reduces code size, but not always execution
23030    time.  But then, I want to reduce the code size to somewhere near what
23031    /bin/cc produces.  */
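/* As an illustrative example (register names and labels are made up): a
   conditional branch over a single instruction, such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating both the branch and the label.  */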
23032 
23033 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23034    instructions.  When a COND_EXEC instruction is seen the subsequent
23035    instructions are scanned so that multiple conditional instructions can be
23036    combined into a single IT block.  arm_condexec_count and arm_condexec_mask
23037    specify the length and true/false mask for the IT block.  These will be
23038    decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
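/* For example (illustrative only): three COND_EXEC insns predicated
   EQ, EQ, NE in a row are emitted as a single IT block,

	itte	eq
	addeq	r0, r0, #1
	addeq	r1, r1, #1
	subne	r2, r2, #1

   with arm_condexec_mask describing the then/else pattern and
   arm_condexec_count the number of insns remaining in the block.  */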
23039 
23040 /* Returns the index of the ARM condition code string in
23041    `arm_condition_codes', or ARM_NV if the comparison is invalid.
23042    COMPARISON should be an rtx like `(eq (...) (...))'.  */
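/* A small usage sketch (the rtx shown is assumed, not from the sources):
   for COMPARISON = (ne (reg:CC CC_REGNUM) (const_int 0)) the mode is
   CCmode and the code NE, so the function returns ARM_NE, which indexes
   "ne" in arm_condition_codes; (uneq ...) on a CCFP comparison would
   instead yield ARM_NV.  */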
23043 
23044 enum arm_cond_code
23045 maybe_get_arm_condition_code (rtx comparison)
23046 {
23047   machine_mode mode = GET_MODE (XEXP (comparison, 0));
23048   enum arm_cond_code code;
23049   enum rtx_code comp_code = GET_CODE (comparison);
23050 
23051   if (GET_MODE_CLASS (mode) != MODE_CC)
23052     mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23053 			   XEXP (comparison, 1));
23054 
23055   switch (mode)
23056     {
23057     case E_CC_DNEmode: code = ARM_NE; goto dominance;
23058     case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23059     case E_CC_DGEmode: code = ARM_GE; goto dominance;
23060     case E_CC_DGTmode: code = ARM_GT; goto dominance;
23061     case E_CC_DLEmode: code = ARM_LE; goto dominance;
23062     case E_CC_DLTmode: code = ARM_LT; goto dominance;
23063     case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23064     case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23065     case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23066     case E_CC_DLTUmode: code = ARM_CC;
23067 
23068     dominance:
23069       if (comp_code == EQ)
23070 	return ARM_INVERSE_CONDITION_CODE (code);
23071       if (comp_code == NE)
23072 	return code;
23073       return ARM_NV;
23074 
23075     case E_CC_NOOVmode:
23076       switch (comp_code)
23077 	{
23078 	case NE: return ARM_NE;
23079 	case EQ: return ARM_EQ;
23080 	case GE: return ARM_PL;
23081 	case LT: return ARM_MI;
23082 	default: return ARM_NV;
23083 	}
23084 
23085     case E_CC_Zmode:
23086       switch (comp_code)
23087 	{
23088 	case NE: return ARM_NE;
23089 	case EQ: return ARM_EQ;
23090 	default: return ARM_NV;
23091 	}
23092 
23093     case E_CC_Nmode:
23094       switch (comp_code)
23095 	{
23096 	case NE: return ARM_MI;
23097 	case EQ: return ARM_PL;
23098 	default: return ARM_NV;
23099 	}
23100 
23101     case E_CCFPEmode:
23102     case E_CCFPmode:
23103       /* We can handle all cases except UNEQ and LTGT.  */
23104       switch (comp_code)
23105 	{
23106 	case GE: return ARM_GE;
23107 	case GT: return ARM_GT;
23108 	case LE: return ARM_LS;
23109 	case LT: return ARM_MI;
23110 	case NE: return ARM_NE;
23111 	case EQ: return ARM_EQ;
23112 	case ORDERED: return ARM_VC;
23113 	case UNORDERED: return ARM_VS;
23114 	case UNLT: return ARM_LT;
23115 	case UNLE: return ARM_LE;
23116 	case UNGT: return ARM_HI;
23117 	case UNGE: return ARM_PL;
23118 	  /* UNEQ and LTGT do not have a representation.  */
23119 	case UNEQ: /* Fall through.  */
23120 	case LTGT: /* Fall through.  */
23121 	default: return ARM_NV;
23122 	}
23123 
23124     case E_CC_SWPmode:
23125       switch (comp_code)
23126 	{
23127 	case NE: return ARM_NE;
23128 	case EQ: return ARM_EQ;
23129 	case GE: return ARM_LE;
23130 	case GT: return ARM_LT;
23131 	case LE: return ARM_GE;
23132 	case LT: return ARM_GT;
23133 	case GEU: return ARM_LS;
23134 	case GTU: return ARM_CC;
23135 	case LEU: return ARM_CS;
23136 	case LTU: return ARM_HI;
23137 	default: return ARM_NV;
23138 	}
23139 
23140     case E_CC_Cmode:
23141       switch (comp_code)
23142 	{
23143 	case LTU: return ARM_CS;
23144 	case GEU: return ARM_CC;
23145 	case NE: return ARM_CS;
23146 	case EQ: return ARM_CC;
23147 	default: return ARM_NV;
23148 	}
23149 
23150     case E_CC_CZmode:
23151       switch (comp_code)
23152 	{
23153 	case NE: return ARM_NE;
23154 	case EQ: return ARM_EQ;
23155 	case GEU: return ARM_CS;
23156 	case GTU: return ARM_HI;
23157 	case LEU: return ARM_LS;
23158 	case LTU: return ARM_CC;
23159 	default: return ARM_NV;
23160 	}
23161 
23162     case E_CC_NCVmode:
23163       switch (comp_code)
23164 	{
23165 	case GE: return ARM_GE;
23166 	case LT: return ARM_LT;
23167 	case GEU: return ARM_CS;
23168 	case LTU: return ARM_CC;
23169 	default: return ARM_NV;
23170 	}
23171 
23172     case E_CC_Vmode:
23173       switch (comp_code)
23174 	{
23175 	case NE: return ARM_VS;
23176 	case EQ: return ARM_VC;
23177 	default: return ARM_NV;
23178 	}
23179 
23180     case E_CCmode:
23181       switch (comp_code)
23182 	{
23183 	case NE: return ARM_NE;
23184 	case EQ: return ARM_EQ;
23185 	case GE: return ARM_GE;
23186 	case GT: return ARM_GT;
23187 	case LE: return ARM_LE;
23188 	case LT: return ARM_LT;
23189 	case GEU: return ARM_CS;
23190 	case GTU: return ARM_HI;
23191 	case LEU: return ARM_LS;
23192 	case LTU: return ARM_CC;
23193 	default: return ARM_NV;
23194 	}
23195 
23196     default: gcc_unreachable ();
23197     }
23198 }
23199 
23200 /* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
23201 static enum arm_cond_code
23202 get_arm_condition_code (rtx comparison)
23203 {
23204   enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23205   gcc_assert (code != ARM_NV);
23206   return code;
23207 }
23208 
23209 /* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
23210    code registers when not targeting Thumb1.  The VFP condition register
23211    only exists when generating hard-float code.  */
23212 static bool
23213 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23214 {
23215   if (!TARGET_32BIT)
23216     return false;
23217 
23218   *p1 = CC_REGNUM;
23219   *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23220   return true;
23221 }
23222 
23223 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23224    instructions.  */
23225 void
23226 thumb2_final_prescan_insn (rtx_insn *insn)
23227 {
23228   rtx_insn *first_insn = insn;
23229   rtx body = PATTERN (insn);
23230   rtx predicate;
23231   enum arm_cond_code code;
23232   int n;
23233   int mask;
23234   int max;
23235 
23236   /* max_insns_skipped in the tune was already taken into account in the
23237      cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
23238      just emit the IT blocks as we can.  It does not make sense to split
23239      the IT blocks.  */
23240   max = MAX_INSN_PER_IT_BLOCK;
23241 
23242   /* Remove the previous insn from the count of insns to be output.  */
23243   if (arm_condexec_count)
23244       arm_condexec_count--;
23245 
23246   /* Nothing to do if we are already inside a conditional block.  */
23247   if (arm_condexec_count)
23248     return;
23249 
23250   if (GET_CODE (body) != COND_EXEC)
23251     return;
23252 
23253   /* Conditional jumps are implemented directly.  */
23254   if (JUMP_P (insn))
23255     return;
23256 
23257   predicate = COND_EXEC_TEST (body);
23258   arm_current_cc = get_arm_condition_code (predicate);
23259 
23260   n = get_attr_ce_count (insn);
23261   arm_condexec_count = 1;
23262   arm_condexec_mask = (1 << n) - 1;
23263   arm_condexec_masklen = n;
23264   /* See if subsequent instructions can be combined into the same block.  */
23265   for (;;)
23266     {
23267       insn = next_nonnote_insn (insn);
23268 
23269       /* Jumping into the middle of an IT block is illegal, so a label or
23270          barrier terminates the block.  */
23271       if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23272 	break;
23273 
23274       body = PATTERN (insn);
23275       /* USE and CLOBBER aren't really insns, so just skip them.  */
23276       if (GET_CODE (body) == USE
23277 	  || GET_CODE (body) == CLOBBER)
23278 	continue;
23279 
23280       /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
23281       if (GET_CODE (body) != COND_EXEC)
23282 	break;
23283       /* Maximum number of conditionally executed instructions in a block.  */
23284       n = get_attr_ce_count (insn);
23285       if (arm_condexec_masklen + n > max)
23286 	break;
23287 
23288       predicate = COND_EXEC_TEST (body);
23289       code = get_arm_condition_code (predicate);
23290       mask = (1 << n) - 1;
23291       if (arm_current_cc == code)
23292 	arm_condexec_mask |= (mask << arm_condexec_masklen);
23293       else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23294 	break;
23295 
23296       arm_condexec_count++;
23297       arm_condexec_masklen += n;
23298 
23299       /* A jump must be the last instruction in a conditional block.  */
23300       if (JUMP_P (insn))
23301 	break;
23302     }
23303   /* Restore recog_data (getting the attributes of other insns can
23304      destroy this array, but final.c assumes that it remains intact
23305      across this call).  */
23306   extract_constrain_insn_cached (first_insn);
23307 }
23308 
23309 void
23310 arm_final_prescan_insn (rtx_insn *insn)
23311 {
23312   /* BODY will hold the body of INSN.  */
23313   rtx body = PATTERN (insn);
23314 
23315   /* This will be 1 if trying to repeat the trick, and things need to be
23316      reversed if it appears to fail.  */
23317   int reverse = 0;
23318 
23319   /* If we start with a return insn, we only succeed if we find another one.  */
23320   int seeking_return = 0;
23321   enum rtx_code return_code = UNKNOWN;
23322 
23323   /* START_INSN will hold the insn from where we start looking.  This is the
23324      first insn after the following code_label if REVERSE is true.  */
23325   rtx_insn *start_insn = insn;
23326 
23327   /* If in state 4, check if the target branch is reached, in order to
23328      change back to state 0.  */
23329   if (arm_ccfsm_state == 4)
23330     {
23331       if (insn == arm_target_insn)
23332 	{
23333 	  arm_target_insn = NULL;
23334 	  arm_ccfsm_state = 0;
23335 	}
23336       return;
23337     }
23338 
23339   /* If in state 3, it is possible to repeat the trick, if this insn is an
23340      unconditional branch to a label, and immediately following this branch
23341      is the previous target label which is only used once, and the label this
23342      branch jumps to is not too far off.  */
23343   if (arm_ccfsm_state == 3)
23344     {
23345       if (simplejump_p (insn))
23346 	{
23347 	  start_insn = next_nonnote_insn (start_insn);
23348 	  if (BARRIER_P (start_insn))
23349 	    {
23350 	      /* XXX Isn't this always a barrier?  */
23351 	      start_insn = next_nonnote_insn (start_insn);
23352 	    }
23353 	  if (LABEL_P (start_insn)
23354 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23355 	      && LABEL_NUSES (start_insn) == 1)
23356 	    reverse = TRUE;
23357 	  else
23358 	    return;
23359 	}
23360       else if (ANY_RETURN_P (body))
23361         {
23362 	  start_insn = next_nonnote_insn (start_insn);
23363 	  if (BARRIER_P (start_insn))
23364 	    start_insn = next_nonnote_insn (start_insn);
23365 	  if (LABEL_P (start_insn)
23366 	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23367 	      && LABEL_NUSES (start_insn) == 1)
23368 	    {
23369 	      reverse = TRUE;
23370 	      seeking_return = 1;
23371 	      return_code = GET_CODE (body);
23372 	    }
23373 	  else
23374 	    return;
23375         }
23376       else
23377 	return;
23378     }
23379 
23380   gcc_assert (!arm_ccfsm_state || reverse);
23381   if (!JUMP_P (insn))
23382     return;
23383 
23384   /* This jump might be paralleled with a clobber of the condition codes;
23385      the jump should always come first.  */
23386   if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23387     body = XVECEXP (body, 0, 0);
23388 
23389   if (reverse
23390       || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23391 	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23392     {
23393       int insns_skipped;
23394       int fail = FALSE, succeed = FALSE;
23395       /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
23396       int then_not_else = TRUE;
23397       rtx_insn *this_insn = start_insn;
23398       rtx label = 0;
23399 
23400       /* Register the insn jumped to.  */
23401       if (reverse)
23402         {
23403 	  if (!seeking_return)
23404 	    label = XEXP (SET_SRC (body), 0);
23405         }
23406       else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23407 	label = XEXP (XEXP (SET_SRC (body), 1), 0);
23408       else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23409 	{
23410 	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
23411 	  then_not_else = FALSE;
23412 	}
23413       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23414 	{
23415 	  seeking_return = 1;
23416 	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23417 	}
23418       else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23419         {
23420 	  seeking_return = 1;
23421 	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23422 	  then_not_else = FALSE;
23423         }
23424       else
23425 	gcc_unreachable ();
23426 
23427       /* See how many insns this branch skips, and what kind of insns.  If all
23428 	 insns are okay, and the label or unconditional branch to the same
23429 	 label is not too far away, succeed.  */
23430       for (insns_skipped = 0;
23431 	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23432 	{
23433 	  rtx scanbody;
23434 
23435 	  this_insn = next_nonnote_insn (this_insn);
23436 	  if (!this_insn)
23437 	    break;
23438 
23439 	  switch (GET_CODE (this_insn))
23440 	    {
23441 	    case CODE_LABEL:
23442 	      /* Succeed if it is the target label, otherwise fail since
23443 		 control falls in from somewhere else.  */
23444 	      if (this_insn == label)
23445 		{
23446 		  arm_ccfsm_state = 1;
23447 		  succeed = TRUE;
23448 		}
23449 	      else
23450 		fail = TRUE;
23451 	      break;
23452 
23453 	    case BARRIER:
23454 	      /* Succeed if the following insn is the target label.
23455 		 Otherwise fail.
23456 		 If return insns are used then the last insn in a function
23457 		 will be a barrier.  */
23458 	      this_insn = next_nonnote_insn (this_insn);
23459 	      if (this_insn && this_insn == label)
23460 		{
23461 		  arm_ccfsm_state = 1;
23462 		  succeed = TRUE;
23463 		}
23464 	      else
23465 		fail = TRUE;
23466 	      break;
23467 
23468 	    case CALL_INSN:
23469 	      /* The AAPCS says that conditional calls should not be
23470 		 used since they make interworking inefficient (the
23471 		 linker can't transform BL<cond> into BLX).  That's
23472 		 only a problem if the machine has BLX.  */
23473 	      if (arm_arch5)
23474 		{
23475 		  fail = TRUE;
23476 		  break;
23477 		}
23478 
23479 	      /* Succeed if the following insn is the target label, or
23480 		 if the following two insns are a barrier and the
23481 		 target label.  */
23482 	      this_insn = next_nonnote_insn (this_insn);
23483 	      if (this_insn && BARRIER_P (this_insn))
23484 		this_insn = next_nonnote_insn (this_insn);
23485 
23486 	      if (this_insn && this_insn == label
23487 		  && insns_skipped < max_insns_skipped)
23488 		{
23489 		  arm_ccfsm_state = 1;
23490 		  succeed = TRUE;
23491 		}
23492 	      else
23493 		fail = TRUE;
23494 	      break;
23495 
23496 	    case JUMP_INSN:
23497       	      /* If this is an unconditional branch to the same label, succeed.
23498 		 If it is to another label, do nothing.  If it is conditional,
23499 		 fail.  */
23500 	      /* XXX Probably, the tests for SET and the PC are
23501 		 unnecessary.  */
23502 
23503 	      scanbody = PATTERN (this_insn);
23504 	      if (GET_CODE (scanbody) == SET
23505 		  && GET_CODE (SET_DEST (scanbody)) == PC)
23506 		{
23507 		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23508 		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23509 		    {
23510 		      arm_ccfsm_state = 2;
23511 		      succeed = TRUE;
23512 		    }
23513 		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23514 		    fail = TRUE;
23515 		}
23516 	      /* Fail if a conditional return is undesirable (e.g. on a
23517 		 StrongARM), but still allow this if optimizing for size.  */
23518 	      else if (GET_CODE (scanbody) == return_code
23519 		       && !use_return_insn (TRUE, NULL)
23520 		       && !optimize_size)
23521 		fail = TRUE;
23522 	      else if (GET_CODE (scanbody) == return_code)
23523 	        {
23524 		  arm_ccfsm_state = 2;
23525 		  succeed = TRUE;
23526 	        }
23527 	      else if (GET_CODE (scanbody) == PARALLEL)
23528 	        {
23529 		  switch (get_attr_conds (this_insn))
23530 		    {
23531 		    case CONDS_NOCOND:
23532 		      break;
23533 		    default:
23534 		      fail = TRUE;
23535 		      break;
23536 		    }
23537 		}
23538 	      else
23539 		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */
23540 
23541 	      break;
23542 
23543 	    case INSN:
23544 	      /* Instructions using or affecting the condition codes make it
23545 		 fail.  */
23546 	      scanbody = PATTERN (this_insn);
23547 	      if (!(GET_CODE (scanbody) == SET
23548 		    || GET_CODE (scanbody) == PARALLEL)
23549 		  || get_attr_conds (this_insn) != CONDS_NOCOND)
23550 		fail = TRUE;
23551 	      break;
23552 
23553 	    default:
23554 	      break;
23555 	    }
23556 	}
23557       if (succeed)
23558 	{
23559 	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23560 	    arm_target_label = CODE_LABEL_NUMBER (label);
23561 	  else
23562 	    {
23563 	      gcc_assert (seeking_return || arm_ccfsm_state == 2);
23564 
23565 	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23566 	        {
23567 		  this_insn = next_nonnote_insn (this_insn);
23568 		  gcc_assert (!this_insn
23569 			      || (!BARRIER_P (this_insn)
23570 				  && !LABEL_P (this_insn)));
23571 	        }
23572 	      if (!this_insn)
23573 	        {
23574 		  /* Oh dear!  We ran off the end; give up.  */
23575 		  extract_constrain_insn_cached (insn);
23576 		  arm_ccfsm_state = 0;
23577 		  arm_target_insn = NULL;
23578 		  return;
23579 	        }
23580 	      arm_target_insn = this_insn;
23581 	    }
23582 
23583 	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23584 	     what it was.  */
23585 	  if (!reverse)
23586 	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23587 
23588 	  if (reverse || then_not_else)
23589 	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23590 	}
23591 
23592       /* Restore recog_data (getting the attributes of other insns can
23593 	 destroy this array, but final.c assumes that it remains intact
23594 	 across this call).  */
23595       extract_constrain_insn_cached (insn);
23596     }
23597 }
23598 
23599 /* Output IT instructions.  */
23600 void
23601 thumb2_asm_output_opcode (FILE * stream)
23602 {
23603   char buff[5];
23604   int n;
23605 
23606   if (arm_condexec_mask)
23607     {
23608       for (n = 0; n < arm_condexec_masklen; n++)
23609 	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23610       buff[n] = 0;
23611       asm_fprintf(stream, "i%s\t%s\n\t", buff,
23612 		  arm_condition_codes[arm_current_cc]);
23613       arm_condexec_mask = 0;
23614     }
23615 }
23616 
23617 /* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
23618    UNITS_PER_WORD bytes wide.  */
23619 static unsigned int
23620 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23621 {
23622   if (TARGET_32BIT
23623       && regno > PC_REGNUM
23624       && regno != FRAME_POINTER_REGNUM
23625       && regno != ARG_POINTER_REGNUM
23626       && !IS_VFP_REGNUM (regno))
23627     return 1;
23628 
23629   return ARM_NUM_REGS (mode);
23630 }
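/* Illustrative examples (assumed): DImode in core register r0 reports
   2 registers (r0/r1), since ARM_NUM_REGS rounds the 8-byte size up to
   whole words; the same DImode in an iWMMXt register reports 1, because
   those registers are wider than a word.  */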
23631 
23632 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
23633 static bool
23634 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23635 {
23636   if (GET_MODE_CLASS (mode) == MODE_CC)
23637     return (regno == CC_REGNUM
23638 	    || (TARGET_HARD_FLOAT
23639 		&& regno == VFPCC_REGNUM));
23640 
23641   if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23642     return false;
23643 
23644   if (TARGET_THUMB1)
23645     /* For the Thumb we only allow values bigger than SImode in
23646        registers 0 - 6, so that there is always a second low
23647        register available to hold the upper part of the value.
23648        We probably ought to ensure that the register is the
23649        start of an even-numbered register pair.  */
23650     return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23651 
23652   if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23653     {
23654       if (mode == SFmode || mode == SImode)
23655 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23656 
23657       if (mode == DFmode)
23658 	return VFP_REGNO_OK_FOR_DOUBLE (regno);
23659 
23660       if (mode == HFmode)
23661 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23662 
23663       /* VFP registers can hold HImode values.  */
23664       if (mode == HImode)
23665 	return VFP_REGNO_OK_FOR_SINGLE (regno);
23666 
23667       if (TARGET_NEON)
23668         return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23669                || (VALID_NEON_QREG_MODE (mode)
23670                    && NEON_REGNO_OK_FOR_QUAD (regno))
23671 	       || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23672 	       || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23673 	       || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23674 	       || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23675 	       || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23676 
23677       return false;
23678     }
23679 
23680   if (TARGET_REALLY_IWMMXT)
23681     {
23682       if (IS_IWMMXT_GR_REGNUM (regno))
23683 	return mode == SImode;
23684 
23685       if (IS_IWMMXT_REGNUM (regno))
23686 	return VALID_IWMMXT_REG_MODE (mode);
23687     }
23688 
23689   /* We allow almost any value to be stored in the general registers.
23690      Restrict doubleword quantities to even register pairs in ARM state
23691      so that we can use ldrd.  Do not allow very large Neon structure
23692      opaque modes in general registers; they would use too many.  */
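  /* For example, with LDRD available a DImode value may live in {r0, r1}
     or {r2, r3}, but not in {r1, r2}, so odd starting registers are
     rejected for multi-word modes in ARM state.  */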
23693   if (regno <= LAST_ARM_REGNUM)
23694     {
23695       if (ARM_NUM_REGS (mode) > 4)
23696 	return false;
23697 
23698       if (TARGET_THUMB2)
23699 	return true;
23700 
23701       return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23702     }
23703 
23704   if (regno == FRAME_POINTER_REGNUM
23705       || regno == ARG_POINTER_REGNUM)
23706     /* We only allow integers in the fake hard registers.  */
23707     return GET_MODE_CLASS (mode) == MODE_INT;
23708 
23709   return false;
23710 }
23711 
23712 /* Implement TARGET_MODES_TIEABLE_P.  */
23713 
23714 static bool
23715 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23716 {
23717   if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23718     return true;
23719 
23720   /* We specifically want to allow elements of "structure" modes to
23721      be tieable to the structure.  This more general condition allows
23722      other rarer situations too.  */
23723   if (TARGET_NEON
23724       && (VALID_NEON_DREG_MODE (mode1)
23725 	  || VALID_NEON_QREG_MODE (mode1)
23726 	  || VALID_NEON_STRUCT_MODE (mode1))
23727       && (VALID_NEON_DREG_MODE (mode2)
23728 	  || VALID_NEON_QREG_MODE (mode2)
23729 	  || VALID_NEON_STRUCT_MODE (mode2)))
23730     return true;
23731 
23732   return false;
23733 }
23734 
23735 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23736    not used in ARM mode.  */
23737 
23738 enum reg_class
23739 arm_regno_class (int regno)
23740 {
23741   if (regno == PC_REGNUM)
23742     return NO_REGS;
23743 
23744   if (TARGET_THUMB1)
23745     {
23746       if (regno == STACK_POINTER_REGNUM)
23747 	return STACK_REG;
23748       if (regno == CC_REGNUM)
23749 	return CC_REG;
23750       if (regno < 8)
23751 	return LO_REGS;
23752       return HI_REGS;
23753     }
23754 
23755   if (TARGET_THUMB2 && regno < 8)
23756     return LO_REGS;
23757 
23758   if (   regno <= LAST_ARM_REGNUM
23759       || regno == FRAME_POINTER_REGNUM
23760       || regno == ARG_POINTER_REGNUM)
23761     return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23762 
23763   if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23764     return TARGET_THUMB2 ? CC_REG : NO_REGS;
23765 
23766   if (IS_VFP_REGNUM (regno))
23767     {
23768       if (regno <= D7_VFP_REGNUM)
23769 	return VFP_D0_D7_REGS;
23770       else if (regno <= LAST_LO_VFP_REGNUM)
23771         return VFP_LO_REGS;
23772       else
23773         return VFP_HI_REGS;
23774     }
23775 
23776   if (IS_IWMMXT_REGNUM (regno))
23777     return IWMMXT_REGS;
23778 
23779   if (IS_IWMMXT_GR_REGNUM (regno))
23780     return IWMMXT_GR_REGS;
23781 
23782   return NO_REGS;
23783 }
23784 
23785 /* Handle a special case when computing the offset
23786    of an argument from the frame pointer.  */
23787 int
23788 arm_debugger_arg_offset (int value, rtx addr)
23789 {
23790   rtx_insn *insn;
23791 
23792   /* We are only interested if dbxout_parms() failed to compute the offset.  */
23793   if (value != 0)
23794     return 0;
23795 
23796   /* We can only cope with the case where the address is held in a register.  */
23797   if (!REG_P (addr))
23798     return 0;
23799 
23800   /* If we are using the frame pointer to point at the argument, then
23801      an offset of 0 is correct.  */
23802   if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23803     return 0;
23804 
23805   /* If we are using the stack pointer to point at the
23806      argument, then an offset of 0 is correct.  */
23807   /* ??? Check this is consistent with thumb2 frame layout.  */
23808   if ((TARGET_THUMB || !frame_pointer_needed)
23809       && REGNO (addr) == SP_REGNUM)
23810     return 0;
23811 
23812   /* Oh dear.  The argument is pointed to by a register rather
23813      than being held in a register, or being stored at a known
23814      offset from the frame pointer.  Since GDB only understands
23815      those two kinds of argument we must translate the address
23816      held in the register into an offset from the frame pointer.
23817      We do this by searching through the insns for the function
23818      looking to see where this register gets its value.  If the
23819      register is initialized from the frame pointer plus an offset
23820      then we are in luck and we can continue, otherwise we give up.
23821 
23822      This code is exercised by producing debugging information
23823      for a function with arguments like this:
23824 
23825            double func (double a, double b, int c, double d) {return d;}
23826 
23827      Without this code the stab for parameter 'd' will be set to
23828      an offset of 0 from the frame pointer, rather than 8.  */
23829 
23830   /* The if() statement says:
23831 
23832      If the insn is a normal instruction
23833      and if the insn is setting the value in a register
23834      and if the register being set is the register holding the address of the argument
23835      and if the address is computed by an addition
23836      that involves adding to a register
23837      which is the frame pointer
23838      a constant integer
23839 
23840      then...  */
23841 
23842   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23843     {
23844       if (   NONJUMP_INSN_P (insn)
23845 	  && GET_CODE (PATTERN (insn)) == SET
23846 	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23847 	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23848 	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23849 	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23850 	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23851 	     )
23852 	{
23853 	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23854 
23855 	  break;
23856 	}
23857     }
23858 
23859   if (value == 0)
23860     {
23861       debug_rtx (addr);
23862       warning (0, "unable to compute real location of stacked parameter");
23863       value = 8; /* XXX magic hack */
23864     }
23865 
23866   return value;
23867 }
23868 
23869 /* Implement TARGET_PROMOTED_TYPE.  */
23870 
23871 static tree
23872 arm_promoted_type (const_tree t)
23873 {
23874   if (SCALAR_FLOAT_TYPE_P (t)
23875       && TYPE_PRECISION (t) == 16
23876       && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23877     return float_type_node;
23878   return NULL_TREE;
23879 }
23880 
23881 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23882    This simply adds HFmode as a supported mode; even though we don't
23883    implement arithmetic on this type directly, it's supported by
23884    optabs conversions, much the way the double-word arithmetic is
23885    special-cased in the default hook.  */
23886 
23887 static bool
23888 arm_scalar_mode_supported_p (scalar_mode mode)
23889 {
23890   if (mode == HFmode)
23891     return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23892   else if (ALL_FIXED_POINT_MODE_P (mode))
23893     return true;
23894   else
23895     return default_scalar_mode_supported_p (mode);
23896 }
23897 
23898 /* Set the value of FLT_EVAL_METHOD.
23899    ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23900 
23901     0: evaluate all operations and constants, whose semantic type has at
23902        most the range and precision of type float, to the range and
23903        precision of float; evaluate all other operations and constants to
23904        the range and precision of the semantic type;
23905 
23906     N, where _FloatN is a supported interchange floating type
23907        evaluate all operations and constants, whose semantic type has at
23908        most the range and precision of _FloatN type, to the range and
23909        precision of the _FloatN type; evaluate all other operations and
23910        constants to the range and precision of the semantic type;
23911 
23912    If we have the ARMv8.2-A extensions then we support _Float16 in native
23913    precision, so we should set this to 16.  Otherwise, we support the type,
23914    but want to evaluate expressions in float precision, so set this to
23915    0.  */
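/* For example, without the ARMv8.2-A half-precision instructions an
   expression such as "_Float16 a, b; ... a * b ..." is evaluated in float
   and only narrowed back to _Float16 when the result is stored; with those
   instructions the multiply is done directly in half precision.  */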
23916 
23917 static enum flt_eval_method
23918 arm_excess_precision (enum excess_precision_type type)
23919 {
23920   switch (type)
23921     {
23922       case EXCESS_PRECISION_TYPE_FAST:
23923       case EXCESS_PRECISION_TYPE_STANDARD:
23924 	/* We can calculate either in 16-bit range and precision or
23925 	   32-bit range and precision.  Make that decision based on whether
23926 	   we have native support for the ARMv8.2-A 16-bit floating-point
23927 	   instructions or not.  */
23928 	return (TARGET_VFP_FP16INST
23929 		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23930 		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23931       case EXCESS_PRECISION_TYPE_IMPLICIT:
23932 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23933       default:
23934 	gcc_unreachable ();
23935     }
23936   return FLT_EVAL_METHOD_UNPREDICTABLE;
23937 }
23938 
23939 
23940 /* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
23941    _Float16 if we are using anything other than ieee format for 16-bit
23942    floating point.  Otherwise, punt to the default implementation.  */
23943 static opt_scalar_float_mode
23944 arm_floatn_mode (int n, bool extended)
23945 {
23946   if (!extended && n == 16)
23947     {
23948       if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23949 	return HFmode;
23950       return opt_scalar_float_mode ();
23951     }
23952 
23953   return default_floatn_mode (n, extended);
23954 }
23955 
23956 
23957 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23958    not to early-clobber SRC registers in the process.
23959 
23960    We assume that the operands described by SRC and DEST represent a
23961    decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
23962    number of components into which the copy has been decomposed.  */
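/* For example, copying {d0, d1} into {d1, d2} must start with the highest
   component (d2 := d1 before d1 := d0); copying in the other order would
   overwrite a source register before it had been read.  The register
   number comparison below selects the safe direction.  */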
23963 void
23964 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23965 {
23966   unsigned int i;
23967 
23968   if (!reg_overlap_mentioned_p (operands[0], operands[1])
23969       || REGNO (operands[0]) < REGNO (operands[1]))
23970     {
23971       for (i = 0; i < count; i++)
23972 	{
23973 	  operands[2 * i] = dest[i];
23974 	  operands[2 * i + 1] = src[i];
23975 	}
23976     }
23977   else
23978     {
23979       for (i = 0; i < count; i++)
23980 	{
23981 	  operands[2 * i] = dest[count - i - 1];
23982 	  operands[2 * i + 1] = src[count - i - 1];
23983 	}
23984     }
23985 }
23986 
23987 /* Split operands into moves from op[1] + op[2] into op[0].  */
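/* OPERANDS[0] is a double-width register holding the concatenation of
   OPERANDS[1] (low half) and OPERANDS[2] (high half).  The split becomes
   at most two ordinary moves, or a single register swap when the halves
   are merely reversed.  */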
23988 
23989 void
23990 neon_split_vcombine (rtx operands[3])
23991 {
23992   unsigned int dest = REGNO (operands[0]);
23993   unsigned int src1 = REGNO (operands[1]);
23994   unsigned int src2 = REGNO (operands[2]);
23995   machine_mode halfmode = GET_MODE (operands[1]);
23996   unsigned int halfregs = REG_NREGS (operands[1]);
23997   rtx destlo, desthi;
23998 
23999   if (src1 == dest && src2 == dest + halfregs)
24000     {
24001       /* No-op move.  Can't split to nothing; emit something.  */
24002       emit_note (NOTE_INSN_DELETED);
24003       return;
24004     }
24005 
24006   /* Preserve register attributes for variable tracking.  */
24007   destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24008   desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24009 			       GET_MODE_SIZE (halfmode));
24010 
24011   /* Special case of reversed high/low parts.  Use VSWP.  */
24012   if (src2 == dest && src1 == dest + halfregs)
24013     {
24014       rtx x = gen_rtx_SET (destlo, operands[1]);
24015       rtx y = gen_rtx_SET (desthi, operands[2]);
24016       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24017       return;
24018     }
24019 
24020   if (!reg_overlap_mentioned_p (operands[2], destlo))
24021     {
24022       /* Try to avoid unnecessary moves if part of the result
24023 	 is in the right place already.  */
24024       if (src1 != dest)
24025 	emit_move_insn (destlo, operands[1]);
24026       if (src2 != dest + halfregs)
24027 	emit_move_insn (desthi, operands[2]);
24028     }
24029   else
24030     {
24031       if (src2 != dest + halfregs)
24032 	emit_move_insn (desthi, operands[2]);
24033       if (src1 != dest)
24034 	emit_move_insn (destlo, operands[1]);
24035     }
24036 }
24037 
24038 /* Return the number (counting from 0) of
24039    the least significant set bit in MASK.  */
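/* For example, number_of_first_bit_set (0x28) is 3, since 0x28 is binary
   101000 and bit 3 is its least significant set bit.  */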
24040 
24041 inline static int
24042 number_of_first_bit_set (unsigned mask)
24043 {
24044   return ctz_hwi (mask);
24045 }
24046 
24047 /* Like emit_multi_reg_push, but allowing for a different set of
24048    registers to be described as saved.  MASK is the set of registers
24049    to be saved; REAL_REGS is the set of registers to be described as
24050    saved.  If REAL_REGS is 0, only describe the stack adjustment.  */
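/* For example, the anonymous-argument push in thumb1_expand_prologue calls
   this with REAL_REGS == 0: the argument registers are pushed only to
   reserve stack space, so the unwinder needs to see the stack adjustment
   but no register saves.  */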
24051 
24052 static rtx_insn *
24053 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24054 {
24055   unsigned long regno;
24056   rtx par[10], tmp, reg;
24057   rtx_insn *insn;
24058   int i, j;
24059 
24060   /* Build the parallel of the registers actually being stored.  */
24061   for (i = 0; mask; ++i, mask &= mask - 1)
24062     {
24063       regno = ctz_hwi (mask);
24064       reg = gen_rtx_REG (SImode, regno);
24065 
24066       if (i == 0)
24067 	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24068       else
24069 	tmp = gen_rtx_USE (VOIDmode, reg);
24070 
24071       par[i] = tmp;
24072     }
24073 
24074   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24075   tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24076   tmp = gen_frame_mem (BLKmode, tmp);
24077   tmp = gen_rtx_SET (tmp, par[0]);
24078   par[0] = tmp;
24079 
24080   tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24081   insn = emit_insn (tmp);
24082 
24083   /* Always build the stack adjustment note for unwind info.  */
24084   tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24085   tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24086   par[0] = tmp;
24087 
24088   /* Build the parallel of the registers recorded as saved for unwind.  */
24089   for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24090     {
24091       regno = ctz_hwi (real_regs);
24092       reg = gen_rtx_REG (SImode, regno);
24093 
24094       tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24095       tmp = gen_frame_mem (SImode, tmp);
24096       tmp = gen_rtx_SET (tmp, reg);
24097       RTX_FRAME_RELATED_P (tmp) = 1;
24098       par[j + 1] = tmp;
24099     }
24100 
24101   if (j == 0)
24102     tmp = par[0];
24103   else
24104     {
24105       RTX_FRAME_RELATED_P (par[0]) = 1;
24106       tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24107     }
24108 
24109   add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24110 
24111   return insn;
24112 }
24113 
24114 /* Emit code to pop registers from the stack.  F is the
24115    assembly file.  MASK is the registers to pop.  */
24116 static void
24117 thumb_pop (FILE *f, unsigned long mask)
24118 {
24119   int regno;
24120   int lo_mask = mask & 0xFF;
24121 
24122   gcc_assert (mask);
24123 
24124   if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24125     {
24126       /* Special case.  Do not generate a POP PC statement here, do it in
24127 	 thumb_exit ().  */
24128       thumb_exit (f, -1);
24129       return;
24130     }
24131 
24132   fprintf (f, "\tpop\t{");
24133 
24134   /* Look at the low registers first.  */
24135   for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24136     {
24137       if (lo_mask & 1)
24138 	{
24139 	  asm_fprintf (f, "%r", regno);
24140 
24141 	  if ((lo_mask & ~1) != 0)
24142 	    fprintf (f, ", ");
24143 	}
24144     }
24145 
24146   if (mask & (1 << PC_REGNUM))
24147     {
24148       /* Catch popping the PC.  */
24149       if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24150 	  || IS_CMSE_ENTRY (arm_current_func_type ()))
24151 	{
24152 	  /* The PC is never popped directly; instead
24153 	     it is popped into r3 and then BX is used.  */
24154 	  fprintf (f, "}\n");
24155 
24156 	  thumb_exit (f, -1);
24157 
24158 	  return;
24159 	}
24160       else
24161 	{
24162 	  if (mask & 0xFF)
24163 	    fprintf (f, ", ");
24164 
24165 	  asm_fprintf (f, "%r", PC_REGNUM);
24166 	}
24167     }
24168 
24169   fprintf (f, "}\n");
24170 }
24171 
24172 /* Generate code to return from a thumb function.
24173    If 'reg_containing_return_addr' is -1, then the return address is
24174    actually on the stack, at the stack pointer.
24175 
24176    Note: do not forget to update length attribute of corresponding insn pattern
24177    when changing assembly output (eg. length attribute of epilogue_insns when
24178    updating Armv8-M Baseline Security Extensions register clearing
24179    sequences).  */
24180 static void
24181 thumb_exit (FILE *f, int reg_containing_return_addr)
24182 {
24183   unsigned regs_available_for_popping;
24184   unsigned regs_to_pop;
24185   int pops_needed;
24186   unsigned available;
24187   unsigned required;
24188   machine_mode mode;
24189   int size;
24190   int restore_a4 = FALSE;
24191 
24192   /* Compute the registers we need to pop.  */
24193   regs_to_pop = 0;
24194   pops_needed = 0;
24195 
24196   if (reg_containing_return_addr == -1)
24197     {
24198       regs_to_pop |= 1 << LR_REGNUM;
24199       ++pops_needed;
24200     }
24201 
24202   if (TARGET_BACKTRACE)
24203     {
24204       /* Restore the (ARM) frame pointer and stack pointer.  */
24205       regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24206       pops_needed += 2;
24207     }
24208 
24209   /* If there is nothing to pop then just emit the BX instruction and
24210      return.  */
24211   if (pops_needed == 0)
24212     {
24213       if (crtl->calls_eh_return)
24214 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24215 
24216       if (IS_CMSE_ENTRY (arm_current_func_type ()))
24217 	{
24218 	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24219 		       reg_containing_return_addr);
24220 	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24221 	}
24222       else
24223 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24224       return;
24225     }
24226   /* Otherwise if we are not supporting interworking and we have not created
24227      a backtrace structure and the function was not entered in ARM mode then
24228      just pop the return address straight into the PC.  */
24229   else if (!TARGET_INTERWORK
24230 	   && !TARGET_BACKTRACE
24231 	   && !is_called_in_ARM_mode (current_function_decl)
24232 	   && !crtl->calls_eh_return
24233 	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
24234     {
24235       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24236       return;
24237     }
24238 
24239   /* Find out how many of the (return) argument registers we can corrupt.  */
24240   regs_available_for_popping = 0;
24241 
24242   /* If returning via __builtin_eh_return, the bottom three registers
24243      all contain information needed for the return.  */
24244   if (crtl->calls_eh_return)
24245     size = 12;
24246   else
24247     {
24248       /* Deduce the registers used from the function's
24249 	 return value.  This is more reliable than examining
24250 	 df_regs_ever_live_p () because that will be set if the register is
24251 	 ever used in the function, not just if the register is used
24252 	 to hold a return value.  */
24253 
24254       if (crtl->return_rtx != 0)
24255 	mode = GET_MODE (crtl->return_rtx);
24256       else
24257 	mode = DECL_MODE (DECL_RESULT (current_function_decl));
24258 
24259       size = GET_MODE_SIZE (mode);
24260 
24261       if (size == 0)
24262 	{
24263 	  /* In a void function we can use any argument register.
24264 	     In a function that returns a structure on the stack
24265 	     we can use the second and third argument registers.  */
24266 	  if (mode == VOIDmode)
24267 	    regs_available_for_popping =
24268 	      (1 << ARG_REGISTER (1))
24269 	      | (1 << ARG_REGISTER (2))
24270 	      | (1 << ARG_REGISTER (3));
24271 	  else
24272 	    regs_available_for_popping =
24273 	      (1 << ARG_REGISTER (2))
24274 	      | (1 << ARG_REGISTER (3));
24275 	}
24276       else if (size <= 4)
24277 	regs_available_for_popping =
24278 	  (1 << ARG_REGISTER (2))
24279 	  | (1 << ARG_REGISTER (3));
24280       else if (size <= 8)
24281 	regs_available_for_popping =
24282 	  (1 << ARG_REGISTER (3));
24283     }
24284 
24285   /* Match registers to be popped with registers into which we pop them.  */
24286   for (available = regs_available_for_popping,
24287        required  = regs_to_pop;
24288        required != 0 && available != 0;
24289        available &= ~(available & - available),
24290        required  &= ~(required  & - required))
24291     -- pops_needed;
24292 
24293   /* If we have any popping registers left over, remove them.  */
24294   if (available > 0)
24295     regs_available_for_popping &= ~available;
24296 
24297   /* Otherwise if we need another popping register we can use
24298      the fourth argument register.  */
24299   else if (pops_needed)
24300     {
24301       /* If we have not found any free argument registers and
24302 	 reg a4 contains the return address, we must move it.  */
24303       if (regs_available_for_popping == 0
24304 	  && reg_containing_return_addr == LAST_ARG_REGNUM)
24305 	{
24306 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24307 	  reg_containing_return_addr = LR_REGNUM;
24308 	}
24309       else if (size > 12)
24310 	{
24311 	  /* Register a4 is being used to hold part of the return value,
24312 	     but we have dire need of a free, low register.  */
24313 	  restore_a4 = TRUE;
24314 
24315 	  asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24316 	}
24317 
24318       if (reg_containing_return_addr != LAST_ARG_REGNUM)
24319 	{
24320 	  /* The fourth argument register is available.  */
24321 	  regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24322 
24323 	  --pops_needed;
24324 	}
24325     }
24326 
24327   /* Pop as many registers as we can.  */
24328   thumb_pop (f, regs_available_for_popping);
24329 
24330   /* Process the registers we popped.  */
24331   if (reg_containing_return_addr == -1)
24332     {
24333       /* The return address was popped into the lowest numbered register.  */
24334       regs_to_pop &= ~(1 << LR_REGNUM);
24335 
24336       reg_containing_return_addr =
24337 	number_of_first_bit_set (regs_available_for_popping);
24338 
24339       /* Remove this register from the mask of available registers, so that
24340          the return address will not be corrupted by further pops.  */
24341       regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24342     }
24343 
24344   /* If we popped other registers then handle them here.  */
24345   if (regs_available_for_popping)
24346     {
24347       int frame_pointer;
24348 
24349       /* Work out which register currently contains the frame pointer.  */
24350       frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24351 
24352       /* Move it into the correct place.  */
24353       asm_fprintf (f, "\tmov\t%r, %r\n",
24354 		   ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24355 
24356       /* (Temporarily) remove it from the mask of popped registers.  */
24357       regs_available_for_popping &= ~(1 << frame_pointer);
24358       regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24359 
24360       if (regs_available_for_popping)
24361 	{
24362 	  int stack_pointer;
24363 
24364 	  /* We popped the stack pointer as well,
24365 	     find the register that contains it.  */
24366 	  stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24367 
24368 	  /* Move it into the stack register.  */
24369 	  asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24370 
24371 	  /* At this point we have popped all necessary registers, so
24372 	     do not worry about restoring regs_available_for_popping
24373 	     to its correct value:
24374 
24375 	     assert (pops_needed == 0)
24376 	     assert (regs_available_for_popping == (1 << frame_pointer))
24377 	     assert (regs_to_pop == (1 << STACK_POINTER))  */
24378 	}
24379       else
24380 	{
24381 	  /* Since we have just moved the popped value into the frame
24382 	     pointer, the popping register is available for reuse, and
24383 	     we know that we still have the stack pointer left to pop.  */
24384 	  regs_available_for_popping |= (1 << frame_pointer);
24385 	}
24386     }
24387 
24388   /* If we still have registers left on the stack, but we no longer have
24389      any registers into which we can pop them, then we must move the return
24390      address into the link register and make available the register that
24391      contained it.  */
24392   if (regs_available_for_popping == 0 && pops_needed > 0)
24393     {
24394       regs_available_for_popping |= 1 << reg_containing_return_addr;
24395 
24396       asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24397 		   reg_containing_return_addr);
24398 
24399       reg_containing_return_addr = LR_REGNUM;
24400     }
24401 
24402   /* If we have registers left on the stack then pop some more.
24403      We know that at most we will want to pop FP and SP.  */
24404   if (pops_needed > 0)
24405     {
24406       int  popped_into;
24407       int  move_to;
24408 
24409       thumb_pop (f, regs_available_for_popping);
24410 
24411       /* We have popped either FP or SP.
24412 	 Move whichever one it is into the correct register.  */
24413       popped_into = number_of_first_bit_set (regs_available_for_popping);
24414       move_to     = number_of_first_bit_set (regs_to_pop);
24415 
24416       asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24417       --pops_needed;
24418     }
24419 
24420   /* If we still have not popped everything then we must have only
24421      had one register available to us and we are now popping the SP.  */
24422   if (pops_needed > 0)
24423     {
24424       int  popped_into;
24425 
24426       thumb_pop (f, regs_available_for_popping);
24427 
24428       popped_into = number_of_first_bit_set (regs_available_for_popping);
24429 
24430       asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24431       /*
24432 	assert (regs_to_pop == (1 << STACK_POINTER))
24433 	assert (pops_needed == 1)
24434       */
24435     }
24436 
24437   /* If necessary restore the a4 register.  */
24438   if (restore_a4)
24439     {
24440       if (reg_containing_return_addr != LR_REGNUM)
24441 	{
24442 	  asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24443 	  reg_containing_return_addr = LR_REGNUM;
24444 	}
24445 
24446       asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24447     }
24448 
24449   if (crtl->calls_eh_return)
24450     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24451 
24452   /* Return to caller.  */
24453   if (IS_CMSE_ENTRY (arm_current_func_type ()))
24454     {
24455       /* This is for the cases where LR is not being used to contain the return
24456          address.  It may therefore contain information that we might not want
24457 	 to leak, hence it must be cleared.  The value in R0 will never be a
24458 	 secret at this point, so it is safe to use it, see the clearing code
24459 	 in 'cmse_nonsecure_entry_clear_before_return'.  */
24460       if (reg_containing_return_addr != LR_REGNUM)
24461 	asm_fprintf (f, "\tmov\tlr, r0\n");
24462 
24463       asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24464       asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24465     }
24466   else
24467     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24468 }
24469 
24470 /* Scan INSN just before assembler is output for it.
24471    For Thumb-1, we track the status of the condition codes; this
24472    information is used in the cbranchsi4_insn pattern.  */
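/* Tracking the last flag-setting instruction and its operands lets the
   cbranchsi4_insn output code omit a compare when the flags already hold
   the result it needs.  */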
24473 void
24474 thumb1_final_prescan_insn (rtx_insn *insn)
24475 {
24476   if (flag_print_asm_name)
24477     asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24478 		 INSN_ADDRESSES (INSN_UID (insn)));
24479   /* Don't overwrite the previous setter when we get to a cbranch.  */
24480   if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24481     {
24482       enum attr_conds conds;
24483 
24484       if (cfun->machine->thumb1_cc_insn)
24485 	{
24486 	  if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24487 	      || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24488 	    CC_STATUS_INIT;
24489 	}
24490       conds = get_attr_conds (insn);
24491       if (conds == CONDS_SET)
24492 	{
24493 	  rtx set = single_set (insn);
24494 	  cfun->machine->thumb1_cc_insn = insn;
24495 	  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24496 	  cfun->machine->thumb1_cc_op1 = const0_rtx;
24497 	  cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24498 	  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24499 	    {
24500 	      rtx src1 = XEXP (SET_SRC (set), 1);
24501 	      if (src1 == const0_rtx)
24502 		cfun->machine->thumb1_cc_mode = CCmode;
24503 	    }
24504 	  else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24505 	    {
24506 	      /* Record the src register operand instead of dest because
24507 		 cprop_hardreg pass propagates src.  */
24508 	      cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24509 	    }
24510 	}
24511       else if (conds != CONDS_NOCOND)
24512 	cfun->machine->thumb1_cc_insn = NULL_RTX;
24513     }
24514 
24515     /* Check if unexpected far jump is used.  */
24516     if (cfun->machine->lr_save_eliminated
24517         && get_attr_far_jump (insn) == FAR_JUMP_YES)
24518       internal_error("Unexpected thumb1 far jump");
24519 }
24520 
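/* Return nonzero if VAL, truncated to 32 bits, is an 8-bit constant
   shifted left by between 0 and 24 bits, e.g. 0x1FE00 (0xFF << 9).  */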
24521 int
24522 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24523 {
24524   unsigned HOST_WIDE_INT mask = 0xff;
24525   int i;
24526 
24527   val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24528   if (val == 0) /* XXX */
24529     return 0;
24530 
24531   for (i = 0; i < 25; i++)
24532     if ((val & (mask << i)) == val)
24533       return 1;
24534 
24535   return 0;
24536 }
24537 
24538 /* Returns nonzero if the current function contains,
24539    or might contain a far jump.  */
24540 static int
24541 thumb_far_jump_used_p (void)
24542 {
24543   rtx_insn *insn;
24544   bool far_jump = false;
24545   unsigned int func_size = 0;
24546 
24547   /* If we have already decided that far jumps may be used,
24548      do not bother checking again, and always return true even if
24549      it turns out that they are not being used.  Once we have made
24550      the decision that far jumps are present (and that hence the link
24551      register will be pushed onto the stack) we cannot go back on it.  */
24552   if (cfun->machine->far_jump_used)
24553     return 1;
24554 
24555   /* If this function is not being called from the prologue/epilogue
24556      generation code then it must be being called from the
24557      INITIAL_ELIMINATION_OFFSET macro.  */
24558   if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24559     {
24560       /* In this case we know that we are being asked about the elimination
24561 	 of the arg pointer register.  If that register is not being used,
24562 	 then there are no arguments on the stack, and we do not have to
24563 	 worry that a far jump might force the prologue to push the link
24564 	 register, changing the stack offsets.  In this case we can just
24565 	 return false, since the presence of far jumps in the function will
24566 	 not affect stack offsets.
24567 
24568 	 If the arg pointer is live (or if it was live, but has now been
24569 	 eliminated and so set to dead) then we do have to test to see if
24570 	 the function might contain a far jump.  This test can lead to some
24571 	 false negatives, since before reload is completed the length of
24572 	 branch instructions is not known, so gcc defaults to returning their
24573 	 longest length, which in turn sets the far jump attribute to true.
24574 
24575 	 A false negative will not result in bad code being generated, but it
24576 	 will result in a needless push and pop of the link register.  We
24577 	 hope that this does not occur too often.
24578 
24579 	 If we need doubleword stack alignment this could affect the other
24580 	 elimination offsets so we can't risk getting it wrong.  */
24581       if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24582 	cfun->machine->arg_pointer_live = 1;
24583       else if (!cfun->machine->arg_pointer_live)
24584 	return 0;
24585     }
24586 
24587   /* We should not change far_jump_used during or after reload, as there is
24588      no chance to change stack frame layout.  */
24589   if (reload_in_progress || reload_completed)
24590     return 0;
24591 
24592   /* Check to see if the function contains a branch
24593      insn with the far jump attribute set.  */
24594   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24595     {
24596       if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24597 	{
24598 	  far_jump = true;
24599 	}
24600       func_size += get_attr_length (insn);
24601     }
24602 
24603   /* The far_jump attribute will always be true for thumb1 before the
24604      shorten_branch pass, so checking it before shorten_branch isn't
24605      very useful.
24606 
24607      The following heuristic tries to estimate more accurately whether a
24608      far jump will actually be needed.  The heuristic is very conservative,
24609      as there is no chance to roll back a decision not to use far jumps.
24610 
24611      The Thumb1 long branch offset is -2048 to 2046.  The worst case is that
24612      each 2-byte insn is associated with a 4-byte constant pool entry.  Using
24613      function size 2048/3 as the threshold is conservative enough.  */
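  /* With the 2048/3 threshold, a function whose instructions total roughly
     683 bytes or more is assumed to need far jumps.  */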
24614   if (far_jump)
24615     {
24616       if ((func_size * 3) >= 2048)
24617         {
24618 	  /* Record the fact that we have decided that
24619 	     the function does use far jumps.  */
24620 	  cfun->machine->far_jump_used = 1;
24621 	  return 1;
24622 	}
24623     }
24624 
24625   return 0;
24626 }
24627 
24628 /* Return nonzero if FUNC must be entered in ARM mode.  */
24629 static bool
24630 is_called_in_ARM_mode (tree func)
24631 {
24632   gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24633 
24634   /* Ignore the problem about functions whose address is taken.  */
24635   if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24636     return true;
24637 
24638 #ifdef ARM_PE
24639   return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24640 #else
24641   return false;
24642 #endif
24643 }
24644 
24645 /* Given the stack offsets and register mask in OFFSETS, decide how
24646    many additional registers to push instead of subtracting a constant
24647    from SP.  For epilogues the principle is the same except we use pop.
24648    FOR_PROLOGUE indicates which we're generating.  */
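/* Each extra register pushed or popped adjusts SP by another 4 bytes for
   free.  For example, with a frame of exactly 512 bytes, pushing one extra
   register reduces the remaining adjustment to 508, which (with the usual
   Thumb-1 encoding) fits the immediate of a single "sub sp" instruction.  */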
24649 static int
24650 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24651 {
24652   HOST_WIDE_INT amount;
24653   unsigned long live_regs_mask = offsets->saved_regs_mask;
24654   /* Extract a mask of the ones we can give to the Thumb's push/pop
24655      instruction.  */
24656   unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24657   /* Then count how many other high registers will need to be pushed.  */
24658   unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24659   int n_free, reg_base, size;
24660 
24661   if (!for_prologue && frame_pointer_needed)
24662     amount = offsets->locals_base - offsets->saved_regs;
24663   else
24664     amount = offsets->outgoing_args - offsets->saved_regs;
24665 
24666   /* If the stack frame size is 512 exactly, we can save one load
24667      instruction, which should make this a win even when optimizing
24668      for speed.  */
24669   if (!optimize_size && amount != 512)
24670     return 0;
24671 
24672   /* Can't do this if there are high registers to push.  */
24673   if (high_regs_pushed != 0)
24674     return 0;
24675 
24676   /* Shouldn't do it in the prologue if no registers would normally
24677      be pushed at all.  In the epilogue, also allow it if we'll have
24678      a pop insn for the PC.  */
24679   if  (l_mask == 0
24680        && (for_prologue
24681 	   || TARGET_BACKTRACE
24682 	   || (live_regs_mask & 1 << LR_REGNUM) == 0
24683 	   || TARGET_INTERWORK
24684 	   || crtl->args.pretend_args_size != 0))
24685     return 0;
24686 
24687   /* Don't do this if thumb_expand_prologue wants to emit instructions
24688      between the push and the stack frame allocation.  */
24689   if (for_prologue
24690       && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24691 	  || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24692     return 0;
24693 
24694   reg_base = 0;
24695   n_free = 0;
24696   if (!for_prologue)
24697     {
24698       size = arm_size_return_regs ();
24699       reg_base = ARM_NUM_INTS (size);
24700       live_regs_mask >>= reg_base;
24701     }
24702 
24703   while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24704 	 && (for_prologue || call_used_regs[reg_base + n_free]))
24705     {
24706       live_regs_mask >>= 1;
24707       n_free++;
24708     }
24709 
24710   if (n_free == 0)
24711     return 0;
24712   gcc_assert (amount / 4 * 4 == amount);
24713 
24714   if (amount >= 512 && (amount - n_free * 4) < 512)
24715     return (amount - 508) / 4;
24716   if (amount <= n_free * 4)
24717     return amount / 4;
24718   return 0;
24719 }
24720 
24721 /* The bits which aren't usefully expanded as rtl.  */
24722 const char *
24723 thumb1_unexpanded_epilogue (void)
24724 {
24725   arm_stack_offsets *offsets;
24726   int regno;
24727   unsigned long live_regs_mask = 0;
24728   int high_regs_pushed = 0;
24729   int extra_pop;
24730   int had_to_push_lr;
24731   int size;
24732 
24733   if (cfun->machine->return_used_this_function != 0)
24734     return "";
24735 
24736   if (IS_NAKED (arm_current_func_type ()))
24737     return "";
24738 
24739   offsets = arm_get_frame_offsets ();
24740   live_regs_mask = offsets->saved_regs_mask;
24741   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24742 
24743   /* Deduce the registers used from the function's return value.
24744      This is more reliable than examining df_regs_ever_live_p () because that
24745      will be set if the register is ever used in the function, not just if
24746      the register is used to hold a return value.  */
24747   size = arm_size_return_regs ();
24748 
24749   extra_pop = thumb1_extra_regs_pushed (offsets, false);
24750   if (extra_pop > 0)
24751     {
24752       unsigned long extra_mask = (1 << extra_pop) - 1;
24753       live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24754     }
24755 
24756   /* The prologue may have pushed some high registers to use as
24757      work registers.  e.g. the testsuite file:
24758      gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24759      compiles to produce:
24760 	push	{r4, r5, r6, r7, lr}
24761 	mov	r7, r9
24762 	mov	r6, r8
24763 	push	{r6, r7}
24764      as part of the prologue.  We have to undo that pushing here.  */
24765 
24766   if (high_regs_pushed)
24767     {
24768       unsigned long mask = live_regs_mask & 0xff;
24769       int next_hi_reg;
24770 
24771       /* The available low registers depend on the size of the value we are
24772          returning.  */
24773       if (size <= 12)
24774 	mask |=  1 << 3;
24775       if (size <= 8)
24776 	mask |= 1 << 2;
24777 
24778       if (mask == 0)
24779 	/* Oh dear!  We have no low registers into which we can pop
24780            high registers!  */
24781 	internal_error
24782 	  ("no low registers available for popping high registers");
24783 
24784       for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24785 	if (live_regs_mask & (1 << next_hi_reg))
24786 	  break;
24787 
24788       while (high_regs_pushed)
24789 	{
24790 	  /* Find lo register(s) into which the high register(s) can
24791              be popped.  */
24792 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24793 	    {
24794 	      if (mask & (1 << regno))
24795 		high_regs_pushed--;
24796 	      if (high_regs_pushed == 0)
24797 		break;
24798 	    }
24799 
24800 	  mask &= (2 << regno) - 1;	/* A noop if regno == 8 */
24801 
24802 	  /* Pop the values into the low register(s).  */
24803 	  thumb_pop (asm_out_file, mask);
24804 
24805 	  /* Move the value(s) into the high registers.  */
24806 	  for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24807 	    {
24808 	      if (mask & (1 << regno))
24809 		{
24810 		  asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24811 			       regno);
24812 
24813 		  for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24814 		    if (live_regs_mask & (1 << next_hi_reg))
24815 		      break;
24816 		}
24817 	    }
24818 	}
24819       live_regs_mask &= ~0x0f00;
24820     }
24821 
24822   had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24823   live_regs_mask &= 0xff;
24824 
24825   if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24826     {
24827       /* Pop the return address into the PC.  */
24828       if (had_to_push_lr)
24829 	live_regs_mask |= 1 << PC_REGNUM;
24830 
24831       /* Either no argument registers were pushed or a backtrace
24832 	 structure was created which includes an adjusted stack
24833 	 pointer, so just pop everything.  */
24834       if (live_regs_mask)
24835 	thumb_pop (asm_out_file, live_regs_mask);
24836 
24837       /* We have either just popped the return address into the
24838 	 PC or it was kept in LR for the entire function.
24839 	 Note that thumb_pop has already called thumb_exit if the
24840 	 PC was in the list.  */
24841       if (!had_to_push_lr)
24842 	thumb_exit (asm_out_file, LR_REGNUM);
24843     }
24844   else
24845     {
24846       /* Pop everything but the return address.  */
24847       if (live_regs_mask)
24848 	thumb_pop (asm_out_file, live_regs_mask);
24849 
24850       if (had_to_push_lr)
24851 	{
24852 	  if (size > 12)
24853 	    {
24854 	      /* We have no free low regs, so save one.  */
24855 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24856 			   LAST_ARG_REGNUM);
24857 	    }
24858 
24859 	  /* Get the return address into a temporary register.  */
24860 	  thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24861 
24862 	  if (size > 12)
24863 	    {
24864 	      /* Move the return address to lr.  */
24865 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24866 			   LAST_ARG_REGNUM);
24867 	      /* Restore the low register.  */
24868 	      asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24869 			   IP_REGNUM);
24870 	      regno = LR_REGNUM;
24871 	    }
24872 	  else
24873 	    regno = LAST_ARG_REGNUM;
24874 	}
24875       else
24876 	regno = LR_REGNUM;
24877 
24878       /* Remove the argument registers that were pushed onto the stack.  */
24879       asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24880 		   SP_REGNUM, SP_REGNUM,
24881 		   crtl->args.pretend_args_size);
24882 
24883       thumb_exit (asm_out_file, regno);
24884     }
24885 
24886   return "";
24887 }
24888 
24889 /* Functions to save and restore machine-specific function data.  */
24890 static struct machine_function *
24891 arm_init_machine_status (void)
24892 {
24893   struct machine_function *machine;
24894   machine = ggc_cleared_alloc<machine_function> ();
24895 
24896 #if ARM_FT_UNKNOWN != 0
24897   machine->func_type = ARM_FT_UNKNOWN;
24898 #endif
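  /* -1 is a sentinel meaning that the static chain stack size has not yet
     been computed.  */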
24899   machine->static_chain_stack_bytes = -1;
24900   return machine;
24901 }
24902 
24903 /* Return an RTX indicating where the return address to the
24904    calling function can be found.  */
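/* Only the current function's return address (COUNT == 0) can be
   recovered; it is the value that LR held on entry to the function.  */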
24905 rtx
24906 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24907 {
24908   if (count != 0)
24909     return NULL_RTX;
24910 
24911   return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24912 }
24913 
24914 /* Do anything needed before RTL is emitted for each function.  */
24915 void
24916 arm_init_expanders (void)
24917 {
24918   /* Arrange to initialize and mark the machine per-function status.  */
24919   init_machine_status = arm_init_machine_status;
24920 
24921   /* This is to stop the combine pass optimizing away the alignment
24922      adjustment of va_arg.  */
24923   /* ??? It is claimed that this should not be necessary.  */
24924   if (cfun)
24925     mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24926 }
24927 
24928 /* Return true if FUNC would be compiled in a different instruction set
   (ARM vs. Thumb) from the current function.  */
24929 
24930 bool
24931 arm_change_mode_p (tree func)
24932 {
24933   if (TREE_CODE (func) != FUNCTION_DECL)
24934     return false;
24935 
24936   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24937 
24938   if (!callee_tree)
24939     callee_tree = target_option_default_node;
24940 
24941   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24942   int flags = callee_opts->x_target_flags;
24943 
24944   return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24945 }
24946 
24947 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24948    isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
24949    to point at the base of the local variables after static stack
24950    space for a function has been allocated.  */
24951 
24952 HOST_WIDE_INT
24953 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24954 {
24955   arm_stack_offsets *offsets;
24956 
24957   offsets = arm_get_frame_offsets ();
24958 
24959   switch (from)
24960     {
24961     case ARG_POINTER_REGNUM:
24962       switch (to)
24963 	{
24964 	case STACK_POINTER_REGNUM:
24965 	  return offsets->outgoing_args - offsets->saved_args;
24966 
24967 	case FRAME_POINTER_REGNUM:
24968 	  return offsets->soft_frame - offsets->saved_args;
24969 
24970 	case ARM_HARD_FRAME_POINTER_REGNUM:
24971 	  return offsets->saved_regs - offsets->saved_args;
24972 
24973 	case THUMB_HARD_FRAME_POINTER_REGNUM:
24974 	  return offsets->locals_base - offsets->saved_args;
24975 
24976 	default:
24977 	  gcc_unreachable ();
24978 	}
24979       break;
24980 
24981     case FRAME_POINTER_REGNUM:
24982       switch (to)
24983 	{
24984 	case STACK_POINTER_REGNUM:
24985 	  return offsets->outgoing_args - offsets->soft_frame;
24986 
24987 	case ARM_HARD_FRAME_POINTER_REGNUM:
24988 	  return offsets->saved_regs - offsets->soft_frame;
24989 
24990 	case THUMB_HARD_FRAME_POINTER_REGNUM:
24991 	  return offsets->locals_base - offsets->soft_frame;
24992 
24993 	default:
24994 	  gcc_unreachable ();
24995 	}
24996       break;
24997 
24998     default:
24999       gcc_unreachable ();
25000     }
25001 }
25002 
25003 /* Generate the function's prologue.  */
25004 
25005 void
25006 thumb1_expand_prologue (void)
25007 {
25008   rtx_insn *insn;
25009 
25010   HOST_WIDE_INT amount;
25011   HOST_WIDE_INT size;
25012   arm_stack_offsets *offsets;
25013   unsigned long func_type;
25014   int regno;
25015   unsigned long live_regs_mask;
25016   unsigned long l_mask;
25017   unsigned high_regs_pushed = 0;
25018   bool lr_needs_saving;
25019 
25020   func_type = arm_current_func_type ();
25021 
25022   /* Naked functions don't have prologues.  */
25023   if (IS_NAKED (func_type))
25024     {
25025       if (flag_stack_usage_info)
25026 	current_function_static_stack_size = 0;
25027       return;
25028     }
25029 
25030   if (IS_INTERRUPT (func_type))
25031     {
25032       error ("interrupt Service Routines cannot be coded in Thumb mode");
25033       return;
25034     }
25035 
25036   if (is_called_in_ARM_mode (current_function_decl))
25037     emit_insn (gen_prologue_thumb1_interwork ());
25038 
25039   offsets = arm_get_frame_offsets ();
25040   live_regs_mask = offsets->saved_regs_mask;
25041   lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25042 
25043   /* Extract a mask of the ones we can give to the Thumb's push instruction.  */
25044   l_mask = live_regs_mask & 0x40ff;
25045   /* Then count how many other high registers will need to be pushed.  */
25046   high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25047 
25048   if (crtl->args.pretend_args_size)
25049     {
25050       rtx x = GEN_INT (-crtl->args.pretend_args_size);
25051 
25052       if (cfun->machine->uses_anonymous_args)
25053 	{
25054 	  int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25055 	  unsigned long mask;
25056 
25057 	  mask = 1ul << (LAST_ARG_REGNUM + 1);
25058 	  mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25059 
25060 	  insn = thumb1_emit_multi_reg_push (mask, 0);
25061 	}
25062       else
25063 	{
25064 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25065 					stack_pointer_rtx, x));
25066 	}
25067       RTX_FRAME_RELATED_P (insn) = 1;
25068     }
25069 
25070   if (TARGET_BACKTRACE)
25071     {
25072       HOST_WIDE_INT offset = 0;
25073       unsigned work_register;
25074       rtx work_reg, x, arm_hfp_rtx;
25075 
25076       /* We have been asked to create a stack backtrace structure.
25077          The code looks like this:
25078 
25079 	 0   .align 2
25080 	 0   func:
25081          0     sub   SP, #16         Reserve space for 4 registers.
25082 	 2     push  {R7}            Push low registers.
25083          4     add   R7, SP, #20     Get the stack pointer before the push.
25084          6     str   R7, [SP, #8]    Store the stack pointer
25085 					(before reserving the space).
25086          8     mov   R7, PC          Get hold of the start of this code + 12.
25087         10     str   R7, [SP, #16]   Store it.
25088         12     mov   R7, FP          Get hold of the current frame pointer.
25089         14     str   R7, [SP, #4]    Store it.
25090         16     mov   R7, LR          Get hold of the current return address.
25091         18     str   R7, [SP, #12]   Store it.
25092         20     add   R7, SP, #16     Point at the start of the
25093 					backtrace structure.
25094         22     mov   FP, R7          Put this value into the frame pointer.  */
25095 
25096       work_register = thumb_find_work_register (live_regs_mask);
25097       work_reg = gen_rtx_REG (SImode, work_register);
25098       arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25099 
25100       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25101 				    stack_pointer_rtx, GEN_INT (-16)));
25102       RTX_FRAME_RELATED_P (insn) = 1;
25103 
25104       if (l_mask)
25105 	{
25106 	  insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25107 	  RTX_FRAME_RELATED_P (insn) = 1;
25108 	  lr_needs_saving = false;
25109 
25110 	  offset = bit_count (l_mask) * UNITS_PER_WORD;
25111 	}
25112 
25113       x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25114       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25115 
25116       x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25117       x = gen_frame_mem (SImode, x);
25118       emit_move_insn (x, work_reg);
25119 
25120       /* Make sure that the instruction fetching the PC is in the right place
25121 	 to calculate "start of backtrace creation code + 12".  */
25122       /* ??? The stores using the common WORK_REG ought to be enough to
25123 	 prevent the scheduler from doing anything weird.  Failing that
25124 	 we could always move all of the following into an UNSPEC_VOLATILE.  */
25125       if (l_mask)
25126 	{
25127 	  x = gen_rtx_REG (SImode, PC_REGNUM);
25128 	  emit_move_insn (work_reg, x);
25129 
25130 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25131 	  x = gen_frame_mem (SImode, x);
25132 	  emit_move_insn (x, work_reg);
25133 
25134 	  emit_move_insn (work_reg, arm_hfp_rtx);
25135 
25136 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
25137 	  x = gen_frame_mem (SImode, x);
25138 	  emit_move_insn (x, work_reg);
25139 	}
25140       else
25141 	{
25142 	  emit_move_insn (work_reg, arm_hfp_rtx);
25143 
25144 	  x = plus_constant (Pmode, stack_pointer_rtx, offset);
25145 	  x = gen_frame_mem (SImode, x);
25146 	  emit_move_insn (x, work_reg);
25147 
25148 	  x = gen_rtx_REG (SImode, PC_REGNUM);
25149 	  emit_move_insn (work_reg, x);
25150 
25151 	  x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25152 	  x = gen_frame_mem (SImode, x);
25153 	  emit_move_insn (x, work_reg);
25154 	}
25155 
25156       x = gen_rtx_REG (SImode, LR_REGNUM);
25157       emit_move_insn (work_reg, x);
25158 
25159       x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25160       x = gen_frame_mem (SImode, x);
25161       emit_move_insn (x, work_reg);
25162 
25163       x = GEN_INT (offset + 12);
25164       emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25165 
25166       emit_move_insn (arm_hfp_rtx, work_reg);
25167     }
25168   /* Optimization:  If we are not pushing any low registers but we are going
25169      to push some high registers then delay our first push.  This will just
25170      be a push of LR and we can combine it with the push of the first high
25171      register.  */
25172   else if ((l_mask & 0xff) != 0
25173 	   || (high_regs_pushed == 0 && lr_needs_saving))
25174     {
25175       unsigned long mask = l_mask;
25176       mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25177       insn = thumb1_emit_multi_reg_push (mask, mask);
25178       RTX_FRAME_RELATED_P (insn) = 1;
25179       lr_needs_saving = false;
25180     }
25181 
25182   if (high_regs_pushed)
25183     {
25184       unsigned pushable_regs;
25185       unsigned next_hi_reg;
25186       unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25187 						 : crtl->args.info.nregs;
25188       unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25189 
25190       for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25191 	if (live_regs_mask & (1 << next_hi_reg))
25192 	  break;
25193 
25194       /* Here we need to mask out registers used for passing arguments,
25195 	 even if they could be pushed.  This avoids using them to stash the
25196 	 high registers, since such a stash could clobber argument values.  */
25197       pushable_regs = l_mask & (~arg_regs_mask);
25198       if (lr_needs_saving)
25199 	pushable_regs &= ~(1 << LR_REGNUM);
25200 
25201       if (pushable_regs == 0)
25202 	pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25203 
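      /* Thumb-1 PUSH can only store the low registers and LR, so move each
	 live high register into a free low register and push from there,
	 repeating until all of the high registers have been saved.  */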
25204       while (high_regs_pushed > 0)
25205 	{
25206 	  unsigned long real_regs_mask = 0;
25207 	  unsigned long push_mask = 0;
25208 
25209 	  for (regno = LR_REGNUM; regno >= 0; regno --)
25210 	    {
25211 	      if (pushable_regs & (1 << regno))
25212 		{
25213 		  emit_move_insn (gen_rtx_REG (SImode, regno),
25214 				  gen_rtx_REG (SImode, next_hi_reg));
25215 
25216 		  high_regs_pushed --;
25217 		  real_regs_mask |= (1 << next_hi_reg);
25218 		  push_mask |= (1 << regno);
25219 
25220 		  if (high_regs_pushed)
25221 		    {
25222 		      for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25223 			   next_hi_reg --)
25224 			if (live_regs_mask & (1 << next_hi_reg))
25225 			  break;
25226 		    }
25227 		  else
25228 		    break;
25229 		}
25230 	    }
25231 
25232 	  /* If we had to find a work register and we have not yet
25233 	     saved the LR then add it to the list of regs to push.  */
25234 	  if (lr_needs_saving)
25235 	    {
25236 	      push_mask |= 1 << LR_REGNUM;
25237 	      real_regs_mask |= 1 << LR_REGNUM;
25238 	      lr_needs_saving = false;
25239 	    }
25240 
25241 	  insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25242 	  RTX_FRAME_RELATED_P (insn) = 1;
25243 	}
25244     }
25245 
25246   /* Load the pic register before setting the frame pointer,
25247      so we can use r7 as a temporary work register.  */
25248   if (flag_pic && arm_pic_register != INVALID_REGNUM)
25249     arm_load_pic_register (live_regs_mask);
25250 
25251   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25252     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25253 		    stack_pointer_rtx);
25254 
25255   size = offsets->outgoing_args - offsets->saved_args;
25256   if (flag_stack_usage_info)
25257     current_function_static_stack_size = size;
25258 
25259   /* If we have a frame, then do stack checking.  FIXME: not implemented.  */
25260   if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25261        || flag_stack_clash_protection)
25262       && size)
25263     sorry ("-fstack-check=specific for Thumb-1");
25264 
25265   amount = offsets->outgoing_args - offsets->saved_regs;
25266   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25267   if (amount)
25268     {
25269       if (amount < 512)
25270 	{
25271 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25272 					GEN_INT (- amount)));
25273 	  RTX_FRAME_RELATED_P (insn) = 1;
25274 	}
25275       else
25276 	{
25277 	  rtx reg, dwarf;
25278 
25279 	  /* The stack decrement is too big for an immediate value in a single
25280 	     insn.  In theory we could issue multiple subtracts, but after
25281 	     three of them it becomes more space efficient to place the full
25282 	     value in the constant pool and load into a register.  (Also the
25283 	     ARM debugger really likes to see only one stack decrement per
25284 	     function).  So instead we look for a scratch register into which
25285 	     we can load the decrement, and then we subtract this from the
25286 	     stack pointer.  Unfortunately on the thumb the only available
25287 	     scratch registers are the argument registers, and we cannot use
25288 	     these as they may hold arguments to the function.  Instead we
25289 	     attempt to locate a call preserved register which is used by this
25290 	     function.  If we can find one, then we know that it will have
25291 	     been pushed at the start of the prologue and so we can corrupt
25292 	     it now.  */
25293 	  for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25294 	    if (live_regs_mask & (1 << regno))
25295 	      break;
25296 
25297 	  gcc_assert(regno <= LAST_LO_REGNUM);
25298 
25299 	  reg = gen_rtx_REG (SImode, regno);
25300 
25301 	  emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25302 
25303 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25304 					stack_pointer_rtx, reg));
25305 
25306 	  dwarf = gen_rtx_SET (stack_pointer_rtx,
25307 			       plus_constant (Pmode, stack_pointer_rtx,
25308 					      -amount));
25309 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25310 	  RTX_FRAME_RELATED_P (insn) = 1;
25311 	}
25312     }
25313 
25314   if (frame_pointer_needed)
25315     thumb_set_frame_pointer (offsets);
25316 
25317   /* If we are profiling, make sure no instructions are scheduled before
25318      the call to mcount.  Similarly if the user has requested no
25319      scheduling in the prolog.  Similarly if we want non-call exceptions
25320      using the EABI unwinder, to prevent faulting instructions from being
25321      swapped with a stack adjustment.  */
25322   if (crtl->profile || !TARGET_SCHED_PROLOG
25323       || (arm_except_unwind_info (&global_options) == UI_TARGET
25324 	  && cfun->can_throw_non_call_exceptions))
25325     emit_insn (gen_blockage ());
25326 
25327   cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25328   if (live_regs_mask & 0xff)
25329     cfun->machine->lr_save_eliminated = 0;
25330 }
25331 
25332 /* Clear the caller-saved registers not used to pass return values, and any
25333    leaked condition flags, before exiting a cmse_nonsecure_entry function.  */
25334 
25335 void
25336 cmse_nonsecure_entry_clear_before_return (void)
25337 {
25338   int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25339   uint32_t padding_bits_to_clear = 0;
25340   auto_sbitmap to_clear_bitmap (maxregno + 1);
25341   rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25342   tree result_type;
25343 
25344   bitmap_clear (to_clear_bitmap);
25345   bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25346   bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25347 
25348   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25349      registers.  */
25350   if (TARGET_HARD_FLOAT)
25351     {
25352       int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25353 
25354       bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25355 
25356       /* Make sure we don't clear the two scratch registers used to clear the
25357 	 relevant FPSCR bits in output_return_instruction.  */
25358       emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25359       bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25360       emit_use (gen_rtx_REG (SImode, 4));
25361       bitmap_clear_bit (to_clear_bitmap, 4);
25362     }
25363 
25364   /* If the user has defined registers to be caller saved, these are no longer
25365      restored by the function before returning and must thus be cleared for
25366      security purposes.  */
25367   for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25368     {
25369       /* We do not touch registers that can be used to pass arguments as per
25370 	 the AAPCS, since these should never be made callee-saved by user
25371 	 options.  */
25372       if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25373 	continue;
25374       if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25375 	continue;
25376       if (call_used_regs[regno])
25377 	bitmap_set_bit (to_clear_bitmap, regno);
25378     }
25379 
25380   /* Make sure we do not clear the registers that the result is returned in.  */
25381   result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25382   if (!VOID_TYPE_P (result_type))
25383     {
25384       uint64_t to_clear_return_mask;
25385       result_rtl = arm_function_value (result_type, current_function_decl, 0);
25386 
25387       /* No need to check that we return in registers, because we don't
25388 	 support returning on stack yet.  */
25389       gcc_assert (REG_P (result_rtl));
25390       to_clear_return_mask
25391 	= compute_not_to_clear_mask (result_type, result_rtl, 0,
25392 				     &padding_bits_to_clear);
25393       if (to_clear_return_mask)
25394 	{
25395 	  gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25396 	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
25397 	    {
25398 	      if (to_clear_return_mask & (1ULL << regno))
25399 		bitmap_clear_bit (to_clear_bitmap, regno);
25400 	    }
25401 	}
25402     }
25403 
25404   if (padding_bits_to_clear != 0)
25405     {
25406       int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25407       auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25408 
25409       /* Padding_bits_to_clear is not 0 so we know we are dealing with
25410 	 returning a composite type, which only uses r0.  Let's make sure that
25411 	 r1-r3 are cleared too.  */
25412       bitmap_clear (to_clear_arg_regs_bitmap);
25413       bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25414       gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25415     }
25416 
25417   /* Clear full registers that leak before returning.  */
25418   clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25419   r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25420   cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25421 			clearing_reg);
25422 }
25423 
25424 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
25425    POP instruction can be generated.  LR should be replaced by PC.  All
25426    the checks required are already done by USE_RETURN_INSN ().  Hence, all
25427    we really need to check here is whether a single register or multiple
25428    registers are to be popped on return.  */
25429 void
25430 thumb2_expand_return (bool simple_return)
25431 {
25432   int i, num_regs;
25433   unsigned long saved_regs_mask;
25434   arm_stack_offsets *offsets;
25435 
25436   offsets = arm_get_frame_offsets ();
25437   saved_regs_mask = offsets->saved_regs_mask;
25438 
25439   for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25440     if (saved_regs_mask & (1 << i))
25441       num_regs++;
25442 
25443   if (!simple_return && saved_regs_mask)
25444     {
25445       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25446 	 functions or adapt code to handle according to ACLE.  This path should
25447 	 not be reachable for cmse_nonsecure_entry functions, though we prefer
25448 	 to assert it for now to ensure that future code changes do not silently
25449 	 change this behavior.  */
25450       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25451       if (num_regs == 1)
25452         {
25453           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25454           rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25455           rtx addr = gen_rtx_MEM (SImode,
25456                                   gen_rtx_POST_INC (SImode,
25457                                                     stack_pointer_rtx));
25458           set_mem_alias_set (addr, get_frame_alias_set ());
25459           XVECEXP (par, 0, 0) = ret_rtx;
25460           XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25461           RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25462           emit_jump_insn (par);
25463         }
25464       else
25465         {
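          /* Pop straight back into the PC: drop LR from the mask and add PC so
             the multi-register pop both restores the registers and returns.  */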
25466           saved_regs_mask &= ~ (1 << LR_REGNUM);
25467           saved_regs_mask |=   (1 << PC_REGNUM);
25468           arm_emit_multi_reg_pop (saved_regs_mask);
25469         }
25470     }
25471   else
25472     {
25473       if (IS_CMSE_ENTRY (arm_current_func_type ()))
25474 	cmse_nonsecure_entry_clear_before_return ();
25475       emit_jump_insn (simple_return_rtx);
25476     }
25477 }
25478 
25479 void
25480 thumb1_expand_epilogue (void)
25481 {
25482   HOST_WIDE_INT amount;
25483   arm_stack_offsets *offsets;
25484   int regno;
25485 
25486   /* Naked functions don't have prologues.  */
25487   if (IS_NAKED (arm_current_func_type ()))
25488     return;
25489 
25490   offsets = arm_get_frame_offsets ();
25491   amount = offsets->outgoing_args - offsets->saved_regs;
25492 
25493   if (frame_pointer_needed)
25494     {
25495       emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25496       amount = offsets->locals_base - offsets->saved_regs;
25497     }
25498   amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25499 
25500   gcc_assert (amount >= 0);
25501   if (amount)
25502     {
25503       emit_insn (gen_blockage ());
25504 
25505       if (amount < 512)
25506 	emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25507 			       GEN_INT (amount)));
25508       else
25509 	{
25510 	  /* r3 is always free in the epilogue.  */
25511 	  rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25512 
25513 	  emit_insn (gen_movsi (reg, GEN_INT (amount)));
25514 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25515 	}
25516     }
25517 
25518   /* Emit a USE (stack_pointer_rtx), so that
25519      the stack adjustment will not be deleted.  */
25520   emit_insn (gen_force_register_use (stack_pointer_rtx));
25521 
25522   if (crtl->profile || !TARGET_SCHED_PROLOG)
25523     emit_insn (gen_blockage ());
25524 
25525   /* Emit a clobber for each insn that will be restored in the epilogue,
25526      so that flow2 will get register lifetimes correct.  */
25527   for (regno = 0; regno < 13; regno++)
25528     if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25529       emit_clobber (gen_rtx_REG (SImode, regno));
25530 
25531   if (! df_regs_ever_live_p (LR_REGNUM))
25532     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25533 
25534   /* Clear all caller-saved regs that are not used to return.  */
25535   if (IS_CMSE_ENTRY (arm_current_func_type ()))
25536     cmse_nonsecure_entry_clear_before_return ();
25537 }
25538 
25539 /* Epilogue code for APCS frame.  */
25540 static void
25541 arm_expand_epilogue_apcs_frame (bool really_return)
25542 {
25543   unsigned long func_type;
25544   unsigned long saved_regs_mask;
25545   int num_regs = 0;
25546   int i;
25547   int floats_from_frame = 0;
25548   arm_stack_offsets *offsets;
25549 
25550   gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25551   func_type = arm_current_func_type ();
25552 
25553   /* Get frame offsets for ARM.  */
25554   offsets = arm_get_frame_offsets ();
25555   saved_regs_mask = offsets->saved_regs_mask;
25556 
25557   /* Find the offset of the floating-point save area in the frame.  */
25558   floats_from_frame
25559     = (offsets->saved_args
25560        + arm_compute_static_chain_stack_bytes ()
25561        - offsets->frame);
25562 
25563   /* Compute how many core registers are saved and how far away the floats are.  */
25564   for (i = 0; i <= LAST_ARM_REGNUM; i++)
25565     if (saved_regs_mask & (1 << i))
25566       {
25567         num_regs++;
25568         floats_from_frame += 4;
25569       }
25570 
25571   if (TARGET_HARD_FLOAT)
25572     {
25573       int start_reg;
25574       rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25575 
25576       /* The offset is from IP_REGNUM.  */
25577       int saved_size = arm_get_vfp_saved_size ();
25578       if (saved_size > 0)
25579         {
25580 	  rtx_insn *insn;
25581           floats_from_frame += saved_size;
25582           insn = emit_insn (gen_addsi3 (ip_rtx,
25583 					hard_frame_pointer_rtx,
25584 					GEN_INT (-floats_from_frame)));
25585 	  arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25586 				       ip_rtx, hard_frame_pointer_rtx);
25587         }
25588 
25589       /* Generate VFP register multi-pop.  */
25590       start_reg = FIRST_VFP_REGNUM;
25591 
25592       for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25593         /* Look for a case where a reg does not need restoring.  */
25594         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25595             && (!df_regs_ever_live_p (i + 1)
25596                 || call_used_regs[i + 1]))
25597           {
25598             if (start_reg != i)
25599               arm_emit_vfp_multi_reg_pop (start_reg,
25600                                           (i - start_reg) / 2,
25601                                           gen_rtx_REG (SImode,
25602                                                        IP_REGNUM));
25603             start_reg = i + 2;
25604           }
25605 
25606       /* Restore the remaining regs that we have discovered (or possibly
25607          even all of them, if the conditional in the for loop never
25608          fired).  */
25609       if (start_reg != i)
25610         arm_emit_vfp_multi_reg_pop (start_reg,
25611                                     (i - start_reg) / 2,
25612                                     gen_rtx_REG (SImode, IP_REGNUM));
25613     }
25614 
25615   if (TARGET_IWMMXT)
25616     {
25617       /* The frame pointer is guaranteed to be non-double-word aligned, as
25618          it is set to double-word-aligned old_stack_pointer - 4.  */
25619       rtx_insn *insn;
25620       int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25621 
25622       for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25623         if (df_regs_ever_live_p (i) && !call_used_regs[i])
25624           {
25625             rtx addr = gen_frame_mem (V2SImode,
25626                                  plus_constant (Pmode, hard_frame_pointer_rtx,
25627                                                 - lrm_count * 4));
25628             insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25629             REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25630                                                gen_rtx_REG (V2SImode, i),
25631                                                NULL_RTX);
25632             lrm_count += 2;
25633           }
25634     }
25635 
25636   /* saved_regs_mask should contain IP, which holds the old stack pointer from
25637      when the frame was created.  Since SP is the register immediately after IP,
25638      moving that bit to SP makes the pop restore the value directly into SP.  */
25639   gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25640   saved_regs_mask &= ~(1 << IP_REGNUM);
25641   saved_regs_mask |= (1 << SP_REGNUM);
25642 
25643   /* There are two registers left in saved_regs_mask - LR and PC.  We
25644      only need to restore LR (the return address), but to
25645      save time we can load it directly into PC, unless we need a
25646      special function exit sequence, or we are not really returning.  */
25647   if (really_return
25648       && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25649       && !crtl->calls_eh_return)
25650     /* Delete LR from the register mask, so that LR on
25651        the stack is loaded into the PC in the register mask.  */
25652     saved_regs_mask &= ~(1 << LR_REGNUM);
25653   else
25654     saved_regs_mask &= ~(1 << PC_REGNUM);
25655 
25656   num_regs = bit_count (saved_regs_mask);
25657   if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25658     {
25659       rtx_insn *insn;
25660       emit_insn (gen_blockage ());
25661       /* Unwind the stack to just below the saved registers.  */
25662       insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25663 				    hard_frame_pointer_rtx,
25664 				    GEN_INT (- 4 * num_regs)));
25665 
25666       arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25667 				   stack_pointer_rtx, hard_frame_pointer_rtx);
25668     }
25669 
25670   arm_emit_multi_reg_pop (saved_regs_mask);
25671 
25672   if (IS_INTERRUPT (func_type))
25673     {
25674       /* Interrupt handlers will have pushed the
25675          IP onto the stack, so restore it now.  */
25676       rtx_insn *insn;
25677       rtx addr = gen_rtx_MEM (SImode,
25678                               gen_rtx_POST_INC (SImode,
25679                               stack_pointer_rtx));
25680       set_mem_alias_set (addr, get_frame_alias_set ());
25681       insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25682       REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25683                                          gen_rtx_REG (SImode, IP_REGNUM),
25684                                          NULL_RTX);
25685     }
25686 
25687   if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25688     return;
25689 
25690   if (crtl->calls_eh_return)
25691     emit_insn (gen_addsi3 (stack_pointer_rtx,
25692 			   stack_pointer_rtx,
25693 			   gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25694 
25695   if (IS_STACKALIGN (func_type))
25696     /* Restore the original stack pointer.  Before prologue, the stack was
25697        realigned and the original stack pointer saved in r0.  For details,
25698        see comment in arm_expand_prologue.  */
25699     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25700 
25701   emit_jump_insn (simple_return_rtx);
25702 }
25703 
25704 /* Generate RTL to represent ARM epilogue.  Really_return is true if the
25705    function is not a sibcall.  */
25706 void
25707 arm_expand_epilogue (bool really_return)
25708 {
25709   unsigned long func_type;
25710   unsigned long saved_regs_mask;
25711   int num_regs = 0;
25712   int i;
25713   int amount;
25714   arm_stack_offsets *offsets;
25715 
25716   func_type = arm_current_func_type ();
25717 
25718   /* Naked functions don't have an epilogue.  Hence, generate the return pattern
25719      and let output_return_instruction take care of any instruction emission.  */
25720   if (IS_NAKED (func_type)
25721       || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25722     {
25723       if (really_return)
25724         emit_jump_insn (simple_return_rtx);
25725       return;
25726     }
25727 
25728   /* If we are throwing an exception, then we really must be doing a
25729      return, so we can't tail-call.  */
25730   gcc_assert (!crtl->calls_eh_return || really_return);
25731 
25732   if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25733     {
25734       arm_expand_epilogue_apcs_frame (really_return);
25735       return;
25736     }
25737 
25738   /* Get frame offsets for ARM.  */
25739   offsets = arm_get_frame_offsets ();
25740   saved_regs_mask = offsets->saved_regs_mask;
25741   num_regs = bit_count (saved_regs_mask);
25742 
25743   if (frame_pointer_needed)
25744     {
25745       rtx_insn *insn;
25746       /* Restore stack pointer if necessary.  */
25747       if (TARGET_ARM)
25748         {
25749           /* In ARM mode, the frame pointer points to the first saved register.
25750              Restore the stack pointer to the last saved register.  */
25751           amount = offsets->frame - offsets->saved_regs;
25752 
25753           /* Force out any pending memory operations that reference stacked data
25754              before stack de-allocation occurs.  */
25755           emit_insn (gen_blockage ());
25756 	  insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25757 			    hard_frame_pointer_rtx,
25758 			    GEN_INT (amount)));
25759 	  arm_add_cfa_adjust_cfa_note (insn, amount,
25760 				       stack_pointer_rtx,
25761 				       hard_frame_pointer_rtx);
25762 
25763           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25764              deleted.  */
25765           emit_insn (gen_force_register_use (stack_pointer_rtx));
25766         }
25767       else
25768         {
25769           /* In Thumb-2 mode, the frame pointer points to the last saved
25770              register.  */
25771 	  amount = offsets->locals_base - offsets->saved_regs;
25772 	  if (amount)
25773 	    {
25774 	      insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25775 				hard_frame_pointer_rtx,
25776 				GEN_INT (amount)));
25777 	      arm_add_cfa_adjust_cfa_note (insn, amount,
25778 					   hard_frame_pointer_rtx,
25779 					   hard_frame_pointer_rtx);
25780 	    }
25781 
25782           /* Force out any pending memory operations that reference stacked data
25783              before stack de-allocation occurs.  */
25784           emit_insn (gen_blockage ());
25785 	  insn = emit_insn (gen_movsi (stack_pointer_rtx,
25786 				       hard_frame_pointer_rtx));
25787 	  arm_add_cfa_adjust_cfa_note (insn, 0,
25788 				       stack_pointer_rtx,
25789 				       hard_frame_pointer_rtx);
25790           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25791              deleted.  */
25792           emit_insn (gen_force_register_use (stack_pointer_rtx));
25793         }
25794     }
25795   else
25796     {
25797       /* Pop off outgoing args and local frame to adjust stack pointer to
25798          last saved register.  */
25799       amount = offsets->outgoing_args - offsets->saved_regs;
25800       if (amount)
25801         {
25802 	  rtx_insn *tmp;
25803           /* Force out any pending memory operations that reference stacked data
25804              before stack de-allocation occurs.  */
25805           emit_insn (gen_blockage ());
25806 	  tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25807 				       stack_pointer_rtx,
25808 				       GEN_INT (amount)));
25809 	  arm_add_cfa_adjust_cfa_note (tmp, amount,
25810 				       stack_pointer_rtx, stack_pointer_rtx);
25811           /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25812              not deleted.  */
25813           emit_insn (gen_force_register_use (stack_pointer_rtx));
25814         }
25815     }
25816 
25817   if (TARGET_HARD_FLOAT)
25818     {
25819       /* Generate VFP register multi-pop.  */
25820       int end_reg = LAST_VFP_REGNUM + 1;
25821 
25822       /* Scan the registers in reverse order.  We need to match
25823          any groupings made in the prologue and generate matching
25824          vldm operations.  The need to match groups is because,
25825          unlike pop, vldm can only do consecutive regs.  */
25826       for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25827         /* Look for a case where a reg does not need restoring.  */
25828         if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25829             && (!df_regs_ever_live_p (i + 1)
25830                 || call_used_regs[i + 1]))
25831           {
25832             /* Restore the regs discovered so far (from reg+2 to
25833                end_reg).  */
25834             if (end_reg > i + 2)
25835               arm_emit_vfp_multi_reg_pop (i + 2,
25836                                           (end_reg - (i + 2)) / 2,
25837                                           stack_pointer_rtx);
25838             end_reg = i;
25839           }
25840 
25841       /* Restore the remaining regs that we have discovered (or possibly
25842          even all of them, if the conditional in the for loop never
25843          fired).  */
25844       if (end_reg > i + 2)
25845         arm_emit_vfp_multi_reg_pop (i + 2,
25846                                     (end_reg - (i + 2)) / 2,
25847                                     stack_pointer_rtx);
25848     }
25849 
25850   if (TARGET_IWMMXT)
25851     for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25852       if (df_regs_ever_live_p (i) && !call_used_regs[i])
25853         {
25854           rtx_insn *insn;
25855           rtx addr = gen_rtx_MEM (V2SImode,
25856                                   gen_rtx_POST_INC (SImode,
25857                                                     stack_pointer_rtx));
25858           set_mem_alias_set (addr, get_frame_alias_set ());
25859           insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25860           REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25861                                              gen_rtx_REG (V2SImode, i),
25862                                              NULL_RTX);
25863 	  arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25864 				       stack_pointer_rtx, stack_pointer_rtx);
25865         }
25866 
25867   if (saved_regs_mask)
25868     {
25869       rtx insn;
25870       bool return_in_pc = false;
25871 
25872       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25873           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25874 	  && !IS_CMSE_ENTRY (func_type)
25875           && !IS_STACKALIGN (func_type)
25876           && really_return
25877           && crtl->args.pretend_args_size == 0
25878           && saved_regs_mask & (1 << LR_REGNUM)
25879           && !crtl->calls_eh_return)
25880         {
25881           saved_regs_mask &= ~(1 << LR_REGNUM);
25882           saved_regs_mask |= (1 << PC_REGNUM);
25883           return_in_pc = true;
25884         }
25885 
25886       if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25887         {
25888           for (i = 0; i <= LAST_ARM_REGNUM; i++)
25889             if (saved_regs_mask & (1 << i))
25890               {
25891                 rtx addr = gen_rtx_MEM (SImode,
25892                                         gen_rtx_POST_INC (SImode,
25893                                                           stack_pointer_rtx));
25894                 set_mem_alias_set (addr, get_frame_alias_set ());
25895 
25896                 if (i == PC_REGNUM)
25897                   {
25898                     insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25899                     XVECEXP (insn, 0, 0) = ret_rtx;
25900                     XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25901                                                         addr);
25902                     RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25903                     insn = emit_jump_insn (insn);
25904                   }
25905                 else
25906                   {
25907                     insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25908                                                  addr));
25909                     REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25910                                                        gen_rtx_REG (SImode, i),
25911                                                        NULL_RTX);
25912 		    arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25913 						 stack_pointer_rtx,
25914 						 stack_pointer_rtx);
25915                   }
25916               }
25917         }
25918       else
25919         {
25920           if (TARGET_LDRD
25921 	      && current_tune->prefer_ldrd_strd
25922               && !optimize_function_for_size_p (cfun))
25923             {
25924               if (TARGET_THUMB2)
25925                 thumb2_emit_ldrd_pop (saved_regs_mask);
25926               else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25927                 arm_emit_ldrd_pop (saved_regs_mask);
25928               else
25929                 arm_emit_multi_reg_pop (saved_regs_mask);
25930             }
25931           else
25932             arm_emit_multi_reg_pop (saved_regs_mask);
25933         }
25934 
25935       if (return_in_pc)
25936         return;
25937     }
25938 
25939   amount
25940     = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25941   if (amount)
25942     {
25943       int i, j;
25944       rtx dwarf = NULL_RTX;
25945       rtx_insn *tmp =
25946 	emit_insn (gen_addsi3 (stack_pointer_rtx,
25947 			       stack_pointer_rtx,
25948 			       GEN_INT (amount)));
25949 
25950       RTX_FRAME_RELATED_P (tmp) = 1;
25951 
25952       if (cfun->machine->uses_anonymous_args)
25953 	{
25954 	  /* Restore the pretend args.  See arm_expand_prologue for how the
25955 	     pretend args are saved on the stack.  */
25956 	  int num_regs = crtl->args.pretend_args_size / 4;
25957 	  saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25958 	  for (j = 0, i = 0; j < num_regs; i++)
25959 	    if (saved_regs_mask & (1 << i))
25960 	      {
25961 		rtx reg = gen_rtx_REG (SImode, i);
25962 		dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25963 		j++;
25964 	      }
25965 	  REG_NOTES (tmp) = dwarf;
25966 	}
25967       arm_add_cfa_adjust_cfa_note (tmp, amount,
25968 				   stack_pointer_rtx, stack_pointer_rtx);
25969     }
25970 
25971   /* Clear all caller-saved regs that are not used to return.  */
25972   if (IS_CMSE_ENTRY (arm_current_func_type ()))
25973     {
25974       /* CMSE_ENTRY always returns.  */
25975       gcc_assert (really_return);
25976       cmse_nonsecure_entry_clear_before_return ();
25977     }
25978 
25979   if (!really_return)
25980     return;
25981 
25982   if (crtl->calls_eh_return)
25983     emit_insn (gen_addsi3 (stack_pointer_rtx,
25984                            stack_pointer_rtx,
25985                            gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25986 
25987   if (IS_STACKALIGN (func_type))
25988     /* Restore the original stack pointer.  Before prologue, the stack was
25989        realigned and the original stack pointer saved in r0.  For details,
25990        see comment in arm_expand_prologue.  */
25991     emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25992 
25993   emit_jump_insn (simple_return_rtx);
25994 }
25995 
25996 /* Implementation of insn prologue_thumb1_interwork.  This is the first
25997    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
25998 
25999 const char *
26000 thumb1_output_interwork (void)
26001 {
26002   const char * name;
26003   FILE *f = asm_out_file;
26004 
26005   gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26006   gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26007 	      == SYMBOL_REF);
26008   name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26009 
26010   /* Generate code sequence to switch us into Thumb mode.  */
26011   /* The .code 32 directive has already been emitted by
26012      ASM_DECLARE_FUNCTION_NAME.  */
26013   asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26014   asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26015 
26016   /* Generate a label, so that the debugger will notice the
26017      change in instruction sets.  This label is also used by
26018      the assembler to bypass the ARM code when this function
26019      is called from a Thumb encoded function elsewhere in the
26020      same file.  Hence the definition of STUB_NAME here must
26021      agree with the definition in gas/config/tc-arm.c.  */
26022 
26023 #define STUB_NAME ".real_start_of"
26024 
26025   fprintf (f, "\t.code\t16\n");
26026 #ifdef ARM_PE
26027   if (arm_dllexport_name_p (name))
26028     name = arm_strip_name_encoding (name);
26029 #endif
26030   asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26031   fprintf (f, "\t.thumb_func\n");
26032   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26033 
26034   return "";
26035 }
26036 
26037 /* Handle the case of a double word load into a low register from
26038    a computed memory address.  The computed address may involve a
26039    register which is overwritten by the load.  */
26040 const char *
26041 thumb_load_double_from_address (rtx *operands)
26042 {
26043   rtx addr;
26044   rtx base;
26045   rtx offset;
26046   rtx arg1;
26047   rtx arg2;
26048 
26049   gcc_assert (REG_P (operands[0]));
26050   gcc_assert (MEM_P (operands[1]));
26051 
26052   /* Get the memory address.  */
26053   addr = XEXP (operands[1], 0);
26054 
26055   /* Work out how the memory address is computed.  */
26056   switch (GET_CODE (addr))
26057     {
26058     case REG:
26059       operands[2] = adjust_address (operands[1], SImode, 4);
26060 
26061       if (REGNO (operands[0]) == REGNO (addr))
26062 	{
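	  /* Loading the low word first would overwrite the base register, so
	     load the high word (at the address + 4) first.  */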
26063 	  output_asm_insn ("ldr\t%H0, %2", operands);
26064 	  output_asm_insn ("ldr\t%0, %1", operands);
26065 	}
26066       else
26067 	{
26068 	  output_asm_insn ("ldr\t%0, %1", operands);
26069 	  output_asm_insn ("ldr\t%H0, %2", operands);
26070 	}
26071       break;
26072 
26073     case CONST:
26074       /* Compute <address> + 4 for the high order load.  */
26075       operands[2] = adjust_address (operands[1], SImode, 4);
26076 
26077       output_asm_insn ("ldr\t%0, %1", operands);
26078       output_asm_insn ("ldr\t%H0, %2", operands);
26079       break;
26080 
26081     case PLUS:
26082       arg1   = XEXP (addr, 0);
26083       arg2   = XEXP (addr, 1);
26084 
26085       if (CONSTANT_P (arg1))
26086 	base = arg2, offset = arg1;
26087       else
26088 	base = arg1, offset = arg2;
26089 
26090       gcc_assert (REG_P (base));
26091 
26092       /* Catch the case of <address> = <reg> + <reg> */
26093       if (REG_P (offset))
26094 	{
26095 	  int reg_offset = REGNO (offset);
26096 	  int reg_base   = REGNO (base);
26097 	  int reg_dest   = REGNO (operands[0]);
26098 
26099 	  /* Add the base and offset registers together into the
26100              higher destination register.  */
26101 	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26102 		       reg_dest + 1, reg_base, reg_offset);
26103 
26104 	  /* Load the lower destination register from the address in
26105              the higher destination register.  */
26106 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26107 		       reg_dest, reg_dest + 1);
26108 
26109 	  /* Load the higher destination register from its own address
26110              plus 4.  */
26111 	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26112 		       reg_dest + 1, reg_dest + 1);
26113 	}
26114       else
26115 	{
26116 	  /* Compute <address> + 4 for the high order load.  */
26117 	  operands[2] = adjust_address (operands[1], SImode, 4);
26118 
26119 	  /* If the computed address is held in the low order register
26120 	     then load the high order register first, otherwise always
26121 	     load the low order register first.  */
26122 	  if (REGNO (operands[0]) == REGNO (base))
26123 	    {
26124 	      output_asm_insn ("ldr\t%H0, %2", operands);
26125 	      output_asm_insn ("ldr\t%0, %1", operands);
26126 	    }
26127 	  else
26128 	    {
26129 	      output_asm_insn ("ldr\t%0, %1", operands);
26130 	      output_asm_insn ("ldr\t%H0, %2", operands);
26131 	    }
26132 	}
26133       break;
26134 
26135     case LABEL_REF:
26136       /* With no registers to worry about we can just load the value
26137          directly.  */
26138       operands[2] = adjust_address (operands[1], SImode, 4);
26139 
26140       output_asm_insn ("ldr\t%H0, %2", operands);
26141       output_asm_insn ("ldr\t%0, %1", operands);
26142       break;
26143 
26144     default:
26145       gcc_unreachable ();
26146     }
26147 
26148   return "";
26149 }
26150 
26151 const char *
26152 thumb_output_move_mem_multiple (int n, rtx *operands)
26153 {
26154   switch (n)
26155     {
26156     case 2:
26157       if (REGNO (operands[4]) > REGNO (operands[5]))
26158 	std::swap (operands[4], operands[5]);
26159 
26160       output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26161       output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26162       break;
26163 
26164     case 3:
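      /* Sort the three scratch registers into ascending order, since ldmia/stmia
	 register lists must be in increasing register number.  */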
26165       if (REGNO (operands[4]) > REGNO (operands[5]))
26166         std::swap (operands[4], operands[5]);
26167       if (REGNO (operands[5]) > REGNO (operands[6]))
26168         std::swap (operands[5], operands[6]);
26169       if (REGNO (operands[4]) > REGNO (operands[5]))
26170         std::swap (operands[4], operands[5]);
26171 
26172       output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26173       output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26174       break;
26175 
26176     default:
26177       gcc_unreachable ();
26178     }
26179 
26180   return "";
26181 }
26182 
26183 /* Output a call-via instruction for thumb state.  */
26184 const char *
26185 thumb_call_via_reg (rtx reg)
26186 {
26187   int regno = REGNO (reg);
26188   rtx *labelp;
26189 
26190   gcc_assert (regno < LR_REGNUM);
26191 
26192   /* If we are in the normal text section we can use a single instance
26193      per compilation unit.  If we are doing function sections, then we need
26194      an entry per section, since we can't rely on reachability.  */
26195   if (in_section == text_section)
26196     {
26197       thumb_call_reg_needed = 1;
26198 
26199       if (thumb_call_via_label[regno] == NULL)
26200 	thumb_call_via_label[regno] = gen_label_rtx ();
26201       labelp = thumb_call_via_label + regno;
26202     }
26203   else
26204     {
26205       if (cfun->machine->call_via[regno] == NULL)
26206 	cfun->machine->call_via[regno] = gen_label_rtx ();
26207       labelp = cfun->machine->call_via + regno;
26208     }
26209 
26210   output_asm_insn ("bl\t%a0", labelp);
26211   return "";
26212 }
26213 
26214 /* Routines for generating rtl.  */
26215 void
26216 thumb_expand_movmemqi (rtx *operands)
26217 {
26218   rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26219   rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26220   HOST_WIDE_INT len = INTVAL (operands[2]);
26221   HOST_WIDE_INT offset = 0;
26222 
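  /* Copy in 12-byte and then 8-byte blocks using multi-register load/store,
     then mop up any remaining word, halfword and byte with single moves.  */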
26223   while (len >= 12)
26224     {
26225       emit_insn (gen_movmem12b (out, in, out, in));
26226       len -= 12;
26227     }
26228 
26229   if (len >= 8)
26230     {
26231       emit_insn (gen_movmem8b (out, in, out, in));
26232       len -= 8;
26233     }
26234 
26235   if (len >= 4)
26236     {
26237       rtx reg = gen_reg_rtx (SImode);
26238       emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26239       emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26240       len -= 4;
26241       offset += 4;
26242     }
26243 
26244   if (len >= 2)
26245     {
26246       rtx reg = gen_reg_rtx (HImode);
26247       emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26248 					      plus_constant (Pmode, in,
26249 							     offset))));
26250       emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26251 								offset)),
26252 			    reg));
26253       len -= 2;
26254       offset += 2;
26255     }
26256 
26257   if (len)
26258     {
26259       rtx reg = gen_reg_rtx (QImode);
26260       emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26261 					      plus_constant (Pmode, in,
26262 							     offset))));
26263       emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26264 								offset)),
26265 			    reg));
26266     }
26267 }
26268 
26269 void
26270 thumb_reload_out_hi (rtx *operands)
26271 {
26272   emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26273 }
26274 
26275 /* Return the length of a function name prefix
26276     that starts with the character 'c'.  */
26277 static int
26278 arm_get_strip_length (int c)
26279 {
26280   switch (c)
26281     {
26282     ARM_NAME_ENCODING_LENGTHS
26283       default: return 0;
26284     }
26285 }
26286 
26287 /* Return a pointer to a function's name with any
26288    and all prefix encodings stripped from it.  */
26289 const char *
26290 arm_strip_name_encoding (const char *name)
26291 {
26292   int skip;
26293 
26294   while ((skip = arm_get_strip_length (* name)))
26295     name += skip;
26296 
26297   return name;
26298 }
26299 
26300 /* If there is a '*' anywhere in the name's prefix, then
26301    emit the stripped name verbatim, otherwise prepend an
26302    underscore if leading underscores are being used.  */
26303 void
26304 arm_asm_output_labelref (FILE *stream, const char *name)
26305 {
26306   int skip;
26307   int verbatim = 0;
26308 
26309   while ((skip = arm_get_strip_length (* name)))
26310     {
26311       verbatim |= (*name == '*');
26312       name += skip;
26313     }
26314 
26315   if (verbatim)
26316     fputs (name, stream);
26317   else
26318     asm_fprintf (stream, "%U%s", name);
26319 }
26320 
26321 /* This function is used to emit an EABI tag and its associated value.
26322    We emit the numerical value of the tag in case the assembler does not
26323    support textual tags (e.g. gas prior to 2.20).  If requested we include
26324    the tag name in a comment so that anyone reading the assembler output
26325    will know which tag is being set.
26326 
26327    This function is not static because arm-c.c needs it too.  */
26328 
26329 void
26330 arm_emit_eabi_attribute (const char *name, int num, int val)
26331 {
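  /* For instance, this might emit a line such as
	.eabi_attribute 28, 1	@ Tag_ABI_VFP_args
     where the trailing comment only appears with -fverbose-asm or -dA and uses
     whatever ASM_COMMENT_START expands to (typically '@' for ARM).  */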
26332   asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26333   if (flag_verbose_asm || flag_debug_asm)
26334     asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26335   asm_fprintf (asm_out_file, "\n");
26336 }
26337 
26338 /* This function is used to print CPU tuning information as a comment
26339    in the assembler file.  Pointers are not printed for now.  */
26340 
26341 void
26342 arm_print_tune_info (void)
26343 {
26344   asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26345   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26346 	       current_tune->constant_limit);
26347   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26348 	       "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26349   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26350 	       "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26351   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26352 	       "prefetch.l1_cache_size:\t%d\n",
26353 	       current_tune->prefetch.l1_cache_size);
26354   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26355 	       "prefetch.l1_cache_line_size:\t%d\n",
26356 	       current_tune->prefetch.l1_cache_line_size);
26357   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26358 	       "prefer_constant_pool:\t%d\n",
26359 	       (int) current_tune->prefer_constant_pool);
26360   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26361 	       "branch_cost:\t(s:speed, p:predictable)\n");
26362   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26363   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26364 	       current_tune->branch_cost (false, false));
26365   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26366 	       current_tune->branch_cost (false, true));
26367   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26368 	       current_tune->branch_cost (true, false));
26369   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26370 	       current_tune->branch_cost (true, true));
26371   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26372 	       "prefer_ldrd_strd:\t%d\n",
26373 	       (int) current_tune->prefer_ldrd_strd);
26374   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26375 	       "logical_op_non_short_circuit:\t[%d,%d]\n",
26376 	       (int) current_tune->logical_op_non_short_circuit_thumb,
26377 	       (int) current_tune->logical_op_non_short_circuit_arm);
26378   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26379 	       "prefer_neon_for_64bits:\t%d\n",
26380 	       (int) current_tune->prefer_neon_for_64bits);
26381   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26382 	       "disparage_flag_setting_t16_encodings:\t%d\n",
26383 	       (int) current_tune->disparage_flag_setting_t16_encodings);
26384   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26385 	       "string_ops_prefer_neon:\t%d\n",
26386 	       (int) current_tune->string_ops_prefer_neon);
26387   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26388 	       "max_insns_inline_memset:\t%d\n",
26389 	       current_tune->max_insns_inline_memset);
26390   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26391 	       current_tune->fusible_ops);
26392   asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26393 	       (int) current_tune->sched_autopref);
26394 }
26395 
26396 /* Print .arch and .arch_extension directives corresponding to the
26397    current architecture configuration.  */
26398 static void
26399 arm_print_asm_arch_directives ()
26400 {
26401   const arch_option *arch
26402     = arm_parse_arch_option_name (all_architectures, "-march",
26403 				  arm_active_target.arch_name);
26404   auto_sbitmap opt_bits (isa_num_bits);
26405 
26406   gcc_assert (arch);
26407 
26408   asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26409   arm_last_printed_arch_string = arm_active_target.arch_name;
26410   if (!arch->common.extensions)
26411     return;
26412 
26413   for (const struct cpu_arch_extension *opt = arch->common.extensions;
26414        opt->name != NULL;
26415        opt++)
26416     {
26417       if (!opt->remove)
26418 	{
26419 	  arm_initialize_isa (opt_bits, opt->isa_bits);
26420 
26421 	  /* If every feature bit of this option is set in the target
26422 	     ISA specification, print out the option name.  However,
26423 	     don't print anything if all the bits are part of the
26424 	     FPU specification.  */
26425 	  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26426 	      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26427 	    asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26428 	}
26429     }
26430 }
26431 
26432 static void
26433 arm_file_start (void)
26434 {
26435   int val;
26436 
26437   if (TARGET_BPABI)
26438     {
26439       /* We don't have a specified CPU.  Use the architecture to
26440 	 generate the tags.
26441 
26442 	 Note: it might be better to do this unconditionally, then the
26443 	 assembler would not need to know about all new CPU names as
26444 	 they are added.  */
26445       if (!arm_active_target.core_name)
26446 	{
26447 	  /* armv7ve doesn't support any extensions.  */
26448 	  if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26449 	    {
26450 	      /* Keep backward compatibility for assemblers
26451 		 which don't support armv7ve.  */
26452 	      asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26453 	      asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26454 	      asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26455 	      asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26456 	      asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26457 	      arm_last_printed_arch_string = "armv7ve";
26458 	    }
26459 	  else
26460 	    arm_print_asm_arch_directives ();
26461 	}
26462       else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26463 	{
26464 	  asm_fprintf (asm_out_file, "\t.arch %s\n",
26465 		       arm_active_target.core_name + 8);
26466 	  arm_last_printed_arch_string = arm_active_target.core_name + 8;
26467 	}
26468       else
26469 	{
26470 	  const char* truncated_name
26471 	    = arm_rewrite_selected_cpu (arm_active_target.core_name);
26472 	  asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26473 	}
26474 
26475       if (print_tune_info)
26476 	arm_print_tune_info ();
26477 
26478       if (! TARGET_SOFT_FLOAT)
26479 	{
26480 	  if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26481 	    arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26482 
26483 	  if (TARGET_HARD_FLOAT_ABI)
26484 	    arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26485 	}
26486 
26487       /* Some of these attributes only apply when the corresponding features
26488 	 are used.  However we don't have any easy way of figuring this out.
26489 	 Conservatively record the setting that would have been used.  */
26490 
26491       if (flag_rounding_math)
26492 	arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26493 
26494       if (!flag_unsafe_math_optimizations)
26495 	{
26496 	  arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26497 	  arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26498 	}
26499       if (flag_signaling_nans)
26500 	arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26501 
26502       arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26503 			   flag_finite_math_only ? 1 : 3);
26504 
26505       arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26506       arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26507       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26508 			       flag_short_enums ? 1 : 2);
26509 
26510       /* Tag_ABI_optimization_goals.  */
26511       if (optimize_size)
26512 	val = 4;
26513       else if (optimize >= 2)
26514 	val = 2;
26515       else if (optimize)
26516 	val = 1;
26517       else
26518 	val = 6;
26519       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26520 
26521       arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26522 			       unaligned_access);
26523 
26524       if (arm_fp16_format)
26525 	arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26526 			     (int) arm_fp16_format);
26527 
26528       if (arm_lang_output_object_attributes_hook)
26529 	arm_lang_output_object_attributes_hook();
26530     }
26531 
26532   default_file_start ();
26533 }
26534 
26535 static void
26536 arm_file_end (void)
26537 {
26538   int regno;
26539 
26540   if (NEED_INDICATE_EXEC_STACK)
26541     /* Add .note.GNU-stack.  */
26542     file_end_indicate_exec_stack ();
26543 
26544   if (! thumb_call_reg_needed)
26545     return;
26546 
26547   switch_to_section (text_section);
26548   asm_fprintf (asm_out_file, "\t.code 16\n");
26549   ASM_OUTPUT_ALIGN (asm_out_file, 1);
26550 
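  /* Emit one "call via rN" helper for each register that needed it: an
     internal label followed by a single "bx rN" instruction.  */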
26551   for (regno = 0; regno < LR_REGNUM; regno++)
26552     {
26553       rtx label = thumb_call_via_label[regno];
26554 
26555       if (label != 0)
26556 	{
26557 	  targetm.asm_out.internal_label (asm_out_file, "L",
26558 					  CODE_LABEL_NUMBER (label));
26559 	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26560 	}
26561     }
26562 }
26563 
26564 #ifndef ARM_PE
26565 /* Symbols in the text segment can be accessed without indirecting via the
26566    constant pool; it may take an extra binary operation, but this is still
26567    faster than indirecting via memory.  Don't do this when not optimizing,
26568    since we won't be calculating all of the offsets necessary to do this
26569    simplification.  */
26570 
26571 static void
26572 arm_encode_section_info (tree decl, rtx rtl, int first)
26573 {
26574   if (optimize > 0 && TREE_CONSTANT (decl))
26575     SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26576 
26577   default_encode_section_info (decl, rtl, first);
26578 }
26579 #endif /* !ARM_PE */
26580 
26581 static void
26582 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26583 {
26584   if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26585       && !strcmp (prefix, "L"))
26586     {
26587       arm_ccfsm_state = 0;
26588       arm_target_insn = NULL;
26589     }
26590   default_internal_label (stream, prefix, labelno);
26591 }
26592 
26593 /* Output code to add DELTA to the first argument, and then jump
26594    to FUNCTION.  Used for C++ multiple inheritance.  */
26595 
26596 static void
26597 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26598 		     HOST_WIDE_INT, tree function)
26599 {
26600   static int thunk_label = 0;
26601   char label[256];
26602   char labelpc[256];
26603   int mi_delta = delta;
26604   const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26605   int shift = 0;
26606   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26607                     ? 1 : 0);
26608   if (mi_delta < 0)
26609     mi_delta = - mi_delta;
26610 
26611   final_start_function (emit_barrier (), file, 1);
26612 
26613   if (TARGET_THUMB1)
26614     {
26615       int labelno = thunk_label++;
26616       ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26617       /* Thunks are entered in ARM mode when available.  */
26618       if (TARGET_THUMB1_ONLY)
26619 	{
26620 	  /* push r3 so we can use it as a temporary.  */
26621 	  /* TODO: Omit this save if r3 is not used.  */
26622 	  fputs ("\tpush {r3}\n", file);
26623 	  fputs ("\tldr\tr3, ", file);
26624 	}
26625       else
26626 	{
26627 	  fputs ("\tldr\tr12, ", file);
26628 	}
26629       assemble_name (file, label);
26630       fputc ('\n', file);
26631       if (flag_pic)
26632 	{
26633 	  /* If we are generating PIC, the ldr instruction below loads
26634 	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
26635 	     the address of the add + 8, so we have:
26636 
26637 	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26638 	         = target + 1.
26639 
26640 	     Note that we have "+ 1" because some versions of GNU ld
26641 	     don't set the low bit of the result for R_ARM_REL32
26642 	     relocations against thumb function symbols.
26643 	     On ARMv6M this is +4, not +8.  */
26644 	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26645 	  assemble_name (file, labelpc);
26646 	  fputs (":\n", file);
26647 	  if (TARGET_THUMB1_ONLY)
26648 	    {
26649 	      /* This is 2 insns after the start of the thunk, so we know it
26650 	         is 4-byte aligned.  */
26651 	      fputs ("\tadd\tr3, pc, r3\n", file);
26652 	      fputs ("\tmov r12, r3\n", file);
26653 	    }
26654 	  else
26655 	    fputs ("\tadd\tr12, pc, r12\n", file);
26656 	}
26657       else if (TARGET_THUMB1_ONLY)
26658 	fputs ("\tmov r12, r3\n", file);
26659     }
26660   if (TARGET_THUMB1_ONLY)
26661     {
26662       if (mi_delta > 255)
26663 	{
26664 	  fputs ("\tldr\tr3, ", file);
26665 	  assemble_name (file, label);
26666 	  fputs ("+4\n", file);
26667 	  asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26668 		       mi_op, this_regno, this_regno);
26669 	}
26670       else if (mi_delta != 0)
26671 	{
26672 	  /* Thumb1 unified syntax requires s suffix in instruction name when
26673 	     one of the operands is immediate.  */
26674 	  asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26675 		       mi_op, this_regno, this_regno,
26676 		       mi_delta);
26677 	}
26678     }
26679   else
26680     {
26681       /* TODO: Use movw/movt for large constants when available.  */
26682       while (mi_delta != 0)
26683 	{
26684 	  if ((mi_delta & (3 << shift)) == 0)
26685 	    shift += 2;
26686 	  else
26687 	    {
26688 	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26689 			   mi_op, this_regno, this_regno,
26690 			   mi_delta & (0xff << shift));
26691 	      mi_delta &= ~(0xff << shift);
26692 	      shift += 8;
26693 	    }
26694 	}
26695     }
26696   if (TARGET_THUMB1)
26697     {
26698       if (TARGET_THUMB1_ONLY)
26699 	fputs ("\tpop\t{r3}\n", file);
26700 
26701       fprintf (file, "\tbx\tr12\n");
26702       ASM_OUTPUT_ALIGN (file, 2);
26703       assemble_name (file, label);
26704       fputs (":\n", file);
26705       if (flag_pic)
26706 	{
26707 	  /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn".  */
26708 	  rtx tem = XEXP (DECL_RTL (function), 0);
26709 	  /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26710 	     pipeline offset is four rather than eight.  Adjust the offset
26711 	     accordingly.  */
26712 	  tem = plus_constant (GET_MODE (tem), tem,
26713 			       TARGET_THUMB1_ONLY ? -3 : -7);
26714 	  tem = gen_rtx_MINUS (GET_MODE (tem),
26715 			       tem,
26716 			       gen_rtx_SYMBOL_REF (Pmode,
26717 						   ggc_strdup (labelpc)));
26718 	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
26719 	}
26720       else
26721 	/* Output ".word .LTHUNKn".  */
26722 	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26723 
26724       if (TARGET_THUMB1_ONLY && mi_delta > 255)
26725 	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26726     }
26727   else
26728     {
26729       fputs ("\tb\t", file);
26730       assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26731       if (NEED_PLT_RELOC)
26732         fputs ("(PLT)", file);
26733       fputc ('\n', file);
26734     }
26735 
26736   final_end_function ();
26737 }
26738 
26739 /* MI thunk handling for TARGET_32BIT.  */
26740 
26741 static void
26742 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26743 		       HOST_WIDE_INT vcall_offset, tree function)
26744 {
26745   const bool long_call_p = arm_is_long_call_p (function);
26746 
26747   /* On ARM, this_regno is R0 or R1 depending on
26748      whether the function returns an aggregate or not.
26749   */
26750   int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26751 				       function)
26752 		    ? R1_REGNUM : R0_REGNUM);
26753 
26754   rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26755   rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26756   reload_completed = 1;
26757   emit_note (NOTE_INSN_PROLOGUE_END);
26758 
26759   /* Add DELTA to THIS_RTX.  */
26760   if (delta != 0)
26761     arm_split_constant (PLUS, Pmode, NULL_RTX,
26762 			delta, this_rtx, this_rtx, false);
26763 
26764   /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
26765   if (vcall_offset != 0)
26766     {
26767       /* Load *THIS_RTX.  */
26768       emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26769       /* Compute *THIS_RTX + VCALL_OFFSET.  */
26770       arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26771 			  false);
26772       /* Compute *(*THIS_RTX + VCALL_OFFSET).  */
26773       emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26774       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26775     }
26776 
26777   /* Generate a tail call to the target function.  */
26778   if (!TREE_USED (function))
26779     {
26780       assemble_external (function);
26781       TREE_USED (function) = 1;
26782     }
26783   rtx funexp = XEXP (DECL_RTL (function), 0);
26784   if (long_call_p)
26785     {
26786       emit_move_insn (temp, funexp);
26787       funexp = temp;
26788     }
26789   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26790   rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26791   SIBLING_CALL_P (insn) = 1;
26792   emit_barrier ();
26793 
26794   /* Indirect calls require a bit of fixup in PIC mode.  */
26795   if (long_call_p)
26796     {
26797       split_all_insns_noflow ();
26798       arm_reorg ();
26799     }
26800 
26801   insn = get_insns ();
26802   shorten_branches (insn);
26803   final_start_function (insn, file, 1);
26804   final (insn, file, 1);
26805   final_end_function ();
26806 
26807   /* Stop pretending this is a post-reload pass.  */
26808   reload_completed = 0;
26809 }
26810 
26811 /* Output code to add DELTA to the first argument, and then jump
26812    to FUNCTION.  Used for C++ multiple inheritance.  */
26813 
26814 static void
26815 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26816 		     HOST_WIDE_INT vcall_offset, tree function)
26817 {
26818   if (TARGET_32BIT)
26819     arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26820   else
26821     arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26822 }
26823 
26824 int
26825 arm_emit_vector_const (FILE *file, rtx x)
26826 {
26827   int i;
26828   const char * pattern;
26829 
26830   gcc_assert (GET_CODE (x) == CONST_VECTOR);
26831 
26832   switch (GET_MODE (x))
26833     {
26834     case E_V2SImode: pattern = "%08x"; break;
26835     case E_V4HImode: pattern = "%04x"; break;
26836     case E_V8QImode: pattern = "%02x"; break;
26837     default:       gcc_unreachable ();
26838     }
26839 
26840   fprintf (file, "0x");
26841   for (i = CONST_VECTOR_NUNITS (x); i--;)
26842     {
26843       rtx element;
26844 
26845       element = CONST_VECTOR_ELT (x, i);
26846       fprintf (file, pattern, INTVAL (element));
26847     }
26848 
26849   return 1;
26850 }
26851 
26852 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26853    HFmode constant pool entries are actually loaded with ldr.  */
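/* A sketch of the resulting layout: on a little-endian target the two value
   bytes are emitted first followed by two bytes of padding, so the 32-bit
   word fetched by ldr carries the fp16 bits in its low half; when
   WORDS_BIG_ENDIAN the padding is emitted first instead, as the code below
   shows.  */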
26854 void
26855 arm_emit_fp16_const (rtx c)
26856 {
26857   long bits;
26858 
26859   bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26860   if (WORDS_BIG_ENDIAN)
26861     assemble_zeros (2);
26862   assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26863   if (!WORDS_BIG_ENDIAN)
26864     assemble_zeros (2);
26865 }
26866 
26867 const char *
26868 arm_output_load_gr (rtx *operands)
26869 {
26870   rtx reg;
26871   rtx offset;
26872   rtx wcgr;
26873   rtx sum;
26874 
26875   if (!MEM_P (operands [1])
26876       || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26877       || !REG_P (reg = XEXP (sum, 0))
26878       || !CONST_INT_P (offset = XEXP (sum, 1))
26879       || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26880     return "wldrw%?\t%0, %1";
26881 
26882   /* Fix up an out-of-range load of a GR register.  */
26883   output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26884   wcgr = operands[0];
26885   operands[0] = reg;
26886   output_asm_insn ("ldr%?\t%0, %1", operands);
26887 
26888   operands[0] = wcgr;
26889   operands[1] = reg;
26890   output_asm_insn ("tmcr%?\t%0, %1", operands);
26891   output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26892 
26893   return "";
26894 }
26895 
26896 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26897 
26898    On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26899    named arg and all anonymous args onto the stack.
26900    XXX I know the prologue shouldn't be pushing registers, but it is faster
26901    that way.  */
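/* An illustrative example (assuming the usual AAPCS core-register usage,
   with NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4): for "int f (int a, ...)"
   the named argument occupies r0, so nregs is 1 and *pretend_size becomes
   (4 - 1) * 4 = 12, i.e. enough space for the prologue to push r1-r3 so the
   anonymous arguments end up contiguous on the stack.  */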
26902 
26903 static void
26904 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26905 			    machine_mode mode,
26906 			    tree type,
26907 			    int *pretend_size,
26908 			    int second_time ATTRIBUTE_UNUSED)
26909 {
26910   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26911   int nregs;
26912 
26913   cfun->machine->uses_anonymous_args = 1;
26914   if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26915     {
26916       nregs = pcum->aapcs_ncrn;
26917       if (nregs & 1)
26918 	{
26919 	  int res = arm_needs_doubleword_align (mode, type);
26920 	  if (res < 0 && warn_psabi)
26921 	    inform (input_location, "parameter passing for argument of "
26922 		    "type %qT changed in GCC 7.1", type);
26923 	  else if (res > 0)
26924 	    nregs++;
26925 	}
26926     }
26927   else
26928     nregs = pcum->nregs;
26929 
26930   if (nregs < NUM_ARG_REGS)
26931     *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26932 }
26933 
26934 /* We can't rely on the caller doing the proper promotion when
26935    using APCS or ATPCS.  */
26936 
26937 static bool
26938 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26939 {
26940     return !TARGET_AAPCS_BASED;
26941 }
26942 
26943 static machine_mode
26944 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26945                            machine_mode mode,
26946                            int *punsignedp ATTRIBUTE_UNUSED,
26947                            const_tree fntype ATTRIBUTE_UNUSED,
26948                            int for_return ATTRIBUTE_UNUSED)
26949 {
26950   if (GET_MODE_CLASS (mode) == MODE_INT
26951       && GET_MODE_SIZE (mode) < 4)
26952     return SImode;
26953 
26954   return mode;
26955 }
26956 
26957 
26958 static bool
26959 arm_default_short_enums (void)
26960 {
26961   return ARM_DEFAULT_SHORT_ENUMS;
26962 }
26963 
26964 
26965 /* AAPCS requires that anonymous bitfields affect structure alignment.  */
26966 
26967 static bool
26968 arm_align_anon_bitfield (void)
26969 {
26970   return TARGET_AAPCS_BASED;
26971 }
26972 
26973 
26974 /* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */
26975 
26976 static tree
26977 arm_cxx_guard_type (void)
26978 {
26979   return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26980 }
26981 
26982 
26983 /* The EABI says test the least significant bit of a guard variable.  */
26984 
26985 static bool
26986 arm_cxx_guard_mask_bit (void)
26987 {
26988   return TARGET_AAPCS_BASED;
26989 }
26990 
26991 
26992 /* The EABI specifies that all array cookies are 8 bytes long.  */
26993 
26994 static tree
26995 arm_get_cookie_size (tree type)
26996 {
26997   tree size;
26998 
26999   if (!TARGET_AAPCS_BASED)
27000     return default_cxx_get_cookie_size (type);
27001 
27002   size = build_int_cst (sizetype, 8);
27003   return size;
27004 }
27005 
27006 
27007 /* The EABI says that array cookies should also contain the element size.  */
27008 
27009 static bool
27010 arm_cookie_has_size (void)
27011 {
27012   return TARGET_AAPCS_BASED;
27013 }
27014 
27015 
27016 /* The EABI says constructors and destructors should return a pointer to
27017    the object constructed/destroyed.  */
27018 
27019 static bool
27020 arm_cxx_cdtor_returns_this (void)
27021 {
27022   return TARGET_AAPCS_BASED;
27023 }
27024 
27025 /* The EABI says that an inline function may never be the key
27026    method.  */
27027 
27028 static bool
27029 arm_cxx_key_method_may_be_inline (void)
27030 {
27031   return !TARGET_AAPCS_BASED;
27032 }
27033 
27034 static void
27035 arm_cxx_determine_class_data_visibility (tree decl)
27036 {
27037   if (!TARGET_AAPCS_BASED
27038       || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27039     return;
27040 
27041   /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27042      is exported.  However, on systems without dynamic vague linkage,
27043      \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
27044   if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27045     DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27046   else
27047     DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27048   DECL_VISIBILITY_SPECIFIED (decl) = 1;
27049 }
27050 
27051 static bool
27052 arm_cxx_class_data_always_comdat (void)
27053 {
27054   /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27055      vague linkage if the class has no key function.  */
27056   return !TARGET_AAPCS_BASED;
27057 }
27058 
27059 
27060 /* The EABI says __aeabi_atexit should be used to register static
27061    destructors.  */
27062 
27063 static bool
27064 arm_cxx_use_aeabi_atexit (void)
27065 {
27066   return TARGET_AAPCS_BASED;
27067 }
27068 
27069 
27070 void
27071 arm_set_return_address (rtx source, rtx scratch)
27072 {
27073   arm_stack_offsets *offsets;
27074   HOST_WIDE_INT delta;
27075   rtx addr, mem;
27076   unsigned long saved_regs;
27077 
27078   offsets = arm_get_frame_offsets ();
27079   saved_regs = offsets->saved_regs_mask;
27080 
27081   if ((saved_regs & (1 << LR_REGNUM)) == 0)
27082     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27083   else
27084     {
27085       if (frame_pointer_needed)
27086 	addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27087       else
27088 	{
27089 	  /* LR will be the first saved register.  */
27090 	  delta = offsets->outgoing_args - (offsets->frame + 4);
27091 
27092 
27093 	  if (delta >= 4096)
27094 	    {
27095 	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27096 				     GEN_INT (delta & ~4095)));
27097 	      addr = scratch;
27098 	      delta &= 4095;
27099 	    }
27100 	  else
27101 	    addr = stack_pointer_rtx;
27102 
27103 	  addr = plus_constant (Pmode, addr, delta);
27104 	}
27105 
27106       /* The store needs to be marked to prevent DSE from deleting
27107 	 it as dead if it is based on fp.  */
27108       mem = gen_frame_mem (Pmode, addr);
27109       MEM_VOLATILE_P (mem) = true;
27110       emit_move_insn (mem, source);
27111     }
27112 }
27113 
27114 
27115 void
27116 thumb_set_return_address (rtx source, rtx scratch)
27117 {
27118   arm_stack_offsets *offsets;
27119   HOST_WIDE_INT delta;
27120   HOST_WIDE_INT limit;
27121   int reg;
27122   rtx addr, mem;
27123   unsigned long mask;
27124 
27125   emit_use (source);
27126 
27127   offsets = arm_get_frame_offsets ();
27128   mask = offsets->saved_regs_mask;
27129   if (mask & (1 << LR_REGNUM))
27130     {
27131       limit = 1024;
27132       /* Find the saved regs.  */
27133       if (frame_pointer_needed)
27134 	{
27135 	  delta = offsets->soft_frame - offsets->saved_args;
27136 	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27137 	  if (TARGET_THUMB1)
27138 	    limit = 128;
27139 	}
27140       else
27141 	{
27142 	  delta = offsets->outgoing_args - offsets->saved_args;
27143 	  reg = SP_REGNUM;
27144 	}
27145       /* Allow for the stack frame.  */
27146       if (TARGET_THUMB1 && TARGET_BACKTRACE)
27147 	delta -= 16;
27148       /* The link register is always the first saved register.  */
27149       delta -= 4;
27150 
27151       /* Construct the address.  */
27152       addr = gen_rtx_REG (SImode, reg);
27153       if (delta > limit)
27154 	{
27155 	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27156 	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27157 	  addr = scratch;
27158 	}
27159       else
27160 	addr = plus_constant (Pmode, addr, delta);
27161 
27162       /* The store needs to be marked to prevent DSE from deleting
27163 	 it as dead if it is based on fp.  */
27164       mem = gen_frame_mem (Pmode, addr);
27165       MEM_VOLATILE_P (mem) = true;
27166       emit_move_insn (mem, source);
27167     }
27168   else
27169     emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27170 }
27171 
27172 /* Implements target hook vector_mode_supported_p.  */
27173 bool
27174 arm_vector_mode_supported_p (machine_mode mode)
27175 {
27176   /* Neon also supports V2SImode, etc. listed in the clause below.  */
27177   if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27178       || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27179       || mode == V2DImode || mode == V8HFmode))
27180     return true;
27181 
27182   if ((TARGET_NEON || TARGET_IWMMXT)
27183       && ((mode == V2SImode)
27184 	  || (mode == V4HImode)
27185 	  || (mode == V8QImode)))
27186     return true;
27187 
27188   if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27189       || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27190       || mode == V2HAmode))
27191     return true;
27192 
27193   return false;
27194 }
27195 
27196 /* Implements target hook array_mode_supported_p.  */
27197 
27198 static bool
27199 arm_array_mode_supported_p (machine_mode mode,
27200 			    unsigned HOST_WIDE_INT nelems)
27201 {
27202   /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27203      for now, as the lane-swapping logic needs to be extended in the expanders.
27204      See PR target/82518.  */
27205   if (TARGET_NEON && !BYTES_BIG_ENDIAN
27206       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27207       && (nelems >= 2 && nelems <= 4))
27208     return true;
27209 
27210   return false;
27211 }
27212 
27213 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27214    registers when autovectorizing for Neon, at least until multiple vector
27215    widths are supported properly by the middle-end.  */
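/* For example (purely illustrative of the switch below): with Neon enabled,
   a loop over "float" elements is auto-vectorized using V4SFmode (a quadword
   q register), or V2SFmode instead when -mvectorize-with-neon-double is
   given.  */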
27216 
27217 static machine_mode
27218 arm_preferred_simd_mode (scalar_mode mode)
27219 {
27220   if (TARGET_NEON)
27221     switch (mode)
27222       {
27223       case E_SFmode:
27224 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27225       case E_SImode:
27226 	return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27227       case E_HImode:
27228 	return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27229       case E_QImode:
27230 	return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27231       case E_DImode:
27232 	if (!TARGET_NEON_VECTORIZE_DOUBLE)
27233 	  return V2DImode;
27234 	break;
27235 
27236       default:;
27237       }
27238 
27239   if (TARGET_REALLY_IWMMXT)
27240     switch (mode)
27241       {
27242       case E_SImode:
27243 	return V2SImode;
27244       case E_HImode:
27245 	return V4HImode;
27246       case E_QImode:
27247 	return V8QImode;
27248 
27249       default:;
27250       }
27251 
27252   return word_mode;
27253 }
27254 
27255 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27256 
27257    We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
27258    using r0-r4 for function arguments, r7 for the stack frame, and not have
27259    enough left over to do doubleword arithmetic.  For Thumb-2 all the
27260    potentially problematic instructions accept high registers so this is not
27261    necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
27262    that require many low registers.  */
27263 static bool
27264 arm_class_likely_spilled_p (reg_class_t rclass)
27265 {
27266   if ((TARGET_THUMB1 && rclass == LO_REGS)
27267       || rclass  == CC_REG)
27268     return true;
27269 
27270   return false;
27271 }
27272 
27273 /* Implements target hook small_register_classes_for_mode_p.  */
27274 bool
27275 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27276 {
27277   return TARGET_THUMB1;
27278 }
27279 
27280 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
27281    ARM insns and therefore guarantee that the shift count is modulo 256.
27282    DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27283    guarantee no particular behavior for out-of-range counts.  */
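/* Illustration of the contract above (a sketch, not a new guarantee): for
   SImode shifts the optimizers may treat (x << (n & 255)) and (x << n) as
   equivalent, whereas for DImode shifts no such assumption is made because
   this hook returns 0.  */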
27284 
27285 static unsigned HOST_WIDE_INT
27286 arm_shift_truncation_mask (machine_mode mode)
27287 {
27288   return mode == SImode ? 255 : 0;
27289 }
27290 
27291 
27292 /* Map internal gcc register numbers to DWARF2 register numbers.  */
27293 
27294 unsigned int
27295 arm_dbx_register_number (unsigned int regno)
27296 {
27297   if (regno < 16)
27298     return regno;
27299 
27300   if (IS_VFP_REGNUM (regno))
27301     {
27302       /* See comment in arm_dwarf_register_span.  */
27303       if (VFP_REGNO_OK_FOR_SINGLE (regno))
27304 	return 64 + regno - FIRST_VFP_REGNUM;
27305       else
27306 	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27307     }
27308 
27309   if (IS_IWMMXT_GR_REGNUM (regno))
27310     return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27311 
27312   if (IS_IWMMXT_REGNUM (regno))
27313     return 112 + regno - FIRST_IWMMXT_REGNUM;
27314 
27315   return DWARF_FRAME_REGISTERS;
27316 }
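/* Illustrative mappings implied by the code above (assuming the usual
   S-register granularity of the internal VFP numbering): r0-r15 keep their
   own numbers, s0-s31 map to DWARF registers 64-95, and a double-only
   register such as d16 maps to 256 + 16 = 272.  Anything else falls back to
   DWARF_FRAME_REGISTERS.  */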
27317 
27318 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27319    GCC models them as 64 32-bit registers, so we need to describe this to
27320    the DWARF generation code.  Other registers can use the default.  */
27321 static rtx
27322 arm_dwarf_register_span (rtx rtl)
27323 {
27324   machine_mode mode;
27325   unsigned regno;
27326   rtx parts[16];
27327   int nregs;
27328   int i;
27329 
27330   regno = REGNO (rtl);
27331   if (!IS_VFP_REGNUM (regno))
27332     return NULL_RTX;
27333 
27334   /* XXX FIXME: The EABI defines two VFP register ranges:
27335 	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27336 	256-287: D0-D31
27337      The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27338      corresponding D register.  Until GDB supports this, we shall use the
27339      legacy encodings.  We also use these encodings for D0-D15 for
27340      compatibility with older debuggers.  */
27341   mode = GET_MODE (rtl);
27342   if (GET_MODE_SIZE (mode) < 8)
27343     return NULL_RTX;
27344 
27345   if (VFP_REGNO_OK_FOR_SINGLE (regno))
27346     {
27347       nregs = GET_MODE_SIZE (mode) / 4;
27348       for (i = 0; i < nregs; i += 2)
27349 	if (TARGET_BIG_END)
27350 	  {
27351 	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27352 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27353 	  }
27354 	else
27355 	  {
27356 	    parts[i] = gen_rtx_REG (SImode, regno + i);
27357 	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27358 	  }
27359     }
27360   else
27361     {
27362       nregs = GET_MODE_SIZE (mode) / 8;
27363       for (i = 0; i < nregs; i++)
27364 	parts[i] = gen_rtx_REG (DImode, regno + i);
27365     }
27366 
27367   return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27368 }
27369 
27370 #if ARM_UNWIND_INFO
27371 /* Emit unwind directives for a store-multiple instruction or stack pointer
27372    push during alignment.
27373    These should only ever be generated by the function prologue code, so
27374    expect them to have a particular form.
27375    The store-multiple instruction sometimes pushes pc as the last register,
27376    although it should not be tracked into unwind information, or for -Os
27377    sometimes pushes some dummy registers before the first register that needs
27378    to be tracked in unwind information; such dummy registers are there just
27379    to avoid separate stack adjustment, and will not be restored in the
27380    epilogue.  */
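/* As an illustrative sketch of the output: a prologue "push {r4, r5, lr}"
   is annotated as "\t.save {r4, r5, lr}", a VFP store-multiple of d8-d9 as
   "\t.vsave {d8, d9}", and any dummy padding pushed purely to adjust the
   stack becomes a "\t.pad" directive, as the code below shows.  */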
27381 
27382 static void
27383 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27384 {
27385   int i;
27386   HOST_WIDE_INT offset;
27387   HOST_WIDE_INT nregs;
27388   int reg_size;
27389   unsigned reg;
27390   unsigned lastreg;
27391   unsigned padfirst = 0, padlast = 0;
27392   rtx e;
27393 
27394   e = XVECEXP (p, 0, 0);
27395   gcc_assert (GET_CODE (e) == SET);
27396 
27397   /* First insn will adjust the stack pointer.  */
27398   gcc_assert (GET_CODE (e) == SET
27399 	      && REG_P (SET_DEST (e))
27400 	      && REGNO (SET_DEST (e)) == SP_REGNUM
27401 	      && GET_CODE (SET_SRC (e)) == PLUS);
27402 
27403   offset = -INTVAL (XEXP (SET_SRC (e), 1));
27404   nregs = XVECLEN (p, 0) - 1;
27405   gcc_assert (nregs);
27406 
27407   reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27408   if (reg < 16)
27409     {
27410       /* For -Os dummy registers can be pushed at the beginning to
27411 	 avoid separate stack pointer adjustment.  */
27412       e = XVECEXP (p, 0, 1);
27413       e = XEXP (SET_DEST (e), 0);
27414       if (GET_CODE (e) == PLUS)
27415 	padfirst = INTVAL (XEXP (e, 1));
27416       gcc_assert (padfirst == 0 || optimize_size);
27417       /* The function prologue may also push pc, but not annotate it as it is
27418 	 never restored.  We turn this into a stack pointer adjustment.  */
27419       e = XVECEXP (p, 0, nregs);
27420       e = XEXP (SET_DEST (e), 0);
27421       if (GET_CODE (e) == PLUS)
27422 	padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27423       else
27424 	padlast = offset - 4;
27425       gcc_assert (padlast == 0 || padlast == 4);
27426       if (padlast == 4)
27427 	fprintf (asm_out_file, "\t.pad #4\n");
27428       reg_size = 4;
27429       fprintf (asm_out_file, "\t.save {");
27430     }
27431   else if (IS_VFP_REGNUM (reg))
27432     {
27433       reg_size = 8;
27434       fprintf (asm_out_file, "\t.vsave {");
27435     }
27436   else
27437     /* Unknown register type.  */
27438     gcc_unreachable ();
27439 
27440   /* If the stack increment doesn't match the size of the saved registers,
27441      something has gone horribly wrong.  */
27442   gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27443 
27444   offset = padfirst;
27445   lastreg = 0;
27446   /* The remaining insns will describe the stores.  */
27447   for (i = 1; i <= nregs; i++)
27448     {
27449       /* Expect (set (mem <addr>) (reg)).
27450          Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
27451       e = XVECEXP (p, 0, i);
27452       gcc_assert (GET_CODE (e) == SET
27453 		  && MEM_P (SET_DEST (e))
27454 		  && REG_P (SET_SRC (e)));
27455 
27456       reg = REGNO (SET_SRC (e));
27457       gcc_assert (reg >= lastreg);
27458 
27459       if (i != 1)
27460 	fprintf (asm_out_file, ", ");
27461       /* We can't use %r for vfp because we need to use the
27462 	 double precision register names.  */
27463       if (IS_VFP_REGNUM (reg))
27464 	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27465       else
27466 	asm_fprintf (asm_out_file, "%r", reg);
27467 
27468       if (flag_checking)
27469 	{
27470 	  /* Check that the addresses are consecutive.  */
27471 	  e = XEXP (SET_DEST (e), 0);
27472 	  if (GET_CODE (e) == PLUS)
27473 	    gcc_assert (REG_P (XEXP (e, 0))
27474 			&& REGNO (XEXP (e, 0)) == SP_REGNUM
27475 			&& CONST_INT_P (XEXP (e, 1))
27476 			&& offset == INTVAL (XEXP (e, 1)));
27477 	  else
27478 	    gcc_assert (i == 1
27479 			&& REG_P (e)
27480 			&& REGNO (e) == SP_REGNUM);
27481 	  offset += reg_size;
27482 	}
27483     }
27484   fprintf (asm_out_file, "}\n");
27485   if (padfirst)
27486     fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27487 }
27488 
27489 /*  Emit unwind directives for a SET.  */
27490 
27491 static void
27492 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27493 {
27494   rtx e0;
27495   rtx e1;
27496   unsigned reg;
27497 
27498   e0 = XEXP (p, 0);
27499   e1 = XEXP (p, 1);
27500   switch (GET_CODE (e0))
27501     {
27502     case MEM:
27503       /* Pushing a single register.  */
27504       if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27505 	  || !REG_P (XEXP (XEXP (e0, 0), 0))
27506 	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27507 	abort ();
27508 
27509       asm_fprintf (asm_out_file, "\t.save ");
27510       if (IS_VFP_REGNUM (REGNO (e1)))
27511 	asm_fprintf(asm_out_file, "{d%d}\n",
27512 		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27513       else
27514 	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27515       break;
27516 
27517     case REG:
27518       if (REGNO (e0) == SP_REGNUM)
27519 	{
27520 	  /* A stack increment.  */
27521 	  if (GET_CODE (e1) != PLUS
27522 	      || !REG_P (XEXP (e1, 0))
27523 	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
27524 	      || !CONST_INT_P (XEXP (e1, 1)))
27525 	    abort ();
27526 
27527 	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27528 		       -INTVAL (XEXP (e1, 1)));
27529 	}
27530       else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27531 	{
27532 	  HOST_WIDE_INT offset;
27533 
27534 	  if (GET_CODE (e1) == PLUS)
27535 	    {
27536 	      if (!REG_P (XEXP (e1, 0))
27537 		  || !CONST_INT_P (XEXP (e1, 1)))
27538 		abort ();
27539 	      reg = REGNO (XEXP (e1, 0));
27540 	      offset = INTVAL (XEXP (e1, 1));
27541 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27542 			   HARD_FRAME_POINTER_REGNUM, reg,
27543 			   offset);
27544 	    }
27545 	  else if (REG_P (e1))
27546 	    {
27547 	      reg = REGNO (e1);
27548 	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27549 			   HARD_FRAME_POINTER_REGNUM, reg);
27550 	    }
27551 	  else
27552 	    abort ();
27553 	}
27554       else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27555 	{
27556 	  /* Move from sp to reg.  */
27557 	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27558 	}
27559      else if (GET_CODE (e1) == PLUS
27560 	      && REG_P (XEXP (e1, 0))
27561 	      && REGNO (XEXP (e1, 0)) == SP_REGNUM
27562 	      && CONST_INT_P (XEXP (e1, 1)))
27563 	{
27564 	  /* Set reg to offset from sp.  */
27565 	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27566 		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27567 	}
27568       else
27569 	abort ();
27570       break;
27571 
27572     default:
27573       abort ();
27574     }
27575 }
27576 
27577 
27578 /* Emit unwind directives for the given insn.  */
27579 
27580 static void
27581 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27582 {
27583   rtx note, pat;
27584   bool handled_one = false;
27585 
27586   if (arm_except_unwind_info (&global_options) != UI_TARGET)
27587     return;
27588 
27589   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27590       && (TREE_NOTHROW (current_function_decl)
27591 	  || crtl->all_throwers_are_sibcalls))
27592     return;
27593 
27594   if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27595     return;
27596 
27597   for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27598     {
27599       switch (REG_NOTE_KIND (note))
27600 	{
27601 	case REG_FRAME_RELATED_EXPR:
27602 	  pat = XEXP (note, 0);
27603 	  goto found;
27604 
27605 	case REG_CFA_REGISTER:
27606 	  pat = XEXP (note, 0);
27607 	  if (pat == NULL)
27608 	    {
27609 	      pat = PATTERN (insn);
27610 	      if (GET_CODE (pat) == PARALLEL)
27611 		pat = XVECEXP (pat, 0, 0);
27612 	    }
27613 
27614 	  /* Only emitted for IS_STACKALIGN re-alignment.  */
27615 	  {
27616 	    rtx dest, src;
27617 	    unsigned reg;
27618 
27619 	    src = SET_SRC (pat);
27620 	    dest = SET_DEST (pat);
27621 
27622 	    gcc_assert (src == stack_pointer_rtx);
27623 	    reg = REGNO (dest);
27624 	    asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27625 			 reg + 0x90, reg);
27626 	  }
27627 	  handled_one = true;
27628 	  break;
27629 
27630 	/* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27631 	   to get correct dwarf information for shrink-wrap.  We should not
27632 	   emit unwind information for it because these are used either for
27633 	   pretend arguments or notes to adjust sp and restore registers from
27634 	   stack.  */
27635 	case REG_CFA_DEF_CFA:
27636 	case REG_CFA_ADJUST_CFA:
27637 	case REG_CFA_RESTORE:
27638 	  return;
27639 
27640 	case REG_CFA_EXPRESSION:
27641 	case REG_CFA_OFFSET:
27642 	  /* ??? Only handling here what we actually emit.  */
27643 	  gcc_unreachable ();
27644 
27645 	default:
27646 	  break;
27647 	}
27648     }
27649   if (handled_one)
27650     return;
27651   pat = PATTERN (insn);
27652  found:
27653 
27654   switch (GET_CODE (pat))
27655     {
27656     case SET:
27657       arm_unwind_emit_set (asm_out_file, pat);
27658       break;
27659 
27660     case SEQUENCE:
27661       /* Store multiple.  */
27662       arm_unwind_emit_sequence (asm_out_file, pat);
27663       break;
27664 
27665     default:
27666       abort();
27667     }
27668 }
27669 
27670 
27671 /* Output a reference from a function exception table to the type_info
27672    object X.  The EABI specifies that the symbol should be relocated by
27673    an R_ARM_TARGET2 relocation.  */
27674 
27675 static bool
27676 arm_output_ttype (rtx x)
27677 {
27678   fputs ("\t.word\t", asm_out_file);
27679   output_addr_const (asm_out_file, x);
27680   /* Use special relocations for symbol references.  */
27681   if (!CONST_INT_P (x))
27682     fputs ("(TARGET2)", asm_out_file);
27683   fputc ('\n', asm_out_file);
27684 
27685   return TRUE;
27686 }
27687 
27688 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
27689 
27690 static void
27691 arm_asm_emit_except_personality (rtx personality)
27692 {
27693   fputs ("\t.personality\t", asm_out_file);
27694   output_addr_const (asm_out_file, personality);
27695   fputc ('\n', asm_out_file);
27696 }
27697 #endif /* ARM_UNWIND_INFO */
27698 
27699 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
27700 
27701 static void
27702 arm_asm_init_sections (void)
27703 {
27704 #if ARM_UNWIND_INFO
27705   exception_section = get_unnamed_section (0, output_section_asm_op,
27706 					   "\t.handlerdata");
27707 #endif /* ARM_UNWIND_INFO */
27708 
27709 #ifdef OBJECT_FORMAT_ELF
27710   if (target_pure_code)
27711     text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27712 #endif
27713 }
27714 
27715 /* Output unwind directives for the start/end of a function.  */
27716 
27717 void
27718 arm_output_fn_unwind (FILE * f, bool prologue)
27719 {
27720   if (arm_except_unwind_info (&global_options) != UI_TARGET)
27721     return;
27722 
27723   if (prologue)
27724     fputs ("\t.fnstart\n", f);
27725   else
27726     {
27727       /* If this function will never be unwound, then mark it as such.
27728          The same condition is used in arm_unwind_emit to suppress
27729 	 the frame annotations.  */
27730       if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27731 	  && (TREE_NOTHROW (current_function_decl)
27732 	      || crtl->all_throwers_are_sibcalls))
27733 	fputs("\t.cantunwind\n", f);
27734 
27735       fputs ("\t.fnend\n", f);
27736     }
27737 }
27738 
27739 static bool
27740 arm_emit_tls_decoration (FILE *fp, rtx x)
27741 {
27742   enum tls_reloc reloc;
27743   rtx val;
27744 
27745   val = XVECEXP (x, 0, 0);
27746   reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27747 
27748   output_addr_const (fp, val);
27749 
27750   switch (reloc)
27751     {
27752     case TLS_GD32:
27753       fputs ("(tlsgd)", fp);
27754       break;
27755     case TLS_LDM32:
27756       fputs ("(tlsldm)", fp);
27757       break;
27758     case TLS_LDO32:
27759       fputs ("(tlsldo)", fp);
27760       break;
27761     case TLS_IE32:
27762       fputs ("(gottpoff)", fp);
27763       break;
27764     case TLS_LE32:
27765       fputs ("(tpoff)", fp);
27766       break;
27767     case TLS_DESCSEQ:
27768       fputs ("(tlsdesc)", fp);
27769       break;
27770     default:
27771       gcc_unreachable ();
27772     }
27773 
27774   switch (reloc)
27775     {
27776     case TLS_GD32:
27777     case TLS_LDM32:
27778     case TLS_IE32:
27779     case TLS_DESCSEQ:
27780       fputs (" + (. - ", fp);
27781       output_addr_const (fp, XVECEXP (x, 0, 2));
27782       /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27783       fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27784       output_addr_const (fp, XVECEXP (x, 0, 3));
27785       fputc (')', fp);
27786       break;
27787     default:
27788       break;
27789     }
27790 
27791   return TRUE;
27792 }
27793 
27794 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
27795 
27796 static void
27797 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27798 {
27799   gcc_assert (size == 4);
27800   fputs ("\t.word\t", file);
27801   output_addr_const (file, x);
27802   fputs ("(tlsldo)", file);
27803 }
27804 
27805 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
27806 
27807 static bool
27808 arm_output_addr_const_extra (FILE *fp, rtx x)
27809 {
27810   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27811     return arm_emit_tls_decoration (fp, x);
27812   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27813     {
27814       char label[256];
27815       int labelno = INTVAL (XVECEXP (x, 0, 0));
27816 
27817       ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27818       assemble_name_raw (fp, label);
27819 
27820       return TRUE;
27821     }
27822   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27823     {
27824       assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27825       if (GOT_PCREL)
27826 	fputs ("+.", fp);
27827       fputs ("-(", fp);
27828       output_addr_const (fp, XVECEXP (x, 0, 0));
27829       fputc (')', fp);
27830       return TRUE;
27831     }
27832   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27833     {
27834       output_addr_const (fp, XVECEXP (x, 0, 0));
27835       if (GOT_PCREL)
27836         fputs ("+.", fp);
27837       fputs ("-(", fp);
27838       output_addr_const (fp, XVECEXP (x, 0, 1));
27839       fputc (')', fp);
27840       return TRUE;
27841     }
27842   else if (GET_CODE (x) == CONST_VECTOR)
27843     return arm_emit_vector_const (fp, x);
27844 
27845   return FALSE;
27846 }
27847 
27848 /* Output assembly for a shift instruction.
27849    SET_FLAGS determines how the instruction modifies the condition codes.
27850    0 - Do not set condition codes.
27851    1 - Set condition codes.
27852    2 - Use smallest instruction.  */
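/* For instance (purely illustrative of the sprintf calls below): if
   operands[3] is an ASHIFT, shift_op returns "lsl" and the template becomes
   "lsl%?\t%0, %1, %2" for SET_FLAGS == 0, "lsl%.\t%0, %1, %2" for 1, or
   "lsl%!\t%0, %1, %2" for 2.  */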
27853 const char *
27854 arm_output_shift(rtx * operands, int set_flags)
27855 {
27856   char pattern[100];
27857   static const char flag_chars[3] = {'?', '.', '!'};
27858   const char *shift;
27859   HOST_WIDE_INT val;
27860   char c;
27861 
27862   c = flag_chars[set_flags];
27863   shift = shift_op(operands[3], &val);
27864   if (shift)
27865     {
27866       if (val != -1)
27867 	operands[2] = GEN_INT(val);
27868       sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27869     }
27870   else
27871     sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27872 
27873   output_asm_insn (pattern, operands);
27874   return "";
27875 }
27876 
27877 /* Output assembly for a WMMX immediate shift instruction.  */
27878 const char *
27879 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27880 {
27881   int shift = INTVAL (operands[2]);
27882   char templ[50];
27883   machine_mode opmode = GET_MODE (operands[0]);
27884 
27885   gcc_assert (shift >= 0);
27886 
27887   /* If the shift value is larger than the register form allows (> 63 for the
27888      D qualifier, > 31 for W, or > 15 for H), handle it specially below.  */
27889   if (((opmode == V4HImode) && (shift > 15))
27890 	|| ((opmode == V2SImode) && (shift > 31))
27891 	|| ((opmode == DImode) && (shift > 63)))
27892   {
27893     if (wror_or_wsra)
27894       {
27895         sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27896         output_asm_insn (templ, operands);
27897         if (opmode == DImode)
27898           {
27899 	    sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27900 	    output_asm_insn (templ, operands);
27901           }
27902       }
27903     else
27904       {
27905         /* The destination register will contain all zeros.  */
27906         sprintf (templ, "wzero\t%%0");
27907         output_asm_insn (templ, operands);
27908       }
27909     return "";
27910   }
27911 
27912   if ((opmode == DImode) && (shift > 32))
27913     {
27914       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27915       output_asm_insn (templ, operands);
27916       sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27917       output_asm_insn (templ, operands);
27918     }
27919   else
27920     {
27921       sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27922       output_asm_insn (templ, operands);
27923     }
27924   return "";
27925 }
27926 
27927 /* Output assembly for a WMMX tinsr instruction.  */
27928 const char *
27929 arm_output_iwmmxt_tinsr (rtx *operands)
27930 {
27931   int mask = INTVAL (operands[3]);
27932   int i;
27933   char templ[50];
27934   int units = mode_nunits[GET_MODE (operands[0])];
27935   gcc_assert ((mask & (mask - 1)) == 0);
27936   for (i = 0; i < units; ++i)
27937     {
27938       if ((mask & 0x01) == 1)
27939         {
27940           break;
27941         }
27942       mask >>= 1;
27943     }
27944   gcc_assert (i < units);
27945   {
27946     switch (GET_MODE (operands[0]))
27947       {
27948       case E_V8QImode:
27949 	sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27950 	break;
27951       case E_V4HImode:
27952 	sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27953 	break;
27954       case E_V2SImode:
27955 	sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27956 	break;
27957       default:
27958 	gcc_unreachable ();
27959 	break;
27960       }
27961     output_asm_insn (templ, operands);
27962   }
27963   return "";
27964 }
27965 
27966 /* Output a Thumb-1 casesi dispatch sequence.  */
27967 const char *
27968 thumb1_output_casesi (rtx *operands)
27969 {
27970   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27971 
27972   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27973 
27974   switch (GET_MODE(diff_vec))
27975     {
27976     case E_QImode:
27977       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27978 	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27979     case E_HImode:
27980       return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27981 	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27982     case E_SImode:
27983       return "bl\t%___gnu_thumb1_case_si";
27984     default:
27985       gcc_unreachable ();
27986     }
27987 }
27988 
27989 /* Output a Thumb-2 casesi instruction.  */
27990 const char *
27991 thumb2_output_casesi (rtx *operands)
27992 {
27993   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27994 
27995   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27996 
27997   output_asm_insn ("cmp\t%0, %1", operands);
27998   output_asm_insn ("bhi\t%l3", operands);
27999   switch (GET_MODE(diff_vec))
28000     {
28001     case E_QImode:
28002       return "tbb\t[%|pc, %0]";
28003     case E_HImode:
28004       return "tbh\t[%|pc, %0, lsl #1]";
28005     case E_SImode:
28006       if (flag_pic)
28007 	{
28008 	  output_asm_insn ("adr\t%4, %l2", operands);
28009 	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28010 	  output_asm_insn ("add\t%4, %4, %5", operands);
28011 	  return "bx\t%4";
28012 	}
28013       else
28014 	{
28015 	  output_asm_insn ("adr\t%4, %l2", operands);
28016 	  return "ldr\t%|pc, [%4, %0, lsl #2]";
28017 	}
28018     default:
28019       gcc_unreachable ();
28020     }
28021 }
28022 
28023 /* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
28024    per-core tuning structs.  */
28025 static int
28026 arm_issue_rate (void)
28027 {
28028   return current_tune->issue_rate;
28029 }
28030 
28031 /* Return how many instructions the scheduler should look ahead to choose the
28032    best one.  */
28033 static int
28034 arm_first_cycle_multipass_dfa_lookahead (void)
28035 {
28036   int issue_rate = arm_issue_rate ();
28037 
28038   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28039 }
28040 
28041 /* Enable modeling of L2 auto-prefetcher.  */
28042 static int
28043 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28044 {
28045   return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28046 }
28047 
28048 const char *
28049 arm_mangle_type (const_tree type)
28050 {
28051   /* The ARM ABI documents (10th October 2008) say that "__va_list"
28052      has to be mangled as if it is in the "std" namespace.  */
28053   if (TARGET_AAPCS_BASED
28054       && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28055     return "St9__va_list";
28056 
28057   /* Half-precision float.  */
28058   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28059     return "Dh";
28060 
28061   /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28062      builtin type.  */
28063   if (TYPE_NAME (type) != NULL)
28064     return arm_mangle_builtin_type (type);
28065 
28066   /* Use the default mangling.  */
28067   return NULL;
28068 }
28069 
28070 /* Order of allocation of core registers for Thumb: this allocation is
28071    written over the corresponding initial entries of the array
28072    initialized with REG_ALLOC_ORDER.  We allocate all low registers
28073    first.  Saving and restoring a low register is usually cheaper than
28074    using a call-clobbered high register.  */
28075 
28076 static const int thumb_core_reg_alloc_order[] =
28077 {
28078    3,  2,  1,  0,  4,  5,  6,  7,
28079   12, 14,  8,  9, 10, 11
28080 };
28081 
28082 /* Adjust register allocation order when compiling for Thumb.  */
28083 
28084 void
28085 arm_order_regs_for_local_alloc (void)
28086 {
28087   const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28088   memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28089   if (TARGET_THUMB)
28090     memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28091             sizeof (thumb_core_reg_alloc_order));
28092 }
28093 
28094 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
28095 
28096 bool
28097 arm_frame_pointer_required (void)
28098 {
28099   if (SUBTARGET_FRAME_POINTER_REQUIRED)
28100     return true;
28101 
28102   /* If the function receives nonlocal gotos, it needs to save the frame
28103      pointer in the nonlocal_goto_save_area object.  */
28104   if (cfun->has_nonlocal_label)
28105     return true;
28106 
28107   /* The frame pointer is required for non-leaf APCS frames.  */
28108   if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28109     return true;
28110 
28111   /* If we are probing the stack in the prologue, we will have a faulting
28112      instruction prior to the stack adjustment and this requires a frame
28113      pointer if we want to catch the exception using the EABI unwinder.  */
28114   if (!IS_INTERRUPT (arm_current_func_type ())
28115       && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28116 	  || flag_stack_clash_protection)
28117       && arm_except_unwind_info (&global_options) == UI_TARGET
28118       && cfun->can_throw_non_call_exceptions)
28119     {
28120       HOST_WIDE_INT size = get_frame_size ();
28121 
28122       /* That's irrelevant if there is no stack adjustment.  */
28123       if (size <= 0)
28124 	return false;
28125 
28126       /* That's relevant only if there is a stack probe.  */
28127       if (crtl->is_leaf && !cfun->calls_alloca)
28128 	{
28129 	  /* We don't have the final size of the frame so adjust.  */
28130 	  size += 32 * UNITS_PER_WORD;
28131 	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28132 	    return true;
28133 	}
28134       else
28135 	return true;
28136     }
28137 
28138   return false;
28139 }
28140 
28141 /* Only Thumb-1 lacks support for conditional execution, so return true if
28142    the target is not Thumb-1.  */
28143 static bool
28144 arm_have_conditional_execution (void)
28145 {
28146   return !TARGET_THUMB1;
28147 }
28148 
28149 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
28150 static HOST_WIDE_INT
28151 arm_vector_alignment (const_tree type)
28152 {
28153   HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28154 
28155   if (TARGET_AAPCS_BASED)
28156     align = MIN (align, 64);
28157 
28158   return align;
28159 }
28160 
28161 static void
28162 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28163 {
28164   if (!TARGET_NEON_VECTORIZE_DOUBLE)
28165     {
28166       sizes->safe_push (16);
28167       sizes->safe_push (8);
28168     }
28169 }
28170 
28171 static bool
28172 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28173 {
28174   /* Vectors which aren't in packed structures will not be less aligned than
28175      the natural alignment of their element type, so this is safe.  */
28176   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28177     return !is_packed;
28178 
28179   return default_builtin_vector_alignment_reachable (type, is_packed);
28180 }
28181 
28182 static bool
28183 arm_builtin_support_vector_misalignment (machine_mode mode,
28184 					 const_tree type, int misalignment,
28185 					 bool is_packed)
28186 {
28187   if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28188     {
28189       HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28190 
28191       if (is_packed)
28192         return align == 1;
28193 
28194       /* If the misalignment is unknown, we should be able to handle the access
28195 	 so long as it is not to a member of a packed data structure.  */
28196       if (misalignment == -1)
28197         return true;
28198 
28199       /* Return true if the misalignment is a multiple of the natural alignment
28200          of the vector's element type.  This is probably always going to be
28201 	 true in practice, since we've already established that this isn't a
28202 	 packed access.  */
28203       return ((misalignment % align) == 0);
28204     }
28205 
28206   return default_builtin_support_vector_misalignment (mode, type, misalignment,
28207 						      is_packed);
28208 }
28209 
28210 static void
28211 arm_conditional_register_usage (void)
28212 {
28213   int regno;
28214 
28215   if (TARGET_THUMB1 && optimize_size)
28216     {
28217       /* When optimizing for size on Thumb-1, it's better not
28218         to use the HI regs, because of the overhead of
28219         stacking them.  */
28220       for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28221 	fixed_regs[regno] = call_used_regs[regno] = 1;
28222     }
28223 
28224   /* The link register can be clobbered by any branch insn,
28225      but we have no way to track that at present, so mark
28226      it as unavailable.  */
28227   if (TARGET_THUMB1)
28228     fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28229 
28230   if (TARGET_32BIT && TARGET_HARD_FLOAT)
28231     {
28232       /* VFPv3 registers are disabled when earlier VFP
28233 	 versions are selected due to the definition of
28234 	 LAST_VFP_REGNUM.  */
28235       for (regno = FIRST_VFP_REGNUM;
28236 	   regno <= LAST_VFP_REGNUM; ++ regno)
28237 	{
28238 	  fixed_regs[regno] = 0;
28239 	  call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28240 	    || regno >= FIRST_VFP_REGNUM + 32;
28241 	}
28242     }
28243 
28244   if (TARGET_REALLY_IWMMXT)
28245     {
28246       regno = FIRST_IWMMXT_GR_REGNUM;
28247       /* The 2002/10/09 revision of the XScale ABI has wCG0
28248          and wCG1 as call-preserved registers.  The 2002/11/21
28249          revision changed this so that all wCG registers are
28250          scratch registers.  */
28251       for (regno = FIRST_IWMMXT_GR_REGNUM;
28252 	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28253 	fixed_regs[regno] = 0;
28254       /* The XScale ABI has wR0 - wR9 as scratch registers,
28255 	 the rest as call-preserved registers.  */
28256       for (regno = FIRST_IWMMXT_REGNUM;
28257 	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
28258 	{
28259 	  fixed_regs[regno] = 0;
28260 	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28261 	}
28262     }
28263 
28264   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28265     {
28266       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28267       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28268     }
28269   else if (TARGET_APCS_STACK)
28270     {
28271       fixed_regs[10]     = 1;
28272       call_used_regs[10] = 1;
28273     }
28274   /* -mcaller-super-interworking reserves r11 for calls to
28275      _interwork_r11_call_via_rN().  Making the register global
28276      is an easy way of ensuring that it remains valid for all
28277      calls.  */
28278   if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28279       || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28280     {
28281       fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28282       call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28283       if (TARGET_CALLER_INTERWORKING)
28284 	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28285     }
28286   SUBTARGET_CONDITIONAL_REGISTER_USAGE
28287 }
28288 
28289 static reg_class_t
28290 arm_preferred_rename_class (reg_class_t rclass)
28291 {
28292   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28293      using GENERAL_REGS.  During the register rename pass, we prefer LO_REGS,
28294      and code size can be reduced.  */
28295   if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28296     return LO_REGS;
28297   else
28298     return NO_REGS;
28299 }
28300 
28301 /* Compute the attribute "length" of insn "*push_multi".
28302    So this function MUST be kept in sync with that insn pattern.  */
28303 int
28304 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
28305 {
28306   int i, regno, hi_reg;
28307   int num_saves = XVECLEN (parallel_op, 0);
28308 
28309   /* ARM mode.  */
28310   if (TARGET_ARM)
28311     return 4;
28312   /* Thumb1 mode.  */
28313   if (TARGET_THUMB1)
28314     return 2;
28315 
28316   /* Thumb2 mode.  */
28317   regno = REGNO (first_op);
28318   /* For PUSH/STM in Thumb-2 mode, a 16-bit encoding can be used if the
28319      register list fits in 8 bits.  Normally this means all registers in the
28320      list must be LO_REGS, that is (R0-R7).  If any HI_REGS register is used,
28321      a 32-bit encoding is required.  The one exception is PUSH, where LR
28322      (a HI_REGS register) is still allowed with the 16-bit encoding.  */
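  /* For instance, "push {r0-r7}" and "push {r4, lr}" can use the 16-bit
     encoding, whereas "push {r4, r8}" or "stmdb sp!, {r4, r8}" must use the
     32-bit encoding.  */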
28323   hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28324   for (i = 1; i < num_saves && !hi_reg; i++)
28325     {
28326       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28327       hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28328     }
28329 
28330   if (!hi_reg)
28331     return 2;
28332   return 4;
28333 }
28334 
28335 /* Compute the attribute "length" of an insn.  Currently, this function is
28336    used for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28337    "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28338    rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28339    is true if OPERANDS contains an insn that explicitly updates the base register.  */
28340 
28341 int
28342 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28343 {
28344   /* ARM mode.  */
28345   if (TARGET_ARM)
28346     return 4;
28347   /* Thumb1 mode.  */
28348   if (TARGET_THUMB1)
28349     return 2;
28350 
28351   rtx parallel_op = operands[0];
28352   /* Index of the last element in the PARALLEL.  */
28353   unsigned indx = XVECLEN (parallel_op, 0) - 1;
28354   /* Start from the register number of the base register.  */
28355   unsigned regno = REGNO (operands[1]);
28356   /* Skip the return and write-back patterns; only the register pop
28357      patterns are needed for the analysis below.  */
28358   unsigned first_indx = 0;
28359   first_indx += return_pc ? 1 : 0;
28360   first_indx += write_back_p ? 1 : 0;
28361 
28362   /* A pop operation can be done through LDM or POP.  If the base register is
28363      SP and write-back is used, then the LDM is an alias of POP.  */
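  /* For instance, "pop {r4-r7, pc}" can use the 16-bit encoding, while an
     LDM whose base register is a high register, or any list containing a
     high register other than PC, requires the 32-bit encoding.  */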
28364   bool pop_p = (regno == SP_REGNUM && write_back_p);
28365   bool ldm_p = !pop_p;
28366 
28367   /* Check base register for LDM.  */
28368   if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28369     return 4;
28370 
28371   /* Check each register in the list.  */
28372   for (; indx >= first_indx; indx--)
28373     {
28374       regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28375       /* For POP, PC in HI_REGS can be used with 16-bit encoding.  See similar
28376 	 comment in arm_attr_length_push_multi.  */
28377       if (REGNO_REG_CLASS (regno) == HI_REGS
28378 	  && (regno != PC_REGNUM || ldm_p))
28379 	return 4;
28380     }
28381 
28382   return 2;
28383 }
28384 
28385 /* Compute the number of instructions emitted by output_move_double.  */
28386 int
28387 arm_count_output_move_double_insns (rtx *operands)
28388 {
28389   int count;
28390   rtx ops[2];
28391   /* output_move_double may modify the operands array, so call it
28392      here on a copy of the array.  */
28393   ops[0] = operands[0];
28394   ops[1] = operands[1];
28395   output_move_double (ops, false, &count);
28396   return count;
28397 }
28398 
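/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of two
   2^n with n in [0, 31], return n; otherwise return 0.  For example, for
   0.125 the reciprocal is 8 == 2^3, so 3 is returned.  This gives the number
   of fraction bits for the VFPv3 fixed-point conversion patterns.  */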
28399 int
28400 vfp3_const_double_for_fract_bits (rtx operand)
28401 {
28402   REAL_VALUE_TYPE r0;
28403 
28404   if (!CONST_DOUBLE_P (operand))
28405     return 0;
28406 
28407   r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28408   if (exact_real_inverse (DFmode, &r0)
28409       && !REAL_VALUE_NEGATIVE (r0))
28410     {
28411       if (exact_real_truncate (DFmode, &r0))
28412 	{
28413 	  HOST_WIDE_INT value = real_to_integer (&r0);
28414 	  value = value & 0xffffffff;
28415 	  if ((value != 0) && ( (value & (value - 1)) == 0))
28416 	    {
28417 	      int ret = exact_log2 (value);
28418 	      gcc_assert (IN_RANGE (ret, 0, 31));
28419 	      return ret;
28420 	    }
28421 	}
28422     }
28423   return 0;
28424 }
28425 
28426 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28427    log2 is in [1, 32], return that log2.  Otherwise return -1.
28428    This is used in the patterns for vcvt.s32.f32 floating-point to
28429    fixed-point conversions.  */
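/* For example, 4.0 yields 2 and 2.0 yields 1, while 3.0 (not a power of
   two), 0.5 (not an integer) and 1.0 (log2 outside [1, 32]) all yield -1.  */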
28430 
28431 int
28432 vfp3_const_double_for_bits (rtx x)
28433 {
28434   const REAL_VALUE_TYPE *r;
28435 
28436   if (!CONST_DOUBLE_P (x))
28437     return -1;
28438 
28439   r = CONST_DOUBLE_REAL_VALUE (x);
28440 
28441   if (REAL_VALUE_NEGATIVE (*r)
28442       || REAL_VALUE_ISNAN (*r)
28443       || REAL_VALUE_ISINF (*r)
28444       || !real_isinteger (r, SFmode))
28445     return -1;
28446 
28447   HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28448 
28449   /* The exact_log2 above will have returned -1 if this is
28450      not an exact log2.  */
28451   if (!IN_RANGE (hwint, 1, 32))
28452     return -1;
28453 
28454   return hwint;
28455 }
28456 
28457 
28458 /* Emit a memory barrier around an atomic sequence according to MODEL.  */
28459 
28460 static void
28461 arm_pre_atomic_barrier (enum memmodel model)
28462 {
28463   if (need_atomic_barrier_p (model, true))
28464     emit_insn (gen_memory_barrier ());
28465 }
28466 
28467 static void
28468 arm_post_atomic_barrier (enum memmodel model)
28469 {
28470   if (need_atomic_barrier_p (model, false))
28471     emit_insn (gen_memory_barrier ());
28472 }
28473 
28474 /* Emit the load-exclusive and store-exclusive instructions.
28475    Use acquire and release versions if necessary.  */
28476 
28477 static void
28478 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28479 {
28480   rtx (*gen) (rtx, rtx);
28481 
28482   if (acq)
28483     {
28484       switch (mode)
28485         {
28486         case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28487         case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28488         case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28489         case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28490         default:
28491           gcc_unreachable ();
28492         }
28493     }
28494   else
28495     {
28496       switch (mode)
28497         {
28498         case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28499         case E_HImode: gen = gen_arm_load_exclusivehi; break;
28500         case E_SImode: gen = gen_arm_load_exclusivesi; break;
28501         case E_DImode: gen = gen_arm_load_exclusivedi; break;
28502         default:
28503           gcc_unreachable ();
28504         }
28505     }
28506 
28507   emit_insn (gen (rval, mem));
28508 }
28509 
28510 static void
28511 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28512                           rtx mem, bool rel)
28513 {
28514   rtx (*gen) (rtx, rtx, rtx);
28515 
28516   if (rel)
28517     {
28518       switch (mode)
28519         {
28520         case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28521         case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28522         case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28523         case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28524         default:
28525           gcc_unreachable ();
28526         }
28527     }
28528   else
28529     {
28530       switch (mode)
28531         {
28532         case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28533         case E_HImode: gen = gen_arm_store_exclusivehi; break;
28534         case E_SImode: gen = gen_arm_store_exclusivesi; break;
28535         case E_DImode: gen = gen_arm_store_exclusivedi; break;
28536         default:
28537           gcc_unreachable ();
28538         }
28539     }
28540 
28541   emit_insn (gen (bval, rval, mem));
28542 }
28543 
28544 /* Mark the previous jump instruction as unlikely.  */
28545 
28546 static void
28547 emit_unlikely_jump (rtx insn)
28548 {
28549   rtx_insn *jump = emit_jump_insn (insn);
28550   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28551 }
28552 
28553 /* Expand a compare and swap pattern.  */
28554 
28555 void
28556 arm_expand_compare_and_swap (rtx operands[])
28557 {
28558   rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28559   machine_mode mode;
28560   rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28561 
28562   bval = operands[0];
28563   rval = operands[1];
28564   mem = operands[2];
28565   oldval = operands[3];
28566   newval = operands[4];
28567   is_weak = operands[5];
28568   mod_s = operands[6];
28569   mod_f = operands[7];
28570   mode = GET_MODE (mem);
28571 
28572   /* Normally the succ memory model must be stronger than fail, but in the
28573      unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28574      promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
28575 
28576   if (TARGET_HAVE_LDACQ
28577       && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28578       && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28579     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28580 
28581   switch (mode)
28582     {
28583     case E_QImode:
28584     case E_HImode:
28585       /* For narrow modes, we're going to perform the comparison in SImode,
28586 	 so do the zero-extension now.  */
28587       rval = gen_reg_rtx (SImode);
28588       oldval = convert_modes (SImode, mode, oldval, true);
28589       /* FALLTHRU */
28590 
28591     case E_SImode:
28592       /* Force the value into a register if needed.  We waited until after
28593 	 the zero-extension above to do this properly.  */
28594       if (!arm_add_operand (oldval, SImode))
28595 	oldval = force_reg (SImode, oldval);
28596       break;
28597 
28598     case E_DImode:
28599       if (!cmpdi_operand (oldval, mode))
28600 	oldval = force_reg (mode, oldval);
28601       break;
28602 
28603     default:
28604       gcc_unreachable ();
28605     }
28606 
28607   if (TARGET_THUMB1)
28608     {
28609       switch (mode)
28610 	{
28611 	case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28612 	case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28613 	case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28614 	case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28615 	default:
28616 	  gcc_unreachable ();
28617 	}
28618     }
28619   else
28620     {
28621       switch (mode)
28622 	{
28623 	case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28624 	case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28625 	case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28626 	case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28627 	default:
28628 	  gcc_unreachable ();
28629 	}
28630     }
28631 
28632   bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28633   emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28634 
28635   if (mode == QImode || mode == HImode)
28636     emit_move_insn (operands[1], gen_lowpart (mode, rval));
28637 
28638   /* In all cases, we arrange for success to be signaled by Z set.
28639      This arrangement allows for the boolean result to be used directly
28640      in a subsequent branch, post optimization.  For Thumb-1 targets, the
28641      boolean negation of the result is also stored in bval because the Thumb-1
28642      backend lacks dependency tracking for the CC flag, as flag-setting is not
28643      represented at the RTL level.  */
28644   if (TARGET_THUMB1)
28645       emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28646   else
28647     {
28648       x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28649       emit_insn (gen_rtx_SET (bval, x));
28650     }
28651 }
28652 
28653 /* Split a compare and swap pattern.  It is IMPLEMENTATION DEFINED whether
28654    another memory store between the load-exclusive and store-exclusive can
28655    reset the monitor from Exclusive to Open state.  This means we must wait
28656    until after reload to split the pattern, lest we get a register spill in
28657    the middle of the atomic sequence.  Success of the compare and swap is
28658    indicated by the Z flag set for 32bit targets and by neg_bval being zero
28659    for Thumb-1 targets (ie. negation of the boolean value returned by
28660    atomic_compare_and_swapmode standard pattern in operand 0).  */
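/* As a rough illustration, for a strong SImode compare-and-swap on a 32-bit
   target the split below produces a sequence of this shape (barriers and
   acquire/release variants omitted; register names are arbitrary):

	.Lretry:
	   ldrex   rval, [mem]
	   cmp     rval, oldval
	   bne     .Ldone
	   strex   neg_bval, newval, [mem]
	   cmp     neg_bval, #0
	   bne     .Lretry
	.Ldone:
*/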
28661 
28662 void
28663 arm_split_compare_and_swap (rtx operands[])
28664 {
28665   rtx rval, mem, oldval, newval, neg_bval;
28666   machine_mode mode;
28667   enum memmodel mod_s, mod_f;
28668   bool is_weak;
28669   rtx_code_label *label1, *label2;
28670   rtx x, cond;
28671 
28672   rval = operands[1];
28673   mem = operands[2];
28674   oldval = operands[3];
28675   newval = operands[4];
28676   is_weak = (operands[5] != const0_rtx);
28677   mod_s = memmodel_from_int (INTVAL (operands[6]));
28678   mod_f = memmodel_from_int (INTVAL (operands[7]));
28679   neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28680   mode = GET_MODE (mem);
28681 
28682   bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28683 
28684   bool use_acquire = TARGET_HAVE_LDACQ
28685                      && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28686 			  || is_mm_release (mod_s));
28687 
28688   bool use_release = TARGET_HAVE_LDACQ
28689                      && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28690 			  || is_mm_acquire (mod_s));
28691 
28692   /* For ARMv8, the load-acquire is too weak for __sync memory orders.  Instead,
28693      a full barrier is emitted after the store-release.  */
28694   if (is_armv8_sync)
28695     use_acquire = false;
28696 
28697   /* Checks whether a barrier is needed and emits one accordingly.  */
28698   if (!(use_acquire || use_release))
28699     arm_pre_atomic_barrier (mod_s);
28700 
28701   label1 = NULL;
28702   if (!is_weak)
28703     {
28704       label1 = gen_label_rtx ();
28705       emit_label (label1);
28706     }
28707   label2 = gen_label_rtx ();
28708 
28709   arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28710 
28711   /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28712      as required to communicate with arm_expand_compare_and_swap.  */
28713   if (TARGET_32BIT)
28714     {
28715       cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28716       x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28717       x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28718 				gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28719       emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28720     }
28721   else
28722     {
28723       emit_move_insn (neg_bval, const1_rtx);
28724       cond = gen_rtx_NE (VOIDmode, rval, oldval);
28725       if (thumb1_cmpneg_operand (oldval, SImode))
28726 	emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28727 						    label2, cond));
28728       else
28729 	emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28730     }
28731 
28732   arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28733 
28734   /* Weak or strong, we want EQ to be true for success, so that we
28735      match the flags that we got from the compare above.  */
28736   if (TARGET_32BIT)
28737     {
28738       cond = gen_rtx_REG (CCmode, CC_REGNUM);
28739       x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28740       emit_insn (gen_rtx_SET (cond, x));
28741     }
28742 
28743   if (!is_weak)
28744     {
28745       /* Z is set to boolean value of !neg_bval, as required to communicate
28746 	 with arm_expand_compare_and_swap.  */
28747       x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28748       emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28749     }
28750 
28751   if (!is_mm_relaxed (mod_f))
28752     emit_label (label2);
28753 
28754   /* Checks whether a barrier is needed and emits one accordingly.  */
28755   if (is_armv8_sync
28756       || !(use_acquire || use_release))
28757     arm_post_atomic_barrier (mod_s);
28758 
28759   if (is_mm_relaxed (mod_f))
28760     emit_label (label2);
28761 }
28762 
28763 /* Split an atomic operation pattern.  Operation is given by CODE and is one
28764    of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28765    operation).  Operation is performed on the content at MEM and on VALUE
28766    following the memory model MODEL_RTX.  The content at MEM before and after
28767    the operation is returned in OLD_OUT and NEW_OUT respectively while the
28768    success of the operation is returned in COND.  Using a scratch register or
28769    an operand register for these determines what result is returned for that
28770    pattern.  */
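/* As a rough illustration, an SImode atomic add is split into a retry loop
   of this shape (barriers and acquire/release variants omitted; register
   names are arbitrary):

	.Lretry:
	   ldrex   old_out, [mem]
	   add     new_out, old_out, value
	   strex   cond, new_out, [mem]
	   cmp     cond, #0
	   bne     .Lretry
*/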
28771 
28772 void
28773 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28774 		     rtx value, rtx model_rtx, rtx cond)
28775 {
28776   enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28777   machine_mode mode = GET_MODE (mem);
28778   machine_mode wmode = (mode == DImode ? DImode : SImode);
28779   rtx_code_label *label;
28780   bool all_low_regs, bind_old_new;
28781   rtx x;
28782 
28783   bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28784 
28785   bool use_acquire = TARGET_HAVE_LDACQ
28786                      && !(is_mm_relaxed (model) || is_mm_consume (model)
28787 			  || is_mm_release (model));
28788 
28789   bool use_release = TARGET_HAVE_LDACQ
28790                      && !(is_mm_relaxed (model) || is_mm_consume (model)
28791 			  || is_mm_acquire (model));
28792 
28793   /* For ARMv8, a load-acquire is too weak for __sync memory orders.  Instead,
28794      a full barrier is emitted after the store-release.  */
28795   if (is_armv8_sync)
28796     use_acquire = false;
28797 
28798   /* Checks whether a barrier is needed and emits one accordingly.  */
28799   if (!(use_acquire || use_release))
28800     arm_pre_atomic_barrier (model);
28801 
28802   label = gen_label_rtx ();
28803   emit_label (label);
28804 
28805   if (new_out)
28806     new_out = gen_lowpart (wmode, new_out);
28807   if (old_out)
28808     old_out = gen_lowpart (wmode, old_out);
28809   else
28810     old_out = new_out;
28811   value = simplify_gen_subreg (wmode, value, mode, 0);
28812 
28813   arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28814 
28815   /* Does the operation require the destination and the first operand to use
28816      the same register?  This is decided by the register constraints of the
28817      relevant insn patterns in thumb1.md.  */
28818   gcc_assert (!new_out || REG_P (new_out));
28819   all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28820 		 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28821 		 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28822   bind_old_new =
28823     (TARGET_THUMB1
28824      && code != SET
28825      && code != MINUS
28826      && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28827 
28828   /* We want to return the old value while putting the result of the operation
28829      in the same register as the old value so copy the old value over to the
28830      destination register and use that register for the operation.  */
28831   if (old_out && bind_old_new)
28832     {
28833       emit_move_insn (new_out, old_out);
28834       old_out = new_out;
28835     }
28836 
28837   switch (code)
28838     {
28839     case SET:
28840       new_out = value;
28841       break;
28842 
28843     case NOT:
28844       x = gen_rtx_AND (wmode, old_out, value);
28845       emit_insn (gen_rtx_SET (new_out, x));
28846       x = gen_rtx_NOT (wmode, new_out);
28847       emit_insn (gen_rtx_SET (new_out, x));
28848       break;
28849 
28850     case MINUS:
28851       if (CONST_INT_P (value))
28852 	{
28853 	  value = GEN_INT (-INTVAL (value));
28854 	  code = PLUS;
28855 	}
28856       /* FALLTHRU */
28857 
28858     case PLUS:
28859       if (mode == DImode)
28860 	{
28861 	  /* DImode plus/minus need to clobber flags.  */
28862 	  /* The adddi3 and subdi3 patterns are incorrectly written so that
28863 	     they require matching operands, even when we could easily support
28864 	     three operands.  Thankfully, this can be fixed up post-splitting,
28865 	     as the individual add+adc patterns do accept three operands and
28866 	     post-reload cprop can make these moves go away.  */
28867 	  emit_move_insn (new_out, old_out);
28868 	  if (code == PLUS)
28869 	    x = gen_adddi3 (new_out, new_out, value);
28870 	  else
28871 	    x = gen_subdi3 (new_out, new_out, value);
28872 	  emit_insn (x);
28873 	  break;
28874 	}
28875       /* FALLTHRU */
28876 
28877     default:
28878       x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28879       emit_insn (gen_rtx_SET (new_out, x));
28880       break;
28881     }
28882 
28883   arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28884                             use_release);
28885 
28886   x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28887   emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28888 
28889   /* Checks whether a barrier is needed and emits one accordingly.  */
28890   if (is_armv8_sync
28891       || !(use_acquire || use_release))
28892     arm_post_atomic_barrier (model);
28893 }
28894 
28895 #define MAX_VECT_LEN 16
28896 
28897 struct expand_vec_perm_d
28898 {
28899   rtx target, op0, op1;
28900   vec_perm_indices perm;
28901   machine_mode vmode;
28902   bool one_vector_p;
28903   bool testing_p;
28904 };
28905 
28906 /* Generate a variable permutation.  */
28907 
28908 static void
28909 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28910 {
28911   machine_mode vmode = GET_MODE (target);
28912   bool one_vector_p = rtx_equal_p (op0, op1);
28913 
28914   gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28915   gcc_checking_assert (GET_MODE (op0) == vmode);
28916   gcc_checking_assert (GET_MODE (op1) == vmode);
28917   gcc_checking_assert (GET_MODE (sel) == vmode);
28918   gcc_checking_assert (TARGET_NEON);
28919 
28920   if (one_vector_p)
28921     {
28922       if (vmode == V8QImode)
28923 	emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28924       else
28925 	emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28926     }
28927   else
28928     {
28929       rtx pair;
28930 
28931       if (vmode == V8QImode)
28932 	{
28933 	  pair = gen_reg_rtx (V16QImode);
28934 	  emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28935 	  pair = gen_lowpart (TImode, pair);
28936 	  emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28937 	}
28938       else
28939 	{
28940 	  pair = gen_reg_rtx (OImode);
28941 	  emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28942 	  emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28943 	}
28944     }
28945 }
28946 
28947 void
28948 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28949 {
28950   machine_mode vmode = GET_MODE (target);
28951   unsigned int nelt = GET_MODE_NUNITS (vmode);
28952   bool one_vector_p = rtx_equal_p (op0, op1);
28953   rtx mask;
28954 
28955   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
28956      numbering of elements for big-endian, we must reverse the order.  */
28957   gcc_checking_assert (!BYTES_BIG_ENDIAN);
28958 
28959   /* The VTBL instruction does not use a modulo index, so we must take care
28960      of that ourselves.  */
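  /* For example, with a single V8QI input vector (NELT == 8) each selector
     lane is ANDed with 7, so a lane value of 10 selects element 2, matching
     the modulo semantics of VEC_PERM_EXPR.  */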
28961   mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28962   mask = gen_const_vec_duplicate (vmode, mask);
28963   sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28964 
28965   arm_expand_vec_perm_1 (target, op0, op1, sel);
28966 }
28967 
28968 /* Map lane ordering between architectural lane order, and GCC lane order,
28969    taking into account ABI.  See comment above output_move_neon for details.  */
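/* For example, for V4SImode held in a quad register on big-endian,
   architectural lane 0 maps to GCC lane (4 - 1 - 0) ^ 2 == 1 and lane 3
   maps to (4 - 1 - 3) ^ 2 == 2; on little-endian the mapping is the
   identity.  */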
28970 
28971 static int
28972 neon_endian_lane_map (machine_mode mode, int lane)
28973 {
28974   if (BYTES_BIG_ENDIAN)
28975   {
28976     int nelems = GET_MODE_NUNITS (mode);
28977     /* Reverse lane order.  */
28978     lane = (nelems - 1 - lane);
28979     /* Reverse D register order, to match ABI.  */
28980     if (GET_MODE_SIZE (mode) == 16)
28981       lane = lane ^ (nelems / 2);
28982   }
28983   return lane;
28984 }
28985 
28986 /* Some permutations index into pairs of vectors; this is a helper function
28987    to map indexes into those pairs of vectors.  */
28988 
28989 static int
28990 neon_pair_endian_lane_map (machine_mode mode, int lane)
28991 {
28992   int nelem = GET_MODE_NUNITS (mode);
28993   if (BYTES_BIG_ENDIAN)
28994     lane =
28995       neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28996   return lane;
28997 }
28998 
28999 /* Generate or test for an insn that supports a constant permutation.  */
29000 
29001 /* Recognize patterns for the VUZP insns.  */
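/* On little-endian, for instance, the two-operand V4SImode selectors
   {0, 2, 4, 6} and {1, 3, 5, 7} (take the even or the odd elements of the
   concatenated inputs) are matched here.  */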
29002 
29003 static bool
29004 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29005 {
29006   unsigned int i, odd, mask, nelt = d->perm.length ();
29007   rtx out0, out1, in0, in1;
29008   rtx (*gen)(rtx, rtx, rtx, rtx);
29009   int first_elem;
29010   int swap_nelt;
29011 
29012   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29013     return false;
29014 
29015   /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29016      big endian pattern on 64 bit vectors, so we correct for that.  */
29017   swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29018     && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29019 
29020   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29021 
29022   if (first_elem == neon_endian_lane_map (d->vmode, 0))
29023     odd = 0;
29024   else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29025     odd = 1;
29026   else
29027     return false;
29028   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29029 
29030   for (i = 0; i < nelt; i++)
29031     {
29032       unsigned elt =
29033 	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29034       if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29035 	return false;
29036     }
29037 
29038   /* Success!  */
29039   if (d->testing_p)
29040     return true;
29041 
29042   switch (d->vmode)
29043     {
29044     case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29045     case E_V8QImode:  gen = gen_neon_vuzpv8qi_internal;  break;
29046     case E_V8HImode:  gen = gen_neon_vuzpv8hi_internal;  break;
29047     case E_V4HImode:  gen = gen_neon_vuzpv4hi_internal;  break;
29048     case E_V8HFmode:  gen = gen_neon_vuzpv8hf_internal;  break;
29049     case E_V4HFmode:  gen = gen_neon_vuzpv4hf_internal;  break;
29050     case E_V4SImode:  gen = gen_neon_vuzpv4si_internal;  break;
29051     case E_V2SImode:  gen = gen_neon_vuzpv2si_internal;  break;
29052     case E_V2SFmode:  gen = gen_neon_vuzpv2sf_internal;  break;
29053     case E_V4SFmode:  gen = gen_neon_vuzpv4sf_internal;  break;
29054     default:
29055       gcc_unreachable ();
29056     }
29057 
29058   in0 = d->op0;
29059   in1 = d->op1;
29060   if (swap_nelt != 0)
29061     std::swap (in0, in1);
29062 
29063   out0 = d->target;
29064   out1 = gen_reg_rtx (d->vmode);
29065   if (odd)
29066     std::swap (out0, out1);
29067 
29068   emit_insn (gen (out0, in0, in1, out1));
29069   return true;
29070 }
29071 
29072 /* Recognize patterns for the VZIP insns.  */
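/* On little-endian, for instance, the two-operand V4SImode selectors
   {0, 4, 1, 5} and {2, 6, 3, 7} (interleave the low or the high halves of
   the two inputs) are matched here.  */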
29073 
29074 static bool
29075 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29076 {
29077   unsigned int i, high, mask, nelt = d->perm.length ();
29078   rtx out0, out1, in0, in1;
29079   rtx (*gen)(rtx, rtx, rtx, rtx);
29080   int first_elem;
29081   bool is_swapped;
29082 
29083   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29084     return false;
29085 
29086   is_swapped = BYTES_BIG_ENDIAN;
29087 
29088   first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29089 
29090   high = nelt / 2;
29091   if (first_elem == neon_endian_lane_map (d->vmode, high))
29092     ;
29093   else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29094     high = 0;
29095   else
29096     return false;
29097   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29098 
29099   for (i = 0; i < nelt / 2; i++)
29100     {
29101       unsigned elt =
29102 	neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29103       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29104 	  != elt)
29105 	return false;
29106       elt =
29107 	neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29108       if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29109 	  != elt)
29110 	return false;
29111     }
29112 
29113   /* Success!  */
29114   if (d->testing_p)
29115     return true;
29116 
29117   switch (d->vmode)
29118     {
29119     case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29120     case E_V8QImode:  gen = gen_neon_vzipv8qi_internal;  break;
29121     case E_V8HImode:  gen = gen_neon_vzipv8hi_internal;  break;
29122     case E_V4HImode:  gen = gen_neon_vzipv4hi_internal;  break;
29123     case E_V8HFmode:  gen = gen_neon_vzipv8hf_internal;  break;
29124     case E_V4HFmode:  gen = gen_neon_vzipv4hf_internal;  break;
29125     case E_V4SImode:  gen = gen_neon_vzipv4si_internal;  break;
29126     case E_V2SImode:  gen = gen_neon_vzipv2si_internal;  break;
29127     case E_V2SFmode:  gen = gen_neon_vzipv2sf_internal;  break;
29128     case E_V4SFmode:  gen = gen_neon_vzipv4sf_internal;  break;
29129     default:
29130       gcc_unreachable ();
29131     }
29132 
29133   in0 = d->op0;
29134   in1 = d->op1;
29135   if (is_swapped)
29136     std::swap (in0, in1);
29137 
29138   out0 = d->target;
29139   out1 = gen_reg_rtx (d->vmode);
29140   if (high)
29141     std::swap (out0, out1);
29142 
29143   emit_insn (gen (out0, in0, in1, out1));
29144   return true;
29145 }
29146 
29147 /* Recognize patterns for the VREV insns.  */
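/* For instance, the single-operand V8QImode selector {1, 0, 3, 2, 5, 4, 7, 6}
   (diff == 1) is matched here and emitted as a VREV16.  */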
29148 
29149 static bool
29150 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29151 {
29152   unsigned int i, j, diff, nelt = d->perm.length ();
29153   rtx (*gen)(rtx, rtx);
29154 
29155   if (!d->one_vector_p)
29156     return false;
29157 
29158   diff = d->perm[0];
29159   switch (diff)
29160     {
29161     case 7:
29162       switch (d->vmode)
29163 	{
29164 	case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29165 	case E_V8QImode:  gen = gen_neon_vrev64v8qi;  break;
29166 	default:
29167 	  return false;
29168 	}
29169       break;
29170     case 3:
29171       switch (d->vmode)
29172 	{
29173 	case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29174 	case E_V8QImode:  gen = gen_neon_vrev32v8qi;  break;
29175 	case E_V8HImode:  gen = gen_neon_vrev64v8hi;  break;
29176 	case E_V4HImode:  gen = gen_neon_vrev64v4hi;  break;
29177 	case E_V8HFmode:  gen = gen_neon_vrev64v8hf;  break;
29178 	case E_V4HFmode:  gen = gen_neon_vrev64v4hf;  break;
29179 	default:
29180 	  return false;
29181 	}
29182       break;
29183     case 1:
29184       switch (d->vmode)
29185 	{
29186 	case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29187 	case E_V8QImode:  gen = gen_neon_vrev16v8qi;  break;
29188 	case E_V8HImode:  gen = gen_neon_vrev32v8hi;  break;
29189 	case E_V4HImode:  gen = gen_neon_vrev32v4hi;  break;
29190 	case E_V4SImode:  gen = gen_neon_vrev64v4si;  break;
29191 	case E_V2SImode:  gen = gen_neon_vrev64v2si;  break;
29192 	case E_V4SFmode:  gen = gen_neon_vrev64v4sf;  break;
29193 	case E_V2SFmode:  gen = gen_neon_vrev64v2sf;  break;
29194 	default:
29195 	  return false;
29196 	}
29197       break;
29198     default:
29199       return false;
29200     }
29201 
29202   for (i = 0; i < nelt ; i += diff + 1)
29203     for (j = 0; j <= diff; j += 1)
29204       {
29205 	/* This is guaranteed to be true as the value of diff
29206 	   is 7, 3, 1 and we should have enough elements in the
29207 	   queue to generate this. Getting a vector mask with a
29208 	   value of diff other than these values implies that
29209 	   something is wrong by the time we get here.  */
29210 	gcc_assert (i + j < nelt);
29211 	if (d->perm[i + j] != i + diff - j)
29212 	  return false;
29213       }
29214 
29215   /* Success! */
29216   if (d->testing_p)
29217     return true;
29218 
29219   emit_insn (gen (d->target, d->op0));
29220   return true;
29221 }
29222 
29223 /* Recognize patterns for the VTRN insns.  */
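/* On little-endian, for instance, the two-operand V4SImode selectors
   {0, 4, 2, 6} and {1, 5, 3, 7} are matched here.  */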
29224 
29225 static bool
29226 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29227 {
29228   unsigned int i, odd, mask, nelt = d->perm.length ();
29229   rtx out0, out1, in0, in1;
29230   rtx (*gen)(rtx, rtx, rtx, rtx);
29231 
29232   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29233     return false;
29234 
29235   /* Note that these are little-endian tests.  Adjust for big-endian later.  */
29236   if (d->perm[0] == 0)
29237     odd = 0;
29238   else if (d->perm[0] == 1)
29239     odd = 1;
29240   else
29241     return false;
29242   mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29243 
29244   for (i = 0; i < nelt; i += 2)
29245     {
29246       if (d->perm[i] != i + odd)
29247 	return false;
29248       if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29249 	return false;
29250     }
29251 
29252   /* Success!  */
29253   if (d->testing_p)
29254     return true;
29255 
29256   switch (d->vmode)
29257     {
29258     case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29259     case E_V8QImode:  gen = gen_neon_vtrnv8qi_internal;  break;
29260     case E_V8HImode:  gen = gen_neon_vtrnv8hi_internal;  break;
29261     case E_V4HImode:  gen = gen_neon_vtrnv4hi_internal;  break;
29262     case E_V8HFmode:  gen = gen_neon_vtrnv8hf_internal;  break;
29263     case E_V4HFmode:  gen = gen_neon_vtrnv4hf_internal;  break;
29264     case E_V4SImode:  gen = gen_neon_vtrnv4si_internal;  break;
29265     case E_V2SImode:  gen = gen_neon_vtrnv2si_internal;  break;
29266     case E_V2SFmode:  gen = gen_neon_vtrnv2sf_internal;  break;
29267     case E_V4SFmode:  gen = gen_neon_vtrnv4sf_internal;  break;
29268     default:
29269       gcc_unreachable ();
29270     }
29271 
29272   in0 = d->op0;
29273   in1 = d->op1;
29274   if (BYTES_BIG_ENDIAN)
29275     {
29276       std::swap (in0, in1);
29277       odd = !odd;
29278     }
29279 
29280   out0 = d->target;
29281   out1 = gen_reg_rtx (d->vmode);
29282   if (odd)
29283     std::swap (out0, out1);
29284 
29285   emit_insn (gen (out0, in0, in1, out1));
29286   return true;
29287 }
29288 
29289 /* Recognize patterns for the VEXT insns.  */
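/* For instance, the two-operand V4SImode selector {1, 2, 3, 4} selects four
   consecutive elements starting at index 1 of the concatenated inputs and is
   emitted as a VEXT with offset 1.  */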
29290 
29291 static bool
29292 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29293 {
29294   unsigned int i, nelt = d->perm.length ();
29295   rtx (*gen) (rtx, rtx, rtx, rtx);
29296   rtx offset;
29297 
29298   unsigned int location;
29299 
29300   unsigned int next  = d->perm[0] + 1;
29301 
29302   /* TODO: Handle GCC's numbering of elements for big-endian.  */
29303   if (BYTES_BIG_ENDIAN)
29304     return false;
29305 
29306   /* Check if the extracted indexes are increasing by one.  */
29307   for (i = 1; i < nelt; next++, i++)
29308     {
29309       /* If we hit the most significant element of the 2nd vector in
29310 	 the previous iteration, no need to test further.  */
29311       if (next == 2 * nelt)
29312 	return false;
29313 
29314       /* If we are operating on only one vector: it could be a
29315 	 rotation.  If there are only two elements of size < 64, let
29316 	 arm_evpc_neon_vrev catch it.  */
29317       if (d->one_vector_p && (next == nelt))
29318 	{
29319 	  if ((nelt == 2) && (d->vmode != V2DImode))
29320 	    return false;
29321 	  else
29322 	    next = 0;
29323 	}
29324 
29325       if (d->perm[i] != next)
29326 	return false;
29327     }
29328 
29329   location = d->perm[0];
29330 
29331   switch (d->vmode)
29332     {
29333     case E_V16QImode: gen = gen_neon_vextv16qi; break;
29334     case E_V8QImode: gen = gen_neon_vextv8qi; break;
29335     case E_V4HImode: gen = gen_neon_vextv4hi; break;
29336     case E_V8HImode: gen = gen_neon_vextv8hi; break;
29337     case E_V2SImode: gen = gen_neon_vextv2si; break;
29338     case E_V4SImode: gen = gen_neon_vextv4si; break;
29339     case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29340     case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29341     case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29342     case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29343     case E_V2DImode: gen = gen_neon_vextv2di; break;
29344     default:
29345       return false;
29346     }
29347 
29348   /* Success! */
29349   if (d->testing_p)
29350     return true;
29351 
29352   offset = GEN_INT (location);
29353   emit_insn (gen (d->target, d->op0, d->op1, offset));
29354   return true;
29355 }
29356 
29357 /* The NEON VTBL instruction is a fully variable permutation that's even
29358    stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
29359    is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
29360    can do slightly better by expanding this as a constant where we don't
29361    have to apply a mask.  */
29362 
29363 static bool
29364 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29365 {
29366   rtx rperm[MAX_VECT_LEN], sel;
29367   machine_mode vmode = d->vmode;
29368   unsigned int i, nelt = d->perm.length ();
29369 
29370   /* TODO: ARM's VTBL indexing is little-endian.  In order to handle GCC's
29371      numbering of elements for big-endian, we must reverse the order.  */
29372   if (BYTES_BIG_ENDIAN)
29373     return false;
29374 
29375   if (d->testing_p)
29376     return true;
29377 
29378   /* Generic code will try constant permutation twice.  Once with the
29379      original mode and again with the elements lowered to QImode.
29380      So wait and don't do the selector expansion ourselves.  */
29381   if (vmode != V8QImode && vmode != V16QImode)
29382     return false;
29383 
29384   for (i = 0; i < nelt; ++i)
29385     rperm[i] = GEN_INT (d->perm[i]);
29386   sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29387   sel = force_reg (vmode, sel);
29388 
29389   arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29390   return true;
29391 }
29392 
29393 static bool
29394 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29395 {
29396   /* Check if the input mask matches vext before reordering the
29397      operands.  */
29398   if (TARGET_NEON)
29399     if (arm_evpc_neon_vext (d))
29400       return true;
29401 
29402   /* The pattern matching functions above are written to look for a small
29403      number to begin the sequence (0, 1, N/2).  If we begin with an index
29404      from the second operand, we can swap the operands.  */
29405   unsigned int nelt = d->perm.length ();
29406   if (d->perm[0] >= nelt)
29407     {
29408       d->perm.rotate_inputs (1);
29409       std::swap (d->op0, d->op1);
29410     }
29411 
29412   if (TARGET_NEON)
29413     {
29414       if (arm_evpc_neon_vuzp (d))
29415 	return true;
29416       if (arm_evpc_neon_vzip (d))
29417 	return true;
29418       if (arm_evpc_neon_vrev (d))
29419 	return true;
29420       if (arm_evpc_neon_vtrn (d))
29421 	return true;
29422       return arm_evpc_neon_vtbl (d);
29423     }
29424   return false;
29425 }
29426 
29427 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
29428 
29429 static bool
29430 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29431 			      const vec_perm_indices &sel)
29432 {
29433   struct expand_vec_perm_d d;
29434   int i, nelt, which;
29435 
29436   if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29437     return false;
29438 
29439   d.target = target;
29440   d.op0 = op0;
29441   d.op1 = op1;
29442 
29443   d.vmode = vmode;
29444   gcc_assert (VECTOR_MODE_P (d.vmode));
29445   d.testing_p = !target;
29446 
29447   nelt = GET_MODE_NUNITS (d.vmode);
29448   for (i = which = 0; i < nelt; ++i)
29449     {
29450       int ei = sel[i] & (2 * nelt - 1);
29451       which |= (ei < nelt ? 1 : 2);
29452     }
29453 
29454   switch (which)
29455     {
29456     default:
29457       gcc_unreachable();
29458 
29459     case 3:
29460       d.one_vector_p = false;
29461       if (d.testing_p || !rtx_equal_p (op0, op1))
29462 	break;
29463 
29464       /* The elements of PERM do not suggest that only the first operand
29465 	 is used, but both operands are identical.  Allow easier matching
29466 	 of the permutation by folding the permutation into the single
29467 	 input vector.  */
29468       /* FALLTHRU */
29469     case 2:
29470       d.op0 = op1;
29471       d.one_vector_p = true;
29472       break;
29473 
29474     case 1:
29475       d.op1 = op0;
29476       d.one_vector_p = true;
29477       break;
29478     }
29479 
29480   d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29481 
29482   if (!d.testing_p)
29483     return arm_expand_vec_perm_const_1 (&d);
29484 
29485   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29486   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29487   if (!d.one_vector_p)
29488     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29489 
29490   start_sequence ();
29491   bool ret = arm_expand_vec_perm_const_1 (&d);
29492   end_sequence ();
29493 
29494   return ret;
29495 }
29496 
29497 bool
29498 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29499 {
29500   /* If we are soft float and either have LDRD or the mode fits in a
29501      single word, then all auto increment forms are ok.  */
29502   if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29503     return true;
29504 
29505   switch (code)
29506     {
29507       /* Post increment and Pre Decrement are supported for all
29508 	 instruction forms except for vector forms.  */
29509     case ARM_POST_INC:
29510     case ARM_PRE_DEC:
29511       if (VECTOR_MODE_P (mode))
29512 	{
29513 	  if (code != ARM_PRE_DEC)
29514 	    return true;
29515 	  else
29516 	    return false;
29517 	}
29518 
29519       return true;
29520 
29521     case ARM_POST_DEC:
29522     case ARM_PRE_INC:
29523       /* Without LDRD and mode size greater than
29524 	 word size, there is no point in auto-incrementing
29525          because ldm and stm will not have these forms.  */
29526       if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29527 	return false;
29528 
29529       /* Vector and floating point modes do not support
29530 	 these auto increment forms.  */
29531       if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29532 	return false;
29533 
29534       return true;
29535 
29536     default:
29537       return false;
29538 
29539     }
29540 
29541   return false;
29542 }
29543 
29544 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29545    on ARM, since we know that shifts by negative amounts are no-ops.
29546    Additionally, the default expansion code is not available or suitable
29547    for post-reload insn splits (this can occur when the register allocator
29548    chooses not to do a shift in NEON).
29549 
29550    This function is used in both initial expand and post-reload splits, and
29551    handles all kinds of 64-bit shifts.
29552 
29553    Input requirements:
29554     - It is safe for the input and output to be the same register, but
29555       early-clobber rules apply for the shift amount and scratch registers.
29556     - Shift by register requires both scratch registers.  In all other cases
29557       the scratch registers may be NULL.
29558     - Ashiftrt by a register also clobbers the CC register.  */
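/* For a constant shift the splitter below emits straight-line code; e.g. a
   left shift of a 64-bit value by 5 becomes, in terms of the 32-bit halves,
   out_high = (in_high << 5) | (in_low >> 27) and out_low = in_low << 5.  */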
29559 void
29560 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29561 			       rtx amount, rtx scratch1, rtx scratch2)
29562 {
29563   rtx out_high = gen_highpart (SImode, out);
29564   rtx out_low = gen_lowpart (SImode, out);
29565   rtx in_high = gen_highpart (SImode, in);
29566   rtx in_low = gen_lowpart (SImode, in);
29567 
29568   /* Terminology:
29569 	in = the register pair containing the input value.
29570 	out = the destination register pair.
29571 	up = the high- or low-part of each pair.
29572 	down = the opposite part to "up".
29573      In a shift, we can consider bits to shift from "up"-stream to
29574      "down"-stream, so in a left-shift "up" is the low-part and "down"
29575      is the high-part of each register pair.  */
29576 
29577   rtx out_up   = code == ASHIFT ? out_low : out_high;
29578   rtx out_down = code == ASHIFT ? out_high : out_low;
29579   rtx in_up   = code == ASHIFT ? in_low : in_high;
29580   rtx in_down = code == ASHIFT ? in_high : in_low;
29581 
29582   gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29583   gcc_assert (out
29584 	      && (REG_P (out) || GET_CODE (out) == SUBREG)
29585 	      && GET_MODE (out) == DImode);
29586   gcc_assert (in
29587 	      && (REG_P (in) || GET_CODE (in) == SUBREG)
29588 	      && GET_MODE (in) == DImode);
29589   gcc_assert (amount
29590 	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29591 		   && GET_MODE (amount) == SImode)
29592 		  || CONST_INT_P (amount)));
29593   gcc_assert (scratch1 == NULL
29594 	      || (GET_CODE (scratch1) == SCRATCH)
29595 	      || (GET_MODE (scratch1) == SImode
29596 		  && REG_P (scratch1)));
29597   gcc_assert (scratch2 == NULL
29598 	      || (GET_CODE (scratch2) == SCRATCH)
29599 	      || (GET_MODE (scratch2) == SImode
29600 		  && REG_P (scratch2)));
29601   gcc_assert (!REG_P (out) || !REG_P (amount)
29602 	      || !HARD_REGISTER_P (out)
29603 	      || (REGNO (out) != REGNO (amount)
29604 		  && REGNO (out) + 1 != REGNO (amount)));
29605 
29606   /* Macros to make following code more readable.  */
29607   #define SUB_32(DEST,SRC) \
29608 	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29609   #define RSB_32(DEST,SRC) \
29610 	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29611   #define SUB_S_32(DEST,SRC) \
29612 	    gen_addsi3_compare0 ((DEST), (SRC), \
29613 				 GEN_INT (-32))
29614   #define SET(DEST,SRC) \
29615 	    gen_rtx_SET ((DEST), (SRC))
29616   #define SHIFT(CODE,SRC,AMOUNT) \
29617 	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29618   #define LSHIFT(CODE,SRC,AMOUNT) \
29619 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29620 			    SImode, (SRC), (AMOUNT))
29621   #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29622 	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29623 			    SImode, (SRC), (AMOUNT))
29624   #define ORR(A,B) \
29625 	    gen_rtx_IOR (SImode, (A), (B))
29626   #define BRANCH(COND,LABEL) \
29627 	    gen_arm_cond_branch ((LABEL), \
29628 				 gen_rtx_ ## COND (CCmode, cc_reg, \
29629 						   const0_rtx), \
29630 				 cc_reg)
29631 
29632   /* Shifts by register and shifts by constant are handled separately.  */
29633   if (CONST_INT_P (amount))
29634     {
29635       /* We have a shift-by-constant.  */
29636 
29637       /* First, handle out-of-range shift amounts.
29638 	 In both cases we try to match the result an ARM instruction in a
29639 	 shift-by-register would give.  This helps reduce execution
29640 	 differences between optimization levels, but it won't stop other
29641          parts of the compiler doing different things.  This is "undefined
29642          behavior", in any case.  */
29643       if (INTVAL (amount) <= 0)
29644 	emit_insn (gen_movdi (out, in));
29645       else if (INTVAL (amount) >= 64)
29646 	{
29647 	  if (code == ASHIFTRT)
29648 	    {
29649 	      rtx const31_rtx = GEN_INT (31);
29650 	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29651 	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29652 	    }
29653 	  else
29654 	    emit_insn (gen_movdi (out, const0_rtx));
29655 	}
29656 
29657       /* Now handle valid shifts. */
29658       else if (INTVAL (amount) < 32)
29659 	{
29660 	  /* Shifts by a constant less than 32.  */
29661 	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29662 
29663 	  /* Clearing the out register in DImode first avoids lots
29664 	     of spilling and results in less stack usage.
29665 	     Later this redundant insn is completely removed.
29666 	     Do that only if "in" and "out" are different registers.  */
29667 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29668 	    emit_insn (SET (out, const0_rtx));
29669 	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29670 	  emit_insn (SET (out_down,
29671 			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
29672 			       out_down)));
29673 	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29674 	}
29675       else
29676 	{
29677 	  /* Shifts by a constant greater than 31.  */
29678 	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29679 
29680 	  if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29681 	    emit_insn (SET (out, const0_rtx));
29682 	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29683 	  if (code == ASHIFTRT)
29684 	    emit_insn (gen_ashrsi3 (out_up, in_up,
29685 				    GEN_INT (31)));
29686 	  else
29687 	    emit_insn (SET (out_up, const0_rtx));
29688 	}
29689     }
29690   else
29691     {
29692       /* We have a shift-by-register.  */
29693       rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29694 
29695       /* This alternative requires the scratch registers.  */
29696       gcc_assert (scratch1 && REG_P (scratch1));
29697       gcc_assert (scratch2 && REG_P (scratch2));
29698 
29699       /* We will need the values "amount-32" and "32-amount" later.
29700          Swapping them around now allows the later code to be more general. */
29701       switch (code)
29702 	{
29703 	case ASHIFT:
29704 	  emit_insn (SUB_32 (scratch1, amount));
29705 	  emit_insn (RSB_32 (scratch2, amount));
29706 	  break;
29707 	case ASHIFTRT:
29708 	  emit_insn (RSB_32 (scratch1, amount));
29709 	  /* Also set CC = amount > 32.  */
29710 	  emit_insn (SUB_S_32 (scratch2, amount));
29711 	  break;
29712 	case LSHIFTRT:
29713 	  emit_insn (RSB_32 (scratch1, amount));
29714 	  emit_insn (SUB_32 (scratch2, amount));
29715 	  break;
29716 	default:
29717 	  gcc_unreachable ();
29718 	}
29719 
29720       /* Emit code like this:
29721 
29722 	 arithmetic-left:
29723 	    out_down = in_down << amount;
29724 	    out_down = (in_up << (amount - 32)) | out_down;
29725 	    out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29726 	    out_up = in_up << amount;
29727 
29728 	 arithmetic-right:
29729 	    out_down = in_down >> amount;
29730 	    out_down = (in_up << (32 - amount)) | out_down;
29731 	    if (amount < 32)
29732 	      out_down = ((signed)in_up >> (amount - 32)) | out_down;
29733 	    out_up = in_up << amount;
29734 
29735 	 logical-right:
29736 	    out_down = in_down >> amount;
29737 	    out_down = (in_up << (32 - amount)) | out_down;
29738 	    if (amount < 32)
29739 	      out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29740 	    out_up = in_up << amount;
29741 
29742 	  The ARM and Thumb2 variants are the same but implemented slightly
29743 	  differently.  If this were only called during expand we could just
29744 	  use the Thumb2 case and let combine do the right thing, but this
29745 	  can also be called from post-reload splitters.  */
29746 
29747       emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29748 
29749       if (!TARGET_THUMB2)
29750 	{
29751 	  /* Emit code for ARM mode.  */
29752 	  emit_insn (SET (out_down,
29753 			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29754 	  if (code == ASHIFTRT)
29755 	    {
29756 	      rtx_code_label *done_label = gen_label_rtx ();
29757 	      emit_jump_insn (BRANCH (LT, done_label));
29758 	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29759 					     out_down)));
29760 	      emit_label (done_label);
29761 	    }
29762 	  else
29763 	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29764 					   out_down)));
29765 	}
29766       else
29767 	{
29768 	  /* Emit code for Thumb2 mode.
29769 	     Thumb2 can't do shift and or in one insn.  */
29770 	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29771 	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29772 
29773 	  if (code == ASHIFTRT)
29774 	    {
29775 	      rtx_code_label *done_label = gen_label_rtx ();
29776 	      emit_jump_insn (BRANCH (LT, done_label));
29777 	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29778 	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
29779 	      emit_label (done_label);
29780 	    }
29781 	  else
29782 	    {
29783 	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29784 	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29785 	    }
29786 	}
29787 
29788       emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29789     }
29790 
29791   #undef SUB_32
29792   #undef RSB_32
29793   #undef SUB_S_32
29794   #undef SET
29795   #undef SHIFT
29796   #undef LSHIFT
29797   #undef REV_LSHIFT
29798   #undef ORR
29799   #undef BRANCH
29800 }
29801 
29802 /* Returns true if the pattern is a valid symbolic address, which is either a
29803    symbol_ref or (symbol_ref + addend).
29804 
29805    According to the ARM ELF ABI, the initial addend of REL-type relocations
29806    processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29807    literal field of the instruction as a 16-bit signed value in the range
29808    -32768 <= A < 32768.  */
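/* For example, (const (plus (symbol_ref "foo") (const_int 4))) is a valid
   symbolic address, while an addend of 0x12345 is rejected because it does
   not fit the signed 16-bit range above.  */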
29809 
29810 bool
29811 arm_valid_symbolic_address_p (rtx addr)
29812 {
29813   rtx xop0, xop1 = NULL_RTX;
29814   rtx tmp = addr;
29815 
29816   if (target_word_relocations)
29817     return false;
29818 
29819   if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29820     return true;
29821 
29822   /* (const (plus: symbol_ref const_int))  */
29823   if (GET_CODE (addr) == CONST)
29824     tmp = XEXP (addr, 0);
29825 
29826   if (GET_CODE (tmp) == PLUS)
29827     {
29828       xop0 = XEXP (tmp, 0);
29829       xop1 = XEXP (tmp, 1);
29830 
29831       if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29832 	  return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29833     }
29834 
29835   return false;
29836 }
29837 
29838 /* Return true if COMPARISON is a valid comparison operation, and put its
29839    operands into a form that is valid for it.  */
29840 bool
29841 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
29842 {
29843   enum rtx_code code = GET_CODE (*comparison);
29844   int code_int;
29845   machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29846     ? GET_MODE (*op2) : GET_MODE (*op1);
29847 
29848   gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29849 
29850   if (code == UNEQ || code == LTGT)
29851     return false;
29852 
29853   code_int = (int)code;
29854   arm_canonicalize_comparison (&code_int, op1, op2, 0);
29855   PUT_CODE (*comparison, (enum rtx_code)code_int);
29856 
29857   switch (mode)
29858     {
29859     case E_SImode:
29860       if (!arm_add_operand (*op1, mode))
29861 	*op1 = force_reg (mode, *op1);
29862       if (!arm_add_operand (*op2, mode))
29863 	*op2 = force_reg (mode, *op2);
29864       return true;
29865 
29866     case E_DImode:
29867       if (!cmpdi_operand (*op1, mode))
29868 	*op1 = force_reg (mode, *op1);
29869       if (!cmpdi_operand (*op2, mode))
29870 	*op2 = force_reg (mode, *op2);
29871       return true;
29872 
29873     case E_HFmode:
29874       if (!TARGET_VFP_FP16INST)
29875 	break;
29876       /* FP16 comparisons are done in SF mode.  */
29877       mode = SFmode;
29878       *op1 = convert_to_mode (mode, *op1, 1);
29879       *op2 = convert_to_mode (mode, *op2, 1);
29880       /* Fall through.  */
29881     case E_SFmode:
29882     case E_DFmode:
29883       if (!vfp_compare_operand (*op1, mode))
29884 	*op1 = force_reg (mode, *op1);
29885       if (!vfp_compare_operand (*op2, mode))
29886 	*op2 = force_reg (mode, *op2);
29887       return true;
29888     default:
29889       break;
29890     }
29891 
29892   return false;
29893 
29894 }
29895 
29896 /* Maximum number of instructions to use when inlining a block memory set.  */
29897 static int
29898 arm_block_set_max_insns (void)
29899 {
29900   if (optimize_function_for_size_p (cfun))
29901     return 4;
29902   else
29903     return current_tune->max_insns_inline_memset;
29904 }
29905 
29906 /* Return TRUE if it's profitable to set block of memory for
29907    non-vectorized case.  VAL is the value to set the memory
29908    with.  LENGTH is the number of bytes to set.  ALIGN is the
29909    alignment of the destination memory in bytes.  UNALIGNED_P
29910    is TRUE if we can only set the memory with instructions
29911    meeting alignment requirements.  USE_STRD_P is TRUE if we
29912    can use strd to set the memory.  */
29913 static bool
29914 arm_block_set_non_vect_profit_p (rtx val,
29915 				 unsigned HOST_WIDE_INT length,
29916 				 unsigned HOST_WIDE_INT align,
29917 				 bool unaligned_p, bool use_strd_p)
29918 {
29919   int num = 0;
29920   /* For a leftover of 0-7 bytes, this table gives the minimum number of
29921      strb/strh/str instructions needed to store it.  */
29922   const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29923 
29924   if (unaligned_p)
29925     {
29926       num = arm_const_inline_cost (SET, val);
29927       num += length / align + length % align;
29928     }
29929   else if (use_strd_p)
29930     {
29931       num = arm_const_double_inline_cost (val);
29932       num += (length >> 3) + leftover[length & 7];
29933     }
29934   else
29935     {
29936       num = arm_const_inline_cost (SET, val);
29937       num += (length >> 2) + leftover[length & 3];
29938     }
29939 
29940   /* We may be able to combine last pair STRH/STRB into a single STR
29941      by shifting one byte back.  */
29942   if (unaligned_access && length > 3 && (length & 3) == 3)
29943     num--;
29944 
29945   return (num <= arm_block_set_max_insns ());
29946 }
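
/* Worked example of the estimate above (assuming the value can be
   materialized with a single instruction, i.e. arm_const_inline_cost
   returns 1): for LENGTH == 15 with word-sized stores the count is
   1 + (15 >> 2) + leftover[3] = 1 + 3 + 2 = 6, and the trailing STRH/STRB
   pair is merged into one overlapping STR when unaligned access is
   available, giving 5 instructions to compare against
   arm_block_set_max_insns ().  */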
29947 
29948 /* Return TRUE if it's profitable to set block of memory for
29949    vectorized case.  LENGTH is the number of bytes to set.
29950    ALIGN is the alignment of destination memory in bytes.
29951    MODE is the vector mode used to set the memory.  */
29952 static bool
29953 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29954 			     unsigned HOST_WIDE_INT align,
29955 			     machine_mode mode)
29956 {
29957   int num;
29958   bool unaligned_p = ((align & 3) != 0);
29959   unsigned int nelt = GET_MODE_NUNITS (mode);
29960 
29961   /* Instruction loading constant value.  */
29962   num = 1;
29963   /* Instructions storing the memory.  */
29964   num += (length + nelt - 1) / nelt;
29965   /* Instructions adjusting the address expression.  Only need to
29966      adjust address expression if it's 4 bytes aligned and bytes
29967      leftover can only be stored by mis-aligned store instruction.  */
29968   if (!unaligned_p && (length & 3) != 0)
29969     num++;
29970 
29971   /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
29972   if (!unaligned_p && mode == V16QImode)
29973     num--;
29974 
29975   return (num <= arm_block_set_max_insns ());
29976 }
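
/* For instance, with LENGTH == 31 and a word-aligned destination using
   V16QImode: one instruction to load the constant vector, two vst1
   stores (ceiling of 31 / 16), plus one address adjustment for the
   misaligned tail, minus one for the first vst1:v16qi store in the
   aligned case, i.e. 1 + 2 + 1 - 1 = 3 instructions.  */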
29977 
29978 /* Set a block of memory using vectorization instructions for the
29979    unaligned case.  We fill the first LENGTH bytes of the memory
29980    area starting from DSTBASE with byte constant VALUE.  ALIGN is
29981    the alignment requirement of memory.  Return TRUE if succeeded.  */
29982 static bool
29983 arm_block_set_unaligned_vect (rtx dstbase,
29984 			      unsigned HOST_WIDE_INT length,
29985 			      unsigned HOST_WIDE_INT value,
29986 			      unsigned HOST_WIDE_INT align)
29987 {
29988   unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29989   rtx dst, mem;
29990   rtx val_vec, reg;
29991   rtx (*gen_func) (rtx, rtx);
29992   machine_mode mode;
29993   unsigned HOST_WIDE_INT v = value;
29994   unsigned int offset = 0;
29995   gcc_assert ((align & 0x3) != 0);
29996   nelt_v8 = GET_MODE_NUNITS (V8QImode);
29997   nelt_v16 = GET_MODE_NUNITS (V16QImode);
29998   if (length >= nelt_v16)
29999     {
30000       mode = V16QImode;
30001       gen_func = gen_movmisalignv16qi;
30002     }
30003   else
30004     {
30005       mode = V8QImode;
30006       gen_func = gen_movmisalignv8qi;
30007     }
30008   nelt_mode = GET_MODE_NUNITS (mode);
30009   gcc_assert (length >= nelt_mode);
30010   /* Skip if it isn't profitable.  */
30011   if (!arm_block_set_vect_profit_p (length, align, mode))
30012     return false;
30013 
30014   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30015   mem = adjust_automodify_address (dstbase, mode, dst, offset);
30016 
30017   v = sext_hwi (v, BITS_PER_WORD);
30018 
30019   reg = gen_reg_rtx (mode);
30020   val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30021   /* Emit instruction loading the constant value.  */
30022   emit_move_insn (reg, val_vec);
30023 
30024   /* Handle nelt_mode bytes in a vector.  */
30025   for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30026     {
30027       emit_insn ((*gen_func) (mem, reg));
30028       if (i + 2 * nelt_mode <= length)
30029 	{
30030 	  emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30031 	  offset += nelt_mode;
30032 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
30033 	}
30034     }
30035 
30036   /* If at least nelt_v8 bytes are left over, we must be in
30037      V16QI mode.  */
30038   gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30039 
30040   /* Handle (8, 16) bytes leftover.  */
30041   if (i + nelt_v8 < length)
30042     {
30043       emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30044       offset += length - i;
30045       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30046 
30047       /* We are shifting bytes back, set the alignment accordingly.  */
30048       if ((length & 1) != 0 && align >= 2)
30049 	set_mem_align (mem, BITS_PER_UNIT);
30050 
30051       emit_insn (gen_movmisalignv16qi (mem, reg));
30052     }
30053   /* Handle (0, 8] bytes leftover.  */
30054   else if (i < length && i + nelt_v8 >= length)
30055     {
30056       if (mode == V16QImode)
30057 	reg = gen_lowpart (V8QImode, reg);
30058 
30059       emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30060 					      + (nelt_mode - nelt_v8))));
30061       offset += (length - i) + (nelt_mode - nelt_v8);
30062       mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30063 
30064       /* We are shifting bytes back, set the alignment accordingly.  */
30065       if ((length & 1) != 0 && align >= 2)
30066 	set_mem_align (mem, BITS_PER_UNIT);
30067 
30068       emit_insn (gen_movmisalignv8qi (mem, reg));
30069     }
30070 
30071   return true;
30072 }
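
/* Roughly speaking, for LENGTH == 23 and an unaligned destination this
   expands to something like the following (illustrative only; the
   register choice is up to the allocator):

       vmov.i8   q0, #VALUE
       vst1.8    {q0}, [r0]	@ bytes 0-15
       add       r0, r0, #15
       vst1.8    {d0}, [r0]	@ bytes 15-22, overlapping one byte

   where the final store is shifted back so that it stays inside the
   block.  */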
30073 
30074 /* Set a block of memory using vectorization instructions for the
30075    aligned case.  We fill the first LENGTH bytes of the memory area
30076    starting from DSTBASE with byte constant VALUE.  ALIGN is the
30077    alignment requirement of memory.  Return TRUE if succeeded.  */
30078 static bool
30079 arm_block_set_aligned_vect (rtx dstbase,
30080 			    unsigned HOST_WIDE_INT length,
30081 			    unsigned HOST_WIDE_INT value,
30082 			    unsigned HOST_WIDE_INT align)
30083 {
30084   unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30085   rtx dst, addr, mem;
30086   rtx val_vec, reg;
30087   machine_mode mode;
30088   unsigned int offset = 0;
30089 
30090   gcc_assert ((align & 0x3) == 0);
30091   nelt_v8 = GET_MODE_NUNITS (V8QImode);
30092   nelt_v16 = GET_MODE_NUNITS (V16QImode);
30093   if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30094     mode = V16QImode;
30095   else
30096     mode = V8QImode;
30097 
30098   nelt_mode = GET_MODE_NUNITS (mode);
30099   gcc_assert (length >= nelt_mode);
30100   /* Skip if it isn't profitable.  */
30101   if (!arm_block_set_vect_profit_p (length, align, mode))
30102     return false;
30103 
30104   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30105 
30106   reg = gen_reg_rtx (mode);
30107   val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30108   /* Emit instruction loading the constant value.  */
30109   emit_move_insn (reg, val_vec);
30110 
30111   i = 0;
30112   /* Handle first 16 bytes specially using vst1:v16qi instruction.  */
30113   if (mode == V16QImode)
30114     {
30115       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30116       emit_insn (gen_movmisalignv16qi (mem, reg));
30117       i += nelt_mode;
30118       /* Handle (8, 16) bytes leftover using vst1:v16qi again.  */
30119       if (i + nelt_v8 < length && i + nelt_v16 > length)
30120 	{
30121 	  emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30122 	  offset += length - nelt_mode;
30123 	  mem = adjust_automodify_address (dstbase, mode, dst, offset);
30124 	  /* We are shifting bytes back, set the alignment accordingly.  */
30125 	  if ((length & 0x3) == 0)
30126 	    set_mem_align (mem, BITS_PER_UNIT * 4);
30127 	  else if ((length & 0x1) == 0)
30128 	    set_mem_align (mem, BITS_PER_UNIT * 2);
30129 	  else
30130 	    set_mem_align (mem, BITS_PER_UNIT);
30131 
30132 	  emit_insn (gen_movmisalignv16qi (mem, reg));
30133 	  return true;
30134 	}
30135       /* Fall through for bytes leftover.  */
30136       mode = V8QImode;
30137       nelt_mode = GET_MODE_NUNITS (mode);
30138       reg = gen_lowpart (V8QImode, reg);
30139     }
30140 
30141   /* Handle 8 bytes in a vector.  */
30142   for (; (i + nelt_mode <= length); i += nelt_mode)
30143     {
30144       addr = plus_constant (Pmode, dst, i);
30145       mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30146       emit_move_insn (mem, reg);
30147     }
30148 
30149   /* Handle single word leftover by shifting 4 bytes back.  We can
30150      use aligned access for this case.  */
30151   if (i + UNITS_PER_WORD == length)
30152     {
30153       addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30154       offset += i - UNITS_PER_WORD;
30155       mem = adjust_automodify_address (dstbase, mode, addr, offset);
30156       /* We are shifting 4 bytes back, set the alignment accordingly.  */
30157       if (align > UNITS_PER_WORD)
30158 	set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30159 
30160       emit_move_insn (mem, reg);
30161     }
30162   /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30163      We have to use unaligned access for this case.  */
30164   else if (i < length)
30165     {
30166       emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30167       offset += length - nelt_mode;
30168       mem = adjust_automodify_address (dstbase, mode, dst, offset);
30169       /* We are shifting bytes back, set the alignment accordingly.  */
30170       if ((length & 1) == 0)
30171 	set_mem_align (mem, BITS_PER_UNIT * 2);
30172       else
30173 	set_mem_align (mem, BITS_PER_UNIT);
30174 
30175       emit_insn (gen_movmisalignv8qi (mem, reg));
30176     }
30177 
30178   return true;
30179 }
30180 
30181 /* Set a block of memory using plain strh/strb instructions, only
30182    using instructions allowed by ALIGN on processor.  We fill the
30183    first LENGTH bytes of the memory area starting from DSTBASE
30184    with byte constant VALUE.  ALIGN is the alignment requirement
30185    of memory.  */
30186 static bool
30187 arm_block_set_unaligned_non_vect (rtx dstbase,
30188 				  unsigned HOST_WIDE_INT length,
30189 				  unsigned HOST_WIDE_INT value,
30190 				  unsigned HOST_WIDE_INT align)
30191 {
30192   unsigned int i;
30193   rtx dst, addr, mem;
30194   rtx val_exp, val_reg, reg;
30195   machine_mode mode;
30196   HOST_WIDE_INT v = value;
30197 
30198   gcc_assert (align == 1 || align == 2);
30199 
30200   if (align == 2)
30201     v |= (value << BITS_PER_UNIT);
30202 
30203   v = sext_hwi (v, BITS_PER_WORD);
30204   val_exp = GEN_INT (v);
30205   /* Skip if it isn't profitable.  */
30206   if (!arm_block_set_non_vect_profit_p (val_exp, length,
30207 					align, true, false))
30208     return false;
30209 
30210   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30211   mode = (align == 2 ? HImode : QImode);
30212   val_reg = force_reg (SImode, val_exp);
30213   reg = gen_lowpart (mode, val_reg);
30214 
30215   for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30216     {
30217       addr = plus_constant (Pmode, dst, i);
30218       mem = adjust_automodify_address (dstbase, mode, addr, i);
30219       emit_move_insn (mem, reg);
30220     }
30221 
30222   /* Handle single byte leftover.  */
30223   if (i + 1 == length)
30224     {
30225       reg = gen_lowpart (QImode, val_reg);
30226       addr = plus_constant (Pmode, dst, i);
30227       mem = adjust_automodify_address (dstbase, QImode, addr, i);
30228       emit_move_insn (mem, reg);
30229       i++;
30230     }
30231 
30232   gcc_assert (i == length);
30233   return true;
30234 }
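
/* For example, with VALUE == 0xab, LENGTH == 7 and ALIGN == 2 this emits
   (schematically) a constant load of 0xabab followed by three STRH
   stores at offsets 0, 2 and 4 and one STRB store at offset 6.  */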
30235 
30236 /* Set a block of memory using plain strd/str/strh/strb instructions,
30237    to permit unaligned copies on processors which support unaligned
30238    semantics for those instructions.  We fill the first LENGTH bytes
30239    of the memory area starting from DSTBASE with byte constant VALUE.
30240    ALIGN is the alignment requirement of memory.  */
30241 static bool
30242 arm_block_set_aligned_non_vect (rtx dstbase,
30243 				unsigned HOST_WIDE_INT length,
30244 				unsigned HOST_WIDE_INT value,
30245 				unsigned HOST_WIDE_INT align)
30246 {
30247   unsigned int i;
30248   rtx dst, addr, mem;
30249   rtx val_exp, val_reg, reg;
30250   unsigned HOST_WIDE_INT v;
30251   bool use_strd_p;
30252 
30253   use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30254 		&& TARGET_LDRD && current_tune->prefer_ldrd_strd);
30255 
30256   v = (value | (value << 8) | (value << 16) | (value << 24));
30257   if (length < UNITS_PER_WORD)
30258     v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30259 
30260   if (use_strd_p)
30261     v |= (v << BITS_PER_WORD);
30262   else
30263     v = sext_hwi (v, BITS_PER_WORD);
30264 
30265   val_exp = GEN_INT (v);
30266   /* Skip if it isn't profitable.  */
30267   if (!arm_block_set_non_vect_profit_p (val_exp, length,
30268 					align, false, use_strd_p))
30269     {
30270       if (!use_strd_p)
30271 	return false;
30272 
30273       /* Try without strd.  */
30274       v = (v >> BITS_PER_WORD);
30275       v = sext_hwi (v, BITS_PER_WORD);
30276       val_exp = GEN_INT (v);
30277       use_strd_p = false;
30278       if (!arm_block_set_non_vect_profit_p (val_exp, length,
30279 					    align, false, use_strd_p))
30280 	return false;
30281     }
30282 
30283   i = 0;
30284   dst = copy_addr_to_reg (XEXP (dstbase, 0));
30285   /* Handle double words using strd if possible.  */
30286   if (use_strd_p)
30287     {
30288       val_reg = force_reg (DImode, val_exp);
30289       reg = val_reg;
30290       for (; (i + 8 <= length); i += 8)
30291 	{
30292 	  addr = plus_constant (Pmode, dst, i);
30293 	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
30294 	  emit_move_insn (mem, reg);
30295 	}
30296     }
30297   else
30298     val_reg = force_reg (SImode, val_exp);
30299 
30300   /* Handle words.  */
30301   reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30302   for (; (i + 4 <= length); i += 4)
30303     {
30304       addr = plus_constant (Pmode, dst, i);
30305       mem = adjust_automodify_address (dstbase, SImode, addr, i);
30306       if ((align & 3) == 0)
30307 	emit_move_insn (mem, reg);
30308       else
30309 	emit_insn (gen_unaligned_storesi (mem, reg));
30310     }
30311 
30312   /* Merge last pair of STRH and STRB into a STR if possible.  */
30313   if (unaligned_access && i > 0 && (i + 3) == length)
30314     {
30315       addr = plus_constant (Pmode, dst, i - 1);
30316       mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30317       /* We are shifting one byte back, set the alignment accordingly.  */
30318       if ((align & 1) == 0)
30319 	set_mem_align (mem, BITS_PER_UNIT);
30320 
30321       /* Most likely this is an unaligned access, and we can't tell at
30322 	 compilation time.  */
30323       emit_insn (gen_unaligned_storesi (mem, reg));
30324       return true;
30325     }
30326 
30327   /* Handle half word leftover.  */
30328   if (i + 2 <= length)
30329     {
30330       reg = gen_lowpart (HImode, val_reg);
30331       addr = plus_constant (Pmode, dst, i);
30332       mem = adjust_automodify_address (dstbase, HImode, addr, i);
30333       if ((align & 1) == 0)
30334 	emit_move_insn (mem, reg);
30335       else
30336 	emit_insn (gen_unaligned_storehi (mem, reg));
30337 
30338       i += 2;
30339     }
30340 
30341   /* Handle single byte leftover.  */
30342   if (i + 1 == length)
30343     {
30344       reg = gen_lowpart (QImode, val_reg);
30345       addr = plus_constant (Pmode, dst, i);
30346       mem = adjust_automodify_address (dstbase, QImode, addr, i);
30347       emit_move_insn (mem, reg);
30348     }
30349 
30350   return true;
30351 }
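
/* As an example, VALUE == 0x55, LENGTH == 11 and ALIGN == 4 on a target
   with unaligned access (and no STRD preference in the tuning) gives,
   schematically:

       @ build 0x55555555 in rX (however many insns that takes)
       str       rX, [r0]
       str       rX, [r0, #4]
       str       rX, [r0, #7]	@ unaligned, merges the trailing
				@ STRH/STRB pair

   The last store deliberately overlaps byte 7, which already holds the
   right value.  */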
30352 
30353 /* Set a block of memory using vectorization instructions for both
30354    aligned and unaligned cases.  We fill the first LENGTH bytes of
30355    the memory area starting from DSTBASE with byte constant VALUE.
30356    ALIGN is the alignment requirement of memory.  */
30357 static bool
30358 arm_block_set_vect (rtx dstbase,
30359 		    unsigned HOST_WIDE_INT length,
30360 		    unsigned HOST_WIDE_INT value,
30361 		    unsigned HOST_WIDE_INT align)
30362 {
30363   /* Check whether we need to use unaligned store instruction.  */
30364   if (((align & 3) != 0 || (length & 3) != 0)
30365       /* Check whether unaligned store instruction is available.  */
30366       && (!unaligned_access || BYTES_BIG_ENDIAN))
30367     return false;
30368 
30369   if ((align & 3) == 0)
30370     return arm_block_set_aligned_vect (dstbase, length, value, align);
30371   else
30372     return arm_block_set_unaligned_vect (dstbase, length, value, align);
30373 }
30374 
30375 /* Expand a block memory set (setmem) operation.  First try to do it
30376    using vector instructions, then fall back to ARM unaligned access and
30377    double-word stores if profitable.  OPERANDS[0] is the destination,
30378    OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30379    initialize the memory with, and OPERANDS[3] is the known alignment of
30380    the destination.  */
30381 bool
30382 arm_gen_setmem (rtx *operands)
30383 {
30384   rtx dstbase = operands[0];
30385   unsigned HOST_WIDE_INT length;
30386   unsigned HOST_WIDE_INT value;
30387   unsigned HOST_WIDE_INT align;
30388 
30389   if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30390     return false;
30391 
30392   length = UINTVAL (operands[1]);
30393   if (length > 64)
30394     return false;
30395 
30396   value = (UINTVAL (operands[2]) & 0xFF);
30397   align = UINTVAL (operands[3]);
30398   if (TARGET_NEON && length >= 8
30399       && current_tune->string_ops_prefer_neon
30400       && arm_block_set_vect (dstbase, length, value, align))
30401     return true;
30402 
30403   if (!unaligned_access && (align & 3) != 0)
30404     return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30405 
30406   return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30407 }
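
/* For example, expanding memset (p, 0, 15) with a word-aligned
   destination arrives here with OPERANDS[1] == (const_int 15),
   OPERANDS[2] == (const_int 0) and OPERANDS[3] == (const_int 4); the
   NEON path is tried first when the tuning prefers it, and the
   non-vectorized aligned path is used otherwise.  */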
30408 
30409 
30410 static bool
30411 arm_macro_fusion_p (void)
30412 {
30413   return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30414 }
30415 
30416 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30417    for MOVW / MOVT macro fusion.  */
30418 
30419 static bool
30420 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30421 {
30422   /* We are trying to fuse
30423      movw imm / movt imm
30424     instructions as a group that gets scheduled together.  */
30425 
30426   rtx set_dest = SET_DEST (curr_set);
30427 
30428   if (GET_MODE (set_dest) != SImode)
30429     return false;
30430 
30431   /* We are trying to match:
30432      prev (movw)  == (set (reg r0) (const_int imm16))
30433      curr (movt) == (set (zero_extract (reg r0)
30434 					(const_int 16)
30435 					(const_int 16))
30436 			  (const_int imm16_1))
30437      or
30438      prev (movw) == (set (reg r1)
30439 			  (high (symbol_ref ("SYM"))))
30440     curr (movt) == (set (reg r0)
30441 			(lo_sum (reg r1)
30442 				(symbol_ref ("SYM"))))  */
30443 
30444     if (GET_CODE (set_dest) == ZERO_EXTRACT)
30445       {
30446 	if (CONST_INT_P (SET_SRC (curr_set))
30447 	    && CONST_INT_P (SET_SRC (prev_set))
30448 	    && REG_P (XEXP (set_dest, 0))
30449 	    && REG_P (SET_DEST (prev_set))
30450 	    && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30451 	  return true;
30452 
30453       }
30454     else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30455 	     && REG_P (SET_DEST (curr_set))
30456 	     && REG_P (SET_DEST (prev_set))
30457 	     && GET_CODE (SET_SRC (prev_set)) == HIGH
30458 	     && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30459       return true;
30460 
30461   return false;
30462 }
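
/* In assembly terms the fused pair is the usual 32-bit immediate or
   address build-up, e.g.:

       movw  r0, #:lower16:sym
       movt  r0, #:upper16:sym

   or the equivalent pair with plain 16-bit immediates.  */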
30463 
30464 static bool
30465 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30466 {
30467   rtx prev_set = single_set (prev);
30468   rtx curr_set = single_set (curr);
30469 
30470   if (!prev_set
30471       || !curr_set)
30472     return false;
30473 
30474   if (any_condjump_p (curr))
30475     return false;
30476 
30477   if (!arm_macro_fusion_p ())
30478     return false;
30479 
30480   if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30481       && aarch_crypto_can_dual_issue (prev, curr))
30482     return true;
30483 
30484   if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30485       && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30486     return true;
30487 
30488   return false;
30489 }
30490 
30491 /* Return true iff the instruction fusion described by OP is enabled.  */
30492 bool
30493 arm_fusion_enabled_p (tune_params::fuse_ops op)
30494 {
30495   return current_tune->fusible_ops & op;
30496 }
30497 
30498 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
30499    scheduled for speculative execution.  Reject the long-running division
30500    and square-root instructions.  */
30501 
30502 static bool
30503 arm_sched_can_speculate_insn (rtx_insn *insn)
30504 {
30505   switch (get_attr_type (insn))
30506     {
30507       case TYPE_SDIV:
30508       case TYPE_UDIV:
30509       case TYPE_FDIVS:
30510       case TYPE_FDIVD:
30511       case TYPE_FSQRTS:
30512       case TYPE_FSQRTD:
30513       case TYPE_NEON_FP_SQRT_S:
30514       case TYPE_NEON_FP_SQRT_D:
30515       case TYPE_NEON_FP_SQRT_S_Q:
30516       case TYPE_NEON_FP_SQRT_D_Q:
30517       case TYPE_NEON_FP_DIV_S:
30518       case TYPE_NEON_FP_DIV_D:
30519       case TYPE_NEON_FP_DIV_S_Q:
30520       case TYPE_NEON_FP_DIV_D_Q:
30521 	return false;
30522       default:
30523 	return true;
30524     }
30525 }
30526 
30527 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
30528 
30529 static unsigned HOST_WIDE_INT
30530 arm_asan_shadow_offset (void)
30531 {
30532   return HOST_WIDE_INT_1U << 29;
30533 }
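
/* With the usual ASan mapping of shadow = (address >> 3) + offset, this
   places the shadow region at 0x20000000; e.g. address 0x40000000 is
   shadowed at 0x28000000.  */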
30534 
30535 
30536 /* This is a temporary fix for PR60655.  Ideally we need
30537    to handle most of these cases in the generic part but
30538    currently we reject minus (..) (sym_ref).  We try to
30539    ameliorate the case with minus (sym_ref1) (sym_ref2)
30540    where they are in the same section.  */
30541 
30542 static bool
30543 arm_const_not_ok_for_debug_p (rtx p)
30544 {
30545   tree decl_op0 = NULL;
30546   tree decl_op1 = NULL;
30547 
30548   if (GET_CODE (p) == UNSPEC)
30549     return true;
30550   if (GET_CODE (p) == MINUS)
30551     {
30552       if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30553 	{
30554 	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30555 	  if (decl_op1
30556 	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30557 	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30558 	    {
30559 	      if ((VAR_P (decl_op1)
30560 		   || TREE_CODE (decl_op1) == CONST_DECL)
30561 		  && (VAR_P (decl_op0)
30562 		      || TREE_CODE (decl_op0) == CONST_DECL))
30563 		return (get_variable_section (decl_op1, false)
30564 			!= get_variable_section (decl_op0, false));
30565 
30566 	      if (TREE_CODE (decl_op1) == LABEL_DECL
30567 		  && TREE_CODE (decl_op0) == LABEL_DECL)
30568 		return (DECL_CONTEXT (decl_op1)
30569 			!= DECL_CONTEXT (decl_op0));
30570 	    }
30571 
30572 	  return true;
30573 	}
30574     }
30575 
30576   return false;
30577 }
30578 
30579 /* Return TRUE if X is a reference to a value in a constant pool.  */
30580 extern bool
30581 arm_is_constant_pool_ref (rtx x)
30582 {
30583   return (MEM_P (x)
30584 	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30585 	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30586 }
30587 
30588 /* Remember the last target of arm_set_current_function.  */
30589 static GTY(()) tree arm_previous_fndecl;
30590 
30591 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */
30592 
30593 void
30594 save_restore_target_globals (tree new_tree)
30595 {
30596   /* If we have a previous state, use it.  */
30597   if (TREE_TARGET_GLOBALS (new_tree))
30598     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30599   else if (new_tree == target_option_default_node)
30600     restore_target_globals (&default_target_globals);
30601   else
30602     {
30603       /* Call target_reinit and save the state for TARGET_GLOBALS.  */
30604       TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30605     }
30606 
30607   arm_option_params_internal ();
30608 }
30609 
30610 /* Invalidate arm_previous_fndecl.  */
30611 
30612 void
30613 arm_reset_previous_fndecl (void)
30614 {
30615   arm_previous_fndecl = NULL_TREE;
30616 }
30617 
30618 /* Establish appropriate back-end context for processing the function
30619    FNDECL.  The argument might be NULL to indicate processing at top
30620    level, outside of any function scope.  */
30621 
30622 static void
30623 arm_set_current_function (tree fndecl)
30624 {
30625   if (!fndecl || fndecl == arm_previous_fndecl)
30626     return;
30627 
30628   tree old_tree = (arm_previous_fndecl
30629 		   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30630 		   : NULL_TREE);
30631 
30632   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30633 
30634   /* If current function has no attributes but previous one did,
30635      use the default node.  */
30636   if (! new_tree && old_tree)
30637     new_tree = target_option_default_node;
30638 
30639   /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
30640      the default have been handled by save_restore_target_globals from
30641      arm_pragma_target_parse.  */
30642   if (old_tree == new_tree)
30643     return;
30644 
30645   arm_previous_fndecl = fndecl;
30646 
30647   /* First set the target options.  */
30648   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30649 
30650   save_restore_target_globals (new_tree);
30651 }
30652 
30653 /* Implement TARGET_OPTION_PRINT.  */
30654 
30655 static void
30656 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30657 {
30658   int flags = ptr->x_target_flags;
30659   const char *fpu_name;
30660 
30661   fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30662 	      ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30663 
30664   fprintf (file, "%*sselected isa %s\n", indent, "",
30665 	   TARGET_THUMB2_P (flags) ? "thumb2" :
30666 	   TARGET_THUMB_P (flags) ? "thumb1" :
30667 	   "arm");
30668 
30669   if (ptr->x_arm_arch_string)
30670     fprintf (file, "%*sselected architecture %s\n", indent, "",
30671 	     ptr->x_arm_arch_string);
30672 
30673   if (ptr->x_arm_cpu_string)
30674     fprintf (file, "%*sselected CPU %s\n", indent, "",
30675 	     ptr->x_arm_cpu_string);
30676 
30677   if (ptr->x_arm_tune_string)
30678     fprintf (file, "%*sselected tune %s\n", indent, "",
30679 	     ptr->x_arm_tune_string);
30680 
30681   fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30682 }
30683 
30684 /* Hook to determine if one function can safely inline another.  */
30685 
30686 static bool
30687 arm_can_inline_p (tree caller, tree callee)
30688 {
30689   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30690   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30691   bool can_inline = true;
30692 
30693   struct cl_target_option *caller_opts
30694 	= TREE_TARGET_OPTION (caller_tree ? caller_tree
30695 					   : target_option_default_node);
30696 
30697   struct cl_target_option *callee_opts
30698 	= TREE_TARGET_OPTION (callee_tree ? callee_tree
30699 					   : target_option_default_node);
30700 
30701   if (callee_opts == caller_opts)
30702     return true;
30703 
30704   /* Callee's ISA features should be a subset of the caller's.  */
30705   struct arm_build_target caller_target;
30706   struct arm_build_target callee_target;
30707   caller_target.isa = sbitmap_alloc (isa_num_bits);
30708   callee_target.isa = sbitmap_alloc (isa_num_bits);
30709 
30710   arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30711 			      false);
30712   arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30713 			      false);
30714   if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30715     can_inline = false;
30716 
30717   sbitmap_free (caller_target.isa);
30718   sbitmap_free (callee_target.isa);
30719 
30720   /* OK to inline between different modes.
30721      Functions with mode-specific instructions, e.g. using asm,
30722      must be explicitly protected with noinline.  */
30723   return can_inline;
30724 }
30725 
30726 /* Hook to fix function's alignment affected by target attribute.  */
30727 
30728 static void
30729 arm_relayout_function (tree fndecl)
30730 {
30731   if (DECL_USER_ALIGN (fndecl))
30732     return;
30733 
30734   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30735 
30736   if (!callee_tree)
30737     callee_tree = target_option_default_node;
30738 
30739   struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30740   SET_DECL_ALIGN
30741     (fndecl,
30742      FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30743 }
30744 
30745 /* Inner function to process the attribute ((target (...))): take an argument
30746    and set the current options from it.  If we have a list, recursively
30747    process each element of the list.  */
30748 
30749 static bool
30750 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30751 {
30752   if (TREE_CODE (args) == TREE_LIST)
30753     {
30754       bool ret = true;
30755 
30756       for (; args; args = TREE_CHAIN (args))
30757 	if (TREE_VALUE (args)
30758 	    && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30759 	  ret = false;
30760       return ret;
30761     }
30762 
30763   else if (TREE_CODE (args) != STRING_CST)
30764     {
30765       error ("attribute %<target%> argument not a string");
30766       return false;
30767     }
30768 
30769   char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30770   char *q;
30771 
30772   while ((q = strtok (argstr, ",")) != NULL)
30773     {
30774       while (ISSPACE (*q)) ++q;
30775 
30776       argstr = NULL;
30777       if (!strncmp (q, "thumb", 5))
30778 	  opts->x_target_flags |= MASK_THUMB;
30779 
30780       else if (!strncmp (q, "arm", 3))
30781 	  opts->x_target_flags &= ~MASK_THUMB;
30782 
30783       else if (!strncmp (q, "fpu=", 4))
30784 	{
30785 	  int fpu_index;
30786 	  if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30787 				       &fpu_index, CL_TARGET))
30788 	    {
30789 	      error ("invalid fpu for target attribute or pragma %qs", q);
30790 	      return false;
30791 	    }
30792 	  if (fpu_index == TARGET_FPU_auto)
30793 	    {
30794 	      /* This doesn't really make sense until we support
30795 		 general dynamic selection of the architecture and all
30796 		 sub-features.  */
30797 	      sorry ("auto fpu selection not currently permitted here");
30798 	      return false;
30799 	    }
30800 	  opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30801 	}
30802       else if (!strncmp (q, "arch=", 5))
30803 	{
30804 	  char* arch = q+5;
30805 	  const arch_option *arm_selected_arch
30806 	     = arm_parse_arch_option_name (all_architectures, "arch", arch);
30807 
30808 	  if (!arm_selected_arch)
30809 	    {
30810 	      error ("invalid architecture for target attribute or pragma %qs",
30811 		     q);
30812 	      return false;
30813 	    }
30814 
30815 	  opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30816 	}
30817       else if (q[0] == '+')
30818 	{
30819 	  opts->x_arm_arch_string
30820 	    = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30821 	}
30822       else
30823 	{
30824 	  error ("unknown target attribute or pragma %qs", q);
30825 	  return false;
30826 	}
30827     }
30828 
30829   return true;
30830 }
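
/* The strings accepted above are the ones used in source code such as

     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv4")))
     __attribute__ ((target ("arch=armv8-a+crc")))

   and extensions can also be given as a separate "+ext" token, which is
   appended to the current architecture string.  */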
30831 
30832 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
30833 
30834 tree
30835 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30836 				 struct gcc_options *opts_set)
30837 {
30838   struct cl_target_option cl_opts;
30839 
30840   if (!arm_valid_target_attribute_rec (args, opts))
30841     return NULL_TREE;
30842 
30843   cl_target_option_save (&cl_opts, opts);
30844   arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30845   arm_option_check_internal (opts);
30846   /* Do any overrides, such as global options arch=xxx.
30847      We do this since arm_active_target was overridden.  */
30848   arm_option_reconfigure_globals ();
30849   arm_options_perform_arch_sanity_checks ();
30850   arm_option_override_internal (opts, opts_set);
30851 
30852   return build_target_option_node (opts);
30853 }
30854 
30855 static void
30856 add_attribute (const char * mode, tree *attributes)
30857 {
30858   size_t len = strlen (mode);
30859   tree value = build_string (len, mode);
30860 
30861   TREE_TYPE (value) = build_array_type (char_type_node,
30862 					build_index_type (size_int (len)));
30863 
30864   *attributes = tree_cons (get_identifier ("target"),
30865 			   build_tree_list (NULL_TREE, value),
30866 			   *attributes);
30867 }
30868 
30869 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30870 
30871 static void
30872 arm_insert_attributes (tree fndecl, tree * attributes)
30873 {
30874   const char *mode;
30875 
30876   if (! TARGET_FLIP_THUMB)
30877     return;
30878 
30879   if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30880       || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30881    return;
30882 
30883   /* Nested definitions must inherit mode.  */
30884   if (current_function_decl)
30885    {
30886      mode = TARGET_THUMB ? "thumb" : "arm";
30887      add_attribute (mode, attributes);
30888      return;
30889    }
30890 
30891   /* If there is already a setting don't change it.  */
30892   if (lookup_attribute ("target", *attributes) != NULL)
30893     return;
30894 
30895   mode = thumb_flipper ? "thumb" : "arm";
30896   add_attribute (mode, attributes);
30897 
30898   thumb_flipper = !thumb_flipper;
30899 }
30900 
30901 /* Hook to validate attribute((target("string"))).  */
30902 
30903 static bool
30904 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30905 			      tree args, int ARG_UNUSED (flags))
30906 {
30907   bool ret = true;
30908   struct gcc_options func_options;
30909   tree cur_tree, new_optimize;
30910   gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30911 
30912   /* Get the optimization options of the current function.  */
30913   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30914 
30915   /* If the function changed the optimization levels as well as setting target
30916      options, start with the optimizations specified.  */
30917   if (!func_optimize)
30918     func_optimize = optimization_default_node;
30919 
30920   /* Init func_options.  */
30921   memset (&func_options, 0, sizeof (func_options));
30922   init_options_struct (&func_options, NULL);
30923   lang_hooks.init_options_struct (&func_options);
30924 
30925   /* Initialize func_options to the defaults.  */
30926   cl_optimization_restore (&func_options,
30927 			   TREE_OPTIMIZATION (func_optimize));
30928 
30929   cl_target_option_restore (&func_options,
30930 			    TREE_TARGET_OPTION (target_option_default_node));
30931 
30932   /* Set func_options flags with new target mode.  */
30933   cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30934 					      &global_options_set);
30935 
30936   if (cur_tree == NULL_TREE)
30937     ret = false;
30938 
30939   new_optimize = build_optimization_node (&func_options);
30940 
30941   DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30942 
30943   DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30944 
30945   finalize_options_struct (&func_options);
30946 
30947   return ret;
30948 }
30949 
30950 /* Match an ISA feature bitmap to a named FPU.  We always use the
30951    first entry that exactly matches the feature set, so that we
30952    effectively canonicalize the FPU name for the assembler.  */
30953 static const char*
30954 arm_identify_fpu_from_isa (sbitmap isa)
30955 {
30956   auto_sbitmap fpubits (isa_num_bits);
30957   auto_sbitmap cand_fpubits (isa_num_bits);
30958 
30959   bitmap_and (fpubits, isa, isa_all_fpubits);
30960 
30961   /* If there are no ISA feature bits relating to the FPU, we must be
30962      doing soft-float.  */
30963   if (bitmap_empty_p (fpubits))
30964     return "softvfp";
30965 
30966   for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30967     {
30968       arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30969       if (bitmap_equal_p (fpubits, cand_fpubits))
30970 	return all_fpus[i].name;
30971     }
30972   /* We must find an entry, or things have gone wrong.  */
30973   gcc_unreachable ();
30974 }
30975 
30976 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
30977    by the function fndecl.  */
30978 void
30979 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30980 {
30981   tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30982 
30983   struct cl_target_option *targ_options;
30984   if (target_parts)
30985     targ_options = TREE_TARGET_OPTION (target_parts);
30986   else
30987     targ_options = TREE_TARGET_OPTION (target_option_current_node);
30988   gcc_assert (targ_options);
30989 
30990   /* Only update the assembler .arch string if it is distinct from the last
30991      such string we printed. arch_to_print is set conditionally in case
30992      targ_options->x_arm_arch_string is NULL which can be the case
30993      when cc1 is invoked directly without passing -march option.  */
30994   std::string arch_to_print;
30995   if (targ_options->x_arm_arch_string)
30996     arch_to_print = targ_options->x_arm_arch_string;
30997 
30998   if (arch_to_print != arm_last_printed_arch_string)
30999     {
31000       std::string arch_name
31001 	= arch_to_print.substr (0, arch_to_print.find ("+"));
31002       asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31003       const arch_option *arch
31004 	= arm_parse_arch_option_name (all_architectures, "-march",
31005 				      targ_options->x_arm_arch_string);
31006       auto_sbitmap opt_bits (isa_num_bits);
31007 
31008       gcc_assert (arch);
31009       if (arch->common.extensions)
31010 	{
31011 	  for (const struct cpu_arch_extension *opt = arch->common.extensions;
31012 	       opt->name != NULL;
31013 	       opt++)
31014 	    {
31015 	      if (!opt->remove)
31016 		{
31017 		  arm_initialize_isa (opt_bits, opt->isa_bits);
31018 		  if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31019 		      && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31020 		    asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31021 				 opt->name);
31022 		}
31023 	     }
31024 	}
31025 
31026       arm_last_printed_arch_string = arch_to_print;
31027     }
31028 
31029   fprintf (stream, "\t.syntax unified\n");
31030 
31031   if (TARGET_THUMB)
31032     {
31033       if (is_called_in_ARM_mode (decl)
31034 	  || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31035 	      && cfun->is_thunk))
31036 	fprintf (stream, "\t.code 32\n");
31037       else if (TARGET_THUMB1)
31038 	fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31039       else
31040 	fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31041     }
31042   else
31043     fprintf (stream, "\t.arm\n");
31044 
31045   std::string fpu_to_print
31046     = TARGET_SOFT_FLOAT
31047 	? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31048 
31049   if (fpu_to_print != arm_last_printed_fpu_string)
31050     {
31051       asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31052       arm_last_printed_fpu_string = fpu_to_print;
31053     }
31054 
31055   if (TARGET_POKE_FUNCTION_NAME)
31056     arm_poke_function_name (stream, (const char *) name);
31057 }
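
/* A typical result of the above (for illustration only) is a prologue of
   directives such as

       .arch armv8-a
       .arch_extension crc
       .syntax unified
       .thumb
       .thumb_func
       .fpu neon-fp-armv8

   with the .arch and .fpu lines omitted when they match what was last
   printed.  */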
31058 
31059 /* If MEM is in the form of [base+offset], extract the two parts of the
31060    address and store them in BASE and OFFSET; otherwise return false
31061    after clearing BASE and OFFSET.  */
31062 
31063 static bool
31064 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31065 {
31066   rtx addr;
31067 
31068   gcc_assert (MEM_P (mem));
31069 
31070   addr = XEXP (mem, 0);
31071 
31072   /* Strip off const from addresses like (const (addr)).  */
31073   if (GET_CODE (addr) == CONST)
31074     addr = XEXP (addr, 0);
31075 
31076   if (GET_CODE (addr) == REG)
31077     {
31078       *base = addr;
31079       *offset = const0_rtx;
31080       return true;
31081     }
31082 
31083   if (GET_CODE (addr) == PLUS
31084       && GET_CODE (XEXP (addr, 0)) == REG
31085       && CONST_INT_P (XEXP (addr, 1)))
31086     {
31087       *base = XEXP (addr, 0);
31088       *offset = XEXP (addr, 1);
31089       return true;
31090     }
31091 
31092   *base = NULL_RTX;
31093   *offset = NULL_RTX;
31094 
31095   return false;
31096 }
31097 
31098 /* If INSN is a load or store whose address has the form [base+offset],
31099    extract the two parts and store them in BASE and OFFSET.  Set IS_LOAD
31100    to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
31101    otherwise return FALSE.  */
31102 
31103 static bool
31104 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31105 {
31106   rtx x, dest, src;
31107 
31108   gcc_assert (INSN_P (insn));
31109   x = PATTERN (insn);
31110   if (GET_CODE (x) != SET)
31111     return false;
31112 
31113   src = SET_SRC (x);
31114   dest = SET_DEST (x);
31115   if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31116     {
31117       *is_load = false;
31118       extract_base_offset_in_addr (dest, base, offset);
31119     }
31120   else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31121     {
31122       *is_load = true;
31123       extract_base_offset_in_addr (src, base, offset);
31124     }
31125   else
31126     return false;
31127 
31128   return (*base != NULL_RTX && *offset != NULL_RTX);
31129 }
31130 
31131 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31132 
31133    Currently we only support fusing ldr and str instructions, so FUSION_PRI
31134    and PRI are only calculated for those.  For other instructions,
31135    FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
31136    of instruction fusion can be supported by returning different priorities.
31137 
31138    It's important that irrelevant instructions get the largest FUSION_PRI.  */
31139 
31140 static void
31141 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31142 			   int *fusion_pri, int *pri)
31143 {
31144   int tmp, off_val;
31145   bool is_load;
31146   rtx base, offset;
31147 
31148   gcc_assert (INSN_P (insn));
31149 
31150   tmp = max_pri - 1;
31151   if (!fusion_load_store (insn, &base, &offset, &is_load))
31152     {
31153       *pri = tmp;
31154       *fusion_pri = tmp;
31155       return;
31156     }
31157 
31158   /* Load goes first.  */
31159   if (is_load)
31160     *fusion_pri = tmp - 1;
31161   else
31162     *fusion_pri = tmp - 2;
31163 
31164   tmp /= 2;
31165 
31166   /* INSN with smaller base register goes first.  */
31167   tmp -= ((REGNO (base) & 0xff) << 20);
31168 
31169   /* INSN with smaller offset goes first.  */
31170   off_val = (int)(INTVAL (offset));
31171   if (off_val >= 0)
31172     tmp -= (off_val & 0xfffff);
31173   else
31174     tmp += ((- off_val) & 0xfffff);
31175 
31176   *pri = tmp;
31177   return;
31178 }
31179 
31180 
31181 /* Construct and return a PARALLEL RTX vector with elements numbering the
31182    lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31183    the vector - from the perspective of the architecture.  This does not
31184    line up with GCC's perspective on lane numbers, so we end up with
31185    different masks depending on our target endian-ness.  The diagram
31186    below may help.  We must draw the distinction when building masks
31187    which select one half of the vector.  An instruction selecting
31188    architectural low-lanes for a big-endian target, must be described using
31189    a mask selecting GCC high-lanes.
31190 
31191                  Big-Endian             Little-Endian
31192 
31193 GCC             0   1   2   3           3   2   1   0
31194               | x | x | x | x |       | x | x | x | x |
31195 Architecture    3   2   1   0           3   2   1   0
31196 
31197 Low Mask:         { 2, 3 }                { 0, 1 }
31198 High Mask:        { 0, 1 }                { 2, 3 }
31199 */
31200 
31201 rtx
31202 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31203 {
31204   int nunits = GET_MODE_NUNITS (mode);
31205   rtvec v = rtvec_alloc (nunits / 2);
31206   int high_base = nunits / 2;
31207   int low_base = 0;
31208   int base;
31209   rtx t1;
31210   int i;
31211 
31212   if (BYTES_BIG_ENDIAN)
31213     base = high ? low_base : high_base;
31214   else
31215     base = high ? high_base : low_base;
31216 
31217   for (i = 0; i < nunits / 2; i++)
31218     RTVEC_ELT (v, i) = GEN_INT (base + i);
31219 
31220   t1 = gen_rtx_PARALLEL (mode, v);
31221   return t1;
31222 }
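
/* For example, for V4SImode and HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */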
31223 
31224 /* Check OP for validity as a PARALLEL RTX vector with elements
31225    numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31226    from the perspective of the architecture.  See the diagram above
31227    arm_simd_vect_par_cnst_half_p for more details.  */
31228 
31229 bool
31230 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31231 				       bool high)
31232 {
31233   rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31234   HOST_WIDE_INT count_op = XVECLEN (op, 0);
31235   HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31236   int i = 0;
31237 
31238   if (!VECTOR_MODE_P (mode))
31239     return false;
31240 
31241   if (count_op != count_ideal)
31242     return false;
31243 
31244   for (i = 0; i < count_ideal; i++)
31245     {
31246       rtx elt_op = XVECEXP (op, 0, i);
31247       rtx elt_ideal = XVECEXP (ideal, 0, i);
31248 
31249       if (!CONST_INT_P (elt_op)
31250 	  || INTVAL (elt_ideal) != INTVAL (elt_op))
31251 	return false;
31252     }
31253   return true;
31254 }
31255 
31256 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31257    in Thumb1.  */
31258 static bool
31259 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31260 			 const_tree)
31261 {
31262   /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31263   if (vcall_offset && TARGET_THUMB1)
31264     return false;
31265 
31266   /* Otherwise ok.  */
31267   return true;
31268 }
31269 
31270 /* Generate RTL for a conditional branch with rtx comparison CODE in
31271    mode CC_MODE. The destination of the unlikely conditional branch
31272    is LABEL_REF.  */
31273 
31274 void
31275 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31276 			  rtx label_ref)
31277 {
31278   rtx x;
31279   x = gen_rtx_fmt_ee (code, VOIDmode,
31280 		      gen_rtx_REG (cc_mode, CC_REGNUM),
31281 		      const0_rtx);
31282 
31283   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31284 			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
31285 			    pc_rtx);
31286   emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31287 }
31288 
31289 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31290 
31291    For pure-code sections there is no letter code for this attribute, so
31292    output all the section flags numerically when this is needed.  */
31293 
31294 static bool
31295 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31296 {
31297 
31298   if (flags & SECTION_ARM_PURECODE)
31299     {
31300       *num = 0x20000000;
31301 
31302       if (!(flags & SECTION_DEBUG))
31303 	*num |= 0x2;
31304       if (flags & SECTION_EXCLUDE)
31305 	*num |= 0x80000000;
31306       if (flags & SECTION_WRITE)
31307 	*num |= 0x1;
31308       if (flags & SECTION_CODE)
31309 	*num |= 0x4;
31310       if (flags & SECTION_MERGE)
31311 	*num |= 0x10;
31312       if (flags & SECTION_STRINGS)
31313 	*num |= 0x20;
31314       if (flags & SECTION_TLS)
31315 	*num |= 0x400;
31316       if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31317 	*num |= 0x200;
31318 
31319       return true;
31320     }
31321 
31322   return false;
31323 }
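
/* For instance, an ordinary executable pure-code section (SECTION_CODE
   set, none of the debug/exclude/write/merge/tls flags) comes out as
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE plus the
   usual SHF_ALLOC and SHF_EXECINSTR bits.  */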
31324 
31325 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31326 
31327    If pure-code is passed as an option, make sure all functions are in
31328    sections that have the SHF_ARM_PURECODE attribute.  */
31329 
31330 static section *
31331 arm_function_section (tree decl, enum node_frequency freq,
31332 		      bool startup, bool exit)
31333 {
31334   const char * section_name;
31335   section * sec;
31336 
31337   if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31338     return default_function_section (decl, freq, startup, exit);
31339 
31340   if (!target_pure_code)
31341     return default_function_section (decl, freq, startup, exit);
31342 
31343 
31344   section_name = DECL_SECTION_NAME (decl);
31345 
31346   /* If a function is not in a named section then it falls under the 'default'
31347      text section, also known as '.text'.  We can preserve previous behavior as
31348      the default text section already has the SHF_ARM_PURECODE section
31349      attribute.  */
31350   if (!section_name)
31351     {
31352       section *default_sec = default_function_section (decl, freq, startup,
31353 						       exit);
31354 
31355       /* If default_sec is not null, then it must be a special section like for
31356 	 example .text.startup.  We set the pure-code attribute and return the
31357 	 same section to preserve existing behavior.  */
31358       if (default_sec)
31359 	  default_sec->common.flags |= SECTION_ARM_PURECODE;
31360       return default_sec;
31361     }
31362 
31363   /* Otherwise look whether a section has already been created with
31364      'section_name'.  */
31365   sec = get_named_section (decl, section_name, 0);
31366   if (!sec)
31367     /* If that is not the case passing NULL as the section's name to
31368        'get_named_section' will create a section with the declaration's
31369        section name.  */
31370     sec = get_named_section (decl, NULL, 0);
31371 
31372   /* Set the SHF_ARM_PURECODE attribute.  */
31373   sec->common.flags |= SECTION_ARM_PURECODE;
31374 
31375   return sec;
31376 }
31377 
31378 /* Implements the TARGET_SECTION_FLAGS hook.
31379 
31380    If DECL is a function declaration and pure-code is passed as an option
31381    then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31382    section's name and RELOC indicates whether the declaration's initializer may
31383    contain runtime relocations.  */
31384 
31385 static unsigned int
31386 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31387 {
31388   unsigned int flags = default_section_type_flags (decl, name, reloc);
31389 
31390   if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31391     flags |= SECTION_ARM_PURECODE;
31392 
31393   return flags;
31394 }
31395 
31396 /* Generate call to __aeabi_[mode]divmod (op0, op1).  */
31397 
31398 static void
31399 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31400 			   rtx op0, rtx op1,
31401 			   rtx *quot_p, rtx *rem_p)
31402 {
31403   if (mode == SImode)
31404     gcc_assert (!TARGET_IDIV);
31405 
31406   scalar_int_mode libval_mode
31407     = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31408 
31409   rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31410 					libval_mode,
31411 					op0, GET_MODE (op0),
31412 					op1, GET_MODE (op1));
31413 
31414   rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31415   rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31416 				       GET_MODE_SIZE (mode));
31417 
31418   gcc_assert (quotient);
31419   gcc_assert (remainder);
31420 
31421   *quot_p = quotient;
31422   *rem_p = remainder;
31423 }
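
/* Illustrative note (not used by the compiler): for SImode the AEABI
   routines are __aeabi_idivmod / __aeabi_uidivmod, which return the
   quotient in r0 and the remainder in r1 (ARM run-time ABI).  The call
   above models that register pair as a single double-width (DImode)
   value, so the quotient is the subreg at byte offset 0 and the remainder
   the subreg at byte offset GET_MODE_SIZE (SImode) == 4.  A rough C-level
   sketch of the contract (the struct is only a picture of the
   two-register return; the real routines live in the run-time library):

     typedef struct { int quot; int rem; } idiv_return;
     idiv_return __aeabi_idivmod (int numerator, int denominator);  */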

/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will trigger an internal compiler error (via
   gcc_unreachable).  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
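
/* Illustrative use, assuming the ACLE coprocessor intrinsics declared in
   <arm_acle.h> (the intrinsic below is the ACLE spelling, not something
   defined in this file):

     #include <arm_acle.h>

     uint32_t read_cp15_reg (void)
     {
       // Expands to an MRC instruction; only accepted when
       // arm_coproc_builtin_available (VUNSPEC_MRC) is true, i.e. on
       // ARMv4 or later and not on a Thumb-1-only target.
       return __arm_mrc (15, 0, 13, 0, 3);
     }
*/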

/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch (GET_CODE (op))
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
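
/* For reference, the address forms accepted above correspond to the LDC/STC
   addressing modes (offsets are in bytes, multiples of 4 within
   [-1020, 1020]); a non-exhaustive sketch of the RTL shapes:

     (mem (reg rN))                          plain register
     (mem (plus (reg rN) (const_int 8)))     register plus immediate offset
     (mem (pre_inc (reg rN)))                pre/post increment/decrement
     (mem (post_dec (reg rN)))               with base-register writeback  */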

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;
  return true;
}
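
/* Worked example of the restriction above (a sketch, assuming a big-endian
   multilib): a mode change such as DFmode -> SImode is rejected for
   VFP_REGS because the two words of the DFmode value sit in the register
   pair in little-endian word order, which GCC cannot describe; a 128-bit
   to 64-bit change such as V16QImode -> V8QImode is still allowed because
   its layout happens to match.  */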

/* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
   strcpy from constants will be faster.  */

static HOST_WIDE_INT
arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
  if (TREE_CODE (exp) == STRING_CST && !optimize_size)
    return MAX (align, BITS_PER_WORD * factor);
  return align;
}
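
/* Brief example of the effect (a sketch, not target documentation): when
   not optimizing for size, the constant-pool copy of a string literal such
   as

     const char *msg = "hello";

   is aligned to at least BITS_PER_WORD (32 bits), or to two words when
   tuning for XScale in ARM state, so word-at-a-time strcpy/memcpy
   expansions read from an aligned source.  */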

#if CHECKING_P
namespace selftest {

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  We primarily check for
   inconsistencies in the option extensions at present (extensions
   that duplicate others but aren't marked as aliases).  Furthermore,
   for correct canonicalization, later options must never be a subset
   of an earlier option.  Any extension should also only specify other
   feature bits and never an architecture bit.  The architecture is inferred
   from the declaration of the extension.  */
static void
arm_test_cpu_arch_data (void)
{
  const arch_option *arch;
  const cpu_option *cpu;
  auto_sbitmap target_isa (isa_num_bits);
  auto_sbitmap isa1 (isa_num_bits);
  auto_sbitmap isa2 (isa_num_bits);

  for (arch = all_architectures; arch->common.name != NULL; ++arch)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (arch->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, arch->common.isa_bits);

      for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }

  for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
    {
      const cpu_arch_extension *ext1, *ext2;

      if (cpu->common.extensions == NULL)
	continue;

      arm_initialize_isa (target_isa, cpu->common.isa_bits);

      for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
	{
	  if (ext1->alias)
	    continue;

	  arm_initialize_isa (isa1, ext1->isa_bits);
	  for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
	    {
	      if (ext2->alias || ext1->remove != ext2->remove)
		continue;

	      arm_initialize_isa (isa2, ext2->isa_bits);
	      /* If the option is a subset of the parent option, it doesn't
		 add anything and so isn't useful.  */
	      ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));

	      /* If the extension specifies any architectural bits then
		 disallow it.  Extensions should only specify feature bits.  */
	      ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
	    }
	}
    }
}

/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   FPU bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  /* Remove the bits provided by each FPU; anything left over afterwards is
     a bit in ISA_ALL_FPU_INTERNAL that no FPU defines.  */
  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
		       " group that are not defined by any FPU.\n"
		       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}

static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
} /* Namespace selftest.  */

#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
#endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"