1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2018 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #define IN_TARGET_CODE 1
25
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69
70 /* This file should be included last. */
71 #include "target-def.h"
72
73 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
74
/* True when SIZE fits the immediate field of an add instruction.  */
#define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
/* SImode move/add/sub generators, referenced through one indirection so
   the prologue/epilogue code below stays mode-agnostic.  */
#define GEN_MOV (*(gen_movsi))
#define GEN_ADD3 (*(gen_addsi3))
#define GEN_SUB3 (*(gen_subsi3))

/* Used to simplify the logic below.  Find the attributes wherever
   they may be: on the type itself, on the decl, or on the decl's type.
   The whole expansion is parenthesized so the macro expands safely
   inside larger expressions.  */
#define SH_ATTRIBUTES(decl) \
  ((TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
   : DECL_ATTRIBUTES (decl) \
   ? (DECL_ATTRIBUTES (decl)) \
   : TYPE_ATTRIBUTES (TREE_TYPE (decl)))
87
/* Set to true by expand_prologue() when the function is an
   interrupt handler.  */
bool current_function_interrupt;

/* Function attributes seen before their declaration could be processed;
   the tail pointer allows appending while preserving order.
   NOTE(review): presumably filled by sh_insert_attributes — confirm.  */
tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  Set in sh_option_override from the
   TARGET_SH* flags.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.
   Index 0 holds the SImode weights, 1 the SFmode weights (see
   INSN_REGMODE_WEIGHT).  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns, same indexing as
   regmode_weight.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  Starts at 1; 0 is never used.  */
static unsigned int unspec_bbr_uid = 1;
120
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  /* General registers; r0 gets its own singleton class because many
     SH addressing modes and insns can only use r0.  */
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* Floating point registers; fr0 is special for the same reason r0 is.  */
  FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  /* Branch target registers.  */
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  /* Double-precision FP register pairs.  */
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  /* Special registers: PR, the T bit, MACH/MACL, FPUL, FPSCR and friends
     — exact register numbers are defined in sh.h.  */
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
165
/* Printable names of the hard registers, indexed by register number.
   The maximum length and the initializer are target macros.  */
char sh_register_names[FIRST_PSEUDO_REGISTER] \
	[MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

/* Alternative (alias) spellings accepted for some registers.  */
char sh_additional_register_names[ADDREGNAMES_SIZE] \
	[MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Which assembler dialect to emit: set to 0 by default and to 1 for
   SH4/SH4A in sh_option_override.  */
int assembler_dialect;
174
175 static void split_branches (rtx_insn *);
176 static int branch_dest (rtx);
177 static void print_slot (rtx_sequence *);
178 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
179 static void dump_table (rtx_insn *, rtx_insn *);
180 static bool broken_move (rtx_insn *);
181 static bool mova_p (rtx_insn *);
182 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
183 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
184 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
185 static void sh_reorg (void);
186 static void sh_option_override (void);
187 static void sh_override_options_after_change (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx_insn* emit_frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
197 static int sh_mode_needed (int, rtx_insn *);
198 static int sh_mode_after (int, int, rtx_insn *);
199 static int sh_mode_entry (int);
200 static int sh_mode_exit (int);
201 static int sh_mode_priority (int entity, int n);
202
203 static rtx mark_constant_pool_use (rtx);
204 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
205 int, bool *);
206 static tree sh_handle_resbank_handler_attribute (tree *, tree,
207 tree, int, bool *);
208 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
209 tree, int, bool *);
210 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
211 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
213 static void sh_print_operand (FILE *, rtx, int);
214 static void sh_print_operand_address (FILE *, machine_mode, rtx);
215 static bool sh_print_operand_punct_valid_p (unsigned char code);
216 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
217 static void sh_output_function_epilogue (FILE *);
218 static void sh_insert_attributes (tree, tree *);
219 static const char *sh_check_pch_target_flags (int);
220 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
221 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
222 static int sh_issue_rate (void);
223 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
224 static short find_set_regmode_weight (rtx, machine_mode);
225 static short find_insn_regmode_weight (rtx, machine_mode);
226 static void find_regmode_weight (basic_block, machine_mode);
227 static int find_r0_life_regions (basic_block);
228 static void sh_md_init_global (FILE *, int, int);
229 static void sh_md_finish_global (FILE *, int);
230 static int rank_for_reorder (const void *, const void *);
231 static void swap_reorder (rtx_insn **, int);
232 static void ready_reorder (rtx_insn **, int);
233 static bool high_pressure (machine_mode);
234 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
235 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
236 static void sh_md_init (FILE *, int, int);
237 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
238
239 static bool sh_function_ok_for_sibcall (tree, tree);
240
241 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
242 static bool sh_ms_bitfield_layout_p (const_tree);
243
244 static void sh_init_builtins (void);
245 static tree sh_builtin_decl (unsigned, bool);
246 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
247 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
248 HOST_WIDE_INT, tree);
249 static void sh_file_start (void);
250 static bool sh_assemble_integer (rtx, unsigned int, int);
251 static bool flow_dependent_p (rtx, rtx);
252 static void flow_dependent_p_1 (rtx, const_rtx, void *);
253 static int shiftcosts (rtx);
254 static int and_xor_ior_costs (rtx, int);
255 static int addsubcosts (rtx);
256 static int multcosts (rtx);
257 static bool unspec_caller_rtx_p (rtx);
258 static bool sh_cannot_copy_insn_p (rtx_insn *);
259 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
260 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
261 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
262 static int sh_pr_n_sets (void);
263 static rtx sh_allocate_initial_value (rtx);
264 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
265 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
266 machine_mode,
267 struct secondary_reload_info *);
268 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
269 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
270 static rtx sh_delegitimize_address (rtx);
271 static bool sh_cannot_substitute_mem_equiv_p (rtx);
272 static bool sh_legitimize_address_displacement (rtx *, rtx *,
273 poly_int64, machine_mode);
274 static int scavenge_reg (HARD_REG_SET *s);
275
276 static rtx sh_struct_value_rtx (tree, int);
277 static rtx sh_function_value (const_tree, const_tree, bool);
278 static bool sh_function_value_regno_p (const unsigned int);
279 static rtx sh_libcall_value (machine_mode, const_rtx);
280 static bool sh_return_in_memory (const_tree, const_tree);
281 static rtx sh_builtin_saveregs (void);
282 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
283 tree, int *, int);
284 static bool sh_strict_argument_naming (cumulative_args_t);
285 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
286 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
287 static tree sh_build_builtin_va_list (void);
288 static void sh_va_start (tree, rtx);
289 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
290 static bool sh_promote_prototypes (const_tree);
291 static machine_mode sh_promote_function_mode (const_tree type,
292 machine_mode,
293 int *punsignedp,
294 const_tree funtype,
295 int for_return);
296 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
297 const_tree, bool);
298 static bool sh_callee_copies (cumulative_args_t, machine_mode,
299 const_tree, bool);
300 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
301 tree, bool);
302 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
303 const_tree, bool);
304 static rtx sh_function_arg (cumulative_args_t, machine_mode,
305 const_tree, bool);
306 static int sh_dwarf_calling_convention (const_tree);
307 static void sh_encode_section_info (tree, rtx, int);
308 static bool sh2a_function_vector_p (tree);
309 static void sh_trampoline_init (rtx, tree, rtx);
310 static rtx sh_trampoline_adjust_address (rtx);
311 static void sh_conditional_register_usage (void);
312 static bool sh_legitimate_constant_p (machine_mode, rtx);
313 static int mov_insn_size (machine_mode, bool);
314 static int mov_insn_alignment_mask (machine_mode, bool);
315 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
316 unsigned int,
317 enum by_pieces_operation,
318 bool);
319 static bool sequence_insn_p (rtx_insn *);
320 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
321 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
322 machine_mode, bool);
323 static bool sh_legitimate_combined_insn (rtx_insn* insn);
324
325 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
326
327 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
328 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
329 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
330 static bool sh_modes_tieable_p (machine_mode, machine_mode);
331 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
332
/* Table of machine attributes recognized by the SH backend, installed
   via TARGET_ATTRIBUTE_TABLE below.  */
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function is an interrupt handler; no arguments.  */
  { "interrupt_handler", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  /* Takes one argument (validated by sh_handle_sp_switch_attribute).  */
  { "sp_switch", 1, 1, true, false, false, false,
    sh_handle_sp_switch_attribute, NULL },
  /* Takes one argument (validated by sh_handle_trap_exit_attribute).  */
  { "trap_exit", 1, 1, true, false, false, false,
    sh_handle_trap_exit_attribute, NULL },
  /* Type attribute; marks the Renesas ABI.  */
  { "renesas", 0, 0, false, true, false, false,
    sh_handle_renesas_attribute, NULL },
  /* These three share the interrupt-handler validation routine.  */
  { "trapa_handler", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  { "nosave_low_regs", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  { "resbank", 0, 0, true, false, false, false,
    sh_handle_resbank_handler_attribute, NULL },
  /* SH2A function vector; takes one argument.  */
  { "function_vector", 1, 1, true, false, false, false,
    sh2a_handle_function_vector_handler_attribute, NULL },
  /* Sentinel terminating the table.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
355
356 /* Initialize the GCC target structure. */
357 #undef TARGET_ATTRIBUTE_TABLE
358 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
359
360 /* The next two are used for debug info when compiling with -gdwarf. */
361 #undef TARGET_ASM_UNALIGNED_HI_OP
362 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
363 #undef TARGET_ASM_UNALIGNED_SI_OP
364 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
365
366 #undef TARGET_OPTION_OVERRIDE
367 #define TARGET_OPTION_OVERRIDE sh_option_override
368
369 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
370 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
371 sh_override_options_after_change
372
373 #undef TARGET_PRINT_OPERAND
374 #define TARGET_PRINT_OPERAND sh_print_operand
375 #undef TARGET_PRINT_OPERAND_ADDRESS
376 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
377 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
378 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
379 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
380 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
381
382 #undef TARGET_ASM_FUNCTION_EPILOGUE
383 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
384
385 #undef TARGET_ASM_OUTPUT_MI_THUNK
386 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
387
388 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
389 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
390 hook_bool_const_tree_hwi_hwi_const_tree_true
391
392 #undef TARGET_ASM_FILE_START
393 #define TARGET_ASM_FILE_START sh_file_start
394 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
395 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
396
397 #undef TARGET_ASM_INTEGER
398 #define TARGET_ASM_INTEGER sh_assemble_integer
399
400 #undef TARGET_REGISTER_MOVE_COST
401 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
402
403 #undef TARGET_INSERT_ATTRIBUTES
404 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
405
406 #undef TARGET_SCHED_ADJUST_COST
407 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
408
409 #undef TARGET_SCHED_ISSUE_RATE
410 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
411
412 /* The next 5 hooks have been implemented for reenabling sched1. With the
413 help of these macros we are limiting the movement of insns in sched1 to
414 reduce the register pressure. The overall idea is to keep count of SImode
415 and SFmode regs required by already scheduled insns. When these counts
416 cross some threshold values; give priority to insns that free registers.
417 The insn that frees registers is most likely to be the insn with lowest
418 LUID (original insn order); but such an insn might be there in the stalled
419 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
420 up to a max of 8 cycles so that such insns may move from Q -> R.
421
422 The description of the hooks are as below:
423
424 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
425 scheduler; it is called inside the sched_init function just after
426 find_insn_reg_weights function call. It is used to calculate the SImode
427 and SFmode weights of insns of basic blocks; much similar to what
428 find_insn_reg_weights does.
429 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
430
431 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
432 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
433 (Q)->(R).
434
435 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
436 high; reorder the ready queue so that the insn with lowest LUID will be
437 issued next.
438
439 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
440 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
441
442 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
443 can be returned from TARGET_SCHED_REORDER2.
444
445 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
446
447 #undef TARGET_SCHED_DFA_NEW_CYCLE
448 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
449
450 #undef TARGET_SCHED_INIT_GLOBAL
451 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
452
453 #undef TARGET_SCHED_FINISH_GLOBAL
454 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
455
456 #undef TARGET_SCHED_VARIABLE_ISSUE
457 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
458
459 #undef TARGET_SCHED_REORDER
460 #define TARGET_SCHED_REORDER sh_reorder
461
462 #undef TARGET_SCHED_REORDER2
463 #define TARGET_SCHED_REORDER2 sh_reorder2
464
465 #undef TARGET_SCHED_INIT
466 #define TARGET_SCHED_INIT sh_md_init
467
468 #undef TARGET_DELEGITIMIZE_ADDRESS
469 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
470
471 #undef TARGET_LEGITIMIZE_ADDRESS
472 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
473
474 #undef TARGET_CAN_FOLLOW_JUMP
475 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
476
477 #undef TARGET_MS_BITFIELD_LAYOUT_P
478 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
479
480 #undef TARGET_INIT_BUILTINS
481 #define TARGET_INIT_BUILTINS sh_init_builtins
482 #undef TARGET_BUILTIN_DECL
483 #define TARGET_BUILTIN_DECL sh_builtin_decl
484 #undef TARGET_EXPAND_BUILTIN
485 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
486
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
489
490 #undef TARGET_CANNOT_COPY_INSN_P
491 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
492 #undef TARGET_RTX_COSTS
493 #define TARGET_RTX_COSTS sh_rtx_costs
494 #undef TARGET_ADDRESS_COST
495 #define TARGET_ADDRESS_COST sh_address_cost
496 #undef TARGET_ALLOCATE_INITIAL_VALUE
497 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
498
499 #undef TARGET_MACHINE_DEPENDENT_REORG
500 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
501
502 #undef TARGET_DWARF_REGISTER_SPAN
503 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
504
505 #ifdef HAVE_AS_TLS
506 #undef TARGET_HAVE_TLS
507 #define TARGET_HAVE_TLS true
508 #endif
509
510 #undef TARGET_PROMOTE_PROTOTYPES
511 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
512 #undef TARGET_PROMOTE_FUNCTION_MODE
513 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
514
515 #undef TARGET_FUNCTION_VALUE
516 #define TARGET_FUNCTION_VALUE sh_function_value
517 #undef TARGET_FUNCTION_VALUE_REGNO_P
518 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
519 #undef TARGET_LIBCALL_VALUE
520 #define TARGET_LIBCALL_VALUE sh_libcall_value
521 #undef TARGET_STRUCT_VALUE_RTX
522 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
523 #undef TARGET_RETURN_IN_MEMORY
524 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
525
526 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
527 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
528 #undef TARGET_SETUP_INCOMING_VARARGS
529 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
530 #undef TARGET_STRICT_ARGUMENT_NAMING
531 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
532 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
533 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
534 #undef TARGET_MUST_PASS_IN_STACK
535 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
536 #undef TARGET_PASS_BY_REFERENCE
537 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
538 #undef TARGET_CALLEE_COPIES
539 #define TARGET_CALLEE_COPIES sh_callee_copies
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG sh_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
546
547 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
548 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
549
550 #undef TARGET_BUILD_BUILTIN_VA_LIST
551 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
552 #undef TARGET_EXPAND_BUILTIN_VA_START
553 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
554 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
555 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
556
557 #undef TARGET_VECTOR_MODE_SUPPORTED_P
558 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
559
560 #undef TARGET_CHECK_PCH_TARGET_FLAGS
561 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
562
563 #undef TARGET_DWARF_CALLING_CONVENTION
564 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
565
566 #undef TARGET_FRAME_POINTER_REQUIRED
567 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
568
569 #undef TARGET_MODE_EMIT
570 #define TARGET_MODE_EMIT sh_emit_mode_set
571
572 #undef TARGET_MODE_NEEDED
573 #define TARGET_MODE_NEEDED sh_mode_needed
574
575 #undef TARGET_MODE_AFTER
576 #define TARGET_MODE_AFTER sh_mode_after
577
578 #undef TARGET_MODE_ENTRY
579 #define TARGET_MODE_ENTRY sh_mode_entry
580
581 #undef TARGET_MODE_EXIT
582 #define TARGET_MODE_EXIT sh_mode_exit
583
584 #undef TARGET_MODE_PRIORITY
585 #define TARGET_MODE_PRIORITY sh_mode_priority
586
587 /* Return regmode weight for insn. */
588 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
589 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
590
591 /* Return current register pressure for regmode. */
592 #define CURR_REGMODE_PRESSURE(MODE)\
593 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
594
595 #undef TARGET_ENCODE_SECTION_INFO
596 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
597
598 #undef TARGET_LRA_P
599 #define TARGET_LRA_P sh_lra_p
600
601 #undef TARGET_SECONDARY_RELOAD
602 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
603
604 #undef TARGET_PREFERRED_RELOAD_CLASS
605 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
606
607 #undef TARGET_CONDITIONAL_REGISTER_USAGE
608 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
609
610 #undef TARGET_LEGITIMATE_ADDRESS_P
611 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
612
613 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
614 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
615
616 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
617 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
618 sh_legitimize_address_displacement
619
620 #undef TARGET_TRAMPOLINE_INIT
621 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
622 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
623 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
624
625 #undef TARGET_LEGITIMATE_CONSTANT_P
626 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
627
628 #undef TARGET_CANONICALIZE_COMPARISON
629 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
630
631 #undef TARGET_LEGITIMATE_COMBINED_INSN
632 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
633
634 #undef TARGET_FIXED_CONDITION_CODE_REGS
635 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
636
637 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
638 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
639 sh_use_by_pieces_infrastructure_p
640
641 /* Machine-specific symbol_ref flags. */
642 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
643
644 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
645 is used by optabs.c atomic op expansion code as well as in sync.md. */
646 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
647 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
648
649 #undef TARGET_CANNOT_FORCE_CONST_MEM
650 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
651
652 #undef TARGET_HARD_REGNO_NREGS
653 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
654 #undef TARGET_HARD_REGNO_MODE_OK
655 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
656
657 #undef TARGET_MODES_TIEABLE_P
658 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
659
660 #undef TARGET_CAN_CHANGE_MODE_CLASS
661 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
662
663 #undef TARGET_CONSTANT_ALIGNMENT
664 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
665
666 struct gcc_target targetm = TARGET_INITIALIZER;
667
668
/* Information on the currently selected atomic model.
   This is initialized in sh_option_override.  */
static sh_atomic_model selected_atomic_model_;

/* Read-only accessor for the atomic model cached above.  Returns a
   reference so callers never copy the structure.  */
const sh_atomic_model&
selected_atomic_model (void)
{
  return selected_atomic_model_;
}
678
679 static sh_atomic_model
parse_validate_atomic_model_option(const char * str)680 parse_validate_atomic_model_option (const char* str)
681 {
682 const char* model_names[sh_atomic_model::num_models];
683 model_names[sh_atomic_model::none] = "none";
684 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
685 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
686 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
687 model_names[sh_atomic_model::soft_imask] = "soft-imask";
688
689 const char* model_cdef_names[sh_atomic_model::num_models];
690 model_cdef_names[sh_atomic_model::none] = "NONE";
691 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
692 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
693 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
694 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
695
696 sh_atomic_model ret;
697 ret.type = sh_atomic_model::none;
698 ret.name = model_names[sh_atomic_model::none];
699 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
700 ret.strict = false;
701 ret.tcb_gbr_offset = -1;
702
703 /* Handle empty string as 'none'. */
704 if (str == NULL || *str == '\0')
705 return ret;
706
707 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
708
709 std::vector<std::string> tokens;
710 for (std::stringstream ss (str); ss.good (); )
711 {
712 tokens.push_back (std::string ());
713 std::getline (ss, tokens.back (), ',');
714 }
715
716 if (tokens.empty ())
717 err_ret ("invalid atomic model option");
718
719 /* The first token must be the atomic model name. */
720 {
721 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
722 if (tokens.front () == model_names[i])
723 {
724 ret.type = (sh_atomic_model::enum_type)i;
725 ret.name = model_names[i];
726 ret.cdef_name = model_cdef_names[i];
727 goto got_mode_name;
728 }
729
730 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
731 got_mode_name:;
732 }
733
734 /* Go through the remaining tokens. */
735 for (size_t i = 1; i < tokens.size (); ++i)
736 {
737 if (tokens[i] == "strict")
738 ret.strict = true;
739 else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset="))
740 {
741 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
742 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
743 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
744 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
745 "option", offset_str.c_str ());
746 }
747 else
748 err_ret ("unknown parameter \"%s\" in atomic model option",
749 tokens[i].c_str ());
750 }
751
752 /* Check that the selection makes sense. */
753 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
754 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
755 ret.name);
756
757 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
758 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
759
760 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
761 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
762
763 if (ret.type == sh_atomic_model::soft_tcb
764 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
765 || (ret.tcb_gbr_offset & 3) != 0))
766 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
767 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
768 ret.name);
769
770 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
771 err_ret ("cannot use atomic model %s in user mode", ret.name);
772
773 return ret;
774
775 #undef err_ret
776 }
777
/* Register SH specific RTL passes.  The pass factories themselves live
   in separate source files; only their constructors are declared here.  */
extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
					    const char* name);
extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
						  const char* name);
/* Insert the SH passes into the pass pipeline at fixed anchor points.  */
static void
register_sh_passes (void)
{
/* Running the sh_treg_combine pass after ce1 generates better code when
   comparisons are combined and reg-reg moves are introduced, because
   reg-reg moves will be eliminated afterwards.  However, there are quite
   some cases where combine will be unable to fold comparison related insns,
   thus for now don't do it.
  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
		 PASS_POS_INSERT_AFTER, "ce1", 1);
*/

  /* Run sh_treg_combine pass after combine but before register allocation.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
		 PASS_POS_INSERT_AFTER, "split1", 1);

  /* Run sh_treg_combine pass after register allocation and basic block
     reordering as this sometimes creates new opportunities.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
		 PASS_POS_INSERT_AFTER, "split4", 1);

  /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
     is known after a conditional branch.
     This must be done after basic blocks and branch conditions have
     stabilized and won't be changed by further passes.  */
  register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
		 PASS_POS_INSERT_BEFORE, "sched2", 1);
}
811
/* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override
   various options, and do some machine dependent initialization.  */
static void
sh_option_override (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;

  /* Map the -m<cpu> target flags onto sh_cpu / assembler_dialect.  The
     checks run from the oldest to the newest CPU, so the most capable
     selected CPU wins.  */
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }

  /* User/privileged mode is supported only on SH3* and SH4*.
     Disable it for everything else.  */
  if (!TARGET_SH3 && TARGET_USERMODE)
    TARGET_USERMODE = false;

  /* Select the SImode division strategy.  An explicit -mdiv= string is
     honored only when the required hardware support is present.  */
  if (! strcmp (sh_div_str, "call-div1"))
    sh_div_strategy = SH_DIV_CALL_DIV1;
  else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
    sh_div_strategy = SH_DIV_CALL_FP;
  else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
    sh_div_strategy = SH_DIV_CALL_TABLE;
  else
    {
      /* Pick one that makes most sense for the target in general.
	 It is not much good to use different functions depending on -Os,
	 since then we'll end up with two different functions when some of
	 the code is compiled for size, and some for speed.  */

      /* SH4 tends to emphasize speed.  */
      if (TARGET_HARD_SH4)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      /* These have their own way of doing things.  */
      else if (TARGET_SH2A)
	sh_div_strategy = SH_DIV_INTRINSIC;
      /* SH1 .. SH3 cores often go into small-footprint systems, so
	 default to the smallest implementation available.  */
      else
	sh_div_strategy = SH_DIV_CALL_DIV1;
    }

  /* Pick the libfunc name that matches the chosen division strategy.  */
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else
    sh_divsi3_libfunc = "__sdivsi3";

  if (sh_branch_cost == -1)
    {
      /* The SH1 does not have delay slots, hence we get a pipeline stall
	 at every branch.  The SH4 is superscalar, so the single delay slot
	 is not sufficient to keep both pipelines filled.
	 In any case, set the default branch cost to '2', as it results in
	 slightly overall smaller code and also enables some if conversions
	 that are required for matching special T bit related insns.  */
      sh_branch_cost = 2;
    }

  /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user.  */
  if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
    TARGET_ZDCBRANCH = 1;

  /* FDPIC code is a special form of PIC, and the vast majority of code
     generation constraints that apply to PIC also apply to FDPIC, so we
     set flag_pic to avoid the need to check TARGET_FDPIC everywhere
     flag_pic is checked.  */
  if (TARGET_FDPIC && !flag_pic)
    flag_pic = 2;

  /* Blank out register names that do not exist on the selected CPU so
     they cannot be referenced from inline asm etc.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  if (flag_pic && ! TARGET_PREFERGOT)
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
	    warning (0, "ignoring -fschedule-insns because of exception "
			"handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns
	       && !global_options_set.x_flag_schedule_insns)
	flag_schedule_insns = 0;
    }

  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
    {
      warning (0, "unwind tables currently require either a frame pointer "
		  "or -maccumulate-outgoing-args for correctness");
      TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
    }

  if (flag_unsafe_math_optimizations)
    {
      /* Enable fsca insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSCA == 0
	  && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
	TARGET_FSCA = 1;

      /* Enable fsrra insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSRRA == 0
	  && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
	TARGET_FSRRA = 1;
    }

  /* Allow fsrra insn only if -funsafe-math-optimizations and
     -ffinite-math-only is enabled.  */
  TARGET_FSRRA = TARGET_FSRRA
		 && flag_unsafe_math_optimizations
		 && flag_finite_math_only;

  /* If the -mieee option was not explicitly set by the user, turn it on
     unless -ffinite-math-only was specified.  See also PR 33135.  */
  if (! global_options_set.x_TARGET_IEEE)
    TARGET_IEEE = ! flag_finite_math_only;

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  sh_override_options_after_change ();

  /* Parse atomic model option and make sure it is valid for the current
     target CPU.  */
  selected_atomic_model_
    = parse_validate_atomic_model_option (sh_atomic_model_str);

  register_sh_passes ();
}
997
998 /* Implement targetm.override_options_after_change. */
999
1000 static void
sh_override_options_after_change(void)1001 sh_override_options_after_change (void)
1002 {
1003 /* Adjust loop, jump and function alignment values (in bytes), if those
1004 were not specified by the user using -falign-loops, -falign-jumps
1005 and -falign-functions options.
1006 32 bit alignment is better for speed, because instructions can be
1007 fetched as a pair from a longword boundary. For size use 16 bit
1008 alignment to get more compact code.
1009 Aligning all jumps increases the code size, even if it might
1010 result in slightly faster code. Thus, it is set to the smallest
1011 alignment possible if not specified by the user. */
1012 if (align_loops == 0)
1013 align_loops = optimize_size ? 2 : 4;
1014
1015 if (align_jumps == 0)
1016 align_jumps = 2;
1017 else if (align_jumps < 2)
1018 align_jumps = 2;
1019
1020 if (align_functions == 0)
1021 align_functions = optimize_size ? 2 : 4;
1022
1023 /* The linker relaxation code breaks when a function contains
1024 alignments that are larger than that at the start of a
1025 compilation unit. */
1026 if (TARGET_RELAX)
1027 {
1028 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1029
1030 /* Also take possible .long constants / mova tables into account. */
1031 if (min_align < 4)
1032 min_align = 4;
1033 if (align_functions < min_align)
1034 align_functions = min_align;
1035 }
1036 }
1037
/* Print the operand address in x to the stream.  Implements the
   TARGET_PRINT_OPERAND_ADDRESS hook; the mode argument is unused.
   Emits the SH '@' addressing syntax for register indirect, displacement,
   indexed, pre-decrement and post-increment addresses; anything else is
   printed as a constant address.  */
static void
sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      /* Register indirect:  @Rn  */
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
	rtx base = XEXP (x, 0);
	rtx index = XEXP (x, 1);

	switch (GET_CODE (index))
	  {
	  case CONST_INT:
	    /* Displacement:  @(disp,Rn)  */
	    fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
		     reg_names[true_regnum (base)]);
	    break;

	  case REG:
	  case SUBREG:
	    {
	      /* Indexed:  @(R0,Rn)  */
	      int base_num = true_regnum (base);
	      int index_num = true_regnum (index);

	      /* If base or index is R0, make sure that it comes first.
		 Usually one of them will be R0, but the order might be wrong.
		 If neither base nor index are R0 it's an error and we just
		 pass it on to the assembler.  This avoids silent wrong code
		 bugs.  */
	      if (base_num == 0 && index_num != 0)
		std::swap (base_num, index_num);

	      fprintf (stream, "@(%s,%s)", reg_names[index_num],
		       reg_names[base_num]);
	      break;
	    }

	  default:
	    gcc_unreachable ();
	  }
      }
      break;

    case PRE_DEC:
      /* Pre-decrement:  @-Rn  */
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      /* Post-increment:  @Rn+  */
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}
1100
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'V'  print the position of a single bit set.
   'W'  print the position of a single bit cleared.
   't'  print a memory address which is a register.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      /* Append the delay slot suffix only if the delay slot insn is
	 actually non-empty and not annulled.  */
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
	  && get_attr_length (final_sequence->insn (1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      /* Pick the function return insn: trapa for trap_exit functions,
	 rte (optionally preceded by resbank) for interrupt handlers,
	 plain rts otherwise.  */
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	/* Emit the /u (unlikely) branch hint when the branch probability
	   note says the branch is taken less than half the time.  */
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	if (note
	    && profile_probability::from_reg_br_prob_note (XINT (note, 0))
	       < profile_probability::even ())
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    /* N.B.: %R / %S / %T adjust memory addresses by four.
       While they can be used to access 64 bit parts of a larger value
       held in general purpose registers, that won't work with memory -
       neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
	  sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  /* Constant operand: extract the low SImode word via subreg.  */
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
	  sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  /* Constant operand: extract the high SImode word via subreg.  */
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  {
	    machine_mode mode = GET_MODE (x);
	    /* For auto-modified addresses the address itself already
	       steps to the next word; don't add the offset again.  */
	    if (GET_CODE (XEXP (x, 0)) != PRE_DEC
		&& GET_CODE (XEXP (x, 0)) != POST_INC)
	      x = adjust_address (x, SImode, 4);
	    sh_print_operand_address (stream, mode, XEXP (x, 0));
	  }
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (MEM_P (x))
	{
	  switch (GET_MODE (x))
	    {
	    case E_QImode: fputs (".b", stream); break;
	    case E_HImode: fputs (".w", stream); break;
	    case E_SImode: fputs (".l", stream); break;
	    case E_SFmode: fputs (".s", stream); break;
	    case E_DFmode: fputs (".d", stream); break;
	    default: gcc_unreachable ();
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  sh_print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  sh_print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (hard_regno_nregs (regno, inner_mode)
		     - hard_regno_nregs (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
1458
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true for the
   punctuation characters that sh_print_operand handles as operand
   modifier codes without an accompanying operand.  */
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '.':
    case '#':
    case '@':
    case ',':
    case '$':
    case '\'':
    case '>':
      return true;
    default:
      return false;
    }
}
1465
1466 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1467 static bool
sh_asm_output_addr_const_extra(FILE * file,rtx x)1468 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1469 {
1470 if (GET_CODE (x) == UNSPEC)
1471 {
1472 switch (XINT (x, 1))
1473 {
1474 case UNSPEC_PIC:
1475 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1476 output_addr_const (file, XVECEXP (x, 0, 0));
1477 break;
1478 case UNSPEC_GOT:
1479 output_addr_const (file, XVECEXP (x, 0, 0));
1480 fputs ("@GOT", file);
1481 break;
1482 case UNSPEC_GOTOFF:
1483 output_addr_const (file, XVECEXP (x, 0, 0));
1484 fputs ("@GOTOFF", file);
1485 break;
1486 case UNSPEC_PLT:
1487 output_addr_const (file, XVECEXP (x, 0, 0));
1488 fputs ("@PLT", file);
1489 break;
1490 case UNSPEC_GOTPLT:
1491 output_addr_const (file, XVECEXP (x, 0, 0));
1492 fputs ("@GOTPLT", file);
1493 break;
1494 case UNSPEC_PCREL:
1495 output_addr_const (file, XVECEXP (x, 0, 0));
1496 fputs ("@PCREL", file);
1497 break;
1498 case UNSPEC_DTPOFF:
1499 output_addr_const (file, XVECEXP (x, 0, 0));
1500 fputs ("@DTPOFF", file);
1501 break;
1502 case UNSPEC_GOTTPOFF:
1503 output_addr_const (file, XVECEXP (x, 0, 0));
1504 fputs ("@GOTTPOFF", file);
1505 break;
1506 case UNSPEC_TPOFF:
1507 output_addr_const (file, XVECEXP (x, 0, 0));
1508 fputs ("@TPOFF", file);
1509 break;
1510 case UNSPEC_CALLER:
1511 {
1512 char name[32];
1513 /* LPCS stands for Label for PIC Call Site. */
1514 targetm.asm_out.generate_internal_label (name, "LPCS",
1515 INTVAL (XVECEXP (x, 0, 0)));
1516 assemble_name (file, name);
1517 }
1518 break;
1519 case UNSPEC_SYMOFF:
1520 output_addr_const (file, XVECEXP (x, 0, 0));
1521 fputc ('-', file);
1522 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1523 {
1524 fputc ('(', file);
1525 output_addr_const (file, XVECEXP (x, 0, 1));
1526 fputc (')', file);
1527 }
1528 else
1529 output_addr_const (file, XVECEXP (x, 0, 1));
1530 break;
1531 case UNSPEC_PCREL_SYMOFF:
1532 output_addr_const (file, XVECEXP (x, 0, 0));
1533 fputs ("-(", file);
1534 output_addr_const (file, XVECEXP (x, 0, 1));
1535 fputs ("-.)", file);
1536 break;
1537 case UNSPEC_GOTFUNCDESC:
1538 output_addr_const (file, XVECEXP (x, 0, 0));
1539 fputs ("@GOTFUNCDESC", file);
1540 break;
1541 case UNSPEC_GOTOFFFUNCDESC:
1542 output_addr_const (file, XVECEXP (x, 0, 0));
1543 fputs ("@GOTOFFFUNCDESC", file);
1544 break;
1545 default:
1546 return false;
1547 }
1548 return true;
1549 }
1550 else
1551 return false;
1552 }
1553
1554 /* Encode symbol attributes of a SYMBOL_REF into its
1555 SYMBOL_REF_FLAGS. */
1556 static void
sh_encode_section_info(tree decl,rtx rtl,int first)1557 sh_encode_section_info (tree decl, rtx rtl, int first)
1558 {
1559 default_encode_section_info (decl, rtl, first);
1560
1561 if (TREE_CODE (decl) == FUNCTION_DECL
1562 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1563 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1564 }
1565
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  Also legitimizes PIC addresses,
   expands TLS address references, works around R0 pressure problems
   with LRA, and splits out-of-section symbol+offset constants when
   SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P.  May modify operands[0] and
   operands[1] in place and emit setup insns.  */
void
prepare_move_operands (rtx operands[], machine_mode mode)
{
  /* Legitimize PIC references, except for TLS symbols which get their
     own expansion further below.  */
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  /* (const (plus (symbol) (const_int))): legitimize the symbol
	     part and add the offset back with a regular add.  */
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (refers_to_regno_p (R0_REG, operands[1])
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* When the displacement addressing is used, RA will assign r0 to
	 the pseudo register operand for the QI/HImode load/store.
	 This tends to make a long live range for R0 and might cause
	 anomalous register spills in some case with LRA.  See PR
	 target/55212.
	 We split possible load/store to two move insns via r0 so as to
	 shorten R0 live range.  It will make some codes worse but will
	 win on average for LRA.
	 Also when base+index addressing is used and the index term is
	 a subreg, LRA assumes that more hard registers can be available
	 in some situation.  It isn't the case for SH in the problematic
	 case.  We can pre-allocate R0 for that index term to avoid
	 the issue.  See PR target/66591.  */
      else if (sh_lra_p ()
	       && ! TARGET_SH2A
	       && ((REG_P (operands[0]) && MEM_P (operands[1]))
		   || (REG_P (operands[1]) && MEM_P (operands[0]))))
	{
	  bool load_p = REG_P (operands[0]);
	  rtx reg = operands[load_p ? 0 : 1];
	  rtx adr = XEXP (operands[load_p ? 1 : 0], 0);

	  /* QI/HImode displacement load/store of a pseudo: route the
	     value through hard register R0.  */
	  if ((mode == QImode || mode == HImode)
	      && REGNO (reg) >= FIRST_PSEUDO_REGISTER
	      && GET_CODE (adr) == PLUS
	      && REG_P (XEXP (adr, 0))
	      && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
	      && CONST_INT_P (XEXP (adr, 1))
	      && INTVAL (XEXP (adr, 1)) != 0
	      && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
	    {
	      rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
	      emit_move_insn (r0_rtx, operands[1]);
	      operands[1] = r0_rtx;
	    }
	  /* Base+index address with a subreg index: pre-allocate R0 for
	     the index term.  */
	  if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
	      && GET_CODE (adr) == PLUS
	      && REG_P (XEXP (adr, 0))
	      && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
	      && SUBREG_P (XEXP (adr, 1))
	      && REG_P (SUBREG_REG (XEXP (adr, 1))))
	    {
	      rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
	      emit_move_insn (r0_rtx, XEXP (adr, 1));
	      XEXP (adr, 1) = r0_rtx;
	    }
	}
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0 = operands[0];
      rtx op1 = operands[1];
      rtx opc;
      /* Split a (const (plus (tls_symbol) (const_int))) source into the
	 TLS symbol (op1) and the constant addend (opc).  */
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      enum tls_model tls_kind;

      if (! reload_in_progress && ! reload_completed
	  && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  /* Non-PIC TLS models that go through the GOT need the PIC
	     register set up first.  */
	  if (! flag_pic
	      && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
		  || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
		  || tls_kind == TLS_MODEL_INITIAL_EXEC))
	    {
	      static int got_labelno;
	      /* Don't schedule insns for getting GOT address when
		 the first scheduling is enabled, to avoid spill
		 failures for R0.  */
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());
	      emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
	      emit_use (gen_rtx_REG (SImode, PIC_REG));
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());
	    }

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      /* Call __tls_get_addr; the result comes back in R0.  */
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);
	      op1 = tmp;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* Get the module base, then add the per-symbol DTPOFF.  */
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      /* Load the TP offset from the GOT.  */
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      /* Read GBR (the thread pointer) and add the known TPOFF.  */
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_store_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  /* Re-apply the constant addend that was split off above.  */
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }

  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      rtx base, offset;
      split_const (operands[1], &base, &offset);

      /* If symbol+offset may point outside the symbol's section, load
	 the symbol and add the offset separately.  */
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	{
	  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
	  emit_move_insn (tmp, base);
	  if (!arith_operand (offset, mode))
	    offset = force_reg (mode, offset);
	  emit_insn (gen_add3_insn (operands[0], tmp, offset));
	}
    }
}
1801
1802 /* Implement the canonicalize_comparison target hook for the combine
1803 pass. For the target hook this function is invoked via
1804 sh_canonicalize_comparison. This function is also re-used to
1805 canonicalize comparisons in cbranch pattern expanders. */
1806 static void
sh_canonicalize_comparison(enum rtx_code & cmp,rtx & op0,rtx & op1,machine_mode mode,bool op0_preserve_value)1807 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1808 machine_mode mode,
1809 bool op0_preserve_value)
1810 {
1811 /* When invoked from within the combine pass the mode is not specified,
1812 so try to get it from one of the operands. */
1813 if (mode == VOIDmode)
1814 mode = GET_MODE (op0);
1815 if (mode == VOIDmode)
1816 mode = GET_MODE (op1);
1817
1818 // We need to have a mode to do something useful here.
1819 if (mode == VOIDmode)
1820 return;
1821
1822 // Currently, we don't deal with floats here.
1823 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1824 return;
1825
1826 // Make sure that the constant operand is the second operand.
1827 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1828 {
1829 if (op0_preserve_value)
1830 return;
1831
1832 std::swap (op0, op1);
1833 cmp = swap_condition (cmp);
1834 }
1835
1836 if (CONST_INT_P (op1))
1837 {
1838 /* Try to adjust the constant operand in such a way that available
1839 comparison insns can be utilized better and the constant can be
1840 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1841 constant pool. */
1842 const HOST_WIDE_INT val = INTVAL (op1);
1843
1844 /* x > -1 --> x >= 0
1845 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1846 x <= -1 --> x < 0
1847 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1848 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1849 {
1850 cmp = cmp == GT ? GE : LT;
1851 op1 = gen_int_mode (val + 1, mode);
1852 }
1853
1854 /* x >= 1 --> x > 0
1855 x >= 0x80 --> x > 0x7F
1856 x < 1 --> x <= 0
1857 x < 0x80 --> x <= 0x7F */
1858 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1859 {
1860 cmp = cmp == GE ? GT : LE;
1861 op1 = gen_int_mode (val - 1, mode);
1862 }
1863
1864 /* unsigned x >= 1 --> x != 0
1865 unsigned x < 1 --> x == 0 */
1866 else if (val == 1 && (cmp == GEU || cmp == LTU))
1867 {
1868 cmp = cmp == GEU ? NE : EQ;
1869 op1 = CONST0_RTX (mode);
1870 }
1871
1872 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1873 unsigned x < 0x80 --> unsigned x < 0x7F */
1874 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1875 {
1876 cmp = cmp == GEU ? GTU : LEU;
1877 op1 = gen_int_mode (val - 1, mode);
1878 }
1879
1880 /* unsigned x > 0 --> x != 0
1881 unsigned x <= 0 --> x == 0 */
1882 else if (val == 0 && (cmp == GTU || cmp == LEU))
1883 cmp = cmp == GTU ? NE : EQ;
1884
1885 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1886 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1887 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1888 && val == 0x7FFFFFFF)
1889 {
1890 cmp = cmp == GTU ? LT : GE;
1891 op1 = const0_rtx;
1892 }
1893
1894 /* unsigned x >= 0x80000000 --> signed x < 0
1895 unsigned x < 0x80000000 --> signed x >= 0 */
1896 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1897 && (unsigned HOST_WIDE_INT)val
1898 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1899 {
1900 cmp = cmp == GEU ? LT : GE;
1901 op1 = const0_rtx;
1902 }
1903 }
1904 }
1905
1906 /* This function implements the canonicalize_comparison target hook.
1907 This wrapper around the internally used sh_canonicalize_comparison
1908 function is needed to do the enum rtx_code <-> int conversion.
1909 Target hooks cannot use enum rtx_code in its definition. */
1910 static void
sh_canonicalize_comparison(int * code,rtx * op0,rtx * op1,bool op0_preserve_value)1911 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1912 bool op0_preserve_value)
1913 {
1914 enum rtx_code tmp_code = (enum rtx_code)*code;
1915 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1916 VOIDmode, op0_preserve_value);
1917 *code = (int)tmp_code;
1918 }
1919
1920 /* This function implements the legitimate_combined_insn target hook,
1921 which the combine pass uses to early reject combined insns, before
1922 it tries to recog the insn and determine its cost. */
1923 static bool
sh_legitimate_combined_insn(rtx_insn * insn)1924 sh_legitimate_combined_insn (rtx_insn* insn)
1925 {
1926 /* Reject combinations of memory loads and zero extensions, as these
1927 interfere with other combine patterns such as zero extracts and bit
1928 tests. The SH2A movu.{b|w} insns are formed later in the
1929 'sh_optimize_extu_exts' pass after combine/split1. */
1930 rtx p = PATTERN (insn);
1931 if (GET_CODE (p) == SET
1932 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1933 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1934 && MEM_P (XEXP (XEXP (p, 1), 0)))
1935 return false;
1936
1937 return true;
1938 }
1939
1940 bool
sh_fixed_condition_code_regs(unsigned int * p1,unsigned int * p2)1941 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1942 {
1943 *p1 = T_REG;
1944 *p2 = INVALID_REGNUM;
1945 return true;
1946 }
1947
1948 /* Try to calculate the branch distance of a conditional branch in bytes.
1949
1950 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1951 walk from this insn into the next (fall-through) basic block and see if
1952 we hit the label. */
1953 unsigned int
sh_cbranch_distance(rtx_insn * _cbranch_insn,unsigned int max_dist)1954 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1955 {
1956 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1957
1958 if (dump_file)
1959 {
1960 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1961 print_rtl_single (dump_file, cbranch_insn);
1962 }
1963
1964 unsigned int dist = 0;
1965
1966 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1967 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1968 {
1969 const unsigned int i_len = get_attr_length (i);
1970 dist += i_len;
1971
1972 if (dump_file)
1973 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1974 INSN_UID (i), i_len, dist);
1975
1976 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1977 {
1978 if (l == cbranch_insn->jump_target ())
1979 {
1980 if (dump_file)
1981 fprintf (dump_file, " cbranch dist = %u\n", dist);
1982 return dist;
1983 }
1984 break;
1985 }
1986 }
1987
1988 if (dump_file)
1989 fprintf (dump_file, " cbranch dist = unknown\n");
1990
1991 return unknown_cbranch_distance;
1992 }
1993
/* Prepare OPERANDS of a cbranch expansion in MODE:  canonicalize the
   comparison, force operands[1] into a register, and decide whether
   operands[2] may remain a constant.  COMPARISON is the requested rtx
   code, or LAST_AND_UNUSED_RTX_CODE to take the code from the
   comparison rtx in operands[0].  Returns the (possibly canonicalized)
   comparison code to be used.  */
enum rtx_code
prepare_cbranch_operands (rtx *operands, machine_mode mode,
			  enum rtx_code comparison)
{
  /* New pseudos are created below, so this must run during expansion.  */
  gcc_assert (can_create_pseudo_p ());

  /* LAST_AND_UNUSED_RTX_CODE serves as the "not specified" marker.  */
  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);

  sh_canonicalize_comparison (comparison, operands[1], operands[2],
			      mode, false);

  /* Remember the pre-force_reg operand; the R0 check below must look at
     the original rtx, not the fresh pseudo.  */
  rtx op1 = operands[1];
  operands[1] = force_reg (mode, op1);

  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    operands[2] = force_reg (mode, operands[2]);

  return comparison;
}
2028
2029 static void
expand_cbranchsi4(rtx * operands,enum rtx_code comparison,profile_probability probability)2030 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2031 profile_probability probability)
2032 {
2033 rtx (*branch_expander) (rtx) = gen_branch_true;
2034 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2035 switch (comparison)
2036 {
2037 case NE: case LT: case LE: case LTU: case LEU:
2038 comparison = reverse_condition (comparison);
2039 branch_expander = gen_branch_false;
2040 default: ;
2041 }
2042 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2043 gen_rtx_fmt_ee (comparison, SImode,
2044 operands[1], operands[2])));
2045 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2046 if (probability.initialized_p ())
2047 add_reg_br_prob_note (jump, probability);
2048 }
2049
2050 void
expand_cbranchsi4(rtx * operands,enum rtx_code comparison)2051 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2052 {
2053 expand_cbranchsi4 (operands, comparison,
2054 profile_probability::uninitialized ());
2055 }
2056
/* ??? How should we distribute probabilities when more than one branch
   is generated.  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.
 */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  /* Comparison codes for the three possible SImode branches that make up
     the DImode branch:  the high-word branch to the taken label, the
     high-word branch that skips the low-word test, and the low-word
     branch.  LAST_AND_UNUSED_RTX_CODE marks a branch as not needed.  */
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx_code_label *skip_label = NULL;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  profile_probability prob, rev_prob;
  profile_probability msw_taken_prob = profile_probability::uninitialized (),
		      msw_skip_prob = profile_probability::uninitialized (),
		      lsw_taken_prob = profile_probability::uninitialized ();

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  /* Split both operands into their SImode high and low words.  */
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
  prob = split_branch_probability;
  rev_prob = prob.invert ();
  /* Decide which of the three branches are needed and with which codes.  */
  switch (comparison)
    {
    case EQ:
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob.initialized_p ())
	{
	  /* FIXME: This is not optimal.  We do not really know the
	     probability that values differ by MSW only, but we should
	     probably distribute probabilities more evenly.  */
	  msw_skip_prob = rev_prob;
	  lsw_taken_prob = prob > profile_probability::never ()
			   ? profile_probability::guessed_always ()
			   : profile_probability::guessed_never ();
	}
      break;
    case NE:
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = profile_probability::guessed_never ();
      break;
    case GTU: case GT:
      msw_taken = comparison;
      /* If the low word of op2 is all ones, the low-word test can never
	 succeed, so only the high-word branch remains.  */
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    {
	      msw_skip = swap_condition (LTU);
	      break;
	    }
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
		  + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
		  + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
  /* Distribute the overall probability over the individual branches.  */
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob.initialized_p ()
	  && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob.apply_scale (1, 2);
	  msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
						rev_prob.to_reg_br_prob_base ()
						+ REG_BR_PROB_BASE);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = profile_probability::guessed_always ();
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  operands[1] = op1h;
  operands[2] = op2h;

  if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
    {
      rtx taken_label = operands[3];

      /* Operands were possibly modified, but msw_skip doesn't expect this.
	 Always use the original ones.  */
      if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
	{
	  operands[1] = op1h;
	  operands[2] = op2h;
	}

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
    expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
  if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
    emit_label (skip_label);
  return true;
}
2215
2216 /* Given an operand, return 1 if the evaluated operand plugged into an
2217 if_then_else will result in a branch_true, 0 if branch_false, or
2218 -1 if neither nor applies. The truth table goes like this:
2219
2220 op | cmpval | code | result
2221 ---------+--------+---------+--------------------
2222 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2223 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2224 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2225 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2226 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2227 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2228 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2229 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2230 int
sh_eval_treg_value(rtx op)2231 sh_eval_treg_value (rtx op)
2232 {
2233 if (t_reg_operand (op, GET_MODE (op)))
2234 return 1;
2235 if (negt_reg_operand (op, GET_MODE (op)))
2236 return 0;
2237
2238 rtx_code code = GET_CODE (op);
2239 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2240 return -1;
2241
2242 int cmpop = code == EQ ? 1 : 0;
2243 int cmpval = INTVAL (XEXP (op, 1));
2244 if (cmpval != 0 && cmpval != 1)
2245 return -1;
2246
2247 int t;
2248 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2249 t = 0;
2250 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2251 t = 1;
2252 else
2253 return -1;
2254
2255 return t ^ (cmpval == cmpop);
2256 }
2257
2258 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2259 of floating-point comparisons. */
2260 static void
sh_emit_set_t_insn(rtx insn,machine_mode mode)2261 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2262 {
2263 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2264 && GET_CODE (insn) != PARALLEL)
2265 {
2266 insn = gen_rtx_PARALLEL (VOIDmode,
2267 gen_rtvec (3, insn,
2268 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2269 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2270 }
2271 emit_insn (insn);
2272 }
2273
2274 /* Prepare the operands for an scc instruction; make sure that the
2275 compare has been done and the result is in T_REG. */
2276 void
sh_emit_scc_to_t(enum rtx_code code,rtx op0,rtx op1)2277 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2278 {
2279 rtx t_reg = get_t_reg_rtx ();
2280 enum rtx_code oldcode = code;
2281
2282 /* First need a compare insn. */
2283 switch (code)
2284 {
2285 case NE:
2286 /* It isn't possible to handle this case. */
2287 gcc_unreachable ();
2288 case LT:
2289 code = GT;
2290 break;
2291 case LE:
2292 code = GE;
2293 break;
2294 case LTU:
2295 code = GTU;
2296 break;
2297 case LEU:
2298 code = GEU;
2299 break;
2300 default:
2301 break;
2302 }
2303 if (code != oldcode)
2304 std::swap (op0, op1);
2305
2306 machine_mode mode = GET_MODE (op0);
2307 if (mode == VOIDmode)
2308 mode = GET_MODE (op1);
2309
2310 op0 = force_reg (mode, op0);
2311 if ((code != EQ && code != NE
2312 && (op1 != const0_rtx
2313 || code == GTU || code == GEU || code == LTU || code == LEU))
2314 || (mode == DImode && op1 != const0_rtx)
2315 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2316 op1 = force_reg (mode, op1);
2317
2318 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2319 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2320 mode);
2321 }
2322
/* Called from the md file, set up the operands of a compare instruction.
   operands[0] is the comparison rtx, operands[1] and operands[2] the two
   values being compared, and operands[3] the branch target label.  */
void
sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
{
  enum rtx_code code = GET_CODE (operands[0]);
  enum rtx_code branch_code;
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx insn;
  /* Set when an additional fcmp/eq is needed (IEEE GE handling).  */
  bool need_ccmpeq = false;

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* Floating-point compares always take both operands in registers.  */
      op0 = force_reg (mode, op0);
      op1 = force_reg (mode, op1);
    }
  else
    {
      if (code != EQ || mode == DImode)
	{
	  /* Force args into regs, since we can't use constants here.  */
	  op0 = force_reg (mode, op0);
	  if (op1 != const0_rtx || code == GTU  || code == GEU)
	    op1 = force_reg (mode, op1);
        }
    }

  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* Handle LT and, depending on TARGET_IEEE/TARGET_SH2E, LE or GE by
	 swapping the operands and using the swapped condition.  */
      if (code == LT
	  || (code == LE && TARGET_IEEE && TARGET_SH2E)
	  || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
	{
	  std::swap (op0, op1);
	  code = swap_condition (code);
	}

      /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only.  */
      if (code == GE)
	{
	  gcc_assert (TARGET_IEEE && TARGET_SH2E);
	  need_ccmpeq = true;
	  code = GT;
	}

      /* Now we can have EQ, NE, GT, LE.  NE and LE are then transformed
	 to EQ/GT respectively.  */
      gcc_assert (code == EQ || code == GT || code == NE || code == LE);
    }

  /* Codes without a direct compare insn are emitted as the reversed
     compare followed by a branch-if-false.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GE:
    case GTU:
    case GEU:
      branch_code = code;
      break;
    case NE:
    case LT:
    case LE:
    case LTU:
    case LEU:
      branch_code = reverse_condition (code);
      break;
    default:
      gcc_unreachable ();
    }

  insn = gen_rtx_SET (get_t_reg_rtx (),
		      gen_rtx_fmt_ee (branch_code, SImode, op0, op1));

  sh_emit_set_t_insn (insn, mode);
  if (need_ccmpeq)
    sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);

  if (branch_code == code)
    emit_jump_insn (gen_branch_true (operands[3]));
  else
    emit_jump_insn (gen_branch_false (operands[3]));
}
2405
/* Expand an scc operation:  set operands[0] to the result of comparing
   operands[2] against operands[3] with the comparison code taken from the
   rtx in operands[1].  The result is produced in the T bit and then
   copied, possibly negated, into operands[0].  */
void
sh_emit_compare_and_set (rtx *operands, machine_mode mode)
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx op0 = operands[2];
  rtx op1 = operands[3];
  rtx_code_label *lab = NULL;
  /* When set, the computed condition is the negation of the requested one
     and is fixed up with movnegt below.  */
  bool invert = false;

  op0 = force_reg (mode, op0);
  if ((code != EQ && code != NE
       && (op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    op1 = force_reg (mode, op1);

  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* LT / LE are handled by swapping the operands and using the
	 swapped condition.  */
      if (code == LT || code == LE)
	{
	  std::swap (op0, op1);
	  code = swap_condition (code);
	}
      if (code == GE)
	{
	  if (TARGET_IEEE)
	    {
	      /* IEEE GE:  test EQ first and branch over the GT test when
		 the operands compare equal.  */
	      lab = gen_label_rtx ();
	      sh_emit_scc_to_t (EQ, op0, op1);
	      emit_jump_insn (gen_branch_true (lab));
	      code = GT;
	    }
	  else
	    {
	      /* Non-IEEE GE is computed as NOT (LT).  */
	      code = LT;
	      invert = true;
	    }
	}
    }

  /* NE is computed as NOT (EQ).  */
  if (code == NE)
    {
      code = EQ;
      invert = true;
    }

  sh_emit_scc_to_t (code, op0, op1);
  if (lab)
    emit_label (lab);
  if (invert)
    emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
  else
    emit_move_insn (operands[0], get_t_reg_rtx ());
}
2461
2462 /* Functions to output assembly code. */
2463
/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  /* Push onto the stack (pre-decrement addressing):  store the high word
     first so the two words end up in memory in the right order.  */
  if (MEM_P (dst)
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0" "\n"
	   " mov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      /* Moves out of the MACH/MACL pair need the sts insn.  */
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0" "\n"
	       " sts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */
      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0" "\n"
	       " mov %1,%0";
      else
	return "mov %1,%0" "\n"
	       " mov %T1,%T0";
    }
  else if (CONST_INT_P (src))
    {
      /* Materialize the sign word first, then the low word.  */
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (MEM_P (src))
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      /* Find the base register of the address so we can detect an overlap
	 with the destination register pair.  */
      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (!REG_P (XEXP (inside, 1)));
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0" "\n"
		 " mov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0" "\n"
		 " mov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0" "\n"
	       " mov.l %1,%0";
    }

  return "mov.l %1,%0" "\n"
	 " mov.l %T1,%T0";
}
2552
2553 /* Print an instruction which would have gone into a delay slot after
2554 another instruction, but couldn't because the other instruction expanded
2555 into a sequence where putting the slot insn at the end wouldn't work. */
2556 static void
print_slot(rtx_sequence * seq)2557 print_slot (rtx_sequence *seq)
2558 {
2559 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2560
2561 seq->insn (1)->set_deleted ();
2562 }
2563
/* Output code for a jump to a distant target.  INSN is the jump insn and
   OP its target.  The target address (or, for PIC, a displacement to it)
   is emitted in a constant slot after the jump, loaded into a register
   and then used with braf resp. jmp.  */
const char *
output_far_jump (rtx_insn *insn, rtx op)
{
  struct { rtx lab, reg, op; } this_jmp;
  rtx_code_label *braf_base_lab = NULL;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx_insn *prev;

  this_jmp.lab = gen_label_rtx ();

  /* On SH2 a 16-bit displacement loaded with mov.w suffices when the
     target is in range and the jump does not cross between hot and cold
     sections.  */
  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766
      && ! CROSSING_JUMP_P (insn))
    {
      far = 0;
      jump = "mov.w %O0,%1" "\n"
	     " braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1" "\n"
		   " braf %1";
	  else
	    /* Pre-SH2 PIC:  compute the absolute target from the
	       PC-relative constant, using r0 as a temporary.  */
	    jump = "mov.l r0,@-r15" "\n"
		   " mova %O0,r0" "\n"
		   " mov.l @r0,%1" "\n"
		   " add r0,%1" "\n"
		   " mov.l @r15+,r0" "\n"
		   " jmp @%1";
	}
      else
	jump = "mov.l %O0,%1" "\n"
	       " jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      /* r0 is clobbered by the PIC address computation above; switch the
	 save/restore to r1 when the scratch register is r0.  */
      if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15" "\n"
	       " mova %O0,r0" "\n"
	       " mov.l @r0,r1" "\n"
	       " add r1,r0" "\n"
	       " mov.l @r15+,r1" "\n"
	       " jmp @%1";
      output_asm_insn (jump, &this_jmp.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      /* No scratch register available; save and restore r13 around the
	 jump instead.  */
      this_jmp.reg = gen_rtx_REG (SImode, 13);
      output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this_jmp.lab);
      output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      /* Emit a base label for the PIC displacement constant below.  */
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
  this_jmp.op = op;
  /* Emit the constant slot:  an absolute address, or for PIC a
     displacement relative to the appropriate base label.  */
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this_jmp.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this_jmp.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
  return "";
}
2654
/* Local label counter, used for constants in the pool and inside
   pattern branches.  Users take the current value and post-increment
   (see e.g. output_branch).  */
static int lf = 100;
2658
/* Output code for ordinary branches.  LOGIC is nonzero when the branch
   is taken while the T bit is set, zero when it is taken while the T bit
   is clear.  The length attribute of INSN selects which code sequence is
   needed.  */
const char *
output_branch (int logic, rtx_insn *insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
	      && get_attr_length (final_sequence->insn (1)))
	    {
	      /* Emit the inverted short branch around an unconditional
		 bra to the real target, keeping the delay slot insn.  */
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* FALLTHRU */
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* FALLTHRU */
    case 4:
      {
	/* Short branch with delay slot; the assembler dialect selects
	   the "/s" vs. ".s" spelling of the delayed-branch suffix.  */
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
2745
/* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
   fill in operands 9 as a label to the successor insn.
   We try to use jump threading where possible.
   IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */
const char *
output_branchy_insn (enum rtx_code code, const char *templ,
		     rtx_insn *insn, rtx *operands)
{
  rtx_insn *next_insn = NEXT_INSN (insn);

  if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  rtx_code_label *lab = gen_label_rtx ();
	  emit_label_after (lab, next_insn);
	  INSN_ADDRESSES_NEW (lab,
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  operands[9] = lab;
	  return templ;
	}
      else
	{
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  /* Following branch assumed taken:  thread directly to its
	     target if the displacement is within range.  */
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return templ;
	    }
	}
    }
  /* Fallback:  emit a fresh label right after INSN and use that as
     operands[9].  */
  rtx_code_label *lab = gen_label_rtx ();
  emit_label_after (lab, insn);
  INSN_ADDRESSES_NEW (lab,
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  operands[9] = lab;
  return templ;
}
2794
2795 const char *
output_ieee_ccmpeq(rtx_insn * insn,rtx * operands)2796 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2797 {
2798 return output_branchy_insn (NE, "bt %l9" "\n"
2799 " fcmp/eq %1,%0",
2800 insn, operands);
2801 }
2802
2803 /* Output the start of the assembler file. */
2804 static void
sh_file_start(void)2805 sh_file_start (void)
2806 {
2807 default_file_start ();
2808
2809 if (TARGET_ELF)
2810 /* We need to show the text section with the proper
2811 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2812 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2813 will complain. We can teach GAS specifically about the
2814 default attributes for our choice of text section, but
2815 then we would have to change GAS again if/when we change
2816 the text section name. */
2817 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2818 else
2819 /* Switch to the data section so that the coffsem symbol
2820 isn't in the text section. */
2821 switch_to_section (data_section);
2822
2823 if (TARGET_LITTLE_ENDIAN)
2824 fputs ("\t.little\n", asm_out_file);
2825 }
2826
2827 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2828 need to be output as pointers to function descriptors for
2829 FDPIC. */
2830
2831 static bool
sh_assemble_integer(rtx value,unsigned int size,int aligned_p)2832 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2833 {
2834 if (TARGET_FDPIC && size == UNITS_PER_WORD
2835 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2836 {
2837 fputs ("\t.long\t", asm_out_file);
2838 output_addr_const (asm_out_file, value);
2839 fputs ("@FUNCDESC\n", asm_out_file);
2840 return true;
2841 }
2842 return default_assemble_integer (value, size, aligned_p);
2843 }
2844
2845 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2846 static bool
unspec_caller_rtx_p(rtx pat)2847 unspec_caller_rtx_p (rtx pat)
2848 {
2849 rtx base, offset;
2850 split_const (pat, &base, &offset);
2851
2852 if (GET_CODE (base) == UNSPEC)
2853 {
2854 if (XINT (base, 1) == UNSPEC_CALLER)
2855 return true;
2856 for (int i = 0; i < XVECLEN (base, 0); i++)
2857 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2858 return true;
2859 }
2860 return false;
2861 }
2862
2863 /* Indicate that INSN cannot be duplicated. This is true for insn
2864 that generates a unique label. */
2865 static bool
sh_cannot_copy_insn_p(rtx_insn * insn)2866 sh_cannot_copy_insn_p (rtx_insn *insn)
2867 {
2868 if (!reload_completed || !flag_pic)
2869 return false;
2870
2871 if (!NONJUMP_INSN_P (insn))
2872 return false;
2873 if (asm_noperands (insn) >= 0)
2874 return false;
2875
2876 rtx pat = PATTERN (insn);
2877
2878 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2879 return false;
2880
2881 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2882 {
2883 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2884 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2885 return true;
2886 }
2887
2888 if (GET_CODE (pat) != SET)
2889 return false;
2890 pat = SET_SRC (pat);
2891
2892 if (unspec_caller_rtx_p (pat))
2893 return true;
2894
2895 return false;
2896 }
2897
/* Number of instructions used to make an arithmetic right shift by N.
   Indexed by shift count 0..31.  Entries of 8 mark counts for which no
   short inline sequence exists (see expand_ashiftrt, which calls a
   helper routine for those).  */
static const char ashiftrt_insns[] =
{ 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Description of a logical left or right shift, when expanded to a sequence
   of 1/2/8/16 shifts.
   Notice that one bit right shifts clobber the T bit.  One bit left shifts
   are done with an 'add Rn,Rm' insn and thus do not clobber the T bit.  */
enum
{
  ASHL_CLOBBERS_T = 1 << 0,	/* Left shift sequence clobbers T.  */
  LSHR_CLOBBERS_T = 1 << 1	/* Right shift sequence clobbers T.  */
};

struct ashl_lshr_sequence
{
  char insn_count;		/* Number of shift insns in the sequence.  */
  signed char amount[6];	/* Per-insn shift amounts; a negative value
				   means a shift in the opposite direction
				   (see gen_ashift).  */
  char clobbers_t;		/* ASHL_CLOBBERS_T / LSHR_CLOBBERS_T flags.  */
};
2918
/* Shift sequences for full-width (no high-bit slack) shifts, indexed by
   shift count 0..31.  */
static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
{
  { 0, { 0 },		    0 },		// 0
  { 1, { 1 },		    LSHR_CLOBBERS_T },
  { 1, { 2 },		    0 },
  { 2, { 2, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 2, 2 },	    0 },		// 4
  { 3, { 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 2, 2, 2 },	    0 },
  { 4, { 2, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 1, { 8 },		    0 },		// 8
  { 2, { 8, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 8, 2 },	    0 },
  { 3, { 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, 2, 2 },	    0 },		// 12
  { 4, { 8, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, -2, 8 },	    0 },
  { 3, { 8, -1, 8 },	    ASHL_CLOBBERS_T },
  { 1, { 16 },		    0 },		// 16
  { 2, { 16, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 16, 2 },	    0 },
  { 3, { 16, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 2, 2 },	    0 },		// 20
  { 4, { 16, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, -2, 8 },	    0 },
  { 3, { 16, -1, 8 },	    ASHL_CLOBBERS_T },
  { 2, { 16, 8 },	    0 },		// 24
  { 3, { 16, 1, 8 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 8, 2 },	    0 },
  { 4, { 16, 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 4, { 16, 8, 2, 2 },	    0 },		// 28
  { 4, { 16, -1, -2, 16 },  ASHL_CLOBBERS_T },
  { 3, { 16, -2, 16 },	    0 },

  /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
     For a left shift by 31 a 2 insn and-rotl sequences can be used.
     However, the shift-and combiner code needs this entry here to be in
     terms of real shift insns.  */
  { 3, { 16, -1, 16 },	    ASHL_CLOBBERS_T }
};
2959
/* Individual shift amounts for shift amounts < 16, up to three highmost
   bits might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  Entries >= 16 mirror ashl_lshr_seq.  */
static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
{
  { 0, { 0 },		    0 },		// 0
  { 1, { 1 },		    LSHR_CLOBBERS_T },
  { 1, { 2 },		    0 },
  { 2, { 2, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 2, 2 },	    0 },		// 4
  { 3, { 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 2, { 8, -2 },	    0 },
  { 2, { 8, -1 },	    ASHL_CLOBBERS_T },
  { 1, { 8 },		    0 },		// 8
  { 2, { 8, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 8, 2 },	    0 },
  { 3, { 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, 2, 2 },	    0 },		// 12
  { 3, { 16, -2, -1 },	    ASHL_CLOBBERS_T },
  { 2, { 16, -2 },	    0 },
  { 2, { 16, -1 },	    ASHL_CLOBBERS_T },
  { 1, { 16 },		    0 },		// 16
  { 2, { 16, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 16, 2 },	    0 },
  { 3, { 16, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 2, 2 },	    0 },		// 20
  { 4, { 16, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, -2, 8 },	    0 },
  { 3, { 16, -1, 8 },	    ASHL_CLOBBERS_T },
  { 2, { 16, 8 },	    0 },		// 24
  { 3, { 16, 1, 8 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 8, 2 },	    0 },
  { 4, { 16, 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 4, { 16, 8, 2, 2 },	    0 },		// 28
  { 4, { 16, -1, -2, 16 },  ASHL_CLOBBERS_T },
  { 3, { 16, -2, 16 },	    0 },
  { 3, { 16, -1, 16 },	    ASHL_CLOBBERS_T }
};
2998
2999 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3000 will clobber the T bit. */
3001 bool
sh_ashlsi_clobbers_t_reg_p(rtx shift_amount)3002 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3003 {
3004 gcc_assert (CONST_INT_P (shift_amount));
3005
3006 const int shift_amount_i = INTVAL (shift_amount) & 31;
3007
3008 /* Special case for shift count of 31: use and-rotl sequence. */
3009 if (shift_amount_i == 31)
3010 return true;
3011
3012 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3013 & ASHL_CLOBBERS_T) != 0;
3014 }
3015
3016 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3017 instructions will clobber the T bit. */
3018 bool
sh_lshrsi_clobbers_t_reg_p(rtx shift_amount)3019 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3020 {
3021 gcc_assert (CONST_INT_P (shift_amount));
3022
3023 /* For right shifts the constant might be negative. */
3024 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3025
3026 /* Special case for shift count of 31: use shll-movt sequence. */
3027 if (shift_amount_i == 31)
3028 return true;
3029
3030 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3031 & LSHR_CLOBBERS_T) != 0;
3032 }
3033
3034 /* Return true if it is potentially beneficial to use a dynamic shift
3035 instruction (shad / shar) instead of a combination of 1/2/8/16
3036 shift instructions for the specified shift count.
3037 If dynamic shifts are not available, always return false. */
3038 bool
sh_dynamicalize_shift_p(rtx count)3039 sh_dynamicalize_shift_p (rtx count)
3040 {
3041 gcc_assert (CONST_INT_P (count));
3042
3043 /* For right shifts the constant might be negative. */
3044 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3045 int insn_count;
3046
3047 /* For left and right shifts, there are shorter 2 insn sequences for
3048 shift amounts of 31. */
3049 if (shift_amount_i == 31)
3050 insn_count = 2;
3051 else
3052 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3053
3054 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3055 }
3056
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  True exactly for N in { 7, 15 }.  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3062
3063 /* Return the cost of a shift. */
3064 static inline int
shiftcosts(rtx x)3065 shiftcosts (rtx x)
3066 {
3067 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3068 {
3069 if (GET_MODE (x) == DImode
3070 && CONST_INT_P (XEXP (x, 1))
3071 && INTVAL (XEXP (x, 1)) == 1)
3072 return 2;
3073
3074 /* Everything else is invalid, because there is no pattern for it. */
3075 return -1;
3076 }
3077 /* If shift by a non constant, then this will be expensive. */
3078 if (!CONST_INT_P (XEXP (x, 1)))
3079 return SH_DYNAMIC_SHIFT_COST;
3080
3081 /* Otherwise, return the true cost in instructions. Cope with out of range
3082 shift counts more or less arbitrarily. */
3083 int value = INTVAL (XEXP (x, 1)) & 31;
3084
3085 if (GET_CODE (x) == ASHIFTRT)
3086 {
3087 int cost = ashiftrt_insns[value];
3088 /* If dynamic shifts are available and profitable in this case, then we
3089 put the constant in a reg and use shad. */
3090 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3091 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3092 return cost;
3093 }
3094 else
3095 return ashl_lshr_seq[value].insn_count;
3096 }
3097
3098 /* Return the cost of an AND/XOR/IOR operation. */
3099 static inline int
and_xor_ior_costs(rtx x,int code)3100 and_xor_ior_costs (rtx x, int code)
3101 {
3102 /* On SH1-4 we have only max. SImode operations.
3103 Double the cost for modes > SImode. */
3104 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3105
3106 /* A logical operation with two registers is a single cycle
3107 instruction. */
3108 if (!CONST_INT_P (XEXP (x, 1)))
3109 return 1 * cost_scale;
3110
3111 int i = INTVAL (XEXP (x, 1));
3112
3113 /* These constants are single cycle extu.[bw] instructions. */
3114 if ((i == 0xff || i == 0xffff) && code == AND)
3115 return 1 * cost_scale;
3116 /* Constants that can be used in an instruction as an immediate are
3117 a single cycle, but this requires r0, so make it a little more
3118 expensive. */
3119 if (CONST_OK_FOR_K08 (i))
3120 return 2 * cost_scale;
3121 /* Constants that can be loaded with a mov immediate need one more cycle.
3122 This case is probably unnecessary. */
3123 if (CONST_OK_FOR_I08 (i))
3124 return 2 * cost_scale;
3125 /* Any other constant requires an additional 2 cycle pc-relative load.
3126 This case is probably unnecessary. */
3127 return 3 * cost_scale;
3128 }
3129
/* Return the cost of an addition or a subtraction X, in instructions.  */
static inline int
addsubcosts (rtx x)
{
  if (GET_MODE (x) == SImode)
    {
      /* The addc or subc patterns will eventually become one or two
	 instructions.  Below are some costs for some of the patterns
	 which combine would reject because the costs of the individual
	 insns in the patterns are lower.

	 FIXME: It would be much easier if we had something like insn cost
	 attributes and the cost calculation machinery used those attributes
	 in the first place.  This would eliminate redundant recog-like C
	 code to calculate costs of complex patterns.  */
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);

      if (GET_CODE (x) == PLUS)
	{
	  /* (x & 1) + (y + y) or (x & 1) + (y * 2): shapes that combine
	     to an addc-style pattern; treat as one insn.  */
	  if (GET_CODE (op0) == AND
	      && XEXP (op0, 1) == const1_rtx
	      && (GET_CODE (op1) == PLUS
		  || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
	    return 1;

	  /* (x * 2) + (y >> 31): another addc-style combine shape.  */
	  if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
	      && GET_CODE (op1) == LSHIFTRT
	      && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
	    return 1;
	}
      /* Let's assume that adding the result of an insns that stores into
	 the T bit is cheap.  */
      if (treg_set_expr (op1, SImode))
	return 1;
      if (treg_set_expr (op0, SImode))
	return 1;
    }

  /* On SH1-4 we have only max. SImode operations.
     Double the cost for modes > SImode.  */
  const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;

  /* Adding a register is a single cycle insn.  */
  if (REG_P (XEXP (x, 1))
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1 * cost_scale;

  /* Likewise for small constants.  */
  if (CONST_INT_P (XEXP (x, 1))
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1 * cost_scale;

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3 * cost_scale;
}
3187
3188 /* Return the cost of a multiply. */
3189 static inline int
multcosts(rtx x ATTRIBUTE_UNUSED)3190 multcosts (rtx x ATTRIBUTE_UNUSED)
3191 {
3192 if (sh_multcost >= 0)
3193 return sh_multcost;
3194
3195 if (TARGET_SH2)
3196 {
3197 /* We have a mul insn, so we can never take more than the mul and the
3198 read of the mac reg, but count more because of the latency and extra
3199 reg usage. */
3200 if (optimize_size)
3201 return 2;
3202 return 3;
3203 }
3204
3205 /* If we're aiming at small code, then just count the number of
3206 insns in a multiply call sequence. */
3207 if (optimize_size)
3208 return 5;
3209
3210 /* Otherwise count all the insns in the routine we'd be calling too. */
3211 return 20;
3212 }
3213
3214 /* Compute a (partial) cost for rtx X. Return true if the complete
3215 cost has been computed, and false if subexpressions should be
3216 scanned. In either case, *TOTAL contains the cost result. */
3217 static bool
sh_rtx_costs(rtx x,machine_mode mode ATTRIBUTE_UNUSED,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed ATTRIBUTE_UNUSED)3218 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3219 int opno ATTRIBUTE_UNUSED,
3220 int *total, bool speed ATTRIBUTE_UNUSED)
3221 {
3222 int code = GET_CODE (x);
3223
3224 switch (code)
3225 {
3226 /* The lower-subreg pass decides whether to split multi-word regs
3227 into individual regs by looking at the cost for a SET of certain
3228 modes with the following patterns:
3229 (set (reg) (reg))
3230 (set (reg) (const_int 0))
3231 On machines that support vector-move operations a multi-word move
3232 is the same cost as individual reg move. On SH there is no
3233 vector-move, so we have to provide the correct cost in the number
3234 of move insns to load/store the reg of the mode in question. */
3235 case SET:
3236 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3237 {
3238 *total = COSTS_N_INSNS (1);
3239 return true;
3240 }
3241
3242 if (register_operand (SET_DEST (x), VOIDmode)
3243 && (register_operand (SET_SRC (x), VOIDmode)
3244 || satisfies_constraint_Z (SET_SRC (x))))
3245 {
3246 const machine_mode mode = GET_MODE (SET_DEST (x));
3247 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3248 / mov_insn_size (mode, TARGET_SH2A));
3249 return true;
3250 }
3251 return false;
3252
3253 /* The cost of a mem access is mainly the cost of the address mode. */
3254 case MEM:
3255 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3256 true);
3257 return true;
3258
3259 case IF_THEN_ELSE:
3260 /* This case is required for the if_then_else negc pattern. */
3261 if (treg_set_expr (XEXP (x, 0), SImode))
3262 {
3263 *total = COSTS_N_INSNS (1);
3264 return true;
3265 }
3266 else
3267 return false;
3268
3269 /* Zero extracts of single bits are usually combine patterns for the
3270 tst insns. */
3271 case ZERO_EXTRACT:
3272 if (GET_CODE (XEXP (x, 0)) == XOR
3273 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3274 && XEXP (x, 1) == const1_rtx
3275 && CONST_INT_P (XEXP (x, 2))
3276 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3277 /* Check that the xor constaint overlaps with the extracted bit. */
3278 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3279 {
3280 *total = 1; //COSTS_N_INSNS (1);
3281 return true;
3282 }
3283
3284 /* div0s variant. */
3285 if (GET_CODE (XEXP (x, 0)) == XOR
3286 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3287 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3288 {
3289 *total = 1;
3290 return true;
3291 }
3292 return false;
3293
3294 /* The cost of a sign or zero extend depends on whether the source is a
3295 reg or a mem. In case of a mem take the address into account. */
3296 case SIGN_EXTEND:
3297 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3298 {
3299 *total = COSTS_N_INSNS (1);
3300 return true;
3301 }
3302 if (MEM_P (XEXP (x, 0)))
3303 {
3304 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3305 GET_MODE (XEXP (x, 0)),
3306 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3307 return true;
3308 }
3309 return false;
3310
3311 case ZERO_EXTEND:
3312 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3313 {
3314 *total = COSTS_N_INSNS (1);
3315 return true;
3316 }
3317 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3318 && (GET_MODE (XEXP (x, 0)) == QImode
3319 || GET_MODE (XEXP (x, 0)) == HImode))
3320 {
3321 /* Handle SH2A's movu.b and movu.w insn. */
3322 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3323 GET_MODE (XEXP (x, 0)),
3324 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3325 return true;
3326 }
3327 return false;
3328
3329 /* mems for SFmode and DFmode can be inside a parallel due to
3330 the way the fpscr is handled. */
3331 case PARALLEL:
3332 for (int i = 0; i < XVECLEN (x, 0); i++)
3333 {
3334 rtx xx = XVECEXP (x, 0, i);
3335 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3336 {
3337 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3338 GET_MODE (XEXP (xx, 0)),
3339 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3340 return true;
3341 }
3342 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3343 {
3344 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3345 GET_MODE (XEXP (xx, 1)),
3346 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3347 return true;
3348 }
3349 }
3350
3351 if (sh_1el_vec (x, VOIDmode))
3352 *total = outer_code != SET;
3353 else if (sh_rep_vec (x, VOIDmode))
3354 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3355 + (outer_code != SET));
3356 else
3357 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3358 return true;
3359
3360 case CONST_INT:
3361 if (CONST_OK_FOR_I08 (INTVAL (x)))
3362 *total = 0;
3363 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3364 && CONST_OK_FOR_K08 (INTVAL (x)))
3365 *total = 1;
3366 /* prepare_cmp_insn will force costly constants int registers before
3367 the cbranch[sd]i4 patterns can see them, so preserve potentially
3368 interesting ones not covered by I08 above. */
3369 else if (outer_code == COMPARE
3370 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3371 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3372 || INTVAL (x) == 0x7fffffff
3373 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3374 *total = 1;
3375 else
3376 *total = 8;
3377 return true;
3378
3379 case EQ:
3380 /* An and with a constant compared against zero is
3381 most likely going to be a TST #imm, R0 instruction. */
3382 if (XEXP (x, 1) == const0_rtx
3383 && ((GET_CODE (XEXP (x, 0)) == AND
3384 || (SUBREG_P (XEXP (x, 0))
3385 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3386 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3387 {
3388 *total = 1;
3389 return true;
3390 }
3391
3392 else if (XEXP (x, 1) == const0_rtx
3393 && GET_CODE (XEXP (x, 0)) == AND
3394 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3396 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3397 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3398 {
3399 *total = 1;
3400 return true;
3401 }
3402 else
3403 return false;
3404
3405 case SMIN:
3406 case SMAX:
3407 /* This is most likely a clips.b or clips.w insn that is being made up
3408 by combine. */
3409 if (TARGET_SH2A
3410 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3411 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3412 && REG_P (XEXP (XEXP (x, 0), 0))
3413 && CONST_INT_P (XEXP (x, 1)))
3414 {
3415 *total = COSTS_N_INSNS (1);
3416 return true;
3417 }
3418 else
3419 return false;
3420
3421 case CONST:
3422 case LABEL_REF:
3423 case SYMBOL_REF:
3424 *total = 5;
3425 return true;
3426
3427 case CONST_DOUBLE:
3428 /* prepare_cmp_insn will force costly constants int registers before
3429 the cbranchdi4 pattern can see them, so preserve potentially
3430 interesting ones. */
3431 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3432 *total = 1;
3433 else
3434 *total = 10;
3435 return true;
3436
3437 case CONST_VECTOR:
3438 /* FIXME: This looks broken. Only the last statement has any effect.
3439 Probably this could be folded with the PARALLEL case? */
3440 if (x == CONST0_RTX (GET_MODE (x)))
3441 *total = 0;
3442 else if (sh_1el_vec (x, VOIDmode))
3443 *total = outer_code != SET;
3444 if (sh_rep_vec (x, VOIDmode))
3445 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3446 + (outer_code != SET));
3447 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3448 return true;
3449
3450 case PLUS:
3451 case MINUS:
3452 *total = COSTS_N_INSNS (addsubcosts (x));
3453 return true;
3454
3455 case AND:
3456 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3457 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3458 {
3459 *total = COSTS_N_INSNS (1);
3460 return true;
3461 }
3462 /* Fall through. */
3463
3464 case XOR:
3465 case IOR:
3466 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3467 return true;
3468
3469 case MULT:
3470 *total = COSTS_N_INSNS (multcosts (x));
3471 return true;
3472
3473 case LT:
3474 case GE:
3475 /* div0s sign comparison. */
3476 if (GET_CODE (XEXP (x, 0)) == XOR
3477 && REG_P ((XEXP (XEXP (x, 0), 0)))
3478 && REG_P ((XEXP (XEXP (x, 0), 1)))
3479 && satisfies_constraint_Z (XEXP (x, 1)))
3480 {
3481 *total = COSTS_N_INSNS (1);
3482 return true;
3483 }
3484 else
3485 return false;
3486
3487 case LSHIFTRT:
3488 /* div0s sign comparison. */
3489 if (GET_CODE (XEXP (x, 0)) == XOR
3490 && REG_P ((XEXP (XEXP (x, 0), 0)))
3491 && REG_P ((XEXP (XEXP (x, 0), 1)))
3492 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3493 {
3494 *total = COSTS_N_INSNS (1);
3495 return true;
3496 }
3497 /* FALLTHRU */
3498 case ASHIFT:
3499 case ASHIFTRT:
3500 {
3501 int cost = shiftcosts (x);
3502 if (cost < 0)
3503 return false;
3504 *total = COSTS_N_INSNS (cost);
3505 return true;
3506 }
3507
3508 case DIV:
3509 case UDIV:
3510 case MOD:
3511 case UMOD:
3512 *total = COSTS_N_INSNS (20);
3513 return true;
3514
3515 case FLOAT:
3516 case FIX:
3517 *total = 100;
3518 return true;
3519
3520 default:
3521 return false;
3522 }
3523 }
3524
3525 /* Determine the size of the fundamental move insn that will be used
3526 for the specified mode. */
3527 static inline int
mov_insn_size(machine_mode mode,bool consider_sh2a)3528 mov_insn_size (machine_mode mode, bool consider_sh2a)
3529 {
3530 const int mode_sz = GET_MODE_SIZE (mode);
3531
3532 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3533 || (TARGET_FMOVD && mode == DFmode))
3534 return mode_sz;
3535 else
3536 {
3537 /* The max. available mode for actual move insns is SImode.
3538 Larger accesses will be split into multiple loads/stores. */
3539 const int max_mov_sz = GET_MODE_SIZE (SImode);
3540 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3541 }
3542 }
3543
3544 /* Determine the maximum possible displacement for a move insn for the
3545 specified mode. */
3546 int
sh_max_mov_insn_displacement(machine_mode mode,bool consider_sh2a)3547 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3548 {
3549 /* The 4 byte displacement move insns are the same as the 2 byte
3550 versions but take a 12 bit displacement. All we need to do is to
3551 scale the max. displacement value accordingly. */
3552 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3553
3554 /* SH2A supports FPU move insns with 12 bit displacements.
3555 Other variants to do not support any kind of displacements for
3556 FPU move insns. */
3557 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3558 return 0;
3559 else
3560 {
3561 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3562 const int mode_sz = GET_MODE_SIZE (mode);
3563 int r = 15 * mov_insn_sz * disp_scale;
3564
3565 /* If the mov insn will be split into multiple loads/stores, the
3566 maximum possible displacement is a bit smaller. */
3567 if (mode_sz > mov_insn_sz)
3568 r -= mode_sz - mov_insn_sz;
3569 return r;
3570 }
3571 }
3572
3573 /* Determine the alignment mask for a move insn of the
3574 specified mode. */
3575 static inline int
mov_insn_alignment_mask(machine_mode mode,bool consider_sh2a)3576 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3577 {
3578 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3579 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3580 }
3581
3582 /* Return the displacement value of a displacement address. */
3583 HOST_WIDE_INT
sh_disp_addr_displacement(rtx x)3584 sh_disp_addr_displacement (rtx x)
3585 {
3586 gcc_assert (satisfies_constraint_Sdd (x));
3587 return INTVAL (XEXP (XEXP (x, 0), 1));
3588 }
3589
3590 /* Compute the cost of an address. */
3591 static int
sh_address_cost(rtx x,machine_mode mode,addr_space_t as ATTRIBUTE_UNUSED,bool speed ATTRIBUTE_UNUSED)3592 sh_address_cost (rtx x, machine_mode mode,
3593 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3594 {
3595 /* 'GBR + 0'. Account one more because of R0 restriction. */
3596 if (REG_P (x) && REGNO (x) == GBR_REG)
3597 return 2;
3598
3599 /* Simple reg, post-inc, pre-dec addressing. */
3600 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3601 return 1;
3602
3603 /* 'reg + disp' addressing. */
3604 if (GET_CODE (x) == PLUS
3605 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3606 {
3607 /* 'GBR + disp'. Account one more because of R0 restriction. */
3608 if (REGNO (XEXP (x, 0)) == GBR_REG
3609 && gbr_displacement (XEXP (x, 1), mode))
3610 return 2;
3611
3612 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3613
3614 if (offset == 0)
3615 return 1;
3616
3617 /* The displacement would fit into a 2 byte move insn.
3618 HImode and QImode loads/stores with displacement put pressure on
3619 R0 which will most likely require another reg copy. Thus account
3620 a higher cost for that. */
3621 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3622 return (mode == HImode || mode == QImode) ? 2 : 1;
3623
3624 /* The displacement would fit into a 4 byte move insn (SH2A). */
3625 if (TARGET_SH2A
3626 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3627 return 2;
3628
3629 /* The displacement is probably out of range and will require extra
3630 calculations. */
3631 return 3;
3632 }
3633
3634 /* 'reg + reg' addressing. Account a slightly higher cost because of
3635 increased pressure on R0. */
3636 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3637 return 3;
3638
3639 /* Not sure what it is - probably expensive. */
3640 return 10;
3641 }
3642
3643 /* Code to expand a shift. */
3644 static void
gen_ashift(int type,int n,rtx reg)3645 gen_ashift (int type, int n, rtx reg)
3646 {
3647 rtx n_rtx;
3648
3649 /* Negative values here come from the shift_amounts array. */
3650 if (n < 0)
3651 {
3652 if (type == ASHIFT)
3653 type = LSHIFTRT;
3654 else
3655 type = ASHIFT;
3656 n = -n;
3657 }
3658
3659 n_rtx = GEN_INT (n);
3660 gcc_assert (satisfies_constraint_P27 (n_rtx));
3661
3662 switch (type)
3663 {
3664 case ASHIFTRT:
3665 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3666 break;
3667 case LSHIFTRT:
3668 if (n == 1)
3669 emit_insn (gen_shlr (reg, reg));
3670 else
3671 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3672 break;
3673 case ASHIFT:
3674 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3675 break;
3676 default:
3677 gcc_unreachable ();
3678 }
3679 }
3680
/* Code to expand a HImode shift.  TYPE/N/REG are as for gen_ashift;
   negative N means a shift in the opposite direction.  */
static void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	/* Rewrap a SUBREG operand as an SImode subreg of the inner reg
	   so the SImode shifter can be used.  */
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}
3719
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  CODE is the shift rtx code (ASHIFT/LSHIFTRT/...),
   operands[0] is the shifted reg and operands[2] the constant count.  */
void
gen_shifty_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 31;

  if (value == 31)
    {
      /* Right shift by 31: 2 insn rotl + movt sequence.  */
      if (code == LSHIFTRT)
	{
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen even when optimizing, if there were subregs before
	 reload.  Don't output a nop here, as this is never optimized away;
	 use a no-op move instead.  */
      emit_insn (gen_rtx_SET (operands[0], operands[0]));
      return;
    }

  /* General case: emit the table-driven 1/2/8/16 shift sequence.  */
  max = ashl_lshr_seq[value].insn_count;
  for (i = 0; i < max; i++)
    gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
}
3764
/* Same as gen_shifty_op, but optimized for values where the topmost bits
   don't matter.  Uses the ext_ashl_lshr_seq table, which may clobber up
   to three high bits.  */
void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  /* Dispatch to the HImode or SImode single-shift emitter.  */
  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_ashl_lshr_seq[value].insn_count;
      for (i = 0; i < max; i++)
	gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
      gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
}
3796
/* Output RTL for an arithmetic right shift.
   operands[0] = dest, operands[1] = src, operands[2] = count.
   Returns true if RTL was emitted, false to let the caller fall back.
   ??? Rewrite to use super-optimizer sequences.  */
bool
expand_ashiftrt (rtx *operands)
{
  rtx wrk;
  char func[18];
  int value;

  if (TARGET_DYNSHIFT)
    {
      /* shad shifts right for negative counts, so negate the count.  */
      if (!CONST_INT_P (operands[2]))
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
      /* Constant count: use shad only when the inline shift sequence
	 would be more expensive.  */
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
    }
  if (!CONST_INT_P (operands[2]))
    return false;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* If we are called from abs expansion, arrange things so that we
	 we can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)
	{
	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
				    operands[1]));
	  emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
	  return true;
	}
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return true;
    }
  else if (value >= 16 && value <= 19)
    {
      /* Shift by 16 first, then single-bit shifts for the remainder.  */
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  /* func holds at most "__ashiftrt_r4_31" (16 chars + NUL), fits 18.  */
  sprintf (func, "__ashiftrt_r4_%d", value);
  rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return true;
}
3875
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
			   (match_operand:SI 2 "const_int_operand" "n"))
		(match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1] .  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  /* MASK is the AND mask pre-shifted right by LEFT, i.e. as it applies to
     the un-shifted source value.  */
  if (CONST_INT_P (mask_rtx))
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  /* LSB isolates the lowest set bit of MASK; RIGHT is its bit position.  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  /* MASK2 holds the bits above the contiguous run starting at LSB.  */
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = ashl_lshr_seq[right].insn_count
		+ ashl_lshr_seq[right + left].insn_count;
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = ashl_lshr_seq[left + late_right].insn_count
		  + ashl_lshr_seq[late_right].insn_count;
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      for (width = 8; width <= 16; width += 8)
	{
	  /* Can we zero-extend right away?  */
	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
	    {
	      cost = 1 + ext_ashl_lshr_seq[right].insn_count
		       + ext_ashl_lshr_seq[left + right].insn_count;
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = -1;
		}
	      continue;
	    }
	  /* ??? Could try to put zero extend into initial right shift,
	     or even shift a bit left before the right shift.  */
	  /* Determine value of first part of left shift, to get to the
	     zero extend cut-off point.  */
	  first = width - exact_log2 (lsb2) + right;
	  if (first >= 0 && right + left - first >= 0)
	    {
	      cost = ext_ashl_lshr_seq[right].insn_count
		     + ext_ashl_lshr_seq[first].insn_count + 1
		     + ext_ashl_lshr_seq[right + left - first].insn_count;

	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = first;
		}
	    }
	}
    }
  /* Try to use r0 AND pattern */
  /* A small initial right shift (0..2) may bring the mask into the
     8-bit range usable by the "and #imm,r0" instruction.  */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      if (! CONST_OK_FOR_K08 (mask >> i))
	continue;
      cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 2;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1;
	}
    }
  /* Try to use a scratch register to hold the AND operand.  */
  /* CAN_EXT: the three topmost bits of the shifted mask are clear, so the
     cheaper "ext" shift sequences may be used (kind 3 vs kind 4).  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
	     + (can_ext
		? ext_ashl_lshr_seq
		: ashl_lshr_seq)[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 4 - can_ext;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
	}
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
4017
4018 /* This is used in length attributes of the unnamed instructions
4019 corresponding to shl_and_kind return values of 1 and 2. */
4020 int
shl_and_length(rtx insn)4021 shl_and_length (rtx insn)
4022 {
4023 rtx set_src, left_rtx, mask_rtx;
4024 int attributes[3];
4025
4026 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4027 left_rtx = XEXP (XEXP (set_src, 0), 1);
4028 mask_rtx = XEXP (set_src, 1);
4029 shl_and_kind (left_rtx, mask_rtx, attributes);
4030 return attributes[1];
4031 }
4032
4033 /* This is used in length attribute of the and_shl_scratch instruction. */
4034 int
shl_and_scr_length(rtx insn)4035 shl_and_scr_length (rtx insn)
4036 {
4037 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4038 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4039 rtx op = XEXP (set_src, 0);
4040 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4041 op = XEXP (XEXP (op, 0), 0);
4042 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4043 }
4044
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.
   DEST receives SOURCE shifted left by LEFT_RTX and masked by MASK_RTX.
   Returns true if shl_and_kind picked no usable method (kind 0), false
   on success.  */
bool
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  /* MASK as it applies after the initial right shift.  */
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return true;
    case 1:
      /* Shifts combined with a zero extend.  */
      {
	int first = attributes[2];
	rtx operands[3];

	if (first < 0)
	  {
	    /* Zero-extend right away (attrp[2] == -1 from shl_and_kind).  */
	    emit_insn ((mask << right) <= 0xff
		       ? gen_zero_extendqisi2 (dest,
					       gen_lowpart (QImode, source))
		       : gen_zero_extendhisi2 (dest,
					       gen_lowpart (HImode, source)));
	    source = dest;
	  }
	if (source != dest)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (right)
	  {
	    operands[2] = GEN_INT (right);
	    gen_shifty_hi_op (LSHIFTRT, operands);
	  }
	if (first > 0)
	  {
	    /* Shift left to the zero-extend cut-off point first.  */
	    operands[2] = GEN_INT (first);
	    gen_shifty_hi_op (ASHIFT, operands);
	    total_shift -= first;
	    mask <<= first;
	  }
	if (first >= 0)
	  emit_insn (mask <= 0xff
		     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
		     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (total_shift > 0)
	  {
	    operands[2] = GEN_INT (total_shift);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	break;
      }
    case 4:
      /* Kind 4: the high bits of the mask matter, so the exact shift
	 sequence must be used.  */
      shift_gen_fun = gen_shifty_op;
      /* FALLTHRU */
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
	mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
      /* FALLTHRU */
    case 2:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (currently_expanding_to_rtl
	  || reload_in_progress || reload_completed)
	{
	  rtx operands[3];

	  /* Cases 3 and 4 should be handled by this split
	     only while combining  */
	  gcc_assert (kind <= 2);
	  if (right)
	    {
	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
	      source = dest;
	    }
	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
	  if (total_shift)
	    {
	      operands[0] = dest;
	      operands[1] = dest;
	      operands[2] = GEN_INT (total_shift);
	      shift_gen_fun (ASHIFT, operands);
	    }
	  break;
	}
      else
	{
	  /* While combining, emit a single and_shl_scratch insn that a
	     later split will expand.  NEG adjusts for a negative shift
	     amount in the ext shift sequence, if any.  */
	  int neg = 0;
	  if (kind != 4 && total_shift < 16)
	    {
	      neg = -ext_ashl_lshr_seq[total_shift].amount[1];
	      if (neg > 0)
		neg -= ext_ashl_lshr_seq[total_shift].amount[2];
	      else
		neg = 0;
	    }
	  emit_insn (gen_and_shl_scratch (dest, source,
					  GEN_INT (right),
					  GEN_INT (mask),
					  GEN_INT (total_shift + neg),
					  GEN_INT (neg)));
	  emit_insn (gen_movsi (dest, dest));
	  break;
	}
    }
  return false;
}
4161
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
	(sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
				    (match_operand:SI 2 "const_int_operand" "n")
			 (match_operand:SI 3 "const_int_operand" "n")
			 (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  /* INSIZE is the width of the field before it is shifted left.  */
  insize = size - left;
  gcc_assert (insize > 0);
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = ashl_lshr_seq[32 - insize].insn_count
	      + ashl_lshr_seq[32 - size].insn_count;
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = ashl_lshr_seq[16 - insize].insn_count + 1
	     + ashl_lshr_seq[16 - size].insn_count;
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
	 below, by alternative 3 or something even better.  */
      if (cost < best_cost)
	{
	  kind = 5;
	  best_cost = cost;
	}
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
	{
	  cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
		 + ashl_lshr_seq[size - ext].insn_count;
	  if (cost < best_cost)
	    {
	      /* EXT is 8 or 16, so this yields kind 1 or 2.  */
	      kind = ext / (unsigned) 8;
	      best_cost = cost;
	    }
	}
      /* Check if we can do a sloppy shift with a final signed shift
	 restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
	cost = ext_ashl_lshr_seq[ext - insize].insn_count
	       + ext_ashl_lshr_seq[size - ext].insn_count + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
	 and do a final sign extend?  */
      else if (size <= 16)
	cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
	  + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
	  + 1;
      else
	continue;
      if (cost < best_cost)
	{
	  /* Kind 3 or 4.  */
	  kind = ext / (unsigned) 8 + 2;
	  best_cost = cost;
	}
    }
  /* Check if we can sign extend in r0 */
  if (insize < 8)
    {
      cost = 3 + ashl_lshr_seq[left].insn_count;
      if (cost < best_cost)
	{
	  kind = 6;
	  best_cost = cost;
	}
      /* Try the same with a final signed shift.  */
      if (left < 31)
	{
	  cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
	  if (cost < best_cost)
	    {
	      kind = 7;
	      best_cost = cost;
	    }
	}
    }
  if (TARGET_DYNSHIFT)
    {
      /* Try to use a dynamic shift.  */
      cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
	{
	  kind = 0;
	  best_cost = cost;
	}
    }
  /* NOTE(review): this stores the cost of the LAST candidate examined,
     not best_cost — looks like it should be *costp = best_cost; confirm
     intended before changing, since shl_sext_length feeds this into
     insn length attributes.  */
  if (costp)
    *costp = cost;
  return kind;
}
4273
4274 /* Function to be used in the length attribute of the instructions
4275 implementing this pattern. */
4276 int
shl_sext_length(rtx insn)4277 shl_sext_length (rtx insn)
4278 {
4279 rtx set_src, left_rtx, size_rtx;
4280 int cost;
4281
4282 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4283 left_rtx = XEXP (XEXP (set_src, 0), 1);
4284 size_rtx = XEXP (set_src, 1);
4285 shl_sext_kind (left_rtx, size_rtx, &cost);
4286 return cost;
4287 }
4288
/* Generate rtl for this pattern.
   DEST receives the LEFT_RTX-shifted, SIZE_RTX-bit sign-extracted value
   of SOURCE, using the method selected by shl_sext_kind.  Returns true
   if no usable method was found, false on success.  */
bool
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  /* INSIZE is the width of the field before the left shift.  */
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      /* Left shift to EXT bits, sign extend, then finish with either a
	 plain left shift (kinds 1/2) or shift + sign extend (3/4).  */
      {
	int ext = kind & 1 ? 8 : 16;
	int shift2 = size - ext;

	/* Don't expand fine-grained when combining, because that will
	   make the pattern fail.  */
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  {
	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	    emit_insn (gen_movsi (dest, source));
	    break;
	  }
	if (dest != source)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (ext - insize)
	  {
	    operands[2] = GEN_INT (ext - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	emit_insn (kind & 1
		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (kind <= 2)
	  {
	    if (shift2)
	      {
		operands[2] = GEN_INT (shift2);
		gen_shifty_op (ASHIFT, operands);
	      }
	  }
	else
	  {
	    if (shift2 > 0)
	      {
		if (EXT_SHIFT_SIGNED (shift2))
		  {
		    /* Over-shift by one, then restore the sign with a
		       single arithmetic right shift.  */
		    operands[2] = GEN_INT (shift2 + 1);
		    gen_shifty_op (ASHIFT, operands);
		    operands[2] = const1_rtx;
		    gen_shifty_op (ASHIFTRT, operands);
		    break;
		  }
		operands[2] = GEN_INT (shift2);
		gen_shifty_hi_op (ASHIFT, operands);
	      }
	    else if (shift2)
	      {
		operands[2] = GEN_INT (-shift2);
		gen_shifty_hi_op (LSHIFTRT, operands);
	      }
	    emit_insn (size <= 8
		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	break;
      }
    case 5:
      /* Left shift / 16 bit sign extend / right shift.  */
      {
	int i = 16 - size;
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	else
	  {
	    operands[0] = dest;
	    operands[2] = GEN_INT (16 - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
	while (--i >= 0)
	  gen_ashift (ASHIFTRT, 1, dest);
	break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (! currently_expanding_to_rtl
	  && ! reload_in_progress && ! reload_completed)
	{
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	  emit_insn (gen_movsi (dest, source));
	  break;
	}
      /* Sign extend a < 8 bit field via mask / xor / subtract:
	 and clears the high bits, xor flips the sign bit, and adding
	 -(1 << (insize-1)) propagates it upward.  */
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
	emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return true;
    }
  return false;
}
4408
/* Singly-linked list of constant-pool entry labels, used to record which
   labels end an addressing window (see pool_node.wend below).  */
typedef struct label_ref_list_d
{
  rtx_code_label *label;
  struct label_ref_list_d *next;
} *label_ref_list_t;

/* Pool allocator for label_ref_list_d nodes.  */
static object_allocator<label_ref_list_d> label_ref_list_d_pool
  ("label references list");
4417
4418 /* The SH cannot load a large constant into a register, constants have to
4419 come from a pc relative load. The reference of a pc relative load
4420 instruction must be less than 1k in front of the instruction. This
4421 means that we often have to dump a constant inside a function, and
4422 generate code to branch around it.
4423
4424 It is important to minimize this, since the branches will slow things
4425 down and make things bigger.
4426
4427 Worst case code looks like:
4428
4429 mov.l L1,rn
4430 bra L2
4431 nop
4432 align
4433 L1: .long value
4434 L2:
4435 ..
4436
4437 mov.l L3,rn
4438 bra L4
4439 nop
4440 align
4441 L3: .long value
4442 L4:
4443 ..
4444
4445 We fix this by performing a scan before scheduling, which notices which
4446 instructions need to have their operands fetched from the constant table
4447 and builds the table.
4448
4449 The algorithm is:
4450
4451 scan, find an instruction which needs a pcrel move. Look forward, find the
4452 last barrier which is within MAX_COUNT bytes of the requirement.
4453 If there isn't one, make one. Process all the instructions between
4454 the find and the barrier.
4455
4456 In the above example, we can tell that L3 is within 1k of L1, so
4457 the first move can be shrunk from the 3 insn+constant sequence into
4458 just 1 insn, and the constant moved to L3 to make:
4459
4460 mov.l L1,rn
4461 ..
4462 mov.l L3,rn
4463 bra L4
4464 nop
4465 align
4466 L3:.long value
4467 L4:.long value
4468
4469 Then the second move becomes the target for the shortening process. */
4470
/* One entry of the constant pool built by the reorg pass.  */
typedef struct
{
  rtx value;			/* Value in table.  */
  rtx_code_label *label;	/* Label of value.  */
  label_ref_list_t wend;	/* End of window.  */
  machine_mode mode;		/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;

/* The maximum number of constants that can fit into one pool, since
   constants in the range 0..510 are at least 2 bytes long, and in the
   range from there to 1018 at least 4 bytes.  */

#define MAX_POOL_SIZE 372
static pool_node pool_vector[MAX_POOL_SIZE];
static int pool_size;
/* Label of the most recently added pool entry, and its index in
   pool_vector; used by add_constant to chain window-end references.  */
static rtx_code_label *pool_window_label;
static int pool_window_last;

/* Highest label number in use before the reorg pass started.  */
static int max_labelno_before_reorg;
4494
4495 /* ??? If we need a constant in HImode which is the truncated value of a
4496 constant we need in SImode, we could combine the two entries thus saving
4497 two bytes. Is this common enough to be worth the effort of implementing
4498 it? */
4499
4500 /* ??? This stuff should be done at the same time that we shorten branches.
4501 As it is now, we must assume that all branches are the maximum size, and
4502 this causes us to almost always output constant pools sooner than
4503 necessary. */
4504
/* Add a constant to the pool and return its label.  X is the constant,
   MODE its machine mode.  LAST_VALUE, if nonzero, is the value of the
   immediately preceding pool reference; when X directly follows it in
   the pool we can reuse the previous entry's address (post-increment
   access) and return a null label.  */
static rtx_code_label *
add_constant (rtx x, machine_mode mode, rtx last_value)
{
  rtx_code_label *lab, *new_rtx;
  label_ref_list_t ref, newref;

  /* First see if we've already got it.  */
  for (int i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
	  && mode == pool_vector[i].mode)
	{
	  if (x->code == CODE_LABEL)
	    {
	      /* For labels, also require matching label numbers.  */
	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
		continue;
	    }
	  if (rtx_equal_p (x, pool_vector[i].value))
	    {
	      lab = new_rtx = 0;
	      /* Reuse without a new label only when this entry directly
		 follows LAST_VALUE in the pool (post-increment access).  */
	      if (! last_value
		  || ! i
		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
		{
		  new_rtx = gen_label_rtx ();
		  LABEL_REFS (new_rtx) = pool_vector[i].label;
		  pool_vector[i].label = lab = new_rtx;
		}
	      if (lab && pool_window_label)
		{
		  /* Record the previous window label as ending at the
		     last-referenced entry.  */
		  newref = label_ref_list_d_pool.allocate ();
		  newref->label = pool_window_label;
		  ref = pool_vector[pool_window_last].wend;
		  newref->next = ref;
		  pool_vector[pool_window_last].wend = newref;
		}
	      if (new_rtx)
		pool_window_label = new_rtx;
	      pool_window_last = i;
	      return lab;
	    }
	}
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      /* X follows LAST_VALUE: mark the predecessor as part of a
	 post-increment sequence and emit no label for X.  */
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = label_ref_list_d_pool.allocate ();
      newref->label = pool_window_label;
      ref = pool_vector[pool_window_last].wend;
      newref->next = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}
4577
/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; We have to emit the operand3 labels from
   these insns at a 4-byte aligned position.  BARRIER is the barrier
   after which we are to place the table.  */
static void
dump_table (rtx_insn *start, rtx_insn *barrier)
{
  rtx_insn *scan = barrier;
  bool need_align = true;
  rtx lab;
  label_ref_list_t ref;
  bool have_df = false;

  /* Do two passes, first time dump out the HI sized constants.  */

  for (int i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
	{
	  if (need_align)
	    {
	      scan = emit_insn_after (gen_align_2 (), scan);
	      need_align = false;
	    }
	  /* Emit every label that refers to this entry.  */
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
				  scan);
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
      else if (p->mode == DFmode)
	have_df = true;
    }

  need_align = true;

  if (start)
    {
      /* Emit the 4-byte aligned operand3 labels of all casesi_worker_2
	 insns between START and BARRIER.  */
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = false;
      for (; start != barrier; start = NEXT_INSN (start))
	if (NONJUMP_INSN_P (start)
	    && recog_memoized (start) == CODE_FOR_casesi_worker_2)
	  {
	    rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
	    rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

	    scan = emit_label_after (lab, scan);
	  }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      /* 8-byte aligned pool layout: DFmode entries need 8-byte alignment,
	 so 4-byte entries are paired; ALIGN_INSN marks a padding slot a
	 later 4-byte entry can be moved back into.  */
      rtx_insn *align_insn = NULL;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = false;

      for (int i = 0; i < pool_size; i++)
	{
	  pool_node *p = &pool_vector[i];

	  switch (p->mode)
	    {
	    case E_HImode:
	      break;
	    case E_SImode:
	    case E_SFmode:
	      if (align_insn && !p->part_of_sequence_p)
		{
		  /* Fill the padding slot reserved before an earlier
		     DFmode entry instead of appending at the end.  */
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    emit_label_before (lab, align_insn);
		  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
				    align_insn);
		  for (ref = p->wend; ref; ref = ref->next)
		    {
		      lab = ref->label;
		      emit_insn_before (gen_consttable_window_end (lab),
					align_insn);
		    }
		  delete_insn (align_insn);
		  align_insn = NULL;
		  continue;
		}
	      else
		{
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    scan = emit_label_after (lab, scan);
		  scan = emit_insn_after (gen_consttable_4 (p->value,
							    const0_rtx), scan);
		  /* Each 4-byte entry toggles the 8-byte alignment.  */
		  need_align = ! need_align;
		}
	      break;
	    case E_DFmode:
	      if (need_align)
		{
		  /* Reserve a padding slot that a later 4-byte entry
		     may be moved into (see above).  */
		  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
		  align_insn = scan;
		  need_align = false;
		}
	      /* FALLTHRU */
	    case E_DImode:
	      for (lab = p->label; lab; lab = LABEL_REFS (lab))
		scan = emit_label_after (lab, scan);
	      scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				      scan);
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  if (p->mode != HImode)
	    {
	      for (ref = p->wend; ref; ref = ref->next)
		{
		  lab = ref->label;
		  scan = emit_insn_after (gen_consttable_window_end (lab),
					  scan);
		}
	    }
	}

      pool_size = 0;
    }

  /* Second pass for the remaining (non-HImode) entries; skipped when the
     aligned-double path above already emitted them and zeroed pool_size.  */
  for (int i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
	{
	case E_HImode:
	  break;
	case E_SImode:
	case E_SFmode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
				  scan);
	  break;
	case E_DFmode:
	case E_DImode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				  scan);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (p->mode != HImode)
	{
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  /* Reset pool state for the next table.  */
  pool_size = 0;
  pool_window_label = NULL;
  pool_window_last = 0;
}
4764
4765 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4766
/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For a SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */
static bool
broken_move (rtx_insn *insn)
{
  if (NONJUMP_INSN_P (insn))
    {
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
	  /* We can load any 8-bit value if we don't care what the high
	     order bits end up as.  */
	  && GET_MODE (SET_DEST (pat)) != QImode
	  /* The source must be a constant (or a stack-switch unspec, or
	     a mova_const) for the move to need a constant-pool fixup.  */
	  && (CONSTANT_P (SET_SRC (pat))
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
		  && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
	      /* Match mova_const.  */
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC
		  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
		  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
	  /* FP 0.0/1.0 loads into FP registers can use fldi and need
	     no fixup, subject to the fpscr caveat below.  */
	  && ! (TARGET_SH2E
		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
		&& (fp_zero_operand (SET_SRC (pat))
		    || fp_one_operand (SET_SRC (pat)))
		/* In general we don't know the current setting of fpscr, so
		   disable fldi.
		   There is an exception if this was a register-register move
		   before reload - and hence it was ascertained that we have
		   single precision setting - and in a post-reload optimization
		   we changed this to do a constant load.  In that case
		   we don't have an r0 clobber, hence we must use fldi.  */
		&& (TARGET_FMOVD
		    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
			== SCRATCH))
		&& REG_P (SET_DEST (pat))
		&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
	  /* SH2A has wider immediate-move insns; those need no fixup.  */
	  && ! (TARGET_SH2A
		&& GET_MODE (SET_DEST (pat)) == SImode
		&& (satisfies_constraint_I20 (SET_SRC (pat))
		    || satisfies_constraint_I28 (SET_SRC (pat))))
	  && ! satisfies_constraint_I08 (SET_SRC (pat)))
	return true;
    }

  return false;
}
4817
4818 /* Return true if the specified insn is a mova insn. */
4819 static bool
mova_p(rtx_insn * insn)4820 mova_p (rtx_insn *insn)
4821 {
4822 return (NONJUMP_INSN_P (insn)
4823 && GET_CODE (PATTERN (insn)) == SET
4824 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4825 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4826 /* Don't match mova_const. */
4827 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4828 }
4829
/* Fix up a mova from a switch that went out of range.  Turn the mova
   into an ordinary constant load (non-PIC), or re-anchor it and its
   casesi_worker_1 to a new label via a SYMOFF difference (PIC).  */
static void
fixup_mova (rtx_insn *mova)
{
  PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
  if (! flag_pic)
    {
      /* Degrade the mova to a plain load of the label address;
	 broken_move will then pick it up for the constant pool.  */
      SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
      INSN_CODE (mova) = -1;
    }
  else
    {
      rtx_insn *worker = mova;
      rtx_code_label *lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, target, base, diff;

      /* Find the casesi_worker_1 insn that consumes this mova; it must
	 appear before any label or jump.  */
      do
	{
	  worker = NEXT_INSN (worker);
	  gcc_assert (worker
		      && !LABEL_P (worker)
		      && !JUMP_P (worker));
	} while (NOTE_P (worker)
	       || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      /* Rewrite the worker to casesi_worker_2, which carries the new
	 anchor label LAB explicitly.  */
      PATTERN (worker) = (gen_casesi_worker_2
			  (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
			   XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
			   XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      /* Replace the mova source with (const (unspec [target - LAB]
	 UNSPEC_SYMOFF)), a pc-relative offset from the new anchor.  */
      target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      base = gen_rtx_LABEL_REF (Pmode, lab);
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}
4870
/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
   *num_mova, and check that the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
static int
untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
{
  int n_addr = 0;	/* Initialization to shut up spurious warning.  */
  int f_target, n_target = 0;	/* Likewise.  */

  if (optimize)
    {
      /* If NEW_MOVA has no address yet, it will be handled later.  */
      if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
	return -1;

      n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
      n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
      /* A mova can only address forward, up to 1022 bytes away.  If the
	 target is behind us or too far ahead, the mova is already broken.  */
      if (n_addr > n_target || n_addr + 1022 < n_target)
	{
	  /* Change the mova into a load.
	     broken_move will then return true for it.  */
	  fixup_mova (new_mova);
	  return 1;
	}
    }
  /* First mova seen in this scan: just record it.  */
  if (!(*num_mova)++)
    {
      *first_mova = new_mova;
      return 2;
    }
  /* If the new mova's target does not lie beyond the first one's target,
     the two are not nested; nothing needs untangling.  */
  if (!optimize
      || ((f_target
	   = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
	  >= n_target))
    return -1;

  /* The movas are nested.  Break up whichever one spans the larger
     distance, since it is more likely to go out of range.  */
  (*num_mova)--;
  if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
      > n_target - n_addr)
    {
      fixup_mova (*first_mova);
      return 0;
    }
  else
    {
      fixup_mova (new_mova);
      return 1;
    }
}
4921
/* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  NUM_MOVA is the number of pending movas whose constants
   must land in this pool; MOVA is the first of them.  */
static rtx_insn *
find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
{
  int count_si = 0;		/* Bytes scanned since the first SI constant.  */
  int count_hi = 0;		/* Bytes scanned since the first HI constant.  */
  int found_hi = 0;		/* Accumulated size of HImode constants.  */
  int found_si = 0;		/* Accumulated size of SImode+ constants.  */
  int hi_align = 2;
  int si_align = 2;
  int leading_mova = num_mova;
  rtx_insn *barrier_before_mova = NULL;
  rtx_insn *found_barrier = NULL;
  rtx_insn *good_barrier = NULL;
  int si_limit;
  int hi_limit;
  rtx_insn *orig = from;
  rtx_insn *last_got = NULL;
  rtx_insn *last_symoff = NULL;

  /* For HImode: range is 510, add 4 because pc counts from address of
     second instruction after this one, subtract 2 for the jump instruction
     that we may need to emit before the table, subtract 2 for the instruction
     that fills the jump delay slot (in very rare cases, reorg will take an
     instruction from after the constant pool or will leave the delay slot
     empty).  This gives 510.
     For SImode: range is 1020, add 4 because pc counts from address of
     second instruction after this one, subtract 2 in case pc is 2 byte
     aligned, subtract 2 for the jump instruction that we may need to emit
     before the table, subtract 2 for the instruction that fills the jump
     delay slot.  This gives 1018.  */

  /* The branch will always be shortened now that the reference address for
     forward branches is the successor address, thus we need no longer make
     adjustments to the [sh]i_limit for -O0.  */

  si_limit = 1018;
  hi_limit = 510;

  /* Scan forward until either constant class would go out of range.  */
  while (from && count_si < si_limit && count_hi < hi_limit)
    {
      int inc = get_attr_length (from);
      int new_align = 1;

      /* If this is a label that existed at the time of the compute_alignments
	 call, determine the alignment.  N.B.  When find_barrier recurses for
	 an out-of-reach mova, we might see labels at the start of previously
	 inserted constant tables.  */
      if (LABEL_P (from)
	  && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
	{
	  if (optimize)
	    new_align = 1 << label_to_alignment (from);
	  else if (BARRIER_P (prev_nonnote_insn (from)))
	    new_align = 1 << barrier_align (from);
	  else
	    new_align = 1;
	  inc = 0;
	}
      /* In case we are scanning a constant table because of recursion, check
	 for explicit alignments.  If the table is long, we might be forced
	 to emit the new table in front of it; the length of the alignment
	 might be the last straw.  */
      else if (NONJUMP_INSN_P (from)
	       && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
	new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
      /* When we find the end of a constant table, paste the new constant
	 at the end.  That is better than putting it in front because
	 this way, we don't need extra alignment for adding a 4-byte-aligned
	 mov(a) label to a 2/4 or 8/4 byte aligned table.  */
      else if (NONJUMP_INSN_P (from)
	       && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
	return from;

      if (BARRIER_P (from))
	{
	  rtx_insn *next;

	  found_barrier = from;

	  /* If we are at the end of the function, or in front of an alignment
	     instruction, we need not insert an extra alignment.  We prefer
	     this kind of barrier.  */
	  if (barrier_align (from) > 2)
	    good_barrier = from;

	  /* If we are at the end of a hot/cold block, dump the constants
	     here.  */
	  next = NEXT_INSN (from);
	  if (next
	      && NOTE_P (next)
	      && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
	    break;
	}

      if (broken_move (from))
	{
	  rtx pat, src, dst;
	  machine_mode mode;

	  pat = PATTERN (from);
	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);
	  src = SET_SRC (pat);
	  dst = SET_DEST (pat);
	  mode = GET_MODE (dst);

	  /* GOT pc-relative setting comes in pair of
	     mova	.L8,r0
	     mov.l	.L8,r12
	     instructions.  (plus add r0,r12).
	     Remember if we see one without the other.  */
	  if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
	    last_got = last_got ? NULL : from;
	  else if (PIC_ADDR_P (src))
	    last_got = last_got ? NULL : from;

	  /* We must explicitly check the mode, because sometimes the
	     front end will generate code to load unsigned constants into
	     HImode targets without properly sign extending them.  */
	  if (mode == HImode
	      || (mode == SImode && satisfies_constraint_I16 (src)
		  && REGNO (dst) != FPUL_REG))
	    {
	      found_hi += 2;
	      /* We put the short constants before the long constants, so
		 we must count the length of short constants in the range
		 for the long constants.  */
	      /* ??? This isn't optimal, but is easy to do.  */
	      si_limit -= 2;
	    }
	  else
	    {
	      /* We dump DF/DI constants before SF/SI ones, because
		 the limit is the same, but the alignment requirements
		 are higher.  We may waste up to 4 additional bytes
		 for alignment, and the DF/DI constant may have
		 another SF/SI constant placed before it.  */
	      while (si_align > 2 && found_si + si_align - 2 > count_si)
		si_align >>= 1;
	      if (found_si > count_si)
		count_si = found_si;
	      found_si += GET_MODE_SIZE (mode);
	      if (num_mova)
		si_limit -= GET_MODE_SIZE (mode);
	    }
	}

      if (mova_p (from))
	{
	  switch (untangle_mova (&num_mova, &mova, from))
	    {
	      case 1:
		/* FROM itself was broken up into a constant load; if it is
		   a PIC symbol-offset load, remember it so we don't split
		   the pool between it and its casesi_worker_2.  */
		if (flag_pic)
		  {
		    rtx src = SET_SRC (PATTERN (from));
		    if (GET_CODE (src) == CONST
			&& GET_CODE (XEXP (src, 0)) == UNSPEC
			&& XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
		      last_symoff = from;
		  }
		break;
	      case 0:	return find_barrier (0, 0, mova);
	      case 2:
		{
		  leading_mova = 0;
		  barrier_before_mova
		    = good_barrier ? good_barrier : found_barrier;
		}
	      default:	break;
	    }
	  if (found_si > count_si)
	    count_si = found_si;
	}
      else if (JUMP_TABLE_DATA_P (from)
	       && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
	{
	  /* A switch table consumes its mova: drop the count when we pass
	     the table (or its label) belonging to a pending mova.  */
	  if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
	      || (num_mova
		  && (prev_nonnote_insn (from)
		      == XEXP (MOVA_LABELREF (mova), 0))))
	    num_mova--;
	  if (barrier_align (next_real_insn (from)) == align_jumps_log)
	    {
	      /* We have just passed the barrier in front of the
		 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
		 the ADDR_DIFF_VEC is accessed as data, just like our pool
		 constants, this is a good opportunity to accommodate what
		 we have gathered so far.
		 If we waited any longer, we could end up at a barrier in
		 front of code, which gives worse cache usage for separated
		 instruction / data caches.  */
	      good_barrier = found_barrier;
	      break;
	    }
	  else
	    {
	      rtx body = PATTERN (from);
	      inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
	    }
	}
      /* For the SH1, we generate alignments even after jumps-around-jumps.  */
      else if (JUMP_P (from)
	       && ! TARGET_SH2
	       && ! optimize_size)
	new_align = 4;

      /* There is a possibility that a bf is transformed into a bf/s by the
	 delay slot scheduler.  */
      if (JUMP_P (from)
	  && get_attr_type (from) == TYPE_CBRANCH
	  && ! sequence_insn_p (from))
	inc += 2;

      /* Account for the bytes scanned, including any alignment padding
	 that a pool placed here would incur.  */
      if (found_si)
	{
	  count_si += inc;
	  if (new_align > si_align)
	    {
	      si_limit -= (count_si - 1) & (new_align - si_align);
	      si_align = new_align;
	    }
	  count_si = (count_si + new_align - 1) & -new_align;
	}
      if (found_hi)
	{
	  count_hi += inc;
	  if (new_align > hi_align)
	    {
	      hi_limit -= (count_hi - 1) & (new_align - hi_align);
	      hi_align = new_align;
	    }
	  count_hi = (count_hi + new_align - 1) & -new_align;
	}
      from = NEXT_INSN (from);
    }

  if (num_mova)
    {
      if (leading_mova)
	{
	  /* Try as we might, the leading mova is out of range.  Change
	     it into a load (which will become a pcload) and retry.  */
	  fixup_mova (mova);
	  return find_barrier (0, 0, mova);
	}
      else
	{
	  /* Insert the constant pool table before the mova instruction,
	     to prevent the mova label reference from going out of range.  */
	  from = mova;
	  good_barrier = found_barrier = barrier_before_mova;
	}
    }

  if (found_barrier)
    {
      if (good_barrier && next_real_insn (found_barrier))
	found_barrier = good_barrier;
    }
  else
    {
      /* We didn't find a barrier in time to dump our stuff,
	 so we'll make one.  */
      rtx_code_label *label = gen_label_rtx ();

      /* Don't emit a constant table in the middle of insns for
	 casesi_worker_2.  This is a bit overkill but is enough
	 because casesi_worker_2 wouldn't appear so frequently.  */
      if (last_symoff)
	from = last_symoff;

      /* If we exceeded the range, then we must back up over the last
	 instruction we looked at.  Otherwise, we just need to undo the
	 NEXT_INSN at the end of the loop.  */
      if (PREV_INSN (from) != orig
	  && (count_hi > hi_limit || count_si > si_limit))
	from = PREV_INSN (PREV_INSN (from));
      else
	from = PREV_INSN (from);

      /* Don't emit a constant table in the middle of global pointer setting,
	 since that would move the addressing base GOT into another table.
	 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
	 in the pool anyway, so just move up the whole constant pool.

	 However, avoid doing so when the last single GOT mov is the starting
	 insn itself.  Going past above the start insn would create a negative
	 offset, causing errors.  */
      if (last_got && last_got != orig)
	from = PREV_INSN (last_got);

      /* Don't insert the constant pool table at the position which
	 may be the landing pad.  */
      if (flag_exceptions
	  && CALL_P (from)
	  && find_reg_note (from, REG_EH_REGION, NULL_RTX))
	from = PREV_INSN (from);

      /* Walk back to be just before any jump or label.
	 Putting it before a label reduces the number of times the branch
	 around the constant pool table will be hit.  Putting it before
	 a jump makes it more likely that the bra delay slot will be
	 filled.  */
      while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
	from = PREV_INSN (from);

      if (CALL_P (from))
	{
	  bool sibcall_p = SIBLING_CALL_P (from);

	  /* If FROM was a sibling call, then we know that control
	     will not return.  In fact, we were guaranteed to hit
	     a barrier before another real insn.

	     The jump around the constant pool is unnecessary.  It
	     costs space, but more importantly it confuses dwarf2cfi
	     generation.  */
	  if (sibcall_p)
	    return emit_barrier_after (from);
	}

      /* Emit a jump over the pool, the barrier that allows the pool to be
	 placed, and the label the jump targets past the pool.  */
      from = emit_jump_insn_after (gen_jump (label), from);
      JUMP_LABEL (from) = label;
      LABEL_NUSES (label) = 1;
      found_barrier = emit_barrier_after (from);
      emit_label_after (label, found_barrier);
    }

  return found_barrier;
}
5257
5258 /* If the instruction INSN is implemented by a special function, and we can
5259 positively find the register that is used to call the sfunc, and this
5260 register is not used anywhere else in this instruction - except as the
5261 destination of a set, return this register; else, return 0. */
5262 rtx
sfunc_uses_reg(rtx_insn * insn)5263 sfunc_uses_reg (rtx_insn *insn)
5264 {
5265 int i;
5266 rtx pattern, part, reg_part, reg;
5267
5268 if (!NONJUMP_INSN_P (insn))
5269 return NULL_RTX;
5270 pattern = PATTERN (insn);
5271 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5272 return NULL_RTX;
5273
5274 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5275 {
5276 part = XVECEXP (pattern, 0, i);
5277 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5278 reg_part = part;
5279 }
5280 if (! reg_part)
5281 return NULL_RTX;
5282 reg = XEXP (reg_part, 0);
5283 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5284 {
5285 part = XVECEXP (pattern, 0, i);
5286 if (part == reg_part || GET_CODE (part) == CLOBBER)
5287 continue;
5288 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5289 && REG_P (SET_DEST (part)))
5290 ? SET_SRC (part) : part)))
5291 return NULL_RTX;
5292 }
5293 return reg;
5294 }
5295
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  */
static bool
noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
{
  *set = NULL_RTX;

  /* An sfunc is effectively a call through REG; if INSN is an sfunc
     calling through this register, the use is call-like.  */
  rtx reg2 = sfunc_uses_reg (insn);
  if (reg2 && REGNO (reg2) == REGNO (reg))
    {
      rtx pattern = single_set (insn);
      if (pattern
	  && REG_P (SET_DEST (pattern))
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	*set = pattern;
      return false;
    }
  if (!CALL_P (insn))
    {
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different.  */
      rtx pattern = single_set (insn);
      if (pattern
	  && REG_P (SET_DEST (pattern))
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	{
	  rtx par, part;
	  int i;

	  *set = pattern;
	  par = PATTERN (insn);
	  /* In a PARALLEL, REG must not be mentioned in any non-SET
	     element (e.g. a USE) besides the single set.  */
	  if (GET_CODE (par) == PARALLEL)
	    for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
	      {
		part = XVECEXP (par, 0, i);
		if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
		  return true;
	      }
	  /* REG is set by INSN; it is also used non-call-wise iff it
	     appears in the source of the set.  */
	  return reg_mentioned_p (reg, SET_SRC (pattern));
	}

      /* Neither a call nor a simple set of REG: count as a non-call use.  */
      return true;
    }

  /* INSN is a call: dig down to the CALL rtx itself.  */
  rtx pattern = PATTERN (insn);

  if (GET_CODE (pattern) == PARALLEL)
    {
      /* Any mention of REG outside the first element is a non-call use.  */
      for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
	if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
	  return true;
      pattern = XVECEXP (pattern, 0, 0);
    }

  if (GET_CODE (pattern) == SET)
    {
      if (reg_mentioned_p (reg, SET_DEST (pattern)))
	{
	  /* We don't use rtx_equal_p, because we don't care if the
	     mode is different.  */
	  if (!REG_P (SET_DEST (pattern))
	      || REGNO (reg) != REGNO (SET_DEST (pattern)))
	    /* REG occurs inside a more complex destination.  */
	    return true;

	  /* The call's return value is assigned to REG.  */
	  *set = pattern;
	}

      pattern = SET_SRC (pattern);
    }

  /* The only remaining legitimate use is as the call address:
     (call (mem REG) ...).  Anything else is a non-call use.  */
  if (GET_CODE (pattern) != CALL
      || !MEM_P (XEXP (pattern, 0))
      || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
    return true;

  return false;
}
5374
/* Given a X, a pattern of an insn or a part of it, return a mask of used
   general registers.  Bits 0..15 mean that the respective registers
   are used as inputs in the instruction.  Bits 16..31 mean that the
   registers 0..15, respectively, are used as outputs, or are clobbered.
   IS_DEST should be set to 16 if X is the destination of a SET, else to 0.  */
int
regs_used (rtx x, int is_dest)
{
  enum rtx_code code;
  const char *fmt;
  int used = 0;

  if (! x)
    return used;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      /* Mark every hard reg the value occupies; IS_DEST shifts the run
	 into the output half of the mask.  */
      if (REGNO (x) < 16)
	return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
		<< (REGNO (x) + is_dest));
      return 0;
    case SUBREG:
      {
	rtx y = SUBREG_REG (x);

	if (!REG_P (y))
	  break;
	/* Account for the subreg byte offset when computing the first
	   hard register covered.  */
	if (REGNO (y) < 16)
	  return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
		  << (REGNO (y) +
		      subreg_regno_offset (REGNO (y),
					   GET_MODE (y),
					   SUBREG_BYTE (x),
					   GET_MODE (x)) + is_dest));
	return 0;
      }
    case SET:
      /* Source registers are inputs; destination registers are outputs.  */
      return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
    case RETURN:
      /* If there was a return value, it must have been indicated with USE.  */
      return 0x00ffff00;
    case CLOBBER:
      /* NOTE(review): clobbers shift by 1 rather than by 16 like SET
	 destinations do; this asymmetry is long-standing here but looks
	 surprising — confirm before relying on the output half of the
	 mask for clobbered registers.  */
      is_dest = 1;
      break;
    case MEM:
      /* Address registers are inputs, even within a SET destination.  */
      is_dest = 0;
      break;
    case CALL:
      used |= 0x00ff00f0;
      break;
    default:
      break;
    }

  fmt = GET_RTX_FORMAT (code);

  /* Recurse over all sub-expressions, accumulating the mask.  */
  for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
	    used |= regs_used (XVECEXP (x, i, j), is_dest);
	}
      else if (fmt[i] == 'e')
	used |= regs_used (XEXP (x, i), is_dest);
    }
  return used;
}
5444
/* Create an instruction that prevents redirection of a conditional branch
   to the destination of the JUMP with address ADDR.
   If the branch needs to be implemented as an indirect jump, try to find
   a scratch register for it.
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   If any preceding insn that doesn't fit into a delay slot is good enough,
   pass 1.  Pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
   If a blocking instruction is made or recognized, return it.  */
static rtx_insn *
gen_block_redirect (rtx_insn *jump, int addr, int need_block)
{
  int dead = 0;		/* Mask of registers found dead past the target.  */
  rtx_insn *prev = prev_nonnote_insn (jump);

  /* First, check if we already have an instruction that satisfies our need.  */
  if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
    {
      if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
	return prev;
      if (GET_CODE (PATTERN (prev)) == USE
	  || GET_CODE (PATTERN (prev)) == CLOBBER
	  || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	prev = jump;
      else if ((need_block &= ~1) < 0)
	return prev;
      else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
	need_block = 0;
    }
  if (GET_CODE (PATTERN (jump)) == RETURN)
    {
      if (! need_block)
	return prev;
      /* Reorg even does nasty things with return insns that cause branches
	 to go out of range - see find_end_label and callers.  */
      return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
    }
  /* We can't use JUMP_LABEL here because it might be undefined
     when not optimizing.  */
  rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
  /* If the branch is out of range, try to find a scratch register for it.  */
  if (optimize
      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
	  > 4092 + 4098))
    {
      rtx_insn *scan;
      /* Don't look for the stack pointer as a scratch register,
	 it would cause trouble if an interrupt occurred.  */
      unsigned attempt = 0x7fff, used;
      int jump_left = flag_expensive_optimizations + 1;

      /* It is likely that the most recent eligible instruction is wanted for
	 the delay slot.  Therefore, find out which registers it uses, and
	 try to avoid using them.  */

      for (scan = jump; (scan = PREV_INSN (scan)); )
	{
	  if (scan->deleted ())
	    continue;
	  rtx_code code = GET_CODE (scan);
	  if (code == CODE_LABEL || code == JUMP_INSN)
	    break;
	  if (code == INSN
	      && GET_CODE (PATTERN (scan)) != USE
	      && GET_CODE (PATTERN (scan)) != CLOBBER
	      && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
	    {
	      attempt &= ~regs_used (PATTERN (scan), 0);
	      break;
	    }
	}
      /* Scan forward from the jump target, collecting registers that are
	 written before being read - those are scratch candidates.  */
      for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
	   (scan = NEXT_INSN (scan)); )
	{
	  if (scan->deleted ())
	    continue;
	  rtx_code code = GET_CODE (scan);
	  if (INSN_P (scan))
	    {
	      used |= regs_used (PATTERN (scan), 0);
	      if (code == CALL_INSN)
		used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
	      dead |= (used >> 16) & ~used;
	      if (dead & attempt)
		{
		  dead &= attempt;
		  break;
		}
	      if (code == JUMP_INSN)
		{
		  /* Follow a bounded number of simple jumps so the scan
		     can continue past them; otherwise stop here.  */
		  if (jump_left-- && simplejump_p (scan))
		    scan = JUMP_LABEL_AS_INSN (scan);
		  else
		    break;
		}
	    }
	}
      /* Mask out the stack pointer again, in case it was
	 the only 'free' register we have found.  */
      dead &= 0x7fff;
    }
  /* If the immediate destination is still in range, check for possible
     threading with a jump beyond the delay slot insn.
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation then.  */

  else if (optimize && need_block >= 0)
    {
      rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
      next = next_active_insn (next);
      if (next && JUMP_P (next)
	  && GET_CODE (PATTERN (next)) == SET
	  && recog_memoized (next) == CODE_FOR_jump_compact)
	{
	  dest = JUMP_LABEL (next);
	  if (dest
	      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
		  > 4092 + 4098))
	    gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
	}
    }

  if (dead)
    {
      /* Pick the lowest-numbered dead register as the scratch.  */
      rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));

      /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, and thus exposing all constituent
	 instructions to further optimization.  However, reorg uses
	 simplejump_p to determine if there is an unconditional jump where
	 it should try to schedule instructions from the target of the
	 branch; simplejump_p fails for indirect jumps even if they have
	 a JUMP_LABEL.  */
      rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
					 (reg, GEN_INT (unspec_bbr_uid++)),
					 jump);
      /* ??? We would like this to have the scope of the jump, but that
	 scope will change when a delay slot insn of an inner scope is added.
	 Hence, after delay slot scheduling, we'll have to expect
	 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
	 the jump.  */

      INSN_LOCATION (insn) = INSN_LOCATION (jump);
      INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
      return insn;
    }
  else if (need_block)
    /* We can't use JUMP_LABEL here because it might be undefined
       when not optimizing.  */
    return emit_insn_before (gen_block_branch_redirect
			     (GEN_INT (unspec_bbr_uid++)),
			     jump);
  return prev;
}
5599
/* Displacement range (in bytes) reachable by an SH conditional branch.  */
#define CONDJUMP_MIN -252
#define CONDJUMP_MAX 262
/* Bookkeeping for a conditional branch whose target is beyond the
   CONDJUMP range and therefore needs a branch-around ("far branch").  */
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx_insn *near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx_insn *insert_place;
  /* The ultimate destination.  */
  rtx_insn *far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};

/* Current phase of the machine-dependent reorg pass.  */
enum mdep_reorg_phase_e mdep_reorg_phase;
5619
/* Materialize the far branch described by BP: invert the original
   conditional branch so it skips over a newly emitted unconditional
   jump (or return) to the ultimate destination.  */
static void
gen_far_branch (struct far_branch *bp)
{
  rtx_insn *insn = bp->insert_place;
  rtx_jump_insn *jump;
  rtx_code_label *label = gen_label_rtx ();

  emit_label_after (label, insn);
  if (bp->far_label)
    {
      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
      LABEL_NUSES (bp->far_label)++;
    }
  else
    /* No far label: the original branch targeted the function return.  */
    jump = emit_jump_insn_after (gen_return (), insn);

  /* Emit a barrier so that reorg knows that any following instructions
     are not reachable via a fall-through path.
     But don't do this when not optimizing, since we wouldn't suppress the
     alignment for the barrier then, and could end up with out-of-range
     pc-relative loads.  */
  if (optimize)
    emit_barrier_after (jump);
  emit_label_after (bp->near_label, insn);

  if (bp->far_label)
    JUMP_LABEL (jump) = bp->far_label;
  else
    {
      rtx pat = PATTERN (jump);
      gcc_assert (ANY_RETURN_P (pat));
      JUMP_LABEL (jump) = pat;
    }

  /* Invert the original conditional branch so it now branches to LABEL
     (just past the far jump) on the opposite condition.  */
  bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
  gcc_assert (ok);

  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we rather hide the delay slot)
     and it could cause branches to go out of range.  */
  if (bp->far_label)
    (emit_insn_after
     (gen_stuff_delay_slot
      (GEN_INT (unspec_bbr_uid++),
       GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
      insn));
  /* Prevent reorg from undoing our splits.  */
  gen_block_redirect (jump, bp->address += 2, 2);
}
5670
/* Fix up ADDR_DIFF_VECs so their offsets are relative to the reference
   address of the braf instruction that consumes them.  */
void
fixup_addr_diff_vecs (rtx_insn *first)
{
  rtx_insn *insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      rtx vec_lab, pat, prevpat, x, braf_label;
      rtx_insn *prev;

      if (! JUMP_TABLE_DATA_P (insn)
	  || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	continue;
      pat = PATTERN (insn);
      /* The vector's base label, relative to which entries are encoded.  */
      vec_lab = XEXP (XEXP (pat, 0), 0);

      /* Search the matching casesi_jump_2.  */
      for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
	{
	  if (!JUMP_P (prev))
	    continue;
	  prevpat = PATTERN (prev);
	  if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
	    continue;
	  x = XVECEXP (prevpat, 0, 1);
	  if (GET_CODE (x) != USE)
	    continue;
	  x = XEXP (x, 0);
	  /* Match the USE of this vector's base label.  */
	  if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	    break;
	}
      /* FIXME: This is a bug in the optimizer, but it seems harmless
	 to just avoid panicking.  NOTE(review): the search loop above
	 only exits via break, so PREV seemingly cannot be NULL here -
	 confirm whether this guard is reachable.  */
      if (!prev)
	continue;

      /* Emit the reference label of the braf where it belongs, right after
	 the casesi_jump_2 (i.e. braf).  */
      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
      emit_label_after (braf_label, prev);

      /* Fix up the ADDR_DIFF_VEC to be relative
	 to the reference address of the braf.  */
      XEXP (XEXP (pat, 0), 0) = braf_label;
    }
}
5718
/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
   a barrier.  Return the base 2 logarithm of the desired alignment.  */
int
barrier_align (rtx_insn *barrier_or_label)
{
  if (! barrier_or_label)
    return 0;

  /* A label directly in front of a jump table: 4-byte alignment.  */
  if (LABEL_P (barrier_or_label)
      && NEXT_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
    return 2;

  /* A barrier immediately after a jump table.  */
  if (BARRIER_P (barrier_or_label)
      && PREV_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
    {
      rtx pat = PATTERN (PREV_INSN (barrier_or_label));
      /* If this is a very small table, we want to keep the alignment after
	 the table to the minimum for proper code alignment.  */
      return ((optimize_size
	       || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
		   <= (unsigned) 1 << (CACHE_LOG - 2)))
	      ? 1 : align_jumps_log);
    }

  rtx_insn *next = next_active_insn (barrier_or_label);

  if (! next)
    return 0;

  rtx pat = PATTERN (next);

  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
    /* This is a barrier in front of a constant table.  */
    return 0;

  if (optimize_size)
    return 0;

  if (! TARGET_SH2 || ! optimize)
    return align_jumps_log;

  /* When fixing up pcloads, a constant table might be inserted just before
     the basic block that ends with the barrier.  Thus, we can't trust the
     instruction lengths before that.  */
  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
    {
      /* Check if there is an immediately preceding branch to the insn beyond
	 the barrier.  We must weight the cost of discarding useful information
	 from the current cache line when executing this branch and there is
	 an alignment, against that of fetching unneeded insn in front of the
	 branch target when there is no alignment.  */

      /* There are two delay_slot cases to consider.  One is the simple case
	 where the preceding branch is to the insn beyond the barrier (simple
	 delay slot filling), and the other is where the preceding branch has
	 a delay slot that is a duplicate of the insn after the barrier
	 (fill_eager_delay_slots) and the branch is to the insn after the insn
	 after the barrier.  */

      int slot, credit;
      bool jump_to_next = false;

      /* Skip to the insn before the JUMP_INSN before the barrier under
	 investigation.  */
      rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));

      /* Walk backwards while cache-line "credit" remains; CREDIT starts
	 at the size of a cache line (in bytes) plus 2.  */
      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
	   credit >= 0 && prev && NONJUMP_INSN_P (prev);
	   prev = prev_real_insn (prev))
	{
	  jump_to_next = false;
	  if (GET_CODE (PATTERN (prev)) == USE
	      || GET_CODE (PATTERN (prev)) == CLOBBER)
	    continue;
	  if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
	    {
	      prev = prev_seq->insn (1);
	      if (INSN_UID (prev) == INSN_UID (next))
		{
		  /* Delay slot was filled with insn at jump target.  */
		  jump_to_next = true;
		  continue;
		}
	    }

	  if (slot &&
	      get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	    slot = 0;
	  credit -= get_attr_length (prev);
	}
      if (prev && jump_to_label_p (prev))
	{
	  rtx_insn *x;
	  if (jump_to_next
	      || next_real_insn (JUMP_LABEL (prev)) == next
	      /* If relax_delay_slots() decides NEXT was redundant
		 with some previous instruction, it will have
		 redirected PREV's jump to the following insn.  */
	      || JUMP_LABEL (prev) == next_nonnote_insn (next)
	      /* There is no upper bound on redundant instructions
		 that might have been skipped, but we must not put an
		 alignment where none had been before.  */
	      || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
		  (INSN_P (x)
		   && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
		       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
		       || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
	    {
	      rtx pat = PATTERN (prev);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	      /* Suppress the alignment when the preceding branch makes it
		 cheap enough to fall through unaligned.  */
	      if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
		return 0;
	    }
	}
    }

  return align_jumps_log;
}
5840
5841 /* If we are inside a phony loop, almost any kind of label can turn up as the
5842 first one in the loop. Aligning a braf label causes incorrect switch
5843 destination addresses; we can detect braf labels because they are
5844 followed by a BARRIER.
5845 Applying loop alignment to small constant or switch tables is a waste
5846 of space, so we suppress this too. */
5847 int
sh_loop_align(rtx_insn * label)5848 sh_loop_align (rtx_insn *label)
5849 {
5850 rtx_insn *next = label;
5851
5852 if (! optimize || optimize_size)
5853 return 0;
5854
5855 do
5856 next = next_nonnote_insn (next);
5857 while (next && LABEL_P (next));
5858
5859 if (! next
5860 || ! INSN_P (next)
5861 || recog_memoized (next) == CODE_FOR_consttable_2)
5862 return 0;
5863
5864 return align_loops_log;
5865 }
5866
/* Do a final pass over the function, just before delayed branch
   scheduling.  Performs the machine-dependent reorganizations:
   emitting .uses pseudo-ops for linker relaxation, fixing up movas
   that went out of range, and inserting the constant pool tables.  */
static void
sh_reorg (void)
{
  rtx_insn *first, *insn, *mova = NULL;
  int num_mova;
  rtx r0_rtx = gen_rtx_REG (Pmode, 0);
  rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);

  first = get_insns ();
  max_labelno_before_reorg = max_label_num ();

  /* We must split call insns before introducing `mova's.  If we're
     optimizing, they'll have already been split.  Otherwise, make
     sure we don't split them too late.  */
  if (! optimize)
    split_all_insns_noflow ();

  /* If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call.  It does no harm to not generate these
     pseudo-ops.  However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry.  */

  mdep_reorg_phase = SH_INSERT_USES_LABELS;
  if (TARGET_RELAX)
    {
      /* Remove all REG_LABEL_OPERAND notes.  We want to use them for our
	 own purposes.  This works because none of the remaining passes
	 need to look at them.

	 ??? But it may break in the future.  We should use a machine
	 dependent REG_NOTE, or some other approach entirely.  */
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      rtx note;

	      while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
					    NULL_RTX)) != 0)
		remove_note (insn, note);
	    }
	}

      /* For each call (or sfunc) whose target address comes from a
	 register, locate the single insn that sets that register and
	 tie the two together with a REG_LABEL_OPERAND note.  */
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, reg, set, dies;
	  rtx_code_label *label;
	  rtx_insn *link, *scan;
	  int rescan = 0, foundinsn = 0;

	  if (CALL_P (insn))
	    {
	      pattern = PATTERN (insn);

	      if (GET_CODE (pattern) == PARALLEL)
		pattern = XVECEXP (pattern, 0, 0);
	      if (GET_CODE (pattern) == SET)
		pattern = SET_SRC (pattern);

	      if (GET_CODE (pattern) != CALL
		  || !MEM_P (XEXP (pattern, 0)))
		continue;

	      reg = XEXP (XEXP (pattern, 0), 0);
	    }
	  else
	    {
	      reg = sfunc_uses_reg (insn);
	      if (! reg)
		continue;
	    }

	  if (!REG_P (reg))
	    continue;

	  /* Try scanning backward to find where the register is set.  */
	  link = NULL;
	  for (scan = PREV_INSN (insn);
	       scan && !LABEL_P (scan);
	       scan = PREV_INSN (scan))
	    {
	      if (! INSN_P (scan))
		continue;

	      if (! reg_mentioned_p (reg, scan))
		continue;

	      if (noncall_uses_reg (reg, scan, &set))
		break;

	      if (set)
		{
		  link = scan;
		  break;
		}
	    }

	  if (! link)
	    continue;

	  /* The register is set at LINK.  */

	  /* We can only optimize the function call if the register is
	     being set to a symbol.  In theory, we could sometimes
	     optimize calls to a constant location, but the assembler
	     and linker do not support that at present.  */
	  if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
	      && GET_CODE (SET_SRC (set)) != LABEL_REF)
	    continue;

	  /* Scan forward from LINK to the place where REG dies, and
	     make sure that the only insns which use REG are
	     themselves function calls.  */

	  /* ??? This doesn't work for call targets that were allocated
	     by reload, since there may not be a REG_DEAD note for the
	     register.  */

	  dies = NULL_RTX;
	  for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
	    {
	      rtx scanset;

	      /* Don't try to trace forward past a CODE_LABEL if we haven't
		 seen INSN yet.  Ordinarily, we will only find the setting insn
		 if it is in the same basic block.  However,
		 cross-jumping can insert code labels in between the load and
		 the call, and can result in situations where a single call
		 insn may have two targets depending on where we came from.  */

	      if (LABEL_P (scan) && ! foundinsn)
		break;

	      if (! INSN_P (scan))
		continue;

	      /* Don't try to trace forward past a JUMP.  To optimize
		 safely, we would have to check that all the
		 instructions at the jump destination did not use REG.  */

	      if (JUMP_P (scan))
		break;

	      if (! reg_mentioned_p (reg, scan))
		continue;

	      if (noncall_uses_reg (reg, scan, &scanset))
		break;

	      if (scan == insn)
		foundinsn = 1;

	      if (scan != insn
		  && (CALL_P (scan) || sfunc_uses_reg (scan)))
		{
		  /* There is a function call to this register other
		     than the one we are checking.  If we optimize
		     this call, we need to rescan again below.  */
		  rescan = 1;
		}

	      /* ??? We shouldn't have to worry about SCANSET here.
		 We should just be able to check for a REG_DEAD note
		 on a function call.  However, the REG_DEAD notes are
		 apparently not dependable around libcalls; c-torture
		 execute/920501-2 is a test case.  If SCANSET is set,
		 then this insn sets the register, so it must have
		 died earlier.  Unfortunately, this will only handle
		 the cases in which the register is, in fact, set in a
		 later insn.  */

	      /* ??? We shouldn't have to use FOUNDINSN here.
		 This dates back to when we used LOG_LINKS to find
		 the most recent insn which sets the register.  */

	      if (foundinsn
		  && (scanset
		      || find_reg_note (scan, REG_DEAD, reg)))
		{
		  dies = scan;
		  break;
		}
	    }

	  if (! dies)
	    {
	      /* Either there was a branch, or some insn used REG
		 other than as a function call address.  */
	      continue;
	    }

	  /* Create a code label, and put it in a REG_LABEL_OPERAND note
	     on the insn which sets the register, and on each call insn
	     which uses the register.  In final_prescan_insn we look for
	     the REG_LABEL_OPERAND notes, and output the appropriate label
	     or pseudo-op.  */

	  label = gen_label_rtx ();
	  add_reg_note (link, REG_LABEL_OPERAND, label);
	  add_reg_note (insn, REG_LABEL_OPERAND, label);
	  if (rescan)
	    {
	      scan = link;
	      do
		{
		  rtx reg2;

		  scan = NEXT_INSN (scan);
		  if (scan != insn
		      && ((CALL_P (scan)
			   && reg_mentioned_p (reg, scan))
			  || ((reg2 = sfunc_uses_reg (scan))
			      && REGNO (reg2) == REGNO (reg))))
		    add_reg_note (scan, REG_LABEL_OPERAND, label);
		}
	      while (scan != dies);
	    }
	}
    }

  if (TARGET_SH2)
    fixup_addr_diff_vecs (first);

  if (optimize)
    {
      mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
      shorten_branches (first);
    }

  /* Scan the function looking for move instructions which have to be
     changed to pc-relative loads and insert the literal tables.  */
  mdep_reorg_phase = SH_FIXUP_PCLOAD;
  for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
    {
      if (mova_p (insn))
	{
	  /* ??? basic block reordering can move a switch table dispatch
	     below the switch table.  Check if that has happened.
	     We only have the addresses available when optimizing; but then,
	     this check shouldn't be needed when not optimizing.  */
	  if (!untangle_mova (&num_mova, &mova, insn))
	    {
	      insn = mova;
	      num_mova = 0;
	    }
	}
      else if (JUMP_TABLE_DATA_P (insn)
	       && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
	       && num_mova
	       /* ??? loop invariant motion can also move a mova out of a
		  loop.  Since loop does this code motion anyway, maybe we
		  should wrap UNSPEC_MOVA into a CONST, so that reload can
		  move it back.  */
	       && ((num_mova > 1
		    && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
		   || (prev_nonnote_insn (insn)
		       == XEXP (MOVA_LABELREF (mova), 0))))
	{
	  rtx_insn *scan;
	  int total;

	  num_mova--;

	  /* Some code might have been inserted between the mova and
	     its ADDR_DIFF_VEC.  Check if the mova is still in range.  */
	  for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
	    total += get_attr_length (scan);

	  /* range of mova is 1020, add 4 because pc counts from address of
	     second instruction after this one, subtract 2 in case pc is 2
	     byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
	     cancels out with alignment effects of the mova itself.  */
	  if (total > 1022)
	    {
	      /* Change the mova into a load, and restart scanning
		 there.  broken_move will then return true for mova.  */
	      fixup_mova (mova);
	      insn = mova;
	    }
	}
      if (broken_move (insn)
	  || (NONJUMP_INSN_P (insn)
	      && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
	{
	  rtx_insn *scan;
	  /* Scan ahead looking for a barrier to stick the constant table
	     behind.  */
	  rtx_insn *barrier = find_barrier (num_mova, mova, insn);
	  rtx_insn *last_float_move = NULL;
	  rtx last_float = 0, *last_float_addr = NULL;
	  int need_aligned_label = 0;

	  if (num_mova && ! mova_p (mova))
	    {
	      /* find_barrier had to change the first mova into a
		 pcload; thus, we have to start with this new pcload.  */
	      insn = mova;
	      num_mova = 0;
	    }
	  /* Now find all the moves between the points and modify them.  */
	  for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
	    {
	      if (LABEL_P (scan))
		last_float = 0;
	      if (NONJUMP_INSN_P (scan)
		  && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
		need_aligned_label = 1;
	      if (broken_move (scan))
		{
		  rtx *patp = &PATTERN (scan), pat = *patp;
		  rtx src, dst;
		  rtx lab;
		  rtx newsrc;
		  machine_mode mode;

		  if (GET_CODE (pat) == PARALLEL)
		    patp = &XVECEXP (pat, 0, 0), pat = *patp;
		  src = SET_SRC (pat);
		  dst = SET_DEST (pat);
		  mode = GET_MODE (dst);

		  if (mode == SImode && satisfies_constraint_I16 (src)
		      && REGNO (dst) != FPUL_REG)
		    {
		      int offset = 0;

		      /* A 16-bit-immediate SImode constant can be loaded
			 through a narrower HImode pool entry.  */
		      mode = HImode;
		      while (GET_CODE (dst) == SUBREG)
			{
			  offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
							 GET_MODE (SUBREG_REG (dst)),
							 SUBREG_BYTE (dst),
							 GET_MODE (dst));
			  dst = SUBREG_REG (dst);
			}
		      dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
		    }
		  if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
		    {
		      /* This must be an insn that clobbers r0.  */
		      rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
						XVECLEN (PATTERN (scan), 0)
						- 1);
		      rtx clobber = *clobberp;

		      gcc_assert (GET_CODE (clobber) == CLOBBER
				  && rtx_equal_p (XEXP (clobber, 0), r0_rtx));

		      if (last_float
			  && reg_set_between_p (r0_rtx, last_float_move, scan))
			last_float = 0;
		      lab = add_constant (src, mode, last_float);
		      if (lab)
			emit_insn_before (gen_mova (lab), scan);
		      else
			{
			  /* There will be a REG_UNUSED note for r0 on
			     LAST_FLOAT_MOVE; we have to change it to REG_INC,
			     lest reorg:mark_target_live_regs will not
			     consider r0 to be used, and we end up with delay
			     slot insn in front of SCAN that clobbers r0.  */
			  rtx note
			    = find_regno_note (last_float_move, REG_UNUSED, 0);

			  /* If we are not optimizing, then there may not be
			     a note.  */
			  if (note)
			    PUT_REG_NOTE_KIND (note, REG_INC);

			  *last_float_addr = r0_inc_rtx;
			}
		      last_float_move = scan;
		      last_float = src;
		      newsrc = gen_const_mem (mode,
					      (((TARGET_SH4 && ! TARGET_FMOVD)
						|| REGNO (dst) == FPUL_REG)
					       ? r0_inc_rtx
					       : r0_rtx));
		      last_float_addr = &XEXP (newsrc, 0);

		      /* Remove the clobber of r0.  */
		      *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
						   gen_rtx_SCRATCH (Pmode));
		    }
		  /* This is a mova needing a label.  Create it.  */
		  else if (GET_CODE (src) == UNSPEC
			   && XINT (src, 1) == UNSPEC_MOVA
			   && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
		    {
		      lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
		      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
		      newsrc = gen_rtx_UNSPEC (SImode,
					       gen_rtvec (1, newsrc),
					       UNSPEC_MOVA);
		    }
		  else if (GET_CODE (src) == UNSPEC_VOLATILE
			   && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
		    {
		      newsrc = XVECEXP (src, 0, 0);
		      XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
		      INSN_CODE (scan) = -1;
		      continue;
		    }
		  else
		    {
		      lab = add_constant (src, mode, 0);
		      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
		      newsrc = gen_const_mem (mode, newsrc);
		    }
		  *patp = gen_rtx_SET (dst, newsrc);
		  INSN_CODE (scan) = -1;
		}
	    }
	  dump_table (need_aligned_label ? insn : 0, barrier);
	  insn = barrier;
	}
    }
  label_ref_list_d_pool.release ();
  /* Clear the machine modes we borrowed on insns for bookkeeping.  */
  for (insn = first; insn; insn = NEXT_INSN (insn))
    PUT_MODE (insn, VOIDmode);

  mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
  INSN_ADDRESSES_FREE ();
  split_branches (first);

  /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
     also has an effect on the register that holds the address of the sfunc.
     Insert an extra dummy insn in front of each sfunc that pretends to
     use this register.  */
  if (flag_delayed_branch)
    {
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  rtx reg = sfunc_uses_reg (insn);

	  if (! reg)
	    continue;
	  emit_insn_before (gen_use_sfunc_addr (reg), insn);
	}
    }
  mdep_reorg_phase = SH_AFTER_MDEP_REORG;
}
6312
6313 /* Return the UID of the insn that follows the specified label. */
6314 int
get_dest_uid(rtx label,int max_uid)6315 get_dest_uid (rtx label, int max_uid)
6316 {
6317 rtx_insn *dest = next_real_insn (label);
6318
6319 if (! dest)
6320 /* This can happen for an undefined label. */
6321 return 0;
6322 int dest_uid = INSN_UID (dest);
6323 /* If this is a newly created branch redirection blocking instruction,
6324 we cannot index the branch_uid or insn_addresses arrays with its
6325 uid. But then, we won't need to, because the actual destination is
6326 the following branch. */
6327 while (dest_uid >= max_uid)
6328 {
6329 dest = NEXT_INSN (dest);
6330 dest_uid = INSN_UID (dest);
6331 }
6332 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6333 return 0;
6334 return dest_uid;
6335 }
6336
/* Split condbranches that are out of range.  Also add clobbers for
   scratch registers that are needed in far jumps.
   We do this before delay slot scheduling, so that it can take our
   newly created instructions into account.  It also allows us to
   find branches with common targets more easily.  */
static void
split_branches (rtx_insn *first)
{
  rtx_insn *insn;
  /* Per-destination-UID bookkeeping for far branches; entries are
     alloca'd and chained so they can all be flushed at the end.  */
  struct far_branch **uid_branch, *far_branch_list = 0;
  int max_uid = get_max_uid ();
  int ok;

  /* Find out which branches are out of range.  */
  shorten_branches (first);

  uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
  memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);

  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (! INSN_P (insn))
      continue;
    else if (insn->deleted ())
      {
	/* Shorten_branches would split this instruction again,
	   so transform it into a note.  */
	SET_INSN_DELETED (insn);
      }
    else if (JUMP_P (insn))
      {
	enum attr_type type = get_attr_type (insn);
	if (type == TYPE_CBRANCH)
	  {
	    rtx_insn *next, *beyond;

	    if (get_attr_length (insn) > 4)
	      {
		/* Conditional branch does not reach; route it through a
		   near label (creating one if necessary) to a far branch.  */
		rtx src = SET_SRC (PATTERN (insn));
		rtx olabel = XEXP (XEXP (src, 1), 0);
		int addr = INSN_ADDRESSES (INSN_UID (insn));
		rtx_insn *label = 0;
		int dest_uid = get_dest_uid (olabel, max_uid);
		struct far_branch *bp = uid_branch[dest_uid];

		/* redirect_jump needs a valid JUMP_LABEL, and it might delete
		   the label if the LABEL_NUSES count drops to zero.  There is
		   always a jump_optimize pass that sets these values, but it
		   proceeds to delete unreferenced code, and then if not
		   optimizing, to un-delete the deleted instructions, thus
		   leaving labels with too low uses counts.  */
		if (! optimize)
		  {
		    JUMP_LABEL (insn) = olabel;
		    LABEL_NUSES (olabel)++;
		  }
		if (! bp)
		  {
		    bp = (struct far_branch *) alloca (sizeof *bp);
		    uid_branch[dest_uid] = bp;
		    bp->prev = far_branch_list;
		    far_branch_list = bp;
		    bp->far_label = as_a <rtx_insn *> (
				      XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
					    0));
		    LABEL_NUSES (bp->far_label)++;
		  }
		else
		  {
		    label = bp->near_label;
		    if (! label && bp->address - addr >= CONDJUMP_MIN)
		      {
			rtx_insn *block = bp->insert_place;

			if (GET_CODE (PATTERN (block)) == RETURN)
			  block = PREV_INSN (block);
			else
			  block = gen_block_redirect (block,
						      bp->address, 2);
			label = emit_label_after (gen_label_rtx (),
						  PREV_INSN (block));
			bp->near_label = label;
		      }
		    else if (label && ! NEXT_INSN (label))
		      {
			if (addr + 2 - bp->address <= CONDJUMP_MAX)
			  bp->insert_place = insn;
			else
			  gen_far_branch (bp);
		      }
		  }
		if (! label
		    || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
		  {
		    bp->near_label = label = gen_label_rtx ();
		    bp->insert_place = insn;
		    bp->address = addr;
		  }
		ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
		gcc_assert (ok);
	      }
	    else
	      {
		/* get_attr_length (insn) == 2 */
		/* Check if we have a pattern where reorg wants to redirect
		   the branch to a label from an unconditional branch that
		   is too far away.  */
		/* We can't use JUMP_LABEL here because it might be undefined
		   when not optimizing.  */
		/* A syntax error might cause beyond to be NULL_RTX.  */
		rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
		beyond = next_active_insn (as_a<rtx_insn *> (temp));

		if (beyond
		    && (JUMP_P (beyond)
			|| ((beyond = next_active_insn (beyond))
			    && JUMP_P (beyond)))
		    && GET_CODE (PATTERN (beyond)) == SET
		    && recog_memoized (beyond) == CODE_FOR_jump_compact
		    && ((INSN_ADDRESSES
			 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
			 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
			> 252 + 258 + 2))
		  gen_block_redirect (beyond,
				      INSN_ADDRESSES (INSN_UID (beyond)), 1);
	      }

	    next = next_active_insn (insn);

	    if (next
		&& (JUMP_P (next)
		    || ((next = next_active_insn (next))
			&& JUMP_P (next)))
		&& GET_CODE (PATTERN (next)) == SET
		&& recog_memoized (next) == CODE_FOR_jump_compact
		&& ((INSN_ADDRESSES
		     (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
		     - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
		    > 252 + 258 + 2))
	      gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
	  }
	else if (type == TYPE_JUMP || type == TYPE_RETURN)
	  {
	    int addr = INSN_ADDRESSES (INSN_UID (insn));
	    rtx_insn *far_label = 0;
	    int dest_uid = 0;
	    struct far_branch *bp;

	    if (type == TYPE_JUMP)
	      {
		if (CROSSING_JUMP_P (insn))
		  {
		    emit_insn_before (gen_block_branch_redirect (const0_rtx),
				      insn);
		    continue;
		  }

		far_label = as_a <rtx_insn *> (
			      XEXP (SET_SRC (PATTERN (insn)), 0));
		dest_uid = get_dest_uid (far_label, max_uid);
		if (! dest_uid)
		  {
		    /* Parse errors can lead to labels outside
		       the insn stream.  */
		    if (! NEXT_INSN (far_label))
		      continue;

		    if (! optimize)
		      {
			JUMP_LABEL (insn) = far_label;
			LABEL_NUSES (far_label)++;
		      }
		    redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
		    far_label = 0;
		  }
	      }
	    bp = uid_branch[dest_uid];
	    if (! bp)
	      {
		bp = (struct far_branch *) alloca (sizeof *bp);
		uid_branch[dest_uid] = bp;
		bp->prev = far_branch_list;
		far_branch_list = bp;
		bp->near_label = 0;
		bp->far_label = far_label;
		if (far_label)
		  LABEL_NUSES (far_label)++;
	      }
	    else if (bp->near_label && ! NEXT_INSN (bp->near_label))
	      if (addr - bp->address <= CONDJUMP_MAX)
		emit_label_after (bp->near_label, PREV_INSN (insn));
	      else
		{
		  gen_far_branch (bp);
		  bp->near_label = 0;
		}
	    else
	      bp->near_label = 0;
	    bp->address = addr;
	    bp->insert_place = insn;
	    if (! far_label)
	      emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
	    else
	      gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
	  }
      }
  /* Generate all pending far branches,
     and free our references to the far labels.  */
  while (far_branch_list)
    {
      if (far_branch_list->near_label
	  && ! NEXT_INSN (far_branch_list->near_label))
	gen_far_branch (far_branch_list);
      if (optimize
	  && far_branch_list->far_label
	  && ! --LABEL_NUSES (far_branch_list->far_label))
	delete_insn (far_branch_list->far_label);
      far_branch_list = far_branch_list->prev;
    }

  /* Instruction length information is no longer valid due to the new
     instructions that have been generated.  */
  init_insn_lengths ();
}
6560
6561 /* Dump out instruction addresses, which is useful for debugging the
6562 constant pool table stuff.
6563
6564 If relaxing, output the label and pseudo-ops used to link together
6565 calls and the instruction which set the registers.
6566
6567 ??? The addresses printed by this routine for insns are nonsense for
6568 insns which are inside of a sequence where none of the inner insns have
6569 variable length. This is because the second pass of shorten_branches
6570 does not bother to update them. */
6571 void
final_prescan_insn(rtx_insn * insn,rtx * opvec ATTRIBUTE_UNUSED,int noperands ATTRIBUTE_UNUSED)6572 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6573 int noperands ATTRIBUTE_UNUSED)
6574 {
6575 if (TARGET_DUMPISIZE)
6576 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6577
6578 if (TARGET_RELAX)
6579 {
6580 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6581 {
6582 rtx pattern = PATTERN (insn);
6583 if (GET_CODE (pattern) == PARALLEL)
6584 pattern = XVECEXP (pattern, 0, 0);
6585 switch (GET_CODE (pattern))
6586 {
6587 case SET:
6588 if (GET_CODE (SET_SRC (pattern)) != CALL
6589 && get_attr_type (insn) != TYPE_SFUNC)
6590 {
6591 targetm.asm_out.internal_label
6592 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6593 break;
6594 }
6595 /* FALLTHROUGH */
6596 case CALL:
6597 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6598 CODE_LABEL_NUMBER (XEXP (note, 0)));
6599 break;
6600
6601 default:
6602 gcc_unreachable ();
6603 }
6604 }
6605 }
6606 }
6607
6608 /* Dump out any constants accumulated in the final pass. These will
6609 only be labels. */
6610 const char *
output_jump_label_table(void)6611 output_jump_label_table (void)
6612 {
6613 if (pool_size)
6614 {
6615 fprintf (asm_out_file, "\t.align 2\n");
6616 for (int i = 0; i < pool_size; i++)
6617 {
6618 pool_node *p = &pool_vector[i];
6619
6620 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6621 CODE_LABEL_NUMBER (p->label));
6622 output_asm_insn (".long %O0", &p->value);
6623 }
6624 pool_size = 0;
6625 }
6626
6627 return "";
6628 }
6629
/* A full frame looks like:

   arg-5
   arg-4
   [ if current_function_anonymous_args
   arg-3
   arg-2
   arg-1
   arg-0 ]
   saved-fp
   saved-r10
   saved-r11
   saved-r12
   saved-pr
   local-n
   ..
   local-1
   local-0        <- fp points here.

   Number of bytes pushed for anonymous args, used to pass information
   between expand_prologue and expand_epilogue.

   Adjust the stack by SIZE bytes.  REG holds the rtl of the register to be
   adjusted.  If epilogue_p is zero, this is for a prologue; otherwise, it's
   for an epilogue and a negative value means that it's for a sibcall
   epilogue.  If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
   all the registers that are about to be restored, and hence dead.  */
static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
		     HARD_REG_SET *live_regs_mask, bool frame_p)
{
  /* In a prologue (FRAME_P), the emitted insns must be marked frame
     related for correct unwind info.  */
  rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
  if (size)
    {
      HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

/* This test is bogus, as output_stack_adjust is used to re-align the
   stack.  */
#if 0
      gcc_assert (!(size % align));
#endif

      if (CONST_OK_FOR_ADD (size))
	emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
      /* Try to do it with two partial adjustments; however, we must make
	 sure that the stack is properly aligned at all times, in case
	 an interrupt occurs between the two partial adjustments.  */
      else if (CONST_OK_FOR_ADD (size / 2 & -align)
	       && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
	{
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
	}
      else
	{
	  /* The constant is too big for an immediate add; load it into a
	     temporary register first.  */
	  rtx const_reg;
	  rtx insn;
	  int temp = epilogue_p ? 7 : 1;
	  int i;

	  /* If TEMP is invalid, we could temporarily save a general
	     register to MACL.  However, there is currently no need
	     to handle this case, so just die when we see it.  */
	  if (epilogue_p < 0
	      || current_function_interrupt
	      || ! call_really_used_regs[temp] || fixed_regs[temp])
	    temp = -1;
	  if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
	    {
	      /* Scavenge a call-clobbered register that is not needed
		 for return values, EH data, or incoming parameters.  */
	      HARD_REG_SET temps;
	      COPY_HARD_REG_SET (temps, call_used_reg_set);
	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
	      if (epilogue_p > 0)
		{
		  int nreg = 0;
		  if (crtl->return_rtx)
		    {
		      machine_mode mode;
		      mode = GET_MODE (crtl->return_rtx);
		      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
			nreg = hard_regno_nregs (FIRST_RET_REG, mode);
		    }
		  for (i = 0; i < nreg; i++)
		    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
		  if (crtl->calls_eh_return)
		    {
		      CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
		      for (i = 0; i <= 3; i++)
			CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
		    }
		}
	      if (epilogue_p <= 0)
		{
		  for (i = FIRST_PARM_REG;
		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
		    CLEAR_HARD_REG_BIT (temps, i);
		  if (cfun->static_chain_decl != NULL)
		    CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
		}
	      temp = scavenge_reg (&temps);
	    }
	  if (temp < 0 && live_regs_mask)
	    {
	      /* Second chance: any register about to be restored (and
		 hence currently dead), other than REG itself.  */
	      HARD_REG_SET temps;

	      COPY_HARD_REG_SET (temps, *live_regs_mask);
	      CLEAR_HARD_REG_BIT (temps, REGNO (reg));
	      temp = scavenge_reg (&temps);
	    }
	  if (temp < 0)
	    {
	      rtx adj_reg, tmp_reg, mem;

	      /* If we reached here, the most likely case is the (sibcall)
		 epilogue.  Put a special push/pop sequence for such case as
		 the last resort.  This looks lengthy but would not be problem
		 because it seems to be very rare.  */
	      gcc_assert (epilogue_p);

	      /* ??? There is still the slight possibility that r4 or
		 r5 have been reserved as fixed registers or assigned
		 as global registers, and they change during an
		 interrupt.  There are possible ways to handle this:

		 - If we are adjusting the frame pointer (r14), we can do
		   with a single temp register and an ordinary push / pop
		   on the stack.
		 - Grab any call-used or call-saved registers (i.e. not
		   fixed or globals) for the temps we need.  We might
		   also grab r14 if we are adjusting the stack pointer.
		   If we can't find enough available registers, issue
		   a diagnostic and die - the user must have reserved
		   way too many registers.
		 But since all this is rather unlikely to happen and
		 would require extra testing, we just die if r4 / r5
		 are not available.  */
	      gcc_assert (!fixed_regs[4] && !fixed_regs[5]
			  && !global_regs[4] && !global_regs[5]);

	      adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
	      tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
	      emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
	      emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
	      emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
	      emit_move_insn (mem, tmp_reg);
	      emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
	      emit_move_insn (mem, tmp_reg);
	      emit_move_insn (reg, adj_reg);
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
	      emit_move_insn (adj_reg, mem);
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
	      emit_move_insn (tmp_reg, mem);
	      /* Tell flow the insns that pop r4/r5 aren't dead.  */
	      emit_use (tmp_reg);
	      emit_use (adj_reg);
	      return;
	    }
	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);

	  /* If SIZE is negative, subtract the positive value.
	     This sometimes allows a constant pool entry to be shared
	     between prologue and epilogue code.  */
	  if (size < 0)
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
	      insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
	    }
	  else
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
	      insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
	    }
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
							GEN_INT (size))));
	}
    }
}
6810
6811 /* Emit the specified insn and mark it as frame related. */
6812 static rtx_insn *
emit_frame_insn(rtx x)6813 emit_frame_insn (rtx x)
6814 {
6815 rtx_insn *insn = emit_insn (x);
6816 RTX_FRAME_RELATED_P (insn) = 1;
6817 return insn;
6818 }
6819
6820 /* Output RTL to push register RN onto the stack. */
6821 static rtx
push(int rn)6822 push (int rn)
6823 {
6824 rtx x;
6825 if (rn == FPUL_REG)
6826 x = gen_push_fpul ();
6827 else if (rn == FPSCR_REG)
6828 x = gen_push_fpscr ();
6829 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6830 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6831 {
6832 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6833 return NULL_RTX;
6834 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6835 }
6836 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6837 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6838 else
6839 x = gen_push (gen_rtx_REG (SImode, rn));
6840
6841 x = emit_frame_insn (x);
6842 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6843 return x;
6844 }
6845
6846 /* Output RTL to pop register RN from the stack. */
6847 static void
pop(int rn)6848 pop (int rn)
6849 {
6850 rtx x, sp_reg, reg;
6851 if (rn == FPUL_REG)
6852 x = gen_pop_fpul ();
6853 else if (rn == FPSCR_REG)
6854 x = gen_pop_fpscr ();
6855 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6856 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6857 {
6858 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6859 return;
6860 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6861 }
6862 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6863 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6864 else
6865 x = gen_pop (gen_rtx_REG (SImode, rn));
6866
6867 x = emit_insn (x);
6868
6869 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6870 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6871 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6872 : SET_DEST (PATTERN (x)));
6873 add_reg_note (x, REG_CFA_RESTORE, reg);
6874 add_reg_note (x, REG_CFA_ADJUST_CFA,
6875 gen_rtx_SET (sp_reg,
6876 plus_constant (SImode, sp_reg,
6877 GET_MODE_SIZE (GET_MODE (reg)))));
6878 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6879 RTX_FRAME_RELATED_P (x) = 1;
6880 }
6881
/* Generate code to push the regs specified in the mask.  MASK is the set
   of hard registers to save; INTERRUPT_HANDLER selects the special
   ordering and movml handling used for interrupt handlers.  */
static void
push_regs (HARD_REG_SET *mask, bool interrupt_handler)
{
  bool skip_fpscr = false;

  /* Push PR last; this gives better latencies after the prologue, and
     candidates for the return delay slot when there are no general
     registers pushed.  For interrupt handlers, the banked registers are
     skipped in this loop and pushed separately below.  */
  for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
       i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* If this is an interrupt handler, and the SZ bit varies,
	 and we have to push any floating point register, we need
	 to switch to the correct precision first.  */
      if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
	  && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
	{
	  HARD_REG_SET unsaved;

	  /* Save the entry FPSCR now, load the mode the body wants, and
	     remember not to push FPSCR again in the loop below.  */
	  push (FPSCR_REG);
	  COMPL_HARD_REG_SET (unsaved, *mask);
	  fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
	  skip_fpscr = true;
	}
      if (i != PR_REG
	  && (i != FPSCR_REG || ! skip_fpscr)
	  && TEST_HARD_REG_BIT (*mask, i))
	{
	  /* If the ISR has RESBANK attribute assigned, don't push any of
	     the following registers - R0-R14, MACH, MACL and GBR.  */
	  if (! (sh_cfun_resbank_handler_p ()
		 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
		     || i == MACH_REG
		     || i == MACL_REG
		     || i == GBR_REG)))
	    push (i);
	}
    }

  /* Push banked registers last to improve delay slot opportunities.  */
  if (interrupt_handler)
    {
      bool use_movml = false;

      if (TARGET_SH2A)
	{
	  unsigned int count = 0;

	  /* Count the leading run of live banked registers; any gap
	     disqualifies the single-insn movml form.  */
	  for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	    if (TEST_HARD_REG_BIT (*mask, i))
	      count++;
	    else
	      break;

	  /* Use movml when all banked registers are pushed.  */
	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
	    use_movml = true;
	}

      if (sh_cfun_resbank_handler_p ())
	; /* Do nothing.  */
      else if (use_movml)
	{
	  rtx x, mem, reg, set;
	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

	  /* We must avoid scheduling multiple store insn with another
	     insns.  */
	  emit_insn (gen_blockage ());
	  x = gen_movml_push_banked (sp_reg);
	  x = emit_frame_insn (x);
	  /* Attach a CFA save note for each banked register stored by the
	     movml insn so the unwinder can locate the slots.  */
	  for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	    {
	      mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
	      reg = gen_rtx_REG (SImode, i);
	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
	    }

	  /* Record the 32-byte stack adjustment the movml store makes.  */
	  set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
	  emit_insn (gen_blockage ());
	}
      else
	for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	  if (TEST_HARD_REG_BIT (*mask, i))
	    push (i);
    }

  /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
  if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
    push (PR_REG);
}
6975
/* Work out the registers which need to be saved, both as a mask and a
   count of saved bytes (GET_MODE_SIZE units, despite "words" in older
   comments).  Return the count.

   If doing a pragma interrupt function, then push all regs used by the
   function, and if we call another function (we can tell by looking at PR),
   make sure that all the regs it clobbers are safe too.  */
static int
calc_live_regs (HARD_REG_SET *live_regs_mask)
{
  unsigned int reg;
  tree attrs;
  bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
  bool nosave_low_regs;

  attrs = DECL_ATTRIBUTES (current_function_decl);
  interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
  trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
  interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
  nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;

  CLEAR_HARD_REG_SET (*live_regs_mask);
  /* An interrupt handler that ever touches FPSCR is saved/restored in
     double mode, so drop single mode here.  */
  if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
      && df_regs_ever_live_p (FPSCR_REG))
    target_flags &= ~MASK_FPU_SINGLE;
  /* If we can save a lot of saves by switching to double mode, do that.  */
  else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
    for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
      if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
	  && (! call_really_used_regs[reg]
	      || interrupt_handler)
	  && ++count > 2)
	{
	  target_flags &= ~MASK_FPU_SINGLE;
	  break;
	}


  /* PR is live if it has a hard-reg initial value that is not PR itself,
     or if dataflow says it was ever live.  */
  rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
  bool pr_live = (pr_initial
		  ? (!REG_P (pr_initial)
		     || REGNO (pr_initial) != (PR_REG))
		  : df_regs_ever_live_p (PR_REG));
  /* For Shcompact, if not optimizing, we end up with a memory reference
     using the return address pointer for __builtin_return_address even
     though there is no actual need to put the PR register on the stack.  */
  pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);

  /* Force PR to be live if the prologue has to call the SHmedia
     argument decoder or register saver.  */
  bool has_call = pr_live;

  int count;
  /* Scan all hard registers from high to low and collect those that
     must be saved.  */
  for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
    {
      if (reg == PR_REG
	  ? pr_live
	  : interrupt_handler
	  ? (/* Need to save all the regs ever live.  */
	     (df_regs_ever_live_p (reg)
	      || (call_really_used_regs[reg]
		  && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
		      || reg == PIC_OFFSET_TABLE_REGNUM)
		  && has_call))
	     && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
	     && reg != RETURN_ADDRESS_POINTER_REGNUM
	     && reg != T_REG && reg != GBR_REG
	     && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
	     /* Push fpscr only on targets which have FPU */
	     && (reg != FPSCR_REG || TARGET_FPU_ANY))
	  : (/* Only push those regs which are used and need to be saved.  */
	     /* NOTE(review): "(false)" looks like a dead placeholder left
		behind when a removed target condition (presumably the old
		SHmedia/SHcompact support) was deleted; kept verbatim here
		to preserve the code byte-for-byte — candidate for cleanup.  */
	     (false)
	     || (df_regs_ever_live_p (reg)
		 && ((!call_really_used_regs[reg]
		      && !(reg != PIC_OFFSET_TABLE_REGNUM
			   && fixed_regs[reg] && call_used_regs[reg]))
		     || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
	     || (crtl->calls_eh_return
		 && (reg == EH_RETURN_DATA_REGNO (0)
		     || reg == EH_RETURN_DATA_REGNO (1)
		     || reg == EH_RETURN_DATA_REGNO (2)
		     || reg == EH_RETURN_DATA_REGNO (3)))
	     || ((reg == MACL_REG || reg == MACH_REG)
		 && df_regs_ever_live_p (reg)
		 && sh_cfun_attr_renesas_p ())
	     ))
	{
	  SET_HARD_REG_BIT (*live_regs_mask, reg);
	  /* COUNT accumulates GET_MODE_SIZE bytes.  */
	  count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));

	  if (TARGET_FPU_DOUBLE && TARGET_FMOVD
	      && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
	    {
	      if (FP_REGISTER_P (reg))
		{
		  /* FP registers move in pairs in double mode; pull in the
		     partner register (reg ^ 1) if it wasn't already live.  */
		  if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
		    {
		      SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
		      count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
		    }
		}
	      else if (XD_REGISTER_P (reg))
		{
		  /* Must switch to double mode to access these registers.  */
		  target_flags &= ~MASK_FPU_SINGLE;
		}
	    }
	}
      /* With the nosave_low_regs attribute, stop before the registers
	 below R8 (the loop runs downward).  */
      if (nosave_low_regs && reg == R8_REG)
	break;
    }

  return count;
}
7089
7090 /* Code to generate prologue and epilogue sequences */
7091
7092 /* PUSHED is the number of bytes that are being pushed on the
7093 stack for register saves. Return the frame size, padded
7094 appropriately so that the stack stays properly aligned. */
7095 static HOST_WIDE_INT
rounded_frame_size(int pushed)7096 rounded_frame_size (int pushed)
7097 {
7098 HOST_WIDE_INT size = get_frame_size ();
7099 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7100
7101 if (ACCUMULATE_OUTGOING_ARGS)
7102 size += crtl->outgoing_args_size;
7103
7104 return ((size + pushed + align - 1) & -align) - pushed;
7105 }
7106
7107 /* Expand code for the function prologue. */
7108 void
sh_expand_prologue(void)7109 sh_expand_prologue (void)
7110 {
7111 int save_flags = target_flags;
7112 tree sp_switch_attr
7113 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7114
7115 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7116
7117 /* We have pretend args if we had an object sent partially in registers
7118 and partially on the stack, e.g. a large structure. */
7119 int pretend_args = crtl->args.pretend_args_size;
7120 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7121 && (NPARM_REGS(SImode)
7122 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7123 pretend_args = 0;
7124
7125 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7126 int stack_usage = pretend_args;
7127
7128 /* Emit the code for SETUP_VARARGS. */
7129 if (cfun->stdarg)
7130 {
7131 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7132 {
7133 /* Push arg regs as if they'd been provided by caller in stack. */
7134 for (int i = 0; i < NPARM_REGS(SImode); i++)
7135 {
7136 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7137
7138 if (i >= (NPARM_REGS(SImode)
7139 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7140 ))
7141 break;
7142 push (rn);
7143 stack_usage += GET_MODE_SIZE (SImode);
7144 }
7145 }
7146 }
7147
7148 /* If we're supposed to switch stacks at function entry, do so now. */
7149 if (sp_switch_attr)
7150 {
7151 rtx lab, newsrc;
7152 /* The argument specifies a variable holding the address of the
7153 stack the interrupt function should switch to/from at entry/exit. */
7154 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7155 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7156 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7157
7158 lab = add_constant (sp_switch, SImode, 0);
7159 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7160
7161 emit_insn (gen_sp_switch_1 (newsrc));
7162 }
7163
7164 HARD_REG_SET live_regs_mask;
7165 int d = calc_live_regs (&live_regs_mask);
7166 /* ??? Maybe we could save some switching if we can move a mode switch
7167 that already happens to be at the function start into the prologue. */
7168 if (target_flags != save_flags && ! current_function_interrupt)
7169 emit_insn (gen_toggle_sz ());
7170
7171 push_regs (&live_regs_mask, current_function_interrupt);
7172 stack_usage += d;
7173
7174 if (flag_pic && !TARGET_FDPIC
7175 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7176 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7177
7178 if (target_flags != save_flags && ! current_function_interrupt)
7179 emit_insn (gen_toggle_sz ());
7180
7181 target_flags = save_flags;
7182
7183 output_stack_adjust (-rounded_frame_size (d),
7184 stack_pointer_rtx, 0, NULL, true);
7185 stack_usage += rounded_frame_size (d);
7186
7187 if (frame_pointer_needed)
7188 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7189
7190 /* If we are profiling, make sure no instructions are scheduled before
7191 the call to mcount. Similarly if some call instructions are swapped
7192 before frame related insns, it'll confuse the unwinder because
7193 currently SH has no unwind info for function epilogues. */
7194 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7195 emit_insn (gen_blockage ());
7196
7197 if (flag_stack_usage_info)
7198 current_function_static_stack_size = stack_usage;
7199 }
7200
/* Expand code for the function epilogue.  SIBCALL_P is true when the
   epilogue precedes a sibling call rather than a return.  */
void
sh_expand_epilogue (bool sibcall_p)
{
  int save_flags = target_flags;
  bool fpscr_deferred = false;
  /* Passed through to output_stack_adjust; -1 presumably marks the
     sibcall variant — confirm against output_stack_adjust's contract.  */
  int e = sibcall_p ? -1 : 1;

  HARD_REG_SET live_regs_mask;
  int d = calc_live_regs (&live_regs_mask);

  int save_size = d;
  int frame_size = rounded_frame_size (d);

  if (frame_pointer_needed)
    {
      /* We must avoid scheduling the epilogue with previous basic blocks.
	 See PR/18032 and PR/40313.  */
      emit_insn (gen_blockage ());
      output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
			   &live_regs_mask, true);

      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
    }
  else if (frame_size)
    {
      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      output_stack_adjust (frame_size, stack_pointer_rtx, e,
			   &live_regs_mask, true);
    }

  /* Pop all the registers.  */

  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());

  {
    int last_reg;

    /* The pops below move SP past the register save area themselves, so
       the final adjustment need not account for it.  */
    save_size = 0;
    /* For an ISR with RESBANK attribute assigned, don't pop PR
       register.  */
    if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
	&& !sh_cfun_resbank_handler_p ())
      {
	if (!frame_pointer_needed)
	  emit_insn (gen_blockage ());
	pop (PR_REG);
      }

    /* Banked registers are popped first to avoid being scheduled in the
       delay slot. RTE switches banks before the ds instruction.  */
    if (current_function_interrupt)
      {
	bool use_movml = false;

	if (TARGET_SH2A)
	  {
	    unsigned int count = 0;

	    /* Count the leading run of live banked registers; a gap
	       disqualifies the single-insn movml form.  */
	    for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
		count++;
	      else
		break;

	    /* Use movml when all banked registers are popped.  */
	    if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
	      use_movml = true;
	  }

	if (sh_cfun_resbank_handler_p ())
	  ; /* Do nothing.  */
	else if (use_movml)
	  {
	    rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

	    /* We must avoid scheduling multiple load insn with another
	       insns.  */
	    emit_insn (gen_blockage ());
	    emit_insn (gen_movml_pop_banked (sp_reg));
	    emit_insn (gen_blockage ());
	  }
	else
	  for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
	      pop (i);

	/* Exclude the banked registers from the generic loop below,
	   which walks registers top-down.  */
	last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
      }
    else
      last_reg = FIRST_PSEUDO_REGISTER;

    for (int i = 0; i < last_reg; i++)
      {
	/* Pop in the reverse order of push_regs.  */
	int j = (FIRST_PSEUDO_REGISTER - 1) - i;

	/* Defer the FPSCR pop: it must come after the DF-mode registers
	   it controls (popped at FIRST_FP_REG below).  */
	if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
	    && hard_reg_set_intersect_p (live_regs_mask,
					 reg_class_contents[DF_REGS]))
	  fpscr_deferred = true;
	/* For an ISR with RESBANK attribute assigned, don't pop
	   following registers, R0-R14, MACH, MACL and GBR.  */
	else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
		 && ! (sh_cfun_resbank_handler_p ()
		       && ((j >= FIRST_GENERAL_REG
			    && j < LAST_GENERAL_REG)
			   || j == MACH_REG
			   || j == MACL_REG
			   || j == GBR_REG)))
	  pop (j);

	if (j == FIRST_FP_REG && fpscr_deferred)
	  pop (FPSCR_REG);
      }
  }
  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());
  target_flags = save_flags;

  output_stack_adjust (crtl->args.pretend_args_size + save_size,
		       stack_pointer_rtx, e, NULL, true);

  if (crtl->calls_eh_return)
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			 EH_RETURN_STACKADJ_RTX));

  /* Switch back to the normal stack if necessary.  */
  if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
    emit_insn (gen_sp_switch_2 ());

  /* Tell flow the insn that pops PR isn't dead.  */
  if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
    emit_use (gen_rtx_REG (SImode, PR_REG));
}
7345
/* Emit code to change the current function's return address to RA.
   TMP is available as a scratch register, if needed.  */
void
sh_set_return_address (rtx ra, rtx tmp)
{
  HARD_REG_SET live_regs_mask;
  int d = calc_live_regs (&live_regs_mask);

  /* If the PR register isn't live, we can set it directly.  */
  if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
    {
      rtx rr = gen_rtx_REG (SImode, PR_REG);
      emit_insn (GEN_MOV (rr, ra));
      /* Tell flow the register for return isn't dead.  */
      emit_use (rr);
      return;
    }

  /* Otherwise PR was saved to the stack; overwrite the save slot.
     The slot is at rounded_frame_size (d) above the frame/stack
     pointer — presumably because PR is pushed last by push_regs, so it
     sits at the top of the register save area; confirm against the
     prologue layout.  */
  int pr_offset = rounded_frame_size (d);

  emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));

  if (frame_pointer_needed)
    emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
  else
    emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));

  tmp = gen_frame_mem (Pmode, tmp);
  emit_insn (GEN_MOV (tmp, ra));
  /* Tell flow this store isn't dead.  */
  emit_use (tmp);
}
7378
/* Clear variables at function end.  Intentionally empty: this target
   hook currently has nothing to reset for SH.  */
static void
sh_output_function_epilogue (FILE *)
{
}
7384
/* Implement __builtin_saveregs: allocate a stack buffer, store the
   unnamed integer and floating argument registers into it, and return
   the buffer's address.  Layout: float regs first, then int regs.  */
static rtx
sh_builtin_saveregs (void)
{
  /* First unnamed integer register.  */
  int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
  /* Number of integer registers we need to save.  */
  int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
  /* First unnamed SFmode float reg */
  int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
  /* Number of SFmode float regs to save.  */
  int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
  rtx regbuf, fpregs;
  int bufsize, regno;
  alias_set_type alias_set;

  if (!TARGET_FPU_ANY)
    {
      error ("__builtin_saveregs not supported by this subtarget");
      return const0_rtx;
    }

  /* Allocate block of memory for the regs.  */
  /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
     Or can assign_stack_local accept a 0 SIZE argument?  */
  bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);

  if (n_floatregs & 1)
    {
      rtx addr;

      /* An odd float count: over-allocate by one word, then force the
	 address to an odd word boundary so that the even-count remainder
	 of the float area is 8-byte aligned.  */
      regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
      addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
      emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
      regbuf = change_address (regbuf, BLKmode, addr);
    }
  else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
    {
      rtx addr, mask;

      /* The stack itself isn't 8-byte aligned; over-allocate and round
	 the address up to an 8-byte boundary at runtime.  */
      regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
      addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
						     XEXP (regbuf, 0), 4));
      mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
      emit_insn (gen_andsi3 (addr, addr, mask));
      regbuf = change_address (regbuf, BLKmode, addr);
    }
  else
    regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
  alias_set = get_varargs_alias_set ();
  set_mem_alias_set (regbuf, alias_set);

  /* Save int args.
     This is optimized to only save the regs that are necessary.  Explicitly
     named args need not be saved.  */
  if (n_intregs > 0)
    move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
			 adjust_address (regbuf, BLKmode,
					 n_floatregs * UNITS_PER_WORD),
			 n_intregs);

  /* Save float args.
     This is optimized to only save the regs that are necessary.  Explicitly
     named args need not be saved.
     We explicitly build a pointer to the buffer because it halves the insn
     count when not optimizing (otherwise the pointer is built for each reg
     saved).
     We emit the moves in reverse order so that we can use predecrement.  */

  fpregs = copy_to_mode_reg (Pmode,
			     plus_constant (Pmode, XEXP (regbuf, 0),
					    n_floatregs * UNITS_PER_WORD));
  if (TARGET_FPU_DOUBLE)
    {
      rtx mem;
      /* Store register pairs as DFmode, walking downward two regs at a
	 time from the end of the float area.  */
      for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
	{
	  emit_insn (gen_addsi3 (fpregs, fpregs,
				 GEN_INT (-2 * UNITS_PER_WORD)));
	  mem = change_address (regbuf, DFmode, fpregs);
	  emit_move_insn (mem,
			  gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
	}
      regno = first_floatreg;
      /* A leftover single float reg when the first unnamed float reg is
	 odd-numbered: store it individually as SFmode.  */
      if (regno & 1)
	{
	  emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
	  mem = change_address (regbuf, SFmode, fpregs);
	  emit_move_insn (mem,
			  gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
				       + regno - SH_REG_MSW_OFFSET));
	}
    }
  else
    for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
      {
	rtx mem;

	emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
	mem = change_address (regbuf, SFmode, fpregs);
	emit_move_insn (mem,
			gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
      }

  /* Return the address of the regbuf.  */
  return XEXP (regbuf, 0);
}
7491
7492 /* Define the `__builtin_va_list' type for the ABI. */
7493 static tree
sh_build_builtin_va_list(void)7494 sh_build_builtin_va_list (void)
7495 {
7496 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7497 tree record, type_decl;
7498
7499 if ((! TARGET_SH2E && ! TARGET_SH4)
7500 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7501 return ptr_type_node;
7502
7503 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7504 type_decl = build_decl (BUILTINS_LOCATION,
7505 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7506
7507 f_next_o = build_decl (BUILTINS_LOCATION,
7508 FIELD_DECL, get_identifier ("__va_next_o"),
7509 ptr_type_node);
7510 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7511 FIELD_DECL,
7512 get_identifier ("__va_next_o_limit"),
7513 ptr_type_node);
7514 f_next_fp = build_decl (BUILTINS_LOCATION,
7515 FIELD_DECL, get_identifier ("__va_next_fp"),
7516 ptr_type_node);
7517 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7518 FIELD_DECL,
7519 get_identifier ("__va_next_fp_limit"),
7520 ptr_type_node);
7521 f_next_stack = build_decl (BUILTINS_LOCATION,
7522 FIELD_DECL, get_identifier ("__va_next_stack"),
7523 ptr_type_node);
7524
7525 DECL_FIELD_CONTEXT (f_next_o) = record;
7526 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7527 DECL_FIELD_CONTEXT (f_next_fp) = record;
7528 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7529 DECL_FIELD_CONTEXT (f_next_stack) = record;
7530
7531 TYPE_STUB_DECL (record) = type_decl;
7532 TYPE_NAME (record) = type_decl;
7533 TYPE_FIELDS (record) = f_next_o;
7534 DECL_CHAIN (f_next_o) = f_next_o_limit;
7535 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7536 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7537 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7538
7539 layout_type (record);
7540
7541 return record;
7542 }
7543
/* Implement `va_start' for varargs and stdarg.  Initializes the five
   fields of the SH va_list record (see sh_build_builtin_va_list) from
   the register-save buffer returned by __builtin_saveregs and from
   NEXTARG (the first stack argument).  */
static void
sh_va_start (tree valist, rtx nextarg)
{
  tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
  tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
  tree t, u;
  int nfp, nint;

  /* Configurations with a plain-pointer va_list use the default.  */
  if ((! TARGET_SH2E && ! TARGET_SH4)
      || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  f_next_o = TYPE_FIELDS (va_list_type_node);
  f_next_o_limit = DECL_CHAIN (f_next_o);
  f_next_fp = DECL_CHAIN (f_next_o_limit);
  f_next_fp_limit = DECL_CHAIN (f_next_fp);
  f_next_stack = DECL_CHAIN (f_next_fp_limit);

  next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
		   NULL_TREE);
  next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
			 valist, f_next_o_limit, NULL_TREE);
  next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
		    NULL_TREE);
  next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
			  valist, f_next_fp_limit, NULL_TREE);
  next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
		       valist, f_next_stack, NULL_TREE);

  /* Call __builtin_saveregs.  The buffer starts with the float save
     area, so its address is the initial next_fp.  */
  u = make_tree (sizetype, expand_builtin_saveregs ());
  u = fold_convert (ptr_type_node, u);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Number of unnamed float registers saved; 8 presumably equals
     NPARM_REGS (SFmode) — confirm against the target macro.  */
  nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
  if (nfp < 8)
    nfp = 8 - nfp;
  else
    nfp = 0;
  /* U advances cumulatively: fp limit = buffer + saved float bytes.  */
  u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* The integer save area starts where the float area ends.  */
  t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Number of unnamed integer registers saved; 4 presumably equals
     NPARM_REGS (SImode) — confirm against the target macro.  */
  nint = crtl->args.info.arg_count[SH_ARG_INT];
  if (nint < 4)
    nint = 4 - nint;
  else
    nint = 0;
  u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Stack arguments continue at NEXTARG.  */
  u = make_tree (ptr_type_node, nextarg);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
7613
7614 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7615 member, return it. */
7616 static tree
find_sole_member(tree type)7617 find_sole_member (tree type)
7618 {
7619 tree field, member = NULL_TREE;
7620
7621 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7622 {
7623 if (TREE_CODE (field) != FIELD_DECL)
7624 continue;
7625 if (!DECL_SIZE (field))
7626 return NULL_TREE;
7627 if (integer_zerop (DECL_SIZE (field)))
7628 continue;
7629 if (member)
7630 return NULL_TREE;
7631 member = field;
7632 }
7633 return member;
7634 }
7635
7636 /* Implement `va_arg'. */
7637 static tree
sh_gimplify_va_arg_expr(tree valist,tree type,gimple_seq * pre_p,gimple_seq * post_p ATTRIBUTE_UNUSED)7638 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7639 gimple_seq *post_p ATTRIBUTE_UNUSED)
7640 {
7641 tree tmp;
7642 tree addr, lab_over = NULL, result = NULL;
7643 tree eff_type;
7644
7645 const bool pass_by_ref =
7646 !VOID_TYPE_P (type)
7647 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7648
7649 if (pass_by_ref)
7650 type = build_pointer_type (type);
7651
7652 HOST_WIDE_INT size = int_size_in_bytes (type);
7653 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7654 tree pptr_type_node = build_pointer_type (ptr_type_node);
7655
7656 if ((TARGET_SH2E || TARGET_SH4)
7657 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7658 {
7659 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7660 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7661 tree lab_false;
7662 tree member;
7663
7664 f_next_o = TYPE_FIELDS (va_list_type_node);
7665 f_next_o_limit = DECL_CHAIN (f_next_o);
7666 f_next_fp = DECL_CHAIN (f_next_o_limit);
7667 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7668 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7669
7670 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7671 NULL_TREE);
7672 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7673 valist, f_next_o_limit, NULL_TREE);
7674 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7675 valist, f_next_fp, NULL_TREE);
7676 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7677 valist, f_next_fp_limit, NULL_TREE);
7678 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7679 valist, f_next_stack, NULL_TREE);
7680
7681 /* Structures with a single member with a distinct mode are passed
7682 like their member. This is relevant if the latter has a REAL_TYPE
7683 or COMPLEX_TYPE type. */
7684 eff_type = type;
7685 while (TREE_CODE (eff_type) == RECORD_TYPE
7686 && (member = find_sole_member (eff_type))
7687 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7688 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7689 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7690 {
7691 tree field_type = TREE_TYPE (member);
7692
7693 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7694 eff_type = field_type;
7695 else
7696 {
7697 gcc_assert ((TYPE_ALIGN (eff_type)
7698 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7699 || (TYPE_ALIGN (eff_type)
7700 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7701 break;
7702 }
7703 }
7704
7705 bool pass_as_float;
7706 if (TARGET_FPU_DOUBLE)
7707 {
7708 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7709 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7710 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7711 && size <= 16));
7712 }
7713 else
7714 {
7715 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7716 }
7717
7718 addr = create_tmp_var (pptr_type_node);
7719 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7720 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7721
7722 valist = build_simple_mem_ref (addr);
7723
7724 if (pass_as_float)
7725 {
7726 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7727 tree cmp;
7728 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7729
7730 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7731 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7732
7733 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7734 tmp = next_fp_limit;
7735 if (size > 4 && !is_double)
7736 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7737 tmp = build2 (GE_EXPR, boolean_type_node,
7738 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7739 cmp = build3 (COND_EXPR, void_type_node, tmp,
7740 build1 (GOTO_EXPR, void_type_node,
7741 unshare_expr (lab_false)), NULL_TREE);
7742 if (!is_double)
7743 gimplify_and_add (cmp, pre_p);
7744
7745 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7746 || (is_double || size == 16))
7747 {
7748 tmp = fold_convert (sizetype, next_fp_tmp);
7749 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7750 size_int (UNITS_PER_WORD));
7751 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7752 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7753 }
7754 if (is_double)
7755 gimplify_and_add (cmp, pre_p);
7756
7757 #ifdef FUNCTION_ARG_SCmode_WART
7758 if (TYPE_MODE (eff_type) == SCmode
7759 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7760 {
7761 tree subtype = TREE_TYPE (eff_type);
7762 tree real, imag;
7763
7764 imag
7765 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7766 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7767
7768 real
7769 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7770 real = get_initialized_tmp_var (real, pre_p, NULL);
7771
7772 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7773 if (type != eff_type)
7774 result = build1 (VIEW_CONVERT_EXPR, type, result);
7775 result = get_initialized_tmp_var (result, pre_p, NULL);
7776 }
7777 #endif /* FUNCTION_ARG_SCmode_WART */
7778
7779 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7780 gimplify_and_add (tmp, pre_p);
7781
7782 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7783 gimplify_and_add (tmp, pre_p);
7784
7785 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7786 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7787 gimplify_assign (unshare_expr (next_fp_tmp),
7788 unshare_expr (valist), pre_p);
7789
7790 gimplify_assign (unshare_expr (valist),
7791 unshare_expr (next_fp_tmp), post_p);
7792 valist = next_fp_tmp;
7793 }
7794 else
7795 {
7796 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7797 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7798 unshare_expr (next_o_limit));
7799 tmp = build3 (COND_EXPR, void_type_node, tmp,
7800 build1 (GOTO_EXPR, void_type_node,
7801 unshare_expr (lab_false)),
7802 NULL_TREE);
7803 gimplify_and_add (tmp, pre_p);
7804
7805 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7806 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7807
7808 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7809 gimplify_and_add (tmp, pre_p);
7810
7811 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7812 gimplify_and_add (tmp, pre_p);
7813
7814 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7815 gimplify_assign (unshare_expr (next_o),
7816 unshare_expr (next_o_limit), pre_p);
7817
7818 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7819 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7820 }
7821
7822 if (!result)
7823 {
7824 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7825 gimplify_and_add (tmp, pre_p);
7826 }
7827 }
7828
7829 /* ??? In va-sh.h, there had been code to make values larger than
7830 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7831
7832 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7833 if (result)
7834 {
7835 gimplify_assign (result, tmp, pre_p);
7836 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7837 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7838 gimplify_and_add (tmp, pre_p);
7839 }
7840 else
7841 result = tmp;
7842
7843 if (pass_by_ref)
7844 result = build_va_arg_indirect_ref (result);
7845
7846 return result;
7847 }
7848
7849 /* 64 bit floating points memory transfers are paired single precision loads
7850 or store. So DWARF information needs fixing in little endian (unless
7851 PR=SZ=1 in FPSCR). */
7852 rtx
sh_dwarf_register_span(rtx reg)7853 sh_dwarf_register_span (rtx reg)
7854 {
7855 unsigned regno = REGNO (reg);
7856
7857 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7858 return NULL_RTX;
7859
7860 return
7861 gen_rtx_PARALLEL (VOIDmode,
7862 gen_rtvec (2,
7863 gen_rtx_REG (SFmode, regno + 1),
7864 gen_rtx_REG (SFmode, regno)));
7865 }
7866
7867 static machine_mode
sh_promote_function_mode(const_tree type,machine_mode mode,int * punsignedp,const_tree funtype,int for_return)7868 sh_promote_function_mode (const_tree type, machine_mode mode,
7869 int *punsignedp, const_tree funtype,
7870 int for_return)
7871 {
7872 if (sh_promote_prototypes (funtype))
7873 return promote_mode (type, mode, punsignedp);
7874 else
7875 return default_promote_function_mode (type, mode, punsignedp, funtype,
7876 for_return);
7877 }
7878
7879 static bool
sh_promote_prototypes(const_tree type)7880 sh_promote_prototypes (const_tree type)
7881 {
7882 if (TARGET_HITACHI)
7883 return false;
7884 if (! type)
7885 return true;
7886 return ! sh_attr_renesas_p (type);
7887 }
7888
/* Implement TARGET_PASS_BY_REFERENCE.  On SH only values that the generic
   code forces onto the stack are passed by reference.  */
static bool
sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
		      const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (targetm.calls.must_pass_in_stack (mode, type))
    return true;

  /* ??? std_gimplify_va_arg_expr passes NULL for cum.  That function
     wants to know about pass-by-reference semantics for incoming
     arguments.  */
  if (! cum)
    return false;

  /* NOTE(review): this path and the NULL-cum path above both return
     false; the separate check only documents the NULL case.  */
  return false;
}
7906
7907 static bool
sh_callee_copies(cumulative_args_t cum,machine_mode mode,const_tree type,bool named ATTRIBUTE_UNUSED)7908 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7909 const_tree type, bool named ATTRIBUTE_UNUSED)
7910 {
7911 /* ??? How can it possibly be correct to return true only on the
7912 caller side of the equation? Is there someplace else in the
7913 sh backend that's magically producing the copies? */
7914 return (get_cumulative_args (cum)->outgoing
7915 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7916 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7917 }
7918
7919 static sh_arg_class
get_sh_arg_class(machine_mode mode)7920 get_sh_arg_class (machine_mode mode)
7921 {
7922 if (TARGET_FPU_ANY && mode == SFmode)
7923 return SH_ARG_FLOAT;
7924
7925 if (TARGET_FPU_DOUBLE
7926 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7927 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7928 return SH_ARG_FLOAT;
7929
7930 return SH_ARG_INT;
7931 }
7932
/* Round a register number up to a proper boundary for an arg of mode
   MODE.
   The SH doesn't care about double alignment, so we only
   round doubles to even regs when asked to explicitly.  */
static int
sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
{
  /* FIXME: This used to be a macro and has been copy pasted into this
     function as is.  Make this more readable.  */
  /* Round the class's register count up to an even number (x + (x & 1))
     when the value occupies more than one word and either
     -malign-double is in effect, or MODE is DFmode/DCmode, the FPU can
     take doubles, and the value still fits in the FP argument
     registers.  Otherwise use the next register of the argument's
     class (int vs. float) unchanged.  */
  return
    (((TARGET_ALIGN_DOUBLE
       || (TARGET_FPU_DOUBLE
	   && (mode == DFmode || mode == DCmode)
	   && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
      && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
     ? (cum.arg_count[(int) get_sh_arg_class (mode)]
	+ (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
     : cum.arg_count[(int) get_sh_arg_class (mode)]);
}
7952
/* Return true if arg of the specified mode should be passed in a register
   or false otherwise.  */
static bool
sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
		  const_tree type)
{
  /* FIXME: This used to be a macro and has been copy pasted into this
     function as is.  Make this more readable.  */
  /* Three conditions must all hold:
     1. The type allows register passing: untyped (libcall), or not
	addressable; under the Renesas/Hitachi ABI additionally neither
	an aggregate nor an FP value wider than SFmode on a no-FPU
	target.
     2. No earlier argument has forced the remainder to memory
	(cum.force_mem).
     3. Enough argument registers remain.  On SH2E the whole value must
	fit (BLKmode values measured in bytes against the integer
	register file, others in registers of their own class);
	otherwise it suffices that the value's first register is still
	available.  */
  return
    ((type == 0
      || (! TREE_ADDRESSABLE (type)
	  && (! (TARGET_HITACHI || cum.renesas_abi)
	      || ! (AGGREGATE_TYPE_P (type)
		    || (!TARGET_FPU_ANY
			&& (GET_MODE_CLASS (mode) == MODE_FLOAT
			    && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
     && ! cum.force_mem
     && (TARGET_SH2E
	 ? ((mode) == BLKmode
	    ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
		+ int_size_in_bytes (type))
	       <= NPARM_REGS (SImode) * UNITS_PER_WORD)
	    : ((sh_round_reg (cum, mode)
		+ sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
	       <= NPARM_REGS (mode)))
	 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
}
7980
/* Implement TARGET_ARG_PARTIAL_BYTES: the number of bytes at the beginning
   of an argument that are passed in registers while the remainder goes on
   the stack.  Nonzero only when the value starts in the parameter
   registers but does not fit completely, and the FPU cannot pass
   doubles.  */
static int
sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
		      tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int words = 0;

  /* The registers left after rounding hold the leading words; the value's
     word count comes from MODE, or from TYPE for BLKmode values.  */
  if (sh_pass_in_reg_p (*cum, mode, type)
      && !TARGET_FPU_DOUBLE
      && (sh_round_reg (*cum, mode)
	  + (mode != BLKmode
	     ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
	     : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
	  > NPARM_REGS (mode)))
    words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);

  return words * UNITS_PER_WORD;
}
7999
8000
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On SH the first args are normally in registers
   and the rest are pushed.  Any arg that starts within the first
   NPARM_REGS words is at least partially passed in a register unless
   its data type forbids.  */
static rtx
sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
		 const_tree type, bool named)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);

  /* VOIDmode marks the end of the argument list; the value returned
     here reports whether the Renesas ABI is in use.  */
  if (mode == VOIDmode)
    return ca->renesas_abi ? const1_rtx : const0_rtx;

  /* Unnamed (variadic) arguments never go in registers under the
     Renesas/Hitachi ABI.  */
  if (sh_pass_in_reg_p (*ca, mode, type)
      && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
    {
      int regno;

      /* On little-endian SH4, SCmode is returned as a PARALLEL of two
	 SFmode pieces, each with its register index low bit flipped
	 (the ^ 1), so the halves land in the expected registers.  */
      if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
	  && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
	{
	  rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (SFmode,
						   BASE_ARG_REG (mode)
						   + (sh_round_reg (*ca, mode) ^ 1)),
				      const0_rtx);
	  rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (SFmode,
						   BASE_ARG_REG (mode)
						   + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
				      GEN_INT (4));
	  return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
	}

      /* If the alignment of a DF value causes an SF register to be
	 skipped, we will use that skipped register for the next SF
	 value.  */
      if ((TARGET_HITACHI || ca->renesas_abi)
	  && ca->free_single_fp_reg
	  && mode == SFmode)
	return gen_rtx_REG (mode, ca->free_single_fp_reg);

      /* The XOR with a boolean flips the low register bit for SFmode on
	 little-endian SH4 (non-Renesas), pairing singles with the layout
	 used for doubles.  */
      regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
	       ^ (mode == SFmode && TARGET_SH4
		  && TARGET_LITTLE_ENDIAN
		  && ! TARGET_HITACHI && ! ca->renesas_abi);
      return gen_rtx_REG (mode, regno);

    }

  return NULL_RTX;
}
8066
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be
   available.)  */
static void
sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
			 const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);

  /* force_mem only applies to the single argument it was set for.  */
  if (ca->force_mem)
    ca->force_mem = false;

  if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
    {
      /* Note that we've used the skipped register.  */
      if (mode == SFmode && ca->free_single_fp_reg)
	{
	  ca->free_single_fp_reg = 0;
	  return;
	}
      /* When we have a DF after an SF, there's an SF register that gets
	 skipped in order to align the DF value.  We note this skipped
	 register, because the next SF value will use it, and not the
	 SF that follows the DF.  */
      if (mode == DFmode
	  && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
	{
	  ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
				    + BASE_ARG_REG (mode));
	}
    }

  /* Advance the count for the argument's class by the number of words
     the value occupies, rounded up (bytes from TYPE for BLKmode).  */
  if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
      || sh_pass_in_reg_p (*ca, mode, type))
    (ca->arg_count[(int) get_sh_arg_class (mode)]
     = (sh_round_reg (*ca, mode)
	+ (mode == BLKmode
	   ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
	   : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
}
8108
8109 /* The Renesas calling convention doesn't quite fit into this scheme since
8110 the address is passed like an invisible argument, but one that is always
8111 passed in memory. */
8112 static rtx
sh_struct_value_rtx(tree fndecl,int incoming ATTRIBUTE_UNUSED)8113 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8114 {
8115 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8116 return NULL_RTX;
8117 return gen_rtx_REG (Pmode, 2);
8118 }
8119
/* Worker function for TARGET_FUNCTION_VALUE.

   For the SH, this is like LIBCALL_VALUE, except that we must change the
   mode like PROMOTE_MODE does.
   ??? PROMOTE_MODE is ignored for non-scalar types.  The set of types
   tested here has to be kept in sync with the one in
   explow.c:promote_mode.  */
static rtx
sh_function_value (const_tree valtype,
		   const_tree fn_decl_or_type,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  /* Discard a bare type so only an actual decl is consulted below.  */
  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    fn_decl_or_type = NULL;

  /* Sub-word integral-class values widen to SImode when prototype
     promotion applies; the type list mirrors promote_mode's.  */
  return gen_rtx_REG (
	   ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
	     && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
	     && (TREE_CODE (valtype) == INTEGER_TYPE
		 || TREE_CODE (valtype) == ENUMERAL_TYPE
		 || TREE_CODE (valtype) == BOOLEAN_TYPE
		 || TREE_CODE (valtype) == REAL_TYPE
		 || TREE_CODE (valtype) == OFFSET_TYPE))
	    && sh_promote_prototypes (fn_decl_or_type)
	    ? SImode : TYPE_MODE (valtype)),
	   BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
}
8148
8149 /* Worker function for TARGET_LIBCALL_VALUE. */
8150 static rtx
sh_libcall_value(machine_mode mode,const_rtx fun ATTRIBUTE_UNUSED)8151 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8152 {
8153 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8154 }
8155
8156 /* Return true if N is a possible register number of function value. */
8157 static bool
sh_function_value_regno_p(const unsigned int regno)8158 sh_function_value_regno_p (const unsigned int regno)
8159 {
8160 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8161 }
8162
8163 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8164 static bool
sh_return_in_memory(const_tree type,const_tree fndecl)8165 sh_return_in_memory (const_tree type, const_tree fndecl)
8166 {
8167 return TYPE_MODE (type) == BLKmode
8168 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8169 && TREE_CODE (type) == RECORD_TYPE);
8170 }
8171
/* We actually emit the code in sh_expand_prologue.  We used to use
   a static variable to flag that we need to emit this code, but that
   doesn't work when inlining, when functions are deferred and then
   emitted later.  Fortunately, we already have two flags that are part
   of struct function that tell if a function uses varargs or stdarg.  */
static void
sh_setup_incoming_varargs (cumulative_args_t ca,
			   machine_mode mode,
			   tree type,
			   int *pretend_arg_size,
			   int second_time ATTRIBUTE_UNUSED)
{
  gcc_assert (cfun->stdarg);
  if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
    {
      int named_parm_regs, anon_parm_regs;

      /* Registers consumed by the named parameters, including the last
	 named argument described by MODE/TYPE (bytes from TYPE when
	 BLKmode).  */
      named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
			 + (mode == BLKmode
			    ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
			    : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
      /* Whatever remains of the integer argument registers must be
	 dumped for the anonymous arguments, 4 bytes per register.  */
      anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
      if (anon_parm_regs > 0)
	*pretend_arg_size = anon_parm_regs * 4;
    }
}
8198
/* Implement TARGET_STRICT_ARGUMENT_NAMING.  */
static bool
sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
{
  return false;
}
8204
8205 static bool
sh_pretend_outgoing_varargs_named(cumulative_args_t ca_v)8206 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8207 {
8208 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8209
8210 return ! (TARGET_HITACHI || ca->renesas_abi);
8211 }
8212
8213
/* Define the offset between two registers, one to be eliminated, and
   the other its replacement, at the start of a routine.  */
int
initial_elimination_offset (int from, int to)
{
  const int regs_saved_rounding = 0;
  /* NOTE(review): target_flags is saved and restored around the frame
     computation — presumably calc_live_regs / rounded_frame_size can
     modify it; confirm before simplifying.  */
  int save_flags = target_flags;
  HARD_REG_SET live_regs_mask;

  int regs_saved = calc_live_regs (&live_regs_mask);

  int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
  target_flags = save_flags;

  int total_saved_regs_space = regs_saved + regs_saved_rounding;

  /* AP -> HFP: skip the saved registers and the local frame.  */
  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space;

  /* AP -> SP: same distance as above.  */
  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space;

  /* Initial gap between fp and sp is 0.  */
  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return 0;

  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return rounded_frame_size (0);

  if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return rounded_frame_size (0);

  /* The only remaining valid elimination is from the return-address
     pointer to HFP or SP.  */
  gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
	      && (to == HARD_FRAME_POINTER_REGNUM
		  || to == STACK_POINTER_REGNUM));
  return total_auto_space;
}
8251
8252 /* Parse the -mfixed-range= option string. */
8253 void
sh_fix_range(const char * const_str)8254 sh_fix_range (const char *const_str)
8255 {
8256 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8257 REG2 are either register names or register numbers. The effect
8258 of this option is to mark the registers in the range from REG1 to
8259 REG2 as ``fixed'' so they won't be used by the compiler. */
8260
8261 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8262
8263 while (1)
8264 {
8265 char* dash = strchr (str, '-');
8266 if (!dash)
8267 {
8268 warning (0, "value of -mfixed-range must have form REG1-REG2");
8269 return;
8270 }
8271 *dash = '\0';
8272 char* comma = strchr (dash + 1, ',');
8273 if (comma)
8274 *comma = '\0';
8275
8276 int first = decode_reg_name (str);
8277 if (first < 0)
8278 {
8279 warning (0, "unknown register name: %s", str);
8280 return;
8281 }
8282
8283 int last = decode_reg_name (dash + 1);
8284 if (last < 0)
8285 {
8286 warning (0, "unknown register name: %s", dash + 1);
8287 return;
8288 }
8289
8290 *dash = '-';
8291
8292 if (first > last)
8293 {
8294 warning (0, "%s-%s is an empty range", str, dash + 1);
8295 return;
8296 }
8297
8298 for (int i = first; i <= last; ++i)
8299 fixed_regs[i] = call_used_regs[i] = 1;
8300
8301 if (!comma)
8302 break;
8303
8304 *comma = ',';
8305 str = comma + 1;
8306 }
8307 }
8308
/* Insert any deferred function attributes from earlier pragmas.  */
static void
sh_insert_attributes (tree node, tree *attributes)
{
  if (TREE_CODE (node) != FUNCTION_DECL)
    return;

  /* We are only interested in fields.  */
  if (!DECL_P (node))
    return;

  /* Append the attributes to the deferred attributes.  */
  *sh_deferred_function_attributes_tail = *attributes;
  tree attrs = sh_deferred_function_attributes;
  if (!attrs)
    return;

  /* Some attributes imply or require the interrupt attribute.  */
  if (!lookup_attribute ("interrupt_handler", attrs)
      && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
    {
      /* If we have a trapa_handler, but no interrupt_handler attribute,
	 insert an interrupt_handler attribute.  */
      if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
	/* We can't use sh_pr_interrupt here because that's not in the
	   java frontend.  */
	attrs
	  = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
      /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
	 if the interrupt attribute is missing, we ignore the attribute
	 and warn.  */
      else if (lookup_attribute ("sp_switch", attrs)
	       || lookup_attribute ("trap_exit", attrs)
	       || lookup_attribute ("nosave_low_regs", attrs)
	       || lookup_attribute ("resbank", attrs))
	{
	  tree *tail;

	  /* Rebuild the list onto *attributes: interrupt-only attributes
	     draw a warning and are dropped; everything else is copied
	     (without arguments) onto the growing tail.  */
	  for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
	    {
	      if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
		  || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
		  || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
		  || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
		warning (OPT_Wattributes,
			 "%qE attribute only applies to interrupt functions",
			 TREE_PURPOSE (attrs));
	      else
		{
		  *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
				     NULL_TREE);
		  tail = &TREE_CHAIN (*tail);
		}
	    }
	  attrs = *attributes;
	}
    }

  /* Install the processed list.  */
  *attributes = attrs;

  /* Clear deferred attributes.  */
  sh_deferred_function_attributes = NULL_TREE;
  sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

  return;
}
8376
8377 /*------------------------------------------------------------------------------
8378 Target specific attributes
8379 Supported attributes are:
8380
8381 * interrupt_handler
8382 Specifies this function is an interrupt handler.
8383
8384 * trapa_handler
8385 Like interrupt_handler, but don't save all registers.
8386
8387 * sp_switch
8388 Specifies an alternate stack for an interrupt handler to run on.
8389
8390 * trap_exit
8391 Use a trapa to exit an interrupt function instead of rte.
8392
8393 * nosave_low_regs
8394 Don't save r0..r7 in an interrupt handler function.
8395 This is useful on SH3* and SH4*, which have a separate set of low
8396 regs for user and privileged modes.
8397 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8398 those that run with interrupts disabled and thus can't be
     interrupted themselves).
8400
8401 * renesas
8402 Use Renesas calling/layout conventions (functions and structures).
8403
8404 * resbank
8405 In case of an interrupt handler function, use a register bank to
8406 save registers R0-R14, MACH, MACL, GBR and PR.
8407 This is available only on SH2A targets.
8408
8409 * function_vector
8410 Declares a function to be called using the TBR relative addressing
8411 mode. Takes an argument that specifies the slot number in the table
8412 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8413 */
8414
/* Handle a 'resbank' attribute.  Warn (and suppress the attribute) when
   the target is not SH2A or the decl is not a function; note that both
   warnings can be emitted for the same decl.  */
static tree
sh_handle_resbank_handler_attribute (tree * node, tree name,
				     tree args ATTRIBUTE_UNUSED,
				     int flags ATTRIBUTE_UNUSED,
				     bool * no_add_attrs)
{
  if (!TARGET_SH2A)
    {
      warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
	       name);
      *no_add_attrs = true;
    }
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
8437
/* Handle an "interrupt_handler" attribute; arguments as in
   struct attribute_spec.handler.  The attribute is only valid on
   function declarations.  */
static tree
sh_handle_interrupt_handler_attribute (tree *node, tree name,
				       tree args ATTRIBUTE_UNUSED,
				       int flags ATTRIBUTE_UNUSED,
				       bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
8455
8456 /* Handle an 'function_vector' attribute; arguments as in
8457 struct attribute_spec.handler. */
8458 static tree
sh2a_handle_function_vector_handler_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8459 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8460 tree args ATTRIBUTE_UNUSED,
8461 int flags ATTRIBUTE_UNUSED,
8462 bool * no_add_attrs)
8463 {
8464 if (!TARGET_SH2A)
8465 {
8466 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8467 name);
8468 *no_add_attrs = true;
8469 }
8470 else if (TREE_CODE (*node) != FUNCTION_DECL)
8471 {
8472 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8473 name);
8474 *no_add_attrs = true;
8475 }
8476 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8477 {
8478 /* The argument must be a constant integer. */
8479 warning (OPT_Wattributes,
8480 "%qE attribute argument not an integer constant",
8481 name);
8482 *no_add_attrs = true;
8483 }
8484 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8485 {
8486 /* The argument value must be between 0 to 255. */
8487 warning (OPT_Wattributes,
8488 "%qE attribute argument should be between 0 to 255",
8489 name);
8490 *no_add_attrs = true;
8491 }
8492 return NULL_TREE;
8493 }
8494
8495 /* Returns true if current function has been assigned the attribute
8496 'function_vector'. */
8497 bool
sh2a_is_function_vector_call(rtx x)8498 sh2a_is_function_vector_call (rtx x)
8499 {
8500 if (GET_CODE (x) == SYMBOL_REF
8501 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8502 {
8503 tree tr = SYMBOL_REF_DECL (x);
8504
8505 if (sh2a_function_vector_p (tr))
8506 return true;
8507 }
8508
8509 return false;
8510 }
8511
8512 /* Returns the function vector number, if the attribute
8513 'function_vector' is assigned, otherwise returns zero. */
8514 int
sh2a_get_function_vector_number(rtx x)8515 sh2a_get_function_vector_number (rtx x)
8516 {
8517 if ((GET_CODE (x) == SYMBOL_REF)
8518 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8519 {
8520 tree t = SYMBOL_REF_DECL (x);
8521
8522 if (TREE_CODE (t) != FUNCTION_DECL)
8523 return 0;
8524
8525 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8526 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8527 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8528
8529 return 0;
8530 }
8531 else
8532 return 0;
8533 }
8534
8535 /* Handle an "sp_switch" attribute; arguments as in
8536 struct attribute_spec.handler. */
8537 static tree
sh_handle_sp_switch_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8538 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8539 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8540 {
8541 if (TREE_CODE (*node) != FUNCTION_DECL)
8542 {
8543 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8544 name);
8545 *no_add_attrs = true;
8546 }
8547 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8548 {
8549 /* The argument must be a constant string. */
8550 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8551 name);
8552 *no_add_attrs = true;
8553 }
8554
8555 return NULL_TREE;
8556 }
8557
8558 /* Handle an "trap_exit" attribute; arguments as in
8559 struct attribute_spec.handler. */
8560 static tree
sh_handle_trap_exit_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8561 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8562 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8563 {
8564 if (TREE_CODE (*node) != FUNCTION_DECL)
8565 {
8566 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8567 name);
8568 *no_add_attrs = true;
8569 }
8570 /* The argument specifies a trap number to be used in a trapa instruction
8571 at function exit (instead of an rte instruction). */
8572 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8573 {
8574 /* The argument must be a constant integer. */
8575 warning (OPT_Wattributes, "%qE attribute argument not an "
8576 "integer constant", name);
8577 *no_add_attrs = true;
8578 }
8579
8580 return NULL_TREE;
8581 }
8582
/* Handle a "renesas" attribute; arguments as in
   struct attribute_spec.handler.  The attribute is accepted as-is; its
   effect is queried later through sh_attr_renesas_p.  */
static tree
sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
			     tree name ATTRIBUTE_UNUSED,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs ATTRIBUTE_UNUSED)
{
  return NULL_TREE;
}
8592
8593 /* True if __attribute__((renesas)) or -mrenesas. */
8594 bool
sh_attr_renesas_p(const_tree td)8595 sh_attr_renesas_p (const_tree td)
8596 {
8597 if (TARGET_HITACHI)
8598 return true;
8599 if (td == NULL_TREE)
8600 return false;
8601 if (DECL_P (td))
8602 td = TREE_TYPE (td);
8603 if (td == error_mark_node)
8604 return false;
8605 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8606 }
8607
/* True if __attribute__((renesas)) or -mrenesas, for the current
   function.  */
bool
sh_cfun_attr_renesas_p (void)
{
  return sh_attr_renesas_p (current_function_decl);
}
8615
8616 /* Returns true if the current function has the "interrupt_handler"
8617 attribute set. */
8618 bool
sh_cfun_interrupt_handler_p(void)8619 sh_cfun_interrupt_handler_p (void)
8620 {
8621 return (lookup_attribute ("interrupt_handler",
8622 DECL_ATTRIBUTES (current_function_decl))
8623 != NULL_TREE);
8624 }
8625
8626 /* Returns true if FUNC has been assigned the attribute
8627 "function_vector". */
8628 bool
sh2a_function_vector_p(tree func)8629 sh2a_function_vector_p (tree func)
8630 {
8631 if (TREE_CODE (func) != FUNCTION_DECL)
8632 return false;
8633
8634 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8635 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8636 return true;
8637
8638 return false;
8639 }
8640
8641 /* Returns true if given tree has the "resbank" attribute set. */
8642 bool
sh_cfun_resbank_handler_p(void)8643 sh_cfun_resbank_handler_p (void)
8644 {
8645 return ((lookup_attribute ("resbank",
8646 DECL_ATTRIBUTES (current_function_decl))
8647 != NULL_TREE)
8648 && (lookup_attribute ("interrupt_handler",
8649 DECL_ATTRIBUTES (current_function_decl))
8650 != NULL_TREE) && TARGET_SH2A);
8651 }
8652
8653 /* Returns true if the current function has a "trap_exit" attribute set. */
8654 bool
sh_cfun_trap_exit_p(void)8655 sh_cfun_trap_exit_p (void)
8656 {
8657 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8658 != NULL_TREE;
8659 }
8660
8661 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8662 static const char *
sh_check_pch_target_flags(int old_flags)8663 sh_check_pch_target_flags (int old_flags)
8664 {
8665 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8666 | MASK_SH_E | MASK_HARD_SH4
8667 | MASK_FPU_SINGLE | MASK_SH4))
8668 return _("created and used with different architectures / ABIs");
8669 if ((old_flags ^ target_flags) & MASK_HITACHI)
8670 return _("created and used with different ABIs");
8671 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8672 return _("created and used with different endianness");
8673 return NULL;
8674 }
8675
8676 /* Predicates used by the templates. */
8677
8678 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8679 Used only in general_movsrc_operand. */
8680 bool
system_reg_operand(rtx op,machine_mode mode ATTRIBUTE_UNUSED)8681 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8682 {
8683 switch (REGNO (op))
8684 {
8685 case PR_REG:
8686 case MACL_REG:
8687 case MACH_REG:
8688 return true;
8689 }
8690 return false;
8691 }
8692
8693 /* Returns true if OP is a floating point value with value 0.0. */
8694 bool
fp_zero_operand(rtx op)8695 fp_zero_operand (rtx op)
8696 {
8697 if (GET_MODE (op) != SFmode)
8698 return false;
8699
8700 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8701 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8702 }
8703
8704 /* Returns true if OP is a floating point value with value 1.0. */
8705 bool
fp_one_operand(rtx op)8706 fp_one_operand (rtx op)
8707 {
8708 if (GET_MODE (op) != SFmode)
8709 return false;
8710
8711 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8712 }
8713
8714 /* Return the TLS type for TLS symbols. */
8715 enum tls_model
tls_symbolic_operand(rtx op,machine_mode mode ATTRIBUTE_UNUSED)8716 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8717 {
8718 if (GET_CODE (op) != SYMBOL_REF)
8719 return TLS_MODEL_NONE;
8720 return SYMBOL_REF_TLS_MODEL (op);
8721 }
8722
8723 /* Return the destination address of a branch. */
static int
branch_dest (rtx branch)
{
  rtx dest = SET_SRC (PATTERN (branch));

  /* For a conditional branch, the target is the "then" arm of the
     IF_THEN_ELSE.  */
  if (GET_CODE (dest) == IF_THEN_ELSE)
    dest = XEXP (dest, 1);

  /* DEST is now a LABEL_REF; return the recorded address of the insn
     (label) it refers to.  */
  return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
}
8734
8735 /* Return nonzero if REG is not used after INSN.
8736 We assume REG is a reload reg, and therefore does
8737 not live past labels. It may live past calls or jumps though. */
bool
reg_unused_after (rtx reg, rtx_insn *insn)
{
  /* If the reg is set by this instruction, then it is safe for our
     case.  Disregard the case where this is a store to memory, since
     we are checking a register used in the store address.  */
  rtx set = single_set (insn);
  if (set && !MEM_P (SET_DEST (set))
      && reg_overlap_mentioned_p (reg, SET_DEST (set)))
    return true;

  /* Scan forward over the following insns.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (!INSN_P (insn))
	continue;

      rtx_code code = GET_CODE (insn);

#if 0
      /* If this is a label that existed before reload, then the register
	 is dead here.  However, if this is a label added by reorg, then
	 the register may still be live here.  We can't tell the difference,
	 so we just ignore labels completely.  */
      if (code == CODE_LABEL)
	return 1;
      /* else */
#endif

      /* Conservatively treat any jump as keeping REG live.  */
      if (code == JUMP_INSN)
	return false;

      /* If this is a sequence, we must handle them all at once.
	 We could have for instance a call that sets the target register,
	 and an insn in a delay slot that uses the register.  In this case,
	 we must return 0.  */
      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
	{
	  rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
	  bool retval = false;

	  for (int i = 0; i < seq->len (); i++)
	    {
	      rtx_insn *this_insn = seq->insn (i);
	      rtx set = single_set (this_insn);

	      if (CALL_P (this_insn))
		code = CALL_INSN;
	      else if (JUMP_P (this_insn))
		{
		  /* An annulled branch may or may not execute its delay
		     slot insn; give up rather than reason about it.  */
		  if (INSN_ANNULLED_BRANCH_P (this_insn))
		    return false;
		  code = JUMP_INSN;
		}

	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
		return false;
	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
		{
		  if (!MEM_P (SET_DEST (set)))
		    retval = true;
		  else
		    return false;
		}
	      /* No single set: any mention of REG counts as a use.  */
	      if (set == NULL_RTX
		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
		return false;
	    }
	  if (retval)
	    return true;
	  else if (code == JUMP_INSN)
	    return false;
	}

      rtx set = single_set (insn);
      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
	return false;
      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
	return !MEM_P (SET_DEST (set));
      if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
	return false;

      /* REG is a reload reg (see the function comment); if it is
	 call-clobbered it cannot be carried across a call, so it is
	 dead after this point.  */
      if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
	return true;
    }
  return true;
}
8824
8825
8826 static GTY(()) rtx t_reg_rtx;
8827 rtx
get_t_reg_rtx(void)8828 get_t_reg_rtx (void)
8829 {
8830 if (! t_reg_rtx)
8831 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8832 return t_reg_rtx;
8833 }
8834
static GTY(()) tree fpscr_values;

/* Emit insns that load the FPSCR from element INDEX of the external
   array __fpscr_values.  SCRATCH is a hard register used to form the
   address when pseudos cannot be created.  */
static void
emit_fpu_switch (rtx scratch, int index)
{
  /* Lazily build the declaration of the external two-element int array
     __fpscr_values the first time it is needed.  */
  if (fpscr_values == NULL)
    {
      tree t = build_index_type (integer_one_node);
      t = build_array_type (integer_type_node, t);
      t = build_decl (BUILTINS_LOCATION,
		      VAR_DECL, get_identifier ("__fpscr_values"), t);
      DECL_ARTIFICIAL (t) = 1;
      DECL_IGNORED_P (t) = 1;
      DECL_EXTERNAL (t) = 1;
      TREE_STATIC (t) = 1;
      TREE_PUBLIC (t) = 1;
      TREE_USED (t) = 1;

      fpscr_values = t;
    }

  rtx src = DECL_RTL (fpscr_values);
  if (!can_create_pseudo_p ())
    {
      /* No pseudos available: compute the element address into SCRATCH
	 explicitly.  */
      emit_move_insn (scratch, XEXP (src, 0));
      if (index != 0)
	emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
      src = adjust_automodify_address (src, SImode, scratch, index * 4);
    }
  else
    src = adjust_address (src, SImode, index * 4);

  emit_insn (gen_lds_fpscr (src));
}
8869
8870 static rtx get_free_reg (HARD_REG_SET);
8871
8872 /* This function returns a register to use to load the address to load
8873 the fpscr from. Currently it always returns r1 or r7, but when we are
8874 able to use pseudo registers after combine, or have a better mechanism
8875 for choosing a register, it should be done here. */
8876 /* REGS_LIVE is the liveness information for the point for which we
8877 need this allocation. In some bare-bones exit blocks, r1 is live at the
8878 start. We can even have all of r0..r3 being live:
8879 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8880 INSN before which new insns are placed with will clobber the register
8881 we return. If a basic block consists only of setting the return value
8882 register to a pseudo and using that register, the return value is not
   live before or after this block, yet we'll insert our insns right in
8884 the middle. */
static rtx
get_free_reg (HARD_REG_SET regs_live)
{
  /* Prefer r1 when it is not live at this point.  */
  if (! TEST_HARD_REG_BIT (regs_live, 1))
    return gen_rtx_REG (Pmode, 1);

  /* Hard reg 1 is live; since this is a small register classes target,
     there shouldn't be anything but a jump before the function end,
     so r7 must be free.  */
  gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
  return gen_rtx_REG (Pmode, 7);
}
8896
8897 /* This function will set the fpscr from memory.
8898 MODE is the mode we are setting it to. */
8899 void
fpscr_set_from_mem(int mode,HARD_REG_SET regs_live)8900 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8901 {
8902 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8903 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8904
8905 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8906 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8907 }
8908
8909 /* Is the given character a logical line separator for the assembler? */
8910 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8911 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8912 #endif
8913
8914 static bool
sequence_insn_p(rtx_insn * insn)8915 sequence_insn_p (rtx_insn *insn)
8916 {
8917 rtx_insn* prev = PREV_INSN (insn);
8918 if (prev == NULL)
8919 return false;
8920
8921 rtx_insn* next = NEXT_INSN (prev);
8922 if (next == NULL)
8923 return false;
8924
8925 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8926 }
8927
int
sh_insn_length_adjustment (rtx_insn *insn)
{
  /* Instructions with unfilled delay slots take up an extra two bytes for
     the nop in the delay slot.  */
  if (((NONJUMP_INSN_P (insn)
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER)
       || CALL_P (insn) || JUMP_P (insn))
      && ! sequence_insn_p (insn)
      && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
    return 2;

  /* Increase the insn length of a cbranch without a delay slot insn to
     force a delay slot which will be stuffed with a nop.  */
  if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
      && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
      && ! sequence_insn_p (insn))
    return 2;

  /* sh-dsp parallel processing insn take four bytes instead of two.  */

  if (NONJUMP_INSN_P (insn))
    {
      int sum = 0;
      rtx body = PATTERN (insn);
      const char *templ;
      char c;
      bool maybe_label = true;

      /* Only inline asm needs scanning; any other pattern was rejected
	 above or has a known length.  */
      if (GET_CODE (body) == ASM_INPUT)
	templ = XSTR (body, 0);
      else if (asm_noperands (body) >= 0)
	templ
	  = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
      else
	return 0;
      /* Walk the asm template one assembler line at a time, adding up
	 the extra bytes required per line.  */
      do
	{
	  int ppi_adjust = 0;

	  /* Skip leading whitespace.  */
	  do
	    c = *templ++;
	  while (c == ' ' || c == '\t');
	  /* all sh-dsp parallel-processing insns start with p.
	     The only non-ppi sh insn starting with p is pref.
	     The only ppi starting with pr is prnd.  */
	  if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
	    ppi_adjust = 2;
	  /* The repeat pseudo-insn expands into three insns, a total of
	     six bytes in size.  */
	  else if ((c == 'r' || c == 'R')
		   && ! strncasecmp ("epeat", templ, 5))
	    ppi_adjust = 4;
	  /* Consume the rest of this assembler line.  */
	  while (c && c != '\n'
		 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
	    {
	      /* If this is a label, it is obviously not a ppi insn.  */
	      if (c == ':' && maybe_label)
		{
		  ppi_adjust = 0;
		  break;
		}
	      else if (c == '\'' || c == '"')
		maybe_label = false;
	      c = *templ++;
	    }
	  sum += ppi_adjust;
	  maybe_label = c != ':';
	}
      while (c);
      return sum;
    }
  return 0;
}
9003
9004 /* Return TRUE for a valid displacement for the REG+disp addressing
9005 with MODE. */
9006 bool
sh_legitimate_index_p(machine_mode mode,rtx op,bool consider_sh2a,bool allow_zero)9007 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9008 bool allow_zero)
9009 {
9010 if (! CONST_INT_P (op))
9011 return false;
9012
9013 {
9014 const HOST_WIDE_INT offset = INTVAL (op);
9015 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9016 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9017
9018 /* If the mode does not support any displacement always return false.
9019 Even though an index of '0' is actually always valid, it will cause
9020 troubles when e.g. a DFmode move is split into two SFmode moves,
9021 where one SFmode move will have index '0' and the other move will
9022 have index '4'. */
9023 if (!allow_zero && max_disp < 1)
9024 return false;
9025
9026 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9027 }
9028 }
9029
9030 /* Recognize an RTL expression that is a valid memory address for
9031 an instruction.
9032 The MODE argument is the machine mode for the MEM expression
9033 that wants to use this address.
9034 Allow REG
9035 REG+disp
9036 REG+r0
9037 REG++
9038 --REG
9039 GBR
9040 GBR+disp */
9041 static bool
sh_legitimate_address_p(machine_mode mode,rtx x,bool strict)9042 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9043 {
9044 if (REG_P (x) && REGNO (x) == GBR_REG)
9045 return true;
9046
9047 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9048 return true;
9049 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9050 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9051 return true;
9052 else if (GET_CODE (x) == PLUS)
9053 {
9054 rtx xop0 = XEXP (x, 0);
9055 rtx xop1 = XEXP (x, 1);
9056
9057 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9058 return gbr_displacement (xop1, mode);
9059
9060 if (GET_MODE_SIZE (mode) <= 8
9061 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9062 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9063 return true;
9064
9065 if (GET_MODE_SIZE (mode) <= 4
9066 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9067 {
9068 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9069 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9070 return true;
9071 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9072 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9073 return true;
9074 }
9075 }
9076
9077 return false;
9078 }
9079
9080 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9081 isn't protected by a PIC unspec. */
bool
nonpic_symbol_mentioned_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
      || GET_CODE (x) == PC)
    return true;

  /* We don't want to look into the possible MEM location of a
     CONST_DOUBLE, since we're not going to use it, in general.  */
  if (GET_CODE (x) == CONST_DOUBLE)
    return false;

  /* These unspecs wrap their symbol in a PIC-safe relocation, so any
     symbol inside them is protected, not a naked reference.  */
  if (GET_CODE (x) == UNSPEC
      && (XINT (x, 1) == UNSPEC_PIC
	  || XINT (x, 1) == UNSPEC_GOT
	  || XINT (x, 1) == UNSPEC_GOTOFF
	  || XINT (x, 1) == UNSPEC_GOTPLT
	  || XINT (x, 1) == UNSPEC_GOTTPOFF
	  || XINT (x, 1) == UNSPEC_DTPOFF
	  || XINT (x, 1) == UNSPEC_TPOFF
	  || XINT (x, 1) == UNSPEC_PLT
	  || XINT (x, 1) == UNSPEC_PCREL
	  || XINT (x, 1) == UNSPEC_SYMOFF
	  || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
	  || XINT (x, 1) == UNSPEC_GOTFUNCDESC
	  || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
    return false;

  /* Recurse over all sub-expressions of X.  */
  const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
	return true;
    }

  return false;
}
9125
9126 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9127 @GOTOFF in `reg'. */
rtx
legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
{
  /* TLS symbols are legitimized elsewhere; leave them untouched.  */
  if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
    return orig;

  /* Labels and binding-local symbols can avoid a GOT entry.  */
  if (GET_CODE (orig) == LABEL_REF
      || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
    {
      if (reg == NULL_RTX)
	reg = gen_reg_rtx (Pmode);

      if (TARGET_FDPIC
	  && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
	{
	  /* Weak functions may be NULL which doesn't work with
	     GOTOFFFUNCDESC because the runtime offset is not known.  */
	  if (SYMBOL_REF_WEAK (orig))
	    emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
	  else
	    emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
	}
      else if (TARGET_FDPIC
	       && (GET_CODE (orig) == LABEL_REF
		   || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
		       && (TREE_READONLY (SYMBOL_REF_DECL (orig))
			   || SYMBOL_REF_EXTERNAL_P (orig)
			   || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
	/* In FDPIC, GOTOFF can only be used for writable data.  */
	emit_insn (gen_symGOT2reg (reg, orig));
      else
	emit_insn (gen_symGOTOFF2reg (reg, orig));
      return reg;
    }
  /* Any other symbol must be loaded through the GOT (or a function
     descriptor under FDPIC).  */
  else if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == NULL_RTX)
	reg = gen_reg_rtx (Pmode);

      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
	emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
      else
	emit_insn (gen_symGOT2reg (reg, orig));
      return reg;
    }
  return orig;
}
9175
/* Given a (logical) mode size and an offset in bytes, try to find the
9177 appropriate displacement value for a mov insn. On SH the displacements
9178 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9179 15 bytes in QImode. To compensate this we create a new base address by
9180 adding an adjustment value to it.
9181
9182 If the originally requested offset is greater than 127 we prefer using
9183 values 124..127 over 128..131 to increase opportunities to use the
9184 add #imm, Rn insn.
9185
9186 In some cases it is possible that a requested offset might seem unaligned
9187 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9188 This is compensated by adjusting the base address so that the effective
9189 address of the displacement move insn will be aligned.
9190
9191 This is not the best possible way of rebasing the base address, as it
9192 does not look at other present displacement addressings around it.
9193 In some cases this can create more base address adjustments than would
9194 actually be necessary. */
/* Result of sh_find_mov_disp_adjust; both fields are NULL_RTX when no
   usable split was found.  */
struct disp_adjust
{
  /* Constant to add to the base address register.  */
  rtx offset_adjust;
  /* Residual displacement to use in the mov insn itself.  */
  rtx mov_disp;
};
9200
static struct disp_adjust
sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
{
  struct disp_adjust res = { NULL_RTX, NULL_RTX };

  /* Do not try to use SH2A's large displacements here, because this would
     effectively disable the small displacement insns.  */
  const int mode_sz = GET_MODE_SIZE (mode);
  const int mov_insn_sz = mov_insn_size (mode, false);
  const int max_disp = sh_max_mov_insn_displacement (mode, false);
  const int max_disp_next = max_disp + mov_insn_sz;
  /* Bias the re-base point for offsets above 127 so that the residual
     displacement prefers 124..127 (see the function comment above).  */
  HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
  HOST_WIDE_INT offset_adjust;

  /* In some cases this actually does happen and we must check for it.  */
  if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
    return res;

  /* Keeps the previous behavior for QImode displacement addressing.
     This just decides how the offset is re-based.  Removing this special
     case will result in slightly bigger code on average, but it's not that
     bad actually.  */
  if (mov_insn_sz == 1)
    align_modifier = 0;

  /* Round OFFSET down to a multiple of (max_disp + 1), biased by the
     alignment modifier above.  */
  offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;

  /* Accept the split only if the residual displacement is reachable by
     the mov insn.  */
  if (mode_sz + offset - offset_adjust <= max_disp_next)
    {
      res.offset_adjust = GEN_INT (offset_adjust);
      res.mov_disp = GEN_INT (offset - offset_adjust);
    }

  return res;
}
9236
9237 /* Try to modify an illegitimate address and make it legitimate.
9238 If we find one, return the new, valid address.
9239 Otherwise, return the original address. */
static rtx
sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  if (flag_pic)
    x = legitimize_pic_address (oldx, mode, NULL_RTX);

  /* SF/DF modes with FPU support are left unchanged here.  */
  if ((TARGET_FPU_DOUBLE && mode == DFmode)
      || (TARGET_SH2E && mode == SFmode))
    return x;

  /* For base register + constant displacement, try to re-base so the
     residual displacement fits the mov insn's limited range.  */
  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
      && BASE_REGISTER_RTX_P (XEXP (x, 0)))
    {
      struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
							INTVAL (XEXP (x, 1)));

      if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
	{
	  /* Emit insns computing base + adjustment, then address via the
	     small residual displacement.  */
	  rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
				  adj.offset_adjust, NULL_RTX, 0,
				  OPTAB_LIB_WIDEN);
	  return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
	}
    }
  return x;
}
9266
9267 /* Attempt to replace *p, which is an address that needs reloading, with
9268 a valid memory address for an operand of mode MODE.
9269 Like for sh_legitimize_address, for the SH we try to get a normal form
9270 of the address. That will allow inheritance of the address reloads. */
bool
sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
			      int itype)
{
  enum reload_type type = (enum reload_type) itype;
  const int mode_sz = GET_MODE_SIZE (mode);

  /* This hook is only for the old reload pass, not LRA.  */
  if (sh_lra_p ())
    return false;

  if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
      && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
    {
      const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
      struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);

      /* SH2A DFmode with an offset that is not a multiple of 8: reload
	 the whole address into a base register.  */
      if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
	{
	  push_reload (*p, NULL_RTX, p, NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}

      if (TARGET_SH2E && mode == SFmode)
	{
	  *p = copy_rtx (*p);
	  push_reload (*p, NULL_RTX, p, NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}

      /* FIXME: Do not allow to legitimize QImode and HImode displacement
	 moves because then reload has a problem figuring the constraint
	 that the move insn target/source reg must be R0.
	 Or maybe some handling is wrong in sh_secondary_reload for this
	 to work properly?  */
      if ((mode_sz == 4 || mode_sz == 8)
	  && ! (TARGET_SH4 && mode == DFmode)
	  && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
	{
	  /* Split into (base + adjustment) + residual displacement and
	     let reload materialize the inner sum in a base register.  */
	  rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
	  *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
	  push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}
    }

  /* We must re-recognize what we created before.  */
  if (GET_CODE (*p) == PLUS
      && (mode_sz == 4 || mode_sz == 8)
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
      && CONST_INT_P (XEXP (*p, 1))
      && ! (TARGET_SH2E && mode == SFmode))
    {
      /* Because this address is so complex, we know it must have
	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
	 it is already unshared, and needs no further unsharing.  */
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
      return true;
    }

  return false;
}
9338
9339 /* In the name of slightly smaller debug output, and to cater to
9340 general assembler lossage, recognize various UNSPEC sequences
9341 and turn them back into a direct symbol reference. */
static rtx
sh_delegitimize_address (rtx orig_x)
{
  orig_x = delegitimize_mem_from_attrs (orig_x);

  rtx x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) == CONST)
    {
      rtx y = XEXP (x, 0);
      if (GET_CODE (y) == UNSPEC)
	{
	  /* GOT/GOTOFF/SYMOFF wrappers carry the symbol as vector
	     element 0.  */
	  if (XINT (y, 1) == UNSPEC_GOT
	      || XINT (y, 1) == UNSPEC_GOTOFF
	      || XINT (y, 1) == UNSPEC_SYMOFF)
	    return XVECEXP (y, 0, 0);
	  else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
	    {
	      /* The symbol may be wrapped in a further PLT or PCREL
		 unspec inside a CONST.  */
	      if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
		{
		  rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);

		  if (GET_CODE (symplt) == UNSPEC
		      && (XINT (symplt, 1) == UNSPEC_PLT
			  || XINT (symplt, 1) == UNSPEC_PCREL))
		    return XVECEXP (symplt, 0, 0);
		}
	    }
	}
    }

  return orig_x;
}
9376
9377 /* Mark the use of a constant in the literal table. If the constant
9378 has multiple labels, make it unique. */
static rtx
mark_constant_pool_use (rtx x)
{
  if (x == NULL_RTX)
    return x;

  switch (GET_CODE (x))
    {
    case LABEL_REF:
      /* Strip the LABEL_REF down to its CODE_LABEL.  */
      x = XEXP (x, 0);
      /* Fall through.  */
    case CODE_LABEL:
      break;
    default:
      return x;
    }

  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
  rtx_insn* lab = as_a <rtx_insn*> (x);
  for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
    {
      if (!LABEL_P (insn)
	  || LABEL_REFS (insn) != NEXT_INSN (insn))
	break;
      lab = insn;
    }

  /* Mark the now-redundant labels in the chain as deleted.  */
  for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
    as_a<rtx_insn *> (insn)->set_deleted ();

  /* Mark constants in a window.  */
  for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
       insn = NEXT_INSN (insn))
    {
      if (!NONJUMP_INSN_P (insn))
	continue;

      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
	continue;

      switch (XINT (pattern, 1))
	{
	case UNSPECV_CONST2:
	case UNSPECV_CONST4:
	case UNSPECV_CONST8:
	  /* Flag this pool constant as used.  */
	  XVECEXP (pattern, 0, 1) = const1_rtx;
	  break;
	case UNSPECV_WINDOW_END:
	  if (XVECEXP (pattern, 0, 0) == x)
	    return lab;
	  break;
	case UNSPECV_CONST_END:
	  return lab;
	default:
	  break;
	}
    }

  return lab;
}
9440
9441 /* Return true if it's possible to redirect BRANCH1 to the destination
9442 of an unconditional jump BRANCH2. We only want to do this if the
9443 resulting branch will have a short displacement. */
static bool
sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
{
  /* Don't follow if BRANCH2 is possible to be a jump crossing between
     hot and cold partitions.  */
  if (flag_reorder_blocks_and_partition
      && simplejump_p (branch2)
      && CROSSING_JUMP_P (branch2))
    return false;

  if (flag_expensive_optimizations && simplejump_p (branch2))
    {
      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
      rtx_insn *insn;
      int distance;

      /* Scan backwards from just after BRANCH1, accumulating insn
	 lengths, to see whether DEST lies within a short displacement.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = PREV_INSN (insn))
	{
	  if (insn == dest)
	    return true;
	  else
	    distance += get_attr_length (insn);
	}
      /* Likewise scan forwards.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = NEXT_INSN (insn))
	{
	  if (insn == dest)
	    return true;
	  else
	    distance += get_attr_length (insn);
	}
    }
  return false;
}
9481
9482 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9483 bool
sh_hard_regno_rename_ok(unsigned int old_reg ATTRIBUTE_UNUSED,unsigned int new_reg)9484 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9485 unsigned int new_reg)
9486 {
9487 /* Interrupt functions can only use registers that have already been
9488 saved by the prologue, even if they would normally be
9489 call-clobbered. */
9490 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9491 return false;
9492
9493 return true;
9494 }
9495
9496 /* Function to update the integer COST
9497 based on the relationship between INSN that is dependent on
9498 DEP_INSN through the dependence LINK. The default is to make no
9499 adjustment to COST. This can be used for example to specify to
9500 the scheduler that an output- or anti-dependence does not incur
9501 the same cost as a data-dependence. The return value should be
9502 the new value for COST. */
static int
sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		unsigned int)
{
  rtx reg, use_pat;

  /* DEP_TYPE == 0 denotes a true (data) dependence.  */
  if (dep_type == 0)
    {
      /* Unrecognizable insns: leave the cost alone.  */
      if (recog_memoized (insn) < 0
	  || recog_memoized (dep_insn) < 0)
	return cost;

      rtx dep_set = single_set (dep_insn);

      /* The latency that we specify in the scheduling description refers
	 to the actual output, not to an auto-increment register; for that,
	 the latency is one.  */
      if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
	{
	  rtx set = single_set (insn);

	  if (set
	      && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
	      && (!MEM_P (SET_DEST (set))
		  || !reg_mentioned_p (SET_DEST (dep_set),
				       XEXP (SET_DEST (set), 0))))
	    cost = 1;
	}
      /* The only input for a call that is timing-critical is the
	 function's address.  */
      if (CALL_P (insn))
	{
	  rtx call = get_call_rtx_from (insn);
	  if (call
	      /* sibcalli_thunk uses a symbol_ref in an unspec.  */
	      && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
		  || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
	    cost -= TARGET_SH4_300 ? 3 : 6;
	}
      /* Likewise, the most timing critical input for an sfuncs call
	 is the function address.  However, sfuncs typically start
	 using their arguments pretty quickly.
	 Assume a four cycle delay for SH4 before they are needed.
	 Cached ST40-300 calls are quicker, so assume only a one
	 cycle delay there.
	 ??? Maybe we should encode the delays till input registers
	 are needed by sfuncs into the sfunc call insn.  */
      /* All sfunc calls are parallels with at least four components.
	 Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
      else if (GET_CODE (PATTERN (insn)) == PARALLEL
	       && XVECLEN (PATTERN (insn), 0) >= 4
	       && (reg = sfunc_uses_reg (insn)))
	{
	  if (! reg_set_p (reg, dep_insn))
	    cost -= TARGET_SH4_300 ? 1 : 4;
	}
      if (TARGET_HARD_SH4 && !TARGET_SH4_300)
	{
	  /* NOTE: this declaration intentionally shadows the int
	     parameter DEP_TYPE; from here on it holds the insn
	     attribute type of DEP_INSN.  */
	  attr_type dep_type = get_attr_type (dep_insn);
	  attr_type type;
	  if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
	    cost--;
	  else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
		   && (type = get_attr_type (insn)) != TYPE_CALL
		   && type != TYPE_SFUNC)
	    cost--;
	  /* When the preceding instruction loads the shift amount of
	     the following SHAD/SHLD, the latency of the load is increased
	     by 1 cycle.  */
	  if (get_attr_type (insn) == TYPE_DYN_SHIFT
	      && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
	      && reg_overlap_mentioned_p (SET_DEST (dep_set),
					  XEXP (SET_SRC (single_set (insn)),
						1)))
	    cost++;
	  /* When an LS group instruction with a latency of less than
	     3 cycles is followed by a double-precision floating-point
	     instruction, FIPR, or FTRV, the latency of the first
	     instruction is increased to 3 cycles.  */
	  else if (cost < 3
		   && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
		   && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	    cost = 3;
	  /* The lsw register of a double-precision computation is ready one
	     cycle earlier.  */
	  else if (reload_completed
		   && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
		   && (use_pat = single_set (insn))
		   && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
				      SET_SRC (use_pat)))
	    cost -= 1;

	  if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
	      && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
	    cost -= 1;
	}
      else if (TARGET_SH4_300)
	{
	  /* Stores need their input register two cycles later.  */
	  attr_type type;
	  if (dep_set && cost >= 1
	      && ((type = get_attr_type (insn)) == TYPE_STORE
		  || type == TYPE_PSTORE
		  || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
	    {
	      rtx set = single_set (insn);

	      if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
		  && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
		{
		  cost -= 2;
		  /* But don't reduce the cost below 1 if the address depends
		     on a side effect of dep_insn.  */
		  if (cost < 1
		      && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
		    cost = 1;
		}
	    }
	}
    }
  /* An anti-dependence penalty of two applies if the first insn is a double
     precision fadd / fsub / fmul.  */
  else if (!TARGET_SH4_300
	   && dep_type == REG_DEP_ANTI
	   && recog_memoized (dep_insn) >= 0
	   && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
	       || get_attr_type (dep_insn) == TYPE_DFP_MUL)
	   /* A lot of alleged anti-flow dependences are fake,
	      so check this one is real.  */
	   && flow_dependent_p (dep_insn, insn))
    cost = 2;

  return cost;
}
9637
9638 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9639 if DEP_INSN is anti-flow dependent on INSN. */
9640 static bool
flow_dependent_p(rtx insn,rtx dep_insn)9641 flow_dependent_p (rtx insn, rtx dep_insn)
9642 {
9643 rtx tmp = PATTERN (insn);
9644
9645 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9646 return tmp == NULL_RTX;
9647 }
9648
9649 /* A helper function for flow_dependent_p called through note_stores. */
9650 static void
flow_dependent_p_1(rtx x,const_rtx pat ATTRIBUTE_UNUSED,void * data)9651 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9652 {
9653 rtx * pinsn = (rtx *) data;
9654
9655 if (*pinsn && reg_referenced_p (x, *pinsn))
9656 *pinsn = NULL_RTX;
9657 }
9658
/* For use by sh_allocate_initial_value.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */
static int
sh_pr_n_sets (void)
{
  /* Number of dataflow-recorded definitions of the PR register.  */
  return DF_REG_DEF_COUNT (PR_REG);
}
9668
9669 /* Return where to allocate pseudo for a given hard register initial
9670 value. */
9671 static rtx
sh_allocate_initial_value(rtx hard_reg)9672 sh_allocate_initial_value (rtx hard_reg)
9673 {
9674 if (REGNO (hard_reg) == PR_REG)
9675 {
9676 if (crtl->is_leaf && ! sh_pr_n_sets ())
9677 return hard_reg;
9678 else
9679 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9680 }
9681
9682 return NULL_RTX;
9683 }
9684
9685 /* This function returns "2" to indicate dual issue for the SH4
9686 processor. To be used by the DFA pipeline description. */
9687 static int
sh_issue_rate(void)9688 sh_issue_rate (void)
9689 {
9690 if (TARGET_SUPERSCALAR)
9691 return 2;
9692 else
9693 return 1;
9694 }
9695
9696 /* Functions for ready queue reordering for sched1. */
9697
9698 /* Get weight for mode for a set x. */
9699 static short
find_set_regmode_weight(rtx x,machine_mode mode)9700 find_set_regmode_weight (rtx x, machine_mode mode)
9701 {
9702 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9703 return 1;
9704 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9705 {
9706 if (REG_P (SET_DEST (x)))
9707 {
9708 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9709 return 1;
9710 else
9711 return 0;
9712 }
9713 return 1;
9714 }
9715 return 0;
9716 }
9717
9718 /* Get regmode weight for insn. */
9719 static short
find_insn_regmode_weight(rtx insn,machine_mode mode)9720 find_insn_regmode_weight (rtx insn, machine_mode mode)
9721 {
9722 /* Increment weight for each register born here. */
9723 rtx x = PATTERN (insn);
9724 short reg_weight = find_set_regmode_weight (x, mode);
9725 if (GET_CODE (x) == PARALLEL)
9726 {
9727 int j;
9728 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9729 {
9730 x = XVECEXP (PATTERN (insn), 0, j);
9731 reg_weight += find_set_regmode_weight (x, mode);
9732 }
9733 }
9734 /* Decrement weight for each register that dies here. */
9735 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9736 {
9737 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9738 {
9739 rtx note = XEXP (x, 0);
9740 if (REG_P (note) && GET_MODE (note) == mode)
9741 reg_weight--;
9742 }
9743 }
9744 return reg_weight;
9745 }
9746
9747 /* Calculate regmode weights for all insns of a basic block. */
9748 static void
find_regmode_weight(basic_block b,machine_mode mode)9749 find_regmode_weight (basic_block b, machine_mode mode)
9750 {
9751 rtx_insn *insn, *next_tail, *head, *tail;
9752
9753 get_ebb_head_tail (b, b, &head, &tail);
9754 next_tail = NEXT_INSN (tail);
9755
9756 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9757 {
9758 /* Handle register life information. */
9759 if (!INSN_P (insn))
9760 continue;
9761
9762 if (mode == SFmode)
9763 INSN_REGMODE_WEIGHT (insn, mode) =
9764 find_insn_regmode_weight (insn, mode)
9765 + 2 * find_insn_regmode_weight (insn, DFmode);
9766 else if (mode == SImode)
9767 INSN_REGMODE_WEIGHT (insn, mode) =
9768 find_insn_regmode_weight (insn, mode)
9769 + 2 * find_insn_regmode_weight (insn, DImode);
9770 }
9771 }
9772
9773 /* Comparison function for ready queue sorting. */
9774 static int
rank_for_reorder(const void * x,const void * y)9775 rank_for_reorder (const void *x, const void *y)
9776 {
9777 rtx_insn *tmp = *(rtx_insn * const *) y;
9778 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9779
9780 /* The insn in a schedule group should be issued the first. */
9781 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9782 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9783
9784 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9785 minimizes instruction movement, thus minimizing sched's effect on
9786 register pressure. */
9787 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9788 }
9789
9790 /* Resort the array A in which only element at index N may be out of order. */
9791 static void
swap_reorder(rtx_insn ** a,int n)9792 swap_reorder (rtx_insn **a, int n)
9793 {
9794 rtx_insn *insn = a[n - 1];
9795 int i = n - 2;
9796
9797 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9798 {
9799 a[i + 1] = a[i];
9800 i -= 1;
9801 }
9802 a[i + 1] = insn;
9803 }
9804
9805 /* Sort the ready list by ascending priority. */
9806 static void
ready_reorder(rtx_insn ** ready,int nready)9807 ready_reorder (rtx_insn **ready, int nready)
9808 {
9809 if (nready == 2)
9810 swap_reorder (ready, nready);
9811 else if (nready > 2)
9812 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9813 }
9814
/* Count life regions of r0 for a block.  Returns the number of life
   region starts minus the number of deaths seen in the block.  */
static int
find_r0_life_regions (basic_block b)
{
  bool live;	/* Whether r0 is live at the current scan point.  */
  int set;	/* Life regions started (including live-on-entry).  */
  int death = 0;	/* REG_DEAD notes for r0 seen so far.  */

  /* r0 live on entry counts as the start of a life region.  */
  if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
    {
      set = 1;
      live = true;
    }
  else
    {
      set = 0;
      live = false;
    }

  rtx_insn* insn = BB_HEAD (b);
  rtx_insn* end = BB_END (b);
  rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
  while (1)
    {
      if (INSN_P (insn))
	{
	  /* A REG_DEAD note for r0 closes the current life region.  */
	  if (find_regno_note (insn, REG_DEAD, R0_REG))
	    {
	      death++;
	      live = false;
	    }

	  /* A set overlapping r0 while it is dead starts a new life
	     region, unless the value is unused (REG_UNUSED).  */
	  rtx pset;
	  if (!live
	      && (pset = single_set (insn))
	      && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
	      && !find_regno_note (insn, REG_UNUSED, R0_REG))
	    {
	      set++;
	      live = true;
	    }
	}
      if (insn == end)
	break;
      insn = NEXT_INSN (insn);
    }
  return set - death;
}
9863
9864 /* Calculate regmode weights for all insns of all basic block. */
9865 static void
sh_md_init_global(FILE * dump ATTRIBUTE_UNUSED,int verbose ATTRIBUTE_UNUSED,int old_max_uid)9866 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9867 int verbose ATTRIBUTE_UNUSED,
9868 int old_max_uid)
9869 {
9870 basic_block b;
9871
9872 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9873 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9874 r0_life_regions = 0;
9875
9876 FOR_EACH_BB_REVERSE_FN (b, cfun)
9877 {
9878 find_regmode_weight (b, SImode);
9879 find_regmode_weight (b, SFmode);
9880 if (!reload_completed)
9881 r0_life_regions += find_r0_life_regions (b);
9882 }
9883
9884 CURR_REGMODE_PRESSURE (SImode) = 0;
9885 CURR_REGMODE_PRESSURE (SFmode) = 0;
9886 }
9887
9888 /* Cleanup. */
9889 static void
sh_md_finish_global(FILE * dump ATTRIBUTE_UNUSED,int verbose ATTRIBUTE_UNUSED)9890 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9891 int verbose ATTRIBUTE_UNUSED)
9892 {
9893 if (regmode_weight[0])
9894 {
9895 free (regmode_weight[0]);
9896 regmode_weight[0] = NULL;
9897 }
9898 if (regmode_weight[1])
9899 {
9900 free (regmode_weight[1]);
9901 regmode_weight[1] = NULL;
9902 }
9903 }
9904
9905 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9906 keep count of register pressures on SImode and SFmode. */
9907 static int
sh_variable_issue(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn * insn,int can_issue_more)9908 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9909 int sched_verbose ATTRIBUTE_UNUSED,
9910 rtx_insn *insn,
9911 int can_issue_more)
9912 {
9913 if (GET_CODE (PATTERN (insn)) != USE
9914 && GET_CODE (PATTERN (insn)) != CLOBBER)
9915 cached_can_issue_more = can_issue_more - 1;
9916 else
9917 cached_can_issue_more = can_issue_more;
9918
9919 if (reload_completed)
9920 return cached_can_issue_more;
9921
9922 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9923 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9924
9925 return cached_can_issue_more;
9926 }
9927
/* Per-region scheduler initialization: reset the running register
   pressure counters.  */
static void
sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
	    int verbose ATTRIBUTE_UNUSED,
	    int veclen ATTRIBUTE_UNUSED)
{
  CURR_REGMODE_PRESSURE (SImode) = 0;
  CURR_REGMODE_PRESSURE (SFmode) = 0;
}
9936
/* Some magic numbers.  */
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
9940 #define R0_MAX_LIFE_REGIONS 2
9941 /* Register Pressure thresholds for SImode and SFmode registers. */
9942 #define SIMODE_MAX_WEIGHT 5
9943 #define SFMODE_MAX_WEIGHT 10
9944
9945 /* Return true if the pressure is high for MODE. */
9946 static bool
high_pressure(machine_mode mode)9947 high_pressure (machine_mode mode)
9948 {
9949 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9950 functions that already have high pressure on r0. */
9951 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9952 return true;
9953
9954 if (mode == SFmode)
9955 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9956 else
9957 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9958 }
9959
9960 /* Reorder ready queue if register pressure is high. */
9961 static int
sh_reorder(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn ** ready,int * n_readyp,int clock_var ATTRIBUTE_UNUSED)9962 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9963 int sched_verbose ATTRIBUTE_UNUSED,
9964 rtx_insn **ready,
9965 int *n_readyp,
9966 int clock_var ATTRIBUTE_UNUSED)
9967 {
9968 if (reload_completed)
9969 return sh_issue_rate ();
9970
9971 if (high_pressure (SFmode) || high_pressure (SImode))
9972 {
9973 ready_reorder (ready, *n_readyp);
9974 }
9975
9976 return sh_issue_rate ();
9977 }
9978
9979 /* Skip cycles if the current register pressure is high. */
9980 static int
sh_reorder2(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn ** ready ATTRIBUTE_UNUSED,int * n_readyp ATTRIBUTE_UNUSED,int clock_var ATTRIBUTE_UNUSED)9981 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9982 int sched_verbose ATTRIBUTE_UNUSED,
9983 rtx_insn **ready ATTRIBUTE_UNUSED,
9984 int *n_readyp ATTRIBUTE_UNUSED,
9985 int clock_var ATTRIBUTE_UNUSED)
9986 {
9987 if (reload_completed)
9988 return cached_can_issue_more;
9989
9990 if (high_pressure(SFmode) || high_pressure (SImode))
9991 skip_cycles = 1;
9992
9993 return cached_can_issue_more;
9994 }
9995
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q->R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */
9999
10000 /* Generally, skipping these many cycles are sufficient for all insns to move
10001 from Q -> R. */
10002 #define MAX_SKIPS 8
10003
10004 static int
sh_dfa_new_cycle(FILE * sched_dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn * insn ATTRIBUTE_UNUSED,int last_clock_var,int clock_var,int * sort_p)10005 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10006 int sched_verbose ATTRIBUTE_UNUSED,
10007 rtx_insn *insn ATTRIBUTE_UNUSED,
10008 int last_clock_var,
10009 int clock_var,
10010 int *sort_p)
10011 {
10012 if (reload_completed)
10013 return 0;
10014
10015 if (skip_cycles)
10016 {
10017 if ((clock_var - last_clock_var) < MAX_SKIPS)
10018 {
10019 *sort_p = 0;
10020 return 1;
10021 }
10022 /* If this is the last cycle we are skipping, allow reordering of R. */
10023 if ((clock_var - last_clock_var) == MAX_SKIPS)
10024 {
10025 *sort_p = 1;
10026 return 1;
10027 }
10028 }
10029
10030 skip_cycles = 0;
10031
10032 return 0;
10033 }
10034
10035 static bool
sh_ms_bitfield_layout_p(const_tree record_type ATTRIBUTE_UNUSED)10036 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10037 {
10038 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10039 }
10040
10041 /*
10042 On the SH1..SH4, the trampoline looks like
10043 2 0002 D202 mov.l l2,r2
10044 1 0000 D301 mov.l l1,r3
10045 3 0004 422B jmp @r2
10046 4 0006 0009 nop
10047 5 0008 00000000 l1: .long area
10048 6 000c 00000000 l2: .long function
10049
10050 FDPIC needs a form that includes a function descriptor and
10051 code to load the GOT register:
10052 0 0000 00000000 .long l0
10053 1 0004 00000000 .long gotval
10054 2 0008 D302 l0: mov.l l1,r3
10055 3 000a D203 mov.l l2,r2
10056 4 000c 6122 mov.l @r2,r1
10057 5 000e 5C21 mov.l @(4,r2),r12
10058 6 0010 412B jmp @r1
10059 7 0012 0009 nop
10060 8 0014 00000000 l1: .long area
10061 9 0018 00000000 l2: .long function
10062
10063 SH5 (compact) uses r1 instead of r3 for the static chain. */
10064
10065 /* Emit insns to store a value at memory address + offset. */
10066 static void
sh_emit_storesi(rtx addr,HOST_WIDE_INT offset,rtx value)10067 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10068 {
10069 gcc_assert ((offset & 3) == 0);
10070 emit_move_insn (offset == 0
10071 ? change_address (addr, SImode, NULL_RTX)
10072 : adjust_address (addr, SImode, offset), value);
10073 }
10074
10075 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10076 static void
sh_emit_storehi(rtx addr,HOST_WIDE_INT offset,uint16_t w0,uint16_t w1)10077 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10078 {
10079 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10080 ? (w0 | (w1 << 16))
10081 : (w1 | (w0 << 16)), SImode));
10082 }
10083
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
   The insn word values stored below correspond to the trampoline
   layouts in the comment above.  */
static void
sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));

  if (TARGET_FDPIC)
    {
      /* The FDPIC trampoline starts with a two-word function
	 descriptor: the code entry point (8 bytes into the
	 trampoline) and the GOT value.  */
      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));

      sh_emit_storesi (tramp_mem, 0, a);
      sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());

      /* mov.l l1,r3; mov.l l2,r2; mov.l @r2,r1; mov.l @(4,r2),r12;
	 jmp @r1; nop.  */
      sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
      sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
      sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);

      /* l1: static chain, l2: target function descriptor.  */
      sh_emit_storesi (tramp_mem, 20, cxt);
      sh_emit_storesi (tramp_mem, 24, fnaddr);
    }
  else
    {
      /* mov.l l2,r2; mov.l l1,r3; jmp @r2; nop.  */
      sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
      sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);

      /* l1: static chain, l2: target function address.  */
      sh_emit_storesi (tramp_mem, 8, cxt);
      sh_emit_storesi (tramp_mem, 12, fnaddr);
    }
  if (TARGET_HARD_SH4)
    {
      /* The instruction cache must be invalidated for the freshly
	 written trampoline code, either via a library call or inline.  */
      if (!TARGET_INLINE_IC_INVALIDATE
	  || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
	emit_library_call (function_symbol (NULL, "__ic_invalidate",
					    FUNCTION_ORDINARY).sym,
			   LCT_NORMAL, VOIDmode, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}
10126
/* Trampoline address adjustment hook.  The old SH5 case (SHmedia code
   needed 1 added to the address) no longer applies; the address is
   returned unchanged.  */
static rtx
sh_trampoline_adjust_address (rtx tramp)
{
  return tramp;
}
10133
10134 /* If PIC, we cannot make sibling calls to global functions
10135 because the PLT requires r12 to be live. */
10136 static bool
sh_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)10137 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10138 {
10139 return (1
10140 && ! sh_cfun_interrupt_handler_p ()
10141 && (! flag_pic || TARGET_FDPIC
10142 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10143 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10144 }
10145
10146 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10147 void
sh_expand_sym_label2reg(rtx reg,rtx sym,rtx lab,bool sibcall_p)10148 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10149 {
10150 const_tree decl = SYMBOL_REF_DECL (sym);
10151 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10152
10153 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10154 emit_insn (gen_sym_label2reg (reg, sym, lab));
10155 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10156 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10157 else
10158 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10159 }
10160
/* Machine specific built-in functions.  */

/* Descriptor for one SH built-in function.  */
struct builtin_description
{
  bool (* const is_enabled) (void);	/* Target predicate gating the builtin.  */
  const enum insn_code icode;		/* Insn used to expand the builtin.  */
  const char *const name;		/* User-visible builtin name.  */
  int signature;			/* Index into signature_args below.  */
  tree fndecl;				/* Decl, filled in by sh_init_builtins.  */
};
10171
/* Enablement predicate for builtins available on SH1 targets.
   (An earlier comment said this was commented out to avoid a
   defined-but-unused warning; it is in use now.)  */
static bool
sh1_builtin_p (void)
{
  return TARGET_SH1;
}
10179
/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
/* 9: 64-bit pointer, 10: 32-bit pointer.  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 10 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 1 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9 },
#define SH_BLTIN_LDUA_Q64 16
  { 1, 9 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 1 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8 },
#define SH_BLTIN_VP 24
  { 8, 0 },
#define SH_BLTIN_UV 25
  { 1, 0 },
#define SH_BLTIN_VU 26
  { 0, 1 },
};
/* Signedness notes for various media builtins (none of which appear in
   bdesc below):  */
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */

/* Table of SH builtins.  The fndecl field (0 here) is filled in by
   sh_init_builtins.  */
static struct builtin_description bdesc[] =
{
  { sh1_builtin_p,
    CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
  { sh1_builtin_p,
    CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
};
10257
/* Cached decls of the FPSCR get/set builtins, used by
   sh_atomic_assign_expand_fenv.  */
static tree sh_builtin_get_fpscr;
static tree sh_builtin_set_fpscr;

/* Register the SH builtins from bdesc with the front end, building each
   one's function type from its signature_args entry.  */
static void
sh_init_builtins (void)
{
  /* Function types are shared between builtins with the same signature.  */
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  memset (shared, 0, sizeof shared);

  for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
    {
      builtin_description* d = &bdesc[di];

      if (!d->is_enabled ())
	continue;

      tree type, arg_type = NULL_TREE;
      int signature = d->signature;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;
	  tree args[3];

	  /* Skip builtins with a floating-point result when there is
	     no FPU.  */
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
	    args[i] = NULL_TREE;
	  /* Walk the signature backwards; on the final iteration
	     (i == 0) arg_type ends up holding the result type.  */
	  for (int i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode, (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      args[i-1] = arg_type;
	    }
	  type = build_function_type_list (arg_type, args[0], args[1],
					   args[2], NULL_TREE);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      d->fndecl =
	add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			      NULL, NULL_TREE);
      /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv.  */
      if (d->icode == CODE_FOR_sts_fpscr)
	sh_builtin_get_fpscr = d->fndecl;
      else if (d->icode == CODE_FOR_set_fpscr)
	sh_builtin_set_fpscr = d->fndecl;
    }
}
10322
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* FPSCR exception bits; the mask below clears both these bits and the
     corresponding bits SH_FE_EXCEPT_SHIFT positions further left.
     NOTE(review): bit positions assumed to match the SH FPSCR layout —
     confirm against the architecture manual.  */
  const unsigned SH_FE_INVALID = 64;
  const unsigned SH_FE_DIVBYZERO = 32;
  const unsigned SH_FE_OVERFLOW = 16;
  const unsigned SH_FE_UNDERFLOW = 8;
  const unsigned SH_FE_INEXACT = 4;
  const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
						   | SH_FE_DIVBYZERO
						   | SH_FE_OVERFLOW
						   | SH_FE_UNDERFLOW
						   | SH_FE_INEXACT);
  const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
  tree fenv_var, mask, ld_fenv, masked_fenv;
  tree new_fenv_var, reload_fenv, restore_fnenv;
  tree update_call, atomic_feraiseexcept, hold_fnclex;

  /* Without an FPU there is no FPSCR to manipulate.  */
  if (! TARGET_FPU_ANY)
    return;

  /* Generate the equivalent of :
       unsigned int fenv_var;
       fenv_var = __builtin_sh_get_fpscr ();

       unsigned int masked_fenv;
       masked_fenv = fenv_var & mask;

       __builtin_sh_set_fpscr (masked_fenv);  */

  fenv_var = create_tmp_var_raw (unsigned_type_node);
  mask = build_int_cst (unsigned_type_node,
			~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
			  | SH_FE_ALL_EXCEPT));
  ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
		    fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
  masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
  hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
  /* Wrap fenv_var in a TARGET_EXPR so its initialization (the load of
     the FPSCR) is emitted exactly once, before the masking store.  */
  fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
		     build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
			     ld_fenv),
		     NULL_TREE, NULL_TREE);
  *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_sh_set_fpscr (masked_fenv);  */

  *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);

  /* Generate the equivalent of :
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_sh_get_fpscr ();

       __builtin_sh_set_fpscr (fenv_var);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
			build_call_expr (sh_builtin_get_fpscr, 0));
  restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
				 fold_convert (integer_type_node,
					       new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    reload_fenv, restore_fnenv), update_call);
}
10394
/* Implements target hook vector_mode_supported_p.  No vector mode is
   reported as supported.  */
bool
sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return false;
}
10401
10402 bool
sh_frame_pointer_required(void)10403 sh_frame_pointer_required (void)
10404 {
10405 /* If needed override this in other tm.h files to cope with various OS
10406 lossage requiring a frame pointer. */
10407 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10408 return true;
10409
10410 if (crtl->profile)
10411 return true;
10412
10413 return false;
10414 }
10415
10416 /* Implements target hook dwarf_calling_convention. Return an enum
10417 of dwarf_calling_convention. */
10418 int
sh_dwarf_calling_convention(const_tree func)10419 sh_dwarf_calling_convention (const_tree func)
10420 {
10421 if (sh_attr_renesas_p (func))
10422 return DW_CC_GNU_renesas_sh;
10423
10424 return DW_CC_normal;
10425 }
10426
10427 /* Returns the sh builtin decl for CODE. */
10428 static tree
sh_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)10429 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10430 {
10431 if (code >= ARRAY_SIZE (bdesc))
10432 return error_mark_node;
10433
10434 if (!bdesc[code].is_enabled ())
10435 return error_mark_node;
10436
10437 return bdesc[code].fndecl;
10438 }
10439
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  int nop = 0;	/* Number of insn operands collected so far.  */
  rtx op[4];

  /* A nonzero first signature entry means the builtin has a result;
     operand 0 of the insn is then the destination.  */
  if (signature_args[signature][0])
    {
      if (ignore)
	return NULL_RTX;

      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (! target || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = NULL_RTX;

  /* Expand up to three arguments, coercing each into the mode the
     corresponding insn operand predicate expects.  */
  for (int i = 1; i <= 3; i++, nop++)
    {
      if (! signature_args[signature][i])
	break;
      tree arg = CALL_EXPR_ARG (exp, i - 1);
      if (arg == error_mark_node)
	return const0_rtx;

      machine_mode opmode;
      tree optype;
      if (signature_args[signature][i] & 8)
	{
	  /* Signature code 8 marks a pointer argument.  */
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}

      machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  rtx pat = NULL_RTX;

  /* Generate the insn with however many operands were collected.  */
  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return NULL_RTX;
  emit_insn (pat);
  return target;
}
10524
10525 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10526 UNITS_PER_WORD bits wide. */
10527
10528 static unsigned int
sh_hard_regno_nregs(unsigned int regno,machine_mode mode)10529 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10530 {
10531 if (XD_REGISTER_P (regno))
10532 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10533 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
10534 }
10535
10536 /* Implement TARGET_HARD_REGNO_MODE_OK.
10537
10538 We can allow any mode in any general register. The special registers
10539 only allow SImode. Don't allow any mode in the PR.
10540
10541 We cannot hold DCmode values in the XD registers because alter_reg
10542 handles subregs of them incorrectly. We could work around this by
10543 spacing the XD registers like the DR registers, but this would require
10544 additional memory in every compilation to hold larger register vectors.
10545 We could hold SFmode / SCmode values in XD registers, but that
10546 would require a tertiary reload when reloading from / to memory,
10547 and a secondary reload to reload from / to general regs; that
10548 seems to be a losing proposition.
10549
10550 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10551 it won't be ferried through GP registers first. */
10552 static bool
sh_hard_regno_mode_ok(unsigned int regno,machine_mode mode)10553 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10554 {
10555 if (SPECIAL_REGISTER_P (regno))
10556 return mode == SImode;
10557
10558 if (regno == FPUL_REG)
10559 return (mode == SImode || mode == SFmode);
10560
10561 if (FP_REGISTER_P (regno) && mode == SFmode)
10562 return true;
10563
10564 if (mode == V2SFmode)
10565 {
10566 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10567 || GENERAL_REGISTER_P (regno)))
10568 return true;
10569 else
10570 return false;
10571 }
10572
10573 if (mode == V4SFmode)
10574 {
10575 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10576 || GENERAL_REGISTER_P (regno))
10577 return true;
10578 else
10579 return false;
10580 }
10581
10582 if (mode == V16SFmode)
10583 return regno == FIRST_XD_REG;
10584
10585 if (FP_REGISTER_P (regno))
10586 {
10587 if (mode == SFmode
10588 || mode == SImode
10589 || ((TARGET_SH2E) && mode == SCmode)
10590 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10591 && ((regno - FIRST_FP_REG) & 1) == 0)
10592 || (TARGET_SH4 && mode == TImode
10593 && ((regno - FIRST_FP_REG) & 3) == 0))
10594 return true;
10595 else
10596 return false;
10597 }
10598
10599 if (XD_REGISTER_P (regno))
10600 return mode == DFmode;
10601
10602 if (regno == PR_REG)
10603 return mode == SImode;
10604
10605 if (regno == FPSCR_REG)
10606 return mode == SImode;
10607
10608 return true;
10609 }
10610
10611 /* Implement TARGET_MODES_TIEABLE_P.
10612
10613 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10614 and MODE2, for any hard reg, then this must be false for correct output.
10615 That's the case for xd registers: we don't hold SFmode values in
10616 them, so we can't tie an SFmode pseudos with one in another
10617 floating-point mode. */
10618
10619 static bool
sh_modes_tieable_p(machine_mode mode1,machine_mode mode2)10620 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10621 {
10622 return (mode1 == mode2
10623 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10624 && (mode1 != SFmode && mode2 != SFmode)));
10625 }
10626
10627 /* Specify the modes required to caller save a given hard regno.
10628 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10629 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10630 permits integer modes on them. That makes LRA's split process
10631 unhappy. See PR55212.
10632 */
10633 machine_mode
sh_hard_regno_caller_save_mode(unsigned int regno,unsigned int nregs,machine_mode mode)10634 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10635 machine_mode mode)
10636 {
10637 if (FP_REGISTER_P (regno)
10638 && (mode == SFmode
10639 || mode == SCmode
10640 || ((mode == DFmode || mode == DCmode)
10641 && ((regno - FIRST_FP_REG) & 1) == 0)))
10642 return mode;
10643
10644 return choose_hard_reg_mode (regno, nregs, false);
10645 }
10646
10647 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10648 static bool
sh_can_change_mode_class(machine_mode from,machine_mode to,reg_class_t rclass)10649 sh_can_change_mode_class (machine_mode from, machine_mode to,
10650 reg_class_t rclass)
10651 {
10652 /* We want to enable the use of SUBREGs as a means to
10653 VEC_SELECT a single element of a vector. */
10654
10655 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10656 This can be problematic when SFmode vector subregs need to be accessed
10657 on the stack with displacement addressing, as it happens with -O0.
10658 Thus we disallow the mode change for -O0. */
10659 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10660 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10661
10662 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10663 {
10664 if (TARGET_LITTLE_ENDIAN)
10665 {
10666 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10667 return !reg_classes_intersect_p (DF_REGS, rclass);
10668 }
10669 else
10670 {
10671 if (GET_MODE_SIZE (from) < 8)
10672 return !reg_classes_intersect_p (DF_REGS, rclass);
10673 }
10674 }
10675 return true;
10676 }
10677
10678 /* Return true if registers in machine mode MODE will likely be
10679 allocated to registers in small register classes. */
bool
sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  /* Unconditionally true on SH, regardless of MODE.  */
  return true;
}
10685
10686 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10687 that label is used. */
void
sh_mark_label (rtx address, int nuses)
{
  /* A GOTOFF wraps the label/symbol (possibly inside a PLUS) in an
     unspec vector; dig it out first.  */
  if (GOTOFF_P (address))
    {
      /* Extract the label or symbol.  */
      address = XEXP (address, 0);
      if (GET_CODE (address) == PLUS)
	address = XEXP (address, 0);
      address = XVECEXP (address, 0, 0);
    }
  /* Bump the referenced label's use count by NUSES.  */
  if (GET_CODE (address) == LABEL_REF
      && LABEL_P (XEXP (address, 0)))
    LABEL_NUSES (XEXP (address, 0)) += nuses;
}
10703
10704 /* Compute extra cost of moving data between one register class
10705 and another.
10706
10707 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10708 uses this information. Hence, the general register <-> floating point
10709 register information here is not used for SFmode. */
static int
sh_register_move_cost (machine_mode mode,
		       reg_class_t srcclass, reg_class_t dstclass)
{
  /* Writing the T bit or PR is expensive.  */
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  /* MAC register to MAC register.  */
  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  /* SImode between FP registers when double-precision fmov is available.  */
  if (mode == SImode && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  /* T bit into an FP register.  */
  if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
    return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);

  /* Between MAC and FP registers, either direction.  */
  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    {
      /* Discourage trying to use fp regs for a pointer.  This also
	 discourages fp regs with SImode because Pmode is an alias
	 of SImode on this target.  See PR target/48596.  */
      int addend = (mode == Pmode) ? 40 : 0;

      /* Scale by the number of 8-byte chunks moved.  */
      return ((TARGET_FMOVD ? 8 : 12) + addend)
	     * ((GET_MODE_SIZE (mode) + 7) / 8U);
    }

  /* FPUL <-> general registers.  */
  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  /* FPUL <-> PR/MAC/T.  */
  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  /* FPSCR moves that don't go through a general register.  */
  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  /* Remaining non-general-register moves with fmovd: scale by 8-byte
     chunks.  */
  if (TARGET_FMOVD
      && ! REGCLASS_HAS_GENERAL_REG (srcclass)
      && ! REGCLASS_HAS_GENERAL_REG (dstclass))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  /* Default: scale by the number of 4-byte words moved.  */
  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
10769
10770 static rtx
emit_load_ptr(rtx reg,rtx addr)10771 emit_load_ptr (rtx reg, rtx addr)
10772 {
10773 rtx mem = gen_const_mem (ptr_mode, addr);
10774
10775 if (Pmode != ptr_mode)
10776 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10777 return emit_move_insn (reg, mem);
10778 }
10779
/* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Emit and output a thunk that
   adjusts the incoming "this" pointer by DELTA (plus, if VCALL_OFFSET is
   nonzero, a value loaded via the vtable) and then tail-calls FUNCTION.
   The assembly is written to FILE.  */
static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this_rtx, this_value, sibcall, funexp;
  rtx_insn *insns;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  /* Pretend reload and the epilogue pass have run so the code below may
     use hard registers directly; undone at the end.  */
  reload_completed = 1;
  epilogue_completed = 1;
  crtl->uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      /* Skip over the hidden struct-return pointer argument.  */
      sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
    }
  this_rtx
    = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);

  {
    /* Use r1 and r3 as additional scratch registers when available.  */
    if (call_used_regs[1] && ! fixed_regs[1])
      scratch1 = gen_rtx_REG (ptr_mode, 1);
    /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
       pointing where to return struct values.  */
    if (call_used_regs[3] && ! fixed_regs[3])
      scratch2 = gen_rtx_REG (Pmode, 3);
  }

  this_value = plus_constant (Pmode, this_rtx, delta);
  /* Load *(this + delta) early when that address is directly usable and
     doing so won't conflict with materializing DELTA below.  */
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  /* Adjust the "this" pointer by DELTA.  */
  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this_rtx, this_value);
  else
    {
      /* DELTA doesn't fit an add-immediate; materialize it first.  */
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this_rtx, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this_rtx);

      offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this_rtx + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this_rtx, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 0
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      if (TARGET_FDPIC)
	{
	  sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
	  XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
	}
      else
	{
	  sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
	  XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
	}
    }
  else
    {
      /* Indirect call through a scratch register.  */
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insns = get_insns ();

  if (optimize > 0)
    {
      if (! cfun->cfg)
	init_flow (cfun);
      split_all_insns_noflow ();
    }

  sh_reorg ();
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
10949
10950 /* Return an RTX pair for the address and call site label of a function
10951 NAME of kind KIND, placing the result in TARGET if not NULL. For
10952 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10953 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10954 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10955 address of the function itself, not a function descriptor, so they
   can only be used with functions not using the FDPIC register that
   are known to be called directly without a PLT entry.  */
10958
function_symbol_result
function_symbol (rtx target, const char *name, sh_function_kind kind)
{
  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
  /* LAB stays (const_int 0) unless a call-site label is made below.  */
  rtx lab = const0_rtx;
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  /* Load the address through the GOT.  */
	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  if (TARGET_FDPIC)
	    {
	      /* We use PC-relative calls, since GOTOFF can only refer
		 to writable data.  This works along with sh_sfunc_call.  */
	      lab = PATTERN (gen_call_site ());
	      emit_insn (gen_sym_label2reg (reg, sym, lab));
	    }
	  else
	    {
	      /* ??? To allow cse to work, we use GOTOFF relocations.
		 we could add combiner patterns to transform this into
		 straight pc-relative calls with sym2PIC / bsrf when
		 label load and function call are still 1:1 and in the
		 same basic block during combine.  */
	      emit_insn (gen_symGOTOFF2reg (reg, sym));
	    }

	  sym = reg;
	  break;
	}
      }
  /* Copy into TARGET when the address ended up elsewhere.  */
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return function_symbol_result (target, lab);
    }
  return function_symbol_result (sym, lab);
}
11015
11016 /* Find the number of the first general purpose register in S that
11017 is not set. */
11018 static int
scavenge_reg(HARD_REG_SET * s)11019 scavenge_reg (HARD_REG_SET *s)
11020 {
11021 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11022 if (TEST_HARD_REG_BIT (*s, r))
11023 return r;
11024 return -1;
11025 }
11026
/* Return an rtx for the value the PR register had on entry to the
   current function, wrapped in an UNSPEC_RA.  */
rtx
sh_get_pr_initial_val (void)
{
  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called after register
     allocation has started, unless it has been called before for the
     same register.  And even then, we end in trouble if we didn't use
     the register in the same basic block before.  So call
     get_hard_reg_initial_val now and wrap it in an unspec if we might
     need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose registers, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
  return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
}
11045
11046 bool
sh_expand_t_scc(rtx operands[])11047 sh_expand_t_scc (rtx operands[])
11048 {
11049 enum rtx_code code = GET_CODE (operands[1]);
11050 rtx target = operands[0];
11051 rtx op0 = operands[2];
11052 rtx op1 = operands[3];
11053 rtx result = target;
11054
11055 if (!REG_P (op0) || REGNO (op0) != T_REG
11056 || !CONST_INT_P (op1))
11057 return false;
11058 if (!REG_P (result))
11059 result = gen_reg_rtx (SImode);
11060 HOST_WIDE_INT val = INTVAL (op1);
11061 if ((code == EQ && val == 1) || (code == NE && val == 0))
11062 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11063 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11064 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11065 else if (code == EQ || code == NE)
11066 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11067 else
11068 return false;
11069 if (result != target)
11070 emit_move_insn (target, result);
11071 return true;
11072 }
11073
11074 /* INSN is an sfunc; return the rtx that describes the address used. */
/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  /* Look through the PARALLEL for a USE of a Pmode general register;
     that register holds the sfunc's address.  */
  rtx pattern = PATTERN (insn);
  const int len = XVECLEN (pattern, 0);
  for (int i = 0; i < len; i++)
    {
      rtx part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
	  && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
	return XEXP (part, 0);
    }
  /* Otherwise the address is operand 1 of the leading unspec_volatile.  */
  gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
  return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
}
11090
11091 /* Verify that the register in use_sfunc_addr still agrees with the address
11092 used in the sfunc. This prevents fill_slots_from_thread from changing
11093 use_sfunc_addr.
11094 INSN is the use_sfunc_addr instruction, and REG is the register it
11095 guards. */
bool
check_use_sfunc_addr (rtx_insn *insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      /* Stop at control-flow boundaries; an sfunc past one of these
	 would not be guarded by INSN.  */
      if (LABEL_P (insn) || JUMP_P (insn))
	break;
      if (! INSN_P (insn))
	continue;

      /* Look at the first insn of a delay-slot SEQUENCE.  */
      if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
	insn = seq->insn (0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  /* The sfunc must exist; reaching here indicates a malformed stream.  */
  gcc_unreachable ();
}
11116
11117 /* This function returns a constant rtx that represents 2**15 / pi in
11118 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11119 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
11120 static GTY(()) rtx sh_fsca_sf2int_rtx;
11121
11122 rtx
sh_fsca_sf2int(void)11123 sh_fsca_sf2int (void)
11124 {
11125 if (! sh_fsca_sf2int_rtx)
11126 {
11127 REAL_VALUE_TYPE rv;
11128
11129 real_from_string (&rv, "10430.378350470453");
11130 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11131 }
11132
11133 return sh_fsca_sf2int_rtx;
11134 }
11135
11136 /* This function returns a constant rtx that represents pi / 2**15 in
11137 SFmode. It's used to scale SFmode angles, in radians, to a
11138 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
11139 maps to 0x10000. */
11140 static GTY(()) rtx sh_fsca_int2sf_rtx;
11141
11142 rtx
sh_fsca_int2sf(void)11143 sh_fsca_int2sf (void)
11144 {
11145 if (! sh_fsca_int2sf_rtx)
11146 {
11147 REAL_VALUE_TYPE rv;
11148
11149 real_from_string (&rv, "9.587379924285257e-5");
11150 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11151 }
11152
11153 return sh_fsca_int2sf_rtx;
11154 }
11155
11156 /* Initialize the CUMULATIVE_ARGS structure. */
11157 void
sh_init_cumulative_args(CUMULATIVE_ARGS * pcum,tree fntype,rtx libname ATTRIBUTE_UNUSED,tree fndecl,signed int n_named_args,machine_mode mode)11158 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11159 tree fntype,
11160 rtx libname ATTRIBUTE_UNUSED,
11161 tree fndecl,
11162 signed int n_named_args,
11163 machine_mode mode)
11164 {
11165 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11166 pcum->free_single_fp_reg = 0;
11167 pcum->outgoing = n_named_args != -1;
11168
11169 /* FIXME: Should we check TARGET_HITACHI here ??? */
11170 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11171
11172 if (fntype)
11173 {
11174 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11175 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11176 pcum->prototype_p = prototype_p (fntype);
11177 pcum->arg_count [(int) SH_ARG_INT] = false;
11178 }
11179 else
11180 {
11181 pcum->arg_count [(int) SH_ARG_INT] = 0;
11182 pcum->prototype_p = false;
11183 if (mode != VOIDmode)
11184 {
11185 /* If the default ABI is the Renesas ABI then all library
11186 calls must assume that the library will be using the
11187 Renesas ABI. So if the function would return its result
11188 in memory then we must force the address of this memory
11189 block onto the stack. Ideally we would like to call
11190 targetm.calls.return_in_memory() here but we do not have
11191 the TYPE or the FNDECL available so we synthesize the
11192 contents of that function as best we can. */
11193 pcum->force_mem =
11194 (TARGET_DEFAULT & MASK_HITACHI)
11195 && (mode == BLKmode
11196 || (GET_MODE_SIZE (mode) > 4
11197 && !(mode == DFmode
11198 && TARGET_FPU_DOUBLE)));
11199 }
11200 else
11201 pcum->force_mem = false;
11202 }
11203 }
11204
/* Build a TRUNCATE (or, when foldable, an extension) of X to MODE.
   An existing ZERO_EXTEND/SIGN_EXTEND around X may be folded away;
   NEED_SIGN_EXT restricts folding a narrower extension to sign
   extensions only.  */
rtx
sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
	/* The extension's operand already has the requested mode.  */
	return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
	/* Truncating the wider inner value gives the same result.  */
	x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
	       && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
	{
	  /* Re-extend the narrower inner value directly to MODE.  */
	  code = GET_CODE (x);
	  x = inner;
	}
    }
  return gen_rtx_fmt_e (code, mode, x);
}
11228
11229 /* Load and store depend on the highpart of the address. However,
11230 set_attr_alternative does not give well-defined results before reload,
11231 so we must look at the rtl ourselves to see if any of the feeding
11232 registers is used in a memref.
11233
11234 Return true iff INSN contains a MEM. */
bool
sh_contains_memref_p (rtx insn)
{
  /* Walk every sub-rtx of the insn's pattern looking for a MEM.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (MEM_P (*iter))
      return true;
  return false;
}
11244
11245 /* Return true iff INSN loads a banked register. */
11246 bool
sh_loads_bankedreg_p(rtx insn)11247 sh_loads_bankedreg_p (rtx insn)
11248 {
11249 if (GET_CODE (PATTERN (insn)) == SET)
11250 {
11251 rtx op = SET_DEST (PATTERN(insn));
11252 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11253 return true;
11254 }
11255
11256 return false;
11257 }
11258
11259 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */
static reg_class_t
sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  /* No preference; keep whatever class reload proposed.  */
  return rclass;
}
11265
11266 /* Implement TARGET_SECONDARY_RELOAD. */
/* Implement TARGET_SECONDARY_RELOAD.  */
static reg_class_t
sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* GBR-displacement and GBR-indirect memory operands go through R0.  */
  if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  /* GBR itself needs no secondary reload.  */
  if (REG_P (x) && REGNO (x) == GBR_REG)
    return NO_REGS;

  if (in_p)
    {
      /* Loading an immediate into an FP register, except for SFmode
	 0.0 / 1.0 which have dedicated handling.  */
      if (REGCLASS_HAS_FP_REG (rclass)
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
	switch (mode)
	  {
	  case E_SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case E_DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case E_SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      /* MACL/MACH/T or an address calculation can't be moved into FPUL
	 directly; go through a general register.  */
      if (rclass == FPUL_REGS
	  && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
			     || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      /* Immediates destined for FPUL.  */
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      /* FPSCR loads from pseudos or displacement memory go through a
	 general register.  */
      if (rclass == FPSCR_REGS
	  && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
    } /* end of input-only processing.  */

  /* Moves between the FP and general register files (in SFmode or
     SImode) are routed through FPUL.  */
  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (REG_P (x)
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && REG_P (x)
	   && FP_REGISTER_P (REGNO (x))))
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
      && (MEM_P (x)
	  || (REG_P (x)
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
    }

  /* MAC/PR <-> anything that isn't a general register and isn't already
     the same class goes through a general register.  */
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;

  /* If here fall back to loading FPUL register through general registers.
     This case can happen when movsi_ie insn is picked initially to
     load/store the FPUL register from/to another register, and then the
     other register is allocated on the stack.  */
  if (rclass == FPUL_REGS && true_regnum (x) == -1)
    return GENERAL_REGS;

  /* Force mov.b / mov.w displacement addressing insn to use R0 as
     the other operand.
     On SH2A could also just leave it alone here, which would result in a
     4 byte move insn being generated instead.  However, for this to work
     the insns must have the appropriate alternatives.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && satisfies_constraint_Sdd (x)
      && sh_disp_addr_displacement (x)
	 <= sh_max_mov_insn_displacement (mode, false))
    return R0_REGS;

  /* When reload is trying to address a QImode or HImode subreg on the stack,
     force any subreg byte into R0_REGS, as this is going to become a
     displacement address.
     We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
     is on the stack, the memref to it might already require a displacement
     and that has to be added to the final address.  At this point we don't
     know the cumulative displacement so we assume the worst case.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
    return R0_REGS;

  return NO_REGS;
}
11383
11384 /* Return true if SUBST can't safely replace its equivalent during RA. */
/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */
static bool
sh_cannot_substitute_mem_equiv_p (rtx)
{
  /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
     uses R0 and may cause spill failure when R0 is already used.
     We have to return true for that case at least.
     Moreover, SH has strong R0 affinity and does not have enough hard
     registers to make the equiv substitution win in size and speed on
     average working sets.  The pseudos produced to hold the equiv
     values can't get good hard registers for bad cases and end up as
     memory save/restore insns which make the code worse.  */
  return true;
}
11398
11399 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11400 static bool
sh_legitimize_address_displacement(rtx * offset1,rtx * offset2,poly_int64 orig_offset,machine_mode mode)11401 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11402 poly_int64 orig_offset,
11403 machine_mode mode)
11404 {
11405 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11406 || (TARGET_SH2E && mode == SFmode))
11407 return false;
11408
11409 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11410 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11411 {
11412 *offset1 = adj.offset_adjust;
11413 *offset2 = adj.mov_disp;
11414 return true;
11415 }
11416
11417 return false;
11418 }
11419
/* Return true if the movsf insn should be split with an additional
   register.  */
11422 bool
sh_movsf_ie_ra_split_p(rtx op0,rtx op1,rtx op2)11423 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11424 {
11425 /* op0 == op1 */
11426 if (rtx_equal_p (op0, op1))
11427 return true;
11428 /* fy, FQ, reg */
11429 if (GET_CODE (op1) == CONST_DOUBLE
11430 && ! satisfies_constraint_G (op1)
11431 && ! satisfies_constraint_H (op1)
11432 && REG_P (op0)
11433 && REG_P (op2))
11434 return true;
11435 /* f, r, y */
11436 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11437 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11438 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11439 return true;
11440 /* r, f, y */
11441 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11442 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11443 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11444 return true;
11445
11446 return false;
11447 }
11448
11449 static void
sh_conditional_register_usage(void)11450 sh_conditional_register_usage (void)
11451 {
11452 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11453 if (! VALID_REGISTER_P (regno))
11454 fixed_regs[regno] = call_used_regs[regno] = 1;
11455 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11456 if (flag_pic)
11457 {
11458 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11459 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11460 }
11461 if (TARGET_FDPIC)
11462 {
11463 fixed_regs[PIC_REG] = 1;
11464 call_used_regs[PIC_REG] = 1;
11465 call_really_used_regs[PIC_REG] = 1;
11466 }
11467 /* Renesas saves and restores mac registers on call. */
11468 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11469 {
11470 call_really_used_regs[MACH_REG] = 0;
11471 call_really_used_regs[MACL_REG] = 0;
11472 }
11473
11474 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11475 if (! fixed_regs[regno] && call_really_used_regs[regno])
11476 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11477
11478 call_really_used_regs[FPSCR_MODES_REG] = 0;
11479 call_really_used_regs[FPSCR_STAT_REG] = 0;
11480 }
11481
11482 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11483
11484 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
static bool
sh_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      rtx base, offset;
      split_const (x, &base, &offset);

      /* Reject symbol+offset constants whose offset lands outside the
	 symbol's own object.  */
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	return false;
    }

  /* With FDPIC, symbolic constants (possibly plus an offset) are never
     legitimate as-is.  */
  if (TARGET_FDPIC
      && (SYMBOLIC_CONST_P (x)
	  || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
	      && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
    return false;

  /* Accept non-CONST_DOUBLEs, FP/DImode CONST_DOUBLEs, and the VOIDmode
     CONST_DOUBLEs that can_store_by_pieces constructs.  */
  return GET_CODE (x) != CONST_DOUBLE
	 || mode == DFmode || mode == SFmode
	 || mode == DImode || GET_MODE (x) == VOIDmode;
}
11508
/* The division strategy currently in effect.  */
enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11510
/* Register the out-of-line synchronization libfuncs for word-sized
   operations.  NOTE(review): name suggests this implements the
   TARGET_INIT_SYNC_LIBFUNCS-style hook -- confirm against the hook table.  */
static void
sh_init_sync_libfuncs (void)
{
  init_sync_libfuncs (UNITS_PER_WORD);
}
11516
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  */
bool
sh_can_use_simple_return_p (void)
{
  /* Not possible before reload has finished, or when a frame pointer
     is needed.  */
  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Moving prologue around doesn't reduce the size.  */
  if (optimize_function_for_size_p (cfun))
    return false;

  /* Finally, allow for pr save.  */
  HARD_REG_SET live_regs_mask;
  int d = calc_live_regs (&live_regs_mask);

  if (rounded_frame_size (d) > 4)
    return false;

  return true;
}
11538
11539 /*------------------------------------------------------------------------------
11540 Address mode optimization support code
11541 */
11542
/* Type used for address displacements in the address mode optimization
   code below.  */
typedef HOST_WIDE_INT disp_t;
static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;

/* MAX_DISP doubles as the "invalid displacement" marker, so it is not
   itself usable as a real displacement value.  */
static const disp_t INVALID_DISP = MAX_DISP;
11547
/* A memory reference which is described by a base register and a
   displacement.  */
class base_reg_disp
{
public:
  base_reg_disp (rtx br, disp_t d);

  /* True if this holds a base register (and a valid displacement).  */
  bool is_reg (void) const;

  /* True if this holds a pure displacement without a base register.  */
  bool is_disp (void) const;

  rtx reg (void) const;
  disp_t disp (void) const;

private:
  /* Base register, or NULL_RTX for a pure displacement.  */
  rtx reg_;

  /* Displacement value; INVALID_DISP marks an unusable result.  */
  disp_t disp_;
};
11564
/* Construct from a base register (may be NULL_RTX) and a displacement.  */
inline
base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}

/* A "reg" result requires both a register and a valid displacement.  */
inline bool
base_reg_disp::is_reg (void) const
{
  return reg_ != NULL_RTX && disp_ != INVALID_DISP;
}

/* A "disp" result is a valid displacement with no base register.  */
inline bool
base_reg_disp::is_disp (void) const
{
  return reg_ == NULL_RTX && disp_ != INVALID_DISP;
}

inline rtx
base_reg_disp::reg (void) const
{
  return reg_;
}

inline disp_t
base_reg_disp::disp (void) const
{
  return disp_;
}
11594
/* Find the base register and calculate the displacement for a given
   address rtx 'x'.  DISP and BASE_REG accumulate results across the
   recursive invocations; callers normally rely on their defaults.  */
static base_reg_disp
sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
		       rtx base_reg = NULL)
{
  if (REG_P (x))
    {
      /* GBR itself terminates the search immediately.  */
      if (REGNO (x) == GBR_REG)
	return base_reg_disp (x, disp);

      /* We've reached a hard-reg.  This is probably the point where
	 function args are copied to pseudos.  Do not go any further and
	 stick to the pseudo.  If the original mem addr was in a hard reg
	 from the beginning, it will become the base reg.  */
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	return base_reg_disp (base_reg != NULL ? base_reg : x, disp);

      /* Find the def of the reg and trace it.  If there are more than one
	 defs and they are not the same, assume it's not safe to proceed.  */
      rtx_insn* last_i = NULL;
      rtx last_set = NULL;
      for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));

	  /* Accept multiple defs, as long as they are equal.  */
	  if (last_set == NULL || rtx_equal_p (last_set, set))
	    {
	      last_i = DF_REF_INSN (d);
	      last_set = set;
	    }
	  else
	    {
	      last_i = NULL;
	      last_set = NULL;
	      break;
	    }
	}

      /* Recurse into the SET_SRC of the found def; the SET_DEST becomes
	 the candidate base reg.  */
      if (last_set != NULL && last_i != NULL)
	return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
				      XEXP (last_set, 0));

      /* When here, no previous insn was found that sets the reg.
	 The input reg is already the base reg.  */
      return base_reg_disp (x, disp);
    }

  else if (GET_CODE (x) == PLUS)
    {
      base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
      base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));

      /* Either left or right val must be a reg.
	 We don't handle the case of 'reg + reg' here.  */
      if (left_val.is_reg () && right_val.is_disp ())
	return base_reg_disp (left_val.reg (), left_val.disp ()
			      + right_val.disp () + disp);
      else if (right_val.is_reg () && left_val.is_disp ())
	return base_reg_disp (right_val.reg (), right_val.disp ()
			      + left_val.disp () + disp);
      else
	return base_reg_disp (base_reg, disp);
    }

  /* A constant contributes to the accumulated displacement only.  */
  else if (CONST_INT_P (x))
    return base_reg_disp (NULL, disp + INTVAL (x));

  /* Didn't find anything useful.  */
  return base_reg_disp (base_reg, disp);
}
11668
/* Given an insn and a memory operand, try to find an equivalent GBR
   based memory address and return the corresponding new memory address.
   Return NULL_RTX if not found.  */
rtx
sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
{
  /* Nothing to do for non-memory operands or addresses that already use
     the GBR.  */
  if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
    return NULL_RTX;

  /* Leave post/pre inc/dec or any other side effect addresses alone.  */
  if (side_effects_p (XEXP (mem, 0)))
    return NULL_RTX;

  /* When not optimizing there might be no dataflow available.  */
  if (df == NULL)
    return NULL_RTX;

  base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));

  if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
    {
      /* If GBR is marked as call clobbered we bail out if we see a call.
	 FIXME: Actually should check if this mem refers to the gbr value
	 before or after the call.  If there is a store_gbr preceding this
	 mem, it's safe to use GBR for this mem.

	 If GBR is not marked as call clobbered, but there is some other
	 def than a call, it's probably a load_gbr upon which we also
	 bail out to be on the safe side.
	 FIXME: Should check if we have a use-after-def case, such as
	 the call case above.  */
      for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  if (CALL_P (DF_REF_INSN (d)))
	    {
	      if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
		return NULL_RTX;
	      else
		continue;
	    }
	  else
	    return NULL_RTX;
	}

      /* Only return a new address if the displacement actually fits the
	 GBR displacement constraint for this access mode.  */
      rtx disp = GEN_INT (gbr_disp.disp ());
      if (gbr_displacement (disp, GET_MODE (mem)))
	return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
    }

  return NULL_RTX;
}
11721
11722 /*------------------------------------------------------------------------------
11723 Manual insn combine support code.
11724 */
11725
/* Return true if the specified insn pattern X contains any UNSPECs or
   UNSPEC_VOLATILEs anywhere in its sub-rtxes.  */
static bool
sh_unspec_insn_p (rtx x)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (i, array, x, ALL)
    if (*i != NULL
	&& (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
      return true;

  return false;
}
11739
/* Return true if the register operands of the specified insn are modified
   between the specified from and to insns (exclusive of those two).  */
bool
sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
				     const rtx_insn* from,
				     const rtx_insn* to)
{
  /* FIXME: Return true for multiple sets for now.  */
  rtx s = single_set (operands_insn);
  if (s == NULL_RTX)
    return true;

  /* Check every reg or subreg appearing in the SET_SRC.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
    if (*i != NULL &&
	((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
      return true;

  return false;
}
11760
11761 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11762 negates the T bit and stores the result in the T bit. */
11763 bool
sh_is_nott_insn(const rtx_insn * i)11764 sh_is_nott_insn (const rtx_insn* i)
11765 {
11766 return i != NULL && GET_CODE (PATTERN (i)) == SET
11767 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11768 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11769 }
11770
/* Given an insn, return the destination register if the insn is a 'movt'
   (T bit to register move), or NULL otherwise.  Forwards to the pattern
   based overload below.  */
rtx
sh_movt_set_dest (const rtx_insn* i)
{
  return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
}
11776
11777 rtx
sh_movt_set_dest(const_rtx pat)11778 sh_movt_set_dest (const_rtx pat)
11779 {
11780 return GET_CODE (pat) == SET
11781 && arith_reg_dest (XEXP (pat, 0), SImode)
11782 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11783 }
11784
/* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
   that stores the negated T bit in a register, and return the destination
   register rtx, or null.  Forwards to the pattern based overload below.  */
rtx
sh_movrt_set_dest (const rtx_insn* i)
{
  return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
}
11793
11794 rtx
sh_movrt_set_dest(const_rtx pat)11795 sh_movrt_set_dest (const_rtx pat)
11796 {
11797 /* The negc movrt replacement is inside a parallel. */
11798 if (GET_CODE (pat) == PARALLEL)
11799 pat = XVECEXP (pat, 0, 0);
11800
11801 return GET_CODE (pat) == SET
11802 && arith_reg_dest (XEXP (pat, 0), SImode)
11803 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11804
11805 }
11806
/* Given an insn and a reg number, tell whether the reg dies or is unused
   after the insn, based on the insn's REG_DEAD / REG_UNUSED notes.  */
bool
sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
{
  return find_regno_note (i, REG_DEAD, regno) != NULL
	 || find_regno_note (i, REG_UNUSED, regno) != NULL;
}
11815
11816 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11817 mark it as being used after the insn. */
11818 void
sh_remove_reg_dead_or_unused_notes(rtx_insn * i,int regno)11819 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11820 {
11821 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11822 remove_note (i, n);
11823 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11824 remove_note (i, n);
11825 }
11826
/* Given an insn check if it contains any post/pre inc/dec mem operands and
   add the REG_INC notes accordingly.  Returns the (unchanged) insn I.
   FIXME: This function is very similar to lra.c (add_auto_inc_notes).
   FIXME: This function is currently used by peephole2 patterns because
	  the peephole2 pass does not preserve REG_INC notes.  If the notes
	  are dropped the following passes will do wrong things.  */
rtx_insn*
sh_check_add_incdec_notes (rtx_insn* i)
{
  /* Callback invoked by for_each_inc_dec for every auto-inc/dec found in
     the pattern; ARG is the insn to attach REG_INC notes to.  */
  struct for_each_inc_dec_clb
  {
    static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
		     rtx dest, rtx src ATTRIBUTE_UNUSED,
		     rtx srcoff ATTRIBUTE_UNUSED, void* arg)
    {
      gcc_assert (REG_P (dest));

      rtx_insn* i = (rtx_insn*)arg;
      /* Add the note only once per incremented register.  */
      if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
	add_reg_note (i, REG_INC, dest);

      return 0;
    }
  };

  for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
  return i;
}
11855
/* Given a move insn destination and a source, make sure that the move source
   operand is not a post-inc mem load with the same address reg as the
   destination.  Returns the modified source operand with the post-inc removed
   if necessary.  */
rtx
sh_remove_overlapping_post_inc (rtx dst, rtx src)
{
  if (!MEM_P (src))
    return src;

  rtx addr = XEXP (src, 0);

  /* Strip the post-inc when its address reg overlaps the destination;
     the resulting plain address load is still correct, only the reg
     increment is dropped.  */
  if (GET_CODE (addr) == POST_INC
      && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
    return replace_equiv_address (src, XEXP (addr, 0));

  /* POST_MODIFY addresses are not expected here.  */
  gcc_assert (GET_CODE (addr) != POST_MODIFY);
  return src;
}
11875
11876 /* Emit a move insn that is safe to be used in peephole patterns. */
11877 rtx_insn*
sh_peephole_emit_move_insn(rtx dst,rtx src)11878 sh_peephole_emit_move_insn (rtx dst, rtx src)
11879 {
11880 return sh_check_add_incdec_notes (
11881 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11882 }
11883
/* Given an op rtx and an insn, try to find out whether the result of the
   specified op consists only of logical operations on T bit stores.  */
bool
sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
{
  if (!logical_operator (op, SImode))
    return false;

  rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
  int op_is_t_count = 0;

  /* Count how many of the two operands are (possibly indirect) T bit
     stores.  */
  for (int i = 0; i < 2; ++i)
    {
      if (t_reg_operand (ops[i], VOIDmode)
	  || negt_reg_operand (ops[i], VOIDmode))
	op_is_t_count++;

      else
	{
	  /* Trace the operand back to its defining insn and check whether
	     that is a T bit store or, recursively, another logical
	     combination of T bit stores.  */
	  set_of_reg op_set = sh_find_set_of_reg
	    (ops[i], insn, prev_nonnote_nondebug_insn_bb);
	  if (op_set.set_src == NULL_RTX)
	    continue;

	  if (t_reg_operand (op_set.set_src, VOIDmode)
	      || negt_reg_operand (op_set.set_src, VOIDmode)
	      || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
	    op_is_t_count++;
	}
    }

  /* Both operands must qualify.  */
  return op_is_t_count == 2;
}
11917
11918 /* Given the operand that is extended in a sign/zero extend insn, and the
11919 insn, try to figure out whether the sign/zero extension can be replaced
11920 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11921 NULL_RTX otherwise. */
11922 rtx
sh_try_omit_signzero_extend(rtx extended_op,rtx_insn * insn)11923 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11924 {
11925 if (REG_P (extended_op))
11926 extended_op = extended_op;
11927 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11928 extended_op = SUBREG_REG (extended_op);
11929 else
11930 return NULL_RTX;
11931
11932 /* Reg moves must be of the same mode. */
11933 if (GET_MODE (extended_op) != SImode)
11934 return NULL_RTX;
11935
11936 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11937 prev_nonnote_nondebug_insn_bb);
11938 if (s.set_src == NULL_RTX)
11939 return NULL_RTX;
11940
11941 if (t_reg_operand (s.set_src, VOIDmode)
11942 || negt_reg_operand (s.set_src, VOIDmode))
11943 return extended_op;
11944
11945 /* If the zero extended reg was formed by a logical operation, check the
11946 operands of the logical operation. If both originated from T bit
11947 stores the zero extension can be eliminated. */
11948 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11949 return extended_op;
11950
11951 return NULL_RTX;
11952 }
11953
/* Given the current insn, which is assumed to be a movrt_negc insn, try to
   figure out whether it should be converted into a movt-xor sequence in
   the movrt_negc splitter.
   Returns true if insns have been modified and the splitter has succeeded.  */
bool
sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
{
  /* In cases such as
	tst	r4,r4
	mov	#-1,r1
	negc	r1,r1
	tst	r4,r4
     we can replace the T bit clobbering negc with a movt-xor sequence and
     eliminate the redundant comparison.
     Because the xor insn depends on register allocation results, allow this
     only before reload.  */
  if (!can_create_pseudo_p ())
    return false;

  /* Locate the T bit setting insns before and after this insn.  */
  set_of_reg t_before_negc = sh_find_set_of_reg
    (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
  set_of_reg t_after_negc = sh_find_set_of_reg
    (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);

  /* The transformation is only safe if the surrounding T bit settings are
     identical, nothing between uses or modifies the T bit, and the
     following T bit setter has no unspec, volatile, side effect or
     potentially trapping behavior.  */
  if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
      && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
      && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_insn_operands_modified_between_p (t_before_negc.insn,
					       t_before_negc.insn,
					       t_after_negc.insn)
      && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_unspec_insn_p (t_after_negc.insn)
      && !volatile_insn_p (PATTERN (t_after_negc.insn))
      && !side_effects_p (PATTERN (t_after_negc.insn))
      && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
    {
      emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
      set_insn_deleted (t_after_negc.insn);
      return true;
    }
  else
    return false;
}
11997
/* Given a reg and the current insn, see if the value of the reg originated
   from a sign or zero extension and return the discovered information.  */
sh_extending_set_of_reg
sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
{
  if (reg == NULL)
    return sh_extending_set_of_reg (curr_insn);

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return sh_extending_set_of_reg (curr_insn);

  /* FIXME: Also search the predecessor basic blocks.  It seems that checking
     only the adjacent predecessor blocks would cover most of the cases.
     Also try to look through the first extension that we hit.  There are some
     cases, where a zero_extend is followed an (implicit) sign_extend, and it
     fails to see the sign_extend.  */
  sh_extending_set_of_reg result = sh_find_set_of_reg
    (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);

  if (result.set_src != NULL)
    {
      if (GET_CODE (result.set_src) == SIGN_EXTEND
	  || GET_CODE (result.set_src) == ZERO_EXTEND)
	{
	  /* Explicit extension insn found; record its input mode and
	     extension kind.  */
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"explicitly sign/zero extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (XEXP (result.set_src, 0));
	  result.ext_code = GET_CODE (result.set_src);
	}
      else if (MEM_P (result.set_src)
	       && (GET_MODE (result.set_src) == QImode
		   || GET_MODE (result.set_src) == HImode)
	       && !sh_unspec_insn_p (result.insn))
	{
	  /* On SH QIHImode memory loads always sign extend.  However, in
	     some cases where it seems that the higher bits are not
	     interesting, the loads will not be expanded as sign extending
	     insns, but as QIHImode loads into QIHImode regs.  We report that
	     the reg has been sign extended by the mem load.  When it is used
	     as such, we must convert the mem load into a sign extending insn,
	     see also sh_extending_set_of_reg::use_as_extended_reg.  */
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"implicitly sign extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (result.set_src);
	  result.ext_code = SIGN_EXTEND;
	}
    }

  return result;
}
12055
/* Given a reg that is known to be sign or zero extended at some insn,
   take the appropriate measures so that the extended value can be used as
   a reg at the specified insn and return the resulting reg rtx.  */
rtx
sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
  gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
  gcc_assert (from_mode == QImode || from_mode == HImode);

  if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
    {
      if (dump_file)
	fprintf (dump_file,
		 "use_as_extended_reg: converting non-extending mem load in "
		 "insn %d into sign-extending load\n", INSN_UID (insn));

      /* Replace the narrow mem load with an explicit sign-extending load
	 into a fresh SImode pseudo...  */
      rtx r = gen_reg_rtx (SImode);
      rtx_insn* i0;
      if (from_mode == QImode)
	i0 = sh_check_add_incdec_notes (
			emit_insn_after (gen_extendqisi2 (r, set_src), insn));
      else if (from_mode == HImode)
	i0 = sh_check_add_incdec_notes (
			emit_insn_after (gen_extendhisi2 (r, set_src), insn));
      else
	gcc_unreachable ();

      /* ... and keep the original destination reg alive by copying the
	 low part back, then delete the original load insn.  */
      emit_insn_after (
		gen_move_insn (XEXP (set_rtx, 0),
			       gen_lowpart (GET_MODE (set_src), r)), i0);
      set_insn_deleted (insn);
      return r;
    }
  else
    {
      rtx extension_dst = XEXP (set_rtx, 0);
      if (GET_MODE (extension_dst) != SImode)
	extension_dst = simplify_gen_subreg (SImode, extension_dst,
					     GET_MODE (extension_dst), 0);
      if (modified_between_p (extension_dst, insn, use_at_insn))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "use_as_extended_reg: dest reg %d of extending insn %d is "
		     "modified, inserting a reg-reg copy\n",
		     REGNO (extension_dst), INSN_UID (insn));

	  /* The extension dest reg is clobbered before the use; preserve
	     the extended value in a fresh pseudo right after the
	     extension.  */
	  rtx r = gen_reg_rtx (SImode);
	  emit_insn_after (gen_move_insn (r, extension_dst), insn);
	  return r;
	}
      else
	{
	  /* The extended value survives until the use; drop any REG_DEAD /
	     REG_UNUSED notes so the reg is considered live there.  */
	  sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
	  return extension_dst;
	}
    }
}
12115
12116 bool
can_use_as_unextended_reg(void)12117 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12118 {
12119 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12120 && (from_mode == QImode || from_mode == HImode)
12121 && set_src != NULL)
12122 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12123 else
12124 return false;
12125 }
12126
/* Return an SImode rtx for the reg that feeds the extension, valid at
   USE_AT_INSN.  Inserts a preserving copy if the reg is modified between
   the extension and the use.  */
rtx
sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (can_use_as_unextended_reg ());

  rtx r = XEXP (set_src, 0);
  rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);

  if (modified_between_p (r, insn, use_at_insn))
    {
      /* Reg is clobbered before the use; copy it into a fresh pseudo
	 right after the extension insn.  */
      rtx r1 = gen_reg_rtx (SImode);
      emit_insn_after (gen_move_insn (r1, r0), insn);
      return r1;
    }
  else
    {
      /* Reg survives; drop dead/unused notes so it is considered live at
	 the use.  */
      sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
					  ? REGNO (SUBREG_REG (r))
					  : REGNO (r));
      return r0;
    }
}
12149
/* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
   perform the necessary checks on the operands and split it accordingly.  */
void
sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
		      int subreg_offset, rtx operands[])
{
  gcc_assert (subreg_mode == QImode || subreg_mode == HImode);

  sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
							       curr_insn);
  sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
							       curr_insn);

  /* If one of the operands is known to be zero extended, that's already
     sufficient to mask out the unwanted high bits.  */
  if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      operands[1]));
      return;
    }
  if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (operands[0],
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* None of the operands seem to be zero extended.
     If both are sign extended it's OK, too.  */
  if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
      && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* Otherwise we have to insert a zero extension on one of the operands to
     mask out the unwanted high bits.
     Prefer the operand that has no known extension.  */
  if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
    std::swap (operands[0], operands[1]);

  /* Zero extend the low part of operands[0] into a fresh pseudo and test
     that against operands[1].  */
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
				  GET_MODE (operands[0]), subreg_offset);
  emit_insn (subreg_mode == QImode
	     ? gen_zero_extendqisi2 (tmp0, tmp1)
	     : gen_zero_extendhisi2 (tmp0, tmp1));
  emit_insn (gen_tstsi_t (tmp0, operands[1]));
}
12202
/* A helper class to increment/decrement a counter variable each time a
   function is entered/left.  Used below to track re-entrant recog
   invocations.  */
class scope_counter
{
public:
  /* Increment the referenced counter on scope entry.  */
  scope_counter (int& counter) : m_counter (counter) { ++m_counter; }

  /* Decrement on scope exit; the counter must never go negative.  */
  ~scope_counter (void)
  {
    --m_counter;
    gcc_assert (m_counter >= 0);
  }

  int count (void) const { return m_counter; }

private:
  int& m_counter;
};
12221
12222 /* Given an rtx x, determine whether the expression can be used to create
   an insn that calculates x and stores the result in the T bit.
12224 This is used by the 'treg_set_expr' predicate to construct insns sequences
12225 where T bit results are fed into other insns, such as addc, subc, negc
12226 insns.
12227
12228 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12229 distinguish between 'positive' and 'negative' forms. For now this has to
12230 be done in the preparation code. We could also introduce
12231 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
   two different patterns for the 'positive' and 'negative' forms.  However,
12233 the total amount of lines of code seems to be about the same and the
12234 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12235 recog function would need to look inside the expression by temporarily
12236 splitting it. */
12237 static int sh_recog_treg_set_expr_reent_count = 0;
12238
12239 bool
sh_recog_treg_set_expr(rtx op,machine_mode mode)12240 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12241 {
12242 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12243
12244 /* Limit the recursion count to avoid nested expressions which we can't
12245 resolve to a single treg set insn. */
12246 if (recursion.count () > 1)
12247 return false;
12248
12249 /* Early accept known possible operands before doing recog. */
12250 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12251 || negt_reg_operand (op, mode))
12252 return true;
12253
12254 /* Early reject impossible operands before doing recog.
12255 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12256 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12257 such as lower-subreg will bail out. Some insns such as SH4A movua are
12258 done with UNSPEC, so must reject those, too, or else it would result
12259 in an invalid reg -> treg move. */
12260 if (CONST_INT_P (op) || register_operand (op, mode)
12261 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12262 return false;
12263
12264 if (!can_create_pseudo_p ())
12265 return false;
12266
12267 /* expand_debug_locations may call this to compute rtx costs at
12268 very early stage. In that case, don't make new insns here to
12269 avoid codegen differences with -g. */
12270 if (currently_expanding_to_rtl)
12271 return false;
12272
12273 /* We are going to invoke recog in a re-entrant way and thus
12274 have to capture its current state and restore it afterwards. */
12275 recog_data_d prev_recog_data = recog_data;
12276
12277 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12278 SET_PREV_INSN (i) = NULL;
12279 SET_NEXT_INSN (i) = NULL;
12280
12281 /* If the comparison op doesn't have a result mode, set it to SImode. */
12282 machine_mode prev_op_mode = GET_MODE (op);
12283 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12284 PUT_MODE (op, SImode);
12285
12286 int result = recog (PATTERN (i), i, 0);
12287
12288 /* It seems there is no insn like that. Create a negated version and
12289 try again. If we hit a negated form, we'll allow that and append a
12290 nott sequence when splitting out the insns. Insns that do the split
12291 can then remove the trailing nott if they know how to deal with it. */
12292 if (result < 0 && COMPARISON_P (op))
12293 {
12294 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12295 if (cmp_mode == VOIDmode)
12296 cmp_mode = GET_MODE (XEXP (op, 1));
12297
12298 rtx_code prev_code = GET_CODE (op);
12299 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12300 result = recog (PATTERN (i), i, 0);
12301 PUT_CODE (op, prev_code);
12302 }
12303
12304 PUT_MODE (op, prev_op_mode);
12305 recog_data = prev_recog_data;
12306 return result >= 0;
12307 }
12308
/* Returns true when recog of a 'treg_set_expr' is currently in progress.
   This can be used as a condition for insn/split patterns to allow certain
   T bit setting patterns only to be matched as sub expressions of other
   patterns.  */
bool
sh_in_recog_treg_set_expr (void)
{
  return sh_recog_treg_set_expr_reent_count > 0;
}
12318
/* Given an rtx x, which is assumed to be some expression that has been
   matched by the 'treg_set_expr' predicate before, split and emit the
   insns that are necessary to calculate the expression and store the result
   in the T bit.
   The splitting is done recursively similar to 'try_split' in emit-rtl.c.
   Unfortunately we can't use 'try_split' here directly, as it tries to invoke
   'delete_insn' which then causes the DF parts to bail out, because we
   currently are inside another gen_split* function and would invoke
   'try_split' in a reentrant way.  */

/* Recursive splitting helper: returns the (first, last) pair of the insn
   sequence that replaces insn I, or (I, I) if no split happened.  N is the
   recursion depth, used for dumping only.  */
static std::pair<rtx_insn*, rtx_insn*>
sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
{
  if (dump_file)
    {
      fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
      print_rtl_single (dump_file, i);
      fprintf (dump_file, "\n");
    }

  rtx_insn* seq = split_insns (PATTERN (i), curr_insn);

  if (seq == NULL)
    return std::make_pair (i, i);

  /* Avoid infinite splitter loops if any insn of the result matches
     the original pattern.  */
  for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
    if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
      return std::make_pair (i, i);

  unshare_all_rtl_in_chain (seq);

  /* 'seq' is now a replacement for 'i'.  Assuming that 'i' is an insn in
     a linked list, replace the single insn with the new insns.  */
  rtx_insn* seqlast = seq;
  while (NEXT_INSN (seqlast) != NULL)
    seqlast = NEXT_INSN (seqlast);

  if (rtx_insn* iprev = PREV_INSN (i))
    SET_NEXT_INSN (iprev) = seq;
  if (rtx_insn* inext = NEXT_INSN (i))
    SET_PREV_INSN (inext) = seqlast;

  SET_PREV_INSN (seq) = PREV_INSN (i);
  SET_NEXT_INSN (seqlast) = NEXT_INSN (i);

  /* Detach the replaced insn from the chain.  */
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  /* Recursively split all insns.  */
  for (i = seq; ; i = NEXT_INSN (i))
    {
      std::pair<rtx_insn*, rtx_insn*> ii =
	  sh_try_split_insn_simple (i, curr_insn, n + 1);
      if (i == seq)
	seq = ii.first;
      if (i == seqlast)
	{
	  seqlast = ii.second;
	  break;
	}
      i = ii.first;
    }

  return std::make_pair (seq, seqlast);
}
12385
/* Emit insns that compute the expression X into T_REG, splitting the
   computation into simple insns where possible.  Returns the emitted insn
   list together with a trailing nott insn, if one remains.  */
sh_treg_insns
sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
{
  /* If X already is the T bit there is nothing to compute.  */
  if (t_reg_operand (x, VOIDmode))
    return sh_treg_insns ();

  /* Track re-entrancy of the treg-set-expr machinery for the duration
     of this function.  */
  scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);

  /* Wrap X into an unattached insn 'T_REG = X'.  */
  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insn:\n");
      print_rtl (dump_file, i);
      fprintf (dump_file, "\n");
    }

  /* If the insn is not found, we will try a negated form and append
     a nott.  */
  bool append_nott = false;

  /* We are going to invoke recog/split_insns in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  if (negt_reg_operand (x, GET_MODE (x)))
    {
      /* This is a normal movt followed by a nott.  It will be converted
	 into a movrt after initial expansion.  */
      XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
      append_nott = true;
    }
  else
    {
      /* If the comparison op doesn't have a mode set, set it to SImode.  */
      if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
	PUT_MODE (x, SImode);

      int insn_code = recog (PATTERN (i), i, 0);

      if (insn_code < 0 && COMPARISON_P (x))
	{
	  /* Unrecognized comparison: retry with the reversed condition
	     and remember to invert the result with a nott.  */
	  /* NOTE(review): cmp_mode is computed here but never used below;
	     possibly intended for an unordered-aware condition reversal on
	     FP compares -- confirm.  */
	  machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
	  if (cmp_mode == VOIDmode)
	    cmp_mode = GET_MODE (XEXP (x, 1));

	  PUT_CODE (x, reverse_condition (GET_CODE (x)));
	  insn_code = recog (PATTERN (i), i, 0);
	  append_nott = true;
	}

      gcc_assert (insn_code >= 0);
    }

  /* Try to recursively split the insn.  Some insns might refuse to split
     any further while we are in the treg_set_expr splitting phase.  They
     will be emitted as part of the outer insn and then split again.  */
  std::pair<rtx_insn*, rtx_insn*> insnlist =
	sh_try_split_insn_simple (i, curr_insn);

  /* Restore recog state.  */
  recog_data = prev_recog_data;

  /* Check whether the split already ends in a nott of its own.  */
  rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
			? insnlist.second
			: NULL;
  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insnlist:\n");
      print_rtl (dump_file, insnlist.first);
      fprintf (dump_file, "\n");

      if (nott_insn != NULL)
	fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
    }

  emit_insn (insnlist.first);

  /* A trailing nott from the split and a pending nott of our own cancel
     each other out.  */
  if (nott_insn != NULL && append_nott)
    {
      if (dump_file)
	fprintf (dump_file, "removing trailing nott\n");
      remove_insn (nott_insn);
      nott_insn = NULL;
      append_nott = false;
    }

  if (append_nott)
    nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));

  rtx_insn* first_insn = get_insns ();

  if (dump_file)
    {
      fprintf (dump_file, "resulting insns:\n");
      print_rtl (dump_file, first_insn);
      fprintf (dump_file, "\n");
    }

  return sh_treg_insns (first_insn, nott_insn);
}
12489
12490 /*------------------------------------------------------------------------------
12491 Mode switching support code.
12492 */
12493
/* Mode-switching "emit" hook: emit insns that switch the FPSCR from
   PREV_MODE to MODE.  PREV_MODE may be FP_MODE_NONE, meaning the
   previous mode is unknown.  */
static void
sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
		  int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300)
      && prev_mode != FP_MODE_NONE && prev_mode != mode)
    {
      /* These FPUs have dedicated FPSCR-bit toggle insns; usable only
	 when the previous mode is known, since they flip rather than
	 set the bits.  */
      emit_insn (gen_toggle_pr ());
      if (TARGET_FMOVD)
	emit_insn (gen_toggle_sz ());
    }
  else if (mode != FP_MODE_NONE)
    {
      /* Otherwise read-modify-write FPSCR through a temp register:
	 sts fpscr -> adjust PR (and SZ when FMOVD) -> lds fpscr.  */
      rtx tmp = gen_reg_rtx (SImode);
      emit_insn (gen_sts_fpscr (tmp));
      rtx i = NULL;

      const unsigned HOST_WIDE_INT fpbits =
	TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;

      if (prev_mode != FP_MODE_NONE && prev_mode != mode)
	/* Previous mode known and different: flipping the bits is
	   sufficient.  */
	i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else if (mode == FP_MODE_SINGLE)
	/* Previous mode unknown: force the bits clear for single...  */
	i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
      else if (mode == FP_MODE_DOUBLE)
	/* ...or set for double precision.  */
	i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
      else
	gcc_unreachable ();

      emit_insn (i);
      emit_insn (gen_lds_fpscr (tmp));
    }
}
12527
12528 static int
sh_mode_needed(int entity ATTRIBUTE_UNUSED,rtx_insn * insn)12529 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12530 {
12531 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12532 }
12533
12534 static int
sh_mode_after(int entity ATTRIBUTE_UNUSED,int mode,rtx_insn * insn)12535 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12536 {
12537 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12538 get_attr_fp_set (insn) != FP_SET_NONE)
12539 return (int) get_attr_fp_set (insn);
12540 else
12541 return mode;
12542 }
12543
12544 static int
sh_mode_entry(int entity ATTRIBUTE_UNUSED)12545 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12546 {
12547 return NORMAL_MODE (entity);
12548 }
12549
12550 static int
sh_mode_exit(int entity ATTRIBUTE_UNUSED)12551 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12552 {
12553 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12554 }
12555
12556 static int
sh_mode_priority(int entity ATTRIBUTE_UNUSED,int n)12557 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12558 {
12559 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12560 }
12561
12562 /*------------------------------------------------------------------------------
12563 Misc
12564 */
12565
12566 /* Return true if we use LRA instead of reload pass. */
12567 bool
sh_lra_p(void)12568 sh_lra_p (void)
12569 {
12570 return sh_lra_flag;
12571 }
12572
12573 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12574
12575 static bool
sh_use_by_pieces_infrastructure_p(unsigned HOST_WIDE_INT size,unsigned int align,enum by_pieces_operation op,bool speed_p)12576 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12577 unsigned int align,
12578 enum by_pieces_operation op,
12579 bool speed_p)
12580 {
12581 switch (op)
12582 {
12583 case MOVE_BY_PIECES:
12584 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12585 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12586 case STORE_BY_PIECES:
12587 case SET_BY_PIECES:
12588 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12589 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12590 default:
12591 return default_use_by_pieces_infrastructure_p (size, align,
12592 op, speed_p);
12593 }
12594 }
12595
12596 bool
sh_cannot_force_const_mem_p(machine_mode mode ATTRIBUTE_UNUSED,rtx x ATTRIBUTE_UNUSED)12597 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12598 rtx x ATTRIBUTE_UNUSED)
12599 {
12600 return TARGET_FDPIC;
12601 }
12602
12603 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12604 function descriptor) into r1 and the GOT address into r12,
12605 returning an rtx for r1. */
12606
12607 rtx
sh_load_function_descriptor(rtx funcdesc)12608 sh_load_function_descriptor (rtx funcdesc)
12609 {
12610 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12611 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12612 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12613 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12614
12615 emit_move_insn (r1, fnaddr);
12616 /* The ABI requires the entry point address to be loaded first, so
12617 prevent the load from being moved after that of the GOT
12618 address. */
12619 emit_insn (gen_blockage ());
12620 emit_move_insn (pic_reg, gotaddr);
12621 return r1;
12622 }
12623
12624 /* Return an rtx holding the initial value of the FDPIC register (the
12625 FDPIC pointer passed in from the caller). */
12626
12627 rtx
sh_get_fdpic_reg_initial_val(void)12628 sh_get_fdpic_reg_initial_val (void)
12629 {
12630 return get_hard_reg_initial_val (Pmode, PIC_REG);
12631 }
12632
12633 #include "gt-sh.h"
12634