1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2019 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #define IN_TARGET_CODE 1
25
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69 #include "toplev.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
/* Insn code of the "indirect_jump_scratch" pattern, exported so that other
   parts of the backend can recognize such insns by code.  */
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

/* True if SIZE fits the immediate operand range of an add insn
   (the I08 constraint).  */
#define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
/* SImode move and 3-operand add/sub generators; used below for stack
   adjustment and similar address arithmetic.  */
#define GEN_MOV (*(gen_movsi))
#define GEN_ADD3 (*(gen_addsi3))
#define GEN_SUB3 (*(gen_subsi3))

/* Used to simplify the logic below.  Find the attributes wherever
   they may be.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
		  : DECL_ATTRIBUTES (decl) \
		  ? (DECL_ATTRIBUTES (decl)) \
		  : TYPE_ATTRIBUTES (TREE_TYPE (decl))

/* Set to true by expand_prologue () when the function is an
   interrupt handler.  */
bool current_function_interrupt;

/* Attributes seen before any function body was available; the tail pointer
   lets new entries be appended in order.  */
tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  Set in sh_option_override.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* Number of r0 life regions.  */
static int r0_life_regions;

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Unique number for UNSPEC_BBR pattern.  */
static unsigned int unspec_bbr_uid = 1;
121
/* Provides the class number of the smallest class containing
   reg number.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  /* r0 has its own class; the remaining integer registers are plain
     general registers.  */
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* Floating point registers; the first one is special (FP0_REGS).  */
  FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  /* Branch target registers.  */
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  /* Double-precision register pairs.  */
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  /* Special/control registers (PR, T, MACH/MACL, FPUL, FPSCR, ...);
     the exact register-number assignment is defined by the target
     headers.  */
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};
166
/* Printable names for the hard registers, indexed by register number.  */
char sh_register_names[FIRST_PSEUDO_REGISTER] \
	[MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

/* Additional (alias) register names, e.g. accepted in asm operands.  */
char sh_additional_register_names[ADDREGNAMES_SIZE] \
	[MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Assembler dialect in effect: 0 by default, 1 when targeting SH4/SH4A
   (set in sh_option_override).  */
int assembler_dialect;
175
176 static void split_branches (rtx_insn *);
177 static int branch_dest (rtx);
178 static void print_slot (rtx_sequence *);
179 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
180 static void dump_table (rtx_insn *, rtx_insn *);
181 static bool broken_move (rtx_insn *);
182 static bool mova_p (rtx_insn *);
183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
186 static void sh_reorg (void);
187 static void sh_option_override (void);
188 static void sh_override_options_after_change (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
190 static rtx_insn* emit_frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
194 static int calc_live_regs (HARD_REG_SET *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static bool sh_frame_pointer_required (void);
197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
198 static int sh_mode_needed (int, rtx_insn *);
199 static int sh_mode_after (int, int, rtx_insn *);
200 static int sh_mode_entry (int);
201 static int sh_mode_exit (int);
202 static int sh_mode_priority (int entity, int n);
203
204 static rtx mark_constant_pool_use (rtx);
205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
206 int, bool *);
207 static tree sh_handle_resbank_handler_attribute (tree *, tree,
208 tree, int, bool *);
209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
210 tree, int, bool *);
211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
214 static void sh_print_operand (FILE *, rtx, int);
215 static void sh_print_operand_address (FILE *, machine_mode, rtx);
216 static bool sh_print_operand_punct_valid_p (unsigned char code);
217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
218 static void sh_output_function_epilogue (FILE *);
219 static void sh_insert_attributes (tree, tree *);
220 static const char *sh_check_pch_target_flags (int);
221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
223 static int sh_issue_rate (void);
224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
225 static short find_set_regmode_weight (rtx, machine_mode);
226 static short find_insn_regmode_weight (rtx, machine_mode);
227 static void find_regmode_weight (basic_block, machine_mode);
228 static int find_r0_life_regions (basic_block);
229 static void sh_md_init_global (FILE *, int, int);
230 static void sh_md_finish_global (FILE *, int);
231 static int rank_for_reorder (const void *, const void *);
232 static void swap_reorder (rtx_insn **, int);
233 static void ready_reorder (rtx_insn **, int);
234 static bool high_pressure (machine_mode);
235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
237 static void sh_md_init (FILE *, int, int);
238 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
239
240 static bool sh_function_ok_for_sibcall (tree, tree);
241
242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
243 static bool sh_ms_bitfield_layout_p (const_tree);
244
245 static void sh_init_builtins (void);
246 static tree sh_builtin_decl (unsigned, bool);
247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
249 HOST_WIDE_INT, tree);
250 static void sh_file_start (void);
251 static bool sh_assemble_integer (rtx, unsigned int, int);
252 static bool flow_dependent_p (rtx, rtx);
253 static void flow_dependent_p_1 (rtx, const_rtx, void *);
254 static int shiftcosts (rtx);
255 static int and_xor_ior_costs (rtx, int);
256 static int addsubcosts (rtx);
257 static int multcosts (rtx);
258 static bool unspec_caller_rtx_p (rtx);
259 static bool sh_cannot_copy_insn_p (rtx_insn *);
260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
263 static int sh_pr_n_sets (void);
264 static rtx sh_allocate_initial_value (rtx);
265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
267 machine_mode,
268 struct secondary_reload_info *);
269 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
270 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
271 static rtx sh_delegitimize_address (rtx);
272 static bool sh_cannot_substitute_mem_equiv_p (rtx);
273 static bool sh_legitimize_address_displacement (rtx *, rtx *,
274 poly_int64, machine_mode);
275 static int scavenge_reg (HARD_REG_SET *s);
276
277 static rtx sh_struct_value_rtx (tree, int);
278 static rtx sh_function_value (const_tree, const_tree, bool);
279 static bool sh_function_value_regno_p (const unsigned int);
280 static rtx sh_libcall_value (machine_mode, const_rtx);
281 static bool sh_return_in_memory (const_tree, const_tree);
282 static rtx sh_builtin_saveregs (void);
283 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
284 tree, int *, int);
285 static bool sh_strict_argument_naming (cumulative_args_t);
286 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
287 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
288 static tree sh_build_builtin_va_list (void);
289 static void sh_va_start (tree, rtx);
290 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
291 static bool sh_promote_prototypes (const_tree);
292 static machine_mode sh_promote_function_mode (const_tree type,
293 machine_mode,
294 int *punsignedp,
295 const_tree funtype,
296 int for_return);
297 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
298 const_tree, bool);
299 static bool sh_callee_copies (cumulative_args_t, machine_mode,
300 const_tree, bool);
301 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
302 tree, bool);
303 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
304 const_tree, bool);
305 static rtx sh_function_arg (cumulative_args_t, machine_mode,
306 const_tree, bool);
307 static int sh_dwarf_calling_convention (const_tree);
308 static void sh_encode_section_info (tree, rtx, int);
309 static bool sh2a_function_vector_p (tree);
310 static void sh_trampoline_init (rtx, tree, rtx);
311 static rtx sh_trampoline_adjust_address (rtx);
312 static void sh_conditional_register_usage (void);
313 static bool sh_legitimate_constant_p (machine_mode, rtx);
314 static int mov_insn_size (machine_mode, bool);
315 static int mov_insn_alignment_mask (machine_mode, bool);
316 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
317 unsigned int,
318 enum by_pieces_operation,
319 bool);
320 static bool sequence_insn_p (rtx_insn *);
321 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
322 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
323 machine_mode, bool);
324 static bool sh_legitimate_combined_insn (rtx_insn* insn);
325
326 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
327
328 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
329 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
330 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
331 static bool sh_modes_tieable_p (machine_mode, machine_mode);
332 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
333
/* Table of machine attributes recognized by the SH backend.  */
static const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "interrupt_handler", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  { "sp_switch", 1, 1, true, false, false, false,
    sh_handle_sp_switch_attribute, NULL },
  { "trap_exit", 1, 1, true, false, false, false,
    sh_handle_trap_exit_attribute, NULL },
  { "renesas", 0, 0, false, true, false, false,
    sh_handle_renesas_attribute, NULL },
  /* "trapa_handler" and "nosave_low_regs" share the interrupt handler's
     validation routine; only the attribute name differs.  */
  { "trapa_handler", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  { "nosave_low_regs", 0, 0, true, false, false, false,
    sh_handle_interrupt_handler_attribute, NULL },
  { "resbank", 0, 0, true, false, false, false,
    sh_handle_resbank_handler_attribute, NULL },
  { "function_vector", 1, 1, true, false, false, false,
    sh2a_handle_function_vector_handler_attribute, NULL },
  /* Sentinel terminating the table.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
356
357 /* Initialize the GCC target structure. */
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
360
361 /* The next two are used for debug info when compiling with -gdwarf. */
362 #undef TARGET_ASM_UNALIGNED_HI_OP
363 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
364 #undef TARGET_ASM_UNALIGNED_SI_OP
365 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
366
367 #undef TARGET_OPTION_OVERRIDE
368 #define TARGET_OPTION_OVERRIDE sh_option_override
369
370 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
371 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
372 sh_override_options_after_change
373
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND sh_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
380 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
381 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
382
383 #undef TARGET_ASM_FUNCTION_EPILOGUE
384 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
385
386 #undef TARGET_ASM_OUTPUT_MI_THUNK
387 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
388
389 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
390 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
391 hook_bool_const_tree_hwi_hwi_const_tree_true
392
393 #undef TARGET_ASM_FILE_START
394 #define TARGET_ASM_FILE_START sh_file_start
395 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
396 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
397
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER sh_assemble_integer
400
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
403
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
406
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
409
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
412
413 /* The next 5 hooks have been implemented for reenabling sched1. With the
414 help of these macros we are limiting the movement of insns in sched1 to
415 reduce the register pressure. The overall idea is to keep count of SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross some threshold values; give priority to insns that free registers.
418 The insn that frees registers is most likely to be the insn with lowest
419 LUID (original insn order); but such an insn might be there in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
421 up to a max of 8 cycles so that such insns may move from Q -> R.
422
423 The description of the hooks are as below:
424
425 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
426 scheduler; it is called inside the sched_init function just after
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of insns of basic blocks; much similar to what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
431
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
435
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high; reorder the ready queue so that the insn with lowest LUID will be
438 issued next.
439
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
442
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
445
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
447
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
450
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
453
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
456
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
459
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
462
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
465
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
468
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
471
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
474
475 #undef TARGET_CAN_FOLLOW_JUMP
476 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
477
478 #undef TARGET_MS_BITFIELD_LAYOUT_P
479 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
480
481 #undef TARGET_INIT_BUILTINS
482 #define TARGET_INIT_BUILTINS sh_init_builtins
483 #undef TARGET_BUILTIN_DECL
484 #define TARGET_BUILTIN_DECL sh_builtin_decl
485 #undef TARGET_EXPAND_BUILTIN
486 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
487
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
490
491 #undef TARGET_CANNOT_COPY_INSN_P
492 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
493 #undef TARGET_RTX_COSTS
494 #define TARGET_RTX_COSTS sh_rtx_costs
495 #undef TARGET_ADDRESS_COST
496 #define TARGET_ADDRESS_COST sh_address_cost
497 #undef TARGET_ALLOCATE_INITIAL_VALUE
498 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
499
500 #undef TARGET_MACHINE_DEPENDENT_REORG
501 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
502
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
505
506 #ifdef HAVE_AS_TLS
507 #undef TARGET_HAVE_TLS
508 #define TARGET_HAVE_TLS true
509 #endif
510
511 #undef TARGET_PROMOTE_PROTOTYPES
512 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
513 #undef TARGET_PROMOTE_FUNCTION_MODE
514 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
515
516 #undef TARGET_FUNCTION_VALUE
517 #define TARGET_FUNCTION_VALUE sh_function_value
518 #undef TARGET_FUNCTION_VALUE_REGNO_P
519 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
520 #undef TARGET_LIBCALL_VALUE
521 #define TARGET_LIBCALL_VALUE sh_libcall_value
522 #undef TARGET_STRUCT_VALUE_RTX
523 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
524 #undef TARGET_RETURN_IN_MEMORY
525 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
526
527 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
528 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
529 #undef TARGET_SETUP_INCOMING_VARARGS
530 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
531 #undef TARGET_STRICT_ARGUMENT_NAMING
532 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
533 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
534 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
537 #undef TARGET_PASS_BY_REFERENCE
538 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
539 #undef TARGET_CALLEE_COPIES
540 #define TARGET_CALLEE_COPIES sh_callee_copies
541 #undef TARGET_ARG_PARTIAL_BYTES
542 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
543 #undef TARGET_FUNCTION_ARG
544 #define TARGET_FUNCTION_ARG sh_function_arg
545 #undef TARGET_FUNCTION_ARG_ADVANCE
546 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
547
548 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
549 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
550
551 #undef TARGET_BUILD_BUILTIN_VA_LIST
552 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
553 #undef TARGET_EXPAND_BUILTIN_VA_START
554 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
555 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
556 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
557
558 #undef TARGET_VECTOR_MODE_SUPPORTED_P
559 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
560
561 #undef TARGET_CHECK_PCH_TARGET_FLAGS
562 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
563
564 #undef TARGET_DWARF_CALLING_CONVENTION
565 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
566
567 #undef TARGET_FRAME_POINTER_REQUIRED
568 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
569
570 #undef TARGET_MODE_EMIT
571 #define TARGET_MODE_EMIT sh_emit_mode_set
572
573 #undef TARGET_MODE_NEEDED
574 #define TARGET_MODE_NEEDED sh_mode_needed
575
576 #undef TARGET_MODE_AFTER
577 #define TARGET_MODE_AFTER sh_mode_after
578
579 #undef TARGET_MODE_ENTRY
580 #define TARGET_MODE_ENTRY sh_mode_entry
581
582 #undef TARGET_MODE_EXIT
583 #define TARGET_MODE_EXIT sh_mode_exit
584
585 #undef TARGET_MODE_PRIORITY
586 #define TARGET_MODE_PRIORITY sh_mode_priority
587
588 /* Return regmode weight for insn. */
589 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
590 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
591
592 /* Return current register pressure for regmode. */
593 #define CURR_REGMODE_PRESSURE(MODE)\
594 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
595
596 #undef TARGET_ENCODE_SECTION_INFO
597 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
598
599 #undef TARGET_LRA_P
600 #define TARGET_LRA_P sh_lra_p
601
602 #undef TARGET_SECONDARY_RELOAD
603 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
604
605 #undef TARGET_PREFERRED_RELOAD_CLASS
606 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
607
608 #undef TARGET_CONDITIONAL_REGISTER_USAGE
609 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
610
611 #undef TARGET_LEGITIMATE_ADDRESS_P
612 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
613
614 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
615 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
616
617 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
618 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
619 sh_legitimize_address_displacement
620
621 #undef TARGET_TRAMPOLINE_INIT
622 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
623 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
624 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
625
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
628
629 #undef TARGET_CANONICALIZE_COMPARISON
630 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
631
632 #undef TARGET_LEGITIMATE_COMBINED_INSN
633 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
634
635 #undef TARGET_FIXED_CONDITION_CODE_REGS
636 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
637
638 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
639 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
640 sh_use_by_pieces_infrastructure_p
641
642 /* Machine-specific symbol_ref flags. */
643 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
644
645 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
646 is used by optabs.c atomic op expansion code as well as in sync.md. */
647 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
648 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
649
650 #undef TARGET_CANNOT_FORCE_CONST_MEM
651 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
652
653 #undef TARGET_HARD_REGNO_NREGS
654 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
655 #undef TARGET_HARD_REGNO_MODE_OK
656 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
657
658 #undef TARGET_MODES_TIEABLE_P
659 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
660
661 #undef TARGET_CAN_CHANGE_MODE_CLASS
662 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
663
664 #undef TARGET_CONSTANT_ALIGNMENT
665 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
666
667 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
668 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
669
670 struct gcc_target targetm = TARGET_INITIALIZER;
671
672
/* Information on the currently selected atomic model.
   This is initialized in sh_option_override.  */
static sh_atomic_model selected_atomic_model_;

/* Read-only accessor for the selected atomic model.  Only meaningful
   after sh_option_override has run.  */
const sh_atomic_model&
selected_atomic_model (void)
{
  return selected_atomic_model_;
}
682
683 static sh_atomic_model
parse_validate_atomic_model_option(const char * str)684 parse_validate_atomic_model_option (const char* str)
685 {
686 const char* model_names[sh_atomic_model::num_models];
687 model_names[sh_atomic_model::none] = "none";
688 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
689 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
690 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
691 model_names[sh_atomic_model::soft_imask] = "soft-imask";
692
693 const char* model_cdef_names[sh_atomic_model::num_models];
694 model_cdef_names[sh_atomic_model::none] = "NONE";
695 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
696 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
697 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
698 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
699
700 sh_atomic_model ret;
701 ret.type = sh_atomic_model::none;
702 ret.name = model_names[sh_atomic_model::none];
703 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
704 ret.strict = false;
705 ret.tcb_gbr_offset = -1;
706
707 /* Handle empty string as 'none'. */
708 if (str == NULL || *str == '\0')
709 return ret;
710
711 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
712
713 std::vector<std::string> tokens;
714 for (std::stringstream ss (str); ss.good (); )
715 {
716 tokens.push_back (std::string ());
717 std::getline (ss, tokens.back (), ',');
718 }
719
720 if (tokens.empty ())
721 err_ret ("invalid atomic model option");
722
723 /* The first token must be the atomic model name. */
724 {
725 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
726 if (tokens.front () == model_names[i])
727 {
728 ret.type = (sh_atomic_model::enum_type)i;
729 ret.name = model_names[i];
730 ret.cdef_name = model_cdef_names[i];
731 goto got_mode_name;
732 }
733
734 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
735 got_mode_name:;
736 }
737
738 /* Go through the remaining tokens. */
739 for (size_t i = 1; i < tokens.size (); ++i)
740 {
741 if (tokens[i] == "strict")
742 ret.strict = true;
743 else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset="))
744 {
745 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
746 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
747 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
748 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
749 "option", offset_str.c_str ());
750 }
751 else
752 err_ret ("unknown parameter \"%s\" in atomic model option",
753 tokens[i].c_str ());
754 }
755
756 /* Check that the selection makes sense. */
757 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
758 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
759 ret.name);
760
761 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
762 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
763
764 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
765 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
766
767 if (ret.type == sh_atomic_model::soft_tcb
768 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
769 || (ret.tcb_gbr_offset & 3) != 0))
770 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
771 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
772 ret.name);
773
774 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
775 err_ret ("cannot use atomic model %s in user mode", ret.name);
776
777 return ret;
778
779 #undef err_ret
780 }
781
/* Register SH specific RTL passes.  */
extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
					    const char* name);
extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
						  const char* name);

/* Insert the SH specific RTL passes at fixed positions in the pass
   pipeline.  NOTE(review): presumably invoked once during option
   processing -- confirm against the caller.  */
static void
register_sh_passes (void)
{
/* Running the sh_treg_combine pass after ce1 generates better code when
   comparisons are combined and reg-reg moves are introduced, because
   reg-reg moves will be eliminated afterwards.  However, there are quite
   some cases where combine will be unable to fold comparison related insns,
   thus for now don't do it.
  register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
		 PASS_POS_INSERT_AFTER, "ce1", 1);
*/

  /* Run sh_treg_combine pass after combine but before register allocation.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
		 PASS_POS_INSERT_AFTER, "split1", 1);

  /* Run sh_treg_combine pass after register allocation and basic block
     reordering as this sometimes creates new opportunities.  */
  register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
		 PASS_POS_INSERT_AFTER, "split4", 1);

  /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
     is known after a conditional branch.
     This must be done after basic blocks and branch conditions have
     stabilized and won't be changed by further passes.  */
  register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
		 PASS_POS_INSERT_BEFORE, "sched2", 1);
}
815
/* Implement TARGET_OPTION_OVERRIDE macro.  Validate and override
   various options, and do some machine dependent initialization.  */
static void
sh_option_override (void)
{
  int regno;

  SUBTARGET_OVERRIDE_OPTIONS;

  /* Pick the CPU and assembler dialect.  The TARGET_SH* macros form a
     hierarchy, so test from the oldest core to the newest and let later
     matches override earlier ones.  */
  sh_cpu = PROCESSOR_SH1;
  assembler_dialect = 0;
  if (TARGET_SH2)
    sh_cpu = PROCESSOR_SH2;
  if (TARGET_SH2E)
    sh_cpu = PROCESSOR_SH2E;
  if (TARGET_SH2A)
    sh_cpu = PROCESSOR_SH2A;
  if (TARGET_SH3)
    sh_cpu = PROCESSOR_SH3;
  if (TARGET_SH3E)
    sh_cpu = PROCESSOR_SH3E;
  if (TARGET_SH4)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4;
    }
  if (TARGET_SH4A)
    {
      assembler_dialect = 1;
      sh_cpu = PROCESSOR_SH4A;
    }

  /* User/privileged mode is supported only on SH3* and SH4*.
     Disable it for everything else.  */
  if (!TARGET_SH3 && TARGET_USERMODE)
    TARGET_USERMODE = false;

  /* Select the integer division strategy, honoring an explicit -mdiv=
     choice only when it is usable on the selected target.  */
  if (! strcmp (sh_div_str, "call-div1"))
    sh_div_strategy = SH_DIV_CALL_DIV1;
  else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
    sh_div_strategy = SH_DIV_CALL_FP;
  else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
    sh_div_strategy = SH_DIV_CALL_TABLE;
  else
    {
      /* Pick one that makes most sense for the target in general.
	 It is not much good to use different functions depending on -Os,
	 since then we'll end up with two different functions when some of
	 the code is compiled for size, and some for speed.  */

      /* SH4 tends to emphasize speed.  */
      if (TARGET_HARD_SH4)
	sh_div_strategy = SH_DIV_CALL_TABLE;
      /* These have their own way of doing things.  */
      else if (TARGET_SH2A)
	sh_div_strategy = SH_DIV_INTRINSIC;
      /* SH1 .. SH3 cores often go into small-footprint systems, so
	 default to the smallest implementation available.  */
      else
	sh_div_strategy = SH_DIV_CALL_DIV1;
    }

  /* Choose the libfunc name that matches the division strategy unless
     the user (or a subtarget) supplied one already.  */
  if (sh_divsi3_libfunc[0])
    ; /* User supplied - leave it alone.  */
  else if (TARGET_DIVIDE_CALL_FP)
    sh_divsi3_libfunc = "__sdivsi3_i4";
  else if (TARGET_DIVIDE_CALL_TABLE)
    sh_divsi3_libfunc = "__sdivsi3_i4i";
  else
    sh_divsi3_libfunc = "__sdivsi3";

  if (sh_branch_cost == -1)
    {
      /* The SH1 does not have delay slots, hence we get a pipeline stall
	 at every branch.  The SH4 is superscalar, so the single delay slot
	 is not sufficient to keep both pipelines filled.
	 In any case, set the default branch cost to '2', as it results in
	 slightly overall smaller code and also enables some if conversions
	 that are required for matching special T bit related insns.  */
      sh_branch_cost = 2;
    }

  /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user.  */
  if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
    TARGET_ZDCBRANCH = 1;

  /* FDPIC code is a special form of PIC, and the vast majority of code
     generation constraints that apply to PIC also apply to FDPIC, so we
     set flag_pic to avoid the need to check TARGET_FDPIC everywhere
     flag_pic is checked.  */
  if (TARGET_FDPIC && !flag_pic)
    flag_pic = 2;

  /* Hide registers that do not exist on the selected target by clearing
     their printable names.  */
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (! VALID_REGISTER_P (regno))
      sh_register_names[regno][0] = '\0';

  for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
    if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
      sh_additional_register_names[regno][0] = '\0';

  if (flag_pic && ! TARGET_PREFERGOT)
    flag_no_function_cse = 1;

  if (targetm.small_register_classes_for_mode_p (VOIDmode))
    {
      /* Never run scheduling before reload, since that can
	 break global alloc, and generates slower code anyway due
	 to the pressure on R0.  */
      /* Enable sched1 for SH4 if the user explicitly requests.
	 When sched1 is enabled, the ready queue will be reordered by
	 the target hooks if pressure is high.  We cannot do this for
	 PIC, SH3 and lower as they give spill failures for R0.  */
      if (!TARGET_HARD_SH4 || flag_pic)
	flag_schedule_insns = 0;
      /* ??? Current exception handling places basic block boundaries
	 after call_insns.  It causes the high pressure on R0 and gives
	 spill failures for R0 in reload.  See PR 22553 and the thread
	 on gcc-patches
	 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>.  */
      else if (flag_exceptions)
	{
	  if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
	    warning (0, "ignoring %<-fschedule-insns%> because of exception "
			"handling bug");
	  flag_schedule_insns = 0;
	}
      else if (flag_schedule_insns
	       && !global_options_set.x_flag_schedule_insns)
	flag_schedule_insns = 0;
    }

  /* Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
    {
      warning (0, "unwind tables currently require either a frame pointer "
	       "or %<-maccumulate-outgoing-args%> for correctness");
      TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
    }

  if (flag_unsafe_math_optimizations)
    {
      /* Enable fsca insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSCA == 0
	  && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
	TARGET_FSCA = 1;

      /* Enable fsrra insn for SH4A if not otherwise specified by the user.  */
      if (global_options_set.x_TARGET_FSRRA == 0
	  && (TARGET_SH4A_FP || TARGET_FPU_SH4_300))
	TARGET_FSRRA = 1;
    }

  /* Allow fsrra insn only if -funsafe-math-optimizations and
     -ffinite-math-only is enabled.  */
  TARGET_FSRRA = TARGET_FSRRA
		 && flag_unsafe_math_optimizations
		 && flag_finite_math_only;

  /* If the -mieee option was not explicitly set by the user, turn it on
     unless -ffinite-math-only was specified.  See also PR 33135.  */
  if (! global_options_set.x_TARGET_IEEE)
    TARGET_IEEE = ! flag_finite_math_only;

  if (sh_fixed_range_str)
    sh_fix_range (sh_fixed_range_str);

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  sh_override_options_after_change ();

  /* Parse atomic model option and make sure it is valid for the current
     target CPU.  */
  selected_atomic_model_
    = parse_validate_atomic_model_option (sh_atomic_model_str);

  register_sh_passes ();
}
1001
/* Implement targetm.override_options_after_change.  Recomputes the
   default -falign-* settings; called both from sh_option_override and
   whenever options change (e.g. via optimize attributes).  */

static void
sh_override_options_after_change (void)
{
  /* Adjust loop, jump and function alignment values (in bytes), if those
     were not specified by the user using -falign-loops, -falign-jumps
     and -falign-functions options.
     32 bit alignment is better for speed, because instructions can be
     fetched as a pair from a longword boundary.  For size use 16 bit
     alignment to get more compact code.
     Aligning all jumps increases the code size, even if it might
     result in slightly faster code.  Thus, it is set to the smallest
     alignment possible if not specified by the user.  */
  if (flag_align_loops && !str_align_loops)
    str_align_loops = optimize_size ? "2" : "4";

  /* Parse values so that we can compare for current value.  */
  parse_alignment_opts ();
  if (flag_align_jumps && !str_align_jumps)
    str_align_jumps = "2";
  else if (align_jumps.levels[0].get_value () < 2)
    str_align_jumps = "2";

  if (flag_align_functions && !str_align_functions)
    str_align_functions = optimize_size ? "2" : "4";

  /* The linker relaxation code breaks when a function contains
     alignments that are larger than that at the start of a
     compilation unit.  */
  if (TARGET_RELAX)
    {
      /* Parse values again so that the defaults chosen just above are
	 reflected in align_loops / align_jumps before comparing.  */
      parse_alignment_opts ();
      int min_align = MAX (align_loops.levels[0].get_value (),
			   align_jumps.levels[0].get_value ());

      /* Also take possible .long constants / mova tables into account.  */
      if (min_align < 4)
	min_align = 4;
      if (align_functions.levels[0].get_value () < min_align)
	{
	  /* Deliberately not freed: option strings must stay live for
	     the rest of the compilation.  */
	  char *r = XNEWVEC (char, 16);
	  sprintf (r, "%d", min_align);
	  str_align_functions = r;
	}
    }
}
1050
1051 /* Print the operand address in x to the stream. */
1052 static void
sh_print_operand_address(FILE * stream,machine_mode,rtx x)1053 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1054 {
1055 switch (GET_CODE (x))
1056 {
1057 case REG:
1058 case SUBREG:
1059 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1060 break;
1061
1062 case PLUS:
1063 {
1064 rtx base = XEXP (x, 0);
1065 rtx index = XEXP (x, 1);
1066
1067 switch (GET_CODE (index))
1068 {
1069 case CONST_INT:
1070 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1071 reg_names[true_regnum (base)]);
1072 break;
1073
1074 case REG:
1075 case SUBREG:
1076 {
1077 int base_num = true_regnum (base);
1078 int index_num = true_regnum (index);
1079
1080 /* If base or index is R0, make sure that it comes first.
1081 Usually one of them will be R0, but the order might be wrong.
1082 If neither base nor index are R0 it's an error and we just
1083 pass it on to the assembler. This avoids silent wrong code
1084 bugs. */
1085 if (base_num == 0 && index_num != 0)
1086 std::swap (base_num, index_num);
1087
1088 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1089 reg_names[base_num]);
1090 break;
1091 }
1092
1093 default:
1094 gcc_unreachable ();
1095 }
1096 }
1097 break;
1098
1099 case PRE_DEC:
1100 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1101 break;
1102
1103 case POST_INC:
1104 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1105 break;
1106
1107 default:
1108 x = mark_constant_pool_use (x);
1109 output_addr_const (stream, x);
1110 break;
1111 }
1112 }
1113
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.' print a .s if insn needs delay slot
   ',' print LOCAL_LABEL_PREFIX
   '@' print trap, rte or rts depending upon pragma interruptness
   '#' output a nop if there is nothing to put in the delay slot
   ''' print likelihood suffix (/u for unlikely).
   '>' print branch target if -fverbose-asm
   'O' print a constant without the #
   'R' print the LSW of a dp value - changes if in little endian
   'S' print the MSW of a dp value - changes if in little endian
   'T' print the next word of a dp value - same as 'R' in big endian mode.
   'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
   'N' print 'r63' if the operand is (const_int 0).
   'd' print a V2SF reg as dN instead of fpN.
   'm' print a pair `base,offset' or `base,index', for LD and ST.
   'U' Likewise for {LD,ST}{HI,LO}.
   'V' print the position of a single bit set.
   'W' print the position of a single bit cleared.
   't' print a memory address which is a register.
   'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o' output an operator.  */
static void
sh_print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      /* final_sequence is non-null while a branch is output together with
	 its delay slot; only emit the delay slot marker if the slot insn
	 has a non-zero length.  */
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
	  && get_attr_length (final_sequence->insn (1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      /* Pick the correct function-return insn: trapa for trap_exit
	 functions, rte (optionally preceded by resbank) for interrupt
	 handlers, rts otherwise.  */
      trapa_attr = lookup_attribute ("trap_exit",
				     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
	fprintf (stream, "trapa #%ld",
		 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
	{
	  if (sh_cfun_resbank_handler_p ())
	    fprintf (stream, "resbank\n");
	  fprintf (stream, "rte");
	}
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	/* Branches predicted as taken less often than not get the
	   "/u" (unlikely) suffix.  */
	if (note
	    && profile_probability::from_reg_br_prob_note (XINT (note, 0))
	       < profile_probability::even ())
	  fputs ("/u", stream);
	break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
	{
	  fputs ("\t! target: ", stream);
	  output_addr_const (stream, JUMP_LABEL (current_output_insn));
	}
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    /* N.B.: %R / %S / %T adjust memory addresses by four.
       While they can be used to access 64 bit parts of a larger value
       held in general purpose registers, that won't work with memory -
       neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
	  sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%R");
	}
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
	{
	  regno = true_regnum (x);
	  regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
	  fputs (reg_names[regno], (stream));
	}
      else if (MEM_P (x))
	{
	  x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
	  sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  rtx sub = NULL_RTX;

	  mode = GET_MODE (x);
	  if (mode == VOIDmode)
	    mode = DImode;
	  if (GET_MODE_SIZE (mode) >= 8)
	    sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
	  if (sub)
	    sh_print_operand (stream, sub, 0);
	  else
	    output_operand_lossage ("invalid operand to %%S");
	}
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  {
	    machine_mode mode = GET_MODE (x);
	    /* Auto-inc/dec addresses already move to the next word;
	       only plain addresses need the explicit +4 adjustment.  */
	    if (GET_CODE (XEXP (x, 0)) != PRE_DEC
		&& GET_CODE (XEXP (x, 0)) != POST_INC)
	      x = adjust_address (x, SImode, 4);
	    sh_print_operand_address (stream, mode, XEXP (x, 0));
	  }
	  break;
	default:
	  break;
	}
      break;

    case 't':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  break;
	default:
	  break;
	}
      break;

    case 'o':
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:   fputs ("eq",  stream); break;
	case NE:   fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      if (MEM_P (x))
	{
	  switch (GET_MODE (x))
	    {
	    case E_QImode: fputs (".b", stream); break;
	    case E_HImode: fputs (".w", stream); break;
	    case E_SImode: fputs (".l", stream); break;
	    case E_SFmode: fputs (".s", stream); break;
	    case E_DFmode: fputs (".d", stream); break;
	    default: gcc_unreachable ();
	    }
	}
      break;

    case 'm':
      gcc_assert (MEM_P (x));
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  sh_print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  sh_print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  sh_print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case 'V':
      {
	int num = exact_log2 (INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'W':
      {
	int num = exact_log2 (~INTVAL (x));
	gcc_assert (num >= 0);
	fprintf (stream, "#%d", num);
      }
      break;

    case 'd':
      gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      /* NOTE(review): 'r63' looks like a leftover from the removed
	 SH5/SHmedia support (r63 was its zero register) — confirm whether
	 this modifier is still reachable.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (CONST_INT_P (x))
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
	{
	case TRUNCATE:
	  {
	    rtx inner = XEXP (x, 0);
	    int offset = 0;
	    machine_mode inner_mode;

	    /* We might see SUBREGs with vector mode registers inside.  */
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& subreg_lowpart_p (inner))
	      inner = SUBREG_REG (inner);
	    if (CONST_INT_P (inner))
	      {
		/* Truncation of a constant: fold it and print normally.  */
		x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
		goto default_output;
	      }
	    inner_mode = GET_MODE (inner);
	    if (GET_CODE (inner) == SUBREG
		&& (GET_MODE_SIZE (GET_MODE (inner))
		    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
		&& REG_P (SUBREG_REG (inner)))
	      {
		offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
					      GET_MODE (SUBREG_REG (inner)),
					      SUBREG_BYTE (inner),
					      GET_MODE (inner));
		inner = SUBREG_REG (inner);
	      }
	    if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
	      abort ();
	    /* Floating point register pairs are always big endian;
	       general purpose registers are 64 bit wide.  */
	    regno = REGNO (inner);
	    regno = (hard_regno_nregs (regno, inner_mode)
		     - hard_regno_nregs (regno, mode))
		    + offset;
	    x = inner;
	    goto reg;
	  }
	case SIGN_EXTEND:
	  x = XEXP (x, 0);
	  goto reg;
	case SUBREG:
	  gcc_assert (SUBREG_BYTE (x) == 0
		      && REG_P (SUBREG_REG (x)));

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	reg:
	case REG:
	  /* regno carries a possible adjustment from the TRUNCATE path
	     above; it is 0 when we arrive here directly.  */
	  regno += REGNO (x);
	  if (FP_REGISTER_P (regno)
	      && mode == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && mode == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[regno] + 2);
	  else if (REG_P (x)
		   && mode == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[regno] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (mode) > 4)
	    fprintf ((stream), "d%s", reg_names[regno] + 1);
	  else
	    fputs (reg_names[regno], (stream));
	  break;

	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;

	default:
	  fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
1471
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true for the
   punctuation characters that sh_print_operand accepts as modifier
   codes without an operand.  */
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '.':
    case '#':
    case '@':
    case ',':
    case '$':
    case '\'':
    case '>':
      return true;
    default:
      return false;
    }
}
1478
1479 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1480 static bool
sh_asm_output_addr_const_extra(FILE * file,rtx x)1481 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1482 {
1483 if (GET_CODE (x) == UNSPEC)
1484 {
1485 switch (XINT (x, 1))
1486 {
1487 case UNSPEC_PIC:
1488 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1489 output_addr_const (file, XVECEXP (x, 0, 0));
1490 break;
1491 case UNSPEC_GOT:
1492 output_addr_const (file, XVECEXP (x, 0, 0));
1493 fputs ("@GOT", file);
1494 break;
1495 case UNSPEC_GOTOFF:
1496 output_addr_const (file, XVECEXP (x, 0, 0));
1497 fputs ("@GOTOFF", file);
1498 break;
1499 case UNSPEC_PLT:
1500 output_addr_const (file, XVECEXP (x, 0, 0));
1501 fputs ("@PLT", file);
1502 break;
1503 case UNSPEC_GOTPLT:
1504 output_addr_const (file, XVECEXP (x, 0, 0));
1505 fputs ("@GOTPLT", file);
1506 break;
1507 case UNSPEC_PCREL:
1508 output_addr_const (file, XVECEXP (x, 0, 0));
1509 fputs ("@PCREL", file);
1510 break;
1511 case UNSPEC_DTPOFF:
1512 output_addr_const (file, XVECEXP (x, 0, 0));
1513 fputs ("@DTPOFF", file);
1514 break;
1515 case UNSPEC_GOTTPOFF:
1516 output_addr_const (file, XVECEXP (x, 0, 0));
1517 fputs ("@GOTTPOFF", file);
1518 break;
1519 case UNSPEC_TPOFF:
1520 output_addr_const (file, XVECEXP (x, 0, 0));
1521 fputs ("@TPOFF", file);
1522 break;
1523 case UNSPEC_CALLER:
1524 {
1525 char name[32];
1526 /* LPCS stands for Label for PIC Call Site. */
1527 targetm.asm_out.generate_internal_label (name, "LPCS",
1528 INTVAL (XVECEXP (x, 0, 0)));
1529 assemble_name (file, name);
1530 }
1531 break;
1532 case UNSPEC_SYMOFF:
1533 output_addr_const (file, XVECEXP (x, 0, 0));
1534 fputc ('-', file);
1535 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1536 {
1537 fputc ('(', file);
1538 output_addr_const (file, XVECEXP (x, 0, 1));
1539 fputc (')', file);
1540 }
1541 else
1542 output_addr_const (file, XVECEXP (x, 0, 1));
1543 break;
1544 case UNSPEC_PCREL_SYMOFF:
1545 output_addr_const (file, XVECEXP (x, 0, 0));
1546 fputs ("-(", file);
1547 output_addr_const (file, XVECEXP (x, 0, 1));
1548 fputs ("-.)", file);
1549 break;
1550 case UNSPEC_GOTFUNCDESC:
1551 output_addr_const (file, XVECEXP (x, 0, 0));
1552 fputs ("@GOTFUNCDESC", file);
1553 break;
1554 case UNSPEC_GOTOFFFUNCDESC:
1555 output_addr_const (file, XVECEXP (x, 0, 0));
1556 fputs ("@GOTOFFFUNCDESC", file);
1557 break;
1558 default:
1559 return false;
1560 }
1561 return true;
1562 }
1563 else
1564 return false;
1565 }
1566
1567 /* Encode symbol attributes of a SYMBOL_REF into its
1568 SYMBOL_REF_FLAGS. */
1569 static void
sh_encode_section_info(tree decl,rtx rtl,int first)1570 sh_encode_section_info (tree decl, rtx rtl, int first)
1571 {
1572 default_encode_section_info (decl, rtl, first);
1573
1574 if (TREE_CODE (decl) == FUNCTION_DECL
1575 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1576 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1577 }
1578
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  Rewrites OPERANDS in place and may
   emit auxiliary insns (PIC/TLS address computation, copies through R0)
   ahead of the move itself.  */
void
prepare_move_operands (rtx operands[], machine_mode mode)
{
  /* Legitimize symbolic source addresses when generating PIC code.
     TLS symbols are excluded here; they are handled further below.  */
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (MEM_P (operands[0]))
	    operands[1] = force_reg (Pmode, operands[1]);
	  else
	    {
	      temp = (!can_create_pseudo_p ()
		      ? operands[0]
		      : gen_reg_rtx (Pmode));
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  /* (const (plus (symbol) (const_int))): legitimize the symbol
	     part and add the constant offset back explicitly.  */
	  temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      (!can_create_pseudo_p ()
				       ? temp
				       : gen_reg_rtx (Pmode)),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new_rtx = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
	  operands[0] = new_rtx;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (refers_to_regno_p (R0_REG, operands[1])
	       && MEM_P (operands[0])
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && REG_P (XEXP (XEXP (operands[0], 0), 1)))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      /* When the displacement addressing is used, RA will assign r0 to
	 the pseudo register operand for the QI/HImode load/store.
	 This tends to make a long live range for R0 and might cause
	 anomalous register spills in some case with LRA.  See PR
	 target/55212.
	 We split possible load/store to two move insns via r0 so as to
	 shorten R0 live range.  It will make some codes worse but will
	 win on average for LRA.
	 Also when base+index addressing is used and the index term is
	 a subreg, LRA assumes that more hard registers can be available
	 in some situation.  It isn't the case for SH in the problematic
	 case.  We can pre-allocate R0 for that index term to avoid
	 the issue.  See PR target/66591.  */
      else if (sh_lra_p ()
	       && ! TARGET_SH2A
	       && ((REG_P (operands[0]) && MEM_P (operands[1]))
		   || (REG_P (operands[1]) && MEM_P (operands[0]))))
	{
	  bool load_p = REG_P (operands[0]);
	  rtx reg = operands[load_p ? 0 : 1];
	  rtx adr = XEXP (operands[load_p ? 1 : 0], 0);

	  /* QI/HImode displacement load/store: route the value through R0
	     to shorten the R0 live range (see PR target/55212 above).  */
	  if ((mode == QImode || mode == HImode)
	      && REGNO (reg) >= FIRST_PSEUDO_REGISTER
	      && GET_CODE (adr) == PLUS
	      && REG_P (XEXP (adr, 0))
	      && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
	      && CONST_INT_P (XEXP (adr, 1))
	      && INTVAL (XEXP (adr, 1)) != 0
	      && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
	    {
	      rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
	      emit_move_insn (r0_rtx, operands[1]);
	      operands[1] = r0_rtx;
	    }
	  /* base+index with a subreg index: pre-allocate R0 for the index
	     term (see PR target/66591 above).  */
	  if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
	      && GET_CODE (adr) == PLUS
	      && REG_P (XEXP (adr, 0))
	      && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
	      && SUBREG_P (XEXP (adr, 1))
	      && REG_P (SUBREG_REG (XEXP (adr, 1))))
	    {
	      rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
	      emit_move_insn (r0_rtx, XEXP (adr, 1));
	      XEXP (adr, 1) = r0_rtx;
	    }
	}
    }

  /* Expand TLS symbol references according to their TLS model.  */
  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0 = operands[0];
      rtx op1 = operands[1];
      rtx opc;
      /* Split a constant addend off (const (plus (tls_sym) (const_int)));
	 it is added back after the TLS address has been computed.  */
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
	      != TLS_MODEL_NONE))
	{
	  opc = XEXP (XEXP (op1, 0), 1);
	  op1 = XEXP (XEXP (op1, 0), 0);
	}
      else
	opc = NULL_RTX;

      enum tls_model tls_kind;

      if (! reload_in_progress && ! reload_completed
	  && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  /* Non-PIC code still needs the GOT address in the PIC register
	     for these TLS models; materialize it here.  */
	  if (! flag_pic
	      && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
		  || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
		  || tls_kind == TLS_MODEL_INITIAL_EXEC))
	    {
	      static int got_labelno;
	      /* Don't schedule insns for getting GOT address when
		 the first scheduling is enabled, to avoid spill
		 failures for R0.  */
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());
	      emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
	      emit_use (gen_rtx_REG (SImode, PIC_REG));
	      if (flag_schedule_insns)
		emit_insn (gen_blockage ());
	    }

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);
	      op1 = tmp;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      if (TARGET_FDPIC)
		emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
				sh_get_fdpic_reg_initial_val ());
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_store_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  /* Re-add the constant addend that was split off above.  */
	  if (opc)
	    emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
	  operands[1] = op1;
	}
    }

  if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      rtx base, offset;
      split_const (operands[1], &base, &offset);

      /* symbol+offset where the offset leaves the symbol's object:
	 load the symbol first, then add the offset separately.  */
      if (GET_CODE (base) == SYMBOL_REF
	  && !offset_within_block_p (base, INTVAL (offset)))
	{
	  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
	  emit_move_insn (tmp, base);
	  if (!arith_operand (offset, mode))
	    offset = force_reg (mode, offset);
	  emit_insn (gen_add3_insn (operands[0], tmp, offset));
	}
    }
}
1814
/* Implement the canonicalize_comparison target hook for the combine
   pass.  For the target hook this function is invoked via
   sh_canonicalize_comparison.  This function is also re-used to
   canonicalize comparisons in cbranch pattern expanders.

   CMP, OP0 and OP1 are updated in place.  MODE may be VOIDmode when
   invoked from combine; it is then derived from the operands.  If
   OP0_PRESERVE_VALUE is true, OP0 must not be exchanged with OP1.  */
static void
sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
			    machine_mode mode,
			    bool op0_preserve_value)
{
  /* When invoked from within the combine pass the mode is not specified,
     so try to get it from one of the operands.  */
  if (mode == VOIDmode)
    mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  // We need to have a mode to do something useful here.
  if (mode == VOIDmode)
    return;

  // Currently, we don't deal with floats here.
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return;

  // Make sure that the constant operand is the second operand.
  if (CONST_INT_P (op0) && !CONST_INT_P (op1))
    {
      if (op0_preserve_value)
	return;

      std::swap (op0, op1);
      cmp = swap_condition (cmp);
    }

  if (CONST_INT_P (op1))
    {
      /* Try to adjust the constant operand in such a way that available
	 comparison insns can be utilized better and the constant can be
	 loaded with a 'mov #imm,Rm' insn.  This avoids a load from the
	 constant pool.  */
      const HOST_WIDE_INT val = INTVAL (op1);

      /* x > -1 --> x >= 0
	 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
	 x <= -1 --> x < 0
	 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
      if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
	{
	  cmp = cmp == GT ? GE : LT;
	  op1 = gen_int_mode (val + 1, mode);
	}

      /* x >= 1 --> x > 0
	 x >= 0x80 --> x > 0x7F
	 x < 1 --> x <= 0
	 x < 0x80 --> x <= 0x7F */
      else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
	{
	  cmp = cmp == GE ? GT : LE;
	  op1 = gen_int_mode (val - 1, mode);
	}

      /* unsigned x >= 1 --> x != 0
	 unsigned x < 1 --> x == 0 */
      else if (val == 1 && (cmp == GEU || cmp == LTU))
	{
	  cmp = cmp == GEU ? NE : EQ;
	  op1 = CONST0_RTX (mode);
	}

      /* unsigned x >= 0x80 --> unsigned x > 0x7F
	 unsigned x < 0x80 --> unsigned x <= 0x7F */
      else if (val == 0x80 && (cmp == GEU || cmp == LTU))
	{
	  cmp = cmp == GEU ? GTU : LEU;
	  op1 = gen_int_mode (val - 1, mode);
	}

      /* unsigned x > 0 --> x != 0
	 unsigned x <= 0 --> x == 0 */
      else if (val == 0 && (cmp == GTU || cmp == LEU))
	cmp = cmp == GTU ? NE : EQ;

      /* unsigned x > 0x7FFFFFFF --> signed x < 0
	 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
      else if (mode == SImode && (cmp == GTU || cmp == LEU)
	       && val == 0x7FFFFFFF)
	{
	  cmp = cmp == GTU ? LT : GE;
	  op1 = const0_rtx;
	}

      /* unsigned x >= 0x80000000 --> signed x < 0
	 unsigned x < 0x80000000 --> signed x >= 0 */
      else if (mode == SImode && (cmp == GEU || cmp == LTU)
	       && (unsigned HOST_WIDE_INT)val
		  == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
	{
	  cmp = cmp == GEU ? LT : GE;
	  op1 = const0_rtx;
	}
    }
}
1918
1919 /* This function implements the canonicalize_comparison target hook.
1920 This wrapper around the internally used sh_canonicalize_comparison
1921 function is needed to do the enum rtx_code <-> int conversion.
1922 Target hooks cannot use enum rtx_code in its definition. */
1923 static void
sh_canonicalize_comparison(int * code,rtx * op0,rtx * op1,bool op0_preserve_value)1924 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1925 bool op0_preserve_value)
1926 {
1927 enum rtx_code tmp_code = (enum rtx_code)*code;
1928 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1929 VOIDmode, op0_preserve_value);
1930 *code = (int)tmp_code;
1931 }
1932
1933 /* This function implements the legitimate_combined_insn target hook,
1934 which the combine pass uses to early reject combined insns, before
1935 it tries to recog the insn and determine its cost. */
1936 static bool
sh_legitimate_combined_insn(rtx_insn * insn)1937 sh_legitimate_combined_insn (rtx_insn* insn)
1938 {
1939 /* Reject combinations of memory loads and zero extensions, as these
1940 interfere with other combine patterns such as zero extracts and bit
1941 tests. The SH2A movu.{b|w} insns are formed later in the
1942 'sh_optimize_extu_exts' pass after combine/split1. */
1943 rtx p = PATTERN (insn);
1944 if (GET_CODE (p) == SET
1945 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1946 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1947 && MEM_P (XEXP (XEXP (p, 1), 0)))
1948 return false;
1949
1950 return true;
1951 }
1952
/* Report the fixed condition code registers: on SH this is the T bit
   only; there is no second condition code register, hence
   INVALID_REGNUM for *P2.  Always returns true.  */
bool
sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
{
  *p1 = T_REG;
  *p2 = INVALID_REGNUM;
  return true;
}
1960
1961 /* Try to calculate the branch distance of a conditional branch in bytes.
1962
1963 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1964 walk from this insn into the next (fall-through) basic block and see if
1965 we hit the label. */
1966 unsigned int
sh_cbranch_distance(rtx_insn * _cbranch_insn,unsigned int max_dist)1967 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1968 {
1969 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1970
1971 if (dump_file)
1972 {
1973 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1974 print_rtl_single (dump_file, cbranch_insn);
1975 }
1976
1977 unsigned int dist = 0;
1978
1979 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1980 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1981 {
1982 const unsigned int i_len = get_attr_length (i);
1983 dist += i_len;
1984
1985 if (dump_file)
1986 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1987 INSN_UID (i), i_len, dist);
1988
1989 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1990 {
1991 if (l == cbranch_insn->jump_target ())
1992 {
1993 if (dump_file)
1994 fprintf (dump_file, " cbranch dist = %u\n", dist);
1995 return dist;
1996 }
1997 break;
1998 }
1999 }
2000
2001 if (dump_file)
2002 fprintf (dump_file, " cbranch dist = unknown\n");
2003
2004 return unknown_cbranch_distance;
2005 }
2006
/* Prepare OPERANDS for a cbranch expansion in MODE.  COMPARISON is the
   desired comparison code; passing LAST_AND_UNUSED_RTX_CODE means "take
   the code from operands[0]".  operands[1]/[2] are canonicalized and
   forced into registers where needed; the (possibly adjusted) comparison
   code is returned.  */
enum rtx_code
prepare_cbranch_operands (rtx *operands, machine_mode mode,
			  enum rtx_code comparison)
{
  /* The force_reg calls below may need to create new pseudos.  */
  gcc_assert (can_create_pseudo_p ());

  if (comparison == LAST_AND_UNUSED_RTX_CODE)
    comparison = GET_CODE (operands[0]);

  sh_canonicalize_comparison (comparison, operands[1], operands[2],
			      mode, false);

  rtx op1 = operands[1];
  operands[1] = force_reg (mode, op1);

  /* When we are handling DImode comparisons, we want to keep constants so
     that we can optimize the component comparisons; however, memory loads
     are better issued as a whole so that they can be scheduled well.
     SImode equality comparisons allow I08 constants, but only when they
     compare r0.  Hence, if operands[1] has to be loaded from somewhere else
     into a register, that register might as well be r0, and we allow the
     constant.  If it is already in a register, this is likely to be
     allocated to a different hard register, thus we load the constant into
     a register unless it is zero.  */
  if (!REG_P (operands[2])
      && (!CONST_INT_P (operands[2])
	  || (mode == SImode && operands[2] != CONST0_RTX (SImode)
	      && ((comparison != EQ && comparison != NE)
		  || (REG_P (op1) && REGNO (op1) != R0_REG)
		  || !satisfies_constraint_I08 (operands[2])))))
    operands[2] = force_reg (mode, operands[2]);

  return comparison;
}
2041
2042 static void
expand_cbranchsi4(rtx * operands,enum rtx_code comparison,profile_probability probability)2043 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2044 profile_probability probability)
2045 {
2046 rtx (*branch_expander) (rtx) = gen_branch_true;
2047 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2048 switch (comparison)
2049 {
2050 case NE: case LT: case LE: case LTU: case LEU:
2051 comparison = reverse_condition (comparison);
2052 branch_expander = gen_branch_false;
2053 default: ;
2054 }
2055 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2056 gen_rtx_fmt_ee (comparison, SImode,
2057 operands[1], operands[2])));
2058 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2059 if (probability.initialized_p ())
2060 add_reg_br_prob_note (jump, probability);
2061 }
2062
/* Public entry point: expand an SImode conditional branch with an
   unknown (uninitialized) branch probability.  */
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
{
  expand_cbranchsi4 (operands, comparison,
		     profile_probability::uninitialized ());
}
2069
/* ??? How should we distribute probabilities when more than one branch
   is generated.  So far we only have some ad-hoc observations:
   - If the operands are random, they are likely to differ in both parts.
   - If comparing items in a hash chain, the operands are random or equal;
     operation should be EQ or NE.
   - If items are searched in an ordered tree from the root, we can expect
     the highpart to be unequal about half of the time; operation should be
     an inequality comparison, operands non-constant, and overall probability
     about 50%.  Likewise for quicksort.
   - Range checks will be often made against constants.  Even if we assume for
     simplicity an even distribution of the non-constant operand over a
     sub-range here, the same probability could be generated with differently
     wide sub-ranges - as long as the ratio of the part of the subrange that
     is before the threshold to the part that comes after the threshold stays
     the same.  Thus, we can't really tell anything here;
     assuming random distribution is at least simple.  */

/* Expand a DImode conditional branch as separate SImode compares and
   branches on the high and low words.  Returns true on success, false
   when COMPARISON is not handled here.  */
bool
expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{
  /* Comparison codes (LAST_AND_UNUSED_RTX_CODE means "unused") for the
     branch taken on the MSW compare, the branch that skips the LSW
     compare, and the branch taken on the LSW compare.  */
  enum rtx_code msw_taken, msw_skip, lsw_taken;
  rtx_code_label *skip_label = NULL;
  rtx op1h, op1l, op2h, op2l;
  int num_branches;
  profile_probability prob, rev_prob;
  profile_probability msw_taken_prob = profile_probability::uninitialized (),
		      msw_skip_prob = profile_probability::uninitialized (),
		      lsw_taken_prob = profile_probability::uninitialized ();

  comparison = prepare_cbranch_operands (operands, DImode, comparison);
  op1h = gen_highpart_mode (SImode, DImode, operands[1]);
  op2h = gen_highpart_mode (SImode, DImode, operands[2]);
  op1l = gen_lowpart (SImode, operands[1]);
  op2l = gen_lowpart (SImode, operands[2]);
  msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
  prob = split_branch_probability;
  rev_prob = prob.invert ();
  switch (comparison)
    {
    case EQ:
      msw_skip = NE;
      lsw_taken = EQ;
      if (prob.initialized_p ())
	{
	  /* FIXME: This is not optimal.  We do not really know the
	     probability that values differ by MSW only, but we should
	     probably distribute probabilities more evenly.  */
	  msw_skip_prob = rev_prob;
	  lsw_taken_prob = prob > profile_probability::never ()
			   ? profile_probability::guessed_always ()
			   : profile_probability::guessed_never ();
	}
      break;
    case NE:
      msw_taken = NE;
      msw_taken_prob = prob;
      lsw_taken = NE;
      lsw_taken_prob = profile_probability::guessed_never ();
      break;
    case GTU: case GT:
      msw_taken = comparison;
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	break;
      if (comparison != GTU || op2h != CONST0_RTX (SImode))
	msw_skip = swap_condition (msw_taken);
      lsw_taken = GTU;
      break;
    case GEU: case GE:
      if (op2l == CONST0_RTX (SImode))
	msw_taken = comparison;
      else
	{
	  msw_taken = comparison == GE ? GT : GTU;
	  msw_skip = swap_condition (msw_taken);
	  lsw_taken = GEU;
	}
      break;
    case LTU: case LT:
      msw_taken = comparison;
      if (op2l == CONST0_RTX (SImode))
	break;
      msw_skip = swap_condition (msw_taken);
      lsw_taken = LTU;
      break;
    case LEU: case LE:
      if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
	msw_taken = comparison;
      else
	{
	  lsw_taken = LEU;
	  if (comparison == LE)
	    msw_taken = LT;
	  else if (op2h != CONST0_RTX (SImode))
	    msw_taken = LTU;
	  else
	    {
	      msw_skip = swap_condition (LTU);
	      break;
	    }
	  msw_skip = swap_condition (msw_taken);
	}
      break;
    default: return false;
    }
  num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
		  + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
		  + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
  if (comparison != EQ && comparison != NE && num_branches > 1)
    {
      if (!CONSTANT_P (operands[2])
	  && prob.initialized_p ()
	  && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
	  && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
	{
	  msw_taken_prob = prob.apply_scale (1, 2);
	  msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
						rev_prob.to_reg_br_prob_base ()
						+ REG_BR_PROB_BASE);
	  lsw_taken_prob = prob;
	}
      else
	{
	  msw_taken_prob = prob;
	  msw_skip_prob = profile_probability::guessed_always ();
	  /* ??? If we have a constant op2h, should we use that when
	     calculating lsw_taken_prob?  */
	  lsw_taken_prob = prob;
	}
    }
  /* Emit the MSW compare/branch, then (optionally) the branch skipping
     the LSW compare, and finally the LSW compare/branch.  */
  operands[1] = op1h;
  operands[2] = op2h;

  if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
    expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
  if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
    {
      rtx taken_label = operands[3];

      /* Operands were possibly modified, but msw_skip doesn't expect this.
	 Always use the original ones.  */
      if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
	{
	  operands[1] = op1h;
	  operands[2] = op2h;
	}

      operands[3] = skip_label = gen_label_rtx ();
      expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
      operands[3] = taken_label;
    }
  operands[1] = op1l;
  operands[2] = op2l;
  if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
    expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
  if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
    emit_label (skip_label);
  return true;
}
2228
2229 /* Given an operand, return 1 if the evaluated operand plugged into an
2230 if_then_else will result in a branch_true, 0 if branch_false, or
2231 -1 if neither nor applies. The truth table goes like this:
2232
2233 op | cmpval | code | result
2234 ---------+--------+---------+--------------------
2235 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2236 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2237 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2238 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2239 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2240 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2241 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2242 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2243 int
sh_eval_treg_value(rtx op)2244 sh_eval_treg_value (rtx op)
2245 {
2246 if (t_reg_operand (op, GET_MODE (op)))
2247 return 1;
2248 if (negt_reg_operand (op, GET_MODE (op)))
2249 return 0;
2250
2251 rtx_code code = GET_CODE (op);
2252 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2253 return -1;
2254
2255 int cmpop = code == EQ ? 1 : 0;
2256 int cmpval = INTVAL (XEXP (op, 1));
2257 if (cmpval != 0 && cmpval != 1)
2258 return -1;
2259
2260 int t;
2261 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2262 t = 0;
2263 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2264 t = 1;
2265 else
2266 return -1;
2267
2268 return t ^ (cmpval == cmpop);
2269 }
2270
2271 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2272 of floating-point comparisons. */
2273 static void
sh_emit_set_t_insn(rtx insn,machine_mode mode)2274 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2275 {
2276 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2277 && GET_CODE (insn) != PARALLEL)
2278 {
2279 insn = gen_rtx_PARALLEL (VOIDmode,
2280 gen_rtvec (3, insn,
2281 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2282 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2283 }
2284 emit_insn (insn);
2285 }
2286
2287 /* Prepare the operands for an scc instruction; make sure that the
2288 compare has been done and the result is in T_REG. */
2289 void
sh_emit_scc_to_t(enum rtx_code code,rtx op0,rtx op1)2290 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2291 {
2292 rtx t_reg = get_t_reg_rtx ();
2293 enum rtx_code oldcode = code;
2294
2295 /* First need a compare insn. */
2296 switch (code)
2297 {
2298 case NE:
2299 /* It isn't possible to handle this case. */
2300 gcc_unreachable ();
2301 case LT:
2302 code = GT;
2303 break;
2304 case LE:
2305 code = GE;
2306 break;
2307 case LTU:
2308 code = GTU;
2309 break;
2310 case LEU:
2311 code = GEU;
2312 break;
2313 default:
2314 break;
2315 }
2316 if (code != oldcode)
2317 std::swap (op0, op1);
2318
2319 machine_mode mode = GET_MODE (op0);
2320 if (mode == VOIDmode)
2321 mode = GET_MODE (op1);
2322
2323 op0 = force_reg (mode, op0);
2324 if ((code != EQ && code != NE
2325 && (op1 != const0_rtx
2326 || code == GTU || code == GEU || code == LTU || code == LEU))
2327 || (mode == DImode && op1 != const0_rtx)
2328 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2329 op1 = force_reg (mode, op1);
2330
2331 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2332 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2333 mode);
2334 }
2335
/* Called from the md file, set up the operands of a compare instruction.
   operands[0] holds the comparison rtx, operands[1]/[2] the compared
   values and operands[3] the branch target label.  Emits the T-bit
   setting compare insn(s) followed by the conditional branch.  */
void
sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
{
  enum rtx_code code = GET_CODE (operands[0]);
  enum rtx_code branch_code;
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx insn;
  /* Set when a float GE compare is split into GT plus EQ compares.  */
  bool need_ccmpeq = false;

  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      op0 = force_reg (mode, op0);
      op1 = force_reg (mode, op1);
    }
  else
    {
      if (code != EQ || mode == DImode)
	{
	  /* Force args into regs, since we can't use constants here.  */
	  op0 = force_reg (mode, op0);
	  if (op1 != const0_rtx || code == GTU || code == GEU)
	    op1 = force_reg (mode, op1);
	}
    }

  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* Swap the operands to map the remaining codes onto the compares
	 that are available for float modes.  */
      if (code == LT
	  || (code == LE && TARGET_IEEE && TARGET_SH2E)
	  || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
	{
	  std::swap (op0, op1);
	  code = swap_condition (code);
	}

      /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only.  */
      if (code == GE)
	{
	  gcc_assert (TARGET_IEEE && TARGET_SH2E);
	  need_ccmpeq = true;
	  code = GT;
	}

      /* Now we can have EQ, NE, GT, LE.  NE and LE are then transformed
	 to EQ/GT respectively.  */
      gcc_assert (code == EQ || code == GT || code == NE || code == LE);
    }

  /* Codes without a direct T-bit compare are emitted as the reversed
     compare followed by a branch-false.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GE:
    case GTU:
    case GEU:
      branch_code = code;
      break;
    case NE:
    case LT:
    case LE:
    case LTU:
    case LEU:
      branch_code = reverse_condition (code);
      break;
    default:
      gcc_unreachable ();
    }

  insn = gen_rtx_SET (get_t_reg_rtx (),
		      gen_rtx_fmt_ee (branch_code, SImode, op0, op1));

  sh_emit_set_t_insn (insn, mode);
  if (need_ccmpeq)
    sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);

  if (branch_code == code)
    emit_jump_insn (gen_branch_true (operands[3]));
  else
    emit_jump_insn (gen_branch_false (operands[3]));
}
2418
/* Expand a compare-and-set (scc): compute the comparison operands[1]
   applied to operands[2]/[3] into the T bit, then copy (or negate) the
   T bit into operands[0].  */
void
sh_emit_compare_and_set (rtx *operands, machine_mode mode)
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx op0 = operands[2];
  rtx op1 = operands[3];
  /* Join label for the two-compare IEEE float GE sequence.  */
  rtx_code_label *lab = NULL;
  /* Whether the T bit must be inverted on its way into operands[0].  */
  bool invert = false;

  op0 = force_reg (mode, op0);
  if ((code != EQ && code != NE
       && (op1 != const0_rtx
	   || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    op1 = force_reg (mode, op1);

  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* Map LT/LE onto GT/GE by swapping the operands.  */
      if (code == LT || code == LE)
	{
	  std::swap (op0, op1);
	  code = swap_condition (code);
	}
      if (code == GE)
	{
	  if (TARGET_IEEE)
	    {
	      /* IEEE GE: test EQ first and short-circuit to LAB,
		 otherwise test GT.  */
	      lab = gen_label_rtx ();
	      sh_emit_scc_to_t (EQ, op0, op1);
	      emit_jump_insn (gen_branch_true (lab));
	      code = GT;
	    }
	  else
	    {
	      /* Non-IEEE: GE is the negation of LT.  */
	      code = LT;
	      invert = true;
	    }
	}
    }

  /* NE cannot be computed into T directly; compute EQ and invert.  */
  if (code == NE)
    {
      code = EQ;
      invert = true;
    }

  sh_emit_scc_to_t (code, op0, op1);
  if (lab)
    emit_label (lab);
  if (invert)
    emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
  else
    emit_move_insn (operands[0], get_t_reg_rtx ());
}
2474
/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  /* Push to a pre-decremented address: store the high word first so the
     decrementing addresses come out right.  */
  if (MEM_P (dst)
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0" "\n"
	   " mov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      /* Reading the MAC register pair needs sts.  */
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0" "\n"
	       " sts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.  */
      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0" "\n"
	       " mov %1,%0";
      else
	return "mov %1,%0" "\n"
	       " mov %T1,%T0";
    }
  else if (CONST_INT_P (src))
    {
      /* The high word is the sign extension of the constant.  */
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (MEM_P (src))
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      /* Determine the base register of the address so we can detect an
	 overlap with the destination below.  */
      switch (GET_CODE (inside))
	{
	case REG:
	  ptrreg = REGNO (inside);
	  break;

	case SUBREG:
	  ptrreg = subreg_regno (inside);
	  break;

	case PLUS:
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  gcc_assert (!REG_P (XEXP (inside, 1)));
	  break;

	case LABEL_REF:
	  return "mov.l %1,%0" "\n"
		 " mov.l %1+4,%T0";
	case POST_INC:
	  return "mov.l %1,%0" "\n"
		 " mov.l %1,%T0";
	default:
	  gcc_unreachable ();
	}

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0" "\n"
	       " mov.l %1,%0";
    }

  return "mov.l %1,%0" "\n"
	 " mov.l %T1,%T0";
}
2565
/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */
static void
print_slot (rtx_sequence *seq)
{
  /* Output the slot insn now ...  */
  final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);

  /* ... and mark it deleted so that final does not emit it again.  */
  seq->insn (1)->set_deleted ();
}
2576
/* Output assembler for a conditional jump INSN to OP that is out of range
   for a plain bt/bf.  Emits either a braf-based PC-relative sequence
   (TARGET_SH2) or an indirect jmp through a constant-pool entry, using a
   scratch register from a preceding indirect_jump_scratch insn when one
   is available, or saving/restoring r13 otherwise.  */
const char *
output_far_jump (rtx_insn *insn, rtx op)
{
  struct { rtx lab, reg, op; } this_jmp;
  rtx_code_label *braf_base_lab = NULL;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx_insn *prev;

  this_jmp.lab = gen_label_rtx ();

  /* A mov.w-loaded displacement only reaches +-32k; use the short braf
     form when the target is in range and this is not a crossing jump.  */
  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766
      && ! CROSSING_JUMP_P (insn))
    {
      far = 0;
      jump = "mov.w %O0,%1" "\n"
	     " braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
	{
	  if (TARGET_SH2)
	    jump = "mov.l %O0,%1" "\n"
		   " braf %1";
	  else
	    jump = "mov.l r0,@-r15" "\n"
		   " mova %O0,r0" "\n"
		   " mov.l @r0,%1" "\n"
		   " add r0,%1" "\n"
		   " mov.l @r15+,r0" "\n"
		   " jmp @%1";
	}
      else
	jump = "mov.l %O0,%1" "\n"
	       " jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      /* The non-SH2 PIC sequence clobbers r0; if r0 is the scratch reg,
	 use r1 as the temporary instead.  */
      if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
	jump = "mov.l r1,@-r15" "\n"
	       " mova %O0,r0" "\n"
	       " mov.l @r0,r1" "\n"
	       " add r1,r0" "\n"
	       " mov.l @r15+,r1" "\n"
	       " jmp @%1";
      output_asm_insn (jump, &this_jmp.lab);
      if (dbr_sequence_length ())
	print_slot (final_sequence);
      else
	output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
	print_slot (final_sequence);

      /* No scratch register: save/restore r13 around the jump.  */
      this_jmp.reg = gen_rtx_REG (SImode, 13);
      output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this_jmp.lab);
      output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      /* braf offsets are relative to this label.  */
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
					 CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
  this_jmp.op = op;
  /* Emit the constant-pool entry holding the target address (or the
     PC-relative displacement for the PIC cases).  */
  if (far && flag_pic)
    {
      if (TARGET_SH2)
	this_jmp.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this_jmp.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
  return "";
}
2667
/* Local label counter, used for constants in the pool and inside
   pattern branches.  */
static int lf = 100;

/* Output code for ordinary branches.  LOGIC is nonzero for a
   branch-on-true (bt), zero for branch-on-false (bf).  The length
   attribute of INSN selects between the short form and the longer
   inverted-branch + bra sequences used for out-of-range targets.  */
const char *
output_branch (int logic, rtx_insn *insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */
      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
	      && get_attr_length (final_sequence->insn (1)))
	    {
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* FALLTHRU */
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  gcc_assert (!final_sequence
		      || !(INSN_ANNULLED_BRANCH_P
			   (XVECEXP (final_sequence, 0, 0))));
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* FALLTHRU */
    case 4:
      {
	char buffer[10];

	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
2758
/* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
   fill in operands 9 as a label to the successor insn.
   We try to use jump threading where possible.
   IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
   we assume the jump is taken.  I.e. EQ means follow jmp and bf, NE means
   follow jmp and bt, if the address is in range.  */
const char *
output_branchy_insn (enum rtx_code code, const char *templ,
		     rtx_insn *insn, rtx *operands)
{
  rtx_insn *next_insn = NEXT_INSN (insn);

  if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
    {
      rtx src = SET_SRC (PATTERN (next_insn));
      if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
	{
	  /* Following branch not taken */
	  rtx_code_label *lab = gen_label_rtx ();
	  emit_label_after (lab, next_insn);
	  INSN_ADDRESSES_NEW (lab,
			      INSN_ADDRESSES (INSN_UID (next_insn))
			      + get_attr_length (next_insn));
	  operands[9] = lab;
	  return templ;
	}
      else
	{
	  /* Following branch assumed taken: thread directly to its
	     target when the branch is in range.  */
	  int offset = (branch_dest (next_insn)
			- INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
	  if (offset >= -252 && offset <= 258)
	    {
	      if (GET_CODE (src) == IF_THEN_ELSE)
		/* branch_true */
		src = XEXP (src, 1);
	      operands[9] = src;
	      return templ;
	    }
	}
    }
  /* Fallback: emit a fresh label right after INSN itself.  */
  rtx_code_label *lab = gen_label_rtx ();
  emit_label_after (lab, insn);
  INSN_ADDRESSES_NEW (lab,
		      INSN_ADDRESSES (INSN_UID (insn))
		      + get_attr_length (insn));
  operands[9] = lab;
  return templ;
}
2807
/* Output the bt-over + fcmp/eq retest sequence used for IEEE float
   compares.  Operand 9 (the bt target) is filled in by
   output_branchy_insn.  */
const char *
output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt %l9" "\n"
				  " fcmp/eq %1,%0",
			      insn, operands);
}
2815
2816 /* Output the start of the assembler file. */
2817 static void
sh_file_start(void)2818 sh_file_start (void)
2819 {
2820 default_file_start ();
2821
2822 if (TARGET_ELF)
2823 /* We need to show the text section with the proper
2824 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2825 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2826 will complain. We can teach GAS specifically about the
2827 default attributes for our choice of text section, but
2828 then we would have to change GAS again if/when we change
2829 the text section name. */
2830 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2831 else
2832 /* Switch to the data section so that the coffsem symbol
2833 isn't in the text section. */
2834 switch_to_section (data_section);
2835
2836 if (TARGET_LITTLE_ENDIAN)
2837 fputs ("\t.little\n", asm_out_file);
2838 }
2839
2840 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2841 need to be output as pointers to function descriptors for
2842 FDPIC. */
2843
2844 static bool
sh_assemble_integer(rtx value,unsigned int size,int aligned_p)2845 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2846 {
2847 if (TARGET_FDPIC && size == UNITS_PER_WORD
2848 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2849 {
2850 fputs ("\t.long\t", asm_out_file);
2851 output_addr_const (asm_out_file, value);
2852 fputs ("@FUNCDESC\n", asm_out_file);
2853 return true;
2854 }
2855 return default_assemble_integer (value, size, aligned_p);
2856 }
2857
2858 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2859 static bool
unspec_caller_rtx_p(rtx pat)2860 unspec_caller_rtx_p (rtx pat)
2861 {
2862 rtx base, offset;
2863 split_const (pat, &base, &offset);
2864
2865 if (GET_CODE (base) == UNSPEC)
2866 {
2867 if (XINT (base, 1) == UNSPEC_CALLER)
2868 return true;
2869 for (int i = 0; i < XVECLEN (base, 0); i++)
2870 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2871 return true;
2872 }
2873 return false;
2874 }
2875
2876 /* Indicate that INSN cannot be duplicated. This is true for insn
2877 that generates a unique label. */
2878 static bool
sh_cannot_copy_insn_p(rtx_insn * insn)2879 sh_cannot_copy_insn_p (rtx_insn *insn)
2880 {
2881 if (!reload_completed || !flag_pic)
2882 return false;
2883
2884 if (!NONJUMP_INSN_P (insn))
2885 return false;
2886 if (asm_noperands (insn) >= 0)
2887 return false;
2888
2889 rtx pat = PATTERN (insn);
2890
2891 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2892 return false;
2893
2894 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2895 {
2896 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2897 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2898 return true;
2899 }
2900
2901 if (GET_CODE (pat) != SET)
2902 return false;
2903 pat = SET_SRC (pat);
2904
2905 if (unspec_caller_rtx_p (pat))
2906 return true;
2907
2908 return false;
2909 }
2910
/* Number of instructions used to make an arithmetic right shift by N.
   Entries of 8 mark shift counts that are too long to expand inline;
   for those expand_ashiftrt falls back to a helper call or a dynamic
   shift — see expand_ashiftrt below.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2914
/* Description of a logical left or right shift, when expanded to a sequence
   of 1/2/8/16 shifts.
   Notice that one bit right shifts clobber the T bit.  One bit left shifts
   are done with an 'add Rn,Rm' insn and thus do not clobber the T bit.  */
enum
{
  ASHL_CLOBBERS_T = 1 << 0,
  LSHR_CLOBBERS_T = 1 << 1
};

struct ashl_lshr_sequence
{
  char insn_count;	  /* Number of shift insns in the sequence.  */
  signed char amount[6];  /* Per-insn shift amounts; a negative value
			     means a shift in the opposite direction.  */
  char clobbers_t;	  /* Mask of ASHL_CLOBBERS_T / LSHR_CLOBBERS_T.  */
};
2931
/* Expansions of a logical left/right shift by N (the array index) into
   sequences of 1/2/8/16-bit shift insns.  Indexed by shift count 0..31.  */
static const struct ashl_lshr_seq ashl_lshr_seq[32] =
{
  { 0, { 0 },		    0 }, // 0
  { 1, { 1 },		    LSHR_CLOBBERS_T },
  { 1, { 2 },		    0 },
  { 2, { 2, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 2, 2 },	    0 }, // 4
  { 3, { 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 2, 2, 2 },	    0 },
  { 4, { 2, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 1, { 8 },		    0 }, // 8
  { 2, { 8, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 8, 2 },	    0 },
  { 3, { 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, 2, 2 },	    0 }, // 12
  { 4, { 8, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, -2, 8 },	    0 },
  { 3, { 8, -1, 8 },	    ASHL_CLOBBERS_T },
  { 1, { 16 },		    0 }, // 16
  { 2, { 16, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 16, 2 },	    0 },
  { 3, { 16, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 2, 2 },	    0 }, // 20
  { 4, { 16, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, -2, 8 },	    0 },
  { 3, { 16, -1, 8 },	    ASHL_CLOBBERS_T },
  { 2, { 16, 8 },	    0 }, // 24
  { 3, { 16, 1, 8 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 8, 2 },	    0 },
  { 4, { 16, 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 4, { 16, 8, 2, 2 },	    0 }, // 28
  { 4, { 16, -1, -2, 16 },  ASHL_CLOBBERS_T },
  { 3, { 16, -2, 16 },	    0 },

  /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
     For a left shift by 31 a 2 insn and-rotl sequences can be used.
     However, the shift-and combiner code needs this entry here to be in
     terms of real shift insns.  */
  { 3, { 16, -1, 16 },	    ASHL_CLOBBERS_T }
};
2972
/* Individual shift amounts for shift amounts < 16, up to three highmost
   bits might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */
static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
{
  { 0, { 0 },		    0 }, // 0
  { 1, { 1 },		    LSHR_CLOBBERS_T },
  { 1, { 2 },		    0 },
  { 2, { 2, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 2, 2 },	    0 }, // 4
  { 3, { 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 2, { 8, -2 },	    0 },
  { 2, { 8, -1 },	    ASHL_CLOBBERS_T },
  { 1, { 8 },		    0 }, // 8
  { 2, { 8, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 8, 2 },	    0 },
  { 3, { 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 8, 2, 2 },	    0 }, // 12
  { 3, { 16, -2, -1 },	    ASHL_CLOBBERS_T },
  { 2, { 16, -2 },	    0 },
  { 2, { 16, -1 },	    ASHL_CLOBBERS_T },
  { 1, { 16 },		    0 }, // 16
  { 2, { 16, 1 },	    LSHR_CLOBBERS_T },
  { 2, { 16, 2 },	    0 },
  { 3, { 16, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 2, 2 },	    0 }, // 20
  { 4, { 16, 2, 1, 2 },	    LSHR_CLOBBERS_T },
  { 3, { 16, -2, 8 },	    0 },
  { 3, { 16, -1, 8 },	    ASHL_CLOBBERS_T },
  { 2, { 16, 8 },	    0 }, // 24
  { 3, { 16, 1, 8 },	    LSHR_CLOBBERS_T },
  { 3, { 16, 8, 2 },	    0 },
  { 4, { 16, 8, 1, 2 },	    LSHR_CLOBBERS_T },
  { 4, { 16, 8, 2, 2 },	    0 }, // 28
  { 4, { 16, -1, -2, 16 },  ASHL_CLOBBERS_T },
  { 3, { 16, -2, 16 },	    0 },
  { 3, { 16, -1, 16 },	    ASHL_CLOBBERS_T }
};
3011
3012 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3013 will clobber the T bit. */
3014 bool
sh_ashlsi_clobbers_t_reg_p(rtx shift_amount)3015 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3016 {
3017 gcc_assert (CONST_INT_P (shift_amount));
3018
3019 const int shift_amount_i = INTVAL (shift_amount) & 31;
3020
3021 /* Special case for shift count of 31: use and-rotl sequence. */
3022 if (shift_amount_i == 31)
3023 return true;
3024
3025 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3026 & ASHL_CLOBBERS_T) != 0;
3027 }
3028
3029 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3030 instructions will clobber the T bit. */
3031 bool
sh_lshrsi_clobbers_t_reg_p(rtx shift_amount)3032 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3033 {
3034 gcc_assert (CONST_INT_P (shift_amount));
3035
3036 /* For right shifts the constant might be negative. */
3037 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3038
3039 /* Special case for shift count of 31: use shll-movt sequence. */
3040 if (shift_amount_i == 31)
3041 return true;
3042
3043 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3044 & LSHR_CLOBBERS_T) != 0;
3045 }
3046
3047 /* Return true if it is potentially beneficial to use a dynamic shift
3048 instruction (shad / shar) instead of a combination of 1/2/8/16
3049 shift instructions for the specified shift count.
3050 If dynamic shifts are not available, always return false. */
3051 bool
sh_dynamicalize_shift_p(rtx count)3052 sh_dynamicalize_shift_p (rtx count)
3053 {
3054 gcc_assert (CONST_INT_P (count));
3055
3056 /* For right shifts the constant might be negative. */
3057 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3058 int insn_count;
3059
3060 /* For left and right shifts, there are shorter 2 insn sequences for
3061 shift amounts of 31. */
3062 if (shift_amount_i == 31)
3063 insn_count = 2;
3064 else
3065 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3066
3067 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3068 }
3069
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an
   arithmetic shift to shift it by N without data loss, and quicker than by
   other means?  ((n) | 8) == 15 holds exactly for N equal to 7 or 15.  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3075
3076 /* Return the cost of a shift. */
3077 static inline int
shiftcosts(rtx x)3078 shiftcosts (rtx x)
3079 {
3080 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3081 {
3082 if (GET_MODE (x) == DImode
3083 && CONST_INT_P (XEXP (x, 1))
3084 && INTVAL (XEXP (x, 1)) == 1)
3085 return 2;
3086
3087 /* Everything else is invalid, because there is no pattern for it. */
3088 return -1;
3089 }
3090 /* If shift by a non constant, then this will be expensive. */
3091 if (!CONST_INT_P (XEXP (x, 1)))
3092 return SH_DYNAMIC_SHIFT_COST;
3093
3094 /* Otherwise, return the true cost in instructions. Cope with out of range
3095 shift counts more or less arbitrarily. */
3096 int value = INTVAL (XEXP (x, 1)) & 31;
3097
3098 if (GET_CODE (x) == ASHIFTRT)
3099 {
3100 int cost = ashiftrt_insns[value];
3101 /* If dynamic shifts are available and profitable in this case, then we
3102 put the constant in a reg and use shad. */
3103 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3104 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3105 return cost;
3106 }
3107 else
3108 return ashl_lshr_seq[value].insn_count;
3109 }
3110
3111 /* Return the cost of an AND/XOR/IOR operation. */
3112 static inline int
and_xor_ior_costs(rtx x,int code)3113 and_xor_ior_costs (rtx x, int code)
3114 {
3115 /* On SH1-4 we have only max. SImode operations.
3116 Double the cost for modes > SImode. */
3117 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3118
3119 /* A logical operation with two registers is a single cycle
3120 instruction. */
3121 if (!CONST_INT_P (XEXP (x, 1)))
3122 return 1 * cost_scale;
3123
3124 int i = INTVAL (XEXP (x, 1));
3125
3126 /* These constants are single cycle extu.[bw] instructions. */
3127 if ((i == 0xff || i == 0xffff) && code == AND)
3128 return 1 * cost_scale;
3129 /* Constants that can be used in an instruction as an immediate are
3130 a single cycle, but this requires r0, so make it a little more
3131 expensive. */
3132 if (CONST_OK_FOR_K08 (i))
3133 return 2 * cost_scale;
3134 /* Constants that can be loaded with a mov immediate need one more cycle.
3135 This case is probably unnecessary. */
3136 if (CONST_OK_FOR_I08 (i))
3137 return 2 * cost_scale;
3138 /* Any other constant requires an additional 2 cycle pc-relative load.
3139 This case is probably unnecessary. */
3140 return 3 * cost_scale;
3141 }
3142
/* Return the cost of an addition or a subtraction, in number of insns.  */
static inline int
addsubcosts (rtx x)
{
  if (GET_MODE (x) == SImode)
    {
      /* The addc or subc patterns will eventually become one or two
	 instructions.  Below are some costs for some of the patterns
	 which combine would reject because the costs of the individual
	 insns in the patterns are lower.

	 FIXME: It would be much easier if we had something like insn cost
	 attributes and the cost calculation machinery used those attributes
	 in the first place.  This would eliminate redundant recog-like C
	 code to calculate costs of complex patterns.  */
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);

      if (GET_CODE (x) == PLUS)
	{
	  /* (x & 1) + (y + y) or (x & 1) + (y * 2): addc-style pattern.  */
	  if (GET_CODE (op0) == AND
	      && XEXP (op0, 1) == const1_rtx
	      && (GET_CODE (op1) == PLUS
		  || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
	    return 1;

	  /* (x * 2) + (y >> 31): another single-insn combination.  */
	  if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
	      && GET_CODE (op1) == LSHIFTRT
	      && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
	    return 1;
	}
      /* Let's assume that adding the result of an insns that stores into
	 the T bit is cheap.  */
      if (treg_set_expr (op1, SImode))
	return 1;
      if (treg_set_expr (op0, SImode))
	return 1;
    }

  /* On SH1-4 we have only max. SImode operations.
     Double the cost for modes > SImode.  */
  const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;

  /* Adding a register is a single cycle insn.  */
  if (REG_P (XEXP (x, 1))
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1 * cost_scale;

  /* Likewise for small constants.  */
  if (CONST_INT_P (XEXP (x, 1))
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1 * cost_scale;

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3 * cost_scale;
}
3200
3201 /* Return the cost of a multiply. */
3202 static inline int
multcosts(rtx x ATTRIBUTE_UNUSED)3203 multcosts (rtx x ATTRIBUTE_UNUSED)
3204 {
3205 if (sh_multcost >= 0)
3206 return sh_multcost;
3207
3208 if (TARGET_SH2)
3209 {
3210 /* We have a mul insn, so we can never take more than the mul and the
3211 read of the mac reg, but count more because of the latency and extra
3212 reg usage. */
3213 if (optimize_size)
3214 return 2;
3215 return 3;
3216 }
3217
3218 /* If we're aiming at small code, then just count the number of
3219 insns in a multiply call sequence. */
3220 if (optimize_size)
3221 return 5;
3222
3223 /* Otherwise count all the insns in the routine we'd be calling too. */
3224 return 20;
3225 }
3226
3227 /* Compute a (partial) cost for rtx X. Return true if the complete
3228 cost has been computed, and false if subexpressions should be
3229 scanned. In either case, *TOTAL contains the cost result. */
3230 static bool
sh_rtx_costs(rtx x,machine_mode mode ATTRIBUTE_UNUSED,int outer_code,int opno ATTRIBUTE_UNUSED,int * total,bool speed ATTRIBUTE_UNUSED)3231 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3232 int opno ATTRIBUTE_UNUSED,
3233 int *total, bool speed ATTRIBUTE_UNUSED)
3234 {
3235 int code = GET_CODE (x);
3236
3237 switch (code)
3238 {
3239 /* The lower-subreg pass decides whether to split multi-word regs
3240 into individual regs by looking at the cost for a SET of certain
3241 modes with the following patterns:
3242 (set (reg) (reg))
3243 (set (reg) (const_int 0))
3244 On machines that support vector-move operations a multi-word move
3245 is the same cost as individual reg move. On SH there is no
3246 vector-move, so we have to provide the correct cost in the number
3247 of move insns to load/store the reg of the mode in question. */
3248 case SET:
3249 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3250 {
3251 *total = COSTS_N_INSNS (1);
3252 return true;
3253 }
3254
3255 if (register_operand (SET_DEST (x), VOIDmode)
3256 && (register_operand (SET_SRC (x), VOIDmode)
3257 || satisfies_constraint_Z (SET_SRC (x))))
3258 {
3259 const machine_mode mode = GET_MODE (SET_DEST (x));
3260 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3261 / mov_insn_size (mode, TARGET_SH2A));
3262 return true;
3263 }
3264 return false;
3265
3266 /* The cost of a mem access is mainly the cost of the address mode. */
3267 case MEM:
3268 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3269 true);
3270 return true;
3271
3272 case IF_THEN_ELSE:
3273 /* This case is required for the if_then_else negc pattern. */
3274 if (treg_set_expr (XEXP (x, 0), SImode))
3275 {
3276 *total = COSTS_N_INSNS (1);
3277 return true;
3278 }
3279 else
3280 return false;
3281
3282 /* Zero extracts of single bits are usually combine patterns for the
3283 tst insns. */
3284 case ZERO_EXTRACT:
3285 if (GET_CODE (XEXP (x, 0)) == XOR
3286 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3287 && XEXP (x, 1) == const1_rtx
3288 && CONST_INT_P (XEXP (x, 2))
3289 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3290 /* Check that the xor constaint overlaps with the extracted bit. */
3291 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3292 {
3293 *total = 1; //COSTS_N_INSNS (1);
3294 return true;
3295 }
3296
3297 /* div0s variant. */
3298 if (GET_CODE (XEXP (x, 0)) == XOR
3299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3300 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3301 {
3302 *total = 1;
3303 return true;
3304 }
3305 return false;
3306
3307 /* The cost of a sign or zero extend depends on whether the source is a
3308 reg or a mem. In case of a mem take the address into account. */
3309 case SIGN_EXTEND:
3310 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3311 {
3312 *total = COSTS_N_INSNS (1);
3313 return true;
3314 }
3315 if (MEM_P (XEXP (x, 0)))
3316 {
3317 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3318 GET_MODE (XEXP (x, 0)),
3319 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3320 return true;
3321 }
3322 return false;
3323
3324 case ZERO_EXTEND:
3325 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3326 {
3327 *total = COSTS_N_INSNS (1);
3328 return true;
3329 }
3330 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3331 && (GET_MODE (XEXP (x, 0)) == QImode
3332 || GET_MODE (XEXP (x, 0)) == HImode))
3333 {
3334 /* Handle SH2A's movu.b and movu.w insn. */
3335 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3336 GET_MODE (XEXP (x, 0)),
3337 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3338 return true;
3339 }
3340 return false;
3341
3342 /* mems for SFmode and DFmode can be inside a parallel due to
3343 the way the fpscr is handled. */
3344 case PARALLEL:
3345 for (int i = 0; i < XVECLEN (x, 0); i++)
3346 {
3347 rtx xx = XVECEXP (x, 0, i);
3348 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3349 {
3350 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3351 GET_MODE (XEXP (xx, 0)),
3352 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3353 return true;
3354 }
3355 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3356 {
3357 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3358 GET_MODE (XEXP (xx, 1)),
3359 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3360 return true;
3361 }
3362 }
3363
3364 if (sh_1el_vec (x, VOIDmode))
3365 *total = outer_code != SET;
3366 else if (sh_rep_vec (x, VOIDmode))
3367 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3368 + (outer_code != SET));
3369 else
3370 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3371 return true;
3372
3373 case CONST_INT:
3374 if (CONST_OK_FOR_I08 (INTVAL (x)))
3375 *total = 0;
3376 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3377 && CONST_OK_FOR_K08 (INTVAL (x)))
3378 *total = 1;
3379 /* prepare_cmp_insn will force costly constants int registers before
3380 the cbranch[sd]i4 patterns can see them, so preserve potentially
3381 interesting ones not covered by I08 above. */
3382 else if (outer_code == COMPARE
3383 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3384 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3385 || INTVAL (x) == 0x7fffffff
3386 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3387 *total = 1;
3388 else
3389 *total = 8;
3390 return true;
3391
3392 case EQ:
3393 /* An and with a constant compared against zero is
3394 most likely going to be a TST #imm, R0 instruction. */
3395 if (XEXP (x, 1) == const0_rtx
3396 && ((GET_CODE (XEXP (x, 0)) == AND
3397 || (SUBREG_P (XEXP (x, 0))
3398 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3399 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3400 {
3401 *total = 1;
3402 return true;
3403 }
3404
3405 else if (XEXP (x, 1) == const0_rtx
3406 && GET_CODE (XEXP (x, 0)) == AND
3407 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3409 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3410 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3411 {
3412 *total = 1;
3413 return true;
3414 }
3415 else
3416 return false;
3417
3418 case SMIN:
3419 case SMAX:
3420 /* This is most likely a clips.b or clips.w insn that is being made up
3421 by combine. */
3422 if (TARGET_SH2A
3423 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3424 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3425 && REG_P (XEXP (XEXP (x, 0), 0))
3426 && CONST_INT_P (XEXP (x, 1)))
3427 {
3428 *total = COSTS_N_INSNS (1);
3429 return true;
3430 }
3431 else
3432 return false;
3433
3434 case CONST:
3435 case LABEL_REF:
3436 case SYMBOL_REF:
3437 *total = 5;
3438 return true;
3439
3440 case CONST_DOUBLE:
3441 /* prepare_cmp_insn will force costly constants int registers before
3442 the cbranchdi4 pattern can see them, so preserve potentially
3443 interesting ones. */
3444 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3445 *total = 1;
3446 else
3447 *total = 10;
3448 return true;
3449
3450 case CONST_VECTOR:
3451 /* FIXME: This looks broken. Only the last statement has any effect.
3452 Probably this could be folded with the PARALLEL case? */
3453 if (x == CONST0_RTX (GET_MODE (x)))
3454 *total = 0;
3455 else if (sh_1el_vec (x, VOIDmode))
3456 *total = outer_code != SET;
3457 if (sh_rep_vec (x, VOIDmode))
3458 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3459 + (outer_code != SET));
3460 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3461 return true;
3462
3463 case PLUS:
3464 case MINUS:
3465 *total = COSTS_N_INSNS (addsubcosts (x));
3466 return true;
3467
3468 case AND:
3469 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3470 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3471 {
3472 *total = COSTS_N_INSNS (1);
3473 return true;
3474 }
3475 /* Fall through. */
3476
3477 case XOR:
3478 case IOR:
3479 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3480 return true;
3481
3482 case MULT:
3483 *total = COSTS_N_INSNS (multcosts (x));
3484 return true;
3485
3486 case LT:
3487 case GE:
3488 /* div0s sign comparison. */
3489 if (GET_CODE (XEXP (x, 0)) == XOR
3490 && REG_P ((XEXP (XEXP (x, 0), 0)))
3491 && REG_P ((XEXP (XEXP (x, 0), 1)))
3492 && satisfies_constraint_Z (XEXP (x, 1)))
3493 {
3494 *total = COSTS_N_INSNS (1);
3495 return true;
3496 }
3497 else
3498 return false;
3499
3500 case LSHIFTRT:
3501 /* div0s sign comparison. */
3502 if (GET_CODE (XEXP (x, 0)) == XOR
3503 && REG_P ((XEXP (XEXP (x, 0), 0)))
3504 && REG_P ((XEXP (XEXP (x, 0), 1)))
3505 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3506 {
3507 *total = COSTS_N_INSNS (1);
3508 return true;
3509 }
3510 /* FALLTHRU */
3511 case ASHIFT:
3512 case ASHIFTRT:
3513 {
3514 int cost = shiftcosts (x);
3515 if (cost < 0)
3516 return false;
3517 *total = COSTS_N_INSNS (cost);
3518 return true;
3519 }
3520
3521 case DIV:
3522 case UDIV:
3523 case MOD:
3524 case UMOD:
3525 *total = COSTS_N_INSNS (20);
3526 return true;
3527
3528 case FLOAT:
3529 case FIX:
3530 *total = 100;
3531 return true;
3532
3533 default:
3534 return false;
3535 }
3536 }
3537
3538 /* Determine the size of the fundamental move insn that will be used
3539 for the specified mode. */
3540 static inline int
mov_insn_size(machine_mode mode,bool consider_sh2a)3541 mov_insn_size (machine_mode mode, bool consider_sh2a)
3542 {
3543 const int mode_sz = GET_MODE_SIZE (mode);
3544
3545 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3546 || (TARGET_FMOVD && mode == DFmode))
3547 return mode_sz;
3548 else
3549 {
3550 /* The max. available mode for actual move insns is SImode.
3551 Larger accesses will be split into multiple loads/stores. */
3552 const int max_mov_sz = GET_MODE_SIZE (SImode);
3553 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3554 }
3555 }
3556
/* Determine the maximum possible displacement for a move insn for the
   specified mode.  */
int
sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
{
  /* The 4 byte displacement move insns are the same as the 2 byte
     versions but take a 12 bit displacement.  All we need to do is to
     scale the max. displacement value accordingly.  */
  const int disp_scale = consider_sh2a ? (4095 / 15) : 1;

  /* SH2A supports FPU move insns with 12 bit displacements.
     Other variants do not support any kind of displacements for
     FPU move insns.  */
  if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
    return 0;
  else
    {
      const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
      const int mode_sz = GET_MODE_SIZE (mode);
      int r = 15 * mov_insn_sz * disp_scale;

      /* If the mov insn will be split into multiple loads/stores, the
	 maximum possible displacement is a bit smaller.  */
      if (mode_sz > mov_insn_sz)
	r -= mode_sz - mov_insn_sz;
      return r;
    }
}
3585
3586 /* Determine the alignment mask for a move insn of the
3587 specified mode. */
3588 static inline int
mov_insn_alignment_mask(machine_mode mode,bool consider_sh2a)3589 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3590 {
3591 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3592 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3593 }
3594
3595 /* Return the displacement value of a displacement address. */
3596 HOST_WIDE_INT
sh_disp_addr_displacement(rtx x)3597 sh_disp_addr_displacement (rtx x)
3598 {
3599 gcc_assert (satisfies_constraint_Sdd (x));
3600 return INTVAL (XEXP (XEXP (x, 0), 1));
3601 }
3602
/* Compute the cost of an address.  Higher values indicate more expensive
   addressing modes; R0-restricted modes are charged extra.  */
static int
sh_address_cost (rtx x, machine_mode mode,
		 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  /* 'GBR + 0'.  Account one more because of R0 restriction.  */
  if (REG_P (x) && REGNO (x) == GBR_REG)
    return 2;

  /* Simple reg, post-inc, pre-dec addressing.  */
  if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
    return 1;

  /* 'reg + disp' addressing.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
    {
      /* 'GBR + disp'.  Account one more because of R0 restriction.  */
      if (REGNO (XEXP (x, 0)) == GBR_REG
	  && gbr_displacement (XEXP (x, 1), mode))
	return 2;

      const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));

      if (offset == 0)
	return 1;

      /* The displacement would fit into a 2 byte move insn.
	 HImode and QImode loads/stores with displacement put pressure on
	 R0 which will most likely require another reg copy.  Thus account
	 a higher cost for that.  */
      if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
	return (mode == HImode || mode == QImode) ? 2 : 1;

      /* The displacement would fit into a 4 byte move insn (SH2A).  */
      if (TARGET_SH2A
	  && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
	return 2;

      /* The displacement is probably out of range and will require extra
	 calculations.  */
      return 3;
    }

  /* 'reg + reg' addressing.  Account a slightly higher cost because of
     increased pressure on R0.  */
  if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
    return 3;

  /* Not sure what it is - probably expensive.  */
  return 10;
}
3655
/* Code to expand a shift.  Emits a single SImode shift insn of TYPE
   (ASHIFT / ASHIFTRT / LSHIFTRT) by N bits on REG, in place.
   N must satisfy constraint P27 (i.e. 1, 2, 8 or 16) after negation
   handling.  */
static void
gen_ashift (int type, int n, rtx reg)
{
  rtx n_rtx;

  /* Negative values here come from the shift_amounts array; they mean
     a shift in the opposite direction.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  n_rtx = GEN_INT (n);
  gcc_assert (satisfies_constraint_P27 (n_rtx));

  switch (type)
    {
    case ASHIFTRT:
      emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
      break;
    case LSHIFTRT:
      /* A logical right shift by one has its own pattern (shlr).  */
      if (n == 1)
	emit_insn (gen_shlr (reg, reg));
      else
	emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
      break;
    case ASHIFT:
      emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
      break;
    default:
      gcc_unreachable ();
    }
}
3693
/* Code to expand a HImode shift.  Like gen_ashift, but operates on a
   HImode REG (possibly a subreg).  */
static void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array; they mean
     a shift in the opposite direction.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	/* Rewrap REG as an SImode subreg of the underlying register and
	   use the SImode shift expander.  */
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}
3732
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  CODE is the shift rtx code, operands[0] is the
   shifted reg (modified in place) and operands[2] the constant count.  */
void
gen_shifty_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 31;

  if (value == 31)
    {
      /* A right shift by 31 is a 2 insn shll-movt sequence.  */
      if (code == LSHIFTRT)
	{
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen even when optimizing, if there were subregs before
	 reload.  Don't output a nop here, as this is never optimized away;
	 use a no-op move instead.  */
      emit_insn (gen_rtx_SET (operands[0], operands[0]));
      return;
    }

  /* General case: emit the precomputed 1/2/8/16 shift sequence.  */
  max = ashl_lshr_seq[value].insn_count;
  for (i = 0; i < max; i++)
    gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
}
3777
/* Same as gen_shifty_op, but optimized for values where the topmost bits
   don't matter.  Uses the ext_ashl_lshr_seq table, which may clobber up
   to three of the highmost bits.  */
void
gen_shifty_hi_op (int code, rtx *operands)
{
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  /* Pick the HImode or SImode single-shift emitter by operand mode.  */
  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_ashl_lshr_seq[value].insn_count;
      for (i = 0; i < max; i++)
	gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
      gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
}
3809
/* Output RTL for an arithmetic right shift.
   operands[0] = destination, operands[1] = source,
   operands[2] = shift count (register or CONST_INT).
   Returns true if RTL was emitted here, false if the caller must fall
   back to the generic expansion.
   ??? Rewrite to use super-optimizer sequences.  */
bool
expand_ashiftrt (rtx *operands)
{
  rtx wrk;
  char func[18];	/* Holds "__ashiftrt_r4_NN" helper names.  */
  int value;

  if (TARGET_DYNSHIFT)
    {
      if (!CONST_INT_P (operands[2]))
	{
	  /* Variable count: the dynamic shift insn shifts right for
	     negative counts, so negate the count first.  */
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  /* Constant count whose step-by-step sequence costs more than
	     loading the negated count plus one dynamic shift.  */
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return true;
	}
    }
  if (!CONST_INT_P (operands[2]))
    return false;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* If we are called from abs expansion, arrange things so that we
	 can use a single MT instruction that doesn't clobber the source,
	 if LICM can hoist out the load of the constant zero.  */
      if (currently_expanding_to_rtl)
	{
	  emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
				    operands[1]));
	  emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
	  return true;
	}
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return true;
    }
  else if (value >= 16 && value <= 19)
    {
      /* Shift by 16 in one insn, then finish with up to three single
	 shifts.  */
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return true;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  The helper's
     calling convention takes the value in r4 and returns it there.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return true;
}
3888
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "r")
	(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
			   (match_operand:SI 2 "const_int_operand" "n"))
		(match_operand:SI 3 "const_int_operand" "n"))) .
  LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
  return 0 for simple right / left or left/right shift combination.
  return 1 for a combination of shifts with zero_extend.
  return 2 for a combination of shifts with an AND that needs r0.
  return 3 for a combination of shifts with an AND that needs an extra
    scratch register, when the three highmost bits of the AND mask are clear.
  return 4 for a combination of shifts with an AND that needs an extra
    scratch register, when any of the three highmost bits of the AND mask
    is set.
  If ATTRP is set, store an initial right shift width in ATTRP[0],
  and the instruction length in ATTRP[1] .  These values are not valid
  when returning 0.
  When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
  shift_amounts for the last shift value that is to be used before the
  sign extend.  */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;	/* 10000 acts as "infinite" cost.  */
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  /* MASK is the AND mask as seen by the bits of the (pre-shift)
     source, i.e. shifted right by LEFT.  */
  if (CONST_INT_P (mask_rtx))
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair?  */
  /* Isolate the lowest set bit of MASK.  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  /* MASK2 holds the bits above the lowest contiguous run of ones in
     MASK; it is zero iff MASK is a run of ones followed by trailing
     zeroes.  */
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = ashl_lshr_seq[right].insn_count
		+ ashl_lshr_seq[right + left].insn_count;
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = ashl_lshr_seq[left + late_right].insn_count
		  + ashl_lshr_seq[late_right].insn_count;
    }
  /* Try to use zero extend.  */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      /* Consider the 8-bit (extu.b) and 16-bit (extu.w) zero extends.  */
      for (width = 8; width <= 16; width += 8)
	{
	  /* Can we zero-extend right away?  */
	  if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
	    {
	      cost = 1 + ext_ashl_lshr_seq[right].insn_count
		       + ext_ashl_lshr_seq[left + right].insn_count;
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = -1;	/* -1: zero-extend first, no pre-shift.  */
		}
	      continue;
	    }
	  /* ??? Could try to put zero extend into initial right shift,
	     or even shift a bit left before the right shift.  */
	  /* Determine value of first part of left shift, to get to the
	     zero extend cut-off point.  */
	  first = width - exact_log2 (lsb2) + right;
	  if (first >= 0 && right + left - first >= 0)
	    {
	      cost = ext_ashl_lshr_seq[right].insn_count
		     + ext_ashl_lshr_seq[first].insn_count + 1
		     + ext_ashl_lshr_seq[right + left - first].insn_count;

	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = first;
		}
	    }
	}
    }
  /* Try to use r0 AND pattern */
  /* The AND with an 8-bit constant (K08) requires the value in r0;
     allow shaving up to 2 off the right shift to make the mask fit.  */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      if (! CONST_OK_FOR_K08 (mask >> i))
	continue;
      cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 2;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1;
	}
    }
  /* Try to use a scratch register to hold the AND operand.  */
  /* CAN_EXT: the three highmost bits of the shifted mask are clear, so
     the cheaper "extending" shift sequences may be used (kind 3 vs 4).  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
	     + (can_ext
		? ext_ashl_lshr_seq
		: ashl_lshr_seq)[left + i].insn_count;
      if (cost < best_cost)
	{
	  best = 4 - can_ext;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
	}
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
4030
4031 /* This is used in length attributes of the unnamed instructions
4032 corresponding to shl_and_kind return values of 1 and 2. */
4033 int
shl_and_length(rtx insn)4034 shl_and_length (rtx insn)
4035 {
4036 rtx set_src, left_rtx, mask_rtx;
4037 int attributes[3];
4038
4039 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4040 left_rtx = XEXP (XEXP (set_src, 0), 1);
4041 mask_rtx = XEXP (set_src, 1);
4042 shl_and_kind (left_rtx, mask_rtx, attributes);
4043 return attributes[1];
4044 }
4045
4046 /* This is used in length attribute of the and_shl_scratch instruction. */
4047 int
shl_and_scr_length(rtx insn)4048 shl_and_scr_length (rtx insn)
4049 {
4050 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4051 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4052 rtx op = XEXP (set_src, 0);
4053 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4054 op = XEXP (XEXP (op, 0), 0);
4055 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4056 }
4057
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned zero.
   DEST/SOURCE are operands 0/1 of the combiner pattern documented at
   shl_and_kind; LEFT_RTX/MASK_RTX are its operands 2/3.
   Returns true when the requested kind cannot be handled here (the
   caller must then fail the split), false on success.  */
bool
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;

  right = attributes[0];	/* Initial right shift chosen by shl_and_kind.  */
  total_shift = INTVAL (left_rtx) + right;
  /* MASK re-expressed relative to the fully right-shifted value.  */
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return true;
    case 1:
      /* Shifts combined with a zero extend.  */
      {
	int first = attributes[2];
	rtx operands[3];

	if (first < 0)
	  {
	    /* Zero-extend right away, before any shifting.  */
	    emit_insn ((mask << right) <= 0xff
		       ? gen_zero_extendqisi2 (dest,
					       gen_lowpart (QImode, source))
		       : gen_zero_extendhisi2 (dest,
					       gen_lowpart (HImode, source)));
	    source = dest;
	  }
	if (source != dest)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (right)
	  {
	    operands[2] = GEN_INT (right);
	    gen_shifty_hi_op (LSHIFTRT, operands);
	  }
	if (first > 0)
	  {
	    /* Partial left shift up to the zero-extend cut-off point.  */
	    operands[2] = GEN_INT (first);
	    gen_shifty_hi_op (ASHIFT, operands);
	    total_shift -= first;
	    mask <<= first;
	  }
	if (first >= 0)
	  emit_insn (mask <= 0xff
		     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
		     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (total_shift > 0)
	  {
	    operands[2] = GEN_INT (total_shift);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	break;
      }
    case 4:
      /* Kind 4 must not use the "sloppy" hi shifts, since the high bits
	 of the mask matter.  */
      shift_gen_fun = gen_shifty_op;
      /* FALLTHRU */
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
	mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
      /* FALLTHRU */
    case 2:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (currently_expanding_to_rtl
	  || reload_in_progress || reload_completed)
	{
	  rtx operands[3];

	  /* Cases 3 and 4 should be handled by this split
	     only while combining  */
	  gcc_assert (kind <= 2);
	  if (right)
	    {
	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
	      source = dest;
	    }
	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
	  if (total_shift)
	    {
	      operands[0] = dest;
	      operands[1] = dest;
	      operands[2] = GEN_INT (total_shift);
	      shift_gen_fun (ASHIFT, operands);
	    }
	  break;
	}
      else
	{
	  /* While combining: emit a single and_shl_scratch insn.  NEG
	     compensates for shift amounts the scratch sequence would
	     fold in; it stays 0 unless a short extending sequence with
	     negative steps applies.  */
	  int neg = 0;
	  if (kind != 4 && total_shift < 16)
	    {
	      neg = -ext_ashl_lshr_seq[total_shift].amount[1];
	      if (neg > 0)
		neg -= ext_ashl_lshr_seq[total_shift].amount[2];
	      else
		neg = 0;
	    }
	  emit_insn (gen_and_shl_scratch (dest, source,
					  GEN_INT (right),
					  GEN_INT (mask),
					  GEN_INT (total_shift + neg),
					  GEN_INT (neg)));
	  emit_insn (gen_movsi (dest, dest));
	  break;
	}
    }
  return false;
}
4174
/* Try to find a good way to implement the combiner pattern
  [(set (match_operand:SI 0 "register_operand" "=r")
	(sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
				    (match_operand:SI 2 "const_int_operand" "n")
			 (match_operand:SI 3 "const_int_operand" "n")
			 (const_int 0)))
   (clobber (reg:SI T_REG))]
  LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
  return 0 for simple left / right shift combination.
  return 1 for left shift / 8 bit sign extend / left shift.
  return 2 for left shift / 16 bit sign extend / left shift.
  return 3 for left shift / 8 bit sign extend / shift / sign extend.
  return 4 for left shift / 16 bit sign extend / shift / sign extend.
  return 5 for left shift / 16 bit sign extend / right shift
  return 6 for < 8 bit sign extend / left shift.
  return 7 for < 8 bit sign extend / left shift / single right shift.
  If COSTP is nonzero, assign the calculated cost to *COSTP.  */
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;		/* Width of the significant input bits.  */
  gcc_assert (insize > 0);
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = ashl_lshr_seq[32 - insize].insn_count
	      + ashl_lshr_seq[32 - size].insn_count;
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = ashl_lshr_seq[16 - insize].insn_count + 1
	     + ashl_lshr_seq[16 - size].insn_count;
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
	 below, by alternative 3 or something even better.  */
      if (cost < best_cost)
	{
	  kind = 5;
	  best_cost = cost;
	}
    }
  /* Try a plain sign extend between two shifts.  */
  /* EXT is the sign-extend width tried: 16 (ext.w) then 8 (ext.b).  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
	{
	  cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
		 + ashl_lshr_seq[size - ext].insn_count;
	  if (cost < best_cost)
	    {
	      kind = ext / (unsigned) 8;	/* kind 1 or 2.  */
	      best_cost = cost;
	    }
	}
      /* Check if we can do a sloppy shift with a final signed shift
	 restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
	cost = ext_ashl_lshr_seq[ext - insize].insn_count
	       + ext_ashl_lshr_seq[size - ext].insn_count + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
	 and do a final sign extend?  */
      else if (size <= 16)
	cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
	       + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
	       + 1;
      else
	continue;
      if (cost < best_cost)
	{
	  kind = ext / (unsigned) 8 + 2;	/* kind 3 or 4.  */
	  best_cost = cost;
	}
    }
  /* Check if we can sign extend in r0 */
  if (insize < 8)
    {
      cost = 3 + ashl_lshr_seq[left].insn_count;
      if (cost < best_cost)
	{
	  kind = 6;
	  best_cost = cost;
	}
      /* Try the same with a final signed shift.  */
      if (left < 31)
	{
	  cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
	  if (cost < best_cost)
	    {
	      kind = 7;
	      best_cost = cost;
	    }
	}
    }
  if (TARGET_DYNSHIFT)
    {
      /* Try to use a dynamic shift.  */
      cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
	{
	  kind = 0;
	  best_cost = cost;
	}
    }
  if (costp)
    /* NOTE(review): this stores the cost of the LAST alternative tried,
       not BEST_COST; when the final candidate loses, *costp may exceed
       the cost of the chosen kind.  Longstanding behavior relied on by
       shl_sext_length (lengths may legitimately overestimate) — confirm
       before changing.  */
    *costp = cost;
  return kind;
}
4286
4287 /* Function to be used in the length attribute of the instructions
4288 implementing this pattern. */
4289 int
shl_sext_length(rtx insn)4290 shl_sext_length (rtx insn)
4291 {
4292 rtx set_src, left_rtx, size_rtx;
4293 int cost;
4294
4295 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4296 left_rtx = XEXP (XEXP (set_src, 0), 1);
4297 size_rtx = XEXP (set_src, 1);
4298 shl_sext_kind (left_rtx, size_rtx, &cost);
4299 return cost;
4300 }
4301
/* Generate rtl for this pattern */
/* Emit the sequence chosen by shl_sext_kind for the combiner pattern
   documented there.  DEST/SOURCE are operands 0/1; LEFT_RTX/SIZE_RTX
   are operands 2/3.  Returns true if the kind is unhandled (caller
   must fail), false on success.  */
bool
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;		/* Width of the significant input bits.  */
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
	/* Odd kinds use an 8-bit extend, even kinds a 16-bit one.  */
	int ext = kind & 1 ? 8 : 16;
	int shift2 = size - ext;

	/* Don't expand fine-grained when combining, because that will
	   make the pattern fail.  */
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  {
	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	    emit_insn (gen_movsi (dest, source));
	    break;
	  }
	if (dest != source)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (ext - insize)
	  {
	    /* Shift left up to the sign-extend boundary.  */
	    operands[2] = GEN_INT (ext - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	emit_insn (kind & 1
		   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	if (kind <= 2)
	  {
	    /* Kinds 1/2: just finish with the remaining left shift.  */
	    if (shift2)
	      {
		operands[2] = GEN_INT (shift2);
		gen_shifty_op (ASHIFT, operands);
	      }
	  }
	else
	  {
	    /* Kinds 3/4: sloppy shift followed by a sign-restoring step.  */
	    if (shift2 > 0)
	      {
		if (EXT_SHIFT_SIGNED (shift2))
		  {
		    /* Overshoot by one, then one arithmetic right shift
		       restores the sign bit.  */
		    operands[2] = GEN_INT (shift2 + 1);
		    gen_shifty_op (ASHIFT, operands);
		    operands[2] = const1_rtx;
		    gen_shifty_op (ASHIFTRT, operands);
		    break;
		  }
		operands[2] = GEN_INT (shift2);
		gen_shifty_hi_op (ASHIFT, operands);
	      }
	    else if (shift2)
	      {
		operands[2] = GEN_INT (-shift2);
		gen_shifty_hi_op (LSHIFTRT, operands);
	      }
	    emit_insn (size <= 8
		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	break;
      }
    case 5:
      {
	/* 16-bit extend followed by single right shifts.  */
	int i = 16 - size;
	if (! currently_expanding_to_rtl
	    && ! reload_in_progress && ! reload_completed)
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	else
	  {
	    operands[0] = dest;
	    operands[2] = GEN_INT (16 - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
	while (--i >= 0)
	  gen_ashift (ASHIFTRT, 1, dest);
	break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (! currently_expanding_to_rtl
	  && ! reload_in_progress && ! reload_completed)
	{
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	  emit_insn (gen_movsi (dest, source));
	  break;
	}
      /* Sign-extend a sub-byte field with the mask / xor / subtract
	 trick: x = (x & m) ^ s; x -= s  where s is the sign bit.  */
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
      operands[0] = dest;
      /* Kind 7 overshoots the left shift by one and compensates with a
	 single arithmetic right shift.  */
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
	emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
      break;
    default:
      return true;
    }
  return false;
}
4421
/* Singly-linked list of labels; used by the constant-pool code below to
   record the "window end" labels attached to pool entries.  */
typedef struct label_ref_list_d
{
  rtx_code_label *label;		/* The referenced label.  */
  struct label_ref_list_d *next;	/* Next node, or NULL.  */
} *label_ref_list_t;

/* Pool allocator for label_ref_list_d nodes.  */
static object_allocator<label_ref_list_d> label_ref_list_d_pool
  ("label references list");
4430
4431 /* The SH cannot load a large constant into a register, constants have to
4432 come from a pc relative load. The reference of a pc relative load
4433 instruction must be less than 1k in front of the instruction. This
4434 means that we often have to dump a constant inside a function, and
4435 generate code to branch around it.
4436
4437 It is important to minimize this, since the branches will slow things
4438 down and make things bigger.
4439
4440 Worst case code looks like:
4441
4442 mov.l L1,rn
4443 bra L2
4444 nop
4445 align
4446 L1: .long value
4447 L2:
4448 ..
4449
4450 mov.l L3,rn
4451 bra L4
4452 nop
4453 align
4454 L3: .long value
4455 L4:
4456 ..
4457
4458 We fix this by performing a scan before scheduling, which notices which
4459 instructions need to have their operands fetched from the constant table
4460 and builds the table.
4461
4462 The algorithm is:
4463
4464 scan, find an instruction which needs a pcrel move. Look forward, find the
4465 last barrier which is within MAX_COUNT bytes of the requirement.
4466 If there isn't one, make one. Process all the instructions between
4467 the find and the barrier.
4468
4469 In the above example, we can tell that L3 is within 1k of L1, so
4470 the first move can be shrunk from the 3 insn+constant sequence into
4471 just 1 insn, and the constant moved to L3 to make:
4472
4473 mov.l L1,rn
4474 ..
4475 mov.l L3,rn
4476 bra L4
4477 nop
4478 align
4479 L3:.long value
4480 L4:.long value
4481
4482 Then the second move becomes the target for the shortening process. */
4483
/* One entry of the pending constant pool (see pool_vector below).  */
typedef struct
{
  rtx value;			/* Value in table.  */
  rtx_code_label *label;	/* Label of value.  */
  label_ref_list_t wend;	/* End of window.  */
  machine_mode mode;		/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;
4495
/* The maximum number of constants that can fit into one pool, since
   constants in the range 0..510 are at least 2 bytes long, and in the
   range from there to 1018 at least 4 bytes.  */

#define MAX_POOL_SIZE 372
static pool_node pool_vector[MAX_POOL_SIZE];	/* Pending pool entries.  */
static int pool_size;		/* Number of entries currently in use.  */
/* Label of the most recently added pool entry; chained into the next
   entry's window-end list by add_constant.  */
static rtx_code_label *pool_window_label;
/* Index into pool_vector of the entry that owns pool_window_label.  */
static int pool_window_last;

/* Label-number threshold recorded before machine-dependent reorg runs
   (set outside this chunk — see its uses in the reorg code).  */
static int max_labelno_before_reorg;
4507
4508 /* ??? If we need a constant in HImode which is the truncated value of a
4509 constant we need in SImode, we could combine the two entries thus saving
4510 two bytes. Is this common enough to be worth the effort of implementing
4511 it? */
4512
4513 /* ??? This stuff should be done at the same time that we shorten branches.
4514 As it is now, we must assume that all branches are the maximum size, and
4515 this causes us to almost always output constant pools sooner than
4516 necessary. */
4517
/* Add a constant to the pool and return its label.
   X is the constant, MODE its machine mode.  LAST_VALUE, if nonnull, is
   the constant added just before this one in a post-increment access
   sequence; when it matches the previous pool entry, this entry is
   merged into that sequence and no new label is created.
   May return NULL when the constant is part of such a sequence.  */
static rtx_code_label *
add_constant (rtx x, machine_mode mode, rtx last_value)
{
  rtx_code_label *lab, *new_rtx;
  label_ref_list_t ref, newref;

  /* First see if we've already got it.  */
  for (int i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
	  && mode == pool_vector[i].mode)
	{
	  if (x->code == CODE_LABEL)
	    {
	      /* NOTE(review): XINT (x, 3) appears to be the label-number
		 field, so labels with different numbers are kept apart
		 even if rtx_equal_p would match — confirm against rtl.def.  */
	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
		continue;
	    }
	  if (rtx_equal_p (x, pool_vector[i].value))
	    {
	      lab = new_rtx = 0;
	      /* Reuse the entry; only mint a fresh label when this use
		 is not a continuation of the same sequence.  */
	      if (! last_value
		  || ! i
		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
		{
		  new_rtx = gen_label_rtx ();
		  LABEL_REFS (new_rtx) = pool_vector[i].label;
		  pool_vector[i].label = lab = new_rtx;
		}
	      if (lab && pool_window_label)
		{
		  /* Chain the previous window label onto the entry that
		     owned it, so dump_table can emit its window end.  */
		  newref = label_ref_list_d_pool.allocate ();
		  newref->label = pool_window_label;
		  ref = pool_vector[pool_window_last].wend;
		  newref->next = ref;
		  pool_vector[pool_window_last].wend = newref;
		}
	      if (new_rtx)
		pool_window_label = new_rtx;
	      pool_window_last = i;
	      return lab;
	    }
	}
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      /* Continuation of a post-increment sequence: no label of its own.  */
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = label_ref_list_d_pool.allocate ();
      newref->label = pool_window_label;
      ref = pool_vector[pool_window_last].wend;
      newref->next = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}
4590
/* Output the literal table.  START, if nonzero, is the first instruction
   this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; We have to emit the operand3 labels from
   these insns at a 4-byte  aligned position.  BARRIER is the barrier
   after which we are to place the table.  */
static void
dump_table (rtx_insn *start, rtx_insn *barrier)
{
  rtx_insn *scan = barrier;	/* Emission point; advances as we emit.  */
  bool need_align = true;
  rtx_code_label *lab;
  label_ref_list_t ref;
  bool have_df = false;		/* Set if any DFmode entry is present.  */

  /* Do two passes, first time dump out the HI sized constants.  */

  for (int i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
	{
	  if (need_align)
	    {
	      scan = emit_insn_after (gen_align_2 (), scan);
	      need_align = false;
	    }
	  /* Emit every label aliased to this entry (chained via
	     LABEL_REFS by add_constant).  */
	  for (lab = p->label; lab;
	       lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
				  scan);
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
      else if (p->mode == DFmode)
	have_df = true;
    }

  need_align = true;

  if (start)
    {
      /* Emit the operand-3 labels of any casesi_worker_2 insns at a
	 4-byte aligned position, as documented above.  */
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = false;
      for (; start != barrier; start = NEXT_INSN (start))
	if (NONJUMP_INSN_P (start)
	    && recog_memoized (start) == CODE_FOR_casesi_worker_2)
	  {
	    rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
	    rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

	    scan = emit_label_after (as_a <rtx_insn *> (lab), scan);
	  }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      /* 8-byte aligned layout: interleave 4-byte entries into the
	 padding slots created by aligning DFmode entries.  */
      rtx_insn *align_insn = NULL;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = false;

      for (int i = 0; i < pool_size; i++)
	{
	  pool_node *p = &pool_vector[i];

	  switch (p->mode)
	    {
	    case E_HImode:
	      break;	/* Already emitted in the first pass.  */
	    case E_SImode:
	    case E_SFmode:
	      if (align_insn && !p->part_of_sequence_p)
		{
		  /* Fill the 4-byte padding slot in front of a pending
		     8-byte alignment with this entry, then drop the
		     now-unneeded align insn.  */
		  for (lab = p->label; lab;
		       lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
		    emit_label_before (lab, align_insn);
		  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
				    align_insn);
		  for (ref = p->wend; ref; ref = ref->next)
		    {
		      lab = ref->label;
		      emit_insn_before (gen_consttable_window_end (lab),
					align_insn);
		    }
		  delete_insn (align_insn);
		  align_insn = NULL;
		  continue;
		}
	      else
		{
		  for (lab = p->label; lab;
		       lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
		    scan = emit_label_after (lab, scan);
		  scan = emit_insn_after (gen_consttable_4 (p->value,
							    const0_rtx), scan);
		  /* A 4-byte entry toggles the 8-byte alignment parity.  */
		  need_align = ! need_align;
		}
	      break;
	    case E_DFmode:
	      if (need_align)
		{
		  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
		  align_insn = scan;
		  need_align = false;
		}
	      /* FALLTHRU */
	    case E_DImode:
	      for (lab = p->label; lab;
		   lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
		scan = emit_label_after (lab, scan);
	      scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				      scan);
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  if (p->mode != HImode)
	    {
	      for (ref = p->wend; ref; ref = ref->next)
		{
		  lab = ref->label;
		  scan = emit_insn_after (gen_consttable_window_end (lab),
					  scan);
		}
	    }
	}

      /* Resetting pool_size here also skips the simple second pass
	 below, since everything has been emitted.  */
      pool_size = 0;
    }

  /* Simple second pass (no double-alignment interleaving): emit the
     remaining SI/SF and DF/DI entries in order.  */
  for (int i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
	{
	case E_HImode:
	  break;	/* Already emitted in the first pass.  */
	case E_SImode:
	case E_SFmode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab;
	       lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
				  scan);
	  break;
	case E_DFmode:
	case E_DImode:
	  if (need_align)
	    {
	      need_align = false;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab;
	       lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				  scan);
	  break;
	default:
	  gcc_unreachable ();
	}

      if (p->mode != HImode)
	{
	  for (ref = p->wend; ref; ref = ref->next)
	    {
	      lab = ref->label;
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  /* Reset the pool state for the next table.  */
  pool_size = 0;
  pool_window_label = NULL;
  pool_window_last = 0;
}
4783
/* Operand 0 of the UNSPEC in a mova insn's SET_SRC: the referenced
   label (a LABEL_REF for a real mova, a CONST for mova_const).  */
#define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4785
/* Nonzero if the insn is a move instruction which needs to be fixed.  */

/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For a SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */
static bool
broken_move (rtx_insn *insn)
{
  if (NONJUMP_INSN_P (insn))
    {
      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
	  /* We can load any 8-bit value if we don't care what the high
	     order bits end up as.  */
	  && GET_MODE (SET_DEST (pat)) != QImode
	  /* The source must be a constant (or one of the two UNSPEC
	     forms below that wrap one).  */
	  && (CONSTANT_P (SET_SRC (pat))
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
		  && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
	      /* Match mova_const.  */
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC
		  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
		  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
	  /* Exclude FP 0.0/1.0 loads into FP registers that can be done
	     with fldi0/fldi1 instead of a pool load.  */
	  && ! (TARGET_SH2E
		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
		&& (fp_zero_operand (SET_SRC (pat))
		    || fp_one_operand (SET_SRC (pat)))
		/* In general we don't know the current setting of fpscr, so
		   disable fldi.
		   There is an exception if this was a register-register move
		   before reload - and hence it was ascertained that we have
		   single precision setting - and in a post-reload optimization
		   we changed this to do a constant load.  In that case
		   we don't have an r0 clobber, hence we must use fldi.  */
		&& (TARGET_FMOVD
		    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
			== SCRATCH))
		&& REG_P (SET_DEST (pat))
		&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
	  /* Exclude SH2A immediates loadable with movi20/movi20s.  */
	  && ! (TARGET_SH2A
		&& GET_MODE (SET_DEST (pat)) == SImode
		&& (satisfies_constraint_I20 (SET_SRC (pat))
		    || satisfies_constraint_I28 (SET_SRC (pat))))
	  /* Exclude 8-bit immediates loadable with a plain mov #imm.  */
	  && ! satisfies_constraint_I08 (SET_SRC (pat)))
	return true;
    }

  return false;
}
4836
4837 /* Return true if the specified insn is a mova insn. */
4838 static bool
mova_p(rtx_insn * insn)4839 mova_p (rtx_insn *insn)
4840 {
4841 return (NONJUMP_INSN_P (insn)
4842 && GET_CODE (PATTERN (insn)) == SET
4843 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4844 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4845 /* Don't match mova_const. */
4846 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4847 }
4848
/* Fix up a mova from a switch that went out of range.  */
static void
fixup_mova (rtx_insn *mova)
{
  /* Mark the referenced label's mode; see the mova handling elsewhere
     in this file for how QImode is interpreted.  */
  PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
  if (! flag_pic)
    {
      /* Non-PIC: simply load the label address directly instead of
	 going through the UNSPEC_MOVA.  */
      SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
      INSN_CODE (mova) = -1;	/* Force re-recognition.  */
    }
  else
    {
      /* PIC: rewrite the following casesi_worker_1 into casesi_worker_2
	 with a new anchor label, and make the mova load the difference
	 between the table label and that anchor (UNSPEC_SYMOFF).  */
      rtx_insn *worker = mova;
      rtx_code_label *lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, target, base, diff;

      /* Scan forward to the casesi_worker_1 insn; it must appear before
	 any label or jump.  */
      do
	{
	  worker = NEXT_INSN (worker);
	  gcc_assert (worker
		      && !LABEL_P (worker)
		      && !JUMP_P (worker));
	} while (NOTE_P (worker)
		 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      PATTERN (worker) = (gen_casesi_worker_2
			  (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
			   XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
			   XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      base = gen_rtx_LABEL_REF (Pmode, lab);
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}
4889
/* NEW_MOVA is a mova we've just encountered while scanning forward.  Update
   *num_mova, and check if the new mova is not nested within the first one.
   Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
   2 if new_mova has been assigned to *first_mova, -1 otherwise.  */
static int
untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
{
  int n_addr = 0;	/* Initialization to shut up spurious warning.  */
  int f_target, n_target = 0;	/* Likewise.  */

  if (optimize)
    {
      /* If NEW_MOVA has no address yet, it will be handled later.  */
      if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
	return -1;

      n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
      n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
      /* A mova can only reach a forward target within 1022 bytes; if the
	 new mova is already out of range, demote it right away.  */
      if (n_addr > n_target || n_addr + 1022 < n_target)
	{
	  /* Change the mova into a load.
	     broken_move will then return true for it.  */
	  fixup_mova (new_mova);
	  return 1;
	}
    }
  /* First live mova seen: just record it.  */
  if (!(*num_mova)++)
    {
      *first_mova = new_mova;
      return 2;
    }
  /* If the first mova's target lies at or beyond the new one's target
     (or we can't compare addresses at -O0), the movas are not nested
     the wrong way round and both may stay.  Note the embedded assignment
     to f_target inside the condition.  */
  if (!optimize
      || ((f_target
	   = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
	  >= n_target))
    return -1;

  /* The movas are tangled; fix up whichever spans the larger distance,
     since it is the one more at risk of going out of range.  */
  (*num_mova)--;
  if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
      > n_target - n_addr)
    {
      fixup_mova (*first_mova);
      return 0;
    }
  else
    {
      fixup_mova (new_mova);
      return 1;
    }
}
4940
/* Find the last barrier from insn FROM which is close enough to hold the
   constant pool.  If we can't find one, then create one near the end of
   the range.  NUM_MOVA is the number of movas whose constants must land in
   this pool; MOVA is the first of them (or NULL).  Returns the barrier
   after which the pool should be emitted.  */
static rtx_insn *
find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
{
  int count_si = 0;		/* Bytes of code scanned, SImode accounting.  */
  int count_hi = 0;		/* Bytes of code scanned, HImode accounting.  */
  int found_hi = 0;		/* Bytes of HImode constants collected.  */
  int found_si = 0;		/* Bytes of SImode+ constants collected.  */
  int hi_align = 2;
  int si_align = 2;
  int leading_mova = num_mova;
  rtx_insn *barrier_before_mova = NULL;
  rtx_insn *found_barrier = NULL;
  rtx_insn *good_barrier = NULL;
  int si_limit;
  int hi_limit;
  rtx_insn *orig = from;
  rtx_insn *last_got = NULL;
  rtx_insn *last_symoff = NULL;

  /* For HImode: range is 510, add 4 because pc counts from address of
     second instruction after this one, subtract 2 for the jump instruction
     that we may need to emit before the table, subtract 2 for the instruction
     that fills the jump delay slot (in very rare cases, reorg will take an
     instruction from after the constant pool or will leave the delay slot
     empty).  This gives 510.
     For SImode: range is 1020, add 4 because pc counts from address of
     second instruction after this one, subtract 2 in case pc is 2 byte
     aligned, subtract 2 for the jump instruction that we may need to emit
     before the table, subtract 2 for the instruction that fills the jump
     delay slot.  This gives 1018.  */

  /* The branch will always be shortened now that the reference address for
     forward branches is the successor address, thus we need no longer make
     adjustments to the [sh]i_limit for -O0.  */

  si_limit = 1018;
  hi_limit = 510;

  /* Scan forward until either displacement budget is exhausted, keeping
     track of the best barrier seen so far.  */
  while (from && count_si < si_limit && count_hi < hi_limit)
    {
      int inc = get_attr_length (from);
      int new_align = 1;

      /* If this is a label that existed at the time of the compute_alignments
	 call, determine the alignment.  N.B.  When find_barrier recurses for
	 an out-of-reach mova, we might see labels at the start of previously
	 inserted constant tables.  */
      if (LABEL_P (from)
	  && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
	{
	  if (optimize)
	    new_align = 1 << label_to_alignment (from).levels[0].log;
	  else if (BARRIER_P (prev_nonnote_insn (from)))
	    new_align = 1 << barrier_align (from);
	  else
	    new_align = 1;
	  inc = 0;
	}
      /* In case we are scanning a constant table because of recursion, check
	 for explicit alignments.  If the table is long, we might be forced
	 to emit the new table in front of it; the length of the alignment
	 might be the last straw.  */
      else if (NONJUMP_INSN_P (from)
	       && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
	new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
      /* When we find the end of a constant table, paste the new constant
	 at the end.  That is better than putting it in front because
	 this way, we don't need extra alignment for adding a 4-byte-aligned
	 mov(a) label to a 2/4 or 8/4 byte aligned table.  */
      else if (NONJUMP_INSN_P (from)
	       && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
	       && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
	return from;

      if (BARRIER_P (from))
	{
	  rtx_insn *next;

	  found_barrier = from;

	  /* If we are at the end of the function, or in front of an alignment
	     instruction, we need not insert an extra alignment.  We prefer
	     this kind of barrier.  */
	  if (barrier_align (from) > 2)
	    good_barrier = from;

	  /* If we are at the end of a hot/cold block, dump the constants
	     here.  */
	  next = NEXT_INSN (from);
	  if (next
	      && NOTE_P (next)
	      && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
	    break;
	}

      if (broken_move (from))
	{
	  rtx pat, src, dst;
	  machine_mode mode;

	  pat = PATTERN (from);
	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);
	  src = SET_SRC (pat);
	  dst = SET_DEST (pat);
	  mode = GET_MODE (dst);

	  /* GOT pcrelat setting comes in pair of
	     mova	.L8,r0
	     mov.l	.L8,r12
	     instructions.  (plus add r0,r12).
	     Remember if we see one without the other.  */
	  if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
	    last_got = last_got ? NULL : from;
	  else if (PIC_ADDR_P (src))
	    last_got = last_got ? NULL : from;

	  /* We must explicitly check the mode, because sometimes the
	     front end will generate code to load unsigned constants into
	     HImode targets without properly sign extending them.  */
	  if (mode == HImode
	      || (mode == SImode && satisfies_constraint_I16 (src)
		  && REGNO (dst) != FPUL_REG))
	    {
	      found_hi += 2;
	      /* We put the short constants before the long constants, so
		 we must count the length of short constants in the range
		 for the long constants.  */
	      /* ??? This isn't optimal, but is easy to do.  */
	      si_limit -= 2;
	    }
	  else
	    {
	      /* We dump DF/DI constants before SF/SI ones, because
		 the limit is the same, but the alignment requirements
		 are higher.  We may waste up to 4 additional bytes
		 for alignment, and the DF/DI constant may have
		 another SF/SI constant placed before it.  */
	      while (si_align > 2 && found_si + si_align - 2 > count_si)
		si_align >>= 1;
	      if (found_si > count_si)
		count_si = found_si;
	      found_si += GET_MODE_SIZE (mode);
	      if (num_mova)
		si_limit -= GET_MODE_SIZE (mode);
	    }
	}

      if (mova_p (from))
	{
	  switch (untangle_mova (&num_mova, &mova, from))
	    {
	      case 1:
		if (flag_pic)
		  {
		    rtx src = SET_SRC (PATTERN (from));
		    if (GET_CODE (src) == CONST
			&& GET_CODE (XEXP (src, 0)) == UNSPEC
			&& XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
		      last_symoff = from;
		  }
		break;
	      case 0:	return find_barrier (0, 0, mova);
	      case 2:
		{
		  leading_mova = 0;
		  barrier_before_mova
		    = good_barrier ? good_barrier : found_barrier;
		}
		/* FALLTHRU */
	      default:	break;
	    }
	  if (found_si > count_si)
	    count_si = found_si;
	}
      else if (JUMP_TABLE_DATA_P (from)
	       && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
	{
	  if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
	      || (num_mova
		  && (prev_nonnote_insn (from)
		      == XEXP (MOVA_LABELREF (mova), 0))))
	    num_mova--;
	  if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log)
	    {
	      /* We have just passed the barrier in front of the
		 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
		 the ADDR_DIFF_VEC is accessed as data, just like our pool
		 constants, this is a good opportunity to accommodate what
		 we have gathered so far.
		 If we waited any longer, we could end up at a barrier in
		 front of code, which gives worse cache usage for separated
		 instruction / data caches.  */
	      good_barrier = found_barrier;
	      break;
	    }
	  else
	    {
	      rtx body = PATTERN (from);
	      inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
	    }
	}
      /* For the SH1, we generate alignments even after jumps-around-jumps.  */
      else if (JUMP_P (from)
	       && ! TARGET_SH2
	       && ! optimize_size)
	new_align = 4;

      /* There is a possibility that a bf is transformed into a bf/s by the
	 delay slot scheduler.  */
      if (JUMP_P (from)
	  && get_attr_type (from) == TYPE_CBRANCH
	  && ! sequence_insn_p (from))
	inc += 2;

      /* Advance both byte counters, charging alignment padding against
	 the respective displacement budgets.  */
      if (found_si)
	{
	  count_si += inc;
	  if (new_align > si_align)
	    {
	      si_limit -= (count_si - 1) & (new_align - si_align);
	      si_align = new_align;
	    }
	  count_si = (count_si + new_align - 1) & -new_align;
	}
      if (found_hi)
	{
	  count_hi += inc;
	  if (new_align > hi_align)
	    {
	      hi_limit -= (count_hi - 1) & (new_align - hi_align);
	      hi_align = new_align;
	    }
	  count_hi = (count_hi + new_align - 1) & -new_align;
	}
      from = NEXT_INSN (from);
    }

  if (num_mova)
    {
      if (leading_mova)
	{
	  /* Try as we might, the leading mova is out of range.  Change
	     it into a load (which will become a pcload) and retry.  */
	  fixup_mova (mova);
	  return find_barrier (0, 0, mova);
	}
      else
	{
	  /* Insert the constant pool table before the mova instruction,
	     to prevent the mova label reference from going out of range.  */
	  from = mova;
	  good_barrier = found_barrier = barrier_before_mova;
	}
    }

  if (found_barrier)
    {
      if (good_barrier && next_real_insn (found_barrier))
	found_barrier = good_barrier;
    }
  else
    {
      /* We didn't find a barrier in time to dump our stuff,
	 so we'll make one.  */
      rtx_code_label *label = gen_label_rtx ();

      /* Don't emit a constant table in the middle of insns for
	 casesi_worker_2.  This is a bit overkill but is enough
	 because casesi_worker_2 wouldn't appear so frequently.  */
      if (last_symoff)
	from = last_symoff;

      /* If we exceeded the range, then we must back up over the last
	 instruction we looked at.  Otherwise, we just need to undo the
	 NEXT_INSN at the end of the loop.  */
      if (PREV_INSN (from) != orig
	  && (count_hi > hi_limit || count_si > si_limit))
	from = PREV_INSN (PREV_INSN (from));
      else
	from = PREV_INSN (from);

      /* Don't emit a constant table in the middle of global pointer setting,
	 since that would move the addressing base GOT into another table.
	 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
	 in the pool anyway, so just move up the whole constant pool.

	 However, avoid doing so when the last single GOT mov is the starting
	 insn itself.  Going past above the start insn would create a negative
	 offset, causing errors.  */
      if (last_got && last_got != orig)
	from = PREV_INSN (last_got);

      /* Don't insert the constant pool table at the position which
	 may be the landing pad.  */
      if (flag_exceptions
	  && CALL_P (from)
	  && find_reg_note (from, REG_EH_REGION, NULL_RTX))
	from = PREV_INSN (from);

      /* Walk back to be just before any jump or label.
	 Putting it before a label reduces the number of times the branch
	 around the constant pool table will be hit.  Putting it before
	 a jump makes it more likely that the bra delay slot will be
	 filled.  */
      while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
	from = PREV_INSN (from);

      if (CALL_P (from))
	{
	  bool sibcall_p = SIBLING_CALL_P (from);

	  /* If FROM was a sibling call, then we know that control
	     will not return.  In fact, we were guaranteed to hit
	     a barrier before another real insn.

	     The jump around the constant pool is unnecessary.  It
	     costs space, but more importantly it confuses dwarf2cfi
	     generation.  */
	  if (sibcall_p)
	    return emit_barrier_after (from);
	}

      /* Emit a jump over the new pool, the barrier that terminates the
	 preceding code, and finally the label the jump targets.  */
      from = emit_jump_insn_after (gen_jump (label), from);
      JUMP_LABEL (from) = label;
      LABEL_NUSES (label) = 1;
      found_barrier = emit_barrier_after (from);
      emit_label_after (label, found_barrier);
    }

  return found_barrier;
}
5276
5277 /* If the instruction INSN is implemented by a special function, and we can
5278 positively find the register that is used to call the sfunc, and this
5279 register is not used anywhere else in this instruction - except as the
5280 destination of a set, return this register; else, return 0. */
5281 rtx
sfunc_uses_reg(rtx_insn * insn)5282 sfunc_uses_reg (rtx_insn *insn)
5283 {
5284 int i;
5285 rtx pattern, part, reg_part, reg;
5286
5287 if (!NONJUMP_INSN_P (insn))
5288 return NULL_RTX;
5289 pattern = PATTERN (insn);
5290 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5291 return NULL_RTX;
5292
5293 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5294 {
5295 part = XVECEXP (pattern, 0, i);
5296 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5297 reg_part = part;
5298 }
5299 if (! reg_part)
5300 return NULL_RTX;
5301 reg = XEXP (reg_part, 0);
5302 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5303 {
5304 part = XVECEXP (pattern, 0, i);
5305 if (part == reg_part || GET_CODE (part) == CLOBBER)
5306 continue;
5307 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5308 && REG_P (SET_DEST (part)))
5309 ? SET_SRC (part) : part)))
5310 return NULL_RTX;
5311 }
5312 return reg;
5313 }
5314
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.  Returns true if INSN uses REG other than by a call
   (or call-with-set); false otherwise.  */
static bool
noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
{
  *set = NULL_RTX;

  /* An sfunc call through REG counts as a call use.  */
  rtx reg2 = sfunc_uses_reg (insn);
  if (reg2 && REGNO (reg2) == REGNO (reg))
    {
      rtx pattern = single_set (insn);
      if (pattern
	  && REG_P (SET_DEST (pattern))
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	*set = pattern;
      return false;
    }
  if (!CALL_P (insn))
    {
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different.  */
      rtx pattern = single_set (insn);
      if (pattern
	  && REG_P (SET_DEST (pattern))
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	{
	  rtx par, part;
	  int i;

	  /* REG is set by this insn; record the SET, then make sure the
	     only other mentions are inside other SETs' sources.  */
	  *set = pattern;
	  par = PATTERN (insn);
	  if (GET_CODE (par) == PARALLEL)
	    for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
	      {
		part = XVECEXP (par, 0, i);
		if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
		  return true;
	      }
	  return reg_mentioned_p (reg, SET_SRC (pattern));
	}

      /* Not a call and not a plain set of REG: any use is a non-call use.  */
      return true;
    }

  /* INSN is a CALL_INSN.  Dig down to the CALL rtx itself.  */
  rtx pattern = PATTERN (insn);

  if (GET_CODE (pattern) == PARALLEL)
    {
      /* Reject any mention of REG in the parallel's side operands.  */
      for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
	if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
	  return true;
      pattern = XVECEXP (pattern, 0, 0);
    }

  if (GET_CODE (pattern) == SET)
    {
      if (reg_mentioned_p (reg, SET_DEST (pattern)))
	{
	  /* We don't use rtx_equal_p, because we don't care if the
	     mode is different.  */
	  if (!REG_P (SET_DEST (pattern))
	      || REGNO (reg) != REGNO (SET_DEST (pattern)))
	    return true;

	  *set = pattern;
	}

      pattern = SET_SRC (pattern);
    }

  /* The only acceptable remaining use is as the call target:
     (call (mem (reg)) ...).  */
  if (GET_CODE (pattern) != CALL
      || !MEM_P (XEXP (pattern, 0))
      || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
    return true;

  return false;
}
5393
/* Given a X, a pattern of an insn or a part of it, return a mask of used
   general registers.  Bits 0..15 mean that the respective registers
   are used as inputs in the instruction.  Bits 16..31 mean that the
   registers 0..15, respectively, are used as outputs, or are clobbered.
   IS_DEST should be set to 16 if X is the destination of a SET, else to 0.  */
int
regs_used (rtx x, int is_dest)
{
  enum rtx_code code;
  const char *fmt;
  int used = 0;

  if (! x)
    return used;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      /* A multi-register value sets a run of consecutive bits, offset by
	 IS_DEST to land in the output half of the mask when appropriate.  */
      if (REGNO (x) < 16)
	return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
		<< (REGNO (x) + is_dest));
      return 0;
    case SUBREG:
      {
	rtx y = SUBREG_REG (x);

	if (!REG_P (y))
	  break;
	/* As for REG, but shifted by the subreg's register offset.  */
	if (REGNO (y) < 16)
	  return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
		  << (REGNO (y) +
		      subreg_regno_offset (REGNO (y),
					   GET_MODE (y),
					   SUBREG_BYTE (x),
					   GET_MODE (x)) + is_dest));
	return 0;
      }
    case SET:
      return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
    case RETURN:
      /* If there was a return value, it must have been indicated with USE.  */
      return 0x00ffff00;
    case CLOBBER:
      /* NOTE(review): IS_DEST is set to 1 here rather than 16 as in the
	 SET case; this looks inconsistent with the bit layout described
	 above — confirm against upstream before relying on it.  */
      is_dest = 1;
      break;
    case MEM:
      /* A memory destination still reads its address registers.  */
      is_dest = 0;
      break;
    case CALL:
      /* Calls use/clobber the call-clobbered registers r4..r7 (inputs)
	 and mark r8..r15 region per this fixed mask.  */
      used |= 0x00ff00f0;
      break;
    default:
      break;
    }

  /* Recurse over all sub-expressions, propagating IS_DEST.  */
  fmt = GET_RTX_FORMAT (code);

  for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
	    used |= regs_used (XVECEXP (x, i, j), is_dest);
	}
      else if (fmt[i] == 'e')
	used |= regs_used (XEXP (x, i), is_dest);
    }
  return used;
}
5463
/* Create an instruction that prevents redirection of a conditional branch
   to the destination of the JUMP with address ADDR.
   If the branch needs to be implemented as an indirect jump, try to find
   a scratch register for it.
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   If any preceding insn that doesn't fit into a delay slot is good enough,
   pass 1.  Pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
   If a blocking instruction is made or recognized, return it.  */
static rtx_insn *
gen_block_redirect (rtx_insn *jump, int addr, int need_block)
{
  int dead = 0;		/* Mask of registers found dead at the target.  */
  rtx_insn *prev = prev_nonnote_insn (jump);

  /* First, check if we already have an instruction that satisfies our need.  */
  if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
    {
      if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
	return prev;
      if (GET_CODE (PATTERN (prev)) == USE
	  || GET_CODE (PATTERN (prev)) == CLOBBER
	  || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	prev = jump;
      /* Clearing bit 0 of NEED_BLOCK: if the caller passed 1 ("any
	 non-delay-slot insn will do") the result is 0; if -1 (recursive
	 call) the result stays negative and PREV suffices.  */
      else if ((need_block &= ~1) < 0)
	return prev;
      else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
	need_block = 0;
    }
  if (GET_CODE (PATTERN (jump)) == RETURN)
    {
      if (! need_block)
	return prev;
      /* Reorg even does nasty things with return insns that cause branches
	 to go out of range - see find_end_label and callers.  */
      return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
    }
  /* We can't use JUMP_LABEL here because it might be undefined
     when not optimizing.  */
  rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
  /* If the branch is out of range, try to find a scratch register for it.
     The displacement test folds the range [-4092, +4098] into a single
     unsigned comparison.  */
  if (optimize
      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
	  > 4092 + 4098))
    {
      rtx_insn *scan;
      /* Don't look for the stack pointer as a scratch register,
	 it would cause trouble if an interrupt occurred.  */
      unsigned attempt = 0x7fff, used;
      int jump_left = flag_expensive_optimizations + 1;

      /* It is likely that the most recent eligible instruction is wanted for
	 the delay slot.  Therefore, find out which registers it uses, and
	 try to avoid using them.  */

      for (scan = jump; (scan = PREV_INSN (scan)); )
	{
	  if (scan->deleted ())
	    continue;
	  rtx_code code = GET_CODE (scan);
	  if (code == CODE_LABEL || code == JUMP_INSN)
	    break;
	  if (code == INSN
	      && GET_CODE (PATTERN (scan)) != USE
	      && GET_CODE (PATTERN (scan)) != CLOBBER
	      && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
	    {
	      attempt &= ~regs_used (PATTERN (scan), 0);
	      break;
	    }
	}
      /* Scan forward from the jump target, accumulating uses and deaths,
	 until a register from ATTEMPT is found dead.  Follow at most
	 JUMP_LEFT simple jumps.  */
      for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
	   (scan = NEXT_INSN (scan)); )
	{
	  if (scan->deleted ())
	    continue;
	  rtx_code code = GET_CODE (scan);
	  if (INSN_P (scan))
	    {
	      used |= regs_used (PATTERN (scan), 0);
	      if (code == CALL_INSN)
		used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
	      /* A register written (bits 16..31) before being read is dead
		 on entry to the target.  */
	      dead |= (used >> 16) & ~used;
	      if (dead & attempt)
		{
		  dead &= attempt;
		  break;
		}
	      if (code == JUMP_INSN)
		{
		  if (jump_left-- && simplejump_p (scan))
		    scan = JUMP_LABEL_AS_INSN (scan);
		  else
		    break;
		}
	    }
	}
      /* Mask out the stack pointer again, in case it was
	 the only 'free' register we have found.  */
      dead &= 0x7fff;
    }
  /* If the immediate destination is still in range, check for possible
     threading with a jump beyond the delay slot insn.
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation then.  */

  else if (optimize && need_block >= 0)
    {
      rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
      next = next_active_insn (next);
      if (next && JUMP_P (next)
	  && GET_CODE (PATTERN (next)) == SET
	  && recog_memoized (next) == CODE_FOR_jump_compact)
	{
	  dest = JUMP_LABEL (next);
	  if (dest
	      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
		  > 4092 + 4098))
	    gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
	}
    }

  if (dead)
    {
      /* Pick the lowest-numbered dead register as the scratch.  */
      rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));

      /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, and thus exposing all constituent
	 instructions to further optimization.  However, reorg uses
	 simplejump_p to determine if there is an unconditional jump where
	 it should try to schedule instructions from the target of the
	 branch; simplejump_p fails for indirect jumps even if they have
	 a JUMP_LABEL.  */
      rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
					 (reg, GEN_INT (unspec_bbr_uid++)),
					 jump);
      /* ??? We would like this to have the scope of the jump, but that
	 scope will change when a delay slot insn of an inner scope is added.
	 Hence, after delay slot scheduling, we'll have to expect
	 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
	 the jump.  */

      INSN_LOCATION (insn) = INSN_LOCATION (jump);
      INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
      return insn;
    }
  else if (need_block)
    /* We can't use JUMP_LABEL here because it might be undefined
       when not optimizing.  */
    return emit_insn_before (gen_block_branch_redirect
			     (GEN_INT (unspec_bbr_uid++)),
			     jump);
  return prev;
}
5618
/* Reach of a conditional branch (bf/bt) in bytes, backward and forward.  */
#define CONDJUMP_MIN -252
#define CONDJUMP_MAX 262
/* Bookkeeping for one branch that must be rewritten as a "far" branch
   (a short conditional branch around an unconditional jump).  */
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx_insn *near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx_insn *insert_place;
  /* The ultimate destination.  */
  rtx_insn *far_label;
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};

/* Current phase of the SH machine-dependent reorg pass.  */
enum mdep_reorg_phase_e mdep_reorg_phase;
5638
/* Materialize the far branch described by BP: invert the original
   conditional jump at BP->insert_place to skip over a newly emitted
   unconditional jump (or return) to BP->far_label.  */
static void
gen_far_branch (struct far_branch *bp)
{
  rtx_insn *insn = bp->insert_place;
  rtx_jump_insn *jump;
  rtx_code_label *label = gen_label_rtx ();

  emit_label_after (label, insn);
  if (bp->far_label)
    {
      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
      LABEL_NUSES (bp->far_label)++;
    }
  else
    /* No far label means the ultimate destination is a function return.  */
    jump = emit_jump_insn_after (gen_return (), insn);

  /* Emit a barrier so that reorg knows that any following instructions
     are not reachable via a fall-through path.
     But don't do this when not optimizing, since we wouldn't suppress the
     alignment for the barrier then, and could end up with out-of-range
     pc-relative loads.  */
  if (optimize)
    emit_barrier_after (jump);
  emit_label_after (bp->near_label, insn);

  if (bp->far_label)
    JUMP_LABEL (jump) = bp->far_label;
  else
    {
      rtx pat = PATTERN (jump);
      gcc_assert (ANY_RETURN_P (pat));
      JUMP_LABEL (jump) = pat;
    }

  /* Invert the original conditional branch so it skips the new jump.  */
  bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
  gcc_assert (ok);

  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we rather hide the delay slot)
     and it could cause branches to go out of range.  */
  if (bp->far_label)
    (emit_insn_after
     (gen_stuff_delay_slot
      (GEN_INT (unspec_bbr_uid++),
       GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
      insn));
  /* Prevent reorg from undoing our splits.  */
  gen_block_redirect (jump, bp->address += 2, 2);
}
5689
/* Fix up ADDR_DIFF_VECs: rebase each jump table so that its offsets are
   relative to the braf instruction's reference label rather than the
   table's own label.  */
void
fixup_addr_diff_vecs (rtx_insn *first)
{
  rtx_insn *insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      rtx vec_lab, pat, prevpat, x, braf_label;
      rtx_insn *prev;

      if (! JUMP_TABLE_DATA_P (insn)
	  || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	continue;
      pat = PATTERN (insn);
      /* The table's base label, currently the table's own label.  */
      vec_lab = XEXP (XEXP (pat, 0), 0);

      /* Search the matching casesi_jump_2.  */
      for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
	{
	  if (!JUMP_P (prev))
	    continue;
	  prevpat = PATTERN (prev);
	  if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
	    continue;
	  x = XVECEXP (prevpat, 0, 1);
	  if (GET_CODE (x) != USE)
	    continue;
	  x = XEXP (x, 0);
	  if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	    break;
	}
      /* FIXME: This is a bug in the optimizer, but it seems harmless
	 to just avoid panicking.  (NOTE(review): the loop above only exits
	 via break with PREV non-null, so this check looks unreachable —
	 confirm against the optimizer issue it was added for.)  */
      if (!prev)
	continue;

      /* Emit the reference label of the braf where it belongs, right after
	 the casesi_jump_2 (i.e. braf).  */
      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
      emit_label_after (as_a <rtx_insn *> (braf_label), prev);

      /* Fix up the ADDR_DIF_VEC to be relative
	 to the reference address of the braf.  */
      XEXP (XEXP (pat, 0), 0) = braf_label;
    }
}
5737
/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
   a barrier.  Return the base 2 logarithm of the desired alignment.  */
int
barrier_align (rtx_insn *barrier_or_label)
{
  if (! barrier_or_label)
    return 0;

  /* A label directly in front of a jump table: align to 4 bytes so the
     table entries are naturally aligned.  */
  if (LABEL_P (barrier_or_label)
      && NEXT_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
    return 2;

  if (BARRIER_P (barrier_or_label)
      && PREV_INSN (barrier_or_label)
      && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
    {
      rtx pat = PATTERN (PREV_INSN (barrier_or_label));
      /* If this is a very small table, we want to keep the alignment after
	 the table to the minimum for proper code alignment.  */
      return ((optimize_size
	       || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
		   <= (unsigned) 1 << (CACHE_LOG - 2)))
	      ? 1 : align_jumps.levels[0].log);
    }

  rtx_insn *next = next_active_insn (barrier_or_label);

  if (! next)
    return 0;

  rtx pat = PATTERN (next);

  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
    /* This is a barrier in front of a constant table.  */
    return 0;

  if (optimize_size)
    return 0;

  if (! TARGET_SH2 || ! optimize)
    return align_jumps.levels[0].log;

  /* When fixing up pcloads, a constant table might be inserted just before
     the basic block that ends with the barrier.  Thus, we can't trust the
     instruction lengths before that.  */
  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
    {
      /* Check if there is an immediately preceding branch to the insn beyond
	 the barrier.  We must weight the cost of discarding useful information
	 from the current cache line when executing this branch and there is
	 an alignment, against that of fetching unneeded insn in front of the
	 branch target when there is no alignment.  */

      /* There are two delay_slot cases to consider.  One is the simple case
	 where the preceding branch is to the insn beyond the barrier (simple
	 delay slot filling), and the other is where the preceding branch has
	 a delay slot that is a duplicate of the insn after the barrier
	 (fill_eager_delay_slots) and the branch is to the insn after the insn
	 after the barrier.  */

      int slot, credit;
      bool jump_to_next = false;

      /* Skip to the insn before the JUMP_INSN before the barrier under
	 investigation.  */
      rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));

      /* Walk backwards while we still have cache-line "credit" to spend,
	 tracking whether a delay slot candidate was seen.  */
      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
	   credit >= 0 && prev && NONJUMP_INSN_P (prev);
	   prev = prev_real_insn (prev))
	{
	  jump_to_next = false;
	  if (GET_CODE (PATTERN (prev)) == USE
	      || GET_CODE (PATTERN (prev)) == CLOBBER)
	    continue;
	  if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
	    {
	      prev = prev_seq->insn (1);
	      if (INSN_UID (prev) == INSN_UID (next))
		{
		  /* Delay slot was filled with insn at jump target.  */
		  jump_to_next = true;
		  continue;
		}
	    }

	  if (slot &&
	      get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	    slot = 0;
	  credit -= get_attr_length (prev);
	}
      if (prev && jump_to_label_p (prev))
	{
	  rtx_insn *x;
	  if (jump_to_next
	      || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next
	      /* If relax_delay_slots() decides NEXT was redundant
		 with some previous instruction, it will have
		 redirected PREV's jump to the following insn.  */
	      || JUMP_LABEL (prev) == next_nonnote_insn (next)
	      /* There is no upper bound on redundant instructions
		 that might have been skipped, but we must not put an
		 alignment where none had been before.  */
	      || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
		  (INSN_P (x)
		   && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
		       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
		       || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
	    {
	      rtx pat = PATTERN (prev);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	      /* If enough of the preceding code fits in the cache line,
		 skip the alignment.  */
	      if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
		return 0;
	    }
	}
    }

  return align_jumps.levels[0].log;
}
5859
5860 /* If we are inside a phony loop, almost any kind of label can turn up as the
5861 first one in the loop. Aligning a braf label causes incorrect switch
5862 destination addresses; we can detect braf labels because they are
5863 followed by a BARRIER.
5864 Applying loop alignment to small constant or switch tables is a waste
5865 of space, so we suppress this too. */
5866 int
sh_loop_align(rtx_insn * label)5867 sh_loop_align (rtx_insn *label)
5868 {
5869 rtx_insn *next = label;
5870
5871 if (! optimize || optimize_size)
5872 return 0;
5873
5874 do
5875 next = next_nonnote_insn (next);
5876 while (next && LABEL_P (next));
5877
5878 if (! next
5879 || ! INSN_P (next)
5880 || recog_memoized (next) == CODE_FOR_consttable_2)
5881 return 0;
5882
5883 return align_loops.levels[0].log;
5884 }
5885
5886 /* Do a final pass over the function, just before delayed branch
5887 scheduling. */
static void
sh_reorg (void)
{
  rtx_insn *first, *insn, *mova = NULL;
  int num_mova;
  rtx r0_rtx = gen_rtx_REG (Pmode, 0);
  rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);

  first = get_insns ();
  /* Labels created later (by this pass itself) have numbers at or above
     this; see the max_labelno_before_reorg uses elsewhere in the file.  */
  max_labelno_before_reorg = max_label_num ();

  /* We must split call insns before introducing `mova's.  If we're
     optimizing, they'll have already been split.  Otherwise, make
     sure we don't split them too late.  */
  if (! optimize)
    split_all_insns_noflow ();

  /* Phase 1 (TARGET_RELAX only): mark call insns and the insns that load
     their target symbols with matching REG_LABEL_OPERAND notes, so that
     final_prescan_insn can emit the label / `.uses' pseudo-op pairs.

     If relaxing, generate pseudo-ops to associate function calls with
     the symbols they call.  It does no harm to not generate these
     pseudo-ops.  However, when we can generate them, it enables the
     linker to potentially relax the jsr to a bsr, and eliminate the
     register load and, possibly, the constant pool entry.  */

  mdep_reorg_phase = SH_INSERT_USES_LABELS;
  if (TARGET_RELAX)
    {
      /* Remove all REG_LABEL_OPERAND notes.  We want to use them for our
	 own purposes.  This works because none of the remaining passes
	 need to look at them.

	 ??? But it may break in the future.  We should use a machine
	 dependent REG_NOTE, or some other approach entirely.  */
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      rtx note;

	      while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
					    NULL_RTX)) != 0)
		remove_note (insn, note);
	    }
	}

      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  rtx pattern, reg, set, dies;
	  rtx_code_label *label;
	  rtx_insn *link, *scan;
	  int rescan = 0, foundinsn = 0;

	  if (CALL_P (insn))
	    {
	      pattern = PATTERN (insn);

	      /* Dig the call target address register out of the pattern:
		 strip an enclosing PARALLEL and SET to get at the CALL,
		 whose operand 0 is a MEM around the address.  */
	      if (GET_CODE (pattern) == PARALLEL)
		pattern = XVECEXP (pattern, 0, 0);
	      if (GET_CODE (pattern) == SET)
		pattern = SET_SRC (pattern);

	      if (GET_CODE (pattern) != CALL
		  || !MEM_P (XEXP (pattern, 0)))
		continue;

	      reg = XEXP (XEXP (pattern, 0), 0);
	    }
	  else
	    {
	      /* Special-function (sfunc) calls also use a register.  */
	      reg = sfunc_uses_reg (insn);
	      if (! reg)
		continue;
	    }

	  if (!REG_P (reg))
	    continue;

	  /* Try scanning backward to find where the register is set.  */
	  link = NULL;
	  for (scan = PREV_INSN (insn);
	       scan && !LABEL_P (scan);
	       scan = PREV_INSN (scan))
	    {
	      if (! INSN_P (scan))
		continue;

	      if (! reg_mentioned_p (reg, scan))
		continue;

	      /* A non-call use of REG before its set insn disqualifies
		 this call from the optimization.  */
	      if (noncall_uses_reg (reg, scan, &set))
		break;

	      if (set)
		{
		  link = scan;
		  break;
		}
	    }

	  if (! link)
	    continue;

	  /* The register is set at LINK.  */

	  /* We can only optimize the function call if the register is
	     being set to a symbol.  In theory, we could sometimes
	     optimize calls to a constant location, but the assembler
	     and linker do not support that at present.  */
	  if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
	      && GET_CODE (SET_SRC (set)) != LABEL_REF)
	    continue;

	  /* Scan forward from LINK to the place where REG dies, and
	     make sure that the only insns which use REG are
	     themselves function calls.  */

	  /* ??? This doesn't work for call targets that were allocated
	     by reload, since there may not be a REG_DEAD note for the
	     register.  */

	  dies = NULL_RTX;
	  for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
	    {
	      rtx scanset;

	      /* Don't try to trace forward past a CODE_LABEL if we haven't
		 seen INSN yet.  Ordinarily, we will only find the setting insn
		 if it is in the same basic block.  However,
		 cross-jumping can insert code labels in between the load and
		 the call, and can result in situations where a single call
		 insn may have two targets depending on where we came from.  */

	      if (LABEL_P (scan) && ! foundinsn)
		break;

	      if (! INSN_P (scan))
		continue;

	      /* Don't try to trace forward past a JUMP.  To optimize
		 safely, we would have to check that all the
		 instructions at the jump destination did not use REG.  */

	      if (JUMP_P (scan))
		break;

	      if (! reg_mentioned_p (reg, scan))
		continue;

	      if (noncall_uses_reg (reg, scan, &scanset))
		break;

	      if (scan == insn)
		foundinsn = 1;

	      if (scan != insn
		  && (CALL_P (scan) || sfunc_uses_reg (scan)))
		{
		  /* There is a function call to this register other
		     than the one we are checking.  If we optimize
		     this call, we need to rescan again below.  */
		  rescan = 1;
		}

	      /* ??? We shouldn't have to worry about SCANSET here.
		 We should just be able to check for a REG_DEAD note
		 on a function call.  However, the REG_DEAD notes are
		 apparently not dependable around libcalls; c-torture
		 execute/920501-2 is a test case.  If SCANSET is set,
		 then this insn sets the register, so it must have
		 died earlier.  Unfortunately, this will only handle
		 the cases in which the register is, in fact, set in a
		 later insn.  */

	      /* ??? We shouldn't have to use FOUNDINSN here.
		 This dates back to when we used LOG_LINKS to find
		 the most recent insn which sets the register.  */

	      if (foundinsn
		  && (scanset
		      || find_reg_note (scan, REG_DEAD, reg)))
		{
		  dies = scan;
		  break;
		}
	    }

	  if (! dies)
	    {
	      /* Either there was a branch, or some insn used REG
		 other than as a function call address.  */
	      continue;
	    }

	  /* Create a code label, and put it in a REG_LABEL_OPERAND note
	     on the insn which sets the register, and on each call insn
	     which uses the register.  In final_prescan_insn we look for
	     the REG_LABEL_OPERAND notes, and output the appropriate label
	     or pseudo-op.  */

	  label = gen_label_rtx ();
	  add_reg_note (link, REG_LABEL_OPERAND, label);
	  add_reg_note (insn, REG_LABEL_OPERAND, label);
	  if (rescan)
	    {
	      /* Other calls through the same register exist between LINK
		 and DIES; tag them with the same label too.  */
	      scan = link;
	      do
		{
		  rtx reg2;

		  scan = NEXT_INSN (scan);
		  if (scan != insn
		      && ((CALL_P (scan)
			   && reg_mentioned_p (reg, scan))
			  || ((reg2 = sfunc_uses_reg (scan))
			      && REGNO (reg2) == REGNO (reg))))
		    add_reg_note (scan, REG_LABEL_OPERAND, label);
		}
	      while (scan != dies);
	    }
	}
    }

  if (TARGET_SH2)
    fixup_addr_diff_vecs (first);

  if (optimize)
    {
      mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
      shorten_branches (first);
    }

  /* Phase 2: scan the function looking for move instructions which have
     to be changed to pc-relative loads and insert the literal tables.  */
  mdep_reorg_phase = SH_FIXUP_PCLOAD;
  for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
    {
      if (mova_p (insn))
	{
	  /* ??? basic block reordering can move a switch table dispatch
	     below the switch table.  Check if that has happened.
	     We only have the addresses available when optimizing; but then,
	     this check shouldn't be needed when not optimizing.  */
	  if (!untangle_mova (&num_mova, &mova, insn))
	    {
	      insn = mova;
	      num_mova = 0;
	    }
	}
      else if (JUMP_TABLE_DATA_P (insn)
	       && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
	       && num_mova
	       /* ??? loop invariant motion can also move a mova out of a
		  loop.  Since loop does this code motion anyway, maybe we
		  should wrap UNSPEC_MOVA into a CONST, so that reload can
		  move it back.  */
	       && ((num_mova > 1
		    && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
		   || (prev_nonnote_insn (insn)
		       == XEXP (MOVA_LABELREF (mova), 0))))
	{
	  rtx_insn *scan;
	  int total;

	  num_mova--;

	  /* Some code might have been inserted between the mova and
	     its ADDR_DIFF_VEC.  Check if the mova is still in range.  */
	  for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
	    total += get_attr_length (scan);

	  /* range of mova is 1020, add 4 because pc counts from address of
	     second instruction after this one, subtract 2 in case pc is 2
	     byte aligned.  Possible alignment needed for the ADDR_DIFF_VEC
	     cancels out with alignment effects of the mova itself.  */
	  if (total > 1022)
	    {
	      /* Change the mova into a load, and restart scanning
		 there.  broken_move will then return true for mova.  */
	      fixup_mova (mova);
	      insn = mova;
	    }
	}
      if (broken_move (insn)
	  || (NONJUMP_INSN_P (insn)
	      && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
	{
	  rtx_insn *scan;
	  /* Scan ahead looking for a barrier to stick the constant table
	     behind.  */
	  rtx_insn *barrier = find_barrier (num_mova, mova, insn);
	  rtx_insn *last_float_move = NULL;
	  rtx last_float = 0, *last_float_addr = NULL;
	  int need_aligned_label = 0;

	  if (num_mova && ! mova_p (mova))
	    {
	      /* find_barrier had to change the first mova into a
		 pcload; thus, we have to start with this new pcload.  */
	      insn = mova;
	      num_mova = 0;
	    }
	  /* Now find all the moves between the points and modify them.  */
	  for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
	    {
	      if (LABEL_P (scan))
		last_float = 0;
	      if (NONJUMP_INSN_P (scan)
		  && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
		need_aligned_label = 1;
	      if (broken_move (scan))
		{
		  rtx *patp = &PATTERN (scan), pat = *patp;
		  rtx src, dst;
		  rtx lab;
		  rtx newsrc;
		  machine_mode mode;

		  if (GET_CODE (pat) == PARALLEL)
		    patp = &XVECEXP (pat, 0, 0), pat = *patp;
		  src = SET_SRC (pat);
		  dst = SET_DEST (pat);
		  mode = GET_MODE (dst);

		  /* A 16-bit-immediate SImode constant into a general
		     register can be loaded as HImode (sign-extending
		     mov.w), halving the constant table slot.  */
		  if (mode == SImode && satisfies_constraint_I16 (src)
		      && REGNO (dst) != FPUL_REG)
		    {
		      int offset = 0;

		      mode = HImode;
		      while (GET_CODE (dst) == SUBREG)
			{
			  offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
							 GET_MODE (SUBREG_REG (dst)),
							 SUBREG_BYTE (dst),
							 GET_MODE (dst));
			  dst = SUBREG_REG (dst);
			}
		      dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
		    }
		  if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
		    {
		      /* This must be an insn that clobbers r0.  */
		      rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
						XVECLEN (PATTERN (scan), 0)
						- 1);
		      rtx clobber = *clobberp;

		      gcc_assert (GET_CODE (clobber) == CLOBBER
				  && rtx_equal_p (XEXP (clobber, 0), r0_rtx));

		      if (last_float
			  && reg_set_between_p (r0_rtx, last_float_move, scan))
			last_float = 0;
		      lab = add_constant (src, mode, last_float);
		      if (lab)
			emit_insn_before (gen_mova (lab), scan);
		      else
			{
			  /* There will be a REG_UNUSED note for r0 on
			     LAST_FLOAT_MOVE; we have to change it to REG_INC,
			     lest reorg:mark_target_live_regs will not
			     consider r0 to be used, and we end up with delay
			     slot insn in front of SCAN that clobbers r0.  */
			  rtx note
			    = find_regno_note (last_float_move, REG_UNUSED, 0);

			  /* If we are not optimizing, then there may not be
			     a note.  */
			  if (note)
			    PUT_REG_NOTE_KIND (note, REG_INC);

			  *last_float_addr = r0_inc_rtx;
			}
		      last_float_move = scan;
		      last_float = src;
		      newsrc = gen_const_mem (mode,
					      (((TARGET_SH4 && ! TARGET_FMOVD)
						|| REGNO (dst) == FPUL_REG)
					       ? r0_inc_rtx
					       : r0_rtx));
		      last_float_addr = &XEXP (newsrc, 0);

		      /* Remove the clobber of r0.  */
		      *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
						   gen_rtx_SCRATCH (Pmode));
		    }
		  /* This is a mova needing a label.  Create it.  */
		  else if (GET_CODE (src) == UNSPEC
			   && XINT (src, 1) == UNSPEC_MOVA
			   && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
		    {
		      lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
		      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
		      newsrc = gen_rtx_UNSPEC (SImode,
					       gen_rtvec (1, newsrc),
					       UNSPEC_MOVA);
		    }
		  else if (GET_CODE (src) == UNSPEC_VOLATILE
			   && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
		    {
		      /* Rewrite the operand in place and force
			 re-recognition; no new SET is needed here.  */
		      newsrc = XVECEXP (src, 0, 0);
		      XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
		      INSN_CODE (scan) = -1;
		      continue;
		    }
		  else
		    {
		      lab = add_constant (src, mode, 0);
		      newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
		      newsrc = gen_const_mem (mode, newsrc);
		    }
		  *patp = gen_rtx_SET (dst, newsrc);
		  INSN_CODE (scan) = -1;
		}
	    }
	  dump_table (need_aligned_label ? insn : 0, barrier);
	  insn = barrier;
	}
    }
  label_ref_list_d_pool.release ();
  for (insn = first; insn; insn = NEXT_INSN (insn))
    PUT_MODE (insn, VOIDmode);

  /* Phase 3: split out-of-range branches.  */
  mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
  INSN_ADDRESSES_FREE ();
  split_branches (first);

  /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
     also has an effect on the register that holds the address of the sfunc.
     Insert an extra dummy insn in front of each sfunc that pretends to
     use this register.  */
  if (flag_delayed_branch)
    {
      for (insn = first; insn; insn = NEXT_INSN (insn))
	{
	  rtx reg = sfunc_uses_reg (insn);

	  if (! reg)
	    continue;
	  emit_insn_before (gen_use_sfunc_addr (reg), insn);
	}
    }
  mdep_reorg_phase = SH_AFTER_MDEP_REORG;
}
6331
6332 /* Return the UID of the insn that follows the specified label. */
6333 int
get_dest_uid(rtx_insn * label,int max_uid)6334 get_dest_uid (rtx_insn *label, int max_uid)
6335 {
6336 rtx_insn *dest = next_real_insn (label);
6337
6338 if (! dest)
6339 /* This can happen for an undefined label. */
6340 return 0;
6341 int dest_uid = INSN_UID (dest);
6342 /* If this is a newly created branch redirection blocking instruction,
6343 we cannot index the branch_uid or insn_addresses arrays with its
6344 uid. But then, we won't need to, because the actual destination is
6345 the following branch. */
6346 while (dest_uid >= max_uid)
6347 {
6348 dest = NEXT_INSN (dest);
6349 dest_uid = INSN_UID (dest);
6350 }
6351 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6352 return 0;
6353 return dest_uid;
6354 }
6355
/* Split condbranches that are out of range.  Also add clobbers for
   scratch registers that are needed in far jumps.
   We do this before delay slot scheduling, so that it can take our
   newly created instructions into account.  It also allows us to
   find branches with common targets more easily.

   FIRST is the first insn of the function.  Branch targets are tracked
   in per-destination `far_branch' records, indexed by destination UID;
   the records live on the stack (alloca) for the duration of this
   pass only.  */
static void
split_branches (rtx_insn *first)
{
  rtx_insn *insn;
  struct far_branch **uid_branch, *far_branch_list = 0;
  int max_uid = get_max_uid ();
  int ok;

  /* Find out which branches are out of range.  */
  shorten_branches (first);

  /* Map from destination insn UID to its far_branch record.  */
  uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
  memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);

  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (! INSN_P (insn))
      continue;
    else if (insn->deleted ())
      {
	/* Shorten_branches would split this instruction again,
	   so transform it into a note.  */
	SET_INSN_DELETED (insn);
      }
    else if (JUMP_P (insn))
      {
	enum attr_type type = get_attr_type (insn);
	if (type == TYPE_CBRANCH)
	  {
	    rtx_insn *next, *beyond;

	    /* Length > 4 means shorten_branches decided this conditional
	       branch does not reach its target directly.  */
	    if (get_attr_length (insn) > 4)
	      {
		rtx src = SET_SRC (PATTERN (insn));
		rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0));
		int addr = INSN_ADDRESSES (INSN_UID (insn));
		rtx_insn *label = 0;
		int dest_uid = get_dest_uid (olabel, max_uid);
		struct far_branch *bp = uid_branch[dest_uid];

		/* redirect_jump needs a valid JUMP_LABEL, and it might delete
		   the label if the LABEL_NUSES count drops to zero.  There is
		   always a jump_optimize pass that sets these values, but it
		   proceeds to delete unreferenced code, and then if not
		   optimizing, to un-delete the deleted instructions, thus
		   leaving labels with too low uses counts.  */
		if (! optimize)
		  {
		    JUMP_LABEL (insn) = olabel;
		    LABEL_NUSES (olabel)++;
		  }
		if (! bp)
		  {
		    /* First far branch to this destination: start a new
		       record with the original (far) label.  */
		    bp = (struct far_branch *) alloca (sizeof *bp);
		    uid_branch[dest_uid] = bp;
		    bp->prev = far_branch_list;
		    far_branch_list = bp;
		    bp->far_label = as_a <rtx_insn *> (
			XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
			      0));
		    LABEL_NUSES (bp->far_label)++;
		  }
		else
		  {
		    /* Try to reuse an existing near label for the
		       destination if it is still in range.  */
		    label = bp->near_label;
		    if (! label && bp->address - addr >= CONDJUMP_MIN)
		      {
			rtx_insn *block = bp->insert_place;

			if (GET_CODE (PATTERN (block)) == RETURN)
			  block = PREV_INSN (block);
			else
			  block = gen_block_redirect (block,
						      bp->address, 2);
			label = emit_label_after (gen_label_rtx (),
						  PREV_INSN (block));
			bp->near_label = label;
		      }
		    else if (label && ! NEXT_INSN (label))
		      {
			if (addr + 2 - bp->address <= CONDJUMP_MAX)
			  bp->insert_place = insn;
			else
			  gen_far_branch (bp);
		      }
		  }
		if (! label
		    || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
		  {
		    bp->near_label = label = gen_label_rtx ();
		    bp->insert_place = insn;
		    bp->address = addr;
		  }
		ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
		gcc_assert (ok);
	      }
	    else
	      {
		/* get_attr_length (insn) == 2 */
		/* Check if we have a pattern where reorg wants to redirect
		   the branch to a label from an unconditional branch that
		   is too far away.  */
		/* We can't use JUMP_LABEL here because it might be undefined
		   when not optimizing.  */
		/* A syntax error might cause beyond to be NULL_RTX.  */
		rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
		beyond = next_active_insn (as_a<rtx_insn *> (temp));

		if (beyond
		    && (JUMP_P (beyond)
			|| ((beyond = next_active_insn (beyond))
			    && JUMP_P (beyond)))
		    && GET_CODE (PATTERN (beyond)) == SET
		    && recog_memoized (beyond) == CODE_FOR_jump_compact
		    && ((INSN_ADDRESSES
			 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
			 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
			> 252 + 258 + 2))
		  gen_block_redirect (beyond,
				      INSN_ADDRESSES (INSN_UID (beyond)), 1);
	      }

	    /* Same check for a compact jump immediately following this
	       conditional branch.  */
	    next = next_active_insn (insn);

	    if (next
		&& (JUMP_P (next)
		    || ((next = next_active_insn (next))
			&& JUMP_P (next)))
		&& GET_CODE (PATTERN (next)) == SET
		&& recog_memoized (next) == CODE_FOR_jump_compact
		&& ((INSN_ADDRESSES
		     (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
		     - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
		    > 252 + 258 + 2))
	      gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
	  }
	else if (type == TYPE_JUMP || type == TYPE_RETURN)
	  {
	    int addr = INSN_ADDRESSES (INSN_UID (insn));
	    rtx_insn *far_label = 0;
	    int dest_uid = 0;
	    struct far_branch *bp;

	    if (type == TYPE_JUMP)
	      {
		if (CROSSING_JUMP_P (insn))
		  {
		    emit_insn_before (gen_block_branch_redirect (const0_rtx),
				      insn);
		    continue;
		  }

		far_label = as_a <rtx_insn *> (
		    XEXP (SET_SRC (PATTERN (insn)), 0));
		dest_uid = get_dest_uid (far_label, max_uid);
		if (! dest_uid)
		  {
		    /* Parse errors can lead to labels outside
		       the insn stream.  */
		    if (! NEXT_INSN (far_label))
		      continue;

		    if (! optimize)
		      {
			JUMP_LABEL (insn) = far_label;
			LABEL_NUSES (far_label)++;
		      }
		    redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
		    far_label = 0;
		  }
	      }
	    bp = uid_branch[dest_uid];
	    if (! bp)
	      {
		bp = (struct far_branch *) alloca (sizeof *bp);
		uid_branch[dest_uid] = bp;
		bp->prev = far_branch_list;
		far_branch_list = bp;
		bp->near_label = 0;
		bp->far_label = far_label;
		if (far_label)
		  LABEL_NUSES (far_label)++;
	      }
	    /* NOTE: the inner if/else binds to this else-if; the trailing
	       `else' below is the outer alternative.  */
	    else if (bp->near_label && ! NEXT_INSN (bp->near_label))
	      if (addr - bp->address <= CONDJUMP_MAX)
		emit_label_after (bp->near_label, PREV_INSN (insn));
	      else
		{
		  gen_far_branch (bp);
		  bp->near_label = 0;
		}
	    else
	      bp->near_label = 0;
	    bp->address = addr;
	    bp->insert_place = insn;
	    if (! far_label)
	      emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
	    else
	      gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
	  }
      }
  /* Generate all pending far branches,
     and free our references to the far labels.  */
  while (far_branch_list)
    {
      if (far_branch_list->near_label
	  && ! NEXT_INSN (far_branch_list->near_label))
	gen_far_branch (far_branch_list);
      if (optimize
	  && far_branch_list->far_label
	  && ! --LABEL_NUSES (far_branch_list->far_label))
	delete_insn (far_branch_list->far_label);
      far_branch_list = far_branch_list->prev;
    }

  /* Instruction length information is no longer valid due to the new
     instructions that have been generated.  */
  init_insn_lengths ();
}
6579
6580 /* Dump out instruction addresses, which is useful for debugging the
6581 constant pool table stuff.
6582
6583 If relaxing, output the label and pseudo-ops used to link together
6584 calls and the instruction which set the registers.
6585
6586 ??? The addresses printed by this routine for insns are nonsense for
6587 insns which are inside of a sequence where none of the inner insns have
6588 variable length. This is because the second pass of shorten_branches
6589 does not bother to update them. */
6590 void
final_prescan_insn(rtx_insn * insn,rtx * opvec ATTRIBUTE_UNUSED,int noperands ATTRIBUTE_UNUSED)6591 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6592 int noperands ATTRIBUTE_UNUSED)
6593 {
6594 if (TARGET_DUMPISIZE)
6595 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6596
6597 if (TARGET_RELAX)
6598 {
6599 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6600 {
6601 rtx pattern = PATTERN (insn);
6602 if (GET_CODE (pattern) == PARALLEL)
6603 pattern = XVECEXP (pattern, 0, 0);
6604 switch (GET_CODE (pattern))
6605 {
6606 case SET:
6607 if (GET_CODE (SET_SRC (pattern)) != CALL
6608 && get_attr_type (insn) != TYPE_SFUNC)
6609 {
6610 targetm.asm_out.internal_label
6611 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6612 break;
6613 }
6614 /* FALLTHROUGH */
6615 case CALL:
6616 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6617 CODE_LABEL_NUMBER (XEXP (note, 0)));
6618 break;
6619
6620 default:
6621 gcc_unreachable ();
6622 }
6623 }
6624 }
6625 }
6626
6627 /* Dump out any constants accumulated in the final pass. These will
6628 only be labels. */
6629 const char *
output_jump_label_table(void)6630 output_jump_label_table (void)
6631 {
6632 if (pool_size)
6633 {
6634 fprintf (asm_out_file, "\t.align 2\n");
6635 for (int i = 0; i < pool_size; i++)
6636 {
6637 pool_node *p = &pool_vector[i];
6638
6639 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6640 CODE_LABEL_NUMBER (p->label));
6641 output_asm_insn (".long %O0", &p->value);
6642 }
6643 pool_size = 0;
6644 }
6645
6646 return "";
6647 }
6648
6649 /* A full frame looks like:
6650
6651 arg-5
6652 arg-4
6653 [ if current_function_anonymous_args
6654 arg-3
6655 arg-2
6656 arg-1
6657 arg-0 ]
6658 saved-fp
6659 saved-r10
6660 saved-r11
6661 saved-r12
6662 saved-pr
6663 local-n
6664 ..
6665 local-1
6666 local-0 <- fp points here.
6667
6668 Number of bytes pushed for anonymous args, used to pass information
6669 between expand_prologue and expand_epilogue.
6670
6671 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6672 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6673 for an epilogue and a negative value means that it's for a sibcall
6674 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6675 all the registers that are about to be restored, and hence dead. */
static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
		     HARD_REG_SET *live_regs_mask, bool frame_p)
{
  /* Frame-related adjustments are emitted through emit_frame_insn so
     that they are marked RTX_FRAME_RELATED_P.  */
  rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
  if (size)
    {
      HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

      /* This test is bogus, as output_stack_adjust is used to re-align the
	 stack.  */
#if 0
      gcc_assert (!(size % align));
#endif

      /* Easy case: SIZE fits in an add-immediate.  */
      if (CONST_OK_FOR_ADD (size))
	emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
      /* Try to do it with two partial adjustments; however, we must make
	 sure that the stack is properly aligned at all times, in case
	 an interrupt occurs between the two partial adjustments.  */
      else if (CONST_OK_FOR_ADD (size / 2 & -align)
	       && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
	{
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
	}
      else
	{
	  /* SIZE needs to be loaded into a temporary register first.
	     TEMP is the candidate hard register number; -1 means no
	     usable one has been found yet.  */
	  rtx const_reg;
	  rtx insn;
	  int temp = epilogue_p ? 7 : 1;
	  int i;

	  /* If TEMP is invalid, we could temporarily save a general
	     register to MACL.  However, there is currently no need
	     to handle this case, so just die when we see it.  */
	  if (epilogue_p < 0
	      || current_function_interrupt
	      || ! call_really_used_regs[temp] || fixed_regs[temp])
	    temp = -1;
	  if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
	    {
	      /* Scavenge among the call-used, non-fixed registers,
		 excluding any that still carry live values here.  */
	      HARD_REG_SET temps;
	      COPY_HARD_REG_SET (temps, call_used_reg_set);
	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
	      if (epilogue_p > 0)
		{
		  /* In an epilogue, the return-value registers and the
		     EH-return registers are live.  */
		  int nreg = 0;
		  if (crtl->return_rtx)
		    {
		      machine_mode mode;
		      mode = GET_MODE (crtl->return_rtx);
		      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
			nreg = hard_regno_nregs (FIRST_RET_REG, mode);
		    }
		  for (i = 0; i < nreg; i++)
		    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
		  if (crtl->calls_eh_return)
		    {
		      CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
		      for (i = 0; i <= 3; i++)
			CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
		    }
		}
	      if (epilogue_p <= 0)
		{
		  /* In a prologue, the incoming argument registers and
		     the static chain are live.  */
		  for (i = FIRST_PARM_REG;
		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
		    CLEAR_HARD_REG_BIT (temps, i);
		  if (cfun->static_chain_decl != NULL)
		    CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
		}
	      temp = scavenge_reg (&temps);
	    }
	  if (temp < 0 && live_regs_mask)
	    {
	      /* Last scavenging attempt: any register that is about to be
		 restored (and hence dead here), except REG itself.  */
	      HARD_REG_SET temps;

	      COPY_HARD_REG_SET (temps, *live_regs_mask);
	      CLEAR_HARD_REG_BIT (temps, REGNO (reg));
	      temp = scavenge_reg (&temps);
	    }
	  if (temp < 0)
	    {
	      rtx adj_reg, tmp_reg, mem;

	      /* If we reached here, the most likely case is the (sibcall)
		 epilogue.  Put a special push/pop sequence for such case as
		 the last resort.  This looks lengthy but would not be problem
		 because it seems to be very rare.  */
	      gcc_assert (epilogue_p);

	      /* ??? There is still the slight possibility that r4 or
		 r5 have been reserved as fixed registers or assigned
		 as global registers, and they change during an
		 interrupt.  There are possible ways to handle this:

		 - If we are adjusting the frame pointer (r14), we can do
		   with a single temp register and an ordinary push / pop
		   on the stack.
		 - Grab any call-used or call-saved registers (i.e. not
		   fixed or globals) for the temps we need.  We might
		   also grab r14 if we are adjusting the stack pointer.
		   If we can't find enough available registers, issue
		   a diagnostic and die - the user must have reserved
		   way too many registers.
		 But since all this is rather unlikely to happen and
		 would require extra testing, we just die if r4 / r5
		 are not available.  */
	      gcc_assert (!fixed_regs[4] && !fixed_regs[5]
			  && !global_regs[4] && !global_regs[5]);

	      /* Save r4 at [reg], compute reg+size into r4, push r5 and
		 the saved r4 copy onto the new stack area, switch REG to
		 the adjusted value, then pop r4 / r5 back.  */
	      adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
	      tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
	      emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
	      emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
	      emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
	      emit_move_insn (mem, tmp_reg);
	      emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
	      emit_move_insn (mem, tmp_reg);
	      emit_move_insn (reg, adj_reg);
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
	      emit_move_insn (adj_reg, mem);
	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
	      emit_move_insn (tmp_reg, mem);
	      /* Tell flow the insns that pop r4/r5 aren't dead.  */
	      emit_use (tmp_reg);
	      emit_use (adj_reg);
	      return;
	    }
	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);

	  /* If SIZE is negative, subtract the positive value.
	     This sometimes allows a constant pool entry to be shared
	     between prologue and epilogue code.  */
	  if (size < 0)
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
	      insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
	    }
	  else
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
	      insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
	    }
	  /* Describe the net effect for the unwinder, since the actual
	     adjustment went through a temporary register.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
							GEN_INT (size))));
	}
    }
}
6829
6830 /* Emit the specified insn and mark it as frame related. */
6831 static rtx_insn *
emit_frame_insn(rtx x)6832 emit_frame_insn (rtx x)
6833 {
6834 rtx_insn *insn = emit_insn (x);
6835 RTX_FRAME_RELATED_P (insn) = 1;
6836 return insn;
6837 }
6838
6839 /* Output RTL to push register RN onto the stack. */
6840 static rtx
push(int rn)6841 push (int rn)
6842 {
6843 rtx x;
6844 if (rn == FPUL_REG)
6845 x = gen_push_fpul ();
6846 else if (rn == FPSCR_REG)
6847 x = gen_push_fpscr ();
6848 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6849 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6850 {
6851 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6852 return NULL_RTX;
6853 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6854 }
6855 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6856 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6857 else
6858 x = gen_push (gen_rtx_REG (SImode, rn));
6859
6860 x = emit_frame_insn (x);
6861 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6862 return x;
6863 }
6864
6865 /* Output RTL to pop register RN from the stack. */
6866 static void
pop(int rn)6867 pop (int rn)
6868 {
6869 rtx x, sp_reg, reg;
6870 if (rn == FPUL_REG)
6871 x = gen_pop_fpul ();
6872 else if (rn == FPSCR_REG)
6873 x = gen_pop_fpscr ();
6874 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6875 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6876 {
6877 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6878 return;
6879 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6880 }
6881 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6882 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6883 else
6884 x = gen_pop (gen_rtx_REG (SImode, rn));
6885
6886 x = emit_insn (x);
6887
6888 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6889 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6890 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6891 : SET_DEST (PATTERN (x)));
6892 add_reg_note (x, REG_CFA_RESTORE, reg);
6893 add_reg_note (x, REG_CFA_ADJUST_CFA,
6894 gen_rtx_SET (sp_reg,
6895 plus_constant (SImode, sp_reg,
6896 GET_MODE_SIZE (GET_MODE (reg)))));
6897 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6898 RTX_FRAME_RELATED_P (x) = 1;
6899 }
6900
/* Generate code to push the regs specified in the mask MASK.
   INTERRUPT_HANDLER is true when expanding the prologue of an
   interrupt handler, in which case the banked registers are handled
   separately (pushed last, possibly via a single movml on SH2A).  */
static void
push_regs (HARD_REG_SET *mask, bool interrupt_handler)
{
  /* Set once FPSCR has been pushed explicitly ahead of the FP regs so
     the generic loop below does not push it a second time.  */
  bool skip_fpscr = false;

  /* Push PR last; this gives better latencies after the prologue, and
     candidates for the return delay slot when there are no general
     registers pushed.  */
  for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
       i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* If this is an interrupt handler, and the SZ bit varies,
	 and we have to push any floating point register, we need
	 to switch to the correct precision first.  */
      if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
	  && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
	{
	  HARD_REG_SET unsaved;

	  push (FPSCR_REG);
	  COMPL_HARD_REG_SET (unsaved, *mask);
	  fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
	  skip_fpscr = true;
	}
      if (i != PR_REG
	  && (i != FPSCR_REG || ! skip_fpscr)
	  && TEST_HARD_REG_BIT (*mask, i))
	{
	  /* If the ISR has RESBANK attribute assigned, don't push any of
	     the following registers - R0-R14, MACH, MACL and GBR.  */
	  if (! (sh_cfun_resbank_handler_p ()
		 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
		     || i == MACH_REG
		     || i == MACL_REG
		     || i == GBR_REG)))
	    push (i);
	}
    }

  /* Push banked registers last to improve delay slot opportunities.  */
  if (interrupt_handler)
    {
      bool use_movml = false;

      if (TARGET_SH2A)
	{
	  unsigned int count = 0;

	  /* Count the leading run of live banked registers.  */
	  for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	    if (TEST_HARD_REG_BIT (*mask, i))
	      count++;
	    else
	      break;

	  /* Use movml when all banked registers are pushed.  */
	  if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
	    use_movml = true;
	}

      if (sh_cfun_resbank_handler_p ())
	; /* Do nothing.  */
      else if (use_movml)
	{
	  rtx x, mem, reg, set;
	  rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

	  /* We must avoid scheduling multiple store insn with another
	     insns.  */
	  emit_insn (gen_blockage ());
	  x = gen_movml_push_banked (sp_reg);
	  x = emit_frame_insn (x);
	  /* Record the stack slot of each banked register so the
	     unwinder can find it.  */
	  for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	    {
	      mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
	      reg = gen_rtx_REG (SImode, i);
	      add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
	    }

	  /* movml decrements SP by 32 (8 banked regs * 4 bytes).  */
	  set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
	  add_reg_note (x, REG_CFA_ADJUST_CFA, set);
	  emit_insn (gen_blockage ());
	}
      else
	for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	  if (TEST_HARD_REG_BIT (*mask, i))
	    push (i);
    }

  /* Don't push PR register for an ISR with RESBANK attribute assigned.  */
  if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
    push (PR_REG);
}
6994
6995 /* Work out the registers which need to be saved, both as a mask and a
6996 count of saved words. Return the count.
6997
6998 If doing a pragma interrupt function, then push all regs used by the
6999 function, and if we call another function (we can tell by looking at PR),
7000 make sure that all the regs it clobbers are safe too. */
7001 static int
calc_live_regs(HARD_REG_SET * live_regs_mask)7002 calc_live_regs (HARD_REG_SET *live_regs_mask)
7003 {
7004 unsigned int reg;
7005 tree attrs;
7006 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7007 bool nosave_low_regs;
7008
7009 attrs = DECL_ATTRIBUTES (current_function_decl);
7010 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7011 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7012 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7013 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7014
7015 CLEAR_HARD_REG_SET (*live_regs_mask);
7016 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7017 && df_regs_ever_live_p (FPSCR_REG))
7018 target_flags &= ~MASK_FPU_SINGLE;
7019 /* If we can save a lot of saves by switching to double mode, do that. */
7020 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7021 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7022 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7023 && (! call_really_used_regs[reg]
7024 || interrupt_handler)
7025 && ++count > 2)
7026 {
7027 target_flags &= ~MASK_FPU_SINGLE;
7028 break;
7029 }
7030
7031
7032 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7033 bool pr_live = (pr_initial
7034 ? (!REG_P (pr_initial)
7035 || REGNO (pr_initial) != (PR_REG))
7036 : df_regs_ever_live_p (PR_REG));
7037 /* For Shcompact, if not optimizing, we end up with a memory reference
7038 using the return address pointer for __builtin_return_address even
7039 though there is no actual need to put the PR register on the stack. */
7040 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7041
7042 /* Force PR to be live if the prologue has to call the SHmedia
7043 argument decoder or register saver. */
7044 bool has_call = pr_live;
7045
7046 int count;
7047 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7048 {
7049 if (reg == PR_REG
7050 ? pr_live
7051 : interrupt_handler
7052 ? (/* Need to save all the regs ever live. */
7053 (df_regs_ever_live_p (reg)
7054 || (call_really_used_regs[reg]
7055 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7056 || reg == PIC_OFFSET_TABLE_REGNUM)
7057 && has_call))
7058 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7059 && reg != RETURN_ADDRESS_POINTER_REGNUM
7060 && reg != T_REG && reg != GBR_REG
7061 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7062 /* Push fpscr only on targets which have FPU */
7063 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7064 : (/* Only push those regs which are used and need to be saved. */
7065 (false)
7066 || (df_regs_ever_live_p (reg)
7067 && ((!call_really_used_regs[reg]
7068 && !(reg != PIC_OFFSET_TABLE_REGNUM
7069 && fixed_regs[reg] && call_used_regs[reg]))
7070 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7071 || (crtl->calls_eh_return
7072 && (reg == EH_RETURN_DATA_REGNO (0)
7073 || reg == EH_RETURN_DATA_REGNO (1)
7074 || reg == EH_RETURN_DATA_REGNO (2)
7075 || reg == EH_RETURN_DATA_REGNO (3)))
7076 || ((reg == MACL_REG || reg == MACH_REG)
7077 && df_regs_ever_live_p (reg)
7078 && sh_cfun_attr_renesas_p ())
7079 ))
7080 {
7081 SET_HARD_REG_BIT (*live_regs_mask, reg);
7082 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7083
7084 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7085 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7086 {
7087 if (FP_REGISTER_P (reg))
7088 {
7089 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7090 {
7091 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7092 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7093 }
7094 }
7095 else if (XD_REGISTER_P (reg))
7096 {
7097 /* Must switch to double mode to access these registers. */
7098 target_flags &= ~MASK_FPU_SINGLE;
7099 }
7100 }
7101 }
7102 if (nosave_low_regs && reg == R8_REG)
7103 break;
7104 }
7105
7106 return count;
7107 }
7108
7109 /* Code to generate prologue and epilogue sequences */
7110
7111 /* PUSHED is the number of bytes that are being pushed on the
7112 stack for register saves. Return the frame size, padded
7113 appropriately so that the stack stays properly aligned. */
7114 static HOST_WIDE_INT
rounded_frame_size(int pushed)7115 rounded_frame_size (int pushed)
7116 {
7117 HOST_WIDE_INT size = get_frame_size ();
7118 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7119
7120 if (ACCUMULATE_OUTGOING_ARGS)
7121 size += crtl->outgoing_args_size;
7122
7123 return ((size + pushed + align - 1) & -align) - pushed;
7124 }
7125
7126 /* Expand code for the function prologue. */
7127 void
sh_expand_prologue(void)7128 sh_expand_prologue (void)
7129 {
7130 int save_flags = target_flags;
7131 tree sp_switch_attr
7132 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7133
7134 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7135
7136 /* We have pretend args if we had an object sent partially in registers
7137 and partially on the stack, e.g. a large structure. */
7138 int pretend_args = crtl->args.pretend_args_size;
7139 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7140 && (NPARM_REGS(SImode)
7141 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7142 pretend_args = 0;
7143
7144 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7145 int stack_usage = pretend_args;
7146
7147 /* Emit the code for SETUP_VARARGS. */
7148 if (cfun->stdarg)
7149 {
7150 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7151 {
7152 /* Push arg regs as if they'd been provided by caller in stack. */
7153 for (int i = 0; i < NPARM_REGS(SImode); i++)
7154 {
7155 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7156
7157 if (i >= (NPARM_REGS(SImode)
7158 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7159 ))
7160 break;
7161 push (rn);
7162 stack_usage += GET_MODE_SIZE (SImode);
7163 }
7164 }
7165 }
7166
7167 /* If we're supposed to switch stacks at function entry, do so now. */
7168 if (sp_switch_attr)
7169 {
7170 rtx lab, newsrc;
7171 /* The argument specifies a variable holding the address of the
7172 stack the interrupt function should switch to/from at entry/exit. */
7173 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7174 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7175 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7176
7177 lab = add_constant (sp_switch, SImode, 0);
7178 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7179
7180 emit_insn (gen_sp_switch_1 (newsrc));
7181 }
7182
7183 HARD_REG_SET live_regs_mask;
7184 int d = calc_live_regs (&live_regs_mask);
7185 /* ??? Maybe we could save some switching if we can move a mode switch
7186 that already happens to be at the function start into the prologue. */
7187 if (target_flags != save_flags && ! current_function_interrupt)
7188 emit_insn (gen_toggle_sz ());
7189
7190 push_regs (&live_regs_mask, current_function_interrupt);
7191 stack_usage += d;
7192
7193 if (flag_pic && !TARGET_FDPIC
7194 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7195 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7196
7197 if (target_flags != save_flags && ! current_function_interrupt)
7198 emit_insn (gen_toggle_sz ());
7199
7200 target_flags = save_flags;
7201
7202 output_stack_adjust (-rounded_frame_size (d),
7203 stack_pointer_rtx, 0, NULL, true);
7204 stack_usage += rounded_frame_size (d);
7205
7206 if (frame_pointer_needed)
7207 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7208
7209 /* If we are profiling, make sure no instructions are scheduled before
7210 the call to mcount. Similarly if some call instructions are swapped
7211 before frame related insns, it'll confuse the unwinder because
7212 currently SH has no unwind info for function epilogues. */
7213 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7214 emit_insn (gen_blockage ());
7215
7216 if (flag_stack_usage_info)
7217 current_function_static_stack_size = stack_usage;
7218 }
7219
/* Expand code for the function epilogue.  SIBCALL_P is true when this
   is emitted just before a sibling call rather than a return; it flips
   the sign argument passed down to output_stack_adjust.  */
void
sh_expand_epilogue (bool sibcall_p)
{
  int save_flags = target_flags;
  bool fpscr_deferred = false;
  int e = sibcall_p ? -1 : 1;

  HARD_REG_SET live_regs_mask;
  int d = calc_live_regs (&live_regs_mask);

  int save_size = d;
  int frame_size = rounded_frame_size (d);

  if (frame_pointer_needed)
    {
      /* We must avoid scheduling the epilogue with previous basic blocks.
	 See PR/18032 and PR/40313.  */
      emit_insn (gen_blockage ());
      output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
			   &live_regs_mask, true);

      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
    }
  else if (frame_size)
    {
      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      output_stack_adjust (frame_size, stack_pointer_rtx, e,
			   &live_regs_mask, true);
    }

  /* Pop all the registers.  */

  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());

  {
    int last_reg;

    save_size = 0;
    /* For an ISR with RESBANK attribute assigned, don't pop PR
       register.  */
    if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
	&& !sh_cfun_resbank_handler_p ())
      {
	if (!frame_pointer_needed)
	  emit_insn (gen_blockage ());
	pop (PR_REG);
      }

    /* Banked registers are popped first to avoid being scheduled in the
       delay slot. RTE switches banks before the ds instruction.  */
    if (current_function_interrupt)
      {
	bool use_movml = false;

	if (TARGET_SH2A)
	  {
	    unsigned int count = 0;

	    /* Count the leading run of live banked registers.  */
	    for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
	      if (TEST_HARD_REG_BIT (live_regs_mask, i))
		count++;
	      else
		break;

	    /* Use movml when all banked registers are popped.  */
	    if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
	      use_movml = true;
	  }

	if (sh_cfun_resbank_handler_p ())
	  ; /* Do nothing.  */
	else if (use_movml)
	  {
	    rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);

	    /* We must avoid scheduling multiple load insn with another
	       insns.  */
	    emit_insn (gen_blockage ());
	    emit_insn (gen_movml_pop_banked (sp_reg));
	    emit_insn (gen_blockage ());
	  }
	else
	  for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
	    if (TEST_HARD_REG_BIT (live_regs_mask, i))
	      pop (i);

	/* Banked regs are already done; exclude them from the generic
	   loop below.  */
	last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
      }
    else
      last_reg = FIRST_PSEUDO_REGISTER;

    /* Pop the remaining registers in reverse order of the pushes.  */
    for (int i = 0; i < last_reg; i++)
      {
	int j = (FIRST_PSEUDO_REGISTER - 1) - i;

	if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
	    && hard_reg_set_intersect_p (live_regs_mask,
					 reg_class_contents[DF_REGS]))
	  /* Defer popping FPSCR until after the FP registers, mirroring
	     the early FPSCR push in push_regs.  */
	  fpscr_deferred = true;
	/* For an ISR with RESBANK attribute assigned, don't pop
	   following registers, R0-R14, MACH, MACL and GBR.  */
	else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
		 && ! (sh_cfun_resbank_handler_p ()
		       && ((j >= FIRST_GENERAL_REG
			    && j < LAST_GENERAL_REG)
			   || j == MACH_REG
			   || j == MACL_REG
			   || j == GBR_REG)))
	  pop (j);

	if (j == FIRST_FP_REG && fpscr_deferred)
	  pop (FPSCR_REG);
      }
  }
  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());
  target_flags = save_flags;

  output_stack_adjust (crtl->args.pretend_args_size + save_size,
		       stack_pointer_rtx, e, NULL, true);

  if (crtl->calls_eh_return)
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			 EH_RETURN_STACKADJ_RTX));

  /* Switch back to the normal stack if necessary.  */
  if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
    emit_insn (gen_sp_switch_2 ());

  /* Tell flow the insn that pops PR isn't dead.  */
  if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
    emit_use (gen_rtx_REG (SImode, PR_REG));
}
7364
7365 /* Emit code to change the current function's return address to RA.
7366 TEMP is available as a scratch register, if needed. */
7367 void
sh_set_return_address(rtx ra,rtx tmp)7368 sh_set_return_address (rtx ra, rtx tmp)
7369 {
7370 HARD_REG_SET live_regs_mask;
7371 int d = calc_live_regs (&live_regs_mask);
7372
7373 /* If pr_reg isn't life, we can set it directly. */
7374 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7375 {
7376 rtx rr = gen_rtx_REG (SImode, PR_REG);
7377 emit_insn (GEN_MOV (rr, ra));
7378 /* Tell flow the register for return isn't dead. */
7379 emit_use (rr);
7380 return;
7381 }
7382
7383 int pr_offset = rounded_frame_size (d);
7384
7385 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7386
7387 if (frame_pointer_needed)
7388 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7389 else
7390 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7391
7392 tmp = gen_frame_mem (Pmode, tmp);
7393 emit_insn (GEN_MOV (tmp, ra));
7394 /* Tell this store isn't dead. */
7395 emit_use (tmp);
7396 }
7397
/* Function-epilogue output hook.  The SH port has nothing to emit or
   reset here; the body is intentionally empty.  */
static void
sh_output_function_epilogue (FILE *)
{
}
7403
/* Expand __builtin_saveregs: allocate a stack buffer and store the
   unnamed integer and floating-point argument registers into it, so the
   va_arg machinery can walk them.  Returns the address of the buffer
   (const0_rtx after an error on non-FPU subtargets).  */
static rtx
sh_builtin_saveregs (void)
{
  /* First unnamed integer register.  */
  int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
  /* Number of integer registers we need to save.  */
  int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
  /* First unnamed SFmode float reg */
  int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
  /* Number of SFmode float regs to save.  */
  int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
  rtx regbuf, fpregs;
  int bufsize, regno;
  alias_set_type alias_set;

  if (!TARGET_FPU_ANY)
    {
      error ("%<__builtin_saveregs%> not supported by this subtarget");
      return const0_rtx;
    }

  /* Allocate block of memory for the regs.  */
  /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
     Or can assign_stack_local accept a 0 SIZE argument?  */
  bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);

  if (n_floatregs & 1)
    {
      rtx addr;

      /* Over-allocate by one word and OR UNITS_PER_WORD into the start
	 address at run time, i.e. begin the buffer at an odd word
	 boundary (presumably so the DFmode stores below land on 8-byte
	 boundaries -- see the reverse-order FP save loop).  */
      regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
      addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
      emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
      regbuf = change_address (regbuf, BLKmode, addr);
    }
  else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
    {
      rtx addr, mask;

      /* The stack is not 8-byte aligned; over-allocate by one word and
	 compute (addr + 4) & -8 at run time to get an 8-byte aligned
	 buffer start for the DFmode stores.  */
      regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
      addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
						     XEXP (regbuf, 0), 4));
      mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
      emit_insn (gen_andsi3 (addr, addr, mask));
      regbuf = change_address (regbuf, BLKmode, addr);
    }
  else
    regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
  alias_set = get_varargs_alias_set ();
  set_mem_alias_set (regbuf, alias_set);

  /* Save int args.
     This is optimized to only save the regs that are necessary.  Explicitly
     named args need not be saved.  */
  if (n_intregs > 0)
    move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
			 adjust_address (regbuf, BLKmode,
					 n_floatregs * UNITS_PER_WORD),
			 n_intregs);

  /* Save float args.
     This is optimized to only save the regs that are necessary.  Explicitly
     named args need not be saved.
     We explicitly build a pointer to the buffer because it halves the insn
     count when not optimizing (otherwise the pointer is built for each reg
     saved).
     We emit the moves in reverse order so that we can use predecrement.  */

  fpregs = copy_to_mode_reg (Pmode,
			     plus_constant (Pmode, XEXP (regbuf, 0),
					    n_floatregs * UNITS_PER_WORD));
  if (TARGET_FPU_DOUBLE)
    {
      rtx mem;
      /* Store register pairs as DFmode values, walking downwards.  */
      for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
	{
	  emit_insn (gen_addsi3 (fpregs, fpregs,
				 GEN_INT (-2 * UNITS_PER_WORD)));
	  mem = change_address (regbuf, DFmode, fpregs);
	  emit_move_insn (mem,
			  gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
	}
      regno = first_floatreg;
      /* An odd first float reg has no pair partner; store it alone as
	 SFmode.  */
      if (regno & 1)
	{
	  emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
	  mem = change_address (regbuf, SFmode, fpregs);
	  emit_move_insn (mem,
			  gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
				       + regno - SH_REG_MSW_OFFSET));
	}
    }
  else
    for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
      {
	rtx mem;

	emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
	mem = change_address (regbuf, SFmode, fpregs);
	emit_move_insn (mem,
			gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
      }

  /* Return the address of the regbuf.  */
  return XEXP (regbuf, 0);
}
7510
7511 /* Define the `__builtin_va_list' type for the ABI. */
7512 static tree
sh_build_builtin_va_list(void)7513 sh_build_builtin_va_list (void)
7514 {
7515 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7516 tree record, type_decl;
7517
7518 if ((! TARGET_SH2E && ! TARGET_SH4)
7519 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7520 return ptr_type_node;
7521
7522 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7523 type_decl = build_decl (BUILTINS_LOCATION,
7524 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7525
7526 f_next_o = build_decl (BUILTINS_LOCATION,
7527 FIELD_DECL, get_identifier ("__va_next_o"),
7528 ptr_type_node);
7529 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7530 FIELD_DECL,
7531 get_identifier ("__va_next_o_limit"),
7532 ptr_type_node);
7533 f_next_fp = build_decl (BUILTINS_LOCATION,
7534 FIELD_DECL, get_identifier ("__va_next_fp"),
7535 ptr_type_node);
7536 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7537 FIELD_DECL,
7538 get_identifier ("__va_next_fp_limit"),
7539 ptr_type_node);
7540 f_next_stack = build_decl (BUILTINS_LOCATION,
7541 FIELD_DECL, get_identifier ("__va_next_stack"),
7542 ptr_type_node);
7543
7544 DECL_FIELD_CONTEXT (f_next_o) = record;
7545 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7546 DECL_FIELD_CONTEXT (f_next_fp) = record;
7547 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7548 DECL_FIELD_CONTEXT (f_next_stack) = record;
7549
7550 TYPE_STUB_DECL (record) = type_decl;
7551 TYPE_NAME (record) = type_decl;
7552 TYPE_FIELDS (record) = f_next_o;
7553 DECL_CHAIN (f_next_o) = f_next_o_limit;
7554 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7555 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7556 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7557
7558 layout_type (record);
7559
7560 return record;
7561 }
7562
/* Implement `va_start' for varargs and stdarg.  Fills in the record
   built by sh_build_builtin_va_list: the FP and integer save areas come
   from __builtin_saveregs, and the stack pointer field from NEXTARG.  */
static void
sh_va_start (tree valist, rtx nextarg)
{
  tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
  tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
  tree t, u;
  int nfp, nint;

  /* ABIs that use a plain pointer va_list take the generic path.  */
  if ((! TARGET_SH2E && ! TARGET_SH4)
      || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* The field decls, in the chain order sh_build_builtin_va_list
     created them.  */
  f_next_o = TYPE_FIELDS (va_list_type_node);
  f_next_o_limit = DECL_CHAIN (f_next_o);
  f_next_fp = DECL_CHAIN (f_next_o_limit);
  f_next_fp_limit = DECL_CHAIN (f_next_fp);
  f_next_stack = DECL_CHAIN (f_next_fp_limit);

  /* COMPONENT_REFs for each field of VALIST.  */
  next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
		   NULL_TREE);
  next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
			 valist, f_next_o_limit, NULL_TREE);
  next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
		    NULL_TREE);
  next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
			  valist, f_next_fp_limit, NULL_TREE);
  next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
		       valist, f_next_stack, NULL_TREE);

  /* Call __builtin_saveregs.  */
  u = make_tree (sizetype, expand_builtin_saveregs ());
  u = fold_convert (ptr_type_node, u);
  /* next_fp = start of the saved-register buffer.  */
  t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* nfp = number of unnamed FP argument registers (out of 8).  */
  nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
  if (nfp < 8)
    nfp = 8 - nfp;
  else
    nfp = 0;
  /* next_fp_limit = end of the FP save area.  */
  u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* The integer save area follows immediately: next_o starts there.  */
  t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* nint = number of unnamed integer argument registers (out of 4).  */
  nint = crtl->args.info.arg_count[SH_ARG_INT];
  if (nint < 4)
    nint = 4 - nint;
  else
    nint = 0;
  /* next_o_limit = end of the integer save area.  */
  u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Remaining arguments come from the caller's stack area (NEXTARG).  */
  u = make_tree (ptr_type_node, nextarg);
  t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
7632
7633 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7634 member, return it. */
7635 static tree
find_sole_member(tree type)7636 find_sole_member (tree type)
7637 {
7638 tree field, member = NULL_TREE;
7639
7640 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7641 {
7642 if (TREE_CODE (field) != FIELD_DECL)
7643 continue;
7644 if (!DECL_SIZE (field))
7645 return NULL_TREE;
7646 if (integer_zerop (DECL_SIZE (field)))
7647 continue;
7648 if (member)
7649 return NULL_TREE;
7650 member = field;
7651 }
7652 return member;
7653 }
7654
7655 /* Implement `va_arg'. */
7656 static tree
sh_gimplify_va_arg_expr(tree valist,tree type,gimple_seq * pre_p,gimple_seq * post_p ATTRIBUTE_UNUSED)7657 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7658 gimple_seq *post_p ATTRIBUTE_UNUSED)
7659 {
7660 tree tmp;
7661 tree addr, lab_over = NULL, result = NULL;
7662 tree eff_type;
7663
7664 const bool pass_by_ref =
7665 !VOID_TYPE_P (type)
7666 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7667
7668 if (pass_by_ref)
7669 type = build_pointer_type (type);
7670
7671 HOST_WIDE_INT size = int_size_in_bytes (type);
7672 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7673 tree pptr_type_node = build_pointer_type (ptr_type_node);
7674
7675 if ((TARGET_SH2E || TARGET_SH4)
7676 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7677 {
7678 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7679 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7680 tree lab_false;
7681 tree member;
7682
7683 f_next_o = TYPE_FIELDS (va_list_type_node);
7684 f_next_o_limit = DECL_CHAIN (f_next_o);
7685 f_next_fp = DECL_CHAIN (f_next_o_limit);
7686 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7687 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7688
7689 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7690 NULL_TREE);
7691 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7692 valist, f_next_o_limit, NULL_TREE);
7693 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7694 valist, f_next_fp, NULL_TREE);
7695 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7696 valist, f_next_fp_limit, NULL_TREE);
7697 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7698 valist, f_next_stack, NULL_TREE);
7699
7700 /* Structures with a single member with a distinct mode are passed
7701 like their member. This is relevant if the latter has a REAL_TYPE
7702 or COMPLEX_TYPE type. */
7703 eff_type = type;
7704 while (TREE_CODE (eff_type) == RECORD_TYPE
7705 && (member = find_sole_member (eff_type))
7706 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7707 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7708 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7709 {
7710 tree field_type = TREE_TYPE (member);
7711
7712 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7713 eff_type = field_type;
7714 else
7715 {
7716 gcc_assert ((TYPE_ALIGN (eff_type)
7717 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7718 || (TYPE_ALIGN (eff_type)
7719 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7720 break;
7721 }
7722 }
7723
7724 bool pass_as_float;
7725 if (TARGET_FPU_DOUBLE)
7726 {
7727 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7728 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7729 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7730 && size <= 16));
7731 }
7732 else
7733 {
7734 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7735 }
7736
7737 addr = create_tmp_var (pptr_type_node);
7738 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7739 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7740
7741 valist = build_simple_mem_ref (addr);
7742
7743 if (pass_as_float)
7744 {
7745 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7746 tree cmp;
7747 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7748
7749 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7750 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7751
7752 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7753 tmp = next_fp_limit;
7754 if (size > 4 && !is_double)
7755 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7756 tmp = build2 (GE_EXPR, boolean_type_node,
7757 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7758 cmp = build3 (COND_EXPR, void_type_node, tmp,
7759 build1 (GOTO_EXPR, void_type_node,
7760 unshare_expr (lab_false)), NULL_TREE);
7761 if (!is_double)
7762 gimplify_and_add (cmp, pre_p);
7763
7764 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7765 || (is_double || size == 16))
7766 {
7767 tmp = fold_convert (sizetype, next_fp_tmp);
7768 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7769 size_int (UNITS_PER_WORD));
7770 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7771 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7772 }
7773 if (is_double)
7774 gimplify_and_add (cmp, pre_p);
7775
7776 #ifdef FUNCTION_ARG_SCmode_WART
7777 if (TYPE_MODE (eff_type) == SCmode
7778 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7779 {
7780 tree subtype = TREE_TYPE (eff_type);
7781 tree real, imag;
7782
7783 imag
7784 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7785 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7786
7787 real
7788 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7789 real = get_initialized_tmp_var (real, pre_p, NULL);
7790
7791 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7792 if (type != eff_type)
7793 result = build1 (VIEW_CONVERT_EXPR, type, result);
7794 result = get_initialized_tmp_var (result, pre_p, NULL);
7795 }
7796 #endif /* FUNCTION_ARG_SCmode_WART */
7797
7798 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7799 gimplify_and_add (tmp, pre_p);
7800
7801 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7802 gimplify_and_add (tmp, pre_p);
7803
7804 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7805 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7806 gimplify_assign (unshare_expr (next_fp_tmp),
7807 unshare_expr (valist), pre_p);
7808
7809 gimplify_assign (unshare_expr (valist),
7810 unshare_expr (next_fp_tmp), post_p);
7811 valist = next_fp_tmp;
7812 }
7813 else
7814 {
7815 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7816 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7817 unshare_expr (next_o_limit));
7818 tmp = build3 (COND_EXPR, void_type_node, tmp,
7819 build1 (GOTO_EXPR, void_type_node,
7820 unshare_expr (lab_false)),
7821 NULL_TREE);
7822 gimplify_and_add (tmp, pre_p);
7823
7824 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7825 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7826
7827 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7828 gimplify_and_add (tmp, pre_p);
7829
7830 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7831 gimplify_and_add (tmp, pre_p);
7832
7833 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7834 gimplify_assign (unshare_expr (next_o),
7835 unshare_expr (next_o_limit), pre_p);
7836
7837 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7838 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7839 }
7840
7841 if (!result)
7842 {
7843 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7844 gimplify_and_add (tmp, pre_p);
7845 }
7846 }
7847
7848 /* ??? In va-sh.h, there had been code to make values larger than
7849 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7850
7851 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7852 if (result)
7853 {
7854 gimplify_assign (result, tmp, pre_p);
7855 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7856 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7857 gimplify_and_add (tmp, pre_p);
7858 }
7859 else
7860 result = tmp;
7861
7862 if (pass_by_ref)
7863 result = build_va_arg_indirect_ref (result);
7864
7865 return result;
7866 }
7867
7868 /* 64 bit floating points memory transfers are paired single precision loads
7869 or store. So DWARF information needs fixing in little endian (unless
7870 PR=SZ=1 in FPSCR). */
7871 rtx
sh_dwarf_register_span(rtx reg)7872 sh_dwarf_register_span (rtx reg)
7873 {
7874 unsigned regno = REGNO (reg);
7875
7876 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7877 return NULL_RTX;
7878
7879 return
7880 gen_rtx_PARALLEL (VOIDmode,
7881 gen_rtvec (2,
7882 gen_rtx_REG (SFmode, regno + 1),
7883 gen_rtx_REG (SFmode, regno)));
7884 }
7885
7886 static machine_mode
sh_promote_function_mode(const_tree type,machine_mode mode,int * punsignedp,const_tree funtype,int for_return)7887 sh_promote_function_mode (const_tree type, machine_mode mode,
7888 int *punsignedp, const_tree funtype,
7889 int for_return)
7890 {
7891 if (sh_promote_prototypes (funtype))
7892 return promote_mode (type, mode, punsignedp);
7893 else
7894 return default_promote_function_mode (type, mode, punsignedp, funtype,
7895 for_return);
7896 }
7897
7898 static bool
sh_promote_prototypes(const_tree type)7899 sh_promote_prototypes (const_tree type)
7900 {
7901 if (TARGET_HITACHI)
7902 return false;
7903 if (! type)
7904 return true;
7905 return ! sh_attr_renesas_p (type);
7906 }
7907
7908 static bool
sh_pass_by_reference(cumulative_args_t cum_v,machine_mode mode,const_tree type,bool named ATTRIBUTE_UNUSED)7909 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7910 const_tree type, bool named ATTRIBUTE_UNUSED)
7911 {
7912 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7913
7914 if (targetm.calls.must_pass_in_stack (mode, type))
7915 return true;
7916
7917 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7918 wants to know about pass-by-reference semantics for incoming
7919 arguments. */
7920 if (! cum)
7921 return false;
7922
7923 return false;
7924 }
7925
7926 static bool
sh_callee_copies(cumulative_args_t cum,machine_mode mode,const_tree type,bool named ATTRIBUTE_UNUSED)7927 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7928 const_tree type, bool named ATTRIBUTE_UNUSED)
7929 {
7930 /* ??? How can it possibly be correct to return true only on the
7931 caller side of the equation? Is there someplace else in the
7932 sh backend that's magically producing the copies? */
7933 return (get_cumulative_args (cum)->outgoing
7934 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7935 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7936 }
7937
7938 static sh_arg_class
get_sh_arg_class(machine_mode mode)7939 get_sh_arg_class (machine_mode mode)
7940 {
7941 if (TARGET_FPU_ANY && mode == SFmode)
7942 return SH_ARG_FLOAT;
7943
7944 if (TARGET_FPU_DOUBLE
7945 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7946 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7947 return SH_ARG_FLOAT;
7948
7949 return SH_ARG_INT;
7950 }
7951
7952 /* Round a register number up to a proper boundary for an arg of mode
7953 MODE.
7954 The SH doesn't care about double alignment, so we only
7955 round doubles to even regs when asked to explicitly. */
7956 static int
sh_round_reg(const CUMULATIVE_ARGS & cum,machine_mode mode)7957 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7958 {
7959 /* FIXME: This used to be a macro and has been copy pasted into this
7960 function as is. Make this more readable. */
7961 return
7962 (((TARGET_ALIGN_DOUBLE
7963 || (TARGET_FPU_DOUBLE
7964 && (mode == DFmode || mode == DCmode)
7965 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7966 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7967 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7968 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7969 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7970 }
7971
7972 /* Return true if arg of the specified mode should be passed in a register
7973 or false otherwise. */
7974 static bool
sh_pass_in_reg_p(const CUMULATIVE_ARGS & cum,machine_mode mode,const_tree type)7975 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7976 const_tree type)
7977 {
7978 /* FIXME: This used to be a macro and has been copy pasted into this
7979 function as is. Make this more readable. */
7980 return
7981 ((type == 0
7982 || (! TREE_ADDRESSABLE (type)
7983 && (! (TARGET_HITACHI || cum.renesas_abi)
7984 || ! (AGGREGATE_TYPE_P (type)
7985 || (!TARGET_FPU_ANY
7986 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7987 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7988 && ! cum.force_mem
7989 && (TARGET_SH2E
7990 ? ((mode) == BLKmode
7991 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7992 + int_size_in_bytes (type))
7993 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7994 : ((sh_round_reg (cum, mode)
7995 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7996 <= NPARM_REGS (mode)))
7997 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7998 }
7999
8000 static int
sh_arg_partial_bytes(cumulative_args_t cum_v,machine_mode mode,tree type,bool named ATTRIBUTE_UNUSED)8001 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
8002 tree type, bool named ATTRIBUTE_UNUSED)
8003 {
8004 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8005 int words = 0;
8006
8007 if (sh_pass_in_reg_p (*cum, mode, type)
8008 && !TARGET_FPU_DOUBLE
8009 && (sh_round_reg (*cum, mode)
8010 + (mode != BLKmode
8011 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8012 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8013 > NPARM_REGS (mode)))
8014 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8015
8016 return words * UNITS_PER_WORD;
8017 }
8018
8019
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On SH the first args are normally in registers
   and the rest are pushed.  Any arg that starts within the first
   NPARM_REGS words is at least partially passed in a register unless
   its data type forbids.  */
static rtx
sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
		 const_tree type, bool named)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);

  /* End-of-arguments query; the value distinguishes the Renesas ABI
     from the default one.  */
  if (mode == VOIDmode)
    return ca->renesas_abi ? const1_rtx : const0_rtx;

  /* Under the Hitachi / Renesas ABI, only named args go in registers.  */
  if (sh_pass_in_reg_p (*ca, mode, type)
      && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
    {
      int regno;

      /* Describe an SCmode value as two explicit SFmode registers,
	 each half placed in the paired register selected by the ^ 1
	 swap.  NOTE(review): presumably this compensates for the
	 little-endian register pairing of single floats on SH4 --
	 confirm against FUNCTION_ARG_SCmode_WART.  */
      if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
	  && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
	{
	  rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (SFmode,
						   BASE_ARG_REG (mode)
						   + (sh_round_reg (*ca, mode) ^ 1)),
				      const0_rtx);
	  rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (SFmode,
						   BASE_ARG_REG (mode)
						   + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
				      GEN_INT (4));
	  return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
	}

      /* If the alignment of a DF value causes an SF register to be
	 skipped, we will use that skipped register for the next SF
	 value.  */
      if ((TARGET_HITACHI || ca->renesas_abi)
	  && ca->free_single_fp_reg
	  && mode == SFmode)
	return gen_rtx_REG (mode, ca->free_single_fp_reg);

      /* The XOR with a boolean flips to the other register of a pair
	 for little-endian SFmode values under the default ABI.  */
      regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
	       ^ (mode == SFmode && TARGET_SH4
		  && TARGET_LITTLE_ENDIAN
		  && ! TARGET_HITACHI && ! ca->renesas_abi);
      return gen_rtx_REG (mode, regno);

    }

  /* Argument goes on the stack.  */
  return NULL_RTX;
}
8085
8086 /* Update the data in CUM to advance over an argument
8087 of mode MODE and data type TYPE.
8088 (TYPE is null for libcalls where that information may not be
8089 available.) */
8090 static void
sh_function_arg_advance(cumulative_args_t ca_v,machine_mode mode,const_tree type,bool named ATTRIBUTE_UNUSED)8091 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8092 const_tree type, bool named ATTRIBUTE_UNUSED)
8093 {
8094 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8095
8096 if (ca->force_mem)
8097 ca->force_mem = false;
8098
8099 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8100 {
8101 /* Note that we've used the skipped register. */
8102 if (mode == SFmode && ca->free_single_fp_reg)
8103 {
8104 ca->free_single_fp_reg = 0;
8105 return;
8106 }
8107 /* When we have a DF after an SF, there's an SF register that get
8108 skipped in order to align the DF value. We note this skipped
8109 register, because the next SF value will use it, and not the
8110 SF that follows the DF. */
8111 if (mode == DFmode
8112 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8113 {
8114 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8115 + BASE_ARG_REG (mode));
8116 }
8117 }
8118
8119 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8120 || sh_pass_in_reg_p (*ca, mode, type))
8121 (ca->arg_count[(int) get_sh_arg_class (mode)]
8122 = (sh_round_reg (*ca, mode)
8123 + (mode == BLKmode
8124 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8125 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8126 }
8127
8128 /* The Renesas calling convention doesn't quite fit into this scheme since
8129 the address is passed like an invisible argument, but one that is always
8130 passed in memory. */
8131 static rtx
sh_struct_value_rtx(tree fndecl,int incoming ATTRIBUTE_UNUSED)8132 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8133 {
8134 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8135 return NULL_RTX;
8136 return gen_rtx_REG (Pmode, 2);
8137 }
8138
8139 /* Worker function for TARGET_FUNCTION_VALUE.
8140
8141 For the SH, this is like LIBCALL_VALUE, except that we must change the
8142 mode like PROMOTE_MODE does.
8143 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8144 tested here has to be kept in sync with the one in
8145 explow.c:promote_mode. */
8146 static rtx
sh_function_value(const_tree valtype,const_tree fn_decl_or_type,bool outgoing ATTRIBUTE_UNUSED)8147 sh_function_value (const_tree valtype,
8148 const_tree fn_decl_or_type,
8149 bool outgoing ATTRIBUTE_UNUSED)
8150 {
8151 if (fn_decl_or_type
8152 && !DECL_P (fn_decl_or_type))
8153 fn_decl_or_type = NULL;
8154
8155 return gen_rtx_REG (
8156 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8157 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8158 && (TREE_CODE (valtype) == INTEGER_TYPE
8159 || TREE_CODE (valtype) == ENUMERAL_TYPE
8160 || TREE_CODE (valtype) == BOOLEAN_TYPE
8161 || TREE_CODE (valtype) == REAL_TYPE
8162 || TREE_CODE (valtype) == OFFSET_TYPE))
8163 && sh_promote_prototypes (fn_decl_or_type)
8164 ? SImode : TYPE_MODE (valtype)),
8165 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8166 }
8167
/* Worker function for TARGET_LIBCALL_VALUE.  Libcall results come back
   in the standard return-value register for MODE; the callee is not
   consulted.  */
static rtx
sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
}
8174
8175 /* Return true if N is a possible register number of function value. */
8176 static bool
sh_function_value_regno_p(const unsigned int regno)8177 sh_function_value_regno_p (const unsigned int regno)
8178 {
8179 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8180 }
8181
8182 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8183 static bool
sh_return_in_memory(const_tree type,const_tree fndecl)8184 sh_return_in_memory (const_tree type, const_tree fndecl)
8185 {
8186 return TYPE_MODE (type) == BLKmode
8187 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8188 && TREE_CODE (type) == RECORD_TYPE);
8189 }
8190
8191 /* We actually emit the code in sh_expand_prologue. We used to use
8192 a static variable to flag that we need to emit this code, but that
8193 doesn't when inlining, when functions are deferred and then emitted
8194 later. Fortunately, we already have two flags that are part of struct
8195 function that tell if a function uses varargs or stdarg. */
8196 static void
sh_setup_incoming_varargs(cumulative_args_t ca,machine_mode mode,tree type,int * pretend_arg_size,int second_time ATTRIBUTE_UNUSED)8197 sh_setup_incoming_varargs (cumulative_args_t ca,
8198 machine_mode mode,
8199 tree type,
8200 int *pretend_arg_size,
8201 int second_time ATTRIBUTE_UNUSED)
8202 {
8203 gcc_assert (cfun->stdarg);
8204 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8205 {
8206 int named_parm_regs, anon_parm_regs;
8207
8208 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8209 + (mode == BLKmode
8210 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8211 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8212 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8213 if (anon_parm_regs > 0)
8214 *pretend_arg_size = anon_parm_regs * 4;
8215 }
8216 }
8217
/* Implement TARGET_STRICT_ARGUMENT_NAMING.  SH never treats the
   named/unnamed distinction strictly here.  */
static bool
sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
{
  return false;
}
8223
8224 static bool
sh_pretend_outgoing_varargs_named(cumulative_args_t ca_v)8225 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8226 {
8227 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8228
8229 return ! (TARGET_HITACHI || ca->renesas_abi);
8230 }
8231
8232
/* Define the offset between two registers, one to be eliminated, and
   the other its replacement, at the start of a routine.  */
int
initial_elimination_offset (int from, int to)
{
  /* No extra rounding is applied to the register save area here.  */
  const int regs_saved_rounding = 0;
  /* target_flags is saved and restored around the frame-size
     computation.  NOTE(review): which callee modifies target_flags is
     not visible here -- confirm before simplifying.  */
  int save_flags = target_flags;
  HARD_REG_SET live_regs_mask;

  /* Bytes needed to save the live call-saved registers.  */
  int regs_saved = calc_live_regs (&live_regs_mask);

  int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
  target_flags = save_flags;

  int total_saved_regs_space = regs_saved + regs_saved_rounding;

  /* From the arg pointer we skip both the save area and the locals.  */
  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space;

  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space;

  /* Initial gap between fp and sp is 0.  */
  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return 0;

  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return rounded_frame_size (0);

  if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return rounded_frame_size (0);

  /* Only the return-address-pointer eliminations remain.  */
  gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
	      && (to == HARD_FRAME_POINTER_REGNUM
		  || to == STACK_POINTER_REGNUM));
  return total_auto_space;
}
8270
/* Parse the -mfixed-range= option string.  */
void
sh_fix_range (const char *const_str)
{
  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  /* Work on a writable stack copy; parsing temporarily punches '\0'
     holes into it.  */
  char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);

  while (1)
    {
      /* Split the current range at the dash.  */
      char* dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
	  return;
	}
      *dash = '\0';
      /* Terminate this range at the comma, if there are more.  */
      char* comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      int first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      int last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      /* Restore the dash so the warning below can print the full
	 "REG1-REG2" text via str.  */
      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      /* Mark every register in the range as fixed and call-used.  */
      for (int i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      /* Restore the comma and continue with the next range.  */
      *comma = ',';
      str = comma + 1;
    }
}
8327
/* Insert any deferred function attributes from earlier pragmas.  */
static void
sh_insert_attributes (tree node, tree *attributes)
{
  if (TREE_CODE (node) != FUNCTION_DECL)
    return;

  /* NOTE(review): a FUNCTION_DECL always satisfies DECL_P, so this
     check looks redundant and the "fields" comment appears copied from
     elsewhere -- confirm before removing.  */
  if (!DECL_P (node))
    return;

  /* Append the attributes to the deferred attributes.  */
  *sh_deferred_function_attributes_tail = *attributes;
  tree attrs = sh_deferred_function_attributes;
  if (!attrs)
    return;

  /* Some attributes imply or require the interrupt attribute.  */
  if (!lookup_attribute ("interrupt_handler", attrs)
      && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
    {
      /* If we have a trapa_handler, but no interrupt_handler attribute,
	 insert an interrupt_handler attribute.  */
      if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
	/* We can't use sh_pr_interrupt here because that's not in the
	   java frontend.  */
	attrs
	  = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
      /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
	 if the interrupt attribute is missing, we ignore the attribute
	 and warn.  */
      else if (lookup_attribute ("sp_switch", attrs)
	       || lookup_attribute ("trap_exit", attrs)
	       || lookup_attribute ("nosave_low_regs", attrs)
	       || lookup_attribute ("resbank", attrs))
	{
	  tree *tail;

	  /* Rebuild the list in *attributes, warning about and dropping
	     the interrupt-only attributes.  */
	  for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
	    {
	      if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
		  || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
		  || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
		  || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
		warning (OPT_Wattributes,
			 "%qE attribute only applies to interrupt functions",
			 TREE_PURPOSE (attrs));
	      else
		{
		  *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
				     NULL_TREE);
		  tail = &TREE_CHAIN (*tail);
		}
	    }
	  attrs = *attributes;
	}
    }

  /* Install the processed list.  */
  *attributes = attrs;

  /* Clear deferred attributes.  */
  sh_deferred_function_attributes = NULL_TREE;
  sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

  return;
}
8395
8396 /*------------------------------------------------------------------------------
8397 Target specific attributes
8398 Supported attributes are:
8399
8400 * interrupt_handler
8401 Specifies this function is an interrupt handler.
8402
8403 * trapa_handler
8404 Like interrupt_handler, but don't save all registers.
8405
8406 * sp_switch
8407 Specifies an alternate stack for an interrupt handler to run on.
8408
8409 * trap_exit
8410 Use a trapa to exit an interrupt function instead of rte.
8411
8412 * nosave_low_regs
8413 Don't save r0..r7 in an interrupt handler function.
8414 This is useful on SH3* and SH4*, which have a separate set of low
8415 regs for user and privileged modes.
8416 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8417 those that run with interrupts disabled and thus can't be
     interrupted themselves).
8419
8420 * renesas
8421 Use Renesas calling/layout conventions (functions and structures).
8422
8423 * resbank
8424 In case of an interrupt handler function, use a register bank to
8425 save registers R0-R14, MACH, MACL, GBR and PR.
8426 This is available only on SH2A targets.
8427
8428 * function_vector
8429 Declares a function to be called using the TBR relative addressing
8430 mode. Takes an argument that specifies the slot number in the table
8431 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8432 */
8433
8434 /* Handle a 'resbank' attribute. */
8435 static tree
sh_handle_resbank_handler_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8436 sh_handle_resbank_handler_attribute (tree * node, tree name,
8437 tree args ATTRIBUTE_UNUSED,
8438 int flags ATTRIBUTE_UNUSED,
8439 bool * no_add_attrs)
8440 {
8441 if (!TARGET_SH2A)
8442 {
8443 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8444 name);
8445 *no_add_attrs = true;
8446 }
8447 if (TREE_CODE (*node) != FUNCTION_DECL)
8448 {
8449 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8450 name);
8451 *no_add_attrs = true;
8452 }
8453
8454 return NULL_TREE;
8455 }
8456
8457 /* Handle an "interrupt_handler" attribute; arguments as in
8458 struct attribute_spec.handler. */
8459 static tree
sh_handle_interrupt_handler_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8460 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8461 tree args ATTRIBUTE_UNUSED,
8462 int flags ATTRIBUTE_UNUSED,
8463 bool *no_add_attrs)
8464 {
8465 if (TREE_CODE (*node) != FUNCTION_DECL)
8466 {
8467 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8468 name);
8469 *no_add_attrs = true;
8470 }
8471
8472 return NULL_TREE;
8473 }
8474
8475 /* Handle an 'function_vector' attribute; arguments as in
8476 struct attribute_spec.handler. */
8477 static tree
sh2a_handle_function_vector_handler_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8478 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8479 tree args ATTRIBUTE_UNUSED,
8480 int flags ATTRIBUTE_UNUSED,
8481 bool * no_add_attrs)
8482 {
8483 if (!TARGET_SH2A)
8484 {
8485 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8486 name);
8487 *no_add_attrs = true;
8488 }
8489 else if (TREE_CODE (*node) != FUNCTION_DECL)
8490 {
8491 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8492 name);
8493 *no_add_attrs = true;
8494 }
8495 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8496 {
8497 /* The argument must be a constant integer. */
8498 warning (OPT_Wattributes,
8499 "%qE attribute argument not an integer constant",
8500 name);
8501 *no_add_attrs = true;
8502 }
8503 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8504 {
8505 /* The argument value must be between 0 to 255. */
8506 warning (OPT_Wattributes,
8507 "%qE attribute argument should be between 0 to 255",
8508 name);
8509 *no_add_attrs = true;
8510 }
8511 return NULL_TREE;
8512 }
8513
8514 /* Returns true if current function has been assigned the attribute
8515 'function_vector'. */
8516 bool
sh2a_is_function_vector_call(rtx x)8517 sh2a_is_function_vector_call (rtx x)
8518 {
8519 if (GET_CODE (x) == SYMBOL_REF
8520 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8521 {
8522 tree tr = SYMBOL_REF_DECL (x);
8523
8524 if (sh2a_function_vector_p (tr))
8525 return true;
8526 }
8527
8528 return false;
8529 }
8530
8531 /* Returns the function vector number, if the attribute
8532 'function_vector' is assigned, otherwise returns zero. */
8533 int
sh2a_get_function_vector_number(rtx x)8534 sh2a_get_function_vector_number (rtx x)
8535 {
8536 if ((GET_CODE (x) == SYMBOL_REF)
8537 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8538 {
8539 tree t = SYMBOL_REF_DECL (x);
8540
8541 if (TREE_CODE (t) != FUNCTION_DECL)
8542 return 0;
8543
8544 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8545 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8546 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8547
8548 return 0;
8549 }
8550 else
8551 return 0;
8552 }
8553
8554 /* Handle an "sp_switch" attribute; arguments as in
8555 struct attribute_spec.handler. */
8556 static tree
sh_handle_sp_switch_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8557 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8558 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8559 {
8560 if (TREE_CODE (*node) != FUNCTION_DECL)
8561 {
8562 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8563 name);
8564 *no_add_attrs = true;
8565 }
8566 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8567 {
8568 /* The argument must be a constant string. */
8569 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8570 name);
8571 *no_add_attrs = true;
8572 }
8573
8574 return NULL_TREE;
8575 }
8576
8577 /* Handle an "trap_exit" attribute; arguments as in
8578 struct attribute_spec.handler. */
8579 static tree
sh_handle_trap_exit_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)8580 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8581 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8582 {
8583 if (TREE_CODE (*node) != FUNCTION_DECL)
8584 {
8585 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8586 name);
8587 *no_add_attrs = true;
8588 }
8589 /* The argument specifies a trap number to be used in a trapa instruction
8590 at function exit (instead of an rte instruction). */
8591 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8592 {
8593 /* The argument must be a constant integer. */
8594 warning (OPT_Wattributes, "%qE attribute argument not an "
8595 "integer constant", name);
8596 *no_add_attrs = true;
8597 }
8598
8599 return NULL_TREE;
8600 }
8601
/* Handle a "renesas" attribute.  It needs no validation here; it is
   only queried later via sh_attr_renesas_p.  */
static tree
sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
			     tree name ATTRIBUTE_UNUSED,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs ATTRIBUTE_UNUSED)
{
  return NULL_TREE;
}
8611
8612 /* True if __attribute__((renesas)) or -mrenesas. */
8613 bool
sh_attr_renesas_p(const_tree td)8614 sh_attr_renesas_p (const_tree td)
8615 {
8616 if (TARGET_HITACHI)
8617 return true;
8618 if (td == NULL_TREE)
8619 return false;
8620 if (DECL_P (td))
8621 td = TREE_TYPE (td);
8622 if (td == error_mark_node)
8623 return false;
8624 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8625 }
8626
/* True if __attribute__((renesas)) or -mrenesas, for the current
   function.  */
bool
sh_cfun_attr_renesas_p (void)
{
  return sh_attr_renesas_p (current_function_decl);
}
8634
8635 /* Returns true if the current function has the "interrupt_handler"
8636 attribute set. */
8637 bool
sh_cfun_interrupt_handler_p(void)8638 sh_cfun_interrupt_handler_p (void)
8639 {
8640 return (lookup_attribute ("interrupt_handler",
8641 DECL_ATTRIBUTES (current_function_decl))
8642 != NULL_TREE);
8643 }
8644
8645 /* Returns true if FUNC has been assigned the attribute
8646 "function_vector". */
8647 bool
sh2a_function_vector_p(tree func)8648 sh2a_function_vector_p (tree func)
8649 {
8650 if (TREE_CODE (func) != FUNCTION_DECL)
8651 return false;
8652
8653 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8654 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8655 return true;
8656
8657 return false;
8658 }
8659
8660 /* Returns true if given tree has the "resbank" attribute set. */
8661 bool
sh_cfun_resbank_handler_p(void)8662 sh_cfun_resbank_handler_p (void)
8663 {
8664 return ((lookup_attribute ("resbank",
8665 DECL_ATTRIBUTES (current_function_decl))
8666 != NULL_TREE)
8667 && (lookup_attribute ("interrupt_handler",
8668 DECL_ATTRIBUTES (current_function_decl))
8669 != NULL_TREE) && TARGET_SH2A);
8670 }
8671
8672 /* Returns true if the current function has a "trap_exit" attribute set. */
8673 bool
sh_cfun_trap_exit_p(void)8674 sh_cfun_trap_exit_p (void)
8675 {
8676 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8677 != NULL_TREE;
8678 }
8679
8680 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8681 static const char *
sh_check_pch_target_flags(int old_flags)8682 sh_check_pch_target_flags (int old_flags)
8683 {
8684 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8685 | MASK_SH_E | MASK_HARD_SH4
8686 | MASK_FPU_SINGLE | MASK_SH4))
8687 return _("created and used with different architectures / ABIs");
8688 if ((old_flags ^ target_flags) & MASK_HITACHI)
8689 return _("created and used with different ABIs");
8690 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8691 return _("created and used with different endianness");
8692 return NULL;
8693 }
8694
8695 /* Predicates used by the templates. */
8696
8697 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8698 Used only in general_movsrc_operand. */
8699 bool
system_reg_operand(rtx op,machine_mode mode ATTRIBUTE_UNUSED)8700 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8701 {
8702 switch (REGNO (op))
8703 {
8704 case PR_REG:
8705 case MACL_REG:
8706 case MACH_REG:
8707 return true;
8708 }
8709 return false;
8710 }
8711
8712 /* Returns true if OP is a floating point value with value 0.0. */
8713 bool
fp_zero_operand(rtx op)8714 fp_zero_operand (rtx op)
8715 {
8716 if (GET_MODE (op) != SFmode)
8717 return false;
8718
8719 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8720 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8721 }
8722
8723 /* Returns true if OP is a floating point value with value 1.0. */
8724 bool
fp_one_operand(rtx op)8725 fp_one_operand (rtx op)
8726 {
8727 if (GET_MODE (op) != SFmode)
8728 return false;
8729
8730 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8731 }
8732
8733 /* Return the TLS type for TLS symbols. */
8734 enum tls_model
tls_symbolic_operand(rtx op,machine_mode mode ATTRIBUTE_UNUSED)8735 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8736 {
8737 if (GET_CODE (op) != SYMBOL_REF)
8738 return TLS_MODEL_NONE;
8739 return SYMBOL_REF_TLS_MODEL (op);
8740 }
8741
8742 /* Return the destination address of a branch. */
8743 static int
branch_dest(rtx branch)8744 branch_dest (rtx branch)
8745 {
8746 rtx dest = SET_SRC (PATTERN (branch));
8747
8748 if (GET_CODE (dest) == IF_THEN_ELSE)
8749 dest = XEXP (dest, 1);
8750
8751 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8752 }
8753
/* Return true if REG is not used after INSN.
   We assume REG is a reload reg, and therefore does
   not live past labels.  It may live past calls or jumps though.  */
bool
reg_unused_after (rtx reg, rtx_insn *insn)
{
  /* If the reg is set by this instruction, then it is safe for our
     case.  Disregard the case where this is a store to memory, since
     we are checking a register used in the store address.  */
  rtx set = single_set (insn);
  if (set && !MEM_P (SET_DEST (set))
      && reg_overlap_mentioned_p (reg, SET_DEST (set)))
    return true;

  /* Scan forward through the following insns.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (!INSN_P (insn))
	continue;

      rtx_code code = GET_CODE (insn);

#if 0
      /* If this is a label that existed before reload, then the register
	 is dead here.  However, if this is a label added by reorg, then
	 the register may still be live here.  We can't tell the difference,
	 so we just ignore labels completely.  */
      if (code == CODE_LABEL)
	return 1;
      /* else */
#endif

      /* REG could be live at the jump target; be conservative.  */
      if (code == JUMP_INSN)
	return false;

      /* If this is a sequence, we must handle them all at once.
	 We could have for instance a call that sets the target register,
	 and an insn in a delay slot that uses the register.  In this case,
	 we must return 0.  */
      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
	{
	  rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
	  bool retval = false;

	  for (int i = 0; i < seq->len (); i++)
	    {
	      rtx_insn *this_insn = seq->insn (i);
	      rtx set = single_set (this_insn);

	      /* Remember whether the sequence contains a call or jump,
		 so the post-loop handling below can act on it.  */
	      if (CALL_P (this_insn))
		code = CALL_INSN;
	      else if (JUMP_P (this_insn))
		{
		  if (INSN_ANNULLED_BRANCH_P (this_insn))
		    return false;
		  code = JUMP_INSN;
		}

	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
		return false;
	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
		{
		  if (!MEM_P (SET_DEST (set)))
		    retval = true;
		  else
		    return false;
		}
	      if (set == NULL_RTX
		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
		return false;
	    }
	  if (retval)
	    return true;
	  else if (code == JUMP_INSN)
	    return false;
	}

      rtx set = single_set (insn);
      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
	return false;
      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
	return !MEM_P (SET_DEST (set));
      if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
	return false;

      /* A call clobbers all call-used registers, so a call-clobbered REG
	 that was not referenced before the call is dead afterwards.  */
      if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
	return true;
    }
  return true;
}
8843
8844
8845 static GTY(()) rtx t_reg_rtx;
8846 rtx
get_t_reg_rtx(void)8847 get_t_reg_rtx (void)
8848 {
8849 if (! t_reg_rtx)
8850 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8851 return t_reg_rtx;
8852 }
8853
/* Lazily created VAR_DECL for the external __fpscr_values array that
   holds the FPSCR settings loaded by emit_fpu_switch.  */
static GTY(()) tree fpscr_values;

/* Emit insns that load the FPSCR from element INDEX of the global
   __fpscr_values array.  SCRATCH is used to compute the element address
   when new pseudos cannot be created (i.e. after reload); it may be
   unused otherwise.  */
static void
emit_fpu_switch (rtx scratch, int index)
{
  if (fpscr_values == NULL)
    {
      /* Build the declaration `extern int __fpscr_values[2];' on
	 first use.  */
      tree t = build_index_type (integer_one_node);
      t = build_array_type (integer_type_node, t);
      t = build_decl (BUILTINS_LOCATION,
		      VAR_DECL, get_identifier ("__fpscr_values"), t);
      DECL_ARTIFICIAL (t) = 1;
      DECL_IGNORED_P (t) = 1;
      DECL_EXTERNAL (t) = 1;
      TREE_STATIC (t) = 1;
      TREE_PUBLIC (t) = 1;
      TREE_USED (t) = 1;

      fpscr_values = t;
    }

  rtx src = DECL_RTL (fpscr_values);
  if (!can_create_pseudo_p ())
    {
      /* Materialize the element address in SCRATCH:
	 &__fpscr_values + INDEX * 4.  */
      emit_move_insn (scratch, XEXP (src, 0));
      if (index != 0)
	emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
      src = adjust_automodify_address (src, SImode, scratch, index * 4);
    }
  else
    src = adjust_address (src, SImode, index * 4);

  emit_insn (gen_lds_fpscr (src));
}
8888
8889 static rtx get_free_reg (HARD_REG_SET);
8890
8891 /* This function returns a register to use to load the address to load
8892 the fpscr from. Currently it always returns r1 or r7, but when we are
8893 able to use pseudo registers after combine, or have a better mechanism
8894 for choosing a register, it should be done here. */
8895 /* REGS_LIVE is the liveness information for the point for which we
8896 need this allocation. In some bare-bones exit blocks, r1 is live at the
8897 start. We can even have all of r0..r3 being live:
8898 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8899 INSN before which new insns are placed with will clobber the register
8900 we return. If a basic block consists only of setting the return value
8901 register to a pseudo and using that register, the return value is not
8902 live before or after this block, yet we we'll insert our insns right in
8903 the middle. */
8904 static rtx
get_free_reg(HARD_REG_SET regs_live)8905 get_free_reg (HARD_REG_SET regs_live)
8906 {
8907 if (! TEST_HARD_REG_BIT (regs_live, 1))
8908 return gen_rtx_REG (Pmode, 1);
8909
8910 /* Hard reg 1 is live; since this is a small register classes target,
8911 there shouldn't be anything but a jump before the function end. */
8912 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8913 return gen_rtx_REG (Pmode, 7);
8914 }
8915
8916 /* This function will set the fpscr from memory.
8917 MODE is the mode we are setting it to. */
8918 void
fpscr_set_from_mem(int mode,HARD_REG_SET regs_live)8919 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8920 {
8921 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8922 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8923
8924 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8925 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8926 }
8927
8928 /* Is the given character a logical line separator for the assembler? */
8929 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8930 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8931 #endif
8932
8933 static bool
sequence_insn_p(rtx_insn * insn)8934 sequence_insn_p (rtx_insn *insn)
8935 {
8936 rtx_insn* prev = PREV_INSN (insn);
8937 if (prev == NULL)
8938 return false;
8939
8940 rtx_insn* next = NEXT_INSN (prev);
8941 if (next == NULL)
8942 return false;
8943
8944 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8945 }
8946
/* Return the number of extra bytes INSN occupies beyond its encoded
   length (e.g. for an implicit nop in an unfilled delay slot, or for
   sh-dsp parallel-processing mnemonics inside inline asm).  */
int
sh_insn_length_adjustment (rtx_insn *insn)
{
  /* Instructions with unfilled delay slots take up an extra two bytes for
     the nop in the delay slot.  */
  if (((NONJUMP_INSN_P (insn)
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER)
       || CALL_P (insn) || JUMP_P (insn))
      && ! sequence_insn_p (insn)
      && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
    return 2;

  /* Increase the insn length of a cbranch without a delay slot insn to
     force a delay slot which will be stuffed with a nop.  */
  if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
      && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
      && ! sequence_insn_p (insn))
    return 2;

  /* sh-dsp parallel processing insn take four bytes instead of two;
     scan inline asm text for such mnemonics line by line.  */

  if (NONJUMP_INSN_P (insn))
    {
      int sum = 0;
      rtx body = PATTERN (insn);
      const char *templ;
      char c;
      bool maybe_label = true;

      if (GET_CODE (body) == ASM_INPUT)
	templ = XSTR (body, 0);
      else if (asm_noperands (body) >= 0)
	templ
	  = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
      else
	return 0;
      do
	{
	  int ppi_adjust = 0;

	  /* Skip leading whitespace of the asm line.  */
	  do
	    c = *templ++;
	  while (c == ' ' || c == '\t');
	  /* all sh-dsp parallel-processing insns start with p.
	     The only non-ppi sh insn starting with p is pref.
	     The only ppi starting with pr is prnd.  */
	  if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
	    ppi_adjust = 2;
	  /* The repeat pseudo-insn expands into three insns, a total of
	     six bytes in size.  */
	  else if ((c == 'r' || c == 'R')
		   && ! strncasecmp ("epeat", templ, 5))
	    ppi_adjust = 4;
	  while (c && c != '\n'
		 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
	    {
	      /* If this is a label, it is obviously not a ppi insn.  */
	      if (c == ':' && maybe_label)
		{
		  ppi_adjust = 0;
		  break;
		}
	      else if (c == '\'' || c == '"')
		maybe_label = false;
	      c = *templ++;
	    }
	  sum += ppi_adjust;
	  maybe_label = c != ':';
	}
      while (c);
      return sum;
    }
  return 0;
}
9022
9023 /* Return TRUE for a valid displacement for the REG+disp addressing
9024 with MODE. */
9025 bool
sh_legitimate_index_p(machine_mode mode,rtx op,bool consider_sh2a,bool allow_zero)9026 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9027 bool allow_zero)
9028 {
9029 if (! CONST_INT_P (op))
9030 return false;
9031
9032 {
9033 const HOST_WIDE_INT offset = INTVAL (op);
9034 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9035 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9036
9037 /* If the mode does not support any displacement always return false.
9038 Even though an index of '0' is actually always valid, it will cause
9039 troubles when e.g. a DFmode move is split into two SFmode moves,
9040 where one SFmode move will have index '0' and the other move will
9041 have index '4'. */
9042 if (!allow_zero && max_disp < 1)
9043 return false;
9044
9045 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9046 }
9047 }
9048
9049 /* Recognize an RTL expression that is a valid memory address for
9050 an instruction.
9051 The MODE argument is the machine mode for the MEM expression
9052 that wants to use this address.
9053 Allow REG
9054 REG+disp
9055 REG+r0
9056 REG++
9057 --REG
9058 GBR
9059 GBR+disp */
9060 static bool
sh_legitimate_address_p(machine_mode mode,rtx x,bool strict)9061 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9062 {
9063 if (REG_P (x) && REGNO (x) == GBR_REG)
9064 return true;
9065
9066 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9067 return true;
9068 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9069 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9070 return true;
9071 else if (GET_CODE (x) == PLUS)
9072 {
9073 rtx xop0 = XEXP (x, 0);
9074 rtx xop1 = XEXP (x, 1);
9075
9076 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9077 return gbr_displacement (xop1, mode);
9078
9079 if (GET_MODE_SIZE (mode) <= 8
9080 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9081 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9082 return true;
9083
9084 if (GET_MODE_SIZE (mode) <= 4
9085 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9086 {
9087 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9088 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9089 return true;
9090 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9091 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9092 return true;
9093 }
9094 }
9095
9096 return false;
9097 }
9098
9099 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9100 isn't protected by a PIC unspec. */
9101 bool
nonpic_symbol_mentioned_p(rtx x)9102 nonpic_symbol_mentioned_p (rtx x)
9103 {
9104 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9105 || GET_CODE (x) == PC)
9106 return true;
9107
9108 /* We don't want to look into the possible MEM location of a
9109 CONST_DOUBLE, since we're not going to use it, in general. */
9110 if (GET_CODE (x) == CONST_DOUBLE)
9111 return false;
9112
9113 if (GET_CODE (x) == UNSPEC
9114 && (XINT (x, 1) == UNSPEC_PIC
9115 || XINT (x, 1) == UNSPEC_GOT
9116 || XINT (x, 1) == UNSPEC_GOTOFF
9117 || XINT (x, 1) == UNSPEC_GOTPLT
9118 || XINT (x, 1) == UNSPEC_GOTTPOFF
9119 || XINT (x, 1) == UNSPEC_DTPOFF
9120 || XINT (x, 1) == UNSPEC_TPOFF
9121 || XINT (x, 1) == UNSPEC_PLT
9122 || XINT (x, 1) == UNSPEC_PCREL
9123 || XINT (x, 1) == UNSPEC_SYMOFF
9124 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9125 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9126 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9127 return false;
9128
9129 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9130 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9131 {
9132 if (fmt[i] == 'E')
9133 {
9134 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9135 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9136 return true;
9137 }
9138 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9139 return true;
9140 }
9141
9142 return false;
9143 }
9144
/* Convert a non-PIC address in `orig' to a PIC address using @GOT or
   @GOTOFF in `reg'.  REG may be NULL_RTX, in which case a fresh pseudo
   is allocated.  Returns the register holding the PIC address, or ORIG
   unchanged when no conversion applies (TLS symbols, non-symbolic rtx).  */
rtx
legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
{
  /* TLS symbols are handled elsewhere; leave them alone here.  */
  if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
    return orig;

  if (GET_CODE (orig) == LABEL_REF
      || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
    {
      if (reg == NULL_RTX)
	reg = gen_reg_rtx (Pmode);

      if (TARGET_FDPIC
	  && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
	{
	  /* Weak functions may be NULL which doesn't work with
	     GOTOFFFUNCDESC because the runtime offset is not known.  */
	  if (SYMBOL_REF_WEAK (orig))
	    emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
	  else
	    emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
	}
      else if (TARGET_FDPIC
	       && (GET_CODE (orig) == LABEL_REF
		   || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
		       && (TREE_READONLY (SYMBOL_REF_DECL (orig))
			   || SYMBOL_REF_EXTERNAL_P (orig)
			   || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
	/* In FDPIC, GOTOFF can only be used for writable data.  */
	emit_insn (gen_symGOT2reg (reg, orig));
      else
	emit_insn (gen_symGOTOFF2reg (reg, orig));
      return reg;
    }
  else if (GET_CODE (orig) == SYMBOL_REF)
    {
      if (reg == NULL_RTX)
	reg = gen_reg_rtx (Pmode);

      /* Non-local symbols go through the GOT; FDPIC function symbols
	 additionally need a function descriptor.  */
      if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
	emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
      else
	emit_insn (gen_symGOT2reg (reg, orig));
      return reg;
    }
  return orig;
}
9194
/* Given a (logical) mode size and an offset in bytes, try to find the
   appropriate displacement value for a mov insn.  On SH the displacements
   are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
   15 bytes in QImode.  To compensate this we create a new base address by
   adding an adjustment value to it.

   If the originally requested offset is greater than 127 we prefer using
   values 124..127 over 128..131 to increase opportunities to use the
   add #imm, Rn insn.

   In some cases it is possible that a requested offset might seem unaligned
   or inappropriate for the mode size, like offset = 2 and mode size = 4.
   This is compensated by adjusting the base address so that the effective
   address of the displacement move insn will be aligned.

   This is not the best possible way of rebasing the base address, as it
   does not look at other present displacement addressings around it.
   In some cases this can create more base address adjustments than would
   actually be necessary.  */
struct disp_adjust
{
  /* Constant to add to the base register, or NULL_RTX if no valid
     adjustment could be found.  */
  rtx offset_adjust;
  /* Remaining displacement to use in the mov insn itself.  */
  rtx mov_disp;
};
9219
9220 static struct disp_adjust
sh_find_mov_disp_adjust(machine_mode mode,HOST_WIDE_INT offset)9221 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9222 {
9223 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9224
9225 /* Do not try to use SH2A's large displacements here, because this would
9226 effectively disable the small displacement insns. */
9227 const int mode_sz = GET_MODE_SIZE (mode);
9228 const int mov_insn_sz = mov_insn_size (mode, false);
9229 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9230 const int max_disp_next = max_disp + mov_insn_sz;
9231 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9232 HOST_WIDE_INT offset_adjust;
9233
9234 /* In some cases this actually does happen and we must check for it. */
9235 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9236 return res;
9237
9238 /* Keeps the previous behavior for QImode displacement addressing.
9239 This just decides how the offset is re-based. Removing this special
9240 case will result in slightly bigger code on average, but it's not that
9241 bad actually. */
9242 if (mov_insn_sz == 1)
9243 align_modifier = 0;
9244
9245 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9246
9247 if (mode_sz + offset - offset_adjust <= max_disp_next)
9248 {
9249 res.offset_adjust = GEN_INT (offset_adjust);
9250 res.mov_disp = GEN_INT (offset - offset_adjust);
9251 }
9252
9253 return res;
9254 }
9255
9256 /* Try to modify an illegitimate address and make it legitimate.
9257 If we find one, return the new, valid address.
9258 Otherwise, return the original address. */
9259 static rtx
sh_legitimize_address(rtx x,rtx oldx,machine_mode mode)9260 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9261 {
9262 if (flag_pic)
9263 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9264
9265 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9266 || (TARGET_SH2E && mode == SFmode))
9267 return x;
9268
9269 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9270 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9271 {
9272 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9273 INTVAL (XEXP (x, 1)));
9274
9275 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9276 {
9277 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9278 adj.offset_adjust, NULL_RTX, 0,
9279 OPTAB_LIB_WIDEN);
9280 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9281 }
9282 }
9283 return x;
9284 }
9285
/* Attempt to replace *P, which is an address that needs reloading, with
   a valid memory address for an operand of mode MODE.
   Like for sh_legitimize_address, for the SH we try to get a normal form
   of the address.  That will allow inheritance of the address reloads.
   OPNUM and ITYPE describe the reload being performed; returns true
   when a reload has been pushed and *P possibly rewritten.  */
bool
sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
			      int itype)
{
  enum reload_type type = (enum reload_type) itype;
  const int mode_sz = GET_MODE_SIZE (mode);

  /* This hook is for classic reload only; LRA legitimizes on its own.  */
  if (sh_lra_p ())
    return false;

  if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
      && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
    {
      const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
      struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);

      /* SH2A DFmode with a displacement that is not a multiple of 8:
	 reload the whole address into a base register.  */
      if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
	{
	  push_reload (*p, NULL_RTX, p, NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}

      if (TARGET_SH2E && mode == SFmode)
	{
	  *p = copy_rtx (*p);
	  push_reload (*p, NULL_RTX, p, NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}

      /* FIXME: Do not allow to legitimize QImode and HImode displacement
	 moves because then reload has a problem figuring the constraint
	 that the move insn target/source reg must be R0.
	 Or maybe some handling is wrong in sh_secondary_reload for this
	 to work properly?  */
      if ((mode_sz == 4 || mode_sz == 8)
	  && ! (TARGET_SH4 && mode == DFmode)
	  && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
	{
	  /* Re-base the address as (base + adjust) + mov_disp and
	     reload the inner sum into a base register.  */
	  rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
	  *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
	  push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
	  return true;
	}
    }

  /* We must re-recognize what we created before.  */
  if (GET_CODE (*p) == PLUS
      && (mode_sz == 4 || mode_sz == 8)
      && GET_CODE (XEXP (*p, 0)) == PLUS
      && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
      && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
      && CONST_INT_P (XEXP (*p, 1))
      && ! (TARGET_SH2E && mode == SFmode))
    {
      /* Because this address is so complex, we know it must have
	 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
	 it is already unshared, and needs no further unsharing.  */
      push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
      return true;
    }

  return false;
}
9357
9358 /* In the name of slightly smaller debug output, and to cater to
9359 general assembler lossage, recognize various UNSPEC sequences
9360 and turn them back into a direct symbol reference. */
9361 static rtx
sh_delegitimize_address(rtx orig_x)9362 sh_delegitimize_address (rtx orig_x)
9363 {
9364 orig_x = delegitimize_mem_from_attrs (orig_x);
9365
9366 rtx x = orig_x;
9367 if (MEM_P (x))
9368 x = XEXP (x, 0);
9369 if (GET_CODE (x) == CONST)
9370 {
9371 rtx y = XEXP (x, 0);
9372 if (GET_CODE (y) == UNSPEC)
9373 {
9374 if (XINT (y, 1) == UNSPEC_GOT
9375 || XINT (y, 1) == UNSPEC_GOTOFF
9376 || XINT (y, 1) == UNSPEC_SYMOFF)
9377 return XVECEXP (y, 0, 0);
9378 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9379 {
9380 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9381 {
9382 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9383
9384 if (GET_CODE (symplt) == UNSPEC
9385 && (XINT (symplt, 1) == UNSPEC_PLT
9386 || XINT (symplt, 1) == UNSPEC_PCREL))
9387 return XVECEXP (symplt, 0, 0);
9388 }
9389 }
9390 }
9391 }
9392
9393 return orig_x;
9394 }
9395
/* Mark the use of a constant in the literal table.  If the constant
   has multiple labels, make it unique.  Accepts a LABEL_REF or
   CODE_LABEL (other codes are returned unchanged); returns the unique
   label for the constant.  */
static rtx
mark_constant_pool_use (rtx x)
{
  if (x == NULL_RTX)
    return x;

  switch (GET_CODE (x))
    {
    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      break;
    default:
      return x;
    }

  /* Get the first label in the list of labels for the same constant
     and delete another labels in the list.  */
  rtx_insn* lab = as_a <rtx_insn*> (x);
  for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
    {
      if (!LABEL_P (insn)
	  || LABEL_REFS (insn) != NEXT_INSN (insn))
	break;
      lab = insn;
    }

  /* Mark the redundant labels for this constant as deleted.  */
  for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
    as_a<rtx_insn *> (insn)->set_deleted ();

  /* Mark constants in a window.  */
  for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
       insn = NEXT_INSN (insn))
    {
      if (!NONJUMP_INSN_P (insn))
	continue;

      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
	continue;

      switch (XINT (pattern, 1))
	{
	case UNSPECV_CONST2:
	case UNSPECV_CONST4:
	case UNSPECV_CONST8:
	  /* Flag this constant entry as used.  */
	  XVECEXP (pattern, 0, 1) = const1_rtx;
	  break;
	case UNSPECV_WINDOW_END:
	  if (XVECEXP (pattern, 0, 0) == x)
	    return lab;
	  break;
	case UNSPECV_CONST_END:
	  return lab;
	default:
	  break;
	}
    }

  return lab;
}
9459
/* Return true if it's possible to redirect BRANCH1 to the destination
   of an unconditional jump BRANCH2.  We only want to do this if the
   resulting branch will have a short displacement.  */
static bool
sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
{
  /* Don't follow if BRANCH2 is possible to be a jump crossing between
     hot and cold partitions.  */
  if (flag_reorder_blocks_and_partition
      && simplejump_p (branch2)
      && CROSSING_JUMP_P (branch2))
    return false;

  if (flag_expensive_optimizations && simplejump_p (branch2))
    {
      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
      rtx_insn *insn;
      int distance;

      /* First scan up to 256 bytes backwards; note the scan deliberately
	 starts at NEXT_INSN (branch1) and walks with PREV_INSN so that
	 BRANCH1 itself is covered.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = PREV_INSN (insn))
	{
	  if (insn == dest)
	    return true;
	  else
	    distance += get_attr_length (insn);
	}
      /* Then scan up to 256 bytes forwards.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = NEXT_INSN (insn))
	{
	  if (insn == dest)
	    return true;
	  else
	    distance += get_attr_length (insn);
	}
    }
  return false;
}
9500
9501 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9502 bool
sh_hard_regno_rename_ok(unsigned int old_reg ATTRIBUTE_UNUSED,unsigned int new_reg)9503 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9504 unsigned int new_reg)
9505 {
9506 /* Interrupt functions can only use registers that have already been
9507 saved by the prologue, even if they would normally be
9508 call-clobbered. */
9509 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9510 return false;
9511
9512 return true;
9513 }
9514
/* Function to update the integer COST
   based on the relationship between INSN that is dependent on
   DEP_INSN through the dependence LINK.  The default is to make no
   adjustment to COST.  This can be used for example to specify to
   the scheduler that an output- or anti-dependence does not incur
   the same cost as a data-dependence.  The return value should be
   the new value for COST.  (Implements TARGET_SCHED_ADJUST_COST;
   DEP_TYPE 0 denotes a true data dependence.)  */
static int
sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
		unsigned int)
{
  rtx reg, use_pat;

  if (dep_type == 0)
    {
      if (recog_memoized (insn) < 0
	  || recog_memoized (dep_insn) < 0)
	return cost;

      rtx dep_set = single_set (dep_insn);

      /* The latency that we specify in the scheduling description refers
	 to the actual output, not to an auto-increment register; for that,
	 the latency is one.  */
      if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
	{
	  rtx set = single_set (insn);

	  if (set
	      && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
	      && (!MEM_P (SET_DEST (set))
		  || !reg_mentioned_p (SET_DEST (dep_set),
				       XEXP (SET_DEST (set), 0))))
	    cost = 1;
	}
      /* The only input for a call that is timing-critical is the
	 function's address.  */
      if (CALL_P (insn))
	{
	  rtx call = get_call_rtx_from (insn);
	  if (call
	      /* sibcalli_thunk uses a symbol_ref in an unspec.  */
	      && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
		  || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
	    cost -= TARGET_SH4_300 ? 3 : 6;
	}
      /* Likewise, the most timing critical input for an sfuncs call
	 is the function address.  However, sfuncs typically start
	 using their arguments pretty quickly.
	 Assume a four cycle delay for SH4 before they are needed.
	 Cached ST40-300 calls are quicker, so assume only a one
	 cycle delay there.
	 ??? Maybe we should encode the delays till input registers
	 are needed by sfuncs into the sfunc call insn.  */
      /* All sfunc calls are parallels with at least four components.
	 Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
      else if (GET_CODE (PATTERN (insn)) == PARALLEL
	       && XVECLEN (PATTERN (insn), 0) >= 4
	       && (reg = sfunc_uses_reg (insn)))
	{
	  if (! reg_set_p (reg, dep_insn))
	    cost -= TARGET_SH4_300 ? 1 : 4;
	}
      if (TARGET_HARD_SH4 && !TARGET_SH4_300)
	{
	  /* NOTE: this local deliberately shadows the DEP_TYPE parameter;
	     here it holds DEP_INSN's type attribute.  */
	  attr_type dep_type = get_attr_type (dep_insn);
	  attr_type type;
	  if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
	    cost--;
	  else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
		   && (type = get_attr_type (insn)) != TYPE_CALL
		   && type != TYPE_SFUNC)
	    cost--;
	  /* When the preceding instruction loads the shift amount of
	     the following SHAD/SHLD, the latency of the load is increased
	     by 1 cycle.  */
	  if (get_attr_type (insn) == TYPE_DYN_SHIFT
	      && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
	      && reg_overlap_mentioned_p (SET_DEST (dep_set),
					  XEXP (SET_SRC (single_set (insn)),
						1)))
	    cost++;
	  /* When an LS group instruction with a latency of less than
	     3 cycles is followed by a double-precision floating-point
	     instruction, FIPR, or FTRV, the latency of the first
	     instruction is increased to 3 cycles.  */
	  else if (cost < 3
		   && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
		   && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	    cost = 3;
	  /* The lsw register of a double-precision computation is ready one
	     cycle earlier.  */
	  else if (reload_completed
		   && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
		   && (use_pat = single_set (insn))
		   && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
				      SET_SRC (use_pat)))
	    cost -= 1;

	  if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
	      && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
	    cost -= 1;
	}
      else if (TARGET_SH4_300)
	{
	  /* Stores need their input register two cycles later.  */
	  attr_type type;
	  if (dep_set && cost >= 1
	      && ((type = get_attr_type (insn)) == TYPE_STORE
		  || type == TYPE_PSTORE
		  || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
	    {
	      rtx set = single_set (insn);

	      if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
		  && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
		{
		  cost -= 2;
		  /* But don't reduce the cost below 1 if the address depends
		     on a side effect of dep_insn.  */
		  if (cost < 1
		      && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
		    cost = 1;
		}
	    }
	}
    }
  /* An anti-dependence penalty of two applies if the first insn is a double
     precision fadd / fsub / fmul.  */
  else if (!TARGET_SH4_300
	   && dep_type == REG_DEP_ANTI
	   && recog_memoized (dep_insn) >= 0
	   && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
	       || get_attr_type (dep_insn) == TYPE_DFP_MUL)
	   /* A lot of alleged anti-flow dependences are fake,
	      so check this one is real.  */
	   && flow_dependent_p (dep_insn, insn))
    cost = 2;

  return cost;
}
9656
9657 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9658 if DEP_INSN is anti-flow dependent on INSN. */
9659 static bool
flow_dependent_p(rtx insn,rtx dep_insn)9660 flow_dependent_p (rtx insn, rtx dep_insn)
9661 {
9662 rtx tmp = PATTERN (insn);
9663
9664 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9665 return tmp == NULL_RTX;
9666 }
9667
9668 /* A helper function for flow_dependent_p called through note_stores. */
9669 static void
flow_dependent_p_1(rtx x,const_rtx pat ATTRIBUTE_UNUSED,void * data)9670 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9671 {
9672 rtx * pinsn = (rtx *) data;
9673
9674 if (*pinsn && reg_referenced_p (x, *pinsn))
9675 *pinsn = NULL_RTX;
9676 }
9677
/* For use by sh_allocate_initial_value.  Note that sh.md contains some
   'special function' patterns (type sfunc) that clobber pr, but that
   do not look like function calls to leaf_function_p.  Hence we must
   do this extra check.  */
static int
sh_pr_n_sets (void)
{
  /* Number of definitions of PR_REG in the current function, per the
     dataflow framework.  */
  return DF_REG_DEF_COUNT (PR_REG);
}
9687
9688 /* Return where to allocate pseudo for a given hard register initial
9689 value. */
9690 static rtx
sh_allocate_initial_value(rtx hard_reg)9691 sh_allocate_initial_value (rtx hard_reg)
9692 {
9693 if (REGNO (hard_reg) == PR_REG)
9694 {
9695 if (crtl->is_leaf && ! sh_pr_n_sets ())
9696 return hard_reg;
9697 else
9698 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9699 }
9700
9701 return NULL_RTX;
9702 }
9703
9704 /* This function returns "2" to indicate dual issue for the SH4
9705 processor. To be used by the DFA pipeline description. */
9706 static int
sh_issue_rate(void)9707 sh_issue_rate (void)
9708 {
9709 if (TARGET_SUPERSCALAR)
9710 return 2;
9711 else
9712 return 1;
9713 }
9714
9715 /* Functions for ready queue reordering for sched1. */
9716
9717 /* Get weight for mode for a set x. */
9718 static short
find_set_regmode_weight(rtx x,machine_mode mode)9719 find_set_regmode_weight (rtx x, machine_mode mode)
9720 {
9721 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9722 return 1;
9723 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9724 {
9725 if (REG_P (SET_DEST (x)))
9726 {
9727 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9728 return 1;
9729 else
9730 return 0;
9731 }
9732 return 1;
9733 }
9734 return 0;
9735 }
9736
9737 /* Get regmode weight for insn. */
9738 static short
find_insn_regmode_weight(rtx insn,machine_mode mode)9739 find_insn_regmode_weight (rtx insn, machine_mode mode)
9740 {
9741 /* Increment weight for each register born here. */
9742 rtx x = PATTERN (insn);
9743 short reg_weight = find_set_regmode_weight (x, mode);
9744 if (GET_CODE (x) == PARALLEL)
9745 {
9746 int j;
9747 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9748 {
9749 x = XVECEXP (PATTERN (insn), 0, j);
9750 reg_weight += find_set_regmode_weight (x, mode);
9751 }
9752 }
9753 /* Decrement weight for each register that dies here. */
9754 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9755 {
9756 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9757 {
9758 rtx note = XEXP (x, 0);
9759 if (REG_P (note) && GET_MODE (note) == mode)
9760 reg_weight--;
9761 }
9762 }
9763 return reg_weight;
9764 }
9765
9766 /* Calculate regmode weights for all insns of a basic block. */
9767 static void
find_regmode_weight(basic_block b,machine_mode mode)9768 find_regmode_weight (basic_block b, machine_mode mode)
9769 {
9770 rtx_insn *insn, *next_tail, *head, *tail;
9771
9772 get_ebb_head_tail (b, b, &head, &tail);
9773 next_tail = NEXT_INSN (tail);
9774
9775 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9776 {
9777 /* Handle register life information. */
9778 if (!INSN_P (insn))
9779 continue;
9780
9781 if (mode == SFmode)
9782 INSN_REGMODE_WEIGHT (insn, mode) =
9783 find_insn_regmode_weight (insn, mode)
9784 + 2 * find_insn_regmode_weight (insn, DFmode);
9785 else if (mode == SImode)
9786 INSN_REGMODE_WEIGHT (insn, mode) =
9787 find_insn_regmode_weight (insn, mode)
9788 + 2 * find_insn_regmode_weight (insn, DImode);
9789 }
9790 }
9791
9792 /* Comparison function for ready queue sorting. */
9793 static int
rank_for_reorder(const void * x,const void * y)9794 rank_for_reorder (const void *x, const void *y)
9795 {
9796 rtx_insn *tmp = *(rtx_insn * const *) y;
9797 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9798
9799 /* The insn in a schedule group should be issued the first. */
9800 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9801 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9802
9803 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9804 minimizes instruction movement, thus minimizing sched's effect on
9805 register pressure. */
9806 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9807 }
9808
9809 /* Resort the array A in which only element at index N may be out of order. */
9810 static void
swap_reorder(rtx_insn ** a,int n)9811 swap_reorder (rtx_insn **a, int n)
9812 {
9813 rtx_insn *insn = a[n - 1];
9814 int i = n - 2;
9815
9816 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9817 {
9818 a[i + 1] = a[i];
9819 i -= 1;
9820 }
9821 a[i + 1] = insn;
9822 }
9823
9824 /* Sort the ready list by ascending priority. */
9825 static void
ready_reorder(rtx_insn ** ready,int nready)9826 ready_reorder (rtx_insn **ready, int nready)
9827 {
9828 if (nready == 2)
9829 swap_reorder (ready, nready);
9830 else if (nready > 2)
9831 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9832 }
9833
9834 /* Count life regions of r0 for a block. */
9835 static int
find_r0_life_regions(basic_block b)9836 find_r0_life_regions (basic_block b)
9837 {
9838 bool live;
9839 int set;
9840 int death = 0;
9841
9842 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9843 {
9844 set = 1;
9845 live = true;
9846 }
9847 else
9848 {
9849 set = 0;
9850 live = false;
9851 }
9852
9853 rtx_insn* insn = BB_HEAD (b);
9854 rtx_insn* end = BB_END (b);
9855 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9856 while (1)
9857 {
9858 if (INSN_P (insn))
9859 {
9860 if (find_regno_note (insn, REG_DEAD, R0_REG))
9861 {
9862 death++;
9863 live = false;
9864 }
9865
9866 rtx pset;
9867 if (!live
9868 && (pset = single_set (insn))
9869 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9870 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9871 {
9872 set++;
9873 live = true;
9874 }
9875 }
9876 if (insn == end)
9877 break;
9878 insn = NEXT_INSN (insn);
9879 }
9880 return set - death;
9881 }
9882
9883 /* Calculate regmode weights for all insns of all basic block. */
9884 static void
sh_md_init_global(FILE * dump ATTRIBUTE_UNUSED,int verbose ATTRIBUTE_UNUSED,int old_max_uid)9885 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9886 int verbose ATTRIBUTE_UNUSED,
9887 int old_max_uid)
9888 {
9889 basic_block b;
9890
9891 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9892 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9893 r0_life_regions = 0;
9894
9895 FOR_EACH_BB_REVERSE_FN (b, cfun)
9896 {
9897 find_regmode_weight (b, SImode);
9898 find_regmode_weight (b, SFmode);
9899 if (!reload_completed)
9900 r0_life_regions += find_r0_life_regions (b);
9901 }
9902
9903 CURR_REGMODE_PRESSURE (SImode) = 0;
9904 CURR_REGMODE_PRESSURE (SFmode) = 0;
9905 }
9906
9907 /* Cleanup. */
9908 static void
sh_md_finish_global(FILE * dump ATTRIBUTE_UNUSED,int verbose ATTRIBUTE_UNUSED)9909 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9910 int verbose ATTRIBUTE_UNUSED)
9911 {
9912 if (regmode_weight[0])
9913 {
9914 free (regmode_weight[0]);
9915 regmode_weight[0] = NULL;
9916 }
9917 if (regmode_weight[1])
9918 {
9919 free (regmode_weight[1]);
9920 regmode_weight[1] = NULL;
9921 }
9922 }
9923
9924 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9925 keep count of register pressures on SImode and SFmode. */
9926 static int
sh_variable_issue(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn * insn,int can_issue_more)9927 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9928 int sched_verbose ATTRIBUTE_UNUSED,
9929 rtx_insn *insn,
9930 int can_issue_more)
9931 {
9932 if (GET_CODE (PATTERN (insn)) != USE
9933 && GET_CODE (PATTERN (insn)) != CLOBBER)
9934 cached_can_issue_more = can_issue_more - 1;
9935 else
9936 cached_can_issue_more = can_issue_more;
9937
9938 if (reload_completed)
9939 return cached_can_issue_more;
9940
9941 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9942 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9943
9944 return cached_can_issue_more;
9945 }
9946
9947 static void
sh_md_init(FILE * dump ATTRIBUTE_UNUSED,int verbose ATTRIBUTE_UNUSED,int veclen ATTRIBUTE_UNUSED)9948 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9949 int verbose ATTRIBUTE_UNUSED,
9950 int veclen ATTRIBUTE_UNUSED)
9951 {
9952 CURR_REGMODE_PRESSURE (SImode) = 0;
9953 CURR_REGMODE_PRESSURE (SFmode) = 0;
9954 }
9955
9956 /* Some magic numbers. */
9957 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9958 functions that already have high pressure on r0. */
9959 #define R0_MAX_LIFE_REGIONS 2
9960 /* Register Pressure thresholds for SImode and SFmode registers. */
9961 #define SIMODE_MAX_WEIGHT 5
9962 #define SFMODE_MAX_WEIGHT 10
9963
9964 /* Return true if the pressure is high for MODE. */
9965 static bool
high_pressure(machine_mode mode)9966 high_pressure (machine_mode mode)
9967 {
9968 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9969 functions that already have high pressure on r0. */
9970 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9971 return true;
9972
9973 if (mode == SFmode)
9974 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9975 else
9976 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9977 }
9978
9979 /* Reorder ready queue if register pressure is high. */
9980 static int
sh_reorder(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn ** ready,int * n_readyp,int clock_var ATTRIBUTE_UNUSED)9981 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9982 int sched_verbose ATTRIBUTE_UNUSED,
9983 rtx_insn **ready,
9984 int *n_readyp,
9985 int clock_var ATTRIBUTE_UNUSED)
9986 {
9987 if (reload_completed)
9988 return sh_issue_rate ();
9989
9990 if (high_pressure (SFmode) || high_pressure (SImode))
9991 {
9992 ready_reorder (ready, *n_readyp);
9993 }
9994
9995 return sh_issue_rate ();
9996 }
9997
9998 /* Skip cycles if the current register pressure is high. */
9999 static int
sh_reorder2(FILE * dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn ** ready ATTRIBUTE_UNUSED,int * n_readyp ATTRIBUTE_UNUSED,int clock_var ATTRIBUTE_UNUSED)10000 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
10001 int sched_verbose ATTRIBUTE_UNUSED,
10002 rtx_insn **ready ATTRIBUTE_UNUSED,
10003 int *n_readyp ATTRIBUTE_UNUSED,
10004 int clock_var ATTRIBUTE_UNUSED)
10005 {
10006 if (reload_completed)
10007 return cached_can_issue_more;
10008
10009 if (high_pressure(SFmode) || high_pressure (SImode))
10010 skip_cycles = 1;
10011
10012 return cached_can_issue_more;
10013 }
10014
10015 /* Skip cycles without sorting the ready queue. This will move insn from
10016 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10017 queue by sh_reorder. */
10018
10019 /* Generally, skipping these many cycles are sufficient for all insns to move
10020 from Q -> R. */
10021 #define MAX_SKIPS 8
10022
10023 static int
sh_dfa_new_cycle(FILE * sched_dump ATTRIBUTE_UNUSED,int sched_verbose ATTRIBUTE_UNUSED,rtx_insn * insn ATTRIBUTE_UNUSED,int last_clock_var,int clock_var,int * sort_p)10024 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10025 int sched_verbose ATTRIBUTE_UNUSED,
10026 rtx_insn *insn ATTRIBUTE_UNUSED,
10027 int last_clock_var,
10028 int clock_var,
10029 int *sort_p)
10030 {
10031 if (reload_completed)
10032 return 0;
10033
10034 if (skip_cycles)
10035 {
10036 if ((clock_var - last_clock_var) < MAX_SKIPS)
10037 {
10038 *sort_p = 0;
10039 return 1;
10040 }
10041 /* If this is the last cycle we are skipping, allow reordering of R. */
10042 if ((clock_var - last_clock_var) == MAX_SKIPS)
10043 {
10044 *sort_p = 1;
10045 return 1;
10046 }
10047 }
10048
10049 skip_cycles = 0;
10050
10051 return 0;
10052 }
10053
10054 static bool
sh_ms_bitfield_layout_p(const_tree record_type ATTRIBUTE_UNUSED)10055 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10056 {
10057 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10058 }
10059
10060 /*
10061 On the SH1..SH4, the trampoline looks like
10062 2 0002 D202 mov.l l2,r2
10063 1 0000 D301 mov.l l1,r3
10064 3 0004 422B jmp @r2
10065 4 0006 0009 nop
10066 5 0008 00000000 l1: .long area
10067 6 000c 00000000 l2: .long function
10068
10069 FDPIC needs a form that includes a function descriptor and
10070 code to load the GOT register:
10071 0 0000 00000000 .long l0
10072 1 0004 00000000 .long gotval
10073 2 0008 D302 l0: mov.l l1,r3
10074 3 000a D203 mov.l l2,r2
10075 4 000c 6122 mov.l @r2,r1
10076 5 000e 5C21 mov.l @(4,r2),r12
10077 6 0010 412B jmp @r1
10078 7 0012 0009 nop
10079 8 0014 00000000 l1: .long area
10080 9 0018 00000000 l2: .long function
10081
10082 SH5 (compact) uses r1 instead of r3 for the static chain. */
10083
10084 /* Emit insns to store a value at memory address + offset. */
10085 static void
sh_emit_storesi(rtx addr,HOST_WIDE_INT offset,rtx value)10086 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10087 {
10088 gcc_assert ((offset & 3) == 0);
10089 emit_move_insn (offset == 0
10090 ? change_address (addr, SImode, NULL_RTX)
10091 : adjust_address (addr, SImode, offset), value);
10092 }
10093
10094 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10095 static void
sh_emit_storehi(rtx addr,HOST_WIDE_INT offset,uint16_t w0,uint16_t w1)10096 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10097 {
10098 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10099 ? (w0 | (w1 << 16))
10100 : (w1 | (w0 << 16)), SImode));
10101 }
10102
/* Emit RTL insns to initialize the variable parts of a trampoline.
   TRAMP_MEM is the memory block reserved for the trampoline,
   FNDECL is the function to be called through it, and
   CXT is the static chain value for that function.
   See the layout comment above for the insn words being stored.  */
static void
sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));

  if (TARGET_FDPIC)
    {
      /* Address of the code part at offset 8; it is stored first so
	 the trampoline itself acts as a function descriptor.  */
      rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));

      /* Descriptor: entry point and GOT value.  */
      sh_emit_storesi (tramp_mem, 0, a);
      sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());

      /* mov.l l1,r3; mov.l l2,r2; mov.l @r2,r1; mov.l @(4,r2),r12;
	 jmp @r1; nop.  */
      sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
      sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
      sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);

      /* l1: static chain; l2: target function.  */
      sh_emit_storesi (tramp_mem, 20, cxt);
      sh_emit_storesi (tramp_mem, 24, fnaddr);
    }
  else
    {
      /* mov.l l2,r2; mov.l l1,r3; jmp @r2; nop.  */
      sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
      sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);

      /* l1: static chain; l2: target function.  */
      sh_emit_storesi (tramp_mem, 8, cxt);
      sh_emit_storesi (tramp_mem, 12, fnaddr);
    }
  if (TARGET_HARD_SH4)
    {
      /* Flush the instruction cache over the freshly written
	 trampoline, via the library helper or an inline insn.  */
      if (!TARGET_INLINE_IC_INVALIDATE
	  || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
	emit_library_call (function_symbol (NULL, "__ic_invalidate",
					    FUNCTION_ORDINARY).sym,
			   LCT_NORMAL, VOIDmode, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}
10145
10146 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10147 static rtx
sh_trampoline_adjust_address(rtx tramp)10148 sh_trampoline_adjust_address (rtx tramp)
10149 {
10150 return tramp;
10151 }
10152
10153 /* If PIC, we cannot make sibling calls to global functions
10154 because the PLT requires r12 to be live. */
10155 static bool
sh_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)10156 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10157 {
10158 return (1
10159 && ! sh_cfun_interrupt_handler_p ()
10160 && (! flag_pic || TARGET_FDPIC
10161 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10162 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10163 }
10164
10165 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10166 void
sh_expand_sym_label2reg(rtx reg,rtx sym,rtx lab,bool sibcall_p)10167 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10168 {
10169 const_tree decl = SYMBOL_REF_DECL (sym);
10170 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10171
10172 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10173 emit_insn (gen_sym_label2reg (reg, sym, lab));
10174 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10175 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10176 else
10177 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10178 }
10179
10180 /* Machine specific built-in functions. */
10181
/* Description of one machine-specific built-in function.  */
struct builtin_description
{
  bool (* const is_enabled) (void);	/* Availability predicate.  */
  const enum insn_code icode;		/* Insn implementing the builtin.  */
  const char *const name;		/* User-visible builtin name.  */
  int signature;			/* Index into signature_args.  */
  tree fndecl;				/* Decl made by add_builtin_function.  */
};
10190
/* Availability predicate used by bdesc: true when the base SH1
   instruction set is available (i.e. on every current SH target).  */
static bool
sh1_builtin_p (void)
{
  return TARGET_SH1;
}
10198
/* Describes the number and signedness of a builtin's arguments;
   arg[0] describes the result.  Codes:
   0: no argument / void result, 1: unsigned, 2: signed,
   4: don't care, 8: pointer, 9: 64-bit pointer, 10: 32-bit pointer.  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 10 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 10 },
#define SH_BLTIN_STUA_L 13
  { 0, 10, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 10, 1 },
#define SH_BLTIN_LDUA_L64 15
  { 2, 9 },
#define SH_BLTIN_LDUA_Q64 16
  { 1, 9 },
#define SH_BLTIN_STUA_L64 17
  { 0, 9, 2 },
#define SH_BLTIN_STUA_Q64 18
  { 0, 9, 1 },
#define SH_BLTIN_NUM_SHARED_SIGNATURES 19
#define SH_BLTIN_2 19
#define SH_BLTIN_SU 19
  { 1, 2 },
#define SH_BLTIN_3 20
#define SH_BLTIN_SUS 20
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 21
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 22
#define SH_BLTIN_UUUU 22
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 23
  { 0, 8 },
#define SH_BLTIN_VP 24
  { 8, 0 },
#define SH_BLTIN_UV 25
  { 1, 0 },
#define SH_BLTIN_VU 26
  { 0, 1 },
};
/* Signedness notes, presumably for former (SHmedia-era) built-ins —
   none of these appear in bdesc anymore:
   mcmv: operands considered unsigned.
   mmulsum_wq, msad_ubq: result considered unsigned long long.
   mperm: control value considered unsigned int.
   mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.
   mshards_q: returns signed short.
   nsb: takes long long arg, returns unsigned char.  */
/* The table of built-in function descriptions; an entry's index is
   used as its machine-specific function code (see sh_builtin_decl
   and sh_expand_builtin).  */
static struct builtin_description bdesc[] =
{
  { sh1_builtin_p,
    CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
  { sh1_builtin_p,
    CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
};
10276
/* Cached decls of the FPSCR get/set builtins, recorded by
   sh_init_builtins for use in sh_atomic_assign_expand_fenv.  */
static tree sh_builtin_get_fpscr;
static tree sh_builtin_set_fpscr;
10279
/* Create decls for all enabled machine-specific builtins in bdesc,
   building each function type from its signature_args entry and
   caching shared types in SHARED.  */
static void
sh_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  memset (shared, 0, sizeof shared);

  for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
    {
      builtin_description* d = &bdesc[di];

      if (!d->is_enabled ())
	continue;

      tree type, arg_type = NULL_TREE;
      int signature = d->signature;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;
	  tree args[3];

	  /* Builtins with a floating-point result need an FPU.  */
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
	    args[i] = NULL_TREE;
	  /* Walk the signature from the last argument slot down to the
	     result slot (index 0); after the loop ARG_TYPE holds the
	     return type.  */
	  for (int i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg & 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = (*lang_hooks.types.type_for_mode)
		  (insn_data[d->icode].operand[opno].mode, (arg & 1));
	      else if (i)
		continue;
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      args[i-1] = arg_type;
	    }
	  type = build_function_type_list (arg_type, args[0], args[1],
					   args[2], NULL_TREE);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      d->fndecl =
	add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			      NULL, NULL_TREE);
      /* Record the {sts,set}_fpscr decls for
	 sh_atomic_assign_expand_fenv.  */
      if (d->icode == CODE_FOR_sts_fpscr)
	sh_builtin_get_fpscr = d->fndecl;
      else if (d->icode == CODE_FOR_set_fpscr)
	sh_builtin_set_fpscr = d->fndecl;
    }
}
10341
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.

   Build the three statement trees used around an atomic operation on
   floating-point data: *HOLD saves the FP environment (FPSCR) and
   masks exceptions, *CLEAR re-clears the exception flags, and
   *UPDATE restores the saved environment and re-raises any exceptions
   that occurred.  Does nothing when there is no FPU.  */

static void
sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* FPSCR exception flag bits; the corresponding enable bits sit
     SH_FE_EXCEPT_SHIFT positions higher.  */
  const unsigned SH_FE_INVALID = 64;
  const unsigned SH_FE_DIVBYZERO = 32;
  const unsigned SH_FE_OVERFLOW = 16;
  const unsigned SH_FE_UNDERFLOW = 8;
  const unsigned SH_FE_INEXACT = 4;
  const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
						   | SH_FE_DIVBYZERO
						   | SH_FE_OVERFLOW
						   | SH_FE_UNDERFLOW
						   | SH_FE_INEXACT);
  const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
  tree fenv_var, mask, ld_fenv, masked_fenv;
  tree new_fenv_var, reload_fenv, restore_fnenv;
  tree update_call, atomic_feraiseexcept, hold_fnclex;

  if (! TARGET_FPU_ANY)
    return;

  /* Generate the equivalent of :
       unsigned int fenv_var;
       fenv_var = __builtin_sh_get_fpscr ();

       unsigned int masked_fenv;
       masked_fenv = fenv_var & mask;

       __builtin_sh_set_fpscr (masked_fenv);  */

  fenv_var = create_tmp_var_raw (unsigned_type_node);
  /* Mask clears both the exception flags and their enable bits.  */
  mask = build_int_cst (unsigned_type_node,
			~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
			  | SH_FE_ALL_EXCEPT));
  ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
		    fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
  masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
  hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
  fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
		     build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
			     ld_fenv),
		     NULL_TREE, NULL_TREE);
  *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_sh_set_fpscr (masked_fenv);  */

  *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);

  /* Generate the equivalent of :
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_sh_get_fpscr ();

       __builtin_sh_set_fpscr (fenv_var);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
			build_call_expr (sh_builtin_get_fpscr, 0));
  restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
				 fold_convert (integer_type_node,
					       new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    reload_fenv, restore_fnenv), update_call);
}
10413
/* Implements target hook vector_mode_supported_p.  No vector modes
   are supported on any current SH target.  */
bool
sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return false;
}
10420
10421 bool
sh_frame_pointer_required(void)10422 sh_frame_pointer_required (void)
10423 {
10424 /* If needed override this in other tm.h files to cope with various OS
10425 lossage requiring a frame pointer. */
10426 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10427 return true;
10428
10429 if (crtl->profile)
10430 return true;
10431
10432 return false;
10433 }
10434
10435 /* Implements target hook dwarf_calling_convention. Return an enum
10436 of dwarf_calling_convention. */
10437 int
sh_dwarf_calling_convention(const_tree func)10438 sh_dwarf_calling_convention (const_tree func)
10439 {
10440 if (sh_attr_renesas_p (func))
10441 return DW_CC_GNU_renesas_sh;
10442
10443 return DW_CC_normal;
10444 }
10445
10446 /* Returns the sh builtin decl for CODE. */
10447 static tree
sh_builtin_decl(unsigned code,bool initialize_p ATTRIBUTE_UNUSED)10448 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10449 {
10450 if (code >= ARRAY_SIZE (bdesc))
10451 return error_mark_node;
10452
10453 if (!bdesc[code].is_enabled ())
10454 return error_mark_node;
10455
10456 return bdesc[code].fndecl;
10457 }
10458
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.
   Returns the result rtx, NULL_RTX when the ignored result need not be
   computed, or const0_rtx for erroneous arguments.  */
static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  /* The builtin's function code is its index into bdesc.  */
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  int nop = 0;
  rtx op[4];

  /* A non-zero first signature entry means the builtin has a result;
     operand 0 of the insn then receives it.  */
  if (signature_args[signature][0])
    {
      if (ignore)
	return NULL_RTX;

      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (! target || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = NULL_RTX;

  /* Expand up to three call arguments into insn operands.  */
  for (int i = 1; i <= 3; i++, nop++)
    {
      if (! signature_args[signature][i])
	break;
      tree arg = CALL_EXPR_ARG (exp, i - 1);
      if (arg == error_mark_node)
	return const0_rtx;

      machine_mode opmode;
      tree optype;
      if (signature_args[signature][i] & 8)
	{
	  /* Flag 8 in the signature denotes a pointer argument.  */
	  opmode = ptr_mode;
	  optype = ptr_type_node;
	}
      else
	{
	  opmode = insn_data[icode].operand[nop].mode;
	  optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
	}

      /* Convert the argument to the operand's mode if needed, then
	 force it into a form the operand predicate accepts.  */
      machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR, optype, arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  rtx pat = NULL_RTX;

  /* Generate the insn with however many operands were collected.  */
  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      gcc_unreachable ();
    }
  if (! pat)
    return NULL_RTX;
  emit_insn (pat);
  return target;
}
10543
10544 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10545 UNITS_PER_WORD bits wide. */
10546
10547 static unsigned int
sh_hard_regno_nregs(unsigned int regno,machine_mode mode)10548 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10549 {
10550 if (XD_REGISTER_P (regno))
10551 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10552 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
10553 }
10554
/* Implement TARGET_HARD_REGNO_MODE_OK.

   We can allow any mode in any general register.  The special registers
   only allow SImode.  Don't allow any mode in the PR.

   We cannot hold DCmode values in the XD registers because alter_reg
   handles subregs of them incorrectly.  We could work around this by
   spacing the XD registers like the DR registers, but this would require
   additional memory in every compilation to hold larger register vectors.
   We could hold SFmode / SCmode values in XD registers, but that
   would require a tertiary reload when reloading from / to memory,
   and a secondary reload to reload from / to general regs; that
   seems to be a losing proposition.

   We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
   it won't be ferried through GP registers first.  */
static bool
sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* MACH / MACL and friends: SImode only.  */
  if (SPECIAL_REGISTER_P (regno))
    return mode == SImode;

  if (regno == FPUL_REG)
    return (mode == SImode || mode == SFmode);

  if (FP_REGISTER_P (regno) && mode == SFmode)
    return true;

  if (mode == V2SFmode)
    {
      /* Pairs need an even-aligned FP register, or a general reg.  */
      if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
	   || GENERAL_REGISTER_P (regno)))
	return true;
      else
	return false;
    }

  if (mode == V4SFmode)
    {
      /* Quads need a 4-aligned FP register, or a general reg.  */
      if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
	  || GENERAL_REGISTER_P (regno))
	return true;
      else
	return false;
    }

  if (mode == V16SFmode)
    return regno == FIRST_XD_REG;

  if (FP_REGISTER_P (regno))
    {
      /* DFmode/DCmode need even alignment; TImode (for V4SFmode moves
	 on SH4) needs 4-alignment.  */
      if (mode == SFmode
	  || mode == SImode
	  || ((TARGET_SH2E) && mode == SCmode)
	  || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
	      && ((regno - FIRST_FP_REG) & 1) == 0)
	  || (TARGET_SH4 && mode == TImode
	      && ((regno - FIRST_FP_REG) & 3) == 0))
	return true;
      else
	return false;
    }

  if (XD_REGISTER_P (regno))
    return mode == DFmode;

  if (regno == PR_REG)
    return mode == SImode;

  if (regno == FPSCR_REG)
    return mode == SImode;

  return true;
}
10629
10630 /* Implement TARGET_MODES_TIEABLE_P.
10631
10632 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10633 and MODE2, for any hard reg, then this must be false for correct output.
   That's the case for xd registers: we don't hold SFmode values in
   them, so we can't tie an SFmode pseudo with one in another
   floating-point mode.  */
10637
10638 static bool
sh_modes_tieable_p(machine_mode mode1,machine_mode mode2)10639 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10640 {
10641 return (mode1 == mode2
10642 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10643 && (mode1 != SFmode && mode2 != SFmode)));
10644 }
10645
10646 /* Specify the modes required to caller save a given hard regno.
10647 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10648 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10649 permits integer modes on them. That makes LRA's split process
10650 unhappy. See PR55212.
10651 */
10652 machine_mode
sh_hard_regno_caller_save_mode(unsigned int regno,unsigned int nregs,machine_mode mode)10653 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10654 machine_mode mode)
10655 {
10656 if (FP_REGISTER_P (regno)
10657 && (mode == SFmode
10658 || mode == SCmode
10659 || ((mode == DFmode || mode == DCmode)
10660 && ((regno - FIRST_FP_REG) & 1) == 0)))
10661 return mode;
10662
10663 return choose_hard_reg_mode (regno, nregs, false);
10664 }
10665
10666 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10667 static bool
sh_can_change_mode_class(machine_mode from,machine_mode to,reg_class_t rclass)10668 sh_can_change_mode_class (machine_mode from, machine_mode to,
10669 reg_class_t rclass)
10670 {
10671 /* We want to enable the use of SUBREGs as a means to
10672 VEC_SELECT a single element of a vector. */
10673
10674 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10675 This can be problematic when SFmode vector subregs need to be accessed
10676 on the stack with displacement addressing, as it happens with -O0.
10677 Thus we disallow the mode change for -O0. */
10678 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10679 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10680
10681 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10682 {
10683 if (TARGET_LITTLE_ENDIAN)
10684 {
10685 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10686 return !reg_classes_intersect_p (DF_REGS, rclass);
10687 }
10688 else
10689 {
10690 if (GET_MODE_SIZE (from) < 8)
10691 return !reg_classes_intersect_p (DF_REGS, rclass);
10692 }
10693 }
10694 return true;
10695 }
10696
10697 /* Return true if registers in machine mode MODE will likely be
10698 allocated to registers in small register classes. */
10699 bool
sh_small_register_classes_for_mode_p(machine_mode mode ATTRIBUTE_UNUSED)10700 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10701 {
10702 return true;
10703 }
10704
10705 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10706 that label is used. */
10707 void
sh_mark_label(rtx address,int nuses)10708 sh_mark_label (rtx address, int nuses)
10709 {
10710 if (GOTOFF_P (address))
10711 {
10712 /* Extract the label or symbol. */
10713 address = XEXP (address, 0);
10714 if (GET_CODE (address) == PLUS)
10715 address = XEXP (address, 0);
10716 address = XVECEXP (address, 0, 0);
10717 }
10718 if (GET_CODE (address) == LABEL_REF
10719 && LABEL_P (XEXP (address, 0)))
10720 LABEL_NUSES (XEXP (address, 0)) += nuses;
10721 }
10722
10723 /* Compute extra cost of moving data between one register class
10724 and another.
10725
10726 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10727 uses this information. Hence, the general register <-> floating point
10728 register information here is not used for SFmode. */
static int
sh_register_move_cost (machine_mode mode,
		       reg_class_t srcclass, reg_class_t dstclass)
{
  /* Moves into the T bit or PR are expensive.  */
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  /* With TARGET_FMOVD, SImode values can move between FP registers.  */
  if (mode == SImode && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
    return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);

  /* MAC <-> FP moves are costly in either direction.  */
  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    {
      /* Discourage trying to use fp regs for a pointer.  This also
	 discourages fp regs with SImode because Pmode is an alias
	 of SImode on this target.  See PR target/48596.  */
      int addend = (mode == Pmode) ? 40 : 0;

      /* Scaled by the number of 8-byte chunks the value occupies.  */
      return ((TARGET_FMOVD ? 8 : 12) + addend)
	     * ((GET_MODE_SIZE (mode) + 7) / 8U);
    }

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  /* FPUL <-> PR / MAC / T transfers go through general registers.  */
  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  /* FP-to-FP style moves with TARGET_FMOVD, per 8-byte chunk.  */
  if (TARGET_FMOVD
      && ! REGCLASS_HAS_GENERAL_REG (srcclass)
      && ! REGCLASS_HAS_GENERAL_REG (dstclass))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  /* Default: cost per 4-byte chunk.  */
  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
10788
10789 static rtx
emit_load_ptr(rtx reg,rtx addr)10790 emit_load_ptr (rtx reg, rtx addr)
10791 {
10792 rtx mem = gen_const_mem (ptr_mode, addr);
10793
10794 if (Pmode != ptr_mode)
10795 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10796 return emit_move_insn (reg, mem);
10797 }
10798
/* Output the assembler code for a thunk function adjusting the "this"
   pointer by DELTA (plus an optional vtable lookup via VCALL_OFFSET)
   and then tail-calling FUNCTION.  */
static void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this_rtx, this_value, sibcall, funexp;
  rtx_insn *insns;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  /* We emit hard-register RTL and run final () by hand below, so pretend
     that reload and the epilogue have already happened.  */
  reload_completed = 1;
  epilogue_completed = 1;
  crtl->uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      /* Skip over the hidden struct-return pointer argument.  */
      sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
    }
  this_rtx
    = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
     -ffixed-reg has been used.  */
  if (! call_used_regs[0] || fixed_regs[0])
    error ("r0 needs to be available as a call-clobbered register");
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);

  {
    /* Upgrade scratch1 / scratch2 to distinct registers when available.  */
    if (call_used_regs[1] && ! fixed_regs[1])
      scratch1 = gen_rtx_REG (ptr_mode, 1);
    /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
       pointing where to return struct values.  */
    if (call_used_regs[3] && ! fixed_regs[3])
      scratch2 = gen_rtx_REG (Pmode, 3);
  }

  this_value = plus_constant (Pmode, this_rtx, delta);
  /* If the vtable pointer can be loaded from (this + delta) directly,
     do so now, before this_rtx is adjusted.  */
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  /* Adjust "this" by DELTA.  */
  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this_rtx, this_value);
  else
    {
      /* DELTA doesn't fit an add immediate; materialize it first.  */
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this_rtx, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this_rtx);

      offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (scratch0 != scratch1)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this_rtx + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else
	gcc_unreachable (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this_rtx, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  /* If the function is overridden, so is the thunk, hence we don't
     need GOT addressing even if this is a public symbol.  */
#if 0
  if (TARGET_SH1 && ! flag_weak)
    sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
  else
#endif
  if (TARGET_SH2 && flag_pic)
    {
      if (TARGET_FDPIC)
	{
	  sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
	  XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
	}
      else
	{
	  sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
	  XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
	}
    }
  else
    {
      /* Non-PIC: call through the address loaded into scratch2.  */
      emit_move_insn (scratch2, funexp);
      funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
      sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
    }
  sibcall = emit_call_insn (sibcall);
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insns = get_insns ();

  if (optimize > 0)
    {
      if (! cfun->cfg)
	init_flow (cfun);
      split_all_insns_noflow ();
    }

  sh_reorg ();
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1);
  final_end_function ();

  /* Restore the flags forced on at the top.  */
  reload_completed = 0;
  epilogue_completed = 0;
}
10968
10969 /* Return an RTX pair for the address and call site label of a function
10970 NAME of kind KIND, placing the result in TARGET if not NULL. For
10971 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10972 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10973 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10974 address of the function itself, not a function descriptor, so they
10975 can only be used with functions not using the FDPIC register that
   are known to be called directly without a PLT entry.  */
10977
function_symbol_result
function_symbol (rtx target, const char *name, sh_function_kind kind)
{
  /* If this is not an ordinary function, the name usually comes from a
     string literal or an sprintf buffer.  Make sure we use the same
     string consistently, so that cse will be able to unify address loads.  */
  if (kind != FUNCTION_ORDINARY)
    name = IDENTIFIER_POINTER (get_identifier (name));
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
  /* LAB stays (const_int 0) unless a call-site label is made below.  */
  rtx lab = const0_rtx;
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  if (flag_pic)
    switch (kind)
      {
      case FUNCTION_ORDINARY:
	break;
      case SFUNC_GOT:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  /* Load the address through the GOT.  */
	  emit_insn (gen_symGOT2reg (reg, sym));
	  sym = reg;
	  break;
	}
      case SFUNC_STATIC:
	{
	  rtx reg = target ? target : gen_reg_rtx (Pmode);

	  if (TARGET_FDPIC)
	    {
	      /* We use PC-relative calls, since GOTOFF can only refer
		 to writable data.  This works along with sh_sfunc_call.  */
	      lab = PATTERN (gen_call_site ());
	      emit_insn (gen_sym_label2reg (reg, sym, lab));
	    }
	  else
	    {
	      /* ??? To allow cse to work, we use GOTOFF relocations.
		 we could add combiner patterns to transform this into
		 straight pc-relative calls with sym2PIC / bsrf when
		 label load and function call are still 1:1 and in the
		 same basic block during combine.  */
	      emit_insn (gen_symGOTOFF2reg (reg, sym));
	    }

	  sym = reg;
	  break;
	}
      }
  /* Copy into TARGET when the address ended up elsewhere.  */
  if (target && sym != target)
    {
      emit_move_insn (target, sym);
      return function_symbol_result (target, lab);
    }
  return function_symbol_result (sym, lab);
}
11034
/* Find the number of the first general purpose register in S whose
   bit is set (i.e. the first one available), or -1 if there is none.  */
11037 static int
scavenge_reg(HARD_REG_SET * s)11038 scavenge_reg (HARD_REG_SET *s)
11039 {
11040 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11041 if (TEST_HARD_REG_BIT (*s, r))
11042 return r;
11043 return -1;
11044 }
11045
11046 rtx
sh_get_pr_initial_val(void)11047 sh_get_pr_initial_val (void)
11048 {
11049 /* If we haven't finished rtl generation, there might be a nonlocal label
11050 that we haven't seen yet.
11051 ??? get_hard_reg_initial_val fails if it is called after register
11052 allocation has started, unless it has been called before for the
11053 same register. And even then, we end in trouble if we didn't use
11054 the register in the same basic block before. So call
11055 get_hard_reg_initial_val now and wrap it in an unspec if we might
11056 need to replace it. */
11057 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11058 combine can put the pseudo returned by get_hard_reg_initial_val into
11059 instructions that need a general purpose registers, which will fail to
11060 be recognized when the pseudo becomes allocated to PR. */
11061 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11062 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11063 }
11064
11065 bool
sh_expand_t_scc(rtx operands[])11066 sh_expand_t_scc (rtx operands[])
11067 {
11068 enum rtx_code code = GET_CODE (operands[1]);
11069 rtx target = operands[0];
11070 rtx op0 = operands[2];
11071 rtx op1 = operands[3];
11072 rtx result = target;
11073
11074 if (!REG_P (op0) || REGNO (op0) != T_REG
11075 || !CONST_INT_P (op1))
11076 return false;
11077 if (!REG_P (result))
11078 result = gen_reg_rtx (SImode);
11079 HOST_WIDE_INT val = INTVAL (op1);
11080 if ((code == EQ && val == 1) || (code == NE && val == 0))
11081 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11082 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11083 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11084 else if (code == EQ || code == NE)
11085 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11086 else
11087 return false;
11088 if (result != target)
11089 emit_move_insn (target, result);
11090 return true;
11091 }
11092
11093 /* INSN is an sfunc; return the rtx that describes the address used. */
11094 static rtx
extract_sfunc_addr(rtx insn)11095 extract_sfunc_addr (rtx insn)
11096 {
11097 rtx pattern = PATTERN (insn);
11098 const int len = XVECLEN (pattern, 0);
11099 for (int i = 0; i < len; i++)
11100 {
11101 rtx part = XVECEXP (pattern, 0, i);
11102 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11103 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11104 return XEXP (part, 0);
11105 }
11106 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11107 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11108 }
11109
11110 /* Verify that the register in use_sfunc_addr still agrees with the address
11111 used in the sfunc. This prevents fill_slots_from_thread from changing
11112 use_sfunc_addr.
11113 INSN is the use_sfunc_addr instruction, and REG is the register it
11114 guards. */
bool
check_use_sfunc_addr (rtx_insn *insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      /* Give up at control-flow boundaries (triggers gcc_unreachable).  */
      if (LABEL_P (insn) || JUMP_P (insn))
	break;
      if (! INSN_P (insn))
	continue;

      /* If INSN wraps a SEQUENCE, examine its first member.  */
      if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
	insn = seq->insn (0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
	  || get_attr_type (insn) != TYPE_SFUNC)
	continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  gcc_unreachable ();
}
11135
11136 /* This function returns a constant rtx that represents 2**15 / pi in
11137 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11138 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
11139 static GTY(()) rtx sh_fsca_sf2int_rtx;
11140
11141 rtx
sh_fsca_sf2int(void)11142 sh_fsca_sf2int (void)
11143 {
11144 if (! sh_fsca_sf2int_rtx)
11145 {
11146 REAL_VALUE_TYPE rv;
11147
11148 real_from_string (&rv, "10430.378350470453");
11149 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11150 }
11151
11152 return sh_fsca_sf2int_rtx;
11153 }
11154
11155 /* This function returns a constant rtx that represents pi / 2**15 in
11156 SFmode. It's used to scale SFmode angles, in radians, to a
11157 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
11158 maps to 0x10000. */
11159 static GTY(()) rtx sh_fsca_int2sf_rtx;
11160
11161 rtx
sh_fsca_int2sf(void)11162 sh_fsca_int2sf (void)
11163 {
11164 if (! sh_fsca_int2sf_rtx)
11165 {
11166 REAL_VALUE_TYPE rv;
11167
11168 real_from_string (&rv, "9.587379924285257e-5");
11169 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11170 }
11171
11172 return sh_fsca_int2sf_rtx;
11173 }
11174
11175 /* Initialize the CUMULATIVE_ARGS structure. */
11176 void
sh_init_cumulative_args(CUMULATIVE_ARGS * pcum,tree fntype,rtx libname ATTRIBUTE_UNUSED,tree fndecl,signed int n_named_args,machine_mode mode)11177 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11178 tree fntype,
11179 rtx libname ATTRIBUTE_UNUSED,
11180 tree fndecl,
11181 signed int n_named_args,
11182 machine_mode mode)
11183 {
11184 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11185 pcum->free_single_fp_reg = 0;
11186 pcum->outgoing = n_named_args != -1;
11187
11188 /* FIXME: Should we check TARGET_HITACHI here ??? */
11189 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11190
11191 if (fntype)
11192 {
11193 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11194 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11195 pcum->prototype_p = prototype_p (fntype);
11196 pcum->arg_count [(int) SH_ARG_INT] = false;
11197 }
11198 else
11199 {
11200 pcum->arg_count [(int) SH_ARG_INT] = 0;
11201 pcum->prototype_p = false;
11202 if (mode != VOIDmode)
11203 {
11204 /* If the default ABI is the Renesas ABI then all library
11205 calls must assume that the library will be using the
11206 Renesas ABI. So if the function would return its result
11207 in memory then we must force the address of this memory
11208 block onto the stack. Ideally we would like to call
11209 targetm.calls.return_in_memory() here but we do not have
11210 the TYPE or the FNDECL available so we synthesize the
11211 contents of that function as best we can. */
11212 pcum->force_mem =
11213 (TARGET_DEFAULT & MASK_HITACHI)
11214 && (mode == BLKmode
11215 || (GET_MODE_SIZE (mode) > 4
11216 && !(mode == DFmode
11217 && TARGET_FPU_DOUBLE)));
11218 }
11219 else
11220 pcum->force_mem = false;
11221 }
11222 }
11223
11224 rtx
sh_gen_truncate(machine_mode mode,rtx x,int need_sign_ext)11225 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11226 {
11227 enum rtx_code code = TRUNCATE;
11228
11229 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11230 {
11231 rtx inner = XEXP (x, 0);
11232 machine_mode inner_mode = GET_MODE (inner);
11233
11234 if (inner_mode == mode)
11235 return inner;
11236 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11237 x = inner;
11238 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11239 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11240 {
11241 code = GET_CODE (x);
11242 x = inner;
11243 }
11244 }
11245 return gen_rtx_fmt_e (code, mode, x);
11246 }
11247
11248 /* Load and store depend on the highpart of the address. However,
11249 set_attr_alternative does not give well-defined results before reload,
11250 so we must look at the rtl ourselves to see if any of the feeding
11251 registers is used in a memref.
11252
11253 Return true iff INSN contains a MEM. */
11254 bool
sh_contains_memref_p(rtx insn)11255 sh_contains_memref_p (rtx insn)
11256 {
11257 subrtx_iterator::array_type array;
11258 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11259 if (MEM_P (*iter))
11260 return true;
11261 return false;
11262 }
11263
11264 /* Return true iff INSN loads a banked register. */
11265 bool
sh_loads_bankedreg_p(rtx insn)11266 sh_loads_bankedreg_p (rtx insn)
11267 {
11268 if (GET_CODE (PATTERN (insn)) == SET)
11269 {
11270 rtx op = SET_DEST (PATTERN(insn));
11271 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11272 return true;
11273 }
11274
11275 return false;
11276 }
11277
11278 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11279 static reg_class_t
sh_preferred_reload_class(rtx x ATTRIBUTE_UNUSED,reg_class_t rclass)11280 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11281 {
11282 return rclass;
11283 }
11284
11285 /* Implement TARGET_SECONDARY_RELOAD. */
static reg_class_t
sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
		     machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* GBR + displacement addresses need R0 to form the address.  */
  if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  /* So do plain GBR-indirect addresses.  */
  if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
    return rclass == R0_REGS ? NO_REGS : R0_REGS;

  if (REG_P (x) && REGNO (x) == GBR_REG)
    return NO_REGS;

  if (in_p)
    {
      /* Loading an FP immediate, other than SFmode 0.0 / 1.0.  */
      if (REGCLASS_HAS_FP_REG (rclass)
	  && immediate_operand ((x), mode)
	  && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
	switch (mode)
	  {
	  case E_SFmode:
	    sri->icode = CODE_FOR_reload_insf__frn;
	    return NO_REGS;
	  case E_DFmode:
	    sri->icode = CODE_FOR_reload_indf__frn;
	    return NO_REGS;
	  case E_SImode:
	    /* ??? If we knew that we are in the appropriate mode -
	       single precision - we could use a reload pattern directly.  */
	    return FPUL_REGS;
	  default:
	    abort ();
	  }
      /* FPUL can't be loaded directly from MACL/MACH/T or a PLUS.  */
      if (rclass == FPUL_REGS
	  && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
			     || REGNO (x) == T_REG))
	      || GET_CODE (x) == PLUS))
	return GENERAL_REGS;
      if (rclass == FPUL_REGS && immediate_operand (x, mode))
	{
	  if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
	    return GENERAL_REGS;
	  else if (mode == SFmode)
	    return FP_REGS;
	  sri->icode = CODE_FOR_reload_insi__i_fpul;
	  return NO_REGS;
	}
      /* FPSCR loads from pseudos or reg+disp memory need a general reg.  */
      if (rclass == FPSCR_REGS
	  && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
	      || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
	return GENERAL_REGS;
    } /* end of input-only processing.  */

  /* General <-> FP register transfers of SFmode/SImode go via FPUL.  */
  if (((REGCLASS_HAS_FP_REG (rclass)
	&& (REG_P (x)
	    && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
		|| (FP_REGISTER_P (REGNO (x)) && mode == SImode
		    && TARGET_FMOVD))))
       || (REGCLASS_HAS_GENERAL_REG (rclass)
	   && REG_P (x)
	   && FP_REGISTER_P (REGNO (x))))
      && (mode == SFmode || mode == SImode))
    return FPUL_REGS;
  if ((rclass == FPUL_REGS
       || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
      && (MEM_P (x)
	  || (REG_P (x)
	      && (REGNO (x) >= FIRST_PSEUDO_REGISTER
		  || REGNO (x) == T_REG
		  || system_reg_operand (x, VOIDmode)))))
    {
      if (rclass == FPUL_REGS)
	return GENERAL_REGS;
      return NO_REGS;  // LRA wants NO_REGS here, it used to be FPUL_REGS;
    }

  /* MAC / PR can only be moved to/from general registers.  */
  if ((rclass == MAC_REGS || rclass == PR_REGS)
      && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
      && rclass != REGNO_REG_CLASS (REGNO (x)))
    return GENERAL_REGS;

  /* If here fall back to loading FPUL register through general registers.
     This case can happen when movsi_ie insn is picked initially to
     load/store the FPUL register from/to another register, and then the
     other register is allocated on the stack.  */
  if (rclass == FPUL_REGS && true_regnum (x) == -1)
    return GENERAL_REGS;

  /* Force mov.b / mov.w displacement addressing insn to use R0 as
     the other operand.
     On SH2A could also just leave it alone here, which would result in a
     4 byte move insn being generated instead.  However, for this to work
     the insns must have the appropriate alternatives.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && satisfies_constraint_Sdd (x)
      && sh_disp_addr_displacement (x)
	 <= sh_max_mov_insn_displacement (mode, false))
    return R0_REGS;

  /* When reload is trying to address a QImode or HImode subreg on the stack,
     force any subreg byte into R0_REGS, as this is going to become a
     displacement address.
     We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
     is on the stack, the memref to it might already require a displacement
     and that has to be added to the final address.  At this point we don't
     know the cumulative displacement so we assume the worst case.  */
  if ((mode == QImode || mode == HImode) && rclass != R0_REGS
      && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
    return R0_REGS;

  return NO_REGS;
}
11402
11403 /* Return true if SUBST can't safely replace its equivalent during RA. */
11404 static bool
sh_cannot_substitute_mem_equiv_p(rtx)11405 sh_cannot_substitute_mem_equiv_p (rtx)
11406 {
11407 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11408 uses R0 and may cause spill failure when R0 is already used.
11409 We have to return true for that case at least.
11410 Moreover SH has strong R0 parity and also have not enough numbers of
11411 the hard registers to make the equiv substitution win in the size
11412 and the speed on average working sets. The pseudos produced to
11413 hold the equiv values can't get good hard registers for bad cases
11414 and end up memory save/restore insns which make the code worse. */
11415 return true;
11416 }
11417
11418 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11419 static bool
sh_legitimize_address_displacement(rtx * offset1,rtx * offset2,poly_int64 orig_offset,machine_mode mode)11420 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11421 poly_int64 orig_offset,
11422 machine_mode mode)
11423 {
11424 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11425 || (TARGET_SH2E && mode == SFmode))
11426 return false;
11427
11428 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11429 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11430 {
11431 *offset1 = adj.offset_adjust;
11432 *offset2 = adj.mov_disp;
11433 return true;
11434 }
11435
11436 return false;
11437 }
11438
/* Return true if the movsf insn should be split using an additional
   register.  */
11441 bool
sh_movsf_ie_ra_split_p(rtx op0,rtx op1,rtx op2)11442 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11443 {
11444 /* op0 == op1 */
11445 if (rtx_equal_p (op0, op1))
11446 return true;
11447 /* fy, FQ, reg */
11448 if (GET_CODE (op1) == CONST_DOUBLE
11449 && ! satisfies_constraint_G (op1)
11450 && ! satisfies_constraint_H (op1)
11451 && REG_P (op0)
11452 && REG_P (op2))
11453 return true;
11454 /* f, r, y */
11455 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11456 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11457 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11458 return true;
11459 /* r, f, y */
11460 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11461 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11462 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11463 return true;
11464
11465 return false;
11466 }
11467
static void
sh_conditional_register_usage (void)
{
  /* Mark registers that don't exist on this subtarget as fixed.  */
  for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
    if (! VALID_REGISTER_P (regno))
      fixed_regs[regno] = call_used_regs[regno] = 1;
  /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs.  */
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  if (TARGET_FDPIC)
    {
      fixed_regs[PIC_REG] = 1;
      call_used_regs[PIC_REG] = 1;
      call_really_used_regs[PIC_REG] = 1;
    }
  /* Renesas saves and restores mac registers on call.  */
  if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
    {
      call_really_used_regs[MACH_REG] = 0;
      call_really_used_regs[MACL_REG] = 0;
    }

  /* Every non-fixed, call-clobbered general register is added to
     SIBCALL_REGS.  */
  for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
    if (! fixed_regs[regno] && call_really_used_regs[regno])
      SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);

  /* Treat the FPSCR mode/status shadow registers as call-preserved.  */
  call_really_used_regs[FPSCR_MODES_REG] = 0;
  call_really_used_regs[FPSCR_STAT_REG] = 0;
}
11500
11501 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11502
11503 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11504 static bool
sh_legitimate_constant_p(machine_mode mode,rtx x)11505 sh_legitimate_constant_p (machine_mode mode, rtx x)
11506 {
11507 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11508 {
11509 rtx base, offset;
11510 split_const (x, &base, &offset);
11511
11512 if (GET_CODE (base) == SYMBOL_REF
11513 && !offset_within_block_p (base, INTVAL (offset)))
11514 return false;
11515 }
11516
11517 if (TARGET_FDPIC
11518 && (SYMBOLIC_CONST_P (x)
11519 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11520 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11521 return false;
11522
11523 return GET_CODE (x) != CONST_DOUBLE
11524 || mode == DFmode || mode == SFmode
11525 || mode == DImode || GET_MODE (x) == VOIDmode;
11526 }
11527
11528 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11529
/* Register the out-of-line atomic/synchronization library functions
   for word-sized operands.  */
static void
sh_init_sync_libfuncs (void)
{
  init_sync_libfuncs (UNITS_PER_WORD);
}
11535
11536 /* Return true if it is appropriate to emit `ret' instructions in the
11537 body of a function. */
11538 bool
sh_can_use_simple_return_p(void)11539 sh_can_use_simple_return_p (void)
11540 {
11541 if (! reload_completed || frame_pointer_needed)
11542 return false;
11543
11544 /* Moving prologue around does't reduce the size. */
11545 if (optimize_function_for_size_p (cfun))
11546 return false;
11547
11548 /* Finally, allow for pr save. */
11549 HARD_REG_SET live_regs_mask;
11550 int d = calc_live_regs (&live_regs_mask);
11551
11552 if (rounded_frame_size (d) > 4)
11553 return false;
11554
11555 return true;
11556 }
11557
11558 /*------------------------------------------------------------------------------
11559 Address mode optimization support code
11560 */
11561
11562 typedef HOST_WIDE_INT disp_t;
11563 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11564 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11565 static const disp_t INVALID_DISP = MAX_DISP;
11566
/* A memory reference which is described by a base register and a
   displacement.  One of the two parts may be absent: a pure
   displacement is represented with a NULL base register, and a
   displacement of INVALID_DISP marks an unusable result.  */
class base_reg_disp
{
public:
  base_reg_disp (rtx br, disp_t d);

  /* True if this describes a base register with a valid displacement.  */
  bool is_reg (void) const;

  /* True if this describes a pure displacement without a base register.  */
  bool is_disp (void) const;

  rtx reg (void) const;
  disp_t disp (void) const;

private:
  rtx reg_;
  disp_t disp_;
};

inline
base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}

inline bool
base_reg_disp::is_reg (void) const
{
  return reg_ != NULL_RTX && disp_ != INVALID_DISP;
}

inline bool
base_reg_disp::is_disp (void) const
{
  return reg_ == NULL_RTX && disp_ != INVALID_DISP;
}

inline rtx
base_reg_disp::reg (void) const
{
  return reg_;
}

inline disp_t
base_reg_disp::disp (void) const
{
  return disp_;
}
11613
/* Find the base register and calculate the displacement for a given
   address rtx 'x'.  DISP and BASE_REG accumulate the displacement and
   base register found so far while recursing through the address
   expression.  The result may be a base register (plus displacement)
   or a bare displacement.  */
static base_reg_disp
sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
		       rtx base_reg = NULL)
{
  if (REG_P (x))
    {
      /* GBR itself terminates the search immediately.  */
      if (REGNO (x) == GBR_REG)
	return base_reg_disp (x, disp);

      /* We've reached a hard-reg.  This is probably the point where
	 function args are copied to pseudos.  Do not go any further and
	 stick to the pseudo.  If the original mem addr was in a hard reg
	 from the beginning, it will become the base reg.  */
      if (REGNO (x) < FIRST_PSEUDO_REGISTER)
	return base_reg_disp (base_reg != NULL ? base_reg : x, disp);

      /* Find the def of the reg and trace it.  If there are more than one
	 defs and they are not the same, assume it's not safe to proceed.  */
      rtx_insn* last_i = NULL;
      rtx last_set = NULL;
      for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));

	  /* Accept multiple defs, as long as they are equal.  */
	  if (last_set == NULL || rtx_equal_p (last_set, set))
	    {
	      last_i = DF_REF_INSN (d);
	      last_set = set;
	    }
	  else
	    {
	      last_i = NULL;
	      last_set = NULL;
	      break;
	    }
	}

      /* A single (or repeated identical) def was found; trace the value
	 that was stored into the reg.  */
      if (last_set != NULL && last_i != NULL)
	return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
				      XEXP (last_set, 0));

      /* When here, no previous insn was found that sets the reg.
	 The input reg is already the base reg.  */
      return base_reg_disp (x, disp);
    }

  else if (GET_CODE (x) == PLUS)
    {
      base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
      base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));

      /* Either left or right val must be a reg.
	 We don't handle the case of 'reg + reg' here.  */
      if (left_val.is_reg () && right_val.is_disp ())
	return base_reg_disp (left_val.reg (), left_val.disp ()
			      + right_val.disp () + disp);
      else if (right_val.is_reg () && left_val.is_disp ())
	return base_reg_disp (right_val.reg (), right_val.disp ()
			      + left_val.disp () + disp);
      else
	return base_reg_disp (base_reg, disp);
    }

  else if (CONST_INT_P (x))
    return base_reg_disp (NULL, disp + INTVAL (x));

  /* Didn't find anything useful.  */
  return base_reg_disp (base_reg, disp);
}
11687
/* Given an insn and a memory operand, try to find an equivalent GBR
   based memory address and return the corresponding new memory address.
   Return NULL_RTX if not found.  */
rtx
sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
{
  /* Nothing to do for non-mems or mems that are already GBR based.  */
  if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
    return NULL_RTX;

  /* Leave post/pre inc/dec or any other side effect addresses alone.  */
  if (side_effects_p (XEXP (mem, 0)))
    return NULL_RTX;

  /* When not optimizing there might be no dataflow available.  */
  if (df == NULL)
    return NULL_RTX;

  base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));

  if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
    {
      /* If GBR is marked as call clobbered we bail out if we see a call.
	 FIXME: Actually should check if this mem refers to the gbr value
	 before or after the call.  If there is a store_gbr preceding this
	 mem, it's safe to use GBR for this mem.

	 If GBR is not marked as call clobbered, but there is some other
	 def than a call, it's probably a load_gbr upon which we also
	 bail out to be on the safe side.
	 FIXME: Should check if we have a use-after-def case, such as
	 the call case above.  */
      for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
	   d = DF_REF_NEXT_REG (d))
	{
	  if (CALL_P (DF_REF_INSN (d)))
	    {
	      if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
		return NULL_RTX;
	      else
		continue;
	    }
	  else
	    return NULL_RTX;
	}

      /* Only propose the new address if the displacement fits the
	 GBR addressing mode for this access mode.  */
      rtx disp = GEN_INT (gbr_disp.disp ());
      if (gbr_displacement (disp, GET_MODE (mem)))
	return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
    }

  return NULL_RTX;
}
11740
11741 /*------------------------------------------------------------------------------
11742 Manual insn combine support code.
11743 */
11744
11745 /* Return true if the specified insn contains any UNSPECs or
11746 UNSPEC_VOLATILEs. */
11747 static bool
sh_unspec_insn_p(rtx x)11748 sh_unspec_insn_p (rtx x)
11749 {
11750 subrtx_iterator::array_type array;
11751 FOR_EACH_SUBRTX (i, array, x, ALL)
11752 if (*i != NULL
11753 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11754 return true;
11755
11756 return false;
11757 }
11758
11759 /* Return true if the register operands of the specified insn are modified
11760 between the specified from and to insns (exclusive of those two). */
11761 bool
sh_insn_operands_modified_between_p(rtx_insn * operands_insn,const rtx_insn * from,const rtx_insn * to)11762 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11763 const rtx_insn* from,
11764 const rtx_insn* to)
11765 {
11766 /* FIXME: Return true for multiple sets for now. */
11767 rtx s = single_set (operands_insn);
11768 if (s == NULL_RTX)
11769 return true;
11770
11771 subrtx_iterator::array_type array;
11772 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11773 if (*i != NULL &&
11774 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11775 return true;
11776
11777 return false;
11778 }
11779
11780 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11781 negates the T bit and stores the result in the T bit. */
11782 bool
sh_is_nott_insn(const rtx_insn * i)11783 sh_is_nott_insn (const rtx_insn* i)
11784 {
11785 return i != NULL && GET_CODE (PATTERN (i)) == SET
11786 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11787 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11788 }
11789
11790 rtx
sh_movt_set_dest(const rtx_insn * i)11791 sh_movt_set_dest (const rtx_insn* i)
11792 {
11793 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11794 }
11795
11796 rtx
sh_movt_set_dest(const_rtx pat)11797 sh_movt_set_dest (const_rtx pat)
11798 {
11799 return GET_CODE (pat) == SET
11800 && arith_reg_dest (XEXP (pat, 0), SImode)
11801 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11802 }
11803
11804 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11805 that stores the negated T bit in a register, and return the destination
11806 register rtx, or null. */
11807 rtx
sh_movrt_set_dest(const rtx_insn * i)11808 sh_movrt_set_dest (const rtx_insn* i)
11809 {
11810 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11811 }
11812
11813 rtx
sh_movrt_set_dest(const_rtx pat)11814 sh_movrt_set_dest (const_rtx pat)
11815 {
11816 /* The negc movrt replacement is inside a parallel. */
11817 if (GET_CODE (pat) == PARALLEL)
11818 pat = XVECEXP (pat, 0, 0);
11819
11820 return GET_CODE (pat) == SET
11821 && arith_reg_dest (XEXP (pat, 0), SImode)
11822 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11823
11824 }
11825
11826 /* Given an insn and a reg number, tell whether the reg dies or is unused
11827 after the insn. */
11828 bool
sh_reg_dead_or_unused_after_insn(const rtx_insn * i,int regno)11829 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11830 {
11831 return find_regno_note (i, REG_DEAD, regno) != NULL
11832 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11833 }
11834
11835 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11836 mark it as being used after the insn. */
11837 void
sh_remove_reg_dead_or_unused_notes(rtx_insn * i,int regno)11838 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11839 {
11840 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11841 remove_note (i, n);
11842 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11843 remove_note (i, n);
11844 }
11845
/* Given an insn check if it contains any post/pre inc/dec mem operands and
   add the REG_INC notes accordingly.  Returns the insn itself.
   FIXME: This function is very similar to lra.c (add_auto_inc_notes).
   FIXME: This function is currently used by peephole2 patterns because
	  the peephole2 pass does not preserve REG_INC notes.  If the notes
	  are dropped the following passes will do wrong things.  */
rtx_insn*
sh_check_add_incdec_notes (rtx_insn* i)
{
  /* Callback for for_each_inc_dec; ARG is the insn to annotate.  */
  struct for_each_inc_dec_clb
  {
    static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
		     rtx dest, rtx src ATTRIBUTE_UNUSED,
		     rtx srcoff ATTRIBUTE_UNUSED, void* arg)
    {
      gcc_assert (REG_P (dest));

      rtx_insn* i = (rtx_insn*)arg;
      /* Add a REG_INC note for the address register unless one is
	 already present.  */
      if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
	add_reg_note (i, REG_INC, dest);

      return 0;
    }
  };

  for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
  return i;
}
11874
11875 /* Given a move insn destiation and a source, make sure that the move source
11876 operand is not a post-inc mem load with the same address reg as the
11877 destination. Returns the modified source operand with the post-inc removed
11878 if necessary. */
11879 rtx
sh_remove_overlapping_post_inc(rtx dst,rtx src)11880 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11881 {
11882 if (!MEM_P (src))
11883 return src;
11884
11885 rtx addr = XEXP (src, 0);
11886
11887 if (GET_CODE (addr) == POST_INC
11888 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11889 return replace_equiv_address (src, XEXP (addr, 0));
11890
11891 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11892 return src;
11893 }
11894
11895 /* Emit a move insn that is safe to be used in peephole patterns. */
11896 rtx_insn*
sh_peephole_emit_move_insn(rtx dst,rtx src)11897 sh_peephole_emit_move_insn (rtx dst, rtx src)
11898 {
11899 return sh_check_add_incdec_notes (
11900 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11901 }
11902
11903 /* Given an op rtx and an insn, try to find out whether the result of the
11904 specified op consists only of logical operations on T bit stores. */
11905 bool
sh_is_logical_t_store_expr(rtx op,rtx_insn * insn)11906 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11907 {
11908 if (!logical_operator (op, SImode))
11909 return false;
11910
11911 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11912 int op_is_t_count = 0;
11913
11914 for (int i = 0; i < 2; ++i)
11915 {
11916 if (t_reg_operand (ops[i], VOIDmode)
11917 || negt_reg_operand (ops[i], VOIDmode))
11918 op_is_t_count++;
11919
11920 else
11921 {
11922 set_of_reg op_set = sh_find_set_of_reg
11923 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11924 if (op_set.set_src == NULL_RTX)
11925 continue;
11926
11927 if (t_reg_operand (op_set.set_src, VOIDmode)
11928 || negt_reg_operand (op_set.set_src, VOIDmode)
11929 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11930 op_is_t_count++;
11931 }
11932 }
11933
11934 return op_is_t_count == 2;
11935 }
11936
11937 /* Given the operand that is extended in a sign/zero extend insn, and the
11938 insn, try to figure out whether the sign/zero extension can be replaced
11939 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11940 NULL_RTX otherwise. */
11941 rtx
sh_try_omit_signzero_extend(rtx extended_op,rtx_insn * insn)11942 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11943 {
11944 if (REG_P (extended_op))
11945 extended_op = extended_op;
11946 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11947 extended_op = SUBREG_REG (extended_op);
11948 else
11949 return NULL_RTX;
11950
11951 /* Reg moves must be of the same mode. */
11952 if (GET_MODE (extended_op) != SImode)
11953 return NULL_RTX;
11954
11955 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11956 prev_nonnote_nondebug_insn_bb);
11957 if (s.set_src == NULL_RTX)
11958 return NULL_RTX;
11959
11960 if (t_reg_operand (s.set_src, VOIDmode)
11961 || negt_reg_operand (s.set_src, VOIDmode))
11962 return extended_op;
11963
11964 /* If the zero extended reg was formed by a logical operation, check the
11965 operands of the logical operation. If both originated from T bit
11966 stores the zero extension can be eliminated. */
11967 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11968 return extended_op;
11969
11970 return NULL_RTX;
11971 }
11972
/* Given the current insn, which is assumed to be a movrt_negc insn, try to
   figure out whether it should be converted into a movt-xor sequence in
   the movrt_negc splitter.
   Returns true if insns have been modified and the splitter has succeeded.  */
bool
sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
{
  /* In cases such as
	tst	r4,r4
	mov	#-1,r1
	negc	r1,r1
	tst	r4,r4
     we can replace the T bit clobbering negc with a movt-xor sequence and
     eliminate the redundant comparison.
     Because the xor insn depends on register allocation results, allow this
     only before reload.  */
  if (!can_create_pseudo_p ())
    return false;

  /* Find the T bit setting insns before and after the negc.  */
  set_of_reg t_before_negc = sh_find_set_of_reg
    (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
  set_of_reg t_after_negc = sh_find_set_of_reg
    (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);

  /* The following comparison is redundant only if it is identical to
     the preceding one, neither its operands nor the T reg change in
     between, and deleting it is safe (no volatile accesses, side
     effects, unspecs or trapping operations).  */
  if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
      && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
      && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_insn_operands_modified_between_p (t_before_negc.insn,
					       t_before_negc.insn,
					       t_after_negc.insn)
      && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
      && !sh_unspec_insn_p (t_after_negc.insn)
      && !volatile_insn_p (PATTERN (t_after_negc.insn))
      && !side_effects_p (PATTERN (t_after_negc.insn))
      && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
    {
      /* Emit the movt-xor replacement and delete the redundant second
	 comparison.  */
      emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
      set_insn_deleted (t_after_negc.insn);
      return true;
    }
  else
    return false;
}
12016
/* Given a reg and the current insn, see if the value of the reg originated
   from a sign or zero extension and return the discovered information.
   If nothing could be determined the result's ext_code is UNKNOWN.  */
sh_extending_set_of_reg
sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
{
  if (reg == NULL)
    return sh_extending_set_of_reg (curr_insn);

  /* Look through a subreg wrapper.  */
  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return sh_extending_set_of_reg (curr_insn);

  /* FIXME: Also search the predecessor basic blocks.  It seems that checking
     only the adjacent predecessor blocks would cover most of the cases.
     Also try to look through the first extension that we hit.  There are some
     cases, where a zero_extend is followed an (implicit) sign_extend, and it
     fails to see the sign_extend.  */
  sh_extending_set_of_reg result = sh_find_set_of_reg
    (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);

  if (result.set_src != NULL)
    {
      if (GET_CODE (result.set_src) == SIGN_EXTEND
	  || GET_CODE (result.set_src) == ZERO_EXTEND)
	{
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"explicitly sign/zero extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (XEXP (result.set_src, 0));
	  result.ext_code = GET_CODE (result.set_src);
	}
      else if (MEM_P (result.set_src)
	       && (GET_MODE (result.set_src) == QImode
		   || GET_MODE (result.set_src) == HImode)
	       && !sh_unspec_insn_p (result.insn))
	{
	  /* On SH QIHImode memory loads always sign extend.  However, in
	     some cases where it seems that the higher bits are not
	     interesting, the loads will not be expanded as sign extending
	     insns, but as QIHImode loads into QIHImode regs.  We report that
	     the reg has been sign extended by the mem load.  When it is used
	     as such, we must convert the mem load into a sign extending insn,
	     see also sh_extending_set_of_reg::use_as_extended_reg.  */
	  if (dump_file)
	    fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
				"implicitly sign extended in insn %d\n",
				REGNO (reg), INSN_UID (result.insn));
	  result.from_mode = GET_MODE (result.set_src);
	  result.ext_code = SIGN_EXTEND;
	}
    }

  return result;
}
12074
/* Given a reg that is known to be sign or zero extended at some insn,
   take the appropriate measures so that the extended value can be used as
   a reg at the specified insn and return the resulting reg rtx.  */
rtx
sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
  gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
  gcc_assert (from_mode == QImode || from_mode == HImode);

  if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
    {
      /* Implicitly sign extending mem load: replace it with an explicit
	 sign extending load into a new pseudo, then copy the low part
	 back into the original destination.  */
      if (dump_file)
	fprintf (dump_file,
		 "use_as_extended_reg: converting non-extending mem load in "
		 "insn %d into sign-extending load\n", INSN_UID (insn));

      rtx r = gen_reg_rtx (SImode);
      rtx_insn* i0;
      if (from_mode == QImode)
	i0 = sh_check_add_incdec_notes (
			emit_insn_after (gen_extendqisi2 (r, set_src), insn));
      else if (from_mode == HImode)
	i0 = sh_check_add_incdec_notes (
			emit_insn_after (gen_extendhisi2 (r, set_src), insn));
      else
	gcc_unreachable ();

      emit_insn_after (
		gen_move_insn (XEXP (set_rtx, 0),
			       gen_lowpart (GET_MODE (set_src), r)), i0);
      /* The original non-extending load is now dead.  */
      set_insn_deleted (insn);
      return r;
    }
  else
    {
      /* The extension destination already holds the extended value; use
	 it directly, as an SImode reg.  */
      rtx extension_dst = XEXP (set_rtx, 0);
      if (GET_MODE (extension_dst) != SImode)
	extension_dst = simplify_gen_subreg (SImode, extension_dst,
					     GET_MODE (extension_dst), 0);
      if (modified_between_p (extension_dst, insn, use_at_insn))
	{
	  /* The value has been overwritten before the use; preserve it
	     in a fresh pseudo right after the extending insn.  */
	  if (dump_file)
	    fprintf (dump_file,
		     "use_as_extended_reg: dest reg %d of extending insn %d is "
		     "modified, inserting a reg-reg copy\n",
		     REGNO (extension_dst), INSN_UID (insn));

	  rtx r = gen_reg_rtx (SImode);
	  emit_insn_after (gen_move_insn (r, extension_dst), insn);
	  return r;
	}
      else
	{
	  /* Make sure the reg is not marked dead/unused at the extending
	     insn, since its value is now used later.  */
	  sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
	  return extension_dst;
	}
    }
}
12134
12135 bool
can_use_as_unextended_reg(void)12136 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12137 {
12138 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12139 && (from_mode == QImode || from_mode == HImode)
12140 && set_src != NULL)
12141 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12142 else
12143 return false;
12144 }
12145
/* Return an SImode view of the pre-extension reg so it can be used at
   USE_AT_INSN, inserting a reg-reg copy after the extending insn if the
   original reg is modified before the use.  */
rtx
sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
{
  gcc_assert (can_use_as_unextended_reg ());

  rtx r = XEXP (set_src, 0);
  /* SImode view of the original narrow reg.  */
  rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);

  if (modified_between_p (r, insn, use_at_insn))
    {
      /* The reg is overwritten before the use; preserve its value in a
	 fresh pseudo right after the extending insn.  */
      rtx r1 = gen_reg_rtx (SImode);
      emit_insn_after (gen_move_insn (r1, r0), insn);
      return r1;
    }
  else
    {
      /* The reg survives until the use; make sure it is not marked
	 dead/unused at the extending insn.  */
      sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
					  ? REGNO (SUBREG_REG (r))
					  : REGNO (r));
      return r0;
    }
}
12168
/* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
   perform the necessary checks on the operands and split it accordingly.
   SUBREG_MODE is the narrow mode being tested, SUBREG_OFFSET the byte
   offset of the tested subword within operands[0].  */
void
sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
		      int subreg_offset, rtx operands[])
{
  gcc_assert (subreg_mode == QImode || subreg_mode == HImode);

  sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
							       curr_insn);
  sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
							       curr_insn);

  /* If one of the operands is known to be zero extended, that's already
     sufficient to mask out the unwanted high bits.  */
  if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      operands[1]));
      return;
    }
  if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (operands[0],
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* None of the operands seem to be zero extended.
     If both are sign extended it's OK, too.  */
  if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
      && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
    {
      emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
			      eop1.use_as_extended_reg (curr_insn)));
      return;
    }

  /* Otherwise we have to insert a zero extension on one of the operands to
     mask out the unwanted high bits.
     Prefer the operand that has no known extension.  */
  if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
    std::swap (operands[0], operands[1]);

  /* Zero extend the subword of operands[0] into a fresh pseudo and test
     that against operands[1].  */
  rtx tmp0 = gen_reg_rtx (SImode);
  rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
				  GET_MODE (operands[0]), subreg_offset);
  emit_insn (subreg_mode == QImode
	     ? gen_zero_extendqisi2 (tmp0, tmp1)
	     : gen_zero_extendhisi2 (tmp0, tmp1));
  emit_insn (gen_tstsi_t (tmp0, operands[1]));
}
12221
/* A helper class to increment/decrement a counter variable each time a
   function is entered/left.  The counter is shared by reference; the
   destructor asserts that it never goes negative.  */
class scope_counter
{
public:
  scope_counter (int& counter) : m_counter (counter) { ++m_counter; }

  ~scope_counter (void)
  {
    --m_counter;
    gcc_assert (m_counter >= 0);
  }

  /* Current nesting depth of the tracked scope.  */
  int count (void) const { return m_counter; }

private:
  int& m_counter;
};
12240
12241 /* Given an rtx x, determine whether the expression can be used to create
   an insn that calculates x and stores the result in the T bit.
12243 This is used by the 'treg_set_expr' predicate to construct insns sequences
12244 where T bit results are fed into other insns, such as addc, subc, negc
12245 insns.
12246
12247 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12248 distinguish between 'positive' and 'negative' forms. For now this has to
12249 be done in the preparation code. We could also introduce
12250 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
   two different patterns for the 'positive' and 'negative' forms.  However,
12252 the total amount of lines of code seems to be about the same and the
12253 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12254 recog function would need to look inside the expression by temporarily
12255 splitting it. */
12256 static int sh_recog_treg_set_expr_reent_count = 0;
12257
/* Check whether the expression OP can be matched as an insn that stores
   its result in the T bit.  This backs the 'treg_set_expr' predicate.
   It invokes recog re-entrantly on a raw insn, so the recog state is
   saved and restored around the call.  */
bool
sh_recog_treg_set_expr (rtx op, machine_mode mode)
{
  scope_counter recursion (sh_recog_treg_set_expr_reent_count);

  /* Limit the recursion count to avoid nested expressions which we can't
     resolve to a single treg set insn.  */
  if (recursion.count () > 1)
    return false;

  /* Early accept known possible operands before doing recog.  */
  if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
      || negt_reg_operand (op, mode))
    return true;

  /* Early reject impossible operands before doing recog.
     There are some (set ((t) (subreg ...))) patterns, but we must be careful
     not to allow any invalid reg-reg or mem-reg moves, or else other passes
     such as lower-subreg will bail out.  Some insns such as SH4A movua are
     done with UNSPEC, so must reject those, too, or else it would result
     in an invalid reg -> treg move.  */
  if (CONST_INT_P (op) || register_operand (op, mode)
      || memory_operand (op, mode) || sh_unspec_insn_p (op))
    return false;

  if (!can_create_pseudo_p ())
    return false;

  /* expand_debug_locations may call this to compute rtx costs at
     very early stage.  In that case, don't make new insns here to
     avoid codegen differences with -g.  */
  if (currently_expanding_to_rtl)
    return false;

  /* We are going to invoke recog in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  /* Build a detached insn setting the T reg from OP and try to recognize
     it.  */
  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  /* If the comparison op doesn't have a result mode, set it to SImode.  */
  machine_mode prev_op_mode = GET_MODE (op);
  if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
    PUT_MODE (op, SImode);

  int result = recog (PATTERN (i), i, 0);

  /* It seems there is no insn like that.  Create a negated version and
     try again.  If we hit a negated form, we'll allow that and append a
     nott sequence when splitting out the insns.  Insns that do the split
     can then remove the trailing nott if they know how to deal with it.  */
  if (result < 0 && COMPARISON_P (op))
    {
      machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
      if (cmp_mode == VOIDmode)
	cmp_mode = GET_MODE (XEXP (op, 1));

      rtx_code prev_code = GET_CODE (op);
      PUT_CODE (op, reverse_condition (GET_CODE (op)));
      result = recog (PATTERN (i), i, 0);
      PUT_CODE (op, prev_code);
    }

  /* Undo the temporary mode change and restore the recog state.  */
  PUT_MODE (op, prev_op_mode);
  recog_data = prev_recog_data;
  return result >= 0;
}
12327
/* Returns true when recog of a 'treg_set_expr' is currently in progress.
   This can be used as a condition for insn/split patterns to allow certain
   T bit setting patterns only to be matched as sub expressions of other
   patterns.  */
bool
sh_in_recog_treg_set_expr (void)
{
  return sh_recog_treg_set_expr_reent_count > 0;
}
12337
/* Given an rtx x, which is assumed to be some expression that has been
   matched by the 'treg_set_expr' predicate before, split and emit the
   insns that are necessary to calculate the expression and store the result
   in the T bit.
   The splitting is done recursively similar to 'try_split' in emit-rtl.c.
   Unfortunately we can't use 'try_split' here directly, as it tries to invoke
   'delete_insn' which then causes the DF parts to bail out, because we
   currently are inside another gen_split* function and would invoke
   'try_split' in a reentrant way.
   Returns the first and last insn of the resulting sequence (which may
   both be I itself when no split was applied).  N is the recursion
   depth, used only for dump output.  */
static std::pair<rtx_insn*, rtx_insn*>
sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
{
  if (dump_file)
    {
      fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
      print_rtl_single (dump_file, i);
      fprintf (dump_file, "\n");
    }

  rtx_insn* seq = split_insns (PATTERN (i), curr_insn);

  /* No splitter matched; the insn stays as it is.  */
  if (seq == NULL)
    return std::make_pair (i, i);

  /* Avoid infinite splitter loops if any insn of the result matches
     the original pattern.  */
  for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
    if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
      return std::make_pair (i, i);

  unshare_all_rtl_in_chain (seq);

  /* 'seq' is now a replacement for 'i'.  Assuming that 'i' is an insn in
     a linked list, replace the single insn with the new insns.  */
  rtx_insn* seqlast = seq;
  while (NEXT_INSN (seqlast) != NULL)
    seqlast = NEXT_INSN (seqlast);

  /* Splice seq..seqlast into the chain in place of i and detach i.  */
  if (rtx_insn* iprev = PREV_INSN (i))
    SET_NEXT_INSN (iprev) = seq;
  if (rtx_insn* inext = NEXT_INSN (i))
    SET_PREV_INSN (inext) = seqlast;

  SET_PREV_INSN (seq) = PREV_INSN (i);
  SET_NEXT_INSN (seqlast) = NEXT_INSN (i);

  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  /* Recursively split all insns.  */
  for (i = seq; ; i = NEXT_INSN (i))
    {
      std::pair<rtx_insn*, rtx_insn*> ii =
	  sh_try_split_insn_simple (i, curr_insn, n + 1);
      if (i == seq)
	seq = ii.first;
      if (i == seqlast)
	{
	  seqlast = ii.second;
	  break;
	}
      i = ii.first;
    }

  return std::make_pair (seq, seqlast);
}
12404
/* Split and emit the insn sequence that computes expression X into the T bit
   and return a sh_treg_insns describing the emitted insns.  X must have been
   matched by the 'treg_set_expr' predicate before.  CURR_INSN is the insn
   being split, forwarded to the recursive splitter for pattern conditions.
   Emits into the current insn stream (caller is expected to be inside a
   start_sequence / gen_split context).  */
sh_treg_insns
sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
{
  /* If X already is the T bit itself, nothing needs to be computed.  */
  if (t_reg_operand (x, VOIDmode))
    return sh_treg_insns ();

  /* Mark recog/split as re-entered for the duration of this function,
     observable via sh_in_recog_treg_set_expr.  */
  scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);

  /* Wrap X into a free-standing 'T = X' insn, detached from any chain.  */
  rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
  SET_PREV_INSN (i) = NULL;
  SET_NEXT_INSN (i) = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insn:\n");
      print_rtl (dump_file, i);
      fprintf (dump_file, "\n");
    }

  /* If the insn is not found, we will try a negated form and append
     a nott.  */
  bool append_nott = false;

  /* We are going to invoke recog/split_insns in a re-entrant way and thus
     have to capture its current state and restore it afterwards.  */
  recog_data_d prev_recog_data = recog_data;

  if (negt_reg_operand (x, GET_MODE (x)))
    {
      /* This is a normal movt followed by a nott.  It will be converted
	 into a movrt after initial expansion.  */
      XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
      append_nott = true;
    }
  else
    {
      /* If the comparison op doesn't have a mode set, set it to SImode.  */
      if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
	PUT_MODE (x, SImode);

      int insn_code = recog (PATTERN (i), i, 0);

      /* No direct match — retry with the reversed condition and remember
	 to invert the T bit afterwards with a nott.  */
      if (insn_code < 0 && COMPARISON_P (x))
	{
	  /* NOTE(review): cmp_mode is computed here but never used below —
	     looks like a leftover; confirm before removing.  */
	  machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
	  if (cmp_mode == VOIDmode)
	    cmp_mode = GET_MODE (XEXP (x, 1));

	  PUT_CODE (x, reverse_condition (GET_CODE (x)));
	  insn_code = recog (PATTERN (i), i, 0);
	  append_nott = true;
	}

      gcc_assert (insn_code >= 0);
    }

  /* Try to recursively split the insn.  Some insns might refuse to split
     any further while we are in the treg_set_expr splitting phase.  They
     will be emitted as part of the outer insn and then split again.  */
  std::pair<rtx_insn*, rtx_insn*> insnlist =
    sh_try_split_insn_simple (i, curr_insn);

  /* Restore recog state.  */
  recog_data = prev_recog_data;

  /* If the split already ends in a nott, a pending nott of ours would
     cancel it out — detect it so the pair can be removed below.  */
  rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
			? insnlist.second
			: NULL;
  if (dump_file)
    {
      fprintf (dump_file, "split_treg_set_expr insnlist:\n");
      print_rtl (dump_file, insnlist.first);
      fprintf (dump_file, "\n");

      if (nott_insn != NULL)
	fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
    }

  emit_insn (insnlist.first);

  /* Two inversions cancel: drop the trailing nott instead of appending
     another one.  */
  if (nott_insn != NULL && append_nott)
    {
      if (dump_file)
	fprintf (dump_file, "removing trailing nott\n");
      remove_insn (nott_insn);
      nott_insn = NULL;
      append_nott = false;
    }

  if (append_nott)
    nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));

  rtx_insn* first_insn = get_insns ();

  if (dump_file)
    {
      fprintf (dump_file, "resulting insns:\n");
      print_rtl (dump_file, first_insn);
      fprintf (dump_file, "\n");
    }

  return sh_treg_insns (first_insn, nott_insn);
}
12508
12509 /*------------------------------------------------------------------------------
12510 Mode switching support code.
12511 */
12512
12513 static void
sh_emit_mode_set(int entity ATTRIBUTE_UNUSED,int mode,int prev_mode,HARD_REG_SET regs_live ATTRIBUTE_UNUSED)12514 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12515 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12516 {
12517 if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300)
12518 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12519 {
12520 emit_insn (gen_toggle_pr ());
12521 if (TARGET_FMOVD)
12522 emit_insn (gen_toggle_sz ());
12523 }
12524 else if (mode != FP_MODE_NONE)
12525 {
12526 rtx tmp = gen_reg_rtx (SImode);
12527 emit_insn (gen_sts_fpscr (tmp));
12528 rtx i = NULL;
12529
12530 const unsigned HOST_WIDE_INT fpbits =
12531 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12532
12533 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12534 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12535 else if (mode == FP_MODE_SINGLE)
12536 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12537 else if (mode == FP_MODE_DOUBLE)
12538 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12539 else
12540 gcc_unreachable ();
12541
12542 emit_insn (i);
12543 emit_insn (gen_lds_fpscr (tmp));
12544 }
12545 }
12546
12547 static int
sh_mode_needed(int entity ATTRIBUTE_UNUSED,rtx_insn * insn)12548 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12549 {
12550 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12551 }
12552
12553 static int
sh_mode_after(int entity ATTRIBUTE_UNUSED,int mode,rtx_insn * insn)12554 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12555 {
12556 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12557 get_attr_fp_set (insn) != FP_SET_NONE)
12558 return (int) get_attr_fp_set (insn);
12559 else
12560 return mode;
12561 }
12562
12563 static int
sh_mode_entry(int entity ATTRIBUTE_UNUSED)12564 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12565 {
12566 return NORMAL_MODE (entity);
12567 }
12568
12569 static int
sh_mode_exit(int entity ATTRIBUTE_UNUSED)12570 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12571 {
12572 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12573 }
12574
12575 static int
sh_mode_priority(int entity ATTRIBUTE_UNUSED,int n)12576 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12577 {
12578 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12579 }
12580
12581 /*------------------------------------------------------------------------------
12582 Misc
12583 */
12584
12585 /* Return true if we use LRA instead of reload pass. */
12586 bool
sh_lra_p(void)12587 sh_lra_p (void)
12588 {
12589 return sh_lra_flag;
12590 }
12591
12592 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12593
12594 static bool
sh_use_by_pieces_infrastructure_p(unsigned HOST_WIDE_INT size,unsigned int align,enum by_pieces_operation op,bool speed_p)12595 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12596 unsigned int align,
12597 enum by_pieces_operation op,
12598 bool speed_p)
12599 {
12600 switch (op)
12601 {
12602 case MOVE_BY_PIECES:
12603 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12604 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12605 case STORE_BY_PIECES:
12606 case SET_BY_PIECES:
12607 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12608 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12609 default:
12610 return default_use_by_pieces_infrastructure_p (size, align,
12611 op, speed_p);
12612 }
12613 }
12614
12615 bool
sh_cannot_force_const_mem_p(machine_mode mode ATTRIBUTE_UNUSED,rtx x ATTRIBUTE_UNUSED)12616 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12617 rtx x ATTRIBUTE_UNUSED)
12618 {
12619 return TARGET_FDPIC;
12620 }
12621
12622 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12623 function descriptor) into r1 and the GOT address into r12,
12624 returning an rtx for r1. */
12625
12626 rtx
sh_load_function_descriptor(rtx funcdesc)12627 sh_load_function_descriptor (rtx funcdesc)
12628 {
12629 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12630 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12631 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12632 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12633
12634 emit_move_insn (r1, fnaddr);
12635 /* The ABI requires the entry point address to be loaded first, so
12636 prevent the load from being moved after that of the GOT
12637 address. */
12638 emit_insn (gen_blockage ());
12639 emit_move_insn (pic_reg, gotaddr);
12640 return r1;
12641 }
12642
12643 /* Return an rtx holding the initial value of the FDPIC register (the
12644 FDPIC pointer passed in from the caller). */
12645
12646 rtx
sh_get_fdpic_reg_initial_val(void)12647 sh_get_fdpic_reg_initial_val (void)
12648 {
12649 return get_hard_reg_initial_val (Pmode, PIC_REG);
12650 }
12651
12652 #include "gt-sh.h"
12653