/* Output routines for GCC for Renesas / SuperH SH.
   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
   2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   Contributed by Steve Chamberlain (sac@cygnus.com).
   Improved by Jim Wilson (wilson@cygnus.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "dwarf2.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "intl.h"
#include "sched-int.h"
#include "ggc.h"
#include "tree-gimple.h"
#include "cfgloop.h"


int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
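
/* For example, a DImode value held in the register pair (r4, r5) on a
   little-endian target keeps its least significant word in r4 (word 0),
   so LSW is 0 and MSW is 1; on a big-endian target the two values are
   swapped.  */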

/* These are some macros to abstract register modes.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
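
/* Note: CONST_OK_FOR_I08 accepts the signed 8-bit immediates of the SH
   "add #imm8,Rn" instruction, while CONST_OK_FOR_I10 accepts the signed
   10-bit immediates of the SHmedia ADDI instruction; similarly, the
   GEN_* macros select the DImode operations when pointers are 64 bits
   wide (TARGET_SHMEDIA64).  */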

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

tree sh_deferred_function_attributes;
tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;

/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Definitions used in ready queue reordering for first scheduling pass.  */

/* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID.  */
static short *regmode_weight[2];

/* Total SFmode and SImode weights of scheduled insns.  */
static int curr_regmode_pressure[2];

/* If true, skip cycles for Q -> R movement.  */
static int skip_cycles = 0;

/* Cached value of can_issue_more.  This is cached in sh_variable_issue hook
   and returned from sh_reorder2.  */
static short cached_can_issue_more;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS, GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS,   /* b */ TARGET_REGS, /* c */ FPSCR_REGS,   /* d */ DF_REGS,
  /* e */ FP_REGS,    /* f */ FP_REGS,     /* g **/ NO_REGS,     /* h */ NO_REGS,
  /* i **/ NO_REGS,   /* j */ NO_REGS,     /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS,   /* n **/ NO_REGS,    /* o **/ NO_REGS,     /* p **/ NO_REGS,
  /* q */ NO_REGS,    /* r **/ NO_REGS,    /* s **/ NO_REGS,     /* t */ T_REGS,
  /* u */ NO_REGS,    /* v */ NO_REGS,     /* w */ FP0_REGS,     /* x */ MAC_REGS,
  /* y */ FPUL_REGS,  /* z */ R0_REGS
};

int assembler_dialect;

static bool shmedia_space_reserved_for_target_registers;

static bool sh_handle_option (size_t, const char *, int);
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static const char *sh_check_pch_target_flags (int);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_issue_rate (void);
static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
static short find_set_regmode_weight (rtx, enum machine_mode);
static short find_insn_regmode_weight (rtx, enum machine_mode);
static void find_regmode_weight (int, enum machine_mode);
static void sh_md_init_global (FILE *, int, int);
static void sh_md_finish_global (FILE *, int);
static int rank_for_reorder (const void *, const void *);
static void swap_reorder (rtx *, int);
static void ready_reorder (rtx *, int);
static short high_pressure (enum machine_mode);
static int sh_reorder (FILE *, int, rtx *, int *, int);
static int sh_reorder2 (FILE *, int, rtx *, int *, int);
static void sh_md_init (FILE *, int, int);
static int sh_variable_issue (FILE *, int, rtx, int);

static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
#ifdef TARGET_ADJUST_UNROLL_MAX
static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
#endif
static int sh_pr_n_sets (void);
static rtx sh_allocate_initial_value (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
                                                struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
                              tree, bool);
static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                 tree, bool);
static int sh_dwarf_calling_convention (tree);
static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);


/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION sh_handle_option

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

/* The next 5 hooks have been implemented for reenabling sched1.  With the
   help of these macros we are limiting the movement of insns in sched1 to
   reduce the register pressure.  The overall idea is to keep count of SImode
   and SFmode regs required by already scheduled insns.  When these counts
   cross some threshold values, give priority to insns that free registers.
   The insn that frees registers is most likely to be the insn with the
   lowest LUID (original insn order); but such an insn might be in the
   stalled queue (Q) instead of the ready queue (R).  To solve this, we skip
   cycles up to a maximum of 8 cycles so that such insns may move from
   Q -> R.

   The descriptions of the hooks are as below:

   TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
   scheduler; it is called inside the sched_init function just after
   find_insn_reg_weights function call.  It is used to calculate the SImode
   and SFmode weights of insns of basic blocks; much like what
   find_insn_reg_weights does.
   TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.

   TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
   indicated by TARGET_SCHED_REORDER2; doing this may move insns from
   (Q)->(R).

   TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
   high, reorder the ready queue so that the insn with the lowest LUID will
   be issued next.

   TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
   TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.

   TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
   can be returned from TARGET_SCHED_REORDER2.

   TARGET_SCHED_INIT: Reset the register pressure counting variables.  */

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL sh_md_init_global

#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER sh_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 sh_reorder2

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sh_md_init

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_ALLOCATE_INITIAL_VALUE
#define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES sh_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p

#undef TARGET_CHECK_PCH_TARGET_FLAGS
#define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags

#undef TARGET_DWARF_CALLING_CONVENTION
#define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention

/* Return regmode weight for insn.  */
#define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]

/* Return current register pressure for regmode.  */
#define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]

#ifdef SYMBIAN

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
#undef TARGET_CXX_IMPORT_EXPORT_CLASS
#define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class

#endif /* SYMBIAN */

#ifdef TARGET_ADJUST_UNROLL_MAX
#undef TARGET_ADJUST_UNROLL_MAX
#define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

/* Implement TARGET_HANDLE_OPTION.  */

static bool
sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
                  int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_m1:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
      return true;

    case OPT_m2:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
      return true;

    case OPT_m2a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
      return true;

    case OPT_m2a_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
      return true;

    case OPT_m2a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
      return true;

    case OPT_m2a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
      return true;

    case OPT_m2e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
      return true;

    case OPT_m3:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
      return true;

    case OPT_m3e:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
      return true;

    case OPT_m4:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
      return true;

    case OPT_m4_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
      return true;

    case OPT_m4_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
      return true;

    case OPT_m4_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
      return true;

    case OPT_m4a:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
      return true;

    case OPT_m4a_nofpu:
    case OPT_m4al:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
      return true;

    case OPT_m4a_single:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
      return true;

    case OPT_m4a_single_only:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
      return true;

    case OPT_m5_32media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
      return true;

    case OPT_m5_32media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
      return true;

    case OPT_m5_64media:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
      return true;

    case OPT_m5_64media_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
      return true;

    case OPT_m5_compact:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
      return true;

    case OPT_m5_compact_nofpu:
      target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
      return true;

    default:
      return true;
    }
}

/* Print the operand address in x to the stream.  */

void
print_operand_address (FILE *stream, rtx x)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      break;

    case PLUS:
      {
        rtx base = XEXP (x, 0);
        rtx index = XEXP (x, 1);

        switch (GET_CODE (index))
          {
          case CONST_INT:
            fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
                     reg_names[true_regnum (base)]);
            break;

          case REG:
          case SUBREG:
            {
              int base_num = true_regnum (base);
              int index_num = true_regnum (index);

              fprintf (stream, "@(r0,%s)",
                       reg_names[MAX (base_num, index_num)]);
              break;
            }

          default:
            gcc_unreachable ();
          }
      }
      break;

    case PRE_DEC:
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    case POST_INC:
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      break;

    default:
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    }
}

/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   '>'  print branch target if -fverbose-asm
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'd'  print a V2SF reg as dN instead of fpN.
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'U'  Likewise for {LD,ST}{HI,LO}.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */
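
/* As an illustrative example (not exhaustive): on a little-endian SH4,
   a DFmode value in the general register pair r2/r3 has its least
   significant word in r2, so "%R0" prints "r2" and "%S0" prints "r3";
   the same value in the floating point pair fr2/fr3, which is always
   stored most significant half first, makes "%S0" print "fr2" and
   "%R0" print "fr3".  */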

void
print_operand (FILE *stream, rtx x, int code)
{
  int regno;
  enum machine_mode mode;

  switch (code)
    {
      tree trapa_attr;

    case '.':
      if (final_sequence
          && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
          && get_attr_length (XVECEXP (final_sequence, 0, 1)))
        fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      trapa_attr = lookup_attribute ("trap_exit",
                                     DECL_ATTRIBUTES (current_function_decl));
      if (trapa_attr)
        fprintf (stream, "trapa #%ld",
                 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
      else if (sh_cfun_interrupt_handler_p ())
        fprintf (stream, "rte");
      else
        fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
        fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
        rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

        if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
          fputs ("/u", stream);
        break;
      }
    case '>':
      if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
        {
          fputs ("\t! target: ", stream);
          output_addr_const (stream, JUMP_LABEL (current_output_insn));
        }
      break;
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
      /* N.B.: %R / %S / %T adjust memory addresses by four.
         For SHMEDIA, that means they can be used to access the first and
         second 32 bit part of a 64 bit (or larger) value that
         might be held in floating point registers or memory.
         While they can be used to access 64 bit parts of a larger value
         held in general purpose registers, that won't work with memory -
         neither for fp registers, since the frxx names are used.  */
    case 'R':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 1 : LSW;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * LSW);
          print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * LSW);
          if (sub)
            print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%R");
        }
      break;
    case 'S':
      if (REG_P (x) || GET_CODE (x) == SUBREG)
        {
          regno = true_regnum (x);
          regno += FP_REGISTER_P (regno) ? 0 : MSW;
          fputs (reg_names[regno], (stream));
        }
      else if (MEM_P (x))
        {
          x = adjust_address (x, SImode, 4 * MSW);
          print_operand_address (stream, XEXP (x, 0));
        }
      else
        {
          rtx sub = NULL_RTX;

          mode = GET_MODE (x);
          if (mode == VOIDmode)
            mode = DImode;
          if (GET_MODE_SIZE (mode) >= 8)
            sub = simplify_subreg (SImode, x, mode, 4 * MSW);
          if (sub)
            print_operand (stream, sub, 0);
          else
            output_operand_lossage ("invalid operand to %%S");
        }
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
        {
        case REG:
          fputs (reg_names[REGNO (x) + 1], (stream));
          break;
        case MEM:
          if (GET_CODE (XEXP (x, 0)) != PRE_DEC
              && GET_CODE (XEXP (x, 0)) != POST_INC)
            x = adjust_address (x, SImode, 4);
          print_operand_address (stream, XEXP (x, 0));
          break;
        default:
          break;
        }
      break;
    case 'o':
      switch (GET_CODE (x))
        {
        case PLUS:  fputs ("add", stream); break;
        case MINUS: fputs ("sub", stream); break;
        case MULT:  fputs ("mul", stream); break;
        case DIV:   fputs ("div", stream); break;
        case EQ:    fputs ("eq",  stream); break;
        case NE:    fputs ("ne",  stream); break;
        case GT:  case LT:  fputs ("gt",  stream); break;
        case GE:  case LE:  fputs ("ge",  stream); break;
        case GTU: case LTU: fputs ("gtu", stream); break;
        case GEU: case LEU: fputs ("geu", stream); break;
        default:
          break;
        }
      break;
    case 'M':
      if (GET_CODE (x) == MEM
          && GET_CODE (XEXP (x, 0)) == PLUS
          && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
              || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
        fputc ('x', stream);
      break;

    case 'm':
      gcc_assert (GET_CODE (x) == MEM);
      x = XEXP (x, 0);
      /* Fall through.  */
    case 'U':
      switch (GET_CODE (x))
        {
        case REG:
        case SUBREG:
          print_operand (stream, x, 0);
          fputs (", 0", stream);
          break;

        case PLUS:
          print_operand (stream, XEXP (x, 0), 0);
          fputs (", ", stream);
          print_operand (stream, XEXP (x, 1), 0);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case 'd':
      gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);

      fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
      break;

    case 'N':
      if (x == CONST0_RTX (GET_MODE (x)))
        {
          fprintf ((stream), "r63");
          break;
        }
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
          break;
        }
      /* Fall through.  */

    default_output:
    default:
      regno = 0;
      mode = GET_MODE (x);

      switch (GET_CODE (x))
        {
        case TRUNCATE:
          {
            rtx inner = XEXP (x, 0);
            int offset = 0;
            enum machine_mode inner_mode;

            /* We might see SUBREGs with vector mode registers inside.  */
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && subreg_lowpart_p (inner))
              inner = SUBREG_REG (inner);
            if (GET_CODE (inner) == CONST_INT)
              {
                x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
                goto default_output;
              }
            inner_mode = GET_MODE (inner);
            if (GET_CODE (inner) == SUBREG
                && (GET_MODE_SIZE (GET_MODE (inner))
                    < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
                && GET_CODE (SUBREG_REG (inner)) == REG)
              {
                offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
                                              GET_MODE (SUBREG_REG (inner)),
                                              SUBREG_BYTE (inner),
                                              GET_MODE (inner));
                inner = SUBREG_REG (inner);
              }
            if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
              abort ();
            /* Floating point register pairs are always big endian;
               general purpose registers are 64 bit wide.  */
            regno = REGNO (inner);
            regno = (HARD_REGNO_NREGS (regno, inner_mode)
                     - HARD_REGNO_NREGS (regno, mode))
                    + offset;
            x = inner;
            goto reg;
          }
        case SIGN_EXTEND:
          x = XEXP (x, 0);
          goto reg;
          /* FIXME: We need this on SHmedia32 because reload generates
             some sign-extended HI or QI loads into DImode registers
             but, because Pmode is SImode, the address ends up with a
             subreg:SI of the DImode register.  Maybe reload should be
             fixed so as to apply alter_subreg to such loads?  */
        case IF_THEN_ELSE:
          gcc_assert (trapping_target_operand (x, VOIDmode));
          x = XEXP (XEXP (x, 2), 0);
          goto default_output;
        case SUBREG:
          gcc_assert (SUBREG_BYTE (x) == 0
                      && GET_CODE (SUBREG_REG (x)) == REG);

          x = SUBREG_REG (x);
          /* Fall through.  */

        reg:
        case REG:
          regno += REGNO (x);
          if (FP_REGISTER_P (regno)
              && mode == V16SFmode)
            fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && mode == V4SFmode)
            fprintf ((stream), "fv%s", reg_names[regno] + 2);
          else if (GET_CODE (x) == REG
                   && mode == V2SFmode)
            fprintf ((stream), "fp%s", reg_names[regno] + 2);
          else if (FP_REGISTER_P (REGNO (x))
                   && GET_MODE_SIZE (mode) > 4)
            fprintf ((stream), "d%s", reg_names[regno] + 1);
          else
            fputs (reg_names[regno], (stream));
          break;

        case MEM:
          output_address (XEXP (x, 0));
          break;

        case CONST:
          if (TARGET_SHMEDIA
              && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
              && (GET_MODE (XEXP (x, 0)) == DImode
                  || GET_MODE (XEXP (x, 0)) == SImode)
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
              && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
            {
              rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

              fputc ('(', stream);
              if (GET_CODE (val) == ASHIFTRT)
                {
                  fputc ('(', stream);
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, XEXP (val, 0));
                  if (GET_CODE (XEXP (val, 0)) == CONST)
                    fputc (')', stream);
                  fputs (" >> ", stream);
                  output_addr_const (stream, XEXP (val, 1));
                  fputc (')', stream);
                }
              else
                {
                  if (GET_CODE (val) == CONST)
                    fputc ('(', stream);
                  output_addr_const (stream, val);
                  if (GET_CODE (val) == CONST)
                    fputc (')', stream);
                }
              fputs (" & 65535)", stream);
              break;
            }

          /* Fall through.  */
        default:
          if (TARGET_SH1)
            fputc ('#', stream);
          output_addr_const (stream, x);
          break;
        }
      break;
    }
}

/* Like force_operand, but guarantees that VALUE ends up in TARGET.  */
static void
force_into (rtx value, rtx target)
{
  value = force_operand (value, target);
  if (! rtx_equal_p (value, target))
    emit_insn (gen_move_insn (target, value));
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.  */

int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);

  if (! constp)
    return 0;

  /* If we could use mov.l to move words and dest is word-aligned, we
     can use movua.l for loads and still generate a relatively short
     and efficient sequence.  */
  if (TARGET_SH4A_ARCH && align < 4
      && MEM_ALIGN (operands[0]) >= 32
      && can_move_by_pieces (bytes, 32))
    {
      rtx dest = copy_rtx (operands[0]);
      rtx src = copy_rtx (operands[1]);
      /* We could use different pseudos for each copied word, but
         since movua can only load into r0, it's kind of
         pointless.  */
      rtx temp = gen_reg_rtx (SImode);
      rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
      int copied = 0;

      while (copied + 4 <= bytes)
        {
          rtx to = adjust_address (dest, SImode, copied);
          rtx from = adjust_automodify_address (src, SImode, src_addr, copied);

          emit_insn (gen_movua (temp, from));
          emit_move_insn (src_addr, plus_constant (src_addr, 4));
          emit_move_insn (to, temp);
          copied += 4;
        }

      if (copied < bytes)
        move_by_pieces (adjust_address (dest, BLKmode, copied),
                        adjust_automodify_address (src, BLKmode,
                                                   src_addr, copied),
                        bytes - copied, align, 0);

      return 1;
    }

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      if (bytes < 12)
        return 0;
      else if (bytes == 12)
        {
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);

          function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }
      else if (! TARGET_SMALLCODE)
        {
          const char *entry_name;
          rtx func_addr_rtx = gen_reg_rtx (Pmode);
          int dwords;
          rtx r4 = gen_rtx_REG (SImode, 4);
          rtx r5 = gen_rtx_REG (SImode, 5);
          rtx r6 = gen_rtx_REG (SImode, 6);

          entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
          function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
          force_into (XEXP (operands[0], 0), r4);
          force_into (XEXP (operands[1], 0), r5);

          dwords = bytes >> 3;
          emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
          emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
          return 1;
        }
      else
        return 0;
    }
  if (bytes < 64)
    {
      char entry[30];
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);

      sprintf (entry, "__movmemSI%d", bytes);
      function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      rtx func_addr_rtx = gen_reg_rtx (Pmode);
      int final_switch, while_loop;
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);

      function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
      force_into (XEXP (operands[0], 0), r4);
      force_into (XEXP (operands[1], 0), r5);

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
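      /* Checking that example against the computation below: 72 bytes is
         bytes / 4 == 18 words, so final_switch == 16 - (18 % 16) == 14 and
         while_loop == ((18 / 16) - 1) * 16 == 0; r6 starts at 14, one pass
         through the 64 byte loop leaves it at -2, which selects the tail
         moves for the remaining 8 bytes.  */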

      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}

/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  */

int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
            && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
        {
          if (GET_CODE (operands[0]) == MEM)
            operands[1] = force_reg (Pmode, operands[1]);
          else if (TARGET_SHMEDIA
                   && GET_CODE (operands[1]) == LABEL_REF
                   && target_reg_operand (operands[0], mode))
            /* It's ok.  */;
          else
            {
              temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
              operands[1] = legitimize_pic_address (operands[1], mode, temp);
            }
        }
      else if (GET_CODE (operands[1]) == CONST
               && GET_CODE (XEXP (operands[1], 0)) == PLUS
               && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
        {
          temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
          temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
                                         mode, temp);
          operands[1] = expand_binop (mode, add_optab, temp,
                                      XEXP (XEXP (operands[1], 0), 1),
                                      no_new_pseudos ? temp
                                      : gen_reg_rtx (Pmode),
                                      0, OPTAB_LIB_WIDEN);
        }
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if neither operand is a register.  */
      if (! register_operand (operands[0], mode)
          && ! sh_register_operand (operands[1], mode))
        operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
        {
          /* This is like change_address_1 (operands[0], mode, 0, 1),
             except that we can't use that function because it is static.  */
          rtx new = change_address (operands[0], mode, 0);
          MEM_COPY_ATTRIBUTES (new, operands[0]);
          operands[0] = new;
        }

      /* This case can happen while generating code to move the result
         of a library call to the target.  Reject `st r0,@(rX,rY)' because
         reload will fail to find a spill register for rX, since r0 is already
         being used for the source.  */
      else if (TARGET_SH1
               && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
               && GET_CODE (operands[0]) == MEM
               && GET_CODE (XEXP (operands[0], 0)) == PLUS
               && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
        operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1, opc;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
        {
          opc = XEXP (XEXP (op1, 0), 1);
          op1 = XEXP (XEXP (op1, 0), 0);
        }
      else
        opc = NULL_RTX;

      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
        {
          rtx tga_op1, tga_ret, tmp, tmp2;

          switch (tls_kind)
            {
            case TLS_MODEL_GLOBAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
              op1 = tga_ret;
              break;

            case TLS_MODEL_LOCAL_DYNAMIC:
              tga_ret = gen_rtx_REG (Pmode, R0_REG);
              emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

              tmp = gen_reg_rtx (Pmode);
              emit_move_insn (tmp, tga_ret);

              if (register_operand (op0, Pmode))
                tmp2 = op0;
              else
                tmp2 = gen_reg_rtx (Pmode);

              emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
              op1 = tmp2;
              break;

            case TLS_MODEL_INITIAL_EXEC:
              if (! flag_pic)
                {
                  /* Don't schedule insns for getting GOT address when
                     the first scheduling is enabled, to avoid spill
                     failures for R0.  */
                  if (flag_schedule_insns)
                    emit_insn (gen_blockage ());
                  emit_insn (gen_GOTaddr2picreg ());
                  emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
                                                                 PIC_REG)));
                  if (flag_schedule_insns)
                    emit_insn (gen_blockage ());
                }
              tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
              tmp = gen_sym2GOTTPOFF (op1);
              emit_insn (gen_tls_initial_exec (tga_op1, tmp));
              op1 = tga_op1;
              break;

            case TLS_MODEL_LOCAL_EXEC:
              tmp2 = gen_reg_rtx (Pmode);
              emit_insn (gen_load_gbr (tmp2));
              tmp = gen_reg_rtx (Pmode);
              emit_insn (gen_symTPOFF2reg (tmp, op1));

              if (register_operand (op0, Pmode))
                op1 = op0;
              else
                op1 = gen_reg_rtx (Pmode);

              emit_insn (gen_addsi3 (op1, tmp, tmp2));
              break;

            default:
              gcc_unreachable ();
            }
          if (opc)
            emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
          operands[1] = op1;
        }
    }

  return 0;
}

/* Prepare the operands for an scc instruction; make sure that the
   compare has been done.  */
rtx
prepare_scc_operands (enum rtx_code code)
{
  rtx t_reg = gen_rtx_REG (SImode, T_REG);
  enum rtx_code oldcode = code;
  enum machine_mode mode;

  /* First need a compare insn.  */
  switch (code)
    {
    case NE:
      /* It isn't possible to handle this case.  */
      gcc_unreachable ();
    case LT:
      code = GT;
      break;
    case LE:
      code = GE;
      break;
    case LTU:
      code = GTU;
      break;
    case LEU:
      code = GEU;
      break;
    default:
      break;
    }
  if (code != oldcode)
    {
      rtx tmp = sh_compare_op0;
      sh_compare_op0 = sh_compare_op1;
      sh_compare_op1 = tmp;
    }

  mode = GET_MODE (sh_compare_op0);
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  sh_compare_op0 = force_reg (mode, sh_compare_op0);
  if ((code != EQ && code != NE
       && (sh_compare_op1 != const0_rtx
           || code == GTU || code == GEU || code == LTU || code == LEU))
      || (mode == DImode && sh_compare_op1 != const0_rtx)
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    sh_compare_op1 = force_reg (mode, sh_compare_op1);

  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    (mode == SFmode ? emit_sf_insn : emit_df_insn)
      (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
                gen_rtx_SET (VOIDmode, t_reg,
                             gen_rtx_fmt_ee (code, SImode,
                                             sh_compare_op0, sh_compare_op1)),
                gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
  else
    emit_insn (gen_rtx_SET (VOIDmode, t_reg,
                            gen_rtx_fmt_ee (code, SImode,
                                            sh_compare_op0, sh_compare_op1)));

  return t_reg;
}

/* Called from the md file, set up the operands of a compare instruction.  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
          || code == GTU || code == GEU
          || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
                        gen_rtx_REG (SImode, T_REG),
                        gen_rtx_fmt_ee (code, SImode,
                                        sh_compare_op0, sh_compare_op1));
  if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      insn = gen_rtx_PARALLEL (VOIDmode,
                      gen_rtvec (2, insn,
                                 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}

/* Functions to output assembly code.  */

/* Return a sequence of instructions to perform DI or DF move.

   Since the SH cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
                   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      if (REGNO (src) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */

      if (REGNO (src) + 1 == REGNO (dst))
        return "mov %T1,%T0\n\tmov %1,%0";
      else
        return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      if (INTVAL (src) < 0)
        output_asm_insn ("mov #-1,%S0", operands);
      else
        output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      switch (GET_CODE (inside))
        {
        case REG:
          ptrreg = REGNO (inside);
          break;

        case SUBREG:
          ptrreg = subreg_regno (inside);
          break;

        case PLUS:
          ptrreg = REGNO (XEXP (inside, 0));
          /* ??? A r0+REG address shouldn't be possible here, because it isn't
             an offsettable address.  Unfortunately, offsettable addresses use
             QImode to check the offset, and a QImode offsettable address
             requires r0 for the other operand, which is not currently
             supported, so we can't use the 'o' constraint.
             Thus we must check for and handle r0+REG addresses here.
             We punt for now, since this is likely very rare.  */
          gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
          break;

        case LABEL_REF:
          return "mov.l %1,%0\n\tmov.l %1+4,%T0";
        case POST_INC:
          return "mov.l %1,%0\n\tmov.l %1,%T0";
        default:
          gcc_unreachable ();
        }

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
        return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}

/* Print an instruction which would have gone into a delay slot after
   another instruction, but couldn't because the other instruction expanded
   into a sequence where putting the slot insn at the end wouldn't work.  */

static void
print_slot (rtx insn)
{
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);

  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}

const char *
output_far_jump (rtx insn, rtx op)
{
  struct { rtx lab, reg, op; } this;
  rtx braf_base_lab = NULL_RTX;
  const char *jump;
  int far;
  int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
  rtx prev;

  this.lab = gen_label_rtx ();

  if (TARGET_SH2
      && offset >= -32764
      && offset - get_attr_length (insn) <= 32766)
    {
      far = 0;
      jump = "mov.w %O0,%1; braf %1";
    }
  else
    {
      far = 1;
      if (flag_pic)
        {
          if (TARGET_SH2)
            jump = "mov.l %O0,%1; braf %1";
          else
            jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
        }
      else
        jump = "mov.l %O0,%1; jmp @%1";
    }
  /* If we have a scratch register available, use it.  */
  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
    {
      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
      if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
        jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
      output_asm_insn (jump, &this.lab);
      if (dbr_sequence_length ())
        print_slot (final_sequence);
      else
        output_asm_insn ("nop", 0);
    }
  else
    {
      /* Output the delay slot insn first if any.  */
      if (dbr_sequence_length ())
        print_slot (final_sequence);

      this.reg = gen_rtx_REG (SImode, 13);
      /* We must keep the stack aligned to 8-byte boundaries on SH5.
         Fortunately, MACL is fixed and call-clobbered, and we never
         need its value across jumps, so save r13 in it instead of in
         the stack.  */
      if (TARGET_SH5)
        output_asm_insn ("lds r13, macl", 0);
      else
        output_asm_insn ("mov.l r13,@-r15", 0);
      output_asm_insn (jump, &this.lab);
      if (TARGET_SH5)
        output_asm_insn ("sts macl, r13", 0);
      else
        output_asm_insn ("mov.l @r15+,r13", 0);
    }
  if (far && flag_pic && TARGET_SH2)
    {
      braf_base_lab = gen_label_rtx ();
      (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                         CODE_LABEL_NUMBER (braf_base_lab));
    }
  if (far)
    output_asm_insn (".align 2", 0);
  (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
  this.op = op;
  if (far && flag_pic)
    {
      if (TARGET_SH2)
        this.lab = braf_base_lab;
      output_asm_insn (".long %O2-%O0", &this.lab);
    }
  else
    output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
  return "";
}

/* Local label counter, used for constants in the pool and inside
   pattern branches.  */

static int lf = 100;

/* Output code for ordinary branches.  */

const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
         branch to exceed its range (we could reverse it, but only
         when we know we won't overextend other branches; this should
         best be handled by relaxation).
         It can also happen when other condbranches hoist delay slot insn
         from their destination, thus leading to code size increase.
         But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
        {
          int label = lf++;
          /* The call to print_slot will clobber the operands.  */
          rtx op0 = operands[0];

          /* If the instruction in the delay slot is annulled (true), then
             there is no delay slot where we can put it now.  The only safe
             place for it is after the label.  final will do that by default.  */

          if (final_sequence
              && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
              && get_attr_length (XVECEXP (final_sequence, 0, 1)))
            {
              asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
                           ASSEMBLER_DIALECT ? "/" : ".", label);
              print_slot (final_sequence);
            }
          else
            asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

          output_asm_insn ("bra\t%l0", &op0);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, handle this like a short branch.  The linker
         will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
         extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
        {
          int label = lf++;

          gcc_assert (!final_sequence
                      || !(INSN_ANNULLED_BRANCH_P
                           (XVECEXP (final_sequence, 0, 0))));
          asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
                       logic ? "f" : "t",
                       ASSEMBLER_DIALECT ? "/" : ".", label);
          fprintf (asm_out_file, "\tnop\n");
          output_asm_insn ("bra\t%l0", operands);
          fprintf (asm_out_file, "\tnop\n");
          (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

          return "";
        }
      /* When relaxing, fall through.  */
    case 4:
      {
        char buffer[10];

        sprintf (buffer, "b%s%ss\t%%l0",
                 logic ? "t" : "f",
                 ASSEMBLER_DIALECT ? "/" : ".");
        output_asm_insn (buffer, &operands[0]);
        return "nop";
      }

    default:
      /* There should be no longer branches now - that would
         indicate that something has destroyed the branches set
         up in machine_dependent_reorg.  */
      gcc_unreachable ();
    }
}
1689
1690 const char *
output_branchy_insn(enum rtx_code code,const char * template,rtx insn,rtx * operands)1691 output_branchy_insn (enum rtx_code code, const char *template,
1692 rtx insn, rtx *operands)
1693 {
1694 rtx next_insn = NEXT_INSN (insn);
1695
1696 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1697 {
1698 rtx src = SET_SRC (PATTERN (next_insn));
1699 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1700 {
1701 /* Following branch not taken */
1702 operands[9] = gen_label_rtx ();
1703 emit_label_after (operands[9], next_insn);
1704 INSN_ADDRESSES_NEW (operands[9],
1705 INSN_ADDRESSES (INSN_UID (next_insn))
1706 + get_attr_length (next_insn));
1707 return template;
1708 }
1709 else
1710 {
1711 int offset = (branch_dest (next_insn)
1712 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1713 if (offset >= -252 && offset <= 258)
1714 {
1715 if (GET_CODE (src) == IF_THEN_ELSE)
1716 /* branch_true */
1717 src = XEXP (src, 1);
1718 operands[9] = src;
1719 return template;
1720 }
1721 }
1722 }
1723 operands[9] = gen_label_rtx ();
1724 emit_label_after (operands[9], insn);
1725 INSN_ADDRESSES_NEW (operands[9],
1726 INSN_ADDRESSES (INSN_UID (insn))
1727 + get_attr_length (insn));
1728 return template;
1729 }
1730
1731 const char *
1732 output_ieee_ccmpeq (rtx insn, rtx *operands)
1733 {
1734 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1735 insn, operands);
1736 }
1737
1738 /* Output the start of the assembler file. */
1739
1740 static void
1741 sh_file_start (void)
1742 {
1743 default_file_start ();
1744
1745 #ifdef SYMBIAN
1746 /* Declare the .directive section before it is used. */
1747 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1748 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1749 #endif
1750
1751 if (TARGET_ELF)
1752 /* We need to show the text section with the proper
1753 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1754 emits it without attributes, else GAS will complain.
1755 We can teach GAS specifically about the default
1756 attributes for our choice of text section, but
1757 then we would have to change GAS again if/when we change
1758 the text section name. */
1759 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1760 else
1761 /* Switch to the data section so that the coffsem symbol
1762 isn't in the text section. */
1763 data_section ();
1764
1765 if (TARGET_LITTLE_ENDIAN)
1766 fputs ("\t.little\n", asm_out_file);
1767
1768 if (!TARGET_ELF)
1769 {
1770 if (TARGET_SHCOMPACT)
1771 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1772 else if (TARGET_SHMEDIA)
1773 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1774 TARGET_SHMEDIA64 ? 64 : 32);
1775 }
1776 }
1777
1778 /* Check if PAT includes the UNSPEC_CALLER unspec pattern. */
1779
1780 static bool
1781 unspec_caller_rtx_p (rtx pat)
1782 {
1783 switch (GET_CODE (pat))
1784 {
1785 case CONST:
1786 return unspec_caller_rtx_p (XEXP (pat, 0));
1787 case PLUS:
1788 case MINUS:
1789 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1790 return true;
1791 return unspec_caller_rtx_p (XEXP (pat, 1));
1792 case UNSPEC:
1793 if (XINT (pat, 1) == UNSPEC_CALLER)
1794 return true;
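/* Fall through. */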
1795 default:
1796 break;
1797 }
1798
1799 return false;
1800 }
1801
1802 /* Indicate that INSN cannot be duplicated. This is true for any insn
1803 that generates a unique label. */
1804
1805 static bool
1806 sh_cannot_copy_insn_p (rtx insn)
1807 {
1808 rtx pat;
1809
1810 if (!reload_completed || !flag_pic)
1811 return false;
1812
1813 if (GET_CODE (insn) != INSN)
1814 return false;
1815 if (asm_noperands (insn) >= 0)
1816 return false;
1817
1818 pat = PATTERN (insn);
1819 if (GET_CODE (pat) != SET)
1820 return false;
1821 pat = SET_SRC (pat);
1822
1823 if (unspec_caller_rtx_p (pat))
1824 return true;
1825
1826 return false;
1827 }
1828
1829 /* Actual number of instructions used to make a shift by N. */
1830 static const char ashiftrt_insns[] =
1831 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1832
1833 /* Left shift and logical right shift are the same. */
1834 static const char shift_insns[] =
1835 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1836
1837 /* Individual shift amounts needed to get the above length sequences.
1838 One bit right shifts clobber the T bit, so when possible, put one bit
1839 shifts in the middle of the sequence, so the ends are eligible for
1840 branch delay slots. */
1841 static const short shift_amounts[32][5] = {
1842 {0}, {1}, {2}, {2, 1},
1843 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1844 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1845 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1846 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1847 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1848 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1849 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
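/* For example, a constant shift by 6 uses shift_amounts[6] = {2, 2, 2},
i.e. three shll2 / shlr2 insns, matching shift_insns[6] == 3; a shift
by 15 uses {8, -1, 8}, where the negated entry is a one bit shift in
the opposite direction, 8 - 1 + 8 = 15, again three insns. */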
1850
1851 /* Likewise, but for shift amounts < 16, up to three highmost bits
1852 might be clobbered. This is typically used when combined with some
1853 kind of sign or zero extension. */
1854
1855 static const char ext_shift_insns[] =
1856 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1857
1858 static const short ext_shift_amounts[32][4] = {
1859 {0}, {1}, {2}, {2, 1},
1860 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1861 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1862 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1863 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1864 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1865 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1866 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1867
1868 /* Assuming we have a value that has been sign-extended by at least one bit,
1869 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1870 to shift it by N without data loss, and quicker than by other means? */
1871 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
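/* Only n == 7 and n == 15 pass this test; those are the two entries of
ext_shift_amounts that end in a single one bit right shift, which can
safely be turned into an arithmetic right shift when the value has
already been sign-extended. */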
1872
1873 /* This is used in length attributes in sh.md to help compute the length
1874 of arbitrary constant shift instructions. */
1875
1876 int
1877 shift_insns_rtx (rtx insn)
1878 {
1879 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1880 int shift_count = INTVAL (XEXP (set_src, 1));
1881 enum rtx_code shift_code = GET_CODE (set_src);
1882
1883 switch (shift_code)
1884 {
1885 case ASHIFTRT:
1886 return ashiftrt_insns[shift_count];
1887 case LSHIFTRT:
1888 case ASHIFT:
1889 return shift_insns[shift_count];
1890 default:
1891 gcc_unreachable ();
1892 }
1893 }
1894
1895 /* Return the cost of a shift. */
1896
1897 static inline int
1898 shiftcosts (rtx x)
1899 {
1900 int value;
1901
1902 if (TARGET_SHMEDIA)
1903 return 1;
1904
1905 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1906 {
1907 if (GET_MODE (x) == DImode
1908 && GET_CODE (XEXP (x, 1)) == CONST_INT
1909 && INTVAL (XEXP (x, 1)) == 1)
1910 return 2;
1911
1912 /* Everything else is invalid, because there is no pattern for it. */
1913 return MAX_COST;
1914 }
1915 /* If shifting by a non-constant, then this will be expensive. */
1916 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1917 return SH_DYNAMIC_SHIFT_COST;
1918
1919 value = INTVAL (XEXP (x, 1));
1920
1921 /* Otherwise, return the true cost in instructions. */
1922 if (GET_CODE (x) == ASHIFTRT)
1923 {
1924 int cost = ashiftrt_insns[value];
1925 /* If SH3, then we put the constant in a reg and use shad. */
1926 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1927 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1928 return cost;
1929 }
1930 else
1931 return shift_insns[value];
1932 }
1933
1934 /* Return the cost of an AND operation. */
1935
1936 static inline int
1937 andcosts (rtx x)
1938 {
1939 int i;
1940
1941 /* ANDing with a register is a single cycle AND instruction. */
1942 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1943 return 1;
1944
1945 i = INTVAL (XEXP (x, 1));
1946
1947 if (TARGET_SHMEDIA)
1948 {
1949 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1950 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1951 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1952 return 1;
1953 else
1954 return 2;
1955 }
1956
1957 /* These constants are single cycle extu.[bw] instructions. */
1958 if (i == 0xff || i == 0xffff)
1959 return 1;
1960 /* Constants that can be used in an and immediate instruction in a single
1961 cycle, but this requires r0, so make it a little more expensive. */
1962 if (CONST_OK_FOR_K08 (i))
1963 return 2;
1964 /* Constants that can be loaded with a mov immediate and an and.
1965 This case is probably unnecessary. */
1966 if (CONST_OK_FOR_I08 (i))
1967 return 2;
1968 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1969 This case is probably unnecessary. */
1970 return 3;
1971 }
1972
1973 /* Return the cost of an addition or a subtraction. */
1974
1975 static inline int
1976 addsubcosts (rtx x)
1977 {
1978 /* Adding a register is a single cycle insn. */
1979 if (GET_CODE (XEXP (x, 1)) == REG
1980 || GET_CODE (XEXP (x, 1)) == SUBREG)
1981 return 1;
1982
1983 /* Likewise for small constants. */
1984 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1985 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1986 return 1;
1987
1988 if (TARGET_SHMEDIA)
1989 switch (GET_CODE (XEXP (x, 1)))
1990 {
1991 case CONST:
1992 case LABEL_REF:
1993 case SYMBOL_REF:
1994 return TARGET_SHMEDIA64 ? 5 : 3;
1995
1996 case CONST_INT:
1997 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1998 return 2;
1999 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2000 return 3;
2001 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2002 return 4;
2003
2004 /* Fall through. */
2005 default:
2006 return 5;
2007 }
2008
2009 /* Any other constant requires a 2 cycle pc-relative load plus an
2010 addition. */
2011 return 3;
2012 }
2013
2014 /* Return the cost of a multiply. */
2015 static inline int
2016 multcosts (rtx x ATTRIBUTE_UNUSED)
2017 {
2018 if (sh_multcost >= 0)
2019 return sh_multcost;
2020 if (TARGET_SHMEDIA)
2021 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2022 accept constants. Ideally, we would use a cost of one or two and
2023 add the cost of the operand, but disregard the latter when we are
2024 inside a loop and loop invariant code motion is still to follow.
2025 Using a multiply first and splitting it later if it's a loss
2026 doesn't work because of different sign / zero extension semantics
2027 of multiplies vs. shifts. */
2028 return TARGET_SMALLCODE ? 2 : 3;
2029
2030 if (TARGET_SH2)
2031 {
2032 /* We have a mul insn, so we can never take more than the mul and the
2033 read of the mac reg, but count more because of the latency and extra
2034 reg usage. */
2035 if (TARGET_SMALLCODE)
2036 return 2;
2037 return 3;
2038 }
2039
2040 /* If we're aiming at small code, then just count the number of
2041 insns in a multiply call sequence. */
2042 if (TARGET_SMALLCODE)
2043 return 5;
2044
2045 /* Otherwise count all the insns in the routine we'd be calling too. */
2046 return 20;
2047 }
2048
2049 /* Compute a (partial) cost for rtx X. Return true if the complete
2050 cost has been computed, and false if subexpressions should be
2051 scanned. In either case, *TOTAL contains the cost result. */
2052
2053 static bool
2054 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2055 {
2056 switch (code)
2057 {
2058 case CONST_INT:
2059 if (TARGET_SHMEDIA)
2060 {
2061 if (INTVAL (x) == 0)
2062 *total = 0;
2063 else if (outer_code == AND && and_operand ((x), DImode))
2064 *total = 0;
2065 else if ((outer_code == IOR || outer_code == XOR
2066 || outer_code == PLUS)
2067 && CONST_OK_FOR_I10 (INTVAL (x)))
2068 *total = 0;
2069 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2070 *total = COSTS_N_INSNS (outer_code != SET);
2071 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2072 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2073 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2074 *total = COSTS_N_INSNS (3);
2075 else
2076 *total = COSTS_N_INSNS (4);
2077 return true;
2078 }
2079 if (CONST_OK_FOR_I08 (INTVAL (x)))
2080 *total = 0;
2081 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2082 && CONST_OK_FOR_K08 (INTVAL (x)))
2083 *total = 1;
2084 else
2085 *total = 8;
2086 return true;
2087
2088 case CONST:
2089 case LABEL_REF:
2090 case SYMBOL_REF:
2091 if (TARGET_SHMEDIA64)
2092 *total = COSTS_N_INSNS (4);
2093 else if (TARGET_SHMEDIA32)
2094 *total = COSTS_N_INSNS (2);
2095 else
2096 *total = 5;
2097 return true;
2098
2099 case CONST_DOUBLE:
2100 if (TARGET_SHMEDIA)
2101 *total = COSTS_N_INSNS (4);
2102 else
2103 *total = 10;
2104 return true;
2105 case CONST_VECTOR:
2106 if (x == CONST0_RTX (GET_MODE (x)))
2107 *total = 0;
2108 else if (sh_1el_vec (x, VOIDmode))
2109 *total = outer_code != SET;
2110 else if (sh_rep_vec (x, VOIDmode))
2111 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2112 + (outer_code != SET));
2113 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2114 return true;
2115
2116 case PLUS:
2117 case MINUS:
2118 *total = COSTS_N_INSNS (addsubcosts (x));
2119 return true;
2120
2121 case AND:
2122 *total = COSTS_N_INSNS (andcosts (x));
2123 return true;
2124
2125 case MULT:
2126 *total = COSTS_N_INSNS (multcosts (x));
2127 return true;
2128
2129 case ASHIFT:
2130 case ASHIFTRT:
2131 case LSHIFTRT:
2132 *total = COSTS_N_INSNS (shiftcosts (x));
2133 return true;
2134
2135 case DIV:
2136 case UDIV:
2137 case MOD:
2138 case UMOD:
2139 *total = COSTS_N_INSNS (20);
2140 return true;
2141
2142 case PARALLEL:
2143 if (sh_1el_vec (x, VOIDmode))
2144 *total = outer_code != SET;
2145 else if (sh_rep_vec (x, VOIDmode))
2146 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2147 + (outer_code != SET));
2148 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2149 return true;
2150
2151 case FLOAT:
2152 case FIX:
2153 *total = 100;
2154 return true;
2155
2156 default:
2157 return false;
2158 }
2159 }
2160
2161 /* Compute the cost of an address. For the SH, all valid addresses are
2162 the same cost. Use a slightly higher cost for reg + reg addressing,
2163 since it increases pressure on r0. */
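/* E.g. the indexed @(r0,rn) form - a PLUS with a non-constant index -
gets cost 1 on non-SHmedia targets, while @rn and @(disp,rn)
addresses get cost 0. */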
2164
2165 static int
2166 sh_address_cost (rtx X)
2167 {
2168 return (GET_CODE (X) == PLUS
2169 && ! CONSTANT_P (XEXP (X, 1))
2170 && ! TARGET_SHMEDIA ? 1 : 0);
2171 }
2172
2173 /* Code to expand a shift. */
2174
2175 void
2176 gen_ashift (int type, int n, rtx reg)
2177 {
2178 /* Negative values here come from the shift_amounts array. */
2179 if (n < 0)
2180 {
2181 if (type == ASHIFT)
2182 type = LSHIFTRT;
2183 else
2184 type = ASHIFT;
2185 n = -n;
2186 }
2187
2188 switch (type)
2189 {
2190 case ASHIFTRT:
2191 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2192 break;
2193 case LSHIFTRT:
2194 if (n == 1)
2195 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2196 else
2197 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2198 break;
2199 case ASHIFT:
2200 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2201 break;
2202 }
2203 }
2204
2205 /* Same for HImode */
2206
2207 void
2208 gen_ashift_hi (int type, int n, rtx reg)
2209 {
2210 /* Negative values here come from the shift_amounts array. */
2211 if (n < 0)
2212 {
2213 if (type == ASHIFT)
2214 type = LSHIFTRT;
2215 else
2216 type = ASHIFT;
2217 n = -n;
2218 }
2219
2220 switch (type)
2221 {
2222 case ASHIFTRT:
2223 case LSHIFTRT:
2224 /* We don't have HImode right shift operations because using the
2225 ordinary 32 bit shift instructions for that doesn't generate proper
2226 zero/sign extension.
2227 gen_ashift_hi is only called in contexts where we know that the
2228 sign extension works out correctly. */
2229 {
2230 int offset = 0;
2231 if (GET_CODE (reg) == SUBREG)
2232 {
2233 offset = SUBREG_BYTE (reg);
2234 reg = SUBREG_REG (reg);
2235 }
2236 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2237 break;
2238 }
2239 case ASHIFT:
2240 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2241 break;
2242 }
2243 }
2244
2245 /* Output RTL to split a constant shift into its component SH constant
2246 shift instructions. */
2247
2248 void
2249 gen_shifty_op (int code, rtx *operands)
2250 {
2251 int value = INTVAL (operands[2]);
2252 int max, i;
2253
2254 /* Truncate the shift count in case it is out of bounds. */
2255 value = value & 0x1f;
2256
2257 if (value == 31)
2258 {
2259 if (code == LSHIFTRT)
2260 {
2261 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2262 emit_insn (gen_movt (operands[0]));
2263 return;
2264 }
2265 else if (code == ASHIFT)
2266 {
2267 /* There is a two instruction sequence for 31 bit left shifts,
2268 but it requires r0. */
2269 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2270 {
2271 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2272 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2273 return;
2274 }
2275 }
2276 }
2277 else if (value == 0)
2278 {
2279 /* This can happen even when optimizing, if there were subregs before
2280 reload. Don't output a nop here, as this is never optimized away;
2281 use a no-op move instead. */
2282 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2283 return;
2284 }
2285
2286 max = shift_insns[value];
2287 for (i = 0; i < max; i++)
2288 gen_ashift (code, shift_amounts[value][i], operands[0]);
2289 }
2290
2291 /* Same as above, but optimized for values where the topmost bits don't
2292 matter. */
2293
2294 void
2295 gen_shifty_hi_op (int code, rtx *operands)
2296 {
2297 int value = INTVAL (operands[2]);
2298 int max, i;
2299 void (*gen_fun) (int, int, rtx);
2300
2301 /* This operation is used by and_shl for SImode values with a few
2302 high bits known to be cleared. */
2303 value &= 31;
2304 if (value == 0)
2305 {
2306 emit_insn (gen_nop ());
2307 return;
2308 }
2309
2310 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2311 if (code == ASHIFT)
2312 {
2313 max = ext_shift_insns[value];
2314 for (i = 0; i < max; i++)
2315 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2316 }
2317 else
2318 /* When shifting right, emit the shifts in reverse order, so that
2319 solitary negative values come first. */
2320 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2321 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2322 }
2323
2324 /* Output RTL for an arithmetic right shift. */
2325
2326 /* ??? Rewrite to use super-optimizer sequences. */
2327
2328 int
2329 expand_ashiftrt (rtx *operands)
2330 {
2331 rtx wrk;
2332 char func[18];
2333 int value;
2334
2335 if (TARGET_SH3)
2336 {
2337 if (GET_CODE (operands[2]) != CONST_INT)
2338 {
2339 rtx count = copy_to_mode_reg (SImode, operands[2]);
2340 emit_insn (gen_negsi2 (count, count));
2341 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2342 return 1;
2343 }
2344 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2345 > 1 + SH_DYNAMIC_SHIFT_COST)
2346 {
2347 rtx count
2348 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2349 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2350 return 1;
2351 }
2352 }
2353 if (GET_CODE (operands[2]) != CONST_INT)
2354 return 0;
2355
2356 value = INTVAL (operands[2]) & 31;
2357
2358 if (value == 31)
2359 {
2360 /* If we are called from abs expansion, arrange things so that we
2361 can use a single MT instruction that doesn't clobber the source,
2362 if LICM can hoist out the load of the constant zero. */
2363 if (currently_expanding_to_rtl)
2364 {
2365 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2366 operands[1]));
2367 emit_insn (gen_mov_neg_si_t (operands[0]));
2368 return 1;
2369 }
2370 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2371 return 1;
2372 }
2373 else if (value >= 16 && value <= 19)
2374 {
2375 wrk = gen_reg_rtx (SImode);
2376 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2377 value -= 16;
2378 while (value--)
2379 gen_ashift (ASHIFTRT, 1, wrk);
2380 emit_move_insn (operands[0], wrk);
2381 return 1;
2382 }
2383 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2384 else if (value <= 5)
2385 {
2386 wrk = gen_reg_rtx (SImode);
2387 emit_move_insn (wrk, operands[1]);
2388 while (value--)
2389 gen_ashift (ASHIFTRT, 1, wrk);
2390 emit_move_insn (operands[0], wrk);
2391 return 1;
2392 }
2393
2394 wrk = gen_reg_rtx (Pmode);
2395
2396 /* Load the value into an arg reg and call a helper. */
2397 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2398 sprintf (func, "__ashiftrt_r4_%d", value);
2399 function_symbol (wrk, func, SFUNC_STATIC);
2400 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2401 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2402 return 1;
2403 }
2404
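/* Return nonzero if a constant shift of COUNT bits is better done
dynamically, i.e. if the static shift sequence would take more insns
than loading the count into a register plus one dynamic shift. */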
2405 int
2406 sh_dynamicalize_shift_p (rtx count)
2407 {
2408 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2409 }
2410
2411 /* Try to find a good way to implement the combiner pattern
2412 [(set (match_operand:SI 0 "register_operand" "r")
2413 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2414 (match_operand:SI 2 "const_int_operand" "n"))
2415 (match_operand:SI 3 "const_int_operand" "n"))) .
2416 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2417 return 0 for simple right / left or left/right shift combination.
2418 return 1 for a combination of shifts with zero_extend.
2419 return 2 for a combination of shifts with an AND that needs r0.
2420 return 3 for a combination of shifts with an AND that needs an extra
2421 scratch register, when the three highmost bits of the AND mask are clear.
2422 return 4 for a combination of shifts with an AND that needs an extra
2423 scratch register, when any of the three highmost bits of the AND mask
2424 is set.
2425 If ATTRP is set, store an initial right shift width in ATTRP[0],
2426 and the instruction length in ATTRP[1]. These values are not valid
2427 when returning 0.
2428 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2429 shift_amounts for the last shift value that is to be used before the
2430 sign extend. */
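/* For example, for (x << 2) & 0x3fc the mask normalizes to 0xff, so a
QImode zero extend followed by a two bit left shift does the job in
two insns; shl_and_kind returns 1 for this case. */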
2431 int
2432 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2433 {
2434 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2435 int left = INTVAL (left_rtx), right;
2436 int best = 0;
2437 int cost, best_cost = 10000;
2438 int best_right = 0, best_len = 0;
2439 int i;
2440 int can_ext;
2441
2442 if (left < 0 || left > 31)
2443 return 0;
2444 if (GET_CODE (mask_rtx) == CONST_INT)
2445 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2446 else
2447 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2448 /* Can this be expressed as a right shift / left shift pair? */
2449 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2450 right = exact_log2 (lsb);
2451 mask2 = ~(mask + lsb - 1);
2452 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2453 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2454 if (! mask2)
2455 best_cost = shift_insns[right] + shift_insns[right + left];
2456 /* mask has no trailing zeroes <==> ! right */
2457 else if (! right && mask2 == ~(lsb2 - 1))
2458 {
2459 int late_right = exact_log2 (lsb2);
2460 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2461 }
2462 /* Try to use zero extend. */
2463 if (mask2 == ~(lsb2 - 1))
2464 {
2465 int width, first;
2466
2467 for (width = 8; width <= 16; width += 8)
2468 {
2469 /* Can we zero-extend right away? */
2470 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2471 {
2472 cost
2473 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2474 if (cost < best_cost)
2475 {
2476 best = 1;
2477 best_cost = cost;
2478 best_right = right;
2479 best_len = cost;
2480 if (attrp)
2481 attrp[2] = -1;
2482 }
2483 continue;
2484 }
2485 /* ??? Could try to put zero extend into initial right shift,
2486 or even shift a bit left before the right shift. */
2487 /* Determine value of first part of left shift, to get to the
2488 zero extend cut-off point. */
2489 first = width - exact_log2 (lsb2) + right;
2490 if (first >= 0 && right + left - first >= 0)
2491 {
2492 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2493 + ext_shift_insns[right + left - first];
2494 if (cost < best_cost)
2495 {
2496 best = 1;
2497 best_cost = cost;
2498 best_right = right;
2499 best_len = cost;
2500 if (attrp)
2501 attrp[2] = first;
2502 }
2503 }
2504 }
2505 }
2506 /* Try to use r0 AND pattern */
2507 for (i = 0; i <= 2; i++)
2508 {
2509 if (i > right)
2510 break;
2511 if (! CONST_OK_FOR_K08 (mask >> i))
2512 continue;
2513 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2514 if (cost < best_cost)
2515 {
2516 best = 2;
2517 best_cost = cost;
2518 best_right = i;
2519 best_len = cost - 1;
2520 }
2521 }
2522 /* Try to use a scratch register to hold the AND operand. */
2523 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2524 for (i = 0; i <= 2; i++)
2525 {
2526 if (i > right)
2527 break;
2528 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2529 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2530 if (cost < best_cost)
2531 {
2532 best = 4 - can_ext;
2533 best_cost = cost;
2534 best_right = i;
2535 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2536 }
2537 }
2538
2539 if (attrp)
2540 {
2541 attrp[0] = best_right;
2542 attrp[1] = best_len;
2543 }
2544 return best;
2545 }
2546
2547 /* This is used in length attributes of the unnamed instructions
2548 corresponding to shl_and_kind return values of 1 and 2. */
2549 int
2550 shl_and_length (rtx insn)
2551 {
2552 rtx set_src, left_rtx, mask_rtx;
2553 int attributes[3];
2554
2555 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2556 left_rtx = XEXP (XEXP (set_src, 0), 1);
2557 mask_rtx = XEXP (set_src, 1);
2558 shl_and_kind (left_rtx, mask_rtx, attributes);
2559 return attributes[1];
2560 }
2561
2562 /* This is used in length attribute of the and_shl_scratch instruction. */
2563
2564 int
2565 shl_and_scr_length (rtx insn)
2566 {
2567 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2568 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2569 rtx op = XEXP (set_src, 0);
2570 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2571 op = XEXP (XEXP (op, 0), 0);
2572 return len + shift_insns[INTVAL (XEXP (op, 1))];
2573 }
2574
2575 /* Generate rtl for instructions for which shl_and_kind advised a particular
2576 method of generating them, i.e. returned a nonzero kind. */
2577
2578 int
2579 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2580 {
2581 int attributes[3];
2582 unsigned HOST_WIDE_INT mask;
2583 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2584 int right, total_shift;
2585 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2586
2587 right = attributes[0];
2588 total_shift = INTVAL (left_rtx) + right;
2589 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2590 switch (kind)
2591 {
2592 default:
2593 return -1;
2594 case 1:
2595 {
2596 int first = attributes[2];
2597 rtx operands[3];
2598
2599 if (first < 0)
2600 {
2601 emit_insn ((mask << right) <= 0xff
2602 ? gen_zero_extendqisi2 (dest,
2603 gen_lowpart (QImode, source))
2604 : gen_zero_extendhisi2 (dest,
2605 gen_lowpart (HImode, source)));
2606 source = dest;
2607 }
2608 if (source != dest)
2609 emit_insn (gen_movsi (dest, source));
2610 operands[0] = dest;
2611 if (right)
2612 {
2613 operands[2] = GEN_INT (right);
2614 gen_shifty_hi_op (LSHIFTRT, operands);
2615 }
2616 if (first > 0)
2617 {
2618 operands[2] = GEN_INT (first);
2619 gen_shifty_hi_op (ASHIFT, operands);
2620 total_shift -= first;
2621 mask <<= first;
2622 }
2623 if (first >= 0)
2624 emit_insn (mask <= 0xff
2625 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2626 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2627 if (total_shift > 0)
2628 {
2629 operands[2] = GEN_INT (total_shift);
2630 gen_shifty_hi_op (ASHIFT, operands);
2631 }
2632 break;
2633 }
2634 case 4:
2635 shift_gen_fun = gen_shifty_op;
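/* Fall through. */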
2636 case 3:
2637 /* If the topmost bit that matters is set, set the topmost bits
2638 that don't matter. This way, we might be able to get a shorter
2639 signed constant. */
2640 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2641 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
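/* Fall through. */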
2642 case 2:
2643 /* Don't expand fine-grained when combining, because that will
2644 make the pattern fail. */
2645 if (currently_expanding_to_rtl
2646 || reload_in_progress || reload_completed)
2647 {
2648 rtx operands[3];
2649
2650 /* Cases 3 and 4 should be handled by this split
2651 only while combining. */
2652 gcc_assert (kind <= 2);
2653 if (right)
2654 {
2655 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2656 source = dest;
2657 }
2658 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2659 if (total_shift)
2660 {
2661 operands[0] = dest;
2662 operands[1] = dest;
2663 operands[2] = GEN_INT (total_shift);
2664 shift_gen_fun (ASHIFT, operands);
2665 }
2666 break;
2667 }
2668 else
2669 {
2670 int neg = 0;
2671 if (kind != 4 && total_shift < 16)
2672 {
2673 neg = -ext_shift_amounts[total_shift][1];
2674 if (neg > 0)
2675 neg -= ext_shift_amounts[total_shift][2];
2676 else
2677 neg = 0;
2678 }
2679 emit_insn (gen_and_shl_scratch (dest, source,
2680 GEN_INT (right),
2681 GEN_INT (mask),
2682 GEN_INT (total_shift + neg),
2683 GEN_INT (neg)));
2684 emit_insn (gen_movsi (dest, dest));
2685 break;
2686 }
2687 }
2688 return 0;
2689 }
2690
2691 /* Try to find a good way to implement the combiner pattern
2692 [(set (match_operand:SI 0 "register_operand" "=r")
2693 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2694 (match_operand:SI 2 "const_int_operand" "n")
2695 (match_operand:SI 3 "const_int_operand" "n")
2696 (const_int 0)))
2697 (clobber (reg:SI T_REG))]
2698 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2699 return 0 for simple left / right shift combination.
2700 return 1 for left shift / 8 bit sign extend / left shift.
2701 return 2 for left shift / 16 bit sign extend / left shift.
2702 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2703 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2704 return 5 for left shift / 16 bit sign extend / right shift
2705 return 6 for < 8 bit sign extend / left shift.
2706 return 7 for < 8 bit sign extend / left shift / single right shift.
2707 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2708
2709 int
2710 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2711 {
2712 int left, size, insize, ext;
2713 int cost = 0, best_cost;
2714 int kind;
2715
2716 left = INTVAL (left_rtx);
2717 size = INTVAL (size_rtx);
2718 insize = size - left;
2719 gcc_assert (insize > 0);
2720 /* Default to left / right shift. */
2721 kind = 0;
2722 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2723 if (size <= 16)
2724 {
2725 /* 16 bit shift / sign extend / 16 bit shift */
2726 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2727 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2728 below, by alternative 3 or something even better. */
2729 if (cost < best_cost)
2730 {
2731 kind = 5;
2732 best_cost = cost;
2733 }
2734 }
2735 /* Try a plain sign extend between two shifts. */
2736 for (ext = 16; ext >= insize; ext -= 8)
2737 {
2738 if (ext <= size)
2739 {
2740 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2741 if (cost < best_cost)
2742 {
2743 kind = ext / (unsigned) 8;
2744 best_cost = cost;
2745 }
2746 }
2747 /* Check if we can do a sloppy shift with a final signed shift
2748 restoring the sign. */
2749 if (EXT_SHIFT_SIGNED (size - ext))
2750 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2751 /* If not, maybe it's still cheaper to do the second shift sloppy,
2752 and do a final sign extend? */
2753 else if (size <= 16)
2754 cost = ext_shift_insns[ext - insize] + 1
2755 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2756 else
2757 continue;
2758 if (cost < best_cost)
2759 {
2760 kind = ext / (unsigned) 8 + 2;
2761 best_cost = cost;
2762 }
2763 }
2764 /* Check if we can sign extend in r0 */
2765 if (insize < 8)
2766 {
2767 cost = 3 + shift_insns[left];
2768 if (cost < best_cost)
2769 {
2770 kind = 6;
2771 best_cost = cost;
2772 }
2773 /* Try the same with a final signed shift. */
2774 if (left < 31)
2775 {
2776 cost = 3 + ext_shift_insns[left + 1] + 1;
2777 if (cost < best_cost)
2778 {
2779 kind = 7;
2780 best_cost = cost;
2781 }
2782 }
2783 }
2784 if (TARGET_SH3)
2785 {
2786 /* Try to use a dynamic shift. */
2787 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2788 if (cost < best_cost)
2789 {
2790 kind = 0;
2791 best_cost = cost;
2792 }
2793 }
2794 if (costp)
2795 *costp = best_cost;
2796 return kind;
2797 }
2798
2799 /* Function to be used in the length attribute of the instructions
2800 implementing this pattern. */
2801
2802 int
2803 shl_sext_length (rtx insn)
2804 {
2805 rtx set_src, left_rtx, size_rtx;
2806 int cost;
2807
2808 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2809 left_rtx = XEXP (XEXP (set_src, 0), 1);
2810 size_rtx = XEXP (set_src, 1);
2811 shl_sext_kind (left_rtx, size_rtx, &cost);
2812 return cost;
2813 }
2814
2815 /* Generate rtl for this pattern */
2816
2817 int
2818 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2819 {
2820 int kind;
2821 int left, size, insize, cost;
2822 rtx operands[3];
2823
2824 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2825 left = INTVAL (left_rtx);
2826 size = INTVAL (size_rtx);
2827 insize = size - left;
2828 switch (kind)
2829 {
2830 case 1:
2831 case 2:
2832 case 3:
2833 case 4:
2834 {
2835 int ext = kind & 1 ? 8 : 16;
2836 int shift2 = size - ext;
2837
2838 /* Don't expand fine-grained when combining, because that will
2839 make the pattern fail. */
2840 if (! currently_expanding_to_rtl
2841 && ! reload_in_progress && ! reload_completed)
2842 {
2843 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2844 emit_insn (gen_movsi (dest, source));
2845 break;
2846 }
2847 if (dest != source)
2848 emit_insn (gen_movsi (dest, source));
2849 operands[0] = dest;
2850 if (ext - insize)
2851 {
2852 operands[2] = GEN_INT (ext - insize);
2853 gen_shifty_hi_op (ASHIFT, operands);
2854 }
2855 emit_insn (kind & 1
2856 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2857 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2858 if (kind <= 2)
2859 {
2860 if (shift2)
2861 {
2862 operands[2] = GEN_INT (shift2);
2863 gen_shifty_op (ASHIFT, operands);
2864 }
2865 }
2866 else
2867 {
2868 if (shift2 > 0)
2869 {
2870 if (EXT_SHIFT_SIGNED (shift2))
2871 {
2872 operands[2] = GEN_INT (shift2 + 1);
2873 gen_shifty_op (ASHIFT, operands);
2874 operands[2] = const1_rtx;
2875 gen_shifty_op (ASHIFTRT, operands);
2876 break;
2877 }
2878 operands[2] = GEN_INT (shift2);
2879 gen_shifty_hi_op (ASHIFT, operands);
2880 }
2881 else if (shift2)
2882 {
2883 operands[2] = GEN_INT (-shift2);
2884 gen_shifty_hi_op (LSHIFTRT, operands);
2885 }
2886 emit_insn (size <= 8
2887 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2888 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2889 }
2890 break;
2891 }
2892 case 5:
2893 {
2894 int i = 16 - size;
2895 if (! currently_expanding_to_rtl
2896 && ! reload_in_progress && ! reload_completed)
2897 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2898 else
2899 {
2900 operands[0] = dest;
2901 operands[2] = GEN_INT (16 - insize);
2902 gen_shifty_hi_op (ASHIFT, operands);
2903 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2904 }
2905 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2906 while (--i >= 0)
2907 gen_ashift (ASHIFTRT, 1, dest);
2908 break;
2909 }
2910 case 6:
2911 case 7:
2912 /* Don't expand fine-grained when combining, because that will
2913 make the pattern fail. */
2914 if (! currently_expanding_to_rtl
2915 && ! reload_in_progress && ! reload_completed)
2916 {
2917 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2918 emit_insn (gen_movsi (dest, source));
2919 break;
2920 }
2921 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2922 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2923 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2924 operands[0] = dest;
2925 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2926 gen_shifty_op (ASHIFT, operands);
2927 if (kind == 7)
2928 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2929 break;
2930 default:
2931 return -1;
2932 }
2933 return 0;
2934 }
2935
2936 /* Prefix a symbol_ref name with "datalabel". */
2937
2938 rtx
2939 gen_datalabel_ref (rtx sym)
2940 {
2941 const char *str;
2942
2943 if (GET_CODE (sym) == LABEL_REF)
2944 return gen_rtx_CONST (GET_MODE (sym),
2945 gen_rtx_UNSPEC (GET_MODE (sym),
2946 gen_rtvec (1, sym),
2947 UNSPEC_DATALABEL));
2948
2949 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2950
2951 str = XSTR (sym, 0);
2952 /* Share all SYMBOL_REF strings with the same value - that is important
2953 for cse. */
2954 str = IDENTIFIER_POINTER (get_identifier (str));
2955 XSTR (sym, 0) = str;
2956
2957 return sym;
2958 }
2959
2960
2961 /* The SH cannot load a large constant into a register; constants have to
2962 come from a pc relative load. The reference of a pc relative load
2963 instruction must be less than 1k in front of the instruction. This
2964 means that we often have to dump a constant inside a function, and
2965 generate code to branch around it.
2966
2967 It is important to minimize this, since the branches will slow things
2968 down and make things bigger.
2969
2970 Worst case code looks like:
2971
2972 mov.l L1,rn
2973 bra L2
2974 nop
2975 align
2976 L1: .long value
2977 L2:
2978 ..
2979
2980 mov.l L3,rn
2981 bra L4
2982 nop
2983 align
2984 L3: .long value
2985 L4:
2986 ..
2987
2988 We fix this by performing a scan before scheduling, which notices which
2989 instructions need to have their operands fetched from the constant table
2990 and builds the table.
2991
2992 The algorithm is:
2993
2994 Scan and find an instruction which needs a pcrel move. Look forward, find the
2995 last barrier which is within MAX_COUNT bytes of the requirement.
2996 If there isn't one, make one. Process all the instructions between
2997 the found instruction and the barrier.
2998
2999 In the above example, we can tell that L3 is within 1k of L1, so
3000 the first move can be shrunk from the 3 insn+constant sequence into
3001 just 1 insn, and the constant moved to L3 to make:
3002
3003 mov.l L1,rn
3004 ..
3005 mov.l L3,rn
3006 bra L4
3007 nop
3008 align
3009 L3:.long value
3010 L4:.long value
3011
3012 Then the second move becomes the target for the shortening process. */
3013
3014 typedef struct
3015 {
3016 rtx value; /* Value in table. */
3017 rtx label; /* Label of value. */
3018 rtx wend; /* End of window. */
3019 enum machine_mode mode; /* Mode of value. */
3020
3021 /* True if this constant is accessed as part of a post-increment
3022 sequence. Note that HImode constants are never accessed in this way. */
3023 bool part_of_sequence_p;
3024 } pool_node;
3025
3026 /* The maximum number of constants that can fit into one pool, since
3027 constants in the range 0..510 are at least 2 bytes long, and in the
3028 range from there to 1018 at least 4 bytes. */
3029
3030 #define MAX_POOL_SIZE 372
3031 static pool_node pool_vector[MAX_POOL_SIZE];
3032 static int pool_size;
3033 static rtx pool_window_label;
3034 static int pool_window_last;
3035
3036 /* ??? If we need a constant in HImode which is the truncated value of a
3037 constant we need in SImode, we could combine the two entries thus saving
3038 two bytes. Is this common enough to be worth the effort of implementing
3039 it? */
3040
3041 /* ??? This stuff should be done at the same time that we shorten branches.
3042 As it is now, we must assume that all branches are the maximum size, and
3043 this causes us to almost always output constant pools sooner than
3044 necessary. */
3045
3046 /* Add a constant to the pool and return its label. */
3047
3048 static rtx
3049 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3050 {
3051 int i;
3052 rtx lab, new, ref, newref;
3053
3054 /* First see if we've already got it. */
3055 for (i = 0; i < pool_size; i++)
3056 {
3057 if (x->code == pool_vector[i].value->code
3058 && mode == pool_vector[i].mode)
3059 {
3060 if (x->code == CODE_LABEL)
3061 {
3062 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3063 continue;
3064 }
3065 if (rtx_equal_p (x, pool_vector[i].value))
3066 {
3067 lab = new = 0;
3068 if (! last_value
3069 || ! i
3070 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3071 {
3072 new = gen_label_rtx ();
3073 LABEL_REFS (new) = pool_vector[i].label;
3074 pool_vector[i].label = lab = new;
3075 }
3076 if (lab && pool_window_label)
3077 {
3078 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3079 ref = pool_vector[pool_window_last].wend;
3080 LABEL_NEXTREF (newref) = ref;
3081 pool_vector[pool_window_last].wend = newref;
3082 }
3083 if (new)
3084 pool_window_label = new;
3085 pool_window_last = i;
3086 return lab;
3087 }
3088 }
3089 }
3090
3091 /* Need a new one. */
3092 pool_vector[pool_size].value = x;
3093 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3094 {
3095 lab = 0;
3096 pool_vector[pool_size - 1].part_of_sequence_p = true;
3097 }
3098 else
3099 lab = gen_label_rtx ();
3100 pool_vector[pool_size].mode = mode;
3101 pool_vector[pool_size].label = lab;
3102 pool_vector[pool_size].wend = NULL_RTX;
3103 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3104 if (lab && pool_window_label)
3105 {
3106 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3107 ref = pool_vector[pool_window_last].wend;
3108 LABEL_NEXTREF (newref) = ref;
3109 pool_vector[pool_window_last].wend = newref;
3110 }
3111 if (lab)
3112 pool_window_label = lab;
3113 pool_window_last = pool_size;
3114 pool_size++;
3115 return lab;
3116 }
3117
3118 /* Output the literal table. START, if nonzero, is the first instruction
3119 this table is needed for, and also indicates that there is at least one
3120 casesi_worker_2 instruction; we have to emit the operand3 labels from
3121 these insns at a 4-byte aligned position. BARRIER is the barrier
3122 after which we are to place the table. */
3123
3124 static void
3125 dump_table (rtx start, rtx barrier)
3126 {
3127 rtx scan = barrier;
3128 int i;
3129 int need_align = 1;
3130 rtx lab, ref;
3131 int have_df = 0;
3132
3133 /* Do two passes; on the first pass, dump out the HI sized constants. */
3134
3135 for (i = 0; i < pool_size; i++)
3136 {
3137 pool_node *p = &pool_vector[i];
3138
3139 if (p->mode == HImode)
3140 {
3141 if (need_align)
3142 {
3143 scan = emit_insn_after (gen_align_2 (), scan);
3144 need_align = 0;
3145 }
3146 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3147 scan = emit_label_after (lab, scan);
3148 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3149 scan);
3150 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3151 {
3152 lab = XEXP (ref, 0);
3153 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3154 }
3155 }
3156 else if (p->mode == DFmode)
3157 have_df = 1;
3158 }
3159
3160 need_align = 1;
3161
3162 if (start)
3163 {
3164 scan = emit_insn_after (gen_align_4 (), scan);
3165 need_align = 0;
3166 for (; start != barrier; start = NEXT_INSN (start))
3167 if (GET_CODE (start) == INSN
3168 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3169 {
3170 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3171 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3172
3173 scan = emit_label_after (lab, scan);
3174 }
3175 }
3176 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3177 {
3178 rtx align_insn = NULL_RTX;
3179
3180 scan = emit_label_after (gen_label_rtx (), scan);
3181 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3182 need_align = 0;
3183
3184 for (i = 0; i < pool_size; i++)
3185 {
3186 pool_node *p = &pool_vector[i];
3187
3188 switch (p->mode)
3189 {
3190 case HImode:
3191 break;
3192 case SImode:
3193 case SFmode:
3194 if (align_insn && !p->part_of_sequence_p)
3195 {
3196 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3197 emit_label_before (lab, align_insn);
3198 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3199 align_insn);
3200 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3201 {
3202 lab = XEXP (ref, 0);
3203 emit_insn_before (gen_consttable_window_end (lab),
3204 align_insn);
3205 }
3206 delete_insn (align_insn);
3207 align_insn = NULL_RTX;
3208 continue;
3209 }
3210 else
3211 {
3212 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3213 scan = emit_label_after (lab, scan);
3214 scan = emit_insn_after (gen_consttable_4 (p->value,
3215 const0_rtx), scan);
3216 need_align = ! need_align;
3217 }
3218 break;
3219 case DFmode:
3220 if (need_align)
3221 {
3222 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3223 align_insn = scan;
3224 need_align = 0;
3225 }
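/* Fall through. */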
3226 case DImode:
3227 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3228 scan = emit_label_after (lab, scan);
3229 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3230 scan);
3231 break;
3232 default:
3233 gcc_unreachable ();
3234 }
3235
3236 if (p->mode != HImode)
3237 {
3238 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3239 {
3240 lab = XEXP (ref, 0);
3241 scan = emit_insn_after (gen_consttable_window_end (lab),
3242 scan);
3243 }
3244 }
3245 }
3246
3247 pool_size = 0;
3248 }
3249
3250 for (i = 0; i < pool_size; i++)
3251 {
3252 pool_node *p = &pool_vector[i];
3253
3254 switch (p->mode)
3255 {
3256 case HImode:
3257 break;
3258 case SImode:
3259 case SFmode:
3260 if (need_align)
3261 {
3262 need_align = 0;
3263 scan = emit_label_after (gen_label_rtx (), scan);
3264 scan = emit_insn_after (gen_align_4 (), scan);
3265 }
3266 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3267 scan = emit_label_after (lab, scan);
3268 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3269 scan);
3270 break;
3271 case DFmode:
3272 case DImode:
3273 if (need_align)
3274 {
3275 need_align = 0;
3276 scan = emit_label_after (gen_label_rtx (), scan);
3277 scan = emit_insn_after (gen_align_4 (), scan);
3278 }
3279 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3280 scan = emit_label_after (lab, scan);
3281 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3282 scan);
3283 break;
3284 default:
3285 gcc_unreachable ();
3286 }
3287
3288 if (p->mode != HImode)
3289 {
3290 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3291 {
3292 lab = XEXP (ref, 0);
3293 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3294 }
3295 }
3296 }
3297
3298 scan = emit_insn_after (gen_consttable_end (), scan);
3299 scan = emit_barrier_after (scan);
3300 pool_size = 0;
3301 pool_window_label = NULL_RTX;
3302 pool_window_last = 0;
3303 }
3304
3305 /* Return nonzero if the constant would be an ok source for a
3306 mov.w instead of a mov.l. */
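/* E.g. 32767 qualifies, but 0x8000 does not, since the pc-relative
mov.w sign-extends its 16 bit operand. */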
3307
3308 static int
3309 hi_const (rtx src)
3310 {
3311 return (GET_CODE (src) == CONST_INT
3312 && INTVAL (src) >= -32768
3313 && INTVAL (src) <= 32767);
3314 }
3315
3316 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3317
3318 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3319 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3320 need to fix it if the input value is CONST_OK_FOR_I08. */
3321
3322 static int
3323 broken_move (rtx insn)
3324 {
3325 if (GET_CODE (insn) == INSN)
3326 {
3327 rtx pat = PATTERN (insn);
3328 if (GET_CODE (pat) == PARALLEL)
3329 pat = XVECEXP (pat, 0, 0);
3330 if (GET_CODE (pat) == SET
3331 /* We can load any 8 bit value if we don't care what the high
3332 order bits end up as. */
3333 && GET_MODE (SET_DEST (pat)) != QImode
3334 && (CONSTANT_P (SET_SRC (pat))
3335 /* Match mova_const. */
3336 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3337 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3338 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3339 && ! (TARGET_SH2E
3340 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3341 && (fp_zero_operand (SET_SRC (pat))
3342 || fp_one_operand (SET_SRC (pat)))
3343 /* ??? If this is a -m4 or -m4-single compilation, in general
3344 we don't know the current setting of fpscr, so disable fldi.
3345 There is an exception if this was a register-register move
3346 before reload - and hence it was ascertained that we have
3347 single precision setting - and in a post-reload optimization
3348 we changed this to do a constant load. In that case
3349 we don't have an r0 clobber, hence we must use fldi. */
3350 && (! TARGET_SH4 || TARGET_FMOVD
3351 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3352 == SCRATCH))
3353 && GET_CODE (SET_DEST (pat)) == REG
3354 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3355 && ! (TARGET_SH2A
3356 && GET_MODE (SET_DEST (pat)) == SImode
3357 && GET_CODE (SET_SRC (pat)) == CONST_INT
3358 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3359 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3360 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3361 return 1;
3362 }
3363
3364 return 0;
3365 }
3366
3367 static int
3368 mova_p (rtx insn)
3369 {
3370 return (GET_CODE (insn) == INSN
3371 && GET_CODE (PATTERN (insn)) == SET
3372 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3373 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3374 /* Don't match mova_const. */
3375 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3376 }
3377
3378 /* Fix up a mova from a switch that went out of range. */
3379 static void
3380 fixup_mova (rtx mova)
3381 {
3382 if (! flag_pic)
3383 {
3384 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3385 INSN_CODE (mova) = -1;
3386 }
3387 else
3388 {
3389 rtx worker = mova;
3390 rtx lab = gen_label_rtx ();
3391 rtx wpat, wpat0, wpat1, wsrc, diff;
3392
3393 do
3394 {
3395 worker = NEXT_INSN (worker);
3396 gcc_assert (worker
3397 && GET_CODE (worker) != CODE_LABEL
3398 && GET_CODE (worker) != JUMP_INSN);
3399 } while (GET_CODE (worker) == NOTE
3400 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3401 wpat = PATTERN (worker);
3402 wpat0 = XVECEXP (wpat, 0, 0);
3403 wpat1 = XVECEXP (wpat, 0, 1);
3404 wsrc = SET_SRC (wpat0);
3405 PATTERN (worker) = (gen_casesi_worker_2
3406 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3407 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3408 XEXP (wpat1, 0)));
3409 INSN_CODE (worker) = -1;
3410 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3411 gen_rtx_LABEL_REF (Pmode, lab));
3412 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3413 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3414 INSN_CODE (mova) = -1;
3415 }
3416 }
3417
3418 /* Find the last barrier from insn FROM which is close enough to hold the
3419 constant pool. If we can't find one, then create one near the end of
3420 the range. */
3421
3422 static rtx
3423 find_barrier (int num_mova, rtx mova, rtx from)
3424 {
3425 int count_si = 0;
3426 int count_hi = 0;
3427 int found_hi = 0;
3428 int found_si = 0;
3429 int found_di = 0;
3430 int hi_align = 2;
3431 int si_align = 2;
3432 int leading_mova = num_mova;
3433 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3434 int si_limit;
3435 int hi_limit;
3436
3437 /* For HImode: range is 510, add 4 because pc counts from address of
3438 second instruction after this one, subtract 2 for the jump instruction
3439 that we may need to emit before the table, subtract 2 for the instruction
3440 that fills the jump delay slot (in very rare cases, reorg will take an
3441 instruction from after the constant pool or will leave the delay slot
3442 empty). This gives 510.
3443 For SImode: range is 1020, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 in case pc is 2 byte
3445 aligned, subtract 2 for the jump instruction that we may need to emit
3446 before the table, subtract 2 for the instruction that fills the jump
3447 delay slot. This gives 1018. */
3448
3449 /* The branch will always be shortened now that the reference address for
3450 forward branches is the successor address, thus we need no longer make
3451 adjustments to the [sh]i_limit for -O0. */
3452
3453 si_limit = 1018;
3454 hi_limit = 510;
3455
3456 while (from && count_si < si_limit && count_hi < hi_limit)
3457 {
3458 int inc = get_attr_length (from);
3459 int new_align = 1;
3460
3461 if (GET_CODE (from) == CODE_LABEL)
3462 {
3463 if (optimize)
3464 new_align = 1 << label_to_alignment (from);
3465 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3466 new_align = 1 << barrier_align (from);
3467 else
3468 new_align = 1;
3469 inc = 0;
3470 }
3471
3472 if (GET_CODE (from) == BARRIER)
3473 {
3474
3475 found_barrier = from;
3476
3477 /* If we are at the end of the function, or in front of an alignment
3478 instruction, we need not insert an extra alignment. We prefer
3479 this kind of barrier. */
3480 if (barrier_align (from) > 2)
3481 good_barrier = from;
3482 }
3483
3484 if (broken_move (from))
3485 {
3486 rtx pat, src, dst;
3487 enum machine_mode mode;
3488
3489 pat = PATTERN (from);
3490 if (GET_CODE (pat) == PARALLEL)
3491 pat = XVECEXP (pat, 0, 0);
3492 src = SET_SRC (pat);
3493 dst = SET_DEST (pat);
3494 mode = GET_MODE (dst);
3495
3496 /* We must explicitly check the mode, because sometimes the
3497 front end will generate code to load unsigned constants into
3498 HImode targets without properly sign extending them. */
3499 if (mode == HImode
3500 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3501 {
3502 found_hi += 2;
3503 /* We put the short constants before the long constants, so
3504 we must count the length of short constants in the range
3505 for the long constants. */
3506 /* ??? This isn't optimal, but is easy to do. */
3507 si_limit -= 2;
3508 }
3509 else
3510 {
3511 /* We dump DF/DI constants before SF/SI ones, because
3512 the limit is the same, but the alignment requirements
3513 are higher. We may waste up to 4 additional bytes
3514 for alignment, and the DF/DI constant may have
3515 another SF/SI constant placed before it. */
3516 if (TARGET_SHCOMPACT
3517 && ! found_di
3518 && (mode == DFmode || mode == DImode))
3519 {
3520 found_di = 1;
3521 si_limit -= 8;
3522 }
3523 while (si_align > 2 && found_si + si_align - 2 > count_si)
3524 si_align >>= 1;
3525 if (found_si > count_si)
3526 count_si = found_si;
3527 found_si += GET_MODE_SIZE (mode);
3528 if (num_mova)
3529 si_limit -= GET_MODE_SIZE (mode);
3530 }
3531 }
3532
3533 if (mova_p (from))
3534 {
3535 if (! num_mova++)
3536 {
3537 leading_mova = 0;
3538 mova = from;
3539 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3540 }
3541 if (found_si > count_si)
3542 count_si = found_si;
3543 }
3544 else if (GET_CODE (from) == JUMP_INSN
3545 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3546 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3547 {
3548 if (num_mova)
3549 num_mova--;
3550 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3551 {
3552 /* We have just passed the barrier in front of the
3553 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3554 the ADDR_DIFF_VEC is accessed as data, just like our pool
3555 constants, this is a good opportunity to accommodate what
3556 we have gathered so far.
3557 If we waited any longer, we could end up at a barrier in
3558 front of code, which gives worse cache usage for separated
3559 instruction / data caches. */
3560 good_barrier = found_barrier;
3561 break;
3562 }
3563 else
3564 {
3565 rtx body = PATTERN (from);
3566 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3567 }
3568 }
3569 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3570 else if (GET_CODE (from) == JUMP_INSN
3571 && ! TARGET_SH2
3572 && ! TARGET_SMALLCODE)
3573 new_align = 4;
3574
3575 if (found_si)
3576 {
3577 count_si += inc;
3578 if (new_align > si_align)
3579 {
3580 si_limit -= (count_si - 1) & (new_align - si_align);
3581 si_align = new_align;
3582 }
3583 count_si = (count_si + new_align - 1) & -new_align;
3584 }
3585 if (found_hi)
3586 {
3587 count_hi += inc;
3588 if (new_align > hi_align)
3589 {
3590 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3591 hi_align = new_align;
3592 }
3593 count_hi = (count_hi + new_align - 1) & -new_align;
3594 }
3595 from = NEXT_INSN (from);
3596 }
3597
3598 if (num_mova)
3599 {
3600 if (leading_mova)
3601 {
3602 /* Try as we might, the leading mova is out of range. Change
3603 it into a load (which will become a pcload) and retry. */
3604 fixup_mova (mova);
3605 return find_barrier (0, 0, mova);
3606 }
3607 else
3608 {
3609 /* Insert the constant pool table before the mova instruction,
3610 to prevent the mova label reference from going out of range. */
3611 from = mova;
3612 good_barrier = found_barrier = barrier_before_mova;
3613 }
3614 }
3615
3616 if (found_barrier)
3617 {
3618 if (good_barrier && next_real_insn (found_barrier))
3619 found_barrier = good_barrier;
3620 }
3621 else
3622 {
3623 /* We didn't find a barrier in time to dump our stuff,
3624 so we'll make one. */
3625 rtx label = gen_label_rtx ();
3626
3627 /* If we exceeded the range, then we must back up over the last
3628 instruction we looked at. Otherwise, we just need to undo the
3629 NEXT_INSN at the end of the loop. */
3630 if (count_hi > hi_limit || count_si > si_limit)
3631 from = PREV_INSN (PREV_INSN (from));
3632 else
3633 from = PREV_INSN (from);
3634
3635 /* Walk back to be just before any jump or label.
3636 Putting it before a label reduces the number of times the branch
3637 around the constant pool table will be hit. Putting it before
3638 a jump makes it more likely that the bra delay slot will be
3639 filled. */
3640 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3641 || GET_CODE (from) == CODE_LABEL)
3642 from = PREV_INSN (from);
3643
3644 from = emit_jump_insn_after (gen_jump (label), from);
3645 JUMP_LABEL (from) = label;
3646 LABEL_NUSES (label) = 1;
3647 found_barrier = emit_barrier_after (from);
3648 emit_label_after (label, found_barrier);
3649 }
3650
3651 return found_barrier;
3652 }
3653
3654 /* If the instruction INSN is implemented by a special function, and we can
3655 positively find the register that is used to call the sfunc, and this
3656 register is not used anywhere else in this instruction - except as the
3657 destination of a set, return this register; else, return 0. */
3658 rtx
3659	sfunc_uses_reg (rtx insn)
3660 {
3661 int i;
3662 rtx pattern, part, reg_part, reg;
3663
3664 if (GET_CODE (insn) != INSN)
3665 return 0;
3666 pattern = PATTERN (insn);
3667 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3668 return 0;
3669
3670 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3671 {
3672 part = XVECEXP (pattern, 0, i);
3673 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3674 reg_part = part;
3675 }
3676 if (! reg_part)
3677 return 0;
3678 reg = XEXP (reg_part, 0);
3679 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3680 {
3681 part = XVECEXP (pattern, 0, i);
3682 if (part == reg_part || GET_CODE (part) == CLOBBER)
3683 continue;
3684 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3685 && GET_CODE (SET_DEST (part)) == REG)
3686 ? SET_SRC (part) : part)))
3687 return 0;
3688 }
3689 return reg;
3690 }
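
/* An illustrative sketch (hypothetical operands, not a pattern quoted from
   the machine description): an sfunc insn is a PARALLEL roughly of the form

     (parallel [(set (reg:SI r4) (...))
                (use (reg:SI r2))
                (clobber (reg:SI pr))])

   Here sfunc_uses_reg would return r2: it is the register of the SImode
   USE, and it is mentioned nowhere else in the pattern except, possibly,
   as the destination of a SET.  */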
3691
3692 /* See if the only way in which INSN uses REG is by calling it, or by
3693 setting it while calling it. Set *SET to a SET rtx if the register
3694 is set by INSN. */
3695
3696 static int
3697	noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3698 {
3699 rtx pattern, reg2;
3700
3701 *set = NULL_RTX;
3702
3703 reg2 = sfunc_uses_reg (insn);
3704 if (reg2 && REGNO (reg2) == REGNO (reg))
3705 {
3706 pattern = single_set (insn);
3707 if (pattern
3708 && GET_CODE (SET_DEST (pattern)) == REG
3709 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3710 *set = pattern;
3711 return 0;
3712 }
3713 if (GET_CODE (insn) != CALL_INSN)
3714 {
3715 /* We don't use rtx_equal_p because we don't care if the mode is
3716 different. */
3717 pattern = single_set (insn);
3718 if (pattern
3719 && GET_CODE (SET_DEST (pattern)) == REG
3720 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3721 {
3722 rtx par, part;
3723 int i;
3724
3725 *set = pattern;
3726 par = PATTERN (insn);
3727 if (GET_CODE (par) == PARALLEL)
3728 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3729 {
3730 part = XVECEXP (par, 0, i);
3731 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3732 return 1;
3733 }
3734 return reg_mentioned_p (reg, SET_SRC (pattern));
3735 }
3736
3737 return 1;
3738 }
3739
3740 pattern = PATTERN (insn);
3741
3742 if (GET_CODE (pattern) == PARALLEL)
3743 {
3744 int i;
3745
3746 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3747 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3748 return 1;
3749 pattern = XVECEXP (pattern, 0, 0);
3750 }
3751
3752 if (GET_CODE (pattern) == SET)
3753 {
3754 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3755 {
3756 /* We don't use rtx_equal_p, because we don't care if the
3757 mode is different. */
3758 if (GET_CODE (SET_DEST (pattern)) != REG
3759 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3760 return 1;
3761
3762 *set = pattern;
3763 }
3764
3765 pattern = SET_SRC (pattern);
3766 }
3767
3768 if (GET_CODE (pattern) != CALL
3769 || GET_CODE (XEXP (pattern, 0)) != MEM
3770 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3771 return 1;
3772
3773 return 0;
3774 }
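
/* Example (hypothetical register numbers): for an insn whose pattern is
   (call (mem:SI (reg:SI r1)) ...), noncall_uses_reg (r1, insn, &set)
   returns 0, since the only use of r1 is as the call address.  For
   (set (reg:SI r2) (plus:SI (reg:SI r1) ...)) it returns 1, since r1 is
   used in an ordinary computation.  */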
3775
3776	/* Given X, a pattern of an insn or a part of it, return a mask of used
3777 general registers. Bits 0..15 mean that the respective registers
3778 are used as inputs in the instruction. Bits 16..31 mean that the
3779 registers 0..15, respectively, are used as outputs, or are clobbered.
3780 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3781 int
3782	regs_used (rtx x, int is_dest)
3783 {
3784 enum rtx_code code;
3785 const char *fmt;
3786 int i, used = 0;
3787
3788 if (! x)
3789 return used;
3790 code = GET_CODE (x);
3791 switch (code)
3792 {
3793 case REG:
3794 if (REGNO (x) < 16)
3795 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3796 << (REGNO (x) + is_dest));
3797 return 0;
3798 case SUBREG:
3799 {
3800 rtx y = SUBREG_REG (x);
3801
3802 if (GET_CODE (y) != REG)
3803 break;
3804 if (REGNO (y) < 16)
3805 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3806 << (REGNO (y) +
3807 subreg_regno_offset (REGNO (y),
3808 GET_MODE (y),
3809 SUBREG_BYTE (x),
3810 GET_MODE (x)) + is_dest));
3811 return 0;
3812 }
3813 case SET:
3814 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3815 case RETURN:
3816 /* If there was a return value, it must have been indicated with USE. */
3817 return 0x00ffff00;
3818 case CLOBBER:
3819 is_dest = 1;
3820 break;
3821 case MEM:
3822 is_dest = 0;
3823 break;
3824 case CALL:
3825 used |= 0x00ff00f0;
3826 break;
3827 default:
3828 break;
3829 }
3830
3831 fmt = GET_RTX_FORMAT (code);
3832
3833 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3834 {
3835 if (fmt[i] == 'E')
3836 {
3837 register int j;
3838 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3839 used |= regs_used (XVECEXP (x, i, j), is_dest);
3840 }
3841 else if (fmt[i] == 'e')
3842 used |= regs_used (XEXP (x, i), is_dest);
3843 }
3844 return used;
3845 }
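
/* Worked example (hypothetical insn; this assumes SImode occupies a single
   hard register): for

     (set (reg:SI r1) (plus:SI (reg:SI r2) (const_int 4)))

   regs_used returns (1 << (1 + 16)) | (1 << 2) == 0x00020004; bit 17
   records r1 as an output, bit 2 records r2 as an input.  */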
3846
3847 /* Create an instruction that prevents redirection of a conditional branch
3848 to the destination of the JUMP with address ADDR.
3849 If the branch needs to be implemented as an indirect jump, try to find
3850 a scratch register for it.
3851 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3852 If any preceding insn that doesn't fit into a delay slot is good enough,
3853 pass 1. Pass 2 if a definite blocking insn is needed.
3854 -1 is used internally to avoid deep recursion.
3855 If a blocking instruction is made or recognized, return it. */
3856
3857 static rtx
3858	gen_block_redirect (rtx jump, int addr, int need_block)
3859 {
3860 int dead = 0;
3861 rtx prev = prev_nonnote_insn (jump);
3862 rtx dest;
3863
3864 /* First, check if we already have an instruction that satisfies our need. */
3865 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3866 {
3867 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3868 return prev;
3869 if (GET_CODE (PATTERN (prev)) == USE
3870 || GET_CODE (PATTERN (prev)) == CLOBBER
3871 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3872 prev = jump;
3873 else if ((need_block &= ~1) < 0)
3874 return prev;
3875 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3876 need_block = 0;
3877 }
3878 if (GET_CODE (PATTERN (jump)) == RETURN)
3879 {
3880 if (! need_block)
3881 return prev;
3882 /* Reorg even does nasty things with return insns that cause branches
3883 to go out of range - see find_end_label and callers. */
3884 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3885 }
3886 /* We can't use JUMP_LABEL here because it might be undefined
3887 when not optimizing. */
3888 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3889 /* If the branch is out of range, try to find a scratch register for it. */
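  /* (The comparison below is the usual unsigned range-check idiom:
     computed in unsigned arithmetic, DEST - ADDR + 4092 > 4092 + 4098
     holds exactly when the displacement is outside [-4092, 4098], which
     matches the reach of a bra - a signed 12-bit displacement in 2-byte
     units, counted from the branch address + 4.)  */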
3890 if (optimize
3891 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3892 > 4092 + 4098))
3893 {
3894 rtx scan;
3895 /* Don't look for the stack pointer as a scratch register,
3896 it would cause trouble if an interrupt occurred. */
3897 unsigned try = 0x7fff, used;
3898 int jump_left = flag_expensive_optimizations + 1;
3899
3900 /* It is likely that the most recent eligible instruction is wanted for
3901 the delay slot. Therefore, find out which registers it uses, and
3902 try to avoid using them. */
3903
3904 for (scan = jump; (scan = PREV_INSN (scan)); )
3905 {
3906 enum rtx_code code;
3907
3908 if (INSN_DELETED_P (scan))
3909 continue;
3910 code = GET_CODE (scan);
3911 if (code == CODE_LABEL || code == JUMP_INSN)
3912 break;
3913 if (code == INSN
3914 && GET_CODE (PATTERN (scan)) != USE
3915 && GET_CODE (PATTERN (scan)) != CLOBBER
3916 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3917 {
3918 try &= ~regs_used (PATTERN (scan), 0);
3919 break;
3920 }
3921 }
3922 for (used = dead = 0, scan = JUMP_LABEL (jump);
3923 (scan = NEXT_INSN (scan)); )
3924 {
3925 enum rtx_code code;
3926
3927 if (INSN_DELETED_P (scan))
3928 continue;
3929 code = GET_CODE (scan);
3930 if (INSN_P (scan))
3931 {
3932 used |= regs_used (PATTERN (scan), 0);
3933 if (code == CALL_INSN)
3934 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3935 dead |= (used >> 16) & ~used;
3936 if (dead & try)
3937 {
3938 dead &= try;
3939 break;
3940 }
3941 if (code == JUMP_INSN)
3942 {
3943 if (jump_left-- && simplejump_p (scan))
3944 scan = JUMP_LABEL (scan);
3945 else
3946 break;
3947 }
3948 }
3949 }
3950 /* Mask out the stack pointer again, in case it was
3951 the only 'free' register we have found. */
3952 dead &= 0x7fff;
3953 }
3954 /* If the immediate destination is still in range, check for possible
3955 threading with a jump beyond the delay slot insn.
3956	     Don't check if we are called recursively; in that case, the jump has
3957	     already been or will be checked in a different invocation.  */
3958
3959 else if (optimize && need_block >= 0)
3960 {
3961 rtx next = next_active_insn (next_active_insn (dest));
3962 if (next && GET_CODE (next) == JUMP_INSN
3963 && GET_CODE (PATTERN (next)) == SET
3964 && recog_memoized (next) == CODE_FOR_jump_compact)
3965 {
3966 dest = JUMP_LABEL (next);
3967 if (dest
3968 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3969 > 4092 + 4098))
3970 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3971 }
3972 }
3973
3974 if (dead)
3975 {
3976 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3977
3978 /* It would be nice if we could convert the jump into an indirect
3979	     jump / far branch right now, and thus expose all constituent
3980 instructions to further optimization. However, reorg uses
3981 simplejump_p to determine if there is an unconditional jump where
3982 it should try to schedule instructions from the target of the
3983 branch; simplejump_p fails for indirect jumps even if they have
3984 a JUMP_LABEL. */
3985 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3986 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3987 , jump);
3988 /* ??? We would like this to have the scope of the jump, but that
3989 scope will change when a delay slot insn of an inner scope is added.
3990 Hence, after delay slot scheduling, we'll have to expect
3991 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3992 the jump. */
3993
3994 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3995 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3996 return insn;
3997 }
3998 else if (need_block)
3999 /* We can't use JUMP_LABEL here because it might be undefined
4000 when not optimizing. */
4001 return emit_insn_before (gen_block_branch_redirect
4002 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4003 , jump);
4004 return prev;
4005 }
4006
4007 #define CONDJUMP_MIN -252
4008 #define CONDJUMP_MAX 262
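/* (These limits correspond to the reach of an SH conditional branch:
   bt/bf use a signed 8-bit displacement in 2-byte units, counted from the
   branch address + 4, so the raw hardware window is [addr - 252,
   addr + 258]; CONDJUMP_MAX includes a little extra slack for insns
   emitted around the branch.)  */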
4009 struct far_branch
4010 {
4011 /* A label (to be placed) in front of the jump
4012 that jumps to our ultimate destination. */
4013 rtx near_label;
4014 /* Where we are going to insert it if we cannot move the jump any farther,
4015 or the jump itself if we have picked up an existing jump. */
4016 rtx insert_place;
4017 /* The ultimate destination. */
4018 rtx far_label;
4019 struct far_branch *prev;
4020 /* If the branch has already been created, its address;
4021 else the address of its first prospective user. */
4022 int address;
4023 };
4024
4025 static void gen_far_branch (struct far_branch *);
4026 enum mdep_reorg_phase_e mdep_reorg_phase;
4027 static void
4028	gen_far_branch (struct far_branch *bp)
4029 {
4030 rtx insn = bp->insert_place;
4031 rtx jump;
4032 rtx label = gen_label_rtx ();
4033 int ok;
4034
4035 emit_label_after (label, insn);
4036 if (bp->far_label)
4037 {
4038 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4039 LABEL_NUSES (bp->far_label)++;
4040 }
4041 else
4042 jump = emit_jump_insn_after (gen_return (), insn);
4043 /* Emit a barrier so that reorg knows that any following instructions
4044 are not reachable via a fall-through path.
4045 But don't do this when not optimizing, since we wouldn't suppress the
4046 alignment for the barrier then, and could end up with out-of-range
4047 pc-relative loads. */
4048 if (optimize)
4049 emit_barrier_after (jump);
4050 emit_label_after (bp->near_label, insn);
4051 JUMP_LABEL (jump) = bp->far_label;
4052 ok = invert_jump (insn, label, 1);
4053 gcc_assert (ok);
4054
4055 /* If we are branching around a jump (rather than a return), prevent
4056 reorg from using an insn from the jump target as the delay slot insn -
4057	     when reorg did this, it pessimized code (we'd rather hide the delay slot)
4058 and it could cause branches to go out of range. */
4059 if (bp->far_label)
4060 (emit_insn_after
4061 (gen_stuff_delay_slot
4062 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4063 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4064 insn));
4065 /* Prevent reorg from undoing our splits. */
4066 gen_block_redirect (jump, bp->address += 2, 2);
4067 }
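
/* A sketch of the transformation gen_far_branch performs (hypothetical
   labels; the real labels are generated above):

       bt      .Lfar          becomes        bf      .Lskip
                                     .Lnear:
                                             bra     .Lfar
                                             nop
                                     .Lskip:

   The condition is inverted to skip over an unconditional bra that can
   reach the distant target; .Lnear lets other out-of-range branches to the
   same destination reuse this bra.  */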
4068
4069 /* Fix up ADDR_DIFF_VECs. */
4070 void
4071	fixup_addr_diff_vecs (rtx first)
4072 {
4073 rtx insn;
4074
4075 for (insn = first; insn; insn = NEXT_INSN (insn))
4076 {
4077 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4078
4079 if (GET_CODE (insn) != JUMP_INSN
4080 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4081 continue;
4082 pat = PATTERN (insn);
4083 vec_lab = XEXP (XEXP (pat, 0), 0);
4084
4085 /* Search the matching casesi_jump_2. */
4086 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4087 {
4088 if (GET_CODE (prev) != JUMP_INSN)
4089 continue;
4090 prevpat = PATTERN (prev);
4091 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4092 continue;
4093 x = XVECEXP (prevpat, 0, 1);
4094 if (GET_CODE (x) != USE)
4095 continue;
4096 x = XEXP (x, 0);
4097 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4098 break;
4099 }
4100 /* FIXME: This is a bug in the optimizer, but it seems harmless
4101	   to just avoid panicking.  */
4102 if (!prev)
4103 continue;
4104
4105 /* Emit the reference label of the braf where it belongs, right after
4106 the casesi_jump_2 (i.e. braf). */
4107 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4108 emit_label_after (braf_label, prev);
4109
4110 /* Fix up the ADDR_DIF_VEC to be relative
4111 to the reference address of the braf. */
4112 XEXP (XEXP (pat, 0), 0) = braf_label;
4113 }
4114 }
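
/* A sketch of the layout after this fixup (hypothetical labels):

       braf    r0             ! casesi_jump_2
   .Lref:                     ! braf_label, emitted above
       ...
   .Lvec:
       .word   .Lcase0 - .Lref
       .word   .Lcase1 - .Lref

   i.e. the ADDR_DIFF_VEC entries become offsets from the braf's reference
   address rather than from the vector label itself.  */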
4115
4116 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4117 a barrier. Return the base 2 logarithm of the desired alignment. */
4118 int
4119	barrier_align (rtx barrier_or_label)
4120 {
4121 rtx next = next_real_insn (barrier_or_label), pat, prev;
4122 int slot, credit, jump_to_next = 0;
4123
4124 if (! next)
4125 return 0;
4126
4127 pat = PATTERN (next);
4128
4129 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4130 return 2;
4131
4132 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4133 /* This is a barrier in front of a constant table. */
4134 return 0;
4135
4136 prev = prev_real_insn (barrier_or_label);
4137 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4138 {
4139 pat = PATTERN (prev);
4140 /* If this is a very small table, we want to keep the alignment after
4141 the table to the minimum for proper code alignment. */
4142 return ((TARGET_SMALLCODE
4143 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4144 <= (unsigned) 1 << (CACHE_LOG - 2)))
4145 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4146 }
4147
4148 if (TARGET_SMALLCODE)
4149 return 0;
4150
4151 if (! TARGET_SH2 || ! optimize)
4152 return align_jumps_log;
4153
4154 /* When fixing up pcloads, a constant table might be inserted just before
4155 the basic block that ends with the barrier. Thus, we can't trust the
4156 instruction lengths before that. */
4157 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4158 {
4159 /* Check if there is an immediately preceding branch to the insn beyond
4160	 the barrier.  We must weigh the cost of discarding useful information
4161 from the current cache line when executing this branch and there is
4162 an alignment, against that of fetching unneeded insn in front of the
4163 branch target when there is no alignment. */
4164
4165 /* There are two delay_slot cases to consider. One is the simple case
4166 where the preceding branch is to the insn beyond the barrier (simple
4167 delay slot filling), and the other is where the preceding branch has
4168 a delay slot that is a duplicate of the insn after the barrier
4169 (fill_eager_delay_slots) and the branch is to the insn after the insn
4170 after the barrier. */
4171
4172 /* PREV is presumed to be the JUMP_INSN for the barrier under
4173 investigation. Skip to the insn before it. */
4174 prev = prev_real_insn (prev);
4175
4176 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4177 credit >= 0 && prev && GET_CODE (prev) == INSN;
4178 prev = prev_real_insn (prev))
4179 {
4180 jump_to_next = 0;
4181 if (GET_CODE (PATTERN (prev)) == USE
4182 || GET_CODE (PATTERN (prev)) == CLOBBER)
4183 continue;
4184 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4185 {
4186 prev = XVECEXP (PATTERN (prev), 0, 1);
4187 if (INSN_UID (prev) == INSN_UID (next))
4188 {
4189 /* Delay slot was filled with insn at jump target. */
4190 jump_to_next = 1;
4191 continue;
4192 }
4193 }
4194
4195 if (slot &&
4196 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4197 slot = 0;
4198 credit -= get_attr_length (prev);
4199 }
4200 if (prev
4201 && GET_CODE (prev) == JUMP_INSN
4202 && JUMP_LABEL (prev))
4203 {
4204 rtx x;
4205 if (jump_to_next
4206 || next_real_insn (JUMP_LABEL (prev)) == next
4207 /* If relax_delay_slots() decides NEXT was redundant
4208 with some previous instruction, it will have
4209 redirected PREV's jump to the following insn. */
4210 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4211 /* There is no upper bound on redundant instructions
4212 that might have been skipped, but we must not put an
4213 alignment where none had been before. */
4214 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4215 (INSN_P (x)
4216 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4217 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4218 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4219 {
4220 rtx pat = PATTERN (prev);
4221 if (GET_CODE (pat) == PARALLEL)
4222 pat = XVECEXP (pat, 0, 0);
4223 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4224 return 0;
4225 }
4226 }
4227 }
4228
4229 return align_jumps_log;
4230 }
4231
4232 /* If we are inside a phony loop, almost any kind of label can turn up as the
4233 first one in the loop. Aligning a braf label causes incorrect switch
4234 destination addresses; we can detect braf labels because they are
4235 followed by a BARRIER.
4236 Applying loop alignment to small constant or switch tables is a waste
4237 of space, so we suppress this too. */
4238 int
4239	sh_loop_align (rtx label)
4240 {
4241 rtx next = label;
4242
4243 do
4244 next = next_nonnote_insn (next);
4245 while (next && GET_CODE (next) == CODE_LABEL);
4246
4247 if (! next
4248 || ! INSN_P (next)
4249 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4250 || recog_memoized (next) == CODE_FOR_consttable_2)
4251 return 0;
4252
4253 return align_loops_log;
4254 }
4255
4256 /* Do a final pass over the function, just before delayed branch
4257 scheduling. */
4258
4259 static void
4260	sh_reorg (void)
4261 {
4262 rtx first, insn, mova = NULL_RTX;
4263 int num_mova;
4264 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4265 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4266
4267 first = get_insns ();
4268
4269 /* We must split call insns before introducing `mova's. If we're
4270 optimizing, they'll have already been split. Otherwise, make
4271 sure we don't split them too late. */
4272 if (! optimize)
4273 split_all_insns_noflow ();
4274
4275 if (TARGET_SHMEDIA)
4276 return;
4277
4278 /* If relaxing, generate pseudo-ops to associate function calls with
4279 the symbols they call. It does no harm to not generate these
4280	     pseudo-ops.  However, when we can generate them, it enables the
4281	     linker to potentially relax the jsr to a bsr, and eliminate the
4282 register load and, possibly, the constant pool entry. */
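
  /* A sketch of what this enables (hypothetical label number; the label
     and the .uses pseudo-op are emitted in final_prescan_insn):

       .L42:
	       mov.l   .Lconst,r1    ! r1 := address of foo
	       ...
	       .uses   .L42
	       jsr     @r1

     The linker can then relax the jsr into a direct  bsr foo  when foo is
     in range, leaving the register load and, possibly, the constant pool
     entry dead.  */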
4283
4284 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4285 if (TARGET_RELAX)
4286 {
4287 /* Remove all REG_LABEL notes. We want to use them for our own
4288 purposes. This works because none of the remaining passes
4289 need to look at them.
4290
4291 ??? But it may break in the future. We should use a machine
4292 dependent REG_NOTE, or some other approach entirely. */
4293 for (insn = first; insn; insn = NEXT_INSN (insn))
4294 {
4295 if (INSN_P (insn))
4296 {
4297 rtx note;
4298
4299 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4300 remove_note (insn, note);
4301 }
4302 }
4303
4304 for (insn = first; insn; insn = NEXT_INSN (insn))
4305 {
4306 rtx pattern, reg, link, set, scan, dies, label;
4307 int rescan = 0, foundinsn = 0;
4308
4309 if (GET_CODE (insn) == CALL_INSN)
4310 {
4311 pattern = PATTERN (insn);
4312
4313 if (GET_CODE (pattern) == PARALLEL)
4314 pattern = XVECEXP (pattern, 0, 0);
4315 if (GET_CODE (pattern) == SET)
4316 pattern = SET_SRC (pattern);
4317
4318 if (GET_CODE (pattern) != CALL
4319 || GET_CODE (XEXP (pattern, 0)) != MEM)
4320 continue;
4321
4322 reg = XEXP (XEXP (pattern, 0), 0);
4323 }
4324 else
4325 {
4326 reg = sfunc_uses_reg (insn);
4327 if (! reg)
4328 continue;
4329 }
4330
4331 if (GET_CODE (reg) != REG)
4332 continue;
4333
4334 /* This is a function call via REG. If the only uses of REG
4335 between the time that it is set and the time that it dies
4336 are in function calls, then we can associate all the
4337 function calls with the setting of REG. */
4338
4339 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4340 {
4341 if (REG_NOTE_KIND (link) != 0)
4342 continue;
4343 set = single_set (XEXP (link, 0));
4344 if (set && rtx_equal_p (reg, SET_DEST (set)))
4345 {
4346 link = XEXP (link, 0);
4347 break;
4348 }
4349 }
4350
4351 if (! link)
4352 {
4353 /* ??? Sometimes global register allocation will have
4354 deleted the insn pointed to by LOG_LINKS. Try
4355 scanning backward to find where the register is set. */
4356 for (scan = PREV_INSN (insn);
4357 scan && GET_CODE (scan) != CODE_LABEL;
4358 scan = PREV_INSN (scan))
4359 {
4360 if (! INSN_P (scan))
4361 continue;
4362
4363 if (! reg_mentioned_p (reg, scan))
4364 continue;
4365
4366 if (noncall_uses_reg (reg, scan, &set))
4367 break;
4368
4369 if (set)
4370 {
4371 link = scan;
4372 break;
4373 }
4374 }
4375 }
4376
4377 if (! link)
4378 continue;
4379
4380 /* The register is set at LINK. */
4381
4382 /* We can only optimize the function call if the register is
4383 being set to a symbol. In theory, we could sometimes
4384 optimize calls to a constant location, but the assembler
4385 and linker do not support that at present. */
4386 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4387 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4388 continue;
4389
4390 /* Scan forward from LINK to the place where REG dies, and
4391 make sure that the only insns which use REG are
4392 themselves function calls. */
4393
4394 /* ??? This doesn't work for call targets that were allocated
4395 by reload, since there may not be a REG_DEAD note for the
4396 register. */
4397
4398 dies = NULL_RTX;
4399 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4400 {
4401 rtx scanset;
4402
4403 /* Don't try to trace forward past a CODE_LABEL if we haven't
4404 seen INSN yet. Ordinarily, we will only find the setting insn
4405 in LOG_LINKS if it is in the same basic block. However,
4406 cross-jumping can insert code labels in between the load and
4407 the call, and can result in situations where a single call
4408 insn may have two targets depending on where we came from. */
4409
4410 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4411 break;
4412
4413 if (! INSN_P (scan))
4414 continue;
4415
4416 /* Don't try to trace forward past a JUMP. To optimize
4417 safely, we would have to check that all the
4418 instructions at the jump destination did not use REG. */
4419
4420 if (GET_CODE (scan) == JUMP_INSN)
4421 break;
4422
4423 if (! reg_mentioned_p (reg, scan))
4424 continue;
4425
4426 if (noncall_uses_reg (reg, scan, &scanset))
4427 break;
4428
4429 if (scan == insn)
4430 foundinsn = 1;
4431
4432 if (scan != insn
4433 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4434 {
4435 /* There is a function call to this register other
4436 than the one we are checking. If we optimize
4437 this call, we need to rescan again below. */
4438 rescan = 1;
4439 }
4440
4441 /* ??? We shouldn't have to worry about SCANSET here.
4442 We should just be able to check for a REG_DEAD note
4443 on a function call. However, the REG_DEAD notes are
4444 apparently not dependable around libcalls; c-torture
4445 execute/920501-2 is a test case. If SCANSET is set,
4446 then this insn sets the register, so it must have
4447 died earlier. Unfortunately, this will only handle
4448 the cases in which the register is, in fact, set in a
4449 later insn. */
4450
4451 /* ??? We shouldn't have to use FOUNDINSN here.
4452 However, the LOG_LINKS fields are apparently not
4453 entirely reliable around libcalls;
4454 newlib/libm/math/e_pow.c is a test case. Sometimes
4455 an insn will appear in LOG_LINKS even though it is
4456 not the most recent insn which sets the register. */
4457
4458 if (foundinsn
4459 && (scanset
4460 || find_reg_note (scan, REG_DEAD, reg)))
4461 {
4462 dies = scan;
4463 break;
4464 }
4465 }
4466
4467 if (! dies)
4468 {
4469 /* Either there was a branch, or some insn used REG
4470 other than as a function call address. */
4471 continue;
4472 }
4473
4474 /* Create a code label, and put it in a REG_LABEL note on
4475 the insn which sets the register, and on each call insn
4476 which uses the register. In final_prescan_insn we look
4477 for the REG_LABEL notes, and output the appropriate label
4478 or pseudo-op. */
4479
4480 label = gen_label_rtx ();
4481 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4482 REG_NOTES (link));
4483 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4484 REG_NOTES (insn));
4485 if (rescan)
4486 {
4487 scan = link;
4488 do
4489 {
4490 rtx reg2;
4491
4492 scan = NEXT_INSN (scan);
4493 if (scan != insn
4494 && ((GET_CODE (scan) == CALL_INSN
4495 && reg_mentioned_p (reg, scan))
4496 || ((reg2 = sfunc_uses_reg (scan))
4497 && REGNO (reg2) == REGNO (reg))))
4498 REG_NOTES (scan)
4499 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4500 }
4501 while (scan != dies);
4502 }
4503 }
4504 }
4505
4506 if (TARGET_SH2)
4507 fixup_addr_diff_vecs (first);
4508
4509 if (optimize)
4510 {
4511 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4512 shorten_branches (first);
4513 }
4514 /* Scan the function looking for move instructions which have to be
4515 changed to pc-relative loads and insert the literal tables. */
4516
4517 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4518 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4519 {
4520 if (mova_p (insn))
4521 {
4522 /* ??? basic block reordering can move a switch table dispatch
4523 below the switch table. Check if that has happened.
4524 We only have the addresses available when optimizing; but then,
4525 this check shouldn't be needed when not optimizing. */
4526 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4527 if (optimize
4528 && (INSN_ADDRESSES (INSN_UID (insn))
4529 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4530 {
4531 /* Change the mova into a load.
4532 broken_move will then return true for it. */
4533 fixup_mova (insn);
4534 }
4535 else if (! num_mova++)
4536 mova = insn;
4537 }
4538 else if (GET_CODE (insn) == JUMP_INSN
4539 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4540 && num_mova)
4541 {
4542 rtx scan;
4543 int total;
4544
4545 num_mova--;
4546
4547 /* Some code might have been inserted between the mova and
4548 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4549 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4550 total += get_attr_length (scan);
4551
4552	  /* The range of a mova is 1020 bytes; add 4 because the pc counts from
4553	     the address of the second instruction after this one, and subtract 2
4554	     in case the pc is 2-byte aligned.  Alignment needed for the
4555	     ADDR_DIFF_VEC cancels out with the alignment effects of the mova.  */
4556 if (total > 1022)
4557 {
4558 /* Change the mova into a load, and restart scanning
4559 there. broken_move will then return true for mova. */
4560 fixup_mova (mova);
4561 insn = mova;
4562 }
4563 }
4564 if (broken_move (insn)
4565 || (GET_CODE (insn) == INSN
4566 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4567 {
4568 rtx scan;
4569 /* Scan ahead looking for a barrier to stick the constant table
4570 behind. */
4571 rtx barrier = find_barrier (num_mova, mova, insn);
4572 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4573 int need_aligned_label = 0;
4574
4575 if (num_mova && ! mova_p (mova))
4576 {
4577 /* find_barrier had to change the first mova into a
4578 pcload; thus, we have to start with this new pcload. */
4579 insn = mova;
4580 num_mova = 0;
4581 }
4582 /* Now find all the moves between the points and modify them. */
4583 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4584 {
4585 if (GET_CODE (scan) == CODE_LABEL)
4586 last_float = 0;
4587 if (GET_CODE (scan) == INSN
4588 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4589 need_aligned_label = 1;
4590 if (broken_move (scan))
4591 {
4592 rtx *patp = &PATTERN (scan), pat = *patp;
4593 rtx src, dst;
4594 rtx lab;
4595 rtx newsrc;
4596 enum machine_mode mode;
4597
4598 if (GET_CODE (pat) == PARALLEL)
4599 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4600 src = SET_SRC (pat);
4601 dst = SET_DEST (pat);
4602 mode = GET_MODE (dst);
4603
4604 if (mode == SImode && hi_const (src)
4605 && REGNO (dst) != FPUL_REG)
4606 {
4607 int offset = 0;
4608
4609 mode = HImode;
4610 while (GET_CODE (dst) == SUBREG)
4611 {
4612 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4613 GET_MODE (SUBREG_REG (dst)),
4614 SUBREG_BYTE (dst),
4615 GET_MODE (dst));
4616 dst = SUBREG_REG (dst);
4617 }
4618 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4619 }
4620 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4621 {
4622 /* This must be an insn that clobbers r0. */
4623 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4624 XVECLEN (PATTERN (scan), 0)
4625 - 1);
4626 rtx clobber = *clobberp;
4627
4628 gcc_assert (GET_CODE (clobber) == CLOBBER
4629 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4630
4631 if (last_float
4632 && reg_set_between_p (r0_rtx, last_float_move, scan))
4633 last_float = 0;
4634 if (last_float
4635 && TARGET_SHCOMPACT
4636 && GET_MODE_SIZE (mode) != 4
4637 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4638 last_float = 0;
4639 lab = add_constant (src, mode, last_float);
4640 if (lab)
4641 emit_insn_before (gen_mova (lab), scan);
4642 else
4643 {
4644 /* There will be a REG_UNUSED note for r0 on
4645		     LAST_FLOAT_MOVE; we have to change it to REG_INC;
4646		     otherwise reorg:mark_target_live_regs will not
4647		     consider r0 to be used, and we would end up with a delay
4648		     slot insn in front of SCAN that clobbers r0.  */
4649 rtx note
4650 = find_regno_note (last_float_move, REG_UNUSED, 0);
4651
4652 /* If we are not optimizing, then there may not be
4653 a note. */
4654 if (note)
4655 PUT_MODE (note, REG_INC);
4656
4657 *last_float_addr = r0_inc_rtx;
4658 }
4659 last_float_move = scan;
4660 last_float = src;
4661 newsrc = gen_const_mem (mode,
4662 (((TARGET_SH4 && ! TARGET_FMOVD)
4663 || REGNO (dst) == FPUL_REG)
4664 ? r0_inc_rtx
4665 : r0_rtx));
4666 last_float_addr = &XEXP (newsrc, 0);
4667
4668 /* Remove the clobber of r0. */
4669 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4670 gen_rtx_SCRATCH (Pmode));
4671 }
4672 /* This is a mova needing a label. Create it. */
4673 else if (GET_CODE (src) == UNSPEC
4674 && XINT (src, 1) == UNSPEC_MOVA
4675 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4676 {
4677 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4678 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4679 newsrc = gen_rtx_UNSPEC (SImode,
4680 gen_rtvec (1, newsrc),
4681 UNSPEC_MOVA);
4682 }
4683 else
4684 {
4685 lab = add_constant (src, mode, 0);
4686 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4687 newsrc = gen_const_mem (mode, newsrc);
4688 }
4689 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4690 INSN_CODE (scan) = -1;
4691 }
4692 }
4693 dump_table (need_aligned_label ? insn : 0, barrier);
4694 insn = barrier;
4695 }
4696 }
4697
4698 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4699 INSN_ADDRESSES_FREE ();
4700 split_branches (first);
4701
4702 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4703 also has an effect on the register that holds the address of the sfunc.
4704 Insert an extra dummy insn in front of each sfunc that pretends to
4705 use this register. */
4706 if (flag_delayed_branch)
4707 {
4708 for (insn = first; insn; insn = NEXT_INSN (insn))
4709 {
4710 rtx reg = sfunc_uses_reg (insn);
4711
4712 if (! reg)
4713 continue;
4714 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4715 }
4716 }
4717 #if 0
4718 /* fpscr is not actually a user variable, but we pretend it is for the
4719 sake of the previous optimization passes, since we want it handled like
4720 one. However, we don't have any debugging information for it, so turn
4721 it into a non-user variable now. */
4722 if (TARGET_SH4)
4723 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4724 #endif
4725 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4726 }
4727
4728 int
4729	get_dest_uid (rtx label, int max_uid)
4730 {
4731 rtx dest = next_real_insn (label);
4732 int dest_uid;
4733 if (! dest)
4734 /* This can happen for an undefined label. */
4735 return 0;
4736 dest_uid = INSN_UID (dest);
4737 /* If this is a newly created branch redirection blocking instruction,
4738 we cannot index the branch_uid or insn_addresses arrays with its
4739 uid. But then, we won't need to, because the actual destination is
4740 the following branch. */
4741 while (dest_uid >= max_uid)
4742 {
4743 dest = NEXT_INSN (dest);
4744 dest_uid = INSN_UID (dest);
4745 }
4746 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4747 return 0;
4748 return dest_uid;
4749 }
4750
4751 /* Split condbranches that are out of range. Also add clobbers for
4752 scratch registers that are needed in far jumps.
4753 We do this before delay slot scheduling, so that it can take our
4754 newly created instructions into account. It also allows us to
4755 find branches with common targets more easily. */
4756
4757 static void
4758	split_branches (rtx first)
4759 {
4760 rtx insn;
4761 struct far_branch **uid_branch, *far_branch_list = 0;
4762 int max_uid = get_max_uid ();
4763 int ok;
4764
4765 /* Find out which branches are out of range. */
4766 shorten_branches (first);
4767
4768 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4769 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4770
4771 for (insn = first; insn; insn = NEXT_INSN (insn))
4772 if (! INSN_P (insn))
4773 continue;
4774 else if (INSN_DELETED_P (insn))
4775 {
4776 /* Shorten_branches would split this instruction again,
4777 so transform it into a note. */
4778 PUT_CODE (insn, NOTE);
4779 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4780 NOTE_SOURCE_FILE (insn) = 0;
4781 }
4782 else if (GET_CODE (insn) == JUMP_INSN
4783 /* Don't mess with ADDR_DIFF_VEC */
4784 && (GET_CODE (PATTERN (insn)) == SET
4785 || GET_CODE (PATTERN (insn)) == RETURN))
4786 {
4787 enum attr_type type = get_attr_type (insn);
4788 if (type == TYPE_CBRANCH)
4789 {
4790 rtx next, beyond;
4791
4792 if (get_attr_length (insn) > 4)
4793 {
4794 rtx src = SET_SRC (PATTERN (insn));
4795 rtx olabel = XEXP (XEXP (src, 1), 0);
4796 int addr = INSN_ADDRESSES (INSN_UID (insn));
4797 rtx label = 0;
4798 int dest_uid = get_dest_uid (olabel, max_uid);
4799 struct far_branch *bp = uid_branch[dest_uid];
4800
4801 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4802 the label if the LABEL_NUSES count drops to zero. There is
4803 always a jump_optimize pass that sets these values, but it
4804 proceeds to delete unreferenced code, and then if not
4805 optimizing, to un-delete the deleted instructions, thus
4806	     leaving labels with too-low use counts.  */
4807 if (! optimize)
4808 {
4809 JUMP_LABEL (insn) = olabel;
4810 LABEL_NUSES (olabel)++;
4811 }
4812 if (! bp)
4813 {
4814 bp = (struct far_branch *) alloca (sizeof *bp);
4815 uid_branch[dest_uid] = bp;
4816 bp->prev = far_branch_list;
4817 far_branch_list = bp;
4818 bp->far_label
4819 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4820 LABEL_NUSES (bp->far_label)++;
4821 }
4822 else
4823 {
4824 label = bp->near_label;
4825 if (! label && bp->address - addr >= CONDJUMP_MIN)
4826 {
4827 rtx block = bp->insert_place;
4828
4829 if (GET_CODE (PATTERN (block)) == RETURN)
4830 block = PREV_INSN (block);
4831 else
4832 block = gen_block_redirect (block,
4833 bp->address, 2);
4834 label = emit_label_after (gen_label_rtx (),
4835 PREV_INSN (block));
4836 bp->near_label = label;
4837 }
4838 else if (label && ! NEXT_INSN (label))
4839 {
4840 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4841 bp->insert_place = insn;
4842 else
4843 gen_far_branch (bp);
4844 }
4845 }
4846 if (! label
4847 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4848 {
4849 bp->near_label = label = gen_label_rtx ();
4850 bp->insert_place = insn;
4851 bp->address = addr;
4852 }
4853 ok = redirect_jump (insn, label, 1);
4854 gcc_assert (ok);
4855 }
4856 else
4857 {
4858 /* get_attr_length (insn) == 2 */
4859 /* Check if we have a pattern where reorg wants to redirect
4860 the branch to a label from an unconditional branch that
4861 is too far away. */
4862 /* We can't use JUMP_LABEL here because it might be undefined
4863 when not optimizing. */
4864 /* A syntax error might cause beyond to be NULL_RTX. */
4865 beyond
4866 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4867 0));
4868
4869 if (beyond
4870 && (GET_CODE (beyond) == JUMP_INSN
4871 || ((beyond = next_active_insn (beyond))
4872 && GET_CODE (beyond) == JUMP_INSN))
4873 && GET_CODE (PATTERN (beyond)) == SET
4874 && recog_memoized (beyond) == CODE_FOR_jump_compact
4875 && ((INSN_ADDRESSES
4876 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4877 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4878 > 252 + 258 + 2))
4879 gen_block_redirect (beyond,
4880 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4881 }
4882
4883 next = next_active_insn (insn);
4884
4885 if ((GET_CODE (next) == JUMP_INSN
4886 || ((next = next_active_insn (next))
4887 && GET_CODE (next) == JUMP_INSN))
4888 && GET_CODE (PATTERN (next)) == SET
4889 && recog_memoized (next) == CODE_FOR_jump_compact
4890 && ((INSN_ADDRESSES
4891 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4892 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4893 > 252 + 258 + 2))
4894 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4895 }
4896 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4897 {
4898 int addr = INSN_ADDRESSES (INSN_UID (insn));
4899 rtx far_label = 0;
4900 int dest_uid = 0;
4901 struct far_branch *bp;
4902
4903 if (type == TYPE_JUMP)
4904 {
4905 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4906 dest_uid = get_dest_uid (far_label, max_uid);
4907 if (! dest_uid)
4908 {
4909 /* Parse errors can lead to labels outside
4910 the insn stream. */
4911 if (! NEXT_INSN (far_label))
4912 continue;
4913
4914 if (! optimize)
4915 {
4916 JUMP_LABEL (insn) = far_label;
4917 LABEL_NUSES (far_label)++;
4918 }
4919 redirect_jump (insn, NULL_RTX, 1);
4920 far_label = 0;
4921 }
4922 }
4923 bp = uid_branch[dest_uid];
4924 if (! bp)
4925 {
4926 bp = (struct far_branch *) alloca (sizeof *bp);
4927 uid_branch[dest_uid] = bp;
4928 bp->prev = far_branch_list;
4929 far_branch_list = bp;
4930 bp->near_label = 0;
4931 bp->far_label = far_label;
4932 if (far_label)
4933 LABEL_NUSES (far_label)++;
4934 }
4935 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4936 if (addr - bp->address <= CONDJUMP_MAX)
4937 emit_label_after (bp->near_label, PREV_INSN (insn));
4938 else
4939 {
4940 gen_far_branch (bp);
4941 bp->near_label = 0;
4942 }
4943 else
4944 bp->near_label = 0;
4945 bp->address = addr;
4946 bp->insert_place = insn;
4947 if (! far_label)
4948 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4949 else
4950 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4951 }
4952 }
4953 /* Generate all pending far branches,
4954 and free our references to the far labels. */
4955 while (far_branch_list)
4956 {
4957 if (far_branch_list->near_label
4958 && ! NEXT_INSN (far_branch_list->near_label))
4959 gen_far_branch (far_branch_list);
4960 if (optimize
4961 && far_branch_list->far_label
4962 && ! --LABEL_NUSES (far_branch_list->far_label))
4963 delete_insn (far_branch_list->far_label);
4964 far_branch_list = far_branch_list->prev;
4965 }
4966
4967 /* Instruction length information is no longer valid due to the new
4968 instructions that have been generated. */
4969 init_insn_lengths ();
4970 }
4971
4972 /* Dump out instruction addresses, which is useful for debugging the
4973 constant pool table stuff.
4974
4975 If relaxing, output the label and pseudo-ops used to link together
4976 calls and the instruction which set the registers. */
4977
4978 /* ??? The addresses printed by this routine for insns are nonsense for
4979	   insns which are inside a sequence where none of the inner insns have
4980 variable length. This is because the second pass of shorten_branches
4981 does not bother to update them. */
4982
4983 void
4984	final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4985 int noperands ATTRIBUTE_UNUSED)
4986 {
4987 if (TARGET_DUMPISIZE)
4988 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4989
4990 if (TARGET_RELAX)
4991 {
4992 rtx note;
4993
4994 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4995 if (note)
4996 {
4997 rtx pattern;
4998
4999 pattern = PATTERN (insn);
5000 if (GET_CODE (pattern) == PARALLEL)
5001 pattern = XVECEXP (pattern, 0, 0);
5002 switch (GET_CODE (pattern))
5003 {
5004 case SET:
5005 if (GET_CODE (SET_SRC (pattern)) != CALL
5006 && get_attr_type (insn) != TYPE_SFUNC)
5007 {
5008 targetm.asm_out.internal_label
5009 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5010 break;
5011 }
5012 /* else FALLTHROUGH */
5013 case CALL:
5014 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5015 CODE_LABEL_NUMBER (XEXP (note, 0)));
5016 break;
5017
5018 default:
5019 gcc_unreachable ();
5020 }
5021 }
5022 }
5023 }
5024
5025 /* Dump out any constants accumulated in the final pass. These will
5026 only be labels. */
5027
5028 const char *
5029	output_jump_label_table (void)
5030 {
5031 int i;
5032
5033 if (pool_size)
5034 {
5035 fprintf (asm_out_file, "\t.align 2\n");
5036 for (i = 0; i < pool_size; i++)
5037 {
5038 pool_node *p = &pool_vector[i];
5039
5040 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5041 CODE_LABEL_NUMBER (p->label));
5042 output_asm_insn (".long %O0", &p->value);
5043 }
5044 pool_size = 0;
5045 }
5046
5047 return "";
5048 }
5049
5050 /* A full frame looks like:
5051
5052 arg-5
5053 arg-4
5054 [ if current_function_anonymous_args
5055 arg-3
5056 arg-2
5057 arg-1
5058 arg-0 ]
5059 saved-fp
5060 saved-r10
5061 saved-r11
5062 saved-r12
5063 saved-pr
5064 local-n
5065 ..
5066 local-1
5067 local-0 <- fp points here. */
5068
5069 /* Number of bytes pushed for anonymous args, used to pass information
5070 between expand_prologue and expand_epilogue. */
5071
5072 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5073 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5074 for an epilogue and a negative value means that it's for a sibcall
5075 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5076 all the registers that are about to be restored, and hence dead. */
5077
5078 static void
5079	output_stack_adjust (int size, rtx reg, int epilogue_p,
5080 HARD_REG_SET *live_regs_mask)
5081 {
5082 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5083 if (size)
5084 {
5085 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5086
5087 /* This test is bogus, as output_stack_adjust is used to re-align the
5088 stack. */
5089 #if 0
5090 gcc_assert (!(size % align));
5091 #endif
5092
5093 if (CONST_OK_FOR_ADD (size))
5094 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5095 /* Try to do it with two partial adjustments; however, we must make
5096 sure that the stack is properly aligned at all times, in case
5097 an interrupt occurs between the two partial adjustments. */
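      /* Worked example (illustrative numbers, non-SHmedia, so the add
	 immediate is signed 8-bit): with SIZE == 200 and ALIGN == 4, the
	 first adjustment is 200 / 2 & -4 == 100 and the second is the
	 remaining 100; both fit the immediate, and the stack stays 4-byte
	 aligned after each step.  */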
5098 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5099 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5100 {
5101 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5102 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5103 }
5104 else
5105 {
5106 rtx const_reg;
5107 rtx insn;
5108 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5109 int i;
5110
5111 /* If TEMP is invalid, we could temporarily save a general
5112 register to MACL. However, there is currently no need
5113 to handle this case, so just die when we see it. */
5114 if (epilogue_p < 0
5115 || current_function_interrupt
5116 || ! call_really_used_regs[temp] || fixed_regs[temp])
5117 temp = -1;
5118 if (temp < 0 && ! current_function_interrupt
5119 && (TARGET_SHMEDIA || epilogue_p >= 0))
5120 {
5121 HARD_REG_SET temps;
5122 COPY_HARD_REG_SET (temps, call_used_reg_set);
5123 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5124 if (epilogue_p > 0)
5125 {
5126 int nreg = 0;
5127 if (current_function_return_rtx)
5128 {
5129 enum machine_mode mode;
5130 mode = GET_MODE (current_function_return_rtx);
5131 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5132 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5133 }
5134 for (i = 0; i < nreg; i++)
5135 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5136 if (current_function_calls_eh_return)
5137 {
5138 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5139 for (i = 0; i <= 3; i++)
5140 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5141 }
5142 }
5143 if (TARGET_SHMEDIA && epilogue_p < 0)
5144 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5145 CLEAR_HARD_REG_BIT (temps, i);
5146 if (epilogue_p <= 0)
5147 {
5148 for (i = FIRST_PARM_REG;
5149 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5150 CLEAR_HARD_REG_BIT (temps, i);
5151 if (cfun->static_chain_decl != NULL)
5152 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5153 }
5154 temp = scavenge_reg (&temps);
5155 }
5156 if (temp < 0 && live_regs_mask)
5157 temp = scavenge_reg (live_regs_mask);
5158 if (temp < 0)
5159 {
5160 rtx adj_reg, tmp_reg, mem;
5161
5162	      /* If we reached here, the most likely case is the (sibcall)
5163		 epilogue for non-SHmedia.  Emit a special push/pop sequence
5164		 for such a case as a last resort.  This looks lengthy, but
5165		 it is not a problem because it should be very
5166		 rare.  */
5167
5168 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5169
5170
5171 /* ??? There is still the slight possibility that r4 or
5172 r5 have been reserved as fixed registers or assigned
5173 as global registers, and they change during an
5174 interrupt. There are possible ways to handle this:
5175
5176 - If we are adjusting the frame pointer (r14), we can do
5177 with a single temp register and an ordinary push / pop
5178 on the stack.
5179 - Grab any call-used or call-saved registers (i.e. not
5180 fixed or globals) for the temps we need. We might
5181 also grab r14 if we are adjusting the stack pointer.
5182 If we can't find enough available registers, issue
5183 a diagnostic and die - the user must have reserved
5184 way too many registers.
5185 But since all this is rather unlikely to happen and
5186 would require extra testing, we just die if r4 / r5
5187 are not available. */
5188 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5189 && !global_regs[4] && !global_regs[5]);
5190
5191 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5192 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5193 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5194 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5195 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5196 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5197 emit_move_insn (mem, tmp_reg);
5198 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5199 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5200 emit_move_insn (mem, tmp_reg);
5201 emit_move_insn (reg, adj_reg);
5202 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5203 emit_move_insn (adj_reg, mem);
5204 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5205 emit_move_insn (tmp_reg, mem);
5206 return;
5207 }
5208 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5209
5210 /* If SIZE is negative, subtract the positive value.
5211 This sometimes allows a constant pool entry to be shared
5212 between prologue and epilogue code. */
5213 if (size < 0)
5214 {
5215 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5216 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5217 }
5218 else
5219 {
5220 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5221 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5222 }
5223 if (! epilogue_p)
5224 REG_NOTES (insn)
5225 = (gen_rtx_EXPR_LIST
5226 (REG_FRAME_RELATED_EXPR,
5227 gen_rtx_SET (VOIDmode, reg,
5228 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5229 REG_NOTES (insn)));
5230 }
5231 }
5232 }
5233
5234 static rtx
5235	frame_insn (rtx x)
5236 {
5237 x = emit_insn (x);
5238 RTX_FRAME_RELATED_P (x) = 1;
5239 return x;
5240 }
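
/* (Marking the insn RTX_FRAME_RELATED_P makes it visible to the DWARF2
   call-frame information machinery, so the unwinder can track prologue
   stack adjustments and register saves.)  */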
5241
5242 /* Output RTL to push register RN onto the stack. */
5243
5244 static rtx
5245	push (int rn)
5246 {
5247 rtx x;
5248 if (rn == FPUL_REG)
5249 x = gen_push_fpul ();
5250 else if (rn == FPSCR_REG)
5251 x = gen_push_fpscr ();
5252 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5253 && FP_OR_XD_REGISTER_P (rn))
5254 {
5255 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5256 return NULL_RTX;
5257 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5258 }
5259 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5260 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5261 else
5262 x = gen_push (gen_rtx_REG (SImode, rn));
5263
5264 x = frame_insn (x);
5265 REG_NOTES (x)
5266 = gen_rtx_EXPR_LIST (REG_INC,
5267 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5268 return x;
5269 }
5270
5271 /* Output RTL to pop register RN from the stack. */
5272
5273 static void
5274	pop (int rn)
5275 {
5276 rtx x;
5277 if (rn == FPUL_REG)
5278 x = gen_pop_fpul ();
5279 else if (rn == FPSCR_REG)
5280 x = gen_pop_fpscr ();
5281 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5282 && FP_OR_XD_REGISTER_P (rn))
5283 {
5284 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5285 return;
5286 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5287 }
5288 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5289 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5290 else
5291 x = gen_pop (gen_rtx_REG (SImode, rn));
5292
5293 x = emit_insn (x);
5294 REG_NOTES (x)
5295 = gen_rtx_EXPR_LIST (REG_INC,
5296 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5297 }
5298
5299 /* Generate code to push the regs specified in the mask. */
5300
5301 static void
5302	push_regs (HARD_REG_SET *mask, int interrupt_handler)
5303 {
5304 int i;
5305 int skip_fpscr = 0;
5306
5307 /* Push PR last; this gives better latencies after the prologue, and
5308 candidates for the return delay slot when there are no general
5309 registers pushed. */
5310 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5311 {
5312 /* If this is an interrupt handler, and the SZ bit varies,
5313 and we have to push any floating point register, we need
5314 to switch to the correct precision first. */
5315 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5316	  && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5317 {
5318 HARD_REG_SET unsaved;
5319
5320 push (FPSCR_REG);
5321 COMPL_HARD_REG_SET (unsaved, *mask);
5322 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5323 skip_fpscr = 1;
5324 }
5325 if (i != PR_REG
5326 && (i != FPSCR_REG || ! skip_fpscr)
5327 && TEST_HARD_REG_BIT (*mask, i))
5328 push (i);
5329 }
5330 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5331 push (PR_REG);
5332 }
5333
5334 /* Calculate how much extra space is needed to save all callee-saved
5335 target registers.
5336 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5337
5338 static int
5339	shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5340 {
5341 int reg;
5342 int stack_space = 0;
5343 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5344
5345 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5346 if ((! call_really_used_regs[reg] || interrupt_handler)
5347 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5348 /* Leave space to save this target register on the stack,
5349 in case target register allocation wants to use it. */
5350 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5351 return stack_space;
5352 }
5353
5354 /* Decide whether we should reserve space for callee-save target registers,
5355 in case target register allocation wants to use them. REGS_SAVED is
5356 the space, in bytes, that is already required for register saves.
5357 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5358
5359 static int
5360	shmedia_reserve_space_for_target_registers_p (int regs_saved,
5361 HARD_REG_SET *live_regs_mask)
5362 {
5363 if (optimize_size)
5364 return 0;
5365 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5366 }
5367
5368 /* Decide how much space to reserve for callee-save target registers
5369 in case target register allocation wants to use them.
5370 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5371
5372 static int
5373	shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5374 {
5375 if (shmedia_space_reserved_for_target_registers)
5376 return shmedia_target_regs_stack_space (live_regs_mask);
5377 else
5378 return 0;
5379 }
5380
5381 /* Work out the registers which need to be saved, both as a mask and a
5382 count of saved words. Return the count.
5383
5384 If doing a pragma interrupt function, then push all regs used by the
5385 function, and if we call another function (we can tell by looking at PR),
5386 make sure that all the regs it clobbers are safe too. */
5387
5388 static int
5389	calc_live_regs (HARD_REG_SET *live_regs_mask)
5390 {
5391 unsigned int reg;
5392 int count;
5393 tree attrs;
5394 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5395 bool nosave_low_regs;
5396 int pr_live, has_call;
5397
5398 attrs = DECL_ATTRIBUTES (current_function_decl);
5399 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5400 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5401 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5402 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5403
5404 CLEAR_HARD_REG_SET (*live_regs_mask);
5405 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5406 && regs_ever_live[FPSCR_REG])
5407 target_flags &= ~MASK_FPU_SINGLE;
5408 /* If we can avoid a lot of saves by switching to double mode, do that.  */
5409 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5410 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5411 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5412 && (! call_really_used_regs[reg]
5413 || interrupt_handler)
5414 && ++count > 2)
5415 {
5416 target_flags &= ~MASK_FPU_SINGLE;
5417 break;
5418 }
5419 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5420 knows how to use it. That means the pseudo originally allocated for
5421 the initial value can become the PR_MEDIA_REG hard register, as seen for
5422 execute/20010122-1.c:test9. */
5423 if (TARGET_SHMEDIA)
5424 /* ??? This function is called from initial_elimination_offset, hence we
5425 can't use the result of sh_media_register_for_return here. */
5426 pr_live = sh_pr_n_sets ();
5427 else
5428 {
5429 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5430 pr_live = (pr_initial
5431 ? (GET_CODE (pr_initial) != REG
5432 || REGNO (pr_initial) != (PR_REG))
5433 : regs_ever_live[PR_REG]);
5434 /* For SHcompact, if not optimizing, we end up with a memory reference
5435 using the return address pointer for __builtin_return_address even
5436 though there is no actual need to put the PR register on the stack. */
5437 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5438 }
5439 /* Force PR to be live if the prologue has to call the SHmedia
5440 argument decoder or register saver. */
5441 if (TARGET_SHCOMPACT
5442 && ((current_function_args_info.call_cookie
5443 & ~ CALL_COOKIE_RET_TRAMP (1))
5444 || current_function_has_nonlocal_label))
5445 pr_live = 1;
5446 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5447 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5448 {
5449 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5450 ? pr_live
5451 : interrupt_handler
5452 ? (/* Need to save all the regs ever live. */
5453 (regs_ever_live[reg]
5454 || (call_really_used_regs[reg]
5455 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5456 || reg == PIC_OFFSET_TABLE_REGNUM)
5457 && has_call)
5458 || (TARGET_SHMEDIA && has_call
5459 && REGISTER_NATURAL_MODE (reg) == SImode
5460 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5461 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5462 && reg != RETURN_ADDRESS_POINTER_REGNUM
5463 && reg != T_REG && reg != GBR_REG
5464 /* Push fpscr only on targets which have an FPU.  */
5465 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5466 : (/* Only push those regs which are used and need to be saved. */
5467 (TARGET_SHCOMPACT
5468 && flag_pic
5469 && current_function_args_info.call_cookie
5470 && reg == PIC_OFFSET_TABLE_REGNUM)
5471 || (regs_ever_live[reg]
5472 && (!call_really_used_regs[reg]
5473 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5474 || (current_function_calls_eh_return
5475 && (reg == EH_RETURN_DATA_REGNO (0)
5476 || reg == EH_RETURN_DATA_REGNO (1)
5477 || reg == EH_RETURN_DATA_REGNO (2)
5478 || reg == EH_RETURN_DATA_REGNO (3)))
5479 || ((reg == MACL_REG || reg == MACH_REG)
5480 && regs_ever_live[reg]
5481 && sh_cfun_attr_renesas_p ())
5482 ))
5483 {
5484 SET_HARD_REG_BIT (*live_regs_mask, reg);
5485 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5486
5487 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5488 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5489 {
5490 if (FP_REGISTER_P (reg))
5491 {
5492 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5493 {
5494 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5495 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5496 }
5497 }
5498 else if (XD_REGISTER_P (reg))
5499 {
5500 /* Must switch to double mode to access these registers. */
5501 target_flags &= ~MASK_FPU_SINGLE;
5502 }
5503 }
5504 }
5505 if (nosave_low_regs && reg == R8_REG)
5506 break;
5507 }
5508 /* If we have a target register optimization pass after prologue / epilogue
5509 threading, we need to assume all target registers will be live even if
5510 they aren't now. */
5511 if (flag_branch_target_load_optimize2
5512 && TARGET_SAVE_ALL_TARGET_REGS
5513 && shmedia_space_reserved_for_target_registers)
5514 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5515 if ((! call_really_used_regs[reg] || interrupt_handler)
5516 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5517 {
5518 SET_HARD_REG_BIT (*live_regs_mask, reg);
5519 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5520 }
5521 /* If this is an interrupt handler, we don't have any call-clobbered
5522 registers we can conveniently use for target register save/restore.
5523 Make sure we save at least one general purpose register when we need
5524 to save target registers. */
5525 if (interrupt_handler
5526 && hard_regs_intersect_p (live_regs_mask,
5527 				&reg_class_contents[TARGET_REGS])
5528 && ! hard_regs_intersect_p (live_regs_mask,
5529 				  &reg_class_contents[GENERAL_REGS]))
5530 {
5531 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5532 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5533 }
5534
5535 return count;
5536 }
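/* Typical use (as in sh_expand_prologue and sh_expand_epilogue below):

     HARD_REG_SET live_regs_mask;
     int d = calc_live_regs (&live_regs_mask);

   D is the number of bytes of register saves; it is then fed to
   rounded_frame_size () to derive the rest of the frame.  */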
5537
5538 /* Code to generate prologue and epilogue sequences.  */
5539
5540 /* PUSHED is the number of bytes that are being pushed on the
5541 stack for register saves. Return the frame size, padded
5542 appropriately so that the stack stays properly aligned. */
5543 static HOST_WIDE_INT
5544 rounded_frame_size (int pushed)
5545 {
5546 HOST_WIDE_INT size = get_frame_size ();
5547 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5548
5549 return ((size + pushed + align - 1) & -align) - pushed;
5550 }
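/* Worked example (illustrative): with STACK_BOUNDARY == 64, ALIGN is
   8 bytes.  For PUSHED == 20 and a 42-byte frame,
   ((42 + 20 + 7) & -8) - 20 == 64 - 20 == 44, so the frame is padded
   from 42 to 44 bytes, and the 20 bytes of pushes plus the 44-byte
   frame keep the stack 8-byte aligned.  */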
5551
5552 /* Choose a call-clobbered target-branch register that remains
5553    unchanged throughout the whole function.  We set it up as the return
5554 value in the prologue. */
5555 int
5556 sh_media_register_for_return (void)
5557 {
5558 int regno;
5559 int tr0_used;
5560
5561 if (! current_function_is_leaf)
5562 return -1;
5563 if (lookup_attribute ("interrupt_handler",
5564 DECL_ATTRIBUTES (current_function_decl)))
5565 return -1;
5566 if (sh_cfun_interrupt_handler_p ())
5567 return -1;
5568
5569 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5570
5571 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5572 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5573 return regno;
5574
5575 return -1;
5576 }
5577
5578 /* The maximum registers we need to save are:
5579 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5580 - 32 floating point registers (for each pair, we save none,
5581 one single precision value, or a double precision value).
5582 - 8 target registers
5583 - add 1 entry for a delimiter. */
5584 #define MAX_SAVED_REGS (62+32+8)
5585
5586 typedef struct save_entry_s
5587 {
5588 unsigned char reg;
5589 unsigned char mode;
5590 short offset;
5591 } save_entry;
5592
5593 #define MAX_TEMPS 4
5594
5595 /* There will be a delimiter entry with VOIDmode both at the start and the
5596    end of a filled-in schedule.  The end delimiter has the offset of the
5597 save with the smallest (i.e. most negative) offset. */
5598 typedef struct save_schedule_s
5599 {
5600 save_entry entries[MAX_SAVED_REGS + 2];
5601 int temps[MAX_TEMPS+1];
5602 } save_schedule;
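/* Illustration (assumed register choices, not from the original
   source): with OFFSET_BASE == 0, saving one general register GR in
   DImode and one FP register FR in SFmode fills the schedule as

     entries[0] = { reg = -1, mode = VOIDmode, offset =   0 }   start delimiter
     entries[1] = { reg = GR, mode = DImode,   offset =  -8 }   aligned pass
     entries[2] = { reg = FR, mode = SFmode,   offset = -12 }   unaligned pass
     entries[3] = { reg = -1, mode = VOIDmode, offset = -12 }   end delimiter

   and sh5_schedule_saves below returns &entries[2], the last real
   entry.  */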
5603
5604 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5605 use reverse order. Returns the last entry written to (not counting
5606 the delimiter). OFFSET_BASE is a number to be added to all offset
5607 entries. */
5608
5609 static save_entry *
5610 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5611 int offset_base)
5612 {
5613 int align, i;
5614 save_entry *entry = schedule->entries;
5615 int tmpx = 0;
5616 int offset;
5617
5618 if (! current_function_interrupt)
5619 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5620 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5621 && ! FUNCTION_ARG_REGNO_P (i)
5622 && i != FIRST_RET_REG
5623 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5624 && ! (current_function_calls_eh_return
5625 && (i == EH_RETURN_STACKADJ_REGNO
5626 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5627 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5628 schedule->temps[tmpx++] = i;
5629 entry->reg = -1;
5630 entry->mode = VOIDmode;
5631 entry->offset = offset_base;
5632 entry++;
5633 /* We loop twice: first, we save 8-byte aligned registers at the
5634    higher addresses, which are known to be aligned.  Then, we
5635    proceed to saving 32-bit registers that don't need 8-byte
5636    alignment.
5637    If this is an interrupt function, all registers that need saving
5638    need to be saved in full.  Moreover, we need to postpone saving
5639 target registers till we have saved some general purpose registers
5640 we can then use as scratch registers. */
5641 offset = offset_base;
5642 for (align = 1; align >= 0; align--)
5643 {
5644 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5645 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5646 {
5647 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5648 int reg = i;
5649
5650 if (current_function_interrupt)
5651 {
5652 if (TARGET_REGISTER_P (i))
5653 continue;
5654 if (GENERAL_REGISTER_P (i))
5655 mode = DImode;
5656 }
5657 if (mode == SFmode && (i % 2) == 1
5658 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5659 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5660 {
5661 mode = DFmode;
5662 i--;
5663 reg--;
5664 }
5665
5666 /* If we're doing the aligned pass and this is not aligned,
5667 or we're doing the unaligned pass and this is aligned,
5668 skip it. */
5669 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5670 != align)
5671 continue;
5672
5673 if (current_function_interrupt
5674 && GENERAL_REGISTER_P (i)
5675 && tmpx < MAX_TEMPS)
5676 schedule->temps[tmpx++] = i;
5677
5678 offset -= GET_MODE_SIZE (mode);
5679 entry->reg = i;
5680 entry->mode = mode;
5681 entry->offset = offset;
5682 entry++;
5683 }
5684 if (align && current_function_interrupt)
5685 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5686 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5687 {
5688 offset -= GET_MODE_SIZE (DImode);
5689 entry->reg = i;
5690 entry->mode = DImode;
5691 entry->offset = offset;
5692 entry++;
5693 }
5694 }
5695 entry->reg = -1;
5696 entry->mode = VOIDmode;
5697 entry->offset = offset;
5698 schedule->temps[tmpx] = -1;
5699 return entry - 1;
5700 }
5701
5702 void
5703 sh_expand_prologue (void)
5704 {
5705 HARD_REG_SET live_regs_mask;
5706 int d, i;
5707 int d_rounding = 0;
5708 int save_flags = target_flags;
5709 int pretend_args;
5710 tree sp_switch_attr
5711 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5712
5713 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5714
5715 /* We have pretend args if we had an object sent partially in registers
5716 and partially on the stack, e.g. a large structure. */
5717 pretend_args = current_function_pretend_args_size;
5718 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5719 && (NPARM_REGS(SImode)
5720 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5721 pretend_args = 0;
5722 output_stack_adjust (-pretend_args
5723 - current_function_args_info.stack_regs * 8,
5724 stack_pointer_rtx, 0, NULL);
5725
5726 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5727 /* We're going to use the PIC register to load the address of the
5728 incoming-argument decoder and/or of the return trampoline from
5729 the GOT, so make sure the PIC register is preserved and
5730 initialized. */
5731 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5732
5733 if (TARGET_SHCOMPACT
5734 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5735 {
5736 int reg;
5737
5738 /* First, make all registers with incoming arguments that will
5739 be pushed onto the stack live, so that register renaming
5740 doesn't overwrite them. */
5741 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5742 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5743 >= NPARM_REGS (SImode) - reg)
5744 for (; reg < NPARM_REGS (SImode); reg++)
5745 emit_insn (gen_shcompact_preserve_incoming_args
5746 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5747 else if (CALL_COOKIE_INT_REG_GET
5748 (current_function_args_info.call_cookie, reg) == 1)
5749 emit_insn (gen_shcompact_preserve_incoming_args
5750 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5751
5752 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5753 stack_pointer_rtx);
5754 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5755 GEN_INT (current_function_args_info.call_cookie));
5756 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5757 gen_rtx_REG (SImode, R0_REG));
5758 }
5759 else if (TARGET_SHMEDIA)
5760 {
5761 int tr = sh_media_register_for_return ();
5762
5763 if (tr >= 0)
5764 {
5765 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5766 gen_rtx_REG (DImode, PR_MEDIA_REG));
5767
5768 /* ??? We should suppress saving pr when we don't need it, but this
5769 is tricky because of builtin_return_address. */
5770
5771 /* If this function only exits with sibcalls, this copy
5772 will be flagged as dead. */
5773 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5774 const0_rtx,
5775 REG_NOTES (insn));
5776 }
5777 }
5778
5779 /* Emit the code for SETUP_VARARGS. */
5780 if (current_function_stdarg)
5781 {
5782 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5783 {
5784 	  /* Push arg regs as if they'd been provided by the caller on the stack.  */
5785 for (i = 0; i < NPARM_REGS(SImode); i++)
5786 {
5787 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5788 rtx insn;
5789
5790 if (i >= (NPARM_REGS(SImode)
5791 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5792 ))
5793 break;
5794 insn = push (rn);
5795 RTX_FRAME_RELATED_P (insn) = 0;
5796 }
5797 }
5798 }
5799
5800 /* If we're supposed to switch stacks at function entry, do so now. */
5801 if (sp_switch_attr)
5802 {
5803 /* The argument specifies a variable holding the address of the
5804 stack the interrupt function should switch to/from at entry/exit. */
5805 const char *s
5806 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5807 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5808
5809 emit_insn (gen_sp_switch_1 (sp_switch));
5810 }
5811
5812 d = calc_live_regs (&live_regs_mask);
5813 /* ??? Maybe we could save some switching if we can move a mode switch
5814 that already happens to be at the function start into the prologue. */
5815 if (target_flags != save_flags && ! current_function_interrupt)
5816 emit_insn (gen_toggle_sz ());
5817
5818 if (TARGET_SH5)
5819 {
5820 int offset_base, offset;
5821 rtx r0 = NULL_RTX;
5822 int offset_in_r0 = -1;
5823 int sp_in_r0 = 0;
5824 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5825 int total_size, save_size;
5826 save_schedule schedule;
5827 save_entry *entry;
5828 int *tmp_pnt;
5829
5830 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5831 && ! current_function_interrupt)
5832 r0 = gen_rtx_REG (Pmode, R0_REG);
5833
5834 /* D is the actual number of bytes that we need for saving registers,
5835 however, in initial_elimination_offset we have committed to using
5836 an additional TREGS_SPACE amount of bytes - in order to keep both
5837 addresses to arguments supplied by the caller and local variables
5838 valid, we must keep this gap. Place it between the incoming
5839 arguments and the actually saved registers in a bid to optimize
5840 locality of reference. */
5841 total_size = d + tregs_space;
5842 total_size += rounded_frame_size (total_size);
5843 save_size = total_size - rounded_frame_size (d);
5844 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5845 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5846 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5847
5848 /* If adjusting the stack in a single step costs nothing extra, do so.
5849 I.e. either if a single addi is enough, or we need a movi anyway,
5850 and we don't exceed the maximum offset range (the test for the
5851 latter is conservative for simplicity). */
5852 if (TARGET_SHMEDIA
5853 && (CONST_OK_FOR_I10 (-total_size)
5854 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5855 && total_size <= 2044)))
5856 d_rounding = total_size - save_size;
5857
5858 offset_base = d + d_rounding;
5859
5860 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5861 0, NULL);
5862
5863 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5864 tmp_pnt = schedule.temps;
5865 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5866 {
5867 enum machine_mode mode = entry->mode;
5868 unsigned int reg = entry->reg;
5869 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5870 rtx orig_reg_rtx;
5871
5872 offset = entry->offset;
5873
5874 reg_rtx = gen_rtx_REG (mode, reg);
5875
5876 mem_rtx = gen_frame_mem (mode,
5877 gen_rtx_PLUS (Pmode,
5878 stack_pointer_rtx,
5879 GEN_INT (offset)));
5880
5881 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5882
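/* GO_IF_LEGITIMATE_ADDRESS branches to TRY_PRE_DEC when the sp+offset
   address built above is valid for MODE.  Otherwise we fall through
   here; the save then needs r0 as an address register, so check that
   it is available and discard the sp-relative MEM.  */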
5883 gcc_assert (r0);
5884 mem_rtx = NULL_RTX;
5885
5886 try_pre_dec:
5887 do
5888 if (HAVE_PRE_DECREMENT
5889 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5890 || mem_rtx == NULL_RTX
5891 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5892 {
5893 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5894
5895 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5896 pre_dec_ok);
5897
5898 pre_dec = NULL_RTX;
5899
5900 break;
5901
5902 pre_dec_ok:
5903 mem_rtx = NULL_RTX;
5904 offset += GET_MODE_SIZE (mode);
5905 }
5906 while (0);
5907
5908 if (mem_rtx != NULL_RTX)
5909 goto addr_ok;
5910
5911 if (offset_in_r0 == -1)
5912 {
5913 emit_move_insn (r0, GEN_INT (offset));
5914 offset_in_r0 = offset;
5915 }
5916 else if (offset != offset_in_r0)
5917 {
5918 emit_move_insn (r0,
5919 gen_rtx_PLUS
5920 (Pmode, r0,
5921 GEN_INT (offset - offset_in_r0)));
5922 offset_in_r0 += offset - offset_in_r0;
5923 }
5924
5925 if (pre_dec != NULL_RTX)
5926 {
5927 if (! sp_in_r0)
5928 {
5929 emit_move_insn (r0,
5930 gen_rtx_PLUS
5931 (Pmode, r0, stack_pointer_rtx));
5932 sp_in_r0 = 1;
5933 }
5934
5935 offset -= GET_MODE_SIZE (mode);
5936 offset_in_r0 -= GET_MODE_SIZE (mode);
5937
5938 mem_rtx = pre_dec;
5939 }
5940 else if (sp_in_r0)
5941 mem_rtx = gen_frame_mem (mode, r0);
5942 else
5943 mem_rtx = gen_frame_mem (mode,
5944 gen_rtx_PLUS (Pmode,
5945 stack_pointer_rtx,
5946 r0));
5947
5948 /* We must not use an r0-based address for target-branch
5949 registers or for special registers without pre-dec
5950 memory addresses, since we store their values in r0
5951 first. */
5952 gcc_assert (!TARGET_REGISTER_P (reg)
5953 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5954 || mem_rtx == pre_dec));
5955
5956 addr_ok:
5957 orig_reg_rtx = reg_rtx;
5958 if (TARGET_REGISTER_P (reg)
5959 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5960 && mem_rtx != pre_dec))
5961 {
5962 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5963
5964 emit_move_insn (tmp_reg, reg_rtx);
5965
5966 if (REGNO (tmp_reg) == R0_REG)
5967 {
5968 offset_in_r0 = -1;
5969 sp_in_r0 = 0;
5970 gcc_assert (!refers_to_regno_p
5971 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5972 }
5973
5974 if (*++tmp_pnt <= 0)
5975 tmp_pnt = schedule.temps;
5976
5977 reg_rtx = tmp_reg;
5978 }
5979 {
5980 rtx insn;
5981
5982 	    /* Mark as interesting for the DWARF CFI generator.  */
5983 insn = emit_move_insn (mem_rtx, reg_rtx);
5984 RTX_FRAME_RELATED_P (insn) = 1;
5985 /* If we use an intermediate register for the save, we can't
5986 describe this exactly in cfi as a copy of the to-be-saved
5987 register into the temporary register and then the temporary
5988 register on the stack, because the temporary register can
5989 have a different natural size than the to-be-saved register.
5990 Thus, we gloss over the intermediate copy and pretend we do
5991 a direct save from the to-be-saved register. */
5992 if (REGNO (reg_rtx) != reg)
5993 {
5994 rtx set, note_rtx;
5995
5996 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5997 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5998 REG_NOTES (insn));
5999 REG_NOTES (insn) = note_rtx;
6000 }
6001
6002 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6003 {
6004 rtx reg_rtx = gen_rtx_REG (mode, reg);
6005 rtx set, note_rtx;
6006 rtx mem_rtx = gen_frame_mem (mode,
6007 gen_rtx_PLUS (Pmode,
6008 stack_pointer_rtx,
6009 GEN_INT (offset)));
6010
6011 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6012 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6013 REG_NOTES (insn));
6014 REG_NOTES (insn) = note_rtx;
6015 }
6016 }
6017 }
6018
6019 gcc_assert (entry->offset == d_rounding);
6020 }
6021 else
6022 push_regs (&live_regs_mask, current_function_interrupt);
6023
6024 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6025 {
6026 rtx insn = get_last_insn ();
6027 rtx last = emit_insn (gen_GOTaddr2picreg ());
6028
6029 /* Mark these insns as possibly dead. Sometimes, flow2 may
6030 delete all uses of the PIC register. In this case, let it
6031 delete the initialization too. */
6032 do
6033 {
6034 insn = NEXT_INSN (insn);
6035
6036 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6037 const0_rtx,
6038 REG_NOTES (insn));
6039 }
6040 while (insn != last);
6041 }
6042
6043 if (SHMEDIA_REGS_STACK_ADJUST ())
6044 {
6045 /* This must NOT go through the PLT, otherwise mach and macl
6046 may be clobbered. */
6047 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6048 (TARGET_FPU_ANY
6049 ? "__GCC_push_shmedia_regs"
6050 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6051 emit_insn (gen_shmedia_save_restore_regs_compact
6052 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6053 }
6054
6055 if (target_flags != save_flags && ! current_function_interrupt)
6056 {
6057 rtx insn = emit_insn (gen_toggle_sz ());
6058
6059 /* If we're lucky, a mode switch in the function body will
6060 overwrite fpscr, turning this insn dead. Tell flow this
6061 insn is ok to delete. */
6062 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6063 const0_rtx,
6064 REG_NOTES (insn));
6065 }
6066
6067 target_flags = save_flags;
6068
6069 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6070 stack_pointer_rtx, 0, NULL);
6071
6072 if (frame_pointer_needed)
6073 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6074
6075 if (TARGET_SHCOMPACT
6076 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6077 {
6078 /* This must NOT go through the PLT, otherwise mach and macl
6079 may be clobbered. */
6080 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6081 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6082 emit_insn (gen_shcompact_incoming_args ());
6083 }
6084 }
6085
6086 void
6087 sh_expand_epilogue (bool sibcall_p)
6088 {
6089 HARD_REG_SET live_regs_mask;
6090 int d, i;
6091 int d_rounding = 0;
6092
6093 int save_flags = target_flags;
6094 int frame_size, save_size;
6095 int fpscr_deferred = 0;
6096 int e = sibcall_p ? -1 : 1;
6097
6098 d = calc_live_regs (&live_regs_mask);
6099
6100 save_size = d;
6101 frame_size = rounded_frame_size (d);
6102
6103 if (TARGET_SH5)
6104 {
6105 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6106 int total_size;
6107 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6108 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6109 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6110
6111 total_size = d + tregs_space;
6112 total_size += rounded_frame_size (total_size);
6113 save_size = total_size - frame_size;
6114
6115 /* If adjusting the stack in a single step costs nothing extra, do so.
6116 I.e. either if a single addi is enough, or we need a movi anyway,
6117 and we don't exceed the maximum offset range (the test for the
6118 latter is conservative for simplicity). */
6119 if (TARGET_SHMEDIA
6120 && ! frame_pointer_needed
6121 && (CONST_OK_FOR_I10 (total_size)
6122 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6123 && total_size <= 2044)))
6124 d_rounding = frame_size;
6125
6126 frame_size -= d_rounding;
6127 }
6128
6129 if (frame_pointer_needed)
6130 {
6131 /* We must avoid scheduling the epilogue with previous basic blocks
6132 when exception handling is enabled. See PR/18032. */
6133 if (flag_exceptions)
6134 emit_insn (gen_blockage ());
6135 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6136 &live_regs_mask);
6137
6138 /* We must avoid moving the stack pointer adjustment past code
6139 which reads from the local frame, else an interrupt could
6140 occur after the SP adjustment and clobber data in the local
6141 frame. */
6142 emit_insn (gen_blockage ());
6143 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6144 }
6145 else if (frame_size)
6146 {
6147 /* We must avoid moving the stack pointer adjustment past code
6148 which reads from the local frame, else an interrupt could
6149 occur after the SP adjustment and clobber data in the local
6150 frame. */
6151 emit_insn (gen_blockage ());
6152 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6153 }
6154
6155 if (SHMEDIA_REGS_STACK_ADJUST ())
6156 {
6157 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6158 (TARGET_FPU_ANY
6159 ? "__GCC_pop_shmedia_regs"
6160 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6161 /* This must NOT go through the PLT, otherwise mach and macl
6162 may be clobbered. */
6163 emit_insn (gen_shmedia_save_restore_regs_compact
6164 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6165 }
6166
6167 /* Pop all the registers. */
6168
6169 if (target_flags != save_flags && ! current_function_interrupt)
6170 emit_insn (gen_toggle_sz ());
6171 if (TARGET_SH5)
6172 {
6173 int offset_base, offset;
6174 int offset_in_r0 = -1;
6175 int sp_in_r0 = 0;
6176 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6177 save_schedule schedule;
6178 save_entry *entry;
6179 int *tmp_pnt;
6180
6181 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6182 offset_base = -entry[1].offset + d_rounding;
6183 tmp_pnt = schedule.temps;
6184 for (; entry->mode != VOIDmode; entry--)
6185 {
6186 enum machine_mode mode = entry->mode;
6187 int reg = entry->reg;
6188 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6189
6190 offset = offset_base + entry->offset;
6191 reg_rtx = gen_rtx_REG (mode, reg);
6192
6193 mem_rtx = gen_frame_mem (mode,
6194 gen_rtx_PLUS (Pmode,
6195 stack_pointer_rtx,
6196 GEN_INT (offset)));
6197
6198 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6199
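/* As in the prologue: GO_IF_LEGITIMATE_ADDRESS above branches to
   TRY_POST_INC when sp+offset is directly addressable; otherwise we
   fall through and build an r0-based address instead.  */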
6200 mem_rtx = NULL_RTX;
6201
6202 try_post_inc:
6203 do
6204 if (HAVE_POST_INCREMENT
6205 && (offset == offset_in_r0
6206 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6207 && mem_rtx == NULL_RTX)
6208 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6209 {
6210 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6211
6212 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6213 post_inc_ok);
6214
6215 post_inc = NULL_RTX;
6216
6217 break;
6218
6219 post_inc_ok:
6220 mem_rtx = NULL_RTX;
6221 }
6222 while (0);
6223
6224 if (mem_rtx != NULL_RTX)
6225 goto addr_ok;
6226
6227 if (offset_in_r0 == -1)
6228 {
6229 emit_move_insn (r0, GEN_INT (offset));
6230 offset_in_r0 = offset;
6231 }
6232 else if (offset != offset_in_r0)
6233 {
6234 emit_move_insn (r0,
6235 gen_rtx_PLUS
6236 (Pmode, r0,
6237 GEN_INT (offset - offset_in_r0)));
6238 offset_in_r0 += offset - offset_in_r0;
6239 }
6240
6241 if (post_inc != NULL_RTX)
6242 {
6243 if (! sp_in_r0)
6244 {
6245 emit_move_insn (r0,
6246 gen_rtx_PLUS
6247 (Pmode, r0, stack_pointer_rtx));
6248 sp_in_r0 = 1;
6249 }
6250
6251 mem_rtx = post_inc;
6252
6253 offset_in_r0 += GET_MODE_SIZE (mode);
6254 }
6255 else if (sp_in_r0)
6256 mem_rtx = gen_frame_mem (mode, r0);
6257 else
6258 mem_rtx = gen_frame_mem (mode,
6259 gen_rtx_PLUS (Pmode,
6260 stack_pointer_rtx,
6261 r0));
6262
6263 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6264 || mem_rtx == post_inc);
6265
6266 addr_ok:
6267 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6268 && mem_rtx != post_inc)
6269 {
6270 insn = emit_move_insn (r0, mem_rtx);
6271 mem_rtx = r0;
6272 }
6273 else if (TARGET_REGISTER_P (reg))
6274 {
6275 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6276
6277 /* Give the scheduler a bit of freedom by using up to
6278 MAX_TEMPS registers in a round-robin fashion. */
6279 insn = emit_move_insn (tmp_reg, mem_rtx);
6280 mem_rtx = tmp_reg;
6281 if (*++tmp_pnt < 0)
6282 tmp_pnt = schedule.temps;
6283 }
6284
6285 insn = emit_move_insn (reg_rtx, mem_rtx);
6286 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6287 /* This is dead, unless we return with a sibcall. */
6288 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6289 const0_rtx,
6290 REG_NOTES (insn));
6291 }
6292
6293 gcc_assert (entry->offset + offset_base == d + d_rounding);
6294 }
6295 else /* ! TARGET_SH5 */
6296 {
6297 save_size = 0;
6298 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6299 pop (PR_REG);
6300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6301 {
6302 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6303
6304 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6305 && hard_regs_intersect_p (&live_regs_mask,
6306 				    &reg_class_contents[DF_REGS]))
6307 fpscr_deferred = 1;
6308 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6309 pop (j);
6310 if (j == FIRST_FP_REG && fpscr_deferred)
6311 pop (FPSCR_REG);
6312
6313 }
6314 }
6315 if (target_flags != save_flags && ! current_function_interrupt)
6316 emit_insn (gen_toggle_sz ());
6317 target_flags = save_flags;
6318
6319 output_stack_adjust (current_function_pretend_args_size
6320 + save_size + d_rounding
6321 + current_function_args_info.stack_regs * 8,
6322 stack_pointer_rtx, e, NULL);
6323
6324 if (current_function_calls_eh_return)
6325 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6326 EH_RETURN_STACKADJ_RTX));
6327
6328 /* Switch back to the normal stack if necessary. */
6329 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6330 emit_insn (gen_sp_switch_2 ());
6331
6332 /* Tell flow the insn that pops PR isn't dead. */
6333 /* PR_REG will never be live in SHmedia mode, and we don't need to
6334 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6335 by the return pattern. */
6336 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6337 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6338 }
6339
6340 static int sh_need_epilogue_known = 0;
6341
6342 int
6343 sh_need_epilogue (void)
6344 {
6345 if (! sh_need_epilogue_known)
6346 {
6347 rtx epilogue;
6348
6349 start_sequence ();
6350 sh_expand_epilogue (0);
6351 epilogue = get_insns ();
6352 end_sequence ();
6353 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6354 }
6355 return sh_need_epilogue_known > 0;
6356 }
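/* Sketch of the intended use (an assumption about the machine
   description, not taken from this file): the "return" pattern in
   sh.md can allow a bare return only when no epilogue code would be
   emitted, e.g. with a condition like

     reload_completed && ! sh_need_epilogue ()

   The cached result lives in sh_need_epilogue_known and is reset for
   each function by sh_output_function_epilogue below.  */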
6357
6358 /* Emit code to change the current function's return address to RA.
6359 TEMP is available as a scratch register, if needed. */
6360
6361 void
6362 sh_set_return_address (rtx ra, rtx tmp)
6363 {
6364 HARD_REG_SET live_regs_mask;
6365 int d;
6366 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6367 int pr_offset;
6368
6369 d = calc_live_regs (&live_regs_mask);
6370
6371   /* If pr_reg isn't live, we can set it (or the register given in
6372 sh_media_register_for_return) directly. */
6373 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6374 {
6375 rtx rr;
6376
6377 if (TARGET_SHMEDIA)
6378 {
6379 int rr_regno = sh_media_register_for_return ();
6380
6381 if (rr_regno < 0)
6382 rr_regno = pr_reg;
6383
6384 rr = gen_rtx_REG (DImode, rr_regno);
6385 }
6386 else
6387 rr = gen_rtx_REG (SImode, pr_reg);
6388
6389 emit_insn (GEN_MOV (rr, ra));
6390 /* Tell flow the register for return isn't dead. */
6391 emit_insn (gen_rtx_USE (VOIDmode, rr));
6392 return;
6393 }
6394
6395 if (TARGET_SH5)
6396 {
6397 int offset;
6398 save_schedule schedule;
6399 save_entry *entry;
6400
6401 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6402 offset = entry[1].offset;
6403 for (; entry->mode != VOIDmode; entry--)
6404 if (entry->reg == pr_reg)
6405 goto found;
6406
6407       /* We can't find the PR register.  */
6408 gcc_unreachable ();
6409
6410 found:
6411 offset = entry->offset - offset;
6412 pr_offset = (rounded_frame_size (d) + offset
6413 + SHMEDIA_REGS_STACK_ADJUST ());
6414 }
6415 else
6416 pr_offset = rounded_frame_size (d);
6417
6418 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6419 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6420
6421 tmp = gen_frame_mem (Pmode, tmp);
6422 emit_insn (GEN_MOV (tmp, ra));
6423 }
6424
6425 /* Clear variables at function end. */
6426
6427 static void
6428 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6429 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6430 {
6431 sh_need_epilogue_known = 0;
6432 }
6433
6434 static rtx
6435 sh_builtin_saveregs (void)
6436 {
6437 /* First unnamed integer register. */
6438 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6439 /* Number of integer registers we need to save. */
6440 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6441   /* First unnamed SFmode float reg.  */
6442 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6443 /* Number of SFmode float regs to save. */
6444 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6445 rtx regbuf, fpregs;
6446 int bufsize, regno;
6447 HOST_WIDE_INT alias_set;
6448
6449 if (TARGET_SH5)
6450 {
6451 if (n_intregs)
6452 {
6453 int pushregs = n_intregs;
6454
6455 while (pushregs < NPARM_REGS (SImode) - 1
6456 && (CALL_COOKIE_INT_REG_GET
6457 (current_function_args_info.call_cookie,
6458 NPARM_REGS (SImode) - pushregs)
6459 == 1))
6460 {
6461 current_function_args_info.call_cookie
6462 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6463 - pushregs, 1);
6464 pushregs++;
6465 }
6466
6467 if (pushregs == NPARM_REGS (SImode))
6468 current_function_args_info.call_cookie
6469 |= (CALL_COOKIE_INT_REG (0, 1)
6470 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6471 else
6472 current_function_args_info.call_cookie
6473 |= CALL_COOKIE_STACKSEQ (pushregs);
6474
6475 current_function_pretend_args_size += 8 * n_intregs;
6476 }
6477 if (TARGET_SHCOMPACT)
6478 return const0_rtx;
6479 }
6480
6481 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6482 {
6483 error ("__builtin_saveregs not supported by this subtarget");
6484 return const0_rtx;
6485 }
6486
6487 if (TARGET_SHMEDIA)
6488 n_floatregs = 0;
6489
6490 /* Allocate block of memory for the regs. */
6491 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6492 Or can assign_stack_local accept a 0 SIZE argument? */
6493 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6494
6495 if (TARGET_SHMEDIA)
6496 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6497 else if (n_floatregs & 1)
6498 {
6499 rtx addr;
6500
6501 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6502 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6503 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6504 regbuf = change_address (regbuf, BLKmode, addr);
6505 }
6506 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6507 {
6508 rtx addr, mask;
6509
6510 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6511 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6512 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6513 emit_insn (gen_andsi3 (addr, addr, mask));
6514 regbuf = change_address (regbuf, BLKmode, addr);
6515 }
6516 else
6517 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6518 alias_set = get_varargs_alias_set ();
6519 set_mem_alias_set (regbuf, alias_set);
6520
6521 /* Save int args.
6522 This is optimized to only save the regs that are necessary. Explicitly
6523 named args need not be saved. */
6524 if (n_intregs > 0)
6525 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6526 adjust_address (regbuf, BLKmode,
6527 n_floatregs * UNITS_PER_WORD),
6528 n_intregs);
6529
6530 if (TARGET_SHMEDIA)
6531 /* Return the address of the regbuf. */
6532 return XEXP (regbuf, 0);
6533
6534 /* Save float args.
6535 This is optimized to only save the regs that are necessary. Explicitly
6536 named args need not be saved.
6537 We explicitly build a pointer to the buffer because it halves the insn
6538 count when not optimizing (otherwise the pointer is built for each reg
6539 saved).
6540 We emit the moves in reverse order so that we can use predecrement. */
6541
6542 fpregs = copy_to_mode_reg (Pmode,
6543 plus_constant (XEXP (regbuf, 0),
6544 n_floatregs * UNITS_PER_WORD));
6545 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6546 {
6547 rtx mem;
6548 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6549 {
6550 emit_insn (gen_addsi3 (fpregs, fpregs,
6551 GEN_INT (-2 * UNITS_PER_WORD)));
6552 mem = change_address (regbuf, DFmode, fpregs);
6553 emit_move_insn (mem,
6554 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6555 }
6556 regno = first_floatreg;
6557 if (regno & 1)
6558 {
6559 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6560 mem = change_address (regbuf, SFmode, fpregs);
6561 emit_move_insn (mem,
6562 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6563 - (TARGET_LITTLE_ENDIAN != 0)));
6564 }
6565 }
6566 else
6567 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6568 {
6569 rtx mem;
6570
6571 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6572 mem = change_address (regbuf, SFmode, fpregs);
6573 emit_move_insn (mem,
6574 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6575 }
6576
6577 /* Return the address of the regbuf. */
6578 return XEXP (regbuf, 0);
6579 }
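/* Layout of the save buffer built above (an illustrative sketch for
   the non-SH5 case; offsets in bytes):

     regbuf + 0
       ... unnamed SFmode argument registers, filled downwards from
           regbuf + n_floatregs * UNITS_PER_WORD by the float-save
           loops above
     regbuf + n_floatregs * UNITS_PER_WORD
       ... unnamed integer argument registers, stored by
           move_block_from_reg above

   sh_va_start later points __va_next_fp at the start of the buffer
   and __va_next_o at the start of the integer area.  */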
6580
6581 /* Define the `__builtin_va_list' type for the ABI. */
6582
6583 static tree
6584 sh_build_builtin_va_list (void)
6585 {
6586 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6587 tree record;
6588
6589 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6590 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6591 return ptr_type_node;
6592
6593 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6594
6595 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6596 ptr_type_node);
6597 f_next_o_limit = build_decl (FIELD_DECL,
6598 get_identifier ("__va_next_o_limit"),
6599 ptr_type_node);
6600 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6601 ptr_type_node);
6602 f_next_fp_limit = build_decl (FIELD_DECL,
6603 get_identifier ("__va_next_fp_limit"),
6604 ptr_type_node);
6605 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6606 ptr_type_node);
6607
6608 DECL_FIELD_CONTEXT (f_next_o) = record;
6609 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6610 DECL_FIELD_CONTEXT (f_next_fp) = record;
6611 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6612 DECL_FIELD_CONTEXT (f_next_stack) = record;
6613
6614 TYPE_FIELDS (record) = f_next_o;
6615 TREE_CHAIN (f_next_o) = f_next_o_limit;
6616 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6617 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6618 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6619
6620 layout_type (record);
6621
6622 return record;
6623 }
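/* For reference, an equivalent C declaration of the record built
   above (a sketch; the compiler constructs it directly as trees):

     struct __builtin_va_list
     {
       void *__va_next_o;          next available integer argument slot
       void *__va_next_o_limit;    end of the integer register save area
       void *__va_next_fp;         next available FP argument slot
       void *__va_next_fp_limit;   end of the FP register save area
       void *__va_next_stack;      next argument passed on the stack
     };  */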
6624
6625 /* Implement `va_start' for varargs and stdarg. */
6626
6627 void
6628 sh_va_start (tree valist, rtx nextarg)
6629 {
6630 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6631 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6632 tree t, u;
6633 int nfp, nint;
6634
6635 if (TARGET_SH5)
6636 {
6637 expand_builtin_saveregs ();
6638 std_expand_builtin_va_start (valist, nextarg);
6639 return;
6640 }
6641
6642 if ((! TARGET_SH2E && ! TARGET_SH4)
6643 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6644 {
6645 std_expand_builtin_va_start (valist, nextarg);
6646 return;
6647 }
6648
6649 f_next_o = TYPE_FIELDS (va_list_type_node);
6650 f_next_o_limit = TREE_CHAIN (f_next_o);
6651 f_next_fp = TREE_CHAIN (f_next_o_limit);
6652 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6653 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6654
6655 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6656 NULL_TREE);
6657 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6658 valist, f_next_o_limit, NULL_TREE);
6659 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6660 NULL_TREE);
6661 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6662 valist, f_next_fp_limit, NULL_TREE);
6663 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6664 valist, f_next_stack, NULL_TREE);
6665
6666 /* Call __builtin_saveregs. */
6667 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6668 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6669 TREE_SIDE_EFFECTS (t) = 1;
6670 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6671
6672 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6673 if (nfp < 8)
6674 nfp = 8 - nfp;
6675 else
6676 nfp = 0;
6677 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6678 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6679 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6680 TREE_SIDE_EFFECTS (t) = 1;
6681 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6682
6683 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6684 TREE_SIDE_EFFECTS (t) = 1;
6685 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6686
6687 nint = current_function_args_info.arg_count[SH_ARG_INT];
6688 if (nint < 4)
6689 nint = 4 - nint;
6690 else
6691 nint = 0;
6692 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6693 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6694 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6695 TREE_SIDE_EFFECTS (t) = 1;
6696 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6697
6698 u = make_tree (ptr_type_node, nextarg);
6699 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6700 TREE_SIDE_EFFECTS (t) = 1;
6701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6702 }
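/* Resulting initialization, illustrated (assumes the SH4-style case
   above with 8 float and 4 integer argument registers and no named
   FP arguments; REGBUF is the pointer returned by
   expand_builtin_saveregs):

     next_fp       = REGBUF
     next_fp_limit = REGBUF + 8 * UNITS_PER_WORD
     next_o        = next_fp_limit   (integer saves follow the FP saves)
     next_o_limit  = next_o + (4 - named ints) * UNITS_PER_WORD
     next_stack    = NEXTARG, the first stacked argument.  */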
6703
6704 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6705 member, return it. */
6706 static tree
6707 find_sole_member (tree type)
6708 {
6709 tree field, member = NULL_TREE;
6710
6711 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6712 {
6713 if (TREE_CODE (field) != FIELD_DECL)
6714 continue;
6715 if (!DECL_SIZE (field))
6716 return NULL_TREE;
6717 if (integer_zerop (DECL_SIZE (field)))
6718 continue;
6719 if (member)
6720 return NULL_TREE;
6721 member = field;
6722 }
6723 return member;
6724 }
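/* Examples (illustrative): for  struct { float x; }  this returns the
   FIELD_DECL for x, so such a struct can be treated like a plain
   float by the caller below; for  struct { float x; int y; }  or a
   struct whose member has no known constant size, it returns
   NULL_TREE.  */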
6725 /* Implement `va_arg'. */
6726
6727 static tree
6728 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6729 tree *post_p ATTRIBUTE_UNUSED)
6730 {
6731 HOST_WIDE_INT size, rsize;
6732 tree tmp, pptr_type_node;
6733 tree addr, lab_over = NULL, result = NULL;
6734 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6735 tree eff_type;
6736
6737 if (pass_by_ref)
6738 type = build_pointer_type (type);
6739
6740 size = int_size_in_bytes (type);
6741 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6742 pptr_type_node = build_pointer_type (ptr_type_node);
6743
6744 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6745 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6746 {
6747 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6748 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6749 int pass_as_float;
6750 tree lab_false;
6751 tree member;
6752
6753 f_next_o = TYPE_FIELDS (va_list_type_node);
6754 f_next_o_limit = TREE_CHAIN (f_next_o);
6755 f_next_fp = TREE_CHAIN (f_next_o_limit);
6756 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6757 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6758
6759 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6760 NULL_TREE);
6761 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6762 valist, f_next_o_limit, NULL_TREE);
6763 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6764 valist, f_next_fp, NULL_TREE);
6765 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6766 valist, f_next_fp_limit, NULL_TREE);
6767 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6768 valist, f_next_stack, NULL_TREE);
6769
6770 /* Structures with a single member with a distinct mode are passed
6771 like their member. This is relevant if the latter has a REAL_TYPE
6772 or COMPLEX_TYPE type. */
6773 eff_type = type;
6774 while (TREE_CODE (eff_type) == RECORD_TYPE
6775 && (member = find_sole_member (eff_type))
6776 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6777 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6778 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6779 {
6780 tree field_type = TREE_TYPE (member);
6781
6782 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6783 eff_type = field_type;
6784 else
6785 {
6786 gcc_assert ((TYPE_ALIGN (eff_type)
6787 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6788 || (TYPE_ALIGN (eff_type)
6789 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6790 break;
6791 }
6792 }
6793
6794 if (TARGET_SH4)
6795 {
6796 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6797 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6798 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6799 && size <= 16));
6800 }
6801 else
6802 {
6803 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6804 }
6805
6806 addr = create_tmp_var (pptr_type_node, NULL);
6807 lab_false = create_artificial_label ();
6808 lab_over = create_artificial_label ();
6809
6810 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6811
6812 if (pass_as_float)
6813 {
6814 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6815 tree cmp;
6816 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6817
6818 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6819 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6820 gimplify_and_add (tmp, pre_p);
6821
6822 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6823 gimplify_and_add (tmp, pre_p);
6824 tmp = next_fp_limit;
6825 if (size > 4 && !is_double)
6826 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6827 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6828 tmp = build (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6829 cmp = build (COND_EXPR, void_type_node, tmp,
6830 build (GOTO_EXPR, void_type_node, lab_false),
6831 NULL);
6832 if (!is_double)
6833 gimplify_and_add (cmp, pre_p);
6834
6835 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6836 || (is_double || size == 16))
6837 {
6838 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6839 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6840 tmp = build (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6841 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6842 gimplify_and_add (tmp, pre_p);
6843 }
6844 if (is_double)
6845 gimplify_and_add (cmp, pre_p);
6846
6847 #ifdef FUNCTION_ARG_SCmode_WART
6848 if (TYPE_MODE (eff_type) == SCmode
6849 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6850 {
6851 tree subtype = TREE_TYPE (eff_type);
6852 tree real, imag;
6853
6854 imag
6855 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6856 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6857
6858 real
6859 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6860 real = get_initialized_tmp_var (real, pre_p, NULL);
6861
6862 result = build (COMPLEX_EXPR, type, real, imag);
6863 result = get_initialized_tmp_var (result, pre_p, NULL);
6864 }
6865 #endif /* FUNCTION_ARG_SCmode_WART */
6866
6867 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6868 gimplify_and_add (tmp, pre_p);
6869
6870 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6871 gimplify_and_add (tmp, pre_p);
6872
6873 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6874 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6875 gimplify_and_add (tmp, pre_p);
6876 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6877 gimplify_and_add (tmp, pre_p);
6878
6879 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6880 gimplify_and_add (tmp, post_p);
6881 valist = next_fp_tmp;
6882 }
6883 else
6884 {
6885 tmp = fold_convert (ptr_type_node, size_int (rsize));
6886 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6887 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6888 tmp = build (COND_EXPR, void_type_node, tmp,
6889 build (GOTO_EXPR, void_type_node, lab_false),
6890 NULL);
6891 gimplify_and_add (tmp, pre_p);
6892
6893 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6894 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6895 gimplify_and_add (tmp, pre_p);
6896
6897 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6898 gimplify_and_add (tmp, pre_p);
6899
6900 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6901 gimplify_and_add (tmp, pre_p);
6902
6903 if (size > 4 && ! TARGET_SH4)
6904 {
6905 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6906 gimplify_and_add (tmp, pre_p);
6907 }
6908
6909 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6910 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6911 gimplify_and_add (tmp, pre_p);
6912 }
6913
6914 if (!result)
6915 {
6916 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6917 gimplify_and_add (tmp, pre_p);
6918 }
6919 }
6920
6921 /* ??? In va-sh.h, there had been code to make values larger than
6922 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6923
6924 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6925 if (result)
6926 {
6927 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6928 gimplify_and_add (tmp, pre_p);
6929
6930 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6931 gimplify_and_add (tmp, pre_p);
6932 }
6933 else
6934 result = tmp;
6935
6936 if (pass_by_ref)
6937 result = build_va_arg_indirect_ref (result);
6938
6939 return result;
6940 }
6941
6942 bool
6943 sh_promote_prototypes (tree type)
6944 {
6945 if (TARGET_HITACHI)
6946 return 0;
6947 if (! type)
6948 return 1;
6949 return ! sh_attr_renesas_p (type);
6950 }
6951
6952 /* Whether an argument must be passed by reference.  On SHcompact, we
6953    pretend arguments wider than 32 bits that would have been passed in
6954    registers are passed by reference, so that an SHmedia trampoline
6955    loads them into the full 64-bit registers.  */
6956
6957 static int
6958 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6959 tree type, bool named)
6960 {
6961 unsigned HOST_WIDE_INT size;
6962
6963 if (type)
6964 size = int_size_in_bytes (type);
6965 else
6966 size = GET_MODE_SIZE (mode);
6967
6968 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6969 && (!named
6970 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6971 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6972 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6973 && size > 4
6974 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6975 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6976 return size;
6977 else
6978 return 0;
6979 }
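/* Example (an illustrative sketch; assumes the SHCOMPACT_FORCE_ON_STACK
   and partial-registers checks above do not fire): a named DImode
   argument that still has an integer argument register available is
   wider than 4 bytes, so this returns 8 and the argument is passed by
   reference, letting the SHmedia side load it into a full 64-bit
   register.  A named SFmode argument returns 0 while FP argument
   registers remain.  */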
6980
6981 static bool
6982 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6983 tree type, bool named)
6984 {
6985 if (targetm.calls.must_pass_in_stack (mode, type))
6986 return true;
6987
6988 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6989 wants to know about pass-by-reference semantics for incoming
6990 arguments. */
6991 if (! cum)
6992 return false;
6993
6994 if (TARGET_SHCOMPACT)
6995 {
6996 cum->byref = shcompact_byref (cum, mode, type, named);
6997 return cum->byref != 0;
6998 }
6999
7000 return false;
7001 }
7002
7003 static bool
7004 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7005 tree type, bool named ATTRIBUTE_UNUSED)
7006 {
7007 /* ??? How can it possibly be correct to return true only on the
7008 caller side of the equation? Is there someplace else in the
7009 sh backend that's magically producing the copies? */
7010 return (cum->outgoing
7011 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7012 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7013 }
7014
7015 static int
7016 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7017 tree type, bool named ATTRIBUTE_UNUSED)
7018 {
7019 int words = 0;
7020
7021 if (!TARGET_SH5
7022 && PASS_IN_REG_P (*cum, mode, type)
7023 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7024 && (ROUND_REG (*cum, mode)
7025 + (mode != BLKmode
7026 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7027 : ROUND_ADVANCE (int_size_in_bytes (type)))
7028 > NPARM_REGS (mode)))
7029 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7030
7031 else if (!TARGET_SHCOMPACT
7032 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7033 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7034
7035 return words * UNITS_PER_WORD;
7036 }
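/* Worked example (illustrative, for a non-SH5 target without the SH4
   double-precision ABI, where NPARM_REGS (SImode) == 4, i.e. r4..r7,
   and UNITS_PER_WORD is 4): an 8-byte BLKmode argument whose first
   word lands in r7 has ROUND_REG == 3 and ROUND_ADVANCE == 2, so
   3 + 2 > 4 gives words = 4 - 3 = 1, and 4 bytes travel in r7 with
   the remainder on the stack.  */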
7037
7038
7039 /* Define where to put the arguments to a function.
7040 Value is zero to push the argument on the stack,
7041 or a hard register in which to store the argument.
7042
7043 MODE is the argument's machine mode.
7044 TYPE is the data type of the argument (as a tree).
7045 This is null for libcalls where that information may
7046 not be available.
7047 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7048 the preceding args and about the function being called.
7049 NAMED is nonzero if this argument is a named parameter
7050 (otherwise it is an extra parameter matching an ellipsis).
7051
7052 On SH the first args are normally in registers
7053 and the rest are pushed. Any arg that starts within the first
7054 NPARM_REGS words is at least partially passed in a register unless
7055    its data type forbids it.  */
7056
7057
7058 rtx
7059 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7060 tree type, int named)
7061 {
7062 if (! TARGET_SH5 && mode == VOIDmode)
7063 return GEN_INT (ca->renesas_abi ? 1 : 0);
7064
7065 if (! TARGET_SH5
7066 && PASS_IN_REG_P (*ca, mode, type)
7067 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7068 {
7069 int regno;
7070
7071 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7072 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7073 {
7074 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7075 gen_rtx_REG (SFmode,
7076 BASE_ARG_REG (mode)
7077 + (ROUND_REG (*ca, mode) ^ 1)),
7078 const0_rtx);
7079 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7080 gen_rtx_REG (SFmode,
7081 BASE_ARG_REG (mode)
7082 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7083 GEN_INT (4));
7084 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7085 }
7086
7087 /* If the alignment of a DF value causes an SF register to be
7088 skipped, we will use that skipped register for the next SF
7089 value. */
7090 if ((TARGET_HITACHI || ca->renesas_abi)
7091 && ca->free_single_fp_reg
7092 && mode == SFmode)
7093 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7094
7095 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7096 ^ (mode == SFmode && TARGET_SH4
7097 && TARGET_LITTLE_ENDIAN != 0
7098 && ! TARGET_HITACHI && ! ca->renesas_abi);
7099 return gen_rtx_REG (mode, regno);
7100
7101 }
7102
7103 if (TARGET_SH5)
7104 {
7105 if (mode == VOIDmode && TARGET_SHCOMPACT)
7106 return GEN_INT (ca->call_cookie);
7107
7108 /* The following test assumes unnamed arguments are promoted to
7109 DFmode. */
7110 if (mode == SFmode && ca->free_single_fp_reg)
7111 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7112
7113 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7114 && (named || ! ca->prototype_p)
7115 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7116 {
7117 if (! ca->prototype_p && TARGET_SHMEDIA)
7118 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7119
7120 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7121 FIRST_FP_PARM_REG
7122 + ca->arg_count[(int) SH_ARG_FLOAT]);
7123 }
7124
7125 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7126 && (! TARGET_SHCOMPACT
7127 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7128 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7129 type, named))))
7130 {
7131 return gen_rtx_REG (mode, (FIRST_PARM_REG
7132 + ca->arg_count[(int) SH_ARG_INT]));
7133 }
7134
7135 return 0;
7136 }
7137
7138 return 0;
7139 }
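
/* Illustrative sketch (an editorial example, assuming the Renesas ABI
   on SH4 as handled above): for a hypothetical prototype

     void g (float x, double y, float z);

   X goes in fr4; Y must start on an even register, so it goes in dr6
   (fr6/fr7) and fr5 is skipped; Z is then placed in fr5 via
   free_single_fp_reg, the case described in the comment above.  */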
7140
7141 /* Update the data in CUM to advance over an argument
7142 of mode MODE and data type TYPE.
7143 (TYPE is null for libcalls where that information may not be
7144 available.) */
7145
7146 void
7147 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7148 tree type, int named)
7149 {
7150 if (ca->force_mem)
7151 ca->force_mem = 0;
7152 else if (TARGET_SH5)
7153 {
7154 tree type2 = (ca->byref && type
7155 ? TREE_TYPE (type)
7156 : type);
7157 enum machine_mode mode2 = (ca->byref && type
7158 ? TYPE_MODE (type2)
7159 : mode);
7160 int dwords = ((ca->byref
7161 ? ca->byref
7162 : mode2 == BLKmode
7163 ? int_size_in_bytes (type2)
7164 : GET_MODE_SIZE (mode2)) + 7) / 8;
7165 int numregs = MIN (dwords, NPARM_REGS (SImode)
7166 - ca->arg_count[(int) SH_ARG_INT]);
7167
7168 if (numregs)
7169 {
7170 ca->arg_count[(int) SH_ARG_INT] += numregs;
7171 if (TARGET_SHCOMPACT
7172 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7173 {
7174 ca->call_cookie
7175 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7176 - numregs, 1);
7177 /* N.B. We want this also for outgoing. */
7178 ca->stack_regs += numregs;
7179 }
7180 else if (ca->byref)
7181 {
7182 if (! ca->outgoing)
7183 ca->stack_regs += numregs;
7184 ca->byref_regs += numregs;
7185 ca->byref = 0;
7186 do
7187 ca->call_cookie
7188 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7189 - numregs, 2);
7190 while (--numregs);
7191 ca->call_cookie
7192 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7193 - 1, 1);
7194 }
7195 else if (dwords > numregs)
7196 {
7197 int pushregs = numregs;
7198
7199 if (TARGET_SHCOMPACT)
7200 ca->stack_regs += numregs;
7201 while (pushregs < NPARM_REGS (SImode) - 1
7202 && (CALL_COOKIE_INT_REG_GET
7203 (ca->call_cookie,
7204 NPARM_REGS (SImode) - pushregs)
7205 == 1))
7206 {
7207 ca->call_cookie
7208 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7209 - pushregs, 1);
7210 pushregs++;
7211 }
7212 if (numregs == NPARM_REGS (SImode))
7213 ca->call_cookie
7214 |= CALL_COOKIE_INT_REG (0, 1)
7215 | CALL_COOKIE_STACKSEQ (numregs - 1);
7216 else
7217 ca->call_cookie
7218 |= CALL_COOKIE_STACKSEQ (numregs);
7219 }
7220 }
7221 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7222 && (named || ! ca->prototype_p))
7223 {
7224 if (mode2 == SFmode && ca->free_single_fp_reg)
7225 ca->free_single_fp_reg = 0;
7226 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7227 < NPARM_REGS (SFmode))
7228 {
7229 int numfpregs
7230 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7231 NPARM_REGS (SFmode)
7232 - ca->arg_count[(int) SH_ARG_FLOAT]);
7233
7234 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7235
7236 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7237 {
7238 if (ca->outgoing && numregs > 0)
7239 do
7240 {
7241 ca->call_cookie
7242 |= (CALL_COOKIE_INT_REG
7243 (ca->arg_count[(int) SH_ARG_INT]
7244 - numregs + ((numfpregs - 2) / 2),
7245 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7246 - numfpregs) / 2));
7247 }
7248 while (numfpregs -= 2);
7249 }
7250 else if (mode2 == SFmode && (named)
7251 && (ca->arg_count[(int) SH_ARG_FLOAT]
7252 < NPARM_REGS (SFmode)))
7253 ca->free_single_fp_reg
7254 = FIRST_FP_PARM_REG - numfpregs
7255 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7256 }
7257 }
7258 return;
7259 }
7260
7261 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7262 {
7263 /* Note that we've used the skipped register. */
7264 if (mode == SFmode && ca->free_single_fp_reg)
7265 {
7266 ca->free_single_fp_reg = 0;
7267 return;
7268 }
7269 /* When we have a DF after an SF, there's an SF register that gets
7270 skipped in order to align the DF value. We note this skipped
7271 register, because the next SF value will use it, and not the
7272 SF that follows the DF. */
7273 if (mode == DFmode
7274 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7275 {
7276 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7277 + BASE_ARG_REG (mode));
7278 }
7279 }
7280
7281 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7282 || PASS_IN_REG_P (*ca, mode, type))
7283 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7284 = (ROUND_REG (*ca, mode)
7285 + (mode == BLKmode
7286 ? ROUND_ADVANCE (int_size_in_bytes (type))
7287 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7288 }
7289
7290 /* The Renesas calling convention doesn't quite fit into this scheme since
7291 the address is passed like an invisible argument, but one that is always
7292 passed in memory. */
7293 static rtx
7294 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7295 {
7296 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7297 return 0;
7298 return gen_rtx_REG (Pmode, 2);
7299 }
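
/* Illustrative example (editorial, assuming the behavior described in
   the comment above): for

     struct big { char b[64]; };
     struct big f (void);

   the default ABI has the caller pass the address of the return slot
   in r2, while under -mrenesas (or the renesas attribute) this hook
   returns 0 and the address is passed in memory like an ordinary
   invisible argument.  */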
7300
7301 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7302
7303 static bool
7304 sh_return_in_memory (tree type, tree fndecl)
7305 {
7306 if (TARGET_SH5)
7307 {
7308 if (TYPE_MODE (type) == BLKmode)
7309 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7310 else
7311 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7312 }
7313 else
7314 {
7315 return (TYPE_MODE (type) == BLKmode
7316 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7317 && TREE_CODE (type) == RECORD_TYPE));
7318 }
7319 }
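
/* Illustrative example (editorial): on SH5 a struct { int a, b; } of
   8 bytes is returned in registers and anything larger in memory,
   while on SH1..SH4 under the Renesas/Hitachi ABI every RECORD_TYPE
   is returned in memory regardless of size.  */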
7320
7321 /* We actually emit the code in sh_expand_prologue. We used to use
7322 a static variable to flag that we need to emit this code, but that
7323 doesn't work when inlining, when functions are deferred and then emitted
7324 later. Fortunately, we already have two flags that are part of struct
7325 function that tell if a function uses varargs or stdarg. */
7326 static void
7327 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7328 enum machine_mode mode,
7329 tree type,
7330 int *pretend_arg_size,
7331 int second_time ATTRIBUTE_UNUSED)
7332 {
7333 gcc_assert (current_function_stdarg);
7334 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7335 {
7336 int named_parm_regs, anon_parm_regs;
7337
7338 named_parm_regs = (ROUND_REG (*ca, mode)
7339 + (mode == BLKmode
7340 ? ROUND_ADVANCE (int_size_in_bytes (type))
7341 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7342 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7343 if (anon_parm_regs > 0)
7344 *pretend_arg_size = anon_parm_regs * 4;
7345 }
7346 }
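
/* Worked example (editorial, assuming the four SH1..SH4 argument
   registers r4..r7): for

     int f (int fmt, ...);

   FMT consumes r4, so named_parm_regs is 1, anon_parm_regs is 3, and
   *pretend_arg_size becomes 12; the prologue then dumps r5..r7 to the
   stack so va_arg can walk every argument in memory.  */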
7347
7348 static bool
7349 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7350 {
7351 return TARGET_SH5;
7352 }
7353
7354 static bool
7355 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7356 {
7357 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7358 }
7359
7360
7361 /* Define the offset between two registers, one to be eliminated, and
7362 the other its replacement, at the start of a routine. */
7363
7364 int
7365 initial_elimination_offset (int from, int to)
7366 {
7367 int regs_saved;
7368 int regs_saved_rounding = 0;
7369 int total_saved_regs_space;
7370 int total_auto_space;
7371 int save_flags = target_flags;
7372 int copy_flags;
7373 HARD_REG_SET live_regs_mask;
7374
7375 shmedia_space_reserved_for_target_registers = false;
7376 regs_saved = calc_live_regs (&live_regs_mask);
7377 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7378
7379 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7380 {
7381 shmedia_space_reserved_for_target_registers = true;
7382 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7383 }
7384
7385 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7386 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7387 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7388
7389 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7390 copy_flags = target_flags;
7391 target_flags = save_flags;
7392
7393 total_saved_regs_space = regs_saved + regs_saved_rounding;
7394
7395 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7396 return total_saved_regs_space + total_auto_space
7397 + current_function_args_info.byref_regs * 8;
7398
7399 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7400 return total_saved_regs_space + total_auto_space
7401 + current_function_args_info.byref_regs * 8;
7402
7403 /* Initial gap between fp and sp is 0. */
7404 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7405 return 0;
7406
7407 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7408 return rounded_frame_size (0);
7409
7410 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7411 return rounded_frame_size (0);
7412
7413 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7414 && (to == HARD_FRAME_POINTER_REGNUM
7415 || to == STACK_POINTER_REGNUM));
7416 if (TARGET_SH5)
7417 {
7418 int n = total_saved_regs_space;
7419 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7420 save_schedule schedule;
7421 save_entry *entry;
7422
7423 n += total_auto_space;
7424
7425 /* If it wasn't saved, there's not much we can do. */
7426 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7427 return n;
7428
7429 target_flags = copy_flags;
7430
7431 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7432 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7433 if (entry->reg == pr_reg)
7434 {
7435 target_flags = save_flags;
7436 return entry->offset;
7437 }
7438 gcc_unreachable ();
7439 }
7440 else
7441 return total_auto_space;
7442 }
7443
7444 /* Insert any deferred function attributes from earlier pragmas. */
7445 static void
7446 sh_insert_attributes (tree node, tree *attributes)
7447 {
7448 tree attrs;
7449
7450 if (TREE_CODE (node) != FUNCTION_DECL)
7451 return;
7452
7453 /* We are only interested in function declarations. */
7454 if (!DECL_P (node))
7455 return;
7456
7457 /* Append the attributes to the deferred attributes. */
7458 *sh_deferred_function_attributes_tail = *attributes;
7459 attrs = sh_deferred_function_attributes;
7460 if (!attrs)
7461 return;
7462
7463 /* Some attributes imply or require the interrupt attribute. */
7464 if (!lookup_attribute ("interrupt_handler", attrs)
7465 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7466 {
7467 /* If we have a trapa_handler, but no interrupt_handler attribute,
7468 insert an interrupt_handler attribute. */
7469 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7470 /* We can't use sh_pr_interrupt here because that's not in the
7471 java frontend. */
7472 attrs
7473 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7474 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7475 interrupt attribute is missing, we ignore the attribute and warn. */
7476 else if (lookup_attribute ("sp_switch", attrs)
7477 || lookup_attribute ("trap_exit", attrs)
7478 || lookup_attribute ("nosave_low_regs", attrs))
7479 {
7480 tree *tail;
7481
7482 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7483 {
7484 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7485 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7486 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7487 warning (OPT_Wattributes,
7488 "%qs attribute only applies to interrupt functions",
7489 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7490 else
7491 {
7492 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7493 NULL_TREE);
7494 tail = &TREE_CHAIN (*tail);
7495 }
7496 }
7497 attrs = *attributes;
7498 }
7499 }
7500
7501 /* Install the processed list. */
7502 *attributes = attrs;
7503
7504 /* Clear deferred attributes. */
7505 sh_deferred_function_attributes = NULL_TREE;
7506 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7507
7508 return;
7509 }
7510
7511 /* Supported attributes:
7512
7513 interrupt_handler -- specifies this function is an interrupt handler.
7514
7515 trapa_handler -- like above, but don't save all registers.
7516
7517 sp_switch -- specifies an alternate stack for an interrupt handler
7518 to run on.
7519
7520 trap_exit -- use a trapa to exit an interrupt function instead of
7521 an rte instruction.
7522
7523 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7524 This is useful on the SH3 and upwards,
7525 which have a separate set of low regs for User and Supervisor modes.
7526 This should only be used for the lowest level of interrupts. Higher levels
7527 of interrupts must save the registers in case they themselves are
7528 interrupted.
7529
7530 renesas -- use Renesas calling/layout conventions (functions and
7531 structures).
7532
7533 */
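
/* Usage sketch (editorial; the GCC manual is authoritative, and the
   identifiers alt_stack and the trap number 11 below are arbitrary
   examples):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));

   where alt_stack names a variable holding the address of the
   alternate stack.  */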
7534
7535 const struct attribute_spec sh_attribute_table[] =
7536 {
7537 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7538 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7539 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7540 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7541 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7542 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7543 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7544 #ifdef SYMBIAN
7545 /* Symbian support adds three new attributes:
7546 dllexport - for exporting a function/variable that will live in a dll
7547 dllimport - for importing a function/variable from a dll
7548
7549 Microsoft allows multiple declspecs in one __declspec, separating
7550 them with spaces. We do NOT support this. Instead, use __declspec
7551 multiple times. */
7552 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7553 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7554 #endif
7555 { NULL, 0, 0, false, false, false, NULL }
7556 };
7557
7558 /* Handle an "interrupt_handler" attribute; arguments as in
7559 struct attribute_spec.handler. */
7560 static tree
7561 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7562 tree args ATTRIBUTE_UNUSED,
7563 int flags ATTRIBUTE_UNUSED,
7564 bool *no_add_attrs)
7565 {
7566 if (TREE_CODE (*node) != FUNCTION_DECL)
7567 {
7568 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7569 IDENTIFIER_POINTER (name));
7570 *no_add_attrs = true;
7571 }
7572 else if (TARGET_SHCOMPACT)
7573 {
7574 error ("attribute interrupt_handler is not compatible with -m5-compact");
7575 *no_add_attrs = true;
7576 }
7577
7578 return NULL_TREE;
7579 }
7580
7581 /* Handle an "sp_switch" attribute; arguments as in
7582 struct attribute_spec.handler. */
7583 static tree
7584 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7585 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7586 {
7587 if (TREE_CODE (*node) != FUNCTION_DECL)
7588 {
7589 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7590 IDENTIFIER_POINTER (name));
7591 *no_add_attrs = true;
7592 }
7593 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7594 {
7595 /* The argument must be a constant string. */
7596 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7597 IDENTIFIER_POINTER (name));
7598 *no_add_attrs = true;
7599 }
7600
7601 return NULL_TREE;
7602 }
7603
7604 /* Handle an "trap_exit" attribute; arguments as in
7605 struct attribute_spec.handler. */
7606 static tree
7607 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7608 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7609 {
7610 if (TREE_CODE (*node) != FUNCTION_DECL)
7611 {
7612 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7613 IDENTIFIER_POINTER (name));
7614 *no_add_attrs = true;
7615 }
7616 /* The argument specifies a trap number to be used in a trapa instruction
7617 at function exit (instead of an rte instruction). */
7618 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7619 {
7620 /* The argument must be a constant integer. */
7621 warning (OPT_Wattributes, "%qs attribute argument not an "
7622 "integer constant", IDENTIFIER_POINTER (name));
7623 *no_add_attrs = true;
7624 }
7625
7626 return NULL_TREE;
7627 }
7628
7629 static tree
7630 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7631 tree name ATTRIBUTE_UNUSED,
7632 tree args ATTRIBUTE_UNUSED,
7633 int flags ATTRIBUTE_UNUSED,
7634 bool *no_add_attrs ATTRIBUTE_UNUSED)
7635 {
7636 return NULL_TREE;
7637 }
7638
7639 /* True if __attribute__((renesas)) or -mrenesas. */
7640 int
7641 sh_attr_renesas_p (tree td)
7642 {
7643 if (TARGET_HITACHI)
7644 return 1;
7645 if (td == 0)
7646 return 0;
7647 if (DECL_P (td))
7648 td = TREE_TYPE (td);
7649 if (td == error_mark_node)
7650 return 0;
7651 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7652 != NULL_TREE);
7653 }
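
/* Illustrative example (editorial): a declaration such as

     int g (int) __attribute__ ((renesas));

   attaches the attribute to G's type, so sh_attr_renesas_p is true
   for it even without -mrenesas.  */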
7654
7655 /* True if __attribute__((renesas)) or -mrenesas, for the current
7656 function. */
7657 int
7658 sh_cfun_attr_renesas_p (void)
7659 {
7660 return sh_attr_renesas_p (current_function_decl);
7661 }
7662
7663 int
7664 sh_cfun_interrupt_handler_p (void)
7665 {
7666 return (lookup_attribute ("interrupt_handler",
7667 DECL_ATTRIBUTES (current_function_decl))
7668 != NULL_TREE);
7669 }
7670
7671 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7672
7673 static const char *
7674 sh_check_pch_target_flags (int old_flags)
7675 {
7676 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7677 | MASK_SH_E | MASK_HARD_SH4
7678 | MASK_FPU_SINGLE | MASK_SH4))
7679 return _("created and used with different architectures / ABIs");
7680 if ((old_flags ^ target_flags) & MASK_HITACHI)
7681 return _("created and used with different ABIs");
7682 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7683 return _("created and used with different endianness");
7684 return NULL;
7685 }
7686
7687 /* Predicates used by the templates. */
7688
7689 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7690 Used only in general_movsrc_operand. */
7691
7692 int
7693 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7694 {
7695 switch (REGNO (op))
7696 {
7697 case PR_REG:
7698 case MACL_REG:
7699 case MACH_REG:
7700 return 1;
7701 }
7702 return 0;
7703 }
7704
7705 /* Nonzero if OP is a floating point value with value 0.0. */
7706
7707 int
7708 fp_zero_operand (rtx op)
7709 {
7710 REAL_VALUE_TYPE r;
7711
7712 if (GET_MODE (op) != SFmode)
7713 return 0;
7714
7715 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7716 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7717 }
7718
7719 /* Nonzero if OP is a floating point value with value 1.0. */
7720
7721 int
7722 fp_one_operand (rtx op)
7723 {
7724 REAL_VALUE_TYPE r;
7725
7726 if (GET_MODE (op) != SFmode)
7727 return 0;
7728
7729 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7730 return REAL_VALUES_EQUAL (r, dconst1);
7731 }
7732
7733 /* For -m4 and -m4-single-only, mode switching is used. If we are
7734 compiling without -mfmovd, movsf_ie isn't taken into account for
7735 mode switching. We could check in machine_dependent_reorg for
7736 cases where we know we are in single precision mode, but there is
7737 no interface to find that out during reload, so we must avoid
7738 choosing an fldi alternative during reload and thus failing to
7739 allocate a scratch register for the constant loading. */
7740 int
7741 fldi_ok (void)
7742 {
7743 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7744 }
7745
7746 int
7747 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7748 {
7749 enum rtx_code code = GET_CODE (op);
7750 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7751 }
7752
7753 /* Return the TLS type for TLS symbols, 0 otherwise. */
7754 int
7755 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7756 {
7757 if (GET_CODE (op) != SYMBOL_REF)
7758 return 0;
7759 return SYMBOL_REF_TLS_MODEL (op);
7760 }
7761
7762 /* Return the destination address of a branch. */
7763
7764 static int
7765 branch_dest (rtx branch)
7766 {
7767 rtx dest = SET_SRC (PATTERN (branch));
7768 int dest_uid;
7769
7770 if (GET_CODE (dest) == IF_THEN_ELSE)
7771 dest = XEXP (dest, 1);
7772 dest = XEXP (dest, 0);
7773 dest_uid = INSN_UID (dest);
7774 return INSN_ADDRESSES (dest_uid);
7775 }
7776
7777 /* Return nonzero if REG is not used after INSN.
7778 We assume REG is a reload reg, and therefore does
7779 not live past labels. It may live past calls or jumps though. */
7780 int
7781 reg_unused_after (rtx reg, rtx insn)
7782 {
7783 enum rtx_code code;
7784 rtx set;
7785
7786 /* If the reg is set by this instruction, then it is safe for our
7787 case. Disregard the case where this is a store to memory, since
7788 we are checking a register used in the store address. */
7789 set = single_set (insn);
7790 if (set && GET_CODE (SET_DEST (set)) != MEM
7791 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7792 return 1;
7793
7794 while ((insn = NEXT_INSN (insn)))
7795 {
7796 rtx set;
7797 if (!INSN_P (insn))
7798 continue;
7799
7800 code = GET_CODE (insn);
7801
7802 #if 0
7803 /* If this is a label that existed before reload, then the register
7804 is dead here. However, if this is a label added by reorg, then
7805 the register may still be live here. We can't tell the difference,
7806 so we just ignore labels completely. */
7807 if (code == CODE_LABEL)
7808 return 1;
7809 /* else */
7810 #endif
7811
7812 if (code == JUMP_INSN)
7813 return 0;
7814
7815 /* If this is a sequence, we must handle them all at once.
7816 We could have for instance a call that sets the target register,
7817 and an insn in a delay slot that uses the register. In this case,
7818 we must return 0. */
7819 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7820 {
7821 int i;
7822 int retval = 0;
7823
7824 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7825 {
7826 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7827 rtx set = single_set (this_insn);
7828
7829 if (GET_CODE (this_insn) == CALL_INSN)
7830 code = CALL_INSN;
7831 else if (GET_CODE (this_insn) == JUMP_INSN)
7832 {
7833 if (INSN_ANNULLED_BRANCH_P (this_insn))
7834 return 0;
7835 code = JUMP_INSN;
7836 }
7837
7838 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7839 return 0;
7840 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7841 {
7842 if (GET_CODE (SET_DEST (set)) != MEM)
7843 retval = 1;
7844 else
7845 return 0;
7846 }
7847 if (set == 0
7848 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7849 return 0;
7850 }
7851 if (retval == 1)
7852 return 1;
7853 else if (code == JUMP_INSN)
7854 return 0;
7855 }
7856
7857 set = single_set (insn);
7858 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7859 return 0;
7860 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7861 return GET_CODE (SET_DEST (set)) != MEM;
7862 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7863 return 0;
7864
7865 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7866 return 1;
7867 }
7868 return 1;
7869 }
7870
7871 #include "ggc.h"
7872
7873 static GTY(()) rtx fpscr_rtx;
7874 rtx
7875 get_fpscr_rtx (void)
7876 {
7877 if (! fpscr_rtx)
7878 {
7879 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7880 REG_USERVAR_P (fpscr_rtx) = 1;
7881 mark_user_reg (fpscr_rtx);
7882 }
7883 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7884 mark_user_reg (fpscr_rtx);
7885 return fpscr_rtx;
7886 }
7887
7888 static GTY(()) tree fpscr_values;
7889
7890 static void
7891 emit_fpu_switch (rtx scratch, int index)
7892 {
7893 rtx dst, src;
7894
7895 if (fpscr_values == NULL)
7896 {
7897 tree t;
7898
7899 t = build_index_type (integer_one_node);
7900 t = build_array_type (integer_type_node, t);
7901 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7902 DECL_ARTIFICIAL (t) = 1;
7903 DECL_IGNORED_P (t) = 1;
7904 DECL_EXTERNAL (t) = 1;
7905 TREE_STATIC (t) = 1;
7906 TREE_PUBLIC (t) = 1;
7907 TREE_USED (t) = 1;
7908
7909 fpscr_values = t;
7910 }
7911
7912 src = DECL_RTL (fpscr_values);
7913 if (no_new_pseudos)
7914 {
7915 emit_move_insn (scratch, XEXP (src, 0));
7916 if (index != 0)
7917 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7918 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7919 }
7920 else
7921 src = adjust_address (src, PSImode, index * 4);
7922
7923 dst = get_fpscr_rtx ();
7924 emit_move_insn (dst, src);
7925 }
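
/* Roughly, the emitted RTL amounts to (editorial sketch; the array is
   expected to be supplied by the runtime):

     extern int __fpscr_values[2];
     FPSCR = __fpscr_values[index];    (a PSImode load)

   with INDEX selecting between the two precision settings.  */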
7926
7927 void
7928 emit_sf_insn (rtx pat)
7929 {
7930 emit_insn (pat);
7931 }
7932
7933 void
7934 emit_df_insn (rtx pat)
7935 {
7936 emit_insn (pat);
7937 }
7938
7939 void
7940 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7941 {
7942 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7943 }
7944
7945 void
7946 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7947 {
7948 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7949 get_fpscr_rtx ()));
7950 }
7951
7952 void
7953 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7954 {
7955 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7956 }
7957
7958 void
7959 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7960 {
7961 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7962 get_fpscr_rtx ()));
7963 }
7964
7965 /* ??? gcc does flow analysis strictly after common subexpression
7966 elimination. As a result, common subexpression elimination fails
7967 when there are some intervening statements setting the same register.
7968 If we did nothing about this, this would hurt the precision switching
7969 for SH4 badly. There is some cse after reload, but it is unable to
7970 undo the extra register pressure from the unused instructions, and
7971 it cannot remove auto-increment loads.
7972
7973 A C code example that shows this flow/cse weakness for (at least) SH
7974 and sparc (as of gcc ss-970706) is this:
7975
7976 double
7977 f(double a)
7978 {
7979 double d;
7980 d = 0.1;
7981 a += d;
7982 d = 1.1;
7983 d = 0.1;
7984 a *= d;
7985 return a;
7986 }
7987
7988 So we add another pass before common subexpression elimination, to
7989 remove assignments that are dead due to a following assignment in the
7990 same basic block. */
7991
7992 static void
7993 mark_use (rtx x, rtx *reg_set_block)
7994 {
7995 enum rtx_code code;
7996
7997 if (! x)
7998 return;
7999 code = GET_CODE (x);
8000 switch (code)
8001 {
8002 case REG:
8003 {
8004 int regno = REGNO (x);
8005 int nregs = (regno < FIRST_PSEUDO_REGISTER
8006 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8007 : 1);
8008 do
8009 {
8010 reg_set_block[regno + nregs - 1] = 0;
8011 }
8012 while (--nregs);
8013 break;
8014 }
8015 case SET:
8016 {
8017 rtx dest = SET_DEST (x);
8018
8019 if (GET_CODE (dest) == SUBREG)
8020 dest = SUBREG_REG (dest);
8021 if (GET_CODE (dest) != REG)
8022 mark_use (dest, reg_set_block);
8023 mark_use (SET_SRC (x), reg_set_block);
8024 break;
8025 }
8026 case CLOBBER:
8027 break;
8028 default:
8029 {
8030 const char *fmt = GET_RTX_FORMAT (code);
8031 int i, j;
8032 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8033 {
8034 if (fmt[i] == 'e')
8035 mark_use (XEXP (x, i), reg_set_block);
8036 else if (fmt[i] == 'E')
8037 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8038 mark_use (XVECEXP (x, i, j), reg_set_block);
8039 }
8040 break;
8041 }
8042 }
8043 }
8044
8045 static rtx get_free_reg (HARD_REG_SET);
8046
8047 /* This function returns a register to use for loading the address that
8048 the fpscr is loaded from. Currently it always returns r1 or r7, but when
8049 we are able to use pseudo registers after combine, or have a better
8050 mechanism for choosing a register, it should be done here. */
8051 /* REGS_LIVE is the liveness information for the point for which we
8052 need this allocation. In some bare-bones exit blocks, r1 is live at the
8053 start. We can even have all of r0..r3 being live:
8054 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8055 The INSN before which new insns are placed will clobber the register
8056 we return. If a basic block consists only of setting the return value
8057 register to a pseudo and using that register, the return value is not
8058 live before or after this block, yet we'll insert our insns right in
8059 the middle. */
8060
8061 static rtx
8062 get_free_reg (HARD_REG_SET regs_live)
8063 {
8064 if (! TEST_HARD_REG_BIT (regs_live, 1))
8065 return gen_rtx_REG (Pmode, 1);
8066
8067 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8068 there shouldn't be anything but a jump before the function end. */
8069 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8070 return gen_rtx_REG (Pmode, 7);
8071 }
8072
8073 /* This function will set the fpscr from memory.
8074 MODE is the mode we are setting it to. */
8075 void
8076 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8077 {
8078 enum attr_fp_mode fp_mode = mode;
8079 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8080 rtx addr_reg = get_free_reg (regs_live);
8081
8082 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8083 }
8084
8085 /* Is the given character a logical line separator for the assembler? */
8086 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8087 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8088 #endif
8089
8090 int
8091 sh_insn_length_adjustment (rtx insn)
8092 {
8093 /* Instructions with unfilled delay slots take up an extra two bytes for
8094 the nop in the delay slot. */
8095 if (((GET_CODE (insn) == INSN
8096 && GET_CODE (PATTERN (insn)) != USE
8097 && GET_CODE (PATTERN (insn)) != CLOBBER)
8098 || GET_CODE (insn) == CALL_INSN
8099 || (GET_CODE (insn) == JUMP_INSN
8100 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8101 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8102 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8103 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8104 return 2;
8105
8106 /* SH2e has a bug that prevents the use of annulled branches, so if
8107 the delay slot is not filled, we'll have to put a NOP in it. */
8108 if (sh_cpu == CPU_SH2E
8109 && GET_CODE (insn) == JUMP_INSN
8110 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8111 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8112 && get_attr_type (insn) == TYPE_CBRANCH
8113 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8114 return 2;
8115
8116 /* sh-dsp parallel processing insns take four bytes instead of two. */
8117
8118 if (GET_CODE (insn) == INSN)
8119 {
8120 int sum = 0;
8121 rtx body = PATTERN (insn);
8122 const char *template;
8123 char c;
8124 int maybe_label = 1;
8125
8126 if (GET_CODE (body) == ASM_INPUT)
8127 template = XSTR (body, 0);
8128 else if (asm_noperands (body) >= 0)
8129 template
8130 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8131 else
8132 return 0;
8133 do
8134 {
8135 int ppi_adjust = 0;
8136
8137 do
8138 c = *template++;
8139 while (c == ' ' || c == '\t');
8140 /* all sh-dsp parallel-processing insns start with p.
8141 The only non-ppi sh insn starting with p is pref.
8142 The only ppi starting with pr is prnd. */
8143 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8144 ppi_adjust = 2;
8145 /* The repeat pseudo-insn expands to three insns, a total of
8146 six bytes in size. */
8147 else if ((c == 'r' || c == 'R')
8148 && ! strncasecmp ("epeat", template, 5))
8149 ppi_adjust = 4;
8150 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8151 {
8152 /* If this is a label, it is obviously not a ppi insn. */
8153 if (c == ':' && maybe_label)
8154 {
8155 ppi_adjust = 0;
8156 break;
8157 }
8158 else if (c == '\'' || c == '"')
8159 maybe_label = 0;
8160 c = *template++;
8161 }
8162 sum += ppi_adjust;
8163 maybe_label = c != ':';
8164 }
8165 while (c);
8166 return sum;
8167 }
8168 return 0;
8169 }
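
/* Illustrative example (editorial; the asm statement is hypothetical):

     asm ("padd x0,y0,a0");

   has a template starting with 'p' that is not "pref", so the loop
   above adds 2 bytes to account for the 4-byte parallel-processing
   encoding.  */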
8170
8171 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8172 isn't protected by a PIC unspec. */
8173 int
8174 nonpic_symbol_mentioned_p (rtx x)
8175 {
8176 register const char *fmt;
8177 register int i;
8178
8179 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8180 || GET_CODE (x) == PC)
8181 return 1;
8182
8183 /* We don't want to look into the possible MEM location of a
8184 CONST_DOUBLE, since we're not going to use it, in general. */
8185 if (GET_CODE (x) == CONST_DOUBLE)
8186 return 0;
8187
8188 if (GET_CODE (x) == UNSPEC
8189 && (XINT (x, 1) == UNSPEC_PIC
8190 || XINT (x, 1) == UNSPEC_GOT
8191 || XINT (x, 1) == UNSPEC_GOTOFF
8192 || XINT (x, 1) == UNSPEC_GOTPLT
8193 || XINT (x, 1) == UNSPEC_GOTTPOFF
8194 || XINT (x, 1) == UNSPEC_DTPOFF
8195 || XINT (x, 1) == UNSPEC_PLT))
8196 return 0;
8197
8198 fmt = GET_RTX_FORMAT (GET_CODE (x));
8199 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8200 {
8201 if (fmt[i] == 'E')
8202 {
8203 register int j;
8204
8205 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8206 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8207 return 1;
8208 }
8209 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8210 return 1;
8211 }
8212
8213 return 0;
8214 }
8215
8216 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8217 @GOTOFF in `reg'. */
8218 rtx
8219 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8220 rtx reg)
8221 {
8222 if (tls_symbolic_operand (orig, Pmode))
8223 return orig;
8224
8225 if (GET_CODE (orig) == LABEL_REF
8226 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8227 {
8228 if (reg == 0)
8229 reg = gen_reg_rtx (Pmode);
8230
8231 emit_insn (gen_symGOTOFF2reg (reg, orig));
8232 return reg;
8233 }
8234 else if (GET_CODE (orig) == SYMBOL_REF)
8235 {
8236 if (reg == 0)
8237 reg = gen_reg_rtx (Pmode);
8238
8239 emit_insn (gen_symGOT2reg (reg, orig));
8240 return reg;
8241 }
8242 return orig;
8243 }
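
/* Illustrative example (editorial): with -fPIC,

     extern int counter;
     int get (void) { return counter; }

   reaches COUNTER through the GOT (symGOT2reg), while a file-local
   symbol or a label uses the cheaper @GOTOFF form (symGOTOFF2reg).  */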
8244
8245 /* Mark the use of a constant in the literal table. If the constant
8246 has multiple labels, make it unique. */
8247 static rtx
8248 mark_constant_pool_use (rtx x)
8249 {
8250 rtx insn, lab, pattern;
8251
8252 if (x == NULL)
8253 return x;
8254
8255 switch (GET_CODE (x))
8256 {
8257 case LABEL_REF:
8258 x = XEXP (x, 0);
8259 case CODE_LABEL:
8260 break;
8261 default:
8262 return x;
8263 }
8264
8265 /* Get the first label in the list of labels for the same constant
8266 and delete the other labels in the list. */
8267 lab = x;
8268 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8269 {
8270 if (GET_CODE (insn) != CODE_LABEL
8271 || LABEL_REFS (insn) != NEXT_INSN (insn))
8272 break;
8273 lab = insn;
8274 }
8275
8276 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8277 INSN_DELETED_P (insn) = 1;
8278
8279 /* Mark constants in a window. */
8280 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8281 {
8282 if (GET_CODE (insn) != INSN)
8283 continue;
8284
8285 pattern = PATTERN (insn);
8286 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8287 continue;
8288
8289 switch (XINT (pattern, 1))
8290 {
8291 case UNSPECV_CONST2:
8292 case UNSPECV_CONST4:
8293 case UNSPECV_CONST8:
8294 XVECEXP (pattern, 0, 1) = const1_rtx;
8295 break;
8296 case UNSPECV_WINDOW_END:
8297 if (XVECEXP (pattern, 0, 0) == x)
8298 return lab;
8299 break;
8300 case UNSPECV_CONST_END:
8301 return lab;
8302 default:
8303 break;
8304 }
8305 }
8306
8307 return lab;
8308 }
8309
8310 /* Return true if it's possible to redirect BRANCH1 to the destination
8311 of an unconditional jump BRANCH2. We only want to do this if the
8312 resulting branch will have a short displacement. */
8313 int
8314 sh_can_redirect_branch (rtx branch1, rtx branch2)
8315 {
8316 if (flag_expensive_optimizations && simplejump_p (branch2))
8317 {
8318 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8319 rtx insn;
8320 int distance;
8321
8322 for (distance = 0, insn = NEXT_INSN (branch1);
8323 insn && distance < 256;
8324 insn = PREV_INSN (insn))
8325 {
8326 if (insn == dest)
8327 return 1;
8328 else
8329 distance += get_attr_length (insn);
8330 }
8331 for (distance = 0, insn = NEXT_INSN (branch1);
8332 insn && distance < 256;
8333 insn = NEXT_INSN (insn))
8334 {
8335 if (insn == dest)
8336 return 1;
8337 else
8338 distance += get_attr_length (insn);
8339 }
8340 }
8341 return 0;
8342 }
8343
8344 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8345 int
8346 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8347 unsigned int new_reg)
8348 {
8349 /* Interrupt functions can only use registers that have already been
8350 saved by the prologue, even if they would normally be
8351 call-clobbered. */
8352
8353 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8354 return 0;
8355
8356 return 1;
8357 }
8358
8359 /* Function to update the integer COST
8360 based on the relationship between INSN that is dependent on
8361 DEP_INSN through the dependence LINK. The default is to make no
8362 adjustment to COST. This can be used for example to specify to
8363 the scheduler that an output- or anti-dependence does not incur
8364 the same cost as a data-dependence. The return value should be
8365 the new value for COST. */
8366 static int
8367 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8368 {
8369 rtx reg, use_pat;
8370
8371 if (TARGET_SHMEDIA)
8372 {
8373 /* On SHmedia, if the dependence is an anti-dependence or
8374 output-dependence, there is no cost. */
8375 if (REG_NOTE_KIND (link) != 0)
8376 {
8377 /* However, dependencies between target register loads and
8378 uses of the register in a subsequent block that are separated
8379 by a conditional branch are not modelled - we have to make do with
8380 the anti-dependency between the target register load and the
8381 conditional branch that ends the current block. */
8382 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8383 && GET_CODE (PATTERN (dep_insn)) == SET
8384 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8385 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8386 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8387 {
8388 int orig_cost = cost;
8389 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8390 rtx target = ((! note
8391 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8392 ? insn : JUMP_LABEL (insn));
8393 /* On the likely path, the branch costs 1, on the unlikely path,
8394 it costs 3. */
8395 cost--;
8396 do
8397 target = next_active_insn (target);
8398 while (target && ! flow_dependent_p (target, dep_insn)
8399 && --cost > 0);
8400 /* If two branches are executed in immediate succession, with the
8401 first branch properly predicted, this causes a stall at the
8402 second branch, hence we won't need the target for the
8403 second branch for two cycles after the launch of the first
8404 branch. */
8405 if (cost > orig_cost - 2)
8406 cost = orig_cost - 2;
8407 }
8408 else
8409 cost = 0;
8410 }
8411
8412 else if (get_attr_is_mac_media (insn)
8413 && get_attr_is_mac_media (dep_insn))
8414 cost = 1;
8415
8416 else if (! reload_completed
8417 && GET_CODE (PATTERN (insn)) == SET
8418 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8419 && GET_CODE (PATTERN (dep_insn)) == SET
8420 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8421 && cost < 4)
8422 cost = 4;
8423 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8424 that is needed at the target. */
8425 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8426 && ! flow_dependent_p (insn, dep_insn))
8427 cost--;
8428 }
8429 else if (REG_NOTE_KIND (link) == 0)
8430 {
8431 enum attr_type dep_type, type;
8432
8433 if (recog_memoized (insn) < 0
8434 || recog_memoized (dep_insn) < 0)
8435 return cost;
8436
8437 dep_type = get_attr_type (dep_insn);
8438 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8439 cost--;
8440 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8441 && (type = get_attr_type (insn)) != TYPE_CALL
8442 && type != TYPE_SFUNC)
8443 cost--;
8444
8445 /* The only input for a call that is timing-critical is the
8446 function's address. */
8447 if (GET_CODE (insn) == CALL_INSN)
8448 {
8449 rtx call = PATTERN (insn);
8450
8451 if (GET_CODE (call) == PARALLEL)
8452 call = XVECEXP (call, 0, 0);
8453 if (GET_CODE (call) == SET)
8454 call = SET_SRC (call);
8455 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8456 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8457 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8458 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8459 cost = 0;
8460 }
8461 /* Likewise, the most timing critical input for an sfunc call
8462 is the function address. However, sfuncs typically start
8463 using their arguments pretty quickly.
8464 Assume a four cycle delay before they are needed. */
8465 /* All sfunc calls are parallels with at least four components.
8466 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8467 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8468 && XVECLEN (PATTERN (insn), 0) >= 4
8469 && (reg = sfunc_uses_reg (insn)))
8470 {
8471 if (! reg_set_p (reg, dep_insn))
8472 cost -= 4;
8473 }
8474 /* When the preceding instruction loads the shift amount of
8475 the following SHAD/SHLD, the latency of the load is increased
8476 by 1 cycle. */
8477 else if (TARGET_SH4
8478 && get_attr_type (insn) == TYPE_DYN_SHIFT
8479 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8480 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8481 XEXP (SET_SRC (single_set (insn)),
8482 1)))
8483 cost++;
8484 /* When an LS group instruction with a latency of less than
8485 3 cycles is followed by a double-precision floating-point
8486 instruction, FIPR, or FTRV, the latency of the first
8487 instruction is increased to 3 cycles. */
8488 else if (cost < 3
8489 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8490 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8491 cost = 3;
8492 /* The lsw register of a double-precision computation is ready one
8493 cycle earlier. */
8494 else if (reload_completed
8495 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8496 && (use_pat = single_set (insn))
8497 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8498 SET_SRC (use_pat)))
8499 cost -= 1;
8500
8501 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8502 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8503 cost -= 1;
8504 }
8505 /* An anti-dependence penalty of two applies if the first insn is a double
8506 precision fadd / fsub / fmul. */
8507 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8508 && recog_memoized (dep_insn) >= 0
8509 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8510 /* A lot of alleged anti-flow dependences are fake,
8511 so check this one is real. */
8512 && flow_dependent_p (dep_insn, insn))
8513 cost = 2;
8514
8515
8516 return cost;
8517 }
8518
8519 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8520 if DEP_INSN is anti-flow dependent on INSN. */
8521 static int
8522 flow_dependent_p (rtx insn, rtx dep_insn)
8523 {
8524 rtx tmp = PATTERN (insn);
8525
8526 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8527 return tmp == NULL_RTX;
8528 }
8529
8530 /* A helper function for flow_dependent_p called through note_stores. */
8531 static void
8532 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8533 {
8534 rtx * pinsn = (rtx *) data;
8535
8536 if (*pinsn && reg_referenced_p (x, *pinsn))
8537 *pinsn = NULL_RTX;
8538 }
8539
8540 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8541 'special function' patterns (type sfunc) that clobber pr, but that
8542 do not look like function calls to leaf_function_p. Hence we must
8543 do this extra check. */
8544 static int
8545 sh_pr_n_sets (void)
8546 {
8547 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8548 }
8549
8550 /* Return where to allocate pseudo for a given hard register initial
8551 value. */
8552 static rtx
8553 sh_allocate_initial_value (rtx hard_reg)
8554 {
8555 rtx x;
8556
8557 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8558 {
8559 if (current_function_is_leaf
8560 && ! sh_pr_n_sets ()
8561 && ! (TARGET_SHCOMPACT
8562 && ((current_function_args_info.call_cookie
8563 & ~ CALL_COOKIE_RET_TRAMP (1))
8564 || current_function_has_nonlocal_label)))
8565 x = hard_reg;
8566 else
8567 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8568 }
8569 else
8570 x = NULL_RTX;
8571
8572 return x;
8573 }
8574
8575 /* This function returns "2" to indicate dual issue for the SH4
8576 processor. To be used by the DFA pipeline description. */
8577 static int
8578 sh_issue_rate (void)
8579 {
8580 if (TARGET_SUPERSCALAR)
8581 return 2;
8582 else
8583 return 1;
8584 }
8585
8586 /* Functions for ready queue reordering for sched1. */
8587
8588 /* Get weight for mode for a set x. */
8589 static short
8590 find_set_regmode_weight (rtx x, enum machine_mode mode)
8591 {
8592 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8593 return 1;
8594 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8595 {
8596 if (GET_CODE (SET_DEST (x)) == REG)
8597 {
8598 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8599 return 1;
8600 else
8601 return 0;
8602 }
8603 return 1;
8604 }
8605 return 0;
8606 }
8607
8608 /* Get regmode weight for insn. */
8609 static short
8610 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8611 {
8612 short reg_weight = 0;
8613 rtx x;
8614
8615 /* Increment weight for each register born here. */
8616 x = PATTERN (insn);
8617 reg_weight += find_set_regmode_weight (x, mode);
8618 if (GET_CODE (x) == PARALLEL)
8619 {
8620 int j;
8621 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8622 {
8623 x = XVECEXP (PATTERN (insn), 0, j);
8624 reg_weight += find_set_regmode_weight (x, mode);
8625 }
8626 }
8627 /* Decrement weight for each register that dies here. */
8628 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8629 {
8630 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8631 {
8632 rtx note = XEXP (x, 0);
8633 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8634 reg_weight--;
8635 }
8636 }
8637 return reg_weight;
8638 }
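
/* Worked example (editorial): for an insn such as

     (set (reg:SF fr4) (plus:SF (reg:SF fr5) (reg:SF fr6)))

   carrying REG_DEAD notes for fr5 and fr6, one SFmode register is
   born (+1) and two die (-2), so the insn's SFmode weight is -1.  */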
8639
8640 /* Calculate regmode weights for all insns of a basic block. */
8641 static void
8642 find_regmode_weight (int b, enum machine_mode mode)
8643 {
8644 rtx insn, next_tail, head, tail;
8645
8646 get_block_head_tail (b, &head, &tail);
8647 next_tail = NEXT_INSN (tail);
8648
8649 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8650 {
8651 /* Handle register life information. */
8652 if (!INSN_P (insn))
8653 continue;
8654
8655 if (mode == SFmode)
8656 INSN_REGMODE_WEIGHT (insn, mode) =
8657 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8658 else if (mode == SImode)
8659 INSN_REGMODE_WEIGHT (insn, mode) =
8660 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8661 }
8662 }
8663
8664 /* Comparison function for ready queue sorting. */
8665 static int
8666 rank_for_reorder (const void *x, const void *y)
8667 {
8668 rtx tmp = *(const rtx *) y;
8669 rtx tmp2 = *(const rtx *) x;
8670
8671 /* The insn in a schedule group should be issued first. */
8672 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8673 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8674
8675 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8676 minimizes instruction movement, thus minimizing sched's effect on
8677 register pressure. */
8678 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8679 }
8680
8681 /* Resort the array A in which only element at index N may be out of order. */
8682 static void
8683 swap_reorder (rtx *a, int n)
8684 {
8685 rtx insn = a[n - 1];
8686 int i = n - 2;
8687
8688 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8689 {
8690 a[i + 1] = a[i];
8691 i -= 1;
8692 }
8693 a[i + 1] = insn;
8694 }
8695
8696 #define SCHED_REORDER(READY, N_READY) \
8697 do \
8698 { \
8699 if ((N_READY) == 2) \
8700 swap_reorder (READY, N_READY); \
8701 else if ((N_READY) > 2) \
8702 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8703 } \
8704 while (0)
8705
8706 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8707 macro. */
8708 static void
8709 ready_reorder (rtx *ready, int nready)
8710 {
8711 SCHED_REORDER (ready, nready);
8712 }
8713
8714 /* Calculate regmode weights for all insns of all basic blocks. */
8715 static void
8716 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8717 int verbose ATTRIBUTE_UNUSED,
8718 int old_max_uid)
8719 {
8720 basic_block b;
8721
8722 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8723 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8724
8725 FOR_EACH_BB_REVERSE (b)
8726 {
8727 find_regmode_weight (b->index, SImode);
8728 find_regmode_weight (b->index, SFmode);
8729 }
8730
8731 CURR_REGMODE_PRESSURE (SImode) = 0;
8732 CURR_REGMODE_PRESSURE (SFmode) = 0;
8733
8734 }
8735
8736 /* Cleanup. */
8737 static void
8738 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8739 int verbose ATTRIBUTE_UNUSED)
8740 {
8741 if (regmode_weight[0])
8742 {
8743 free (regmode_weight[0]);
8744 regmode_weight[0] = NULL;
8745 }
8746 if (regmode_weight[1])
8747 {
8748 free (regmode_weight[1]);
8749 regmode_weight[1] = NULL;
8750 }
8751 }
8752
8753 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8754 keep count of register pressures on SImode and SFmode. */
8755 static int
8756 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8757 int sched_verbose ATTRIBUTE_UNUSED,
8758 rtx insn,
8759 int can_issue_more)
8760 {
8761 if (GET_CODE (PATTERN (insn)) != USE
8762 && GET_CODE (PATTERN (insn)) != CLOBBER)
8763 cached_can_issue_more = can_issue_more - 1;
8764 else
8765 cached_can_issue_more = can_issue_more;
8766
8767 if (reload_completed)
8768 return cached_can_issue_more;
8769
8770 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8771 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8772
8773 return cached_can_issue_more;
8774 }
8775
8776 static void
8777 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8778 int verbose ATTRIBUTE_UNUSED,
8779 int veclen ATTRIBUTE_UNUSED)
8780 {
8781 CURR_REGMODE_PRESSURE (SImode) = 0;
8782 CURR_REGMODE_PRESSURE (SFmode) = 0;
8783 }
8784
8785 /* Some magic numbers. */
8786 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8787 functions that already have high pressure on r0. */
8788 #define R0_MAX_LIFE_REGIONS 2
8789 #define R0_MAX_LIVE_LENGTH 12
8790 /* Register Pressure thresholds for SImode and SFmode registers. */
8791 #define SIMODE_MAX_WEIGHT 5
8792 #define SFMODE_MAX_WEIGHT 10
8793
8794 /* Return true if the pressure is high for MODE. */
8795 static short
8796 high_pressure (enum machine_mode mode)
8797 {
8798 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8799 functions that already have high pressure on r0. */
8800 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8801 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8802 return 1;
8803
8804 if (mode == SFmode)
8805 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8806 else
8807 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8808 }
8809
8810 /* Reorder ready queue if register pressure is high. */
8811 static int
8812 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8813 int sched_verbose ATTRIBUTE_UNUSED,
8814 rtx *ready,
8815 int *n_readyp,
8816 int clock_var ATTRIBUTE_UNUSED)
8817 {
8818 if (reload_completed)
8819 return sh_issue_rate ();
8820
8821 if (high_pressure (SFmode) || high_pressure (SImode))
8822 {
8823 ready_reorder (ready, *n_readyp);
8824 }
8825
8826 return sh_issue_rate ();
8827 }
8828
8829 /* Skip cycles if the current register pressure is high. */
8830 static int
8831 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8832 int sched_verbose ATTRIBUTE_UNUSED,
8833 rtx *ready ATTRIBUTE_UNUSED,
8834 int *n_readyp ATTRIBUTE_UNUSED,
8835 int clock_var ATTRIBUTE_UNUSED)
8836 {
8837 if (reload_completed)
8838 return cached_can_issue_more;
8839
8840 if (high_pressure (SFmode) || high_pressure (SImode))
8841 skip_cycles = 1;
8842
8843 return cached_can_issue_more;
8844 }
8845
8846 /* Skip cycles without sorting the ready queue. This will move insns from
8847 Q -> R. If this is the last cycle we are skipping, allow sorting of the
8848 ready queue by sh_reorder. */
8849
8850 /* Generally, skipping this many cycles is sufficient for all insns to move
8851 from Q -> R. */
8852 #define MAX_SKIPS 8
8853
8854 static int
8855 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8856 int sched_verbose ATTRIBUTE_UNUSED,
8857 rtx insn ATTRIBUTE_UNUSED,
8858 int last_clock_var,
8859 int clock_var,
8860 int *sort_p)
8861 {
8862 if (reload_completed)
8863 return 0;
8864
8865 if (skip_cycles)
8866 {
8867 if ((clock_var - last_clock_var) < MAX_SKIPS)
8868 {
8869 *sort_p = 0;
8870 return 1;
8871 }
8872 /* If this is the last cycle we are skipping, allow reordering of R. */
8873 if ((clock_var - last_clock_var) == MAX_SKIPS)
8874 {
8875 *sort_p = 1;
8876 return 1;
8877 }
8878 }
8879
8880 skip_cycles = 0;
8881
8882 return 0;
8883 }
8884
8885 /* SHmedia requires registers for branches, so we can't generate new
8886 branches past reload. */
8887 static bool
8888 sh_cannot_modify_jumps_p (void)
8889 {
8890 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8891 }
8892
8893 static int
8894 sh_target_reg_class (void)
8895 {
8896 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8897 }
8898
8899 static bool
8900 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8901 {
8902 HARD_REG_SET dummy;
8903 rtx insn;
8904
8905 if (! shmedia_space_reserved_for_target_registers)
8906 return 0;
8907 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8908 return 0;
8909 if (calc_live_regs (&dummy) >= 6 * 8)
8910 return 1;
8911 /* This is a borderline case. See if we have a nested loop, or a loop
8912 with a call, or one with more than 4 labels inside. */
8913 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8914 {
8915 if (GET_CODE (insn) == NOTE
8916 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8917 {
8918 int labels = 0;
8919
8920 do
8921 {
8922 insn = NEXT_INSN (insn);
8923 if ((GET_CODE (insn) == NOTE
8924 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8925 || GET_CODE (insn) == CALL_INSN
8926 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8927 return 1;
8928 }
8929 while (GET_CODE (insn) != NOTE
8930 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8931 }
8932 }
8933 return 0;
8934 }
8935
8936 static bool
8937 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8938 {
8939 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8940 }
8941
8942 /*
8943 On the SH1..SH4, the trampoline looks like
8944 2 0002 D202 mov.l l2,r2
8945 1 0000 D301 mov.l l1,r3
8946 3 0004 422B jmp @r2
8947 4 0006 0009 nop
8948 5 0008 00000000 l1: .long area
8949 6 000c 00000000 l2: .long function
8950
8951 SH5 (compact) uses r1 instead of r3 for the static chain. */
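/* Worked example (illustrative): on a little-endian SH1..SH4 target,
   sh_initialize_trampoline below fills the template with these four
   SImode words:

     offset  0:  0xd301d202   the two mov.l insns shown above
     offset  4:  0x0009422b   jmp @r2 ; nop
     offset  8:  cxt          l1: static chain ("area")
     offset 12:  fnaddr       l2: function address

   On big-endian the first two words are 0xd202d301 and 0x422b0009.  */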
8952
8953
8954 /* Emit RTL insns to initialize the variable parts of a trampoline.
8955 FNADDR is an RTX for the address of the function's pure code.
8956 CXT is an RTX for the static chain value for the function. */
8957
8958 void
8959 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8960 {
8961 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8962
8963 if (TARGET_SHMEDIA64)
8964 {
8965 rtx tramp_templ;
8966 int fixed_len;
8967
8968 rtx movi1 = GEN_INT (0xcc000010);
8969 rtx shori1 = GEN_INT (0xc8000010);
8970 rtx src, dst;
8971
8972 /* The following trampoline works within a +- 128 KB range for cxt:
8973 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8974 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8975 gettr tr1,r1; blink tr0,r63 */
8976 /* Address rounding makes it hard to compute the exact bounds of the
8977 offset for this trampoline, but we have a rather generous offset
8978 range, so frame_offset should do fine as an upper bound. */
8979 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8980 {
8981 /* ??? could optimize this trampoline initialization
8982 by writing DImode words with two insns each. */
8983 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8984 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8985 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8986 insn = gen_rtx_AND (DImode, insn, mask);
8987 /* OR in the ptb/u .,tr1 pattern. */
8988 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8989 insn = force_operand (insn, NULL_RTX);
8990 insn = gen_lowpart (SImode, insn);
8991 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8992 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8993 insn = gen_rtx_AND (DImode, insn, mask);
8994 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8995 insn = gen_lowpart (SImode, insn);
8996 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
8997 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8998 insn = gen_rtx_AND (DImode, insn, mask);
8999 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9000 insn = gen_lowpart (SImode, insn);
9001 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9002 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9003 insn = gen_rtx_AND (DImode, insn, mask);
9004 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9005 insn = gen_lowpart (SImode, insn);
9006 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9007 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9008 insn = gen_rtx_AND (DImode, insn, mask);
9009 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9010 insn = gen_lowpart (SImode, insn);
9011 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9012 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9013 GEN_INT (0x6bf10600));
9014 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9015 GEN_INT (0x4415fc10));
9016 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9017 GEN_INT (0x4401fff0));
9018 emit_insn (gen_ic_invalidate_line (tramp));
9019 return;
9020 }
9021 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9022 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9023
9024 tramp_templ = gen_datalabel_ref (tramp_templ);
9025 dst = tramp_mem;
9026 src = gen_const_mem (BLKmode, tramp_templ);
9027 set_mem_align (dst, 256);
9028 set_mem_align (src, 64);
9029 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9030
9031 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9032 emit_move_insn (adjust_address (tramp_mem, Pmode,
9033 fixed_len + GET_MODE_SIZE (Pmode)),
9034 cxt);
9035 emit_insn (gen_ic_invalidate_line (tramp));
9036 return;
9037 }
9038 else if (TARGET_SHMEDIA)
9039 {
9040 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9041 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9042 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9043 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9044 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9045 rotated 10 right, and the higher 16 bits of every 32 selected. */
9046 rtx movishori
9047 = force_reg (V2HImode, (simplify_gen_subreg
9048 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9049 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9050 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9051
9052 tramp = force_reg (Pmode, tramp);
9053 fnaddr = force_reg (SImode, fnaddr);
9054 cxt = force_reg (SImode, cxt);
9055 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9056 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9057 movishori));
9058 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9059 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9060 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9061 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9062 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9063 gen_rtx_SUBREG (V2HImode, cxt, 0),
9064 movishori));
9065 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9066 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9067 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9068 if (TARGET_LITTLE_ENDIAN)
9069 {
9070 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9071 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9072 }
9073 else
9074 {
9075 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9076 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9077 }
9078 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9079 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9080 emit_insn (gen_ic_invalidate_line (tramp));
9081 return;
9082 }
9083 else if (TARGET_SHCOMPACT)
9084 {
9085 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9086 return;
9087 }
9088 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9089 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9090 SImode));
9091 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9092 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9093 SImode));
9094 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9095 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9096 if (TARGET_HARVARD)
9097 {
9098 if (TARGET_USERMODE)
9099 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9100 FUNCTION_ORDINARY),
9101 0, VOIDmode, 1, tramp, SImode);
9102 else
9103 emit_insn (gen_ic_invalidate_line (tramp));
9104 }
9105 }
9106
9107 /* FIXME: This is overly conservative. A SHcompact function that
9108 receives arguments ``by reference'' will have them stored in its
9109 own stack frame, so it must not pass pointers or references to
9110 these arguments to other functions by means of sibling calls. */
9111 /* If PIC, we cannot make sibling calls to global functions
9112 because the PLT requires r12 to be live. */
9113 static bool
9114 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9115 {
9116 return (1
9117 && (! TARGET_SHCOMPACT
9118 || current_function_args_info.stack_regs == 0)
9119 && ! sh_cfun_interrupt_handler_p ()
9120 && (! flag_pic
9121 || (decl && ! TREE_PUBLIC (decl))
9122 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9123 }
9124
9125 /* Machine specific built-in functions. */
9126
9127 struct builtin_description
9128 {
9129 const enum insn_code icode;
9130 const char *const name;
9131 int signature;
9132 };
9133
9134 /* Describe the number and signedness of arguments; arg[0] == result
9135 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9136 /* 9: 64-bit pointer, 10: 32-bit pointer. */
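/* Decoding example (illustrative): SH_BLTIN_MAC_HISI below is
   { 1, 4, 4, 1 }: an unsigned result, two "don't care" operands and a
   final unsigned operand -- the shape used by e.g.
   __builtin_sh_media_MMACFX_WL in the bdesc table.  */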
9137 static const char signature_args[][4] =
9138 {
9139 #define SH_BLTIN_V2SI2 0
9140 { 4, 4 },
9141 #define SH_BLTIN_V4HI2 1
9142 { 4, 4 },
9143 #define SH_BLTIN_V2SI3 2
9144 { 4, 4, 4 },
9145 #define SH_BLTIN_V4HI3 3
9146 { 4, 4, 4 },
9147 #define SH_BLTIN_V8QI3 4
9148 { 4, 4, 4 },
9149 #define SH_BLTIN_MAC_HISI 5
9150 { 1, 4, 4, 1 },
9151 #define SH_BLTIN_SH_HI 6
9152 { 4, 4, 1 },
9153 #define SH_BLTIN_SH_SI 7
9154 { 4, 4, 1 },
9155 #define SH_BLTIN_V4HI2V2SI 8
9156 { 4, 4, 4 },
9157 #define SH_BLTIN_V4HI2V8QI 9
9158 { 4, 4, 4 },
9159 #define SH_BLTIN_SISF 10
9160 { 4, 2 },
9161 #define SH_BLTIN_LDUA_L 11
9162 { 2, 10 },
9163 #define SH_BLTIN_LDUA_Q 12
9164 { 1, 10 },
9165 #define SH_BLTIN_STUA_L 13
9166 { 0, 10, 2 },
9167 #define SH_BLTIN_STUA_Q 14
9168 { 0, 10, 1 },
9169 #define SH_BLTIN_LDUA_L64 15
9170 { 2, 9 },
9171 #define SH_BLTIN_LDUA_Q64 16
9172 { 1, 9 },
9173 #define SH_BLTIN_STUA_L64 17
9174 { 0, 9, 2 },
9175 #define SH_BLTIN_STUA_Q64 18
9176 { 0, 9, 1 },
9177 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9178 #define SH_BLTIN_2 19
9179 #define SH_BLTIN_SU 19
9180 { 1, 2 },
9181 #define SH_BLTIN_3 20
9182 #define SH_BLTIN_SUS 20
9183 { 2, 2, 1 },
9184 #define SH_BLTIN_PSSV 21
9185 { 0, 8, 2, 2 },
9186 #define SH_BLTIN_XXUU 22
9187 #define SH_BLTIN_UUUU 22
9188 { 1, 1, 1, 1 },
9189 #define SH_BLTIN_PV 23
9190 { 0, 8 },
9191 };
9192 /* mcmv: operands considered unsigned. */
9193 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9194 /* mperm: control value considered unsigned int. */
9195 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9196 /* mshards_q: returns signed short. */
9197 /* nsb: takes long long arg, returns unsigned char. */
9198 static const struct builtin_description bdesc[] =
9199 {
9200 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9201 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9202 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9203 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9204 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9205 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9206 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9207 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9208 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9209 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9210 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9211 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9212 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9213 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9214 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9215 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9216 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9217 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9218 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9219 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9220 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9221 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9222 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9223 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9224 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9225 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9226 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9227 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9228 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9229 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9230 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9232 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9233 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9234 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9235 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9236 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9237 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9238 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9239 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9240 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9241 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9242 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9243 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9244 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9245 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9246 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9247 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9248 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9249 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9250 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9251 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9252 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9253 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9254 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9256 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9257 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9258 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9259 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9260 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9261 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9262 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9263 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9264 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9265 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9266 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9267 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9268 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9269 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9270 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9271 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9272 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9273 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9274 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9275 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9276 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9277 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9278 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9279 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9280 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9281 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9282 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9283 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9284 };
9285
9286 static void
9287 sh_media_init_builtins (void)
9288 {
9289 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9290 const struct builtin_description *d;
9291
9292 memset (shared, 0, sizeof shared);
9293 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9294 {
9295 tree type, arg_type = 0;
9296 int signature = d->signature;
9297 int i;
9298
9299 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9300 type = shared[signature];
9301 else
9302 {
9303 int has_result = signature_args[signature][0] != 0;
9304
9305 if ((signature_args[signature][1] & 8)
9306 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9307 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9308 continue;
9309 if (! TARGET_FPU_ANY
9310 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9311 continue;
9312 type = void_list_node;
9313 for (i = 3; ; i--)
9314 {
9315 int arg = signature_args[signature][i];
9316 int opno = i - 1 + has_result;
9317
9318 if (arg & 8)
9319 arg_type = ptr_type_node;
9320 else if (arg)
9321 arg_type = (*lang_hooks.types.type_for_mode)
9322 (insn_data[d->icode].operand[opno].mode,
9323 (arg & 1));
9324 else if (i)
9325 continue;
9326 else
9327 arg_type = void_type_node;
9328 if (i == 0)
9329 break;
9330 type = tree_cons (NULL_TREE, arg_type, type);
9331 }
9332 type = build_function_type (arg_type, type);
9333 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9334 shared[signature] = type;
9335 }
9336 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9337 NULL, NULL_TREE);
9338 }
9339 }
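/* Usage sketch (illustrative; assumes a GNU C vector typedef that is
   not part of this file):

     typedef int v2si __attribute__ ((vector_size (8)));
     v2si a, b, c;
     c = __builtin_addv2si3 (a, b);

   The builtin's name and operand modes come straight from the bdesc
   and signature_args tables above.  */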
9340
9341 /* Implements target hook vector_mode_supported_p. */
9342 bool
9343 sh_vector_mode_supported_p (enum machine_mode mode)
9344 {
9345 if (TARGET_FPU_ANY
9346 && ((mode == V2SFmode)
9347 || (mode == V4SFmode)
9348 || (mode == V16SFmode)))
9349 return true;
9350
9351 else if (TARGET_SHMEDIA
9352 && ((mode == V8QImode)
9353 || (mode == V2HImode)
9354 || (mode == V4HImode)
9355 || (mode == V2SImode)))
9356 return true;
9357
9358 return false;
9359 }
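/* For illustration: V2SFmode corresponds to a user-level type such as
   typedef float v2sf __attribute__ ((vector_size (8))); (an assumed
   GNU C declaration, not defined here).  When TARGET_FPU_ANY is set,
   the hook above reports that mode as supported, so the middle end
   may vectorize with it.  */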
9360
9361 /* Implements target hook dwarf_calling_convention. Return an enum
9362 of dwarf_calling_convention. */
9363 int
9364 sh_dwarf_calling_convention (tree func)
9365 {
9366 if (sh_attr_renesas_p (func))
9367 return DW_CC_GNU_renesas_sh;
9368
9369 return DW_CC_normal;
9370 }
9371
9372 static void
9373 sh_init_builtins (void)
9374 {
9375 if (TARGET_SHMEDIA)
9376 sh_media_init_builtins ();
9377 }
9378
9379 /* Expand an expression EXP that calls a built-in function,
9380 with result going to TARGET if that's convenient
9381 (and in mode MODE if that's convenient).
9382 SUBTARGET may be used as the target for computing one of EXP's operands.
9383 IGNORE is nonzero if the value is to be ignored. */
9384
9385 static rtx
9386 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9387 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9388 {
9389 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9390 tree arglist = TREE_OPERAND (exp, 1);
9391 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9392 const struct builtin_description *d = &bdesc[fcode];
9393 enum insn_code icode = d->icode;
9394 int signature = d->signature;
9395 enum machine_mode tmode = VOIDmode;
9396 int nop = 0, i;
9397 rtx op[4];
9398 rtx pat = 0;
9399
9400 if (signature_args[signature][0])
9401 {
9402 if (ignore)
9403 return 0;
9404
9405 tmode = insn_data[icode].operand[0].mode;
9406 if (! target
9407 || GET_MODE (target) != tmode
9408 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9409 target = gen_reg_rtx (tmode);
9410 op[nop++] = target;
9411 }
9412 else
9413 target = 0;
9414
9415 for (i = 1; i <= 3; i++, nop++)
9416 {
9417 tree arg;
9418 enum machine_mode opmode, argmode;
9419 tree optype;
9420
9421 if (! signature_args[signature][i])
9422 break;
9423 arg = TREE_VALUE (arglist);
9424 if (arg == error_mark_node)
9425 return const0_rtx;
9426 arglist = TREE_CHAIN (arglist);
9427 if (signature_args[signature][i] & 8)
9428 {
9429 opmode = ptr_mode;
9430 optype = ptr_type_node;
9431 }
9432 else
9433 {
9434 opmode = insn_data[icode].operand[nop].mode;
9435 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9436 }
9437 argmode = TYPE_MODE (TREE_TYPE (arg));
9438 if (argmode != opmode)
9439 arg = build1 (NOP_EXPR, optype, arg);
9440 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9441 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9442 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9443 }
9444
9445 switch (nop)
9446 {
9447 case 1:
9448 pat = (*insn_data[d->icode].genfun) (op[0]);
9449 break;
9450 case 2:
9451 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9452 break;
9453 case 3:
9454 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9455 break;
9456 case 4:
9457 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9458 break;
9459 default:
9460 gcc_unreachable ();
9461 }
9462 if (! pat)
9463 return 0;
9464 emit_insn (pat);
9465 return target;
9466 }
9467
9468 void
9469 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9470 {
9471 rtx sel0 = const0_rtx;
9472 rtx sel1 = const1_rtx;
9473 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9474 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9475
9476 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9477 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9478 }
9479
9480 void
9481 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9482 {
9483 rtx sel0 = const0_rtx;
9484 rtx sel1 = const1_rtx;
9485 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9486 = gen_binary_sf_op;
9487 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9488
9489 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9490 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9491 }
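/* For illustration: sh_expand_binop_v2sf (PLUS, op0, op1, op2) emits
   gen_binary_sf_op twice, once per vector lane (the sel* constants
   select the lane), effectively lowering a V2SF operation to two
   single-precision SFmode operations.  */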
9492
9493 /* Return the class of registers for which a mode change from FROM to TO
9494 is invalid. */
9495 bool
9496 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9497 enum reg_class class)
9498 {
9499 /* We want to enable the use of SUBREGs as a means to
9500 VEC_SELECT a single element of a vector. */
9501 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9502 return (reg_classes_intersect_p (GENERAL_REGS, class));
9503
9504 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9505 {
9506 if (TARGET_LITTLE_ENDIAN)
9507 {
9508 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9509 return reg_classes_intersect_p (DF_REGS, class);
9510 }
9511 else
9512 {
9513 if (GET_MODE_SIZE (from) < 8)
9514 return reg_classes_intersect_p (DF_HI_REGS, class);
9515 }
9516 }
9517 return 0;
9518 }
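/* Example of the VEC_SELECT case above (illustrative): a subreg such
   as (subreg:SF (reg:V2SF fr4) 0) just reads one lane of the vector,
   so the mode change is allowed for FP registers; for any class that
   intersects GENERAL_REGS the hook returns nonzero, i.e. the change
   is invalid there.  */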
9519
9520
9521 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9522 that label is used. */
9523
9524 void
9525 sh_mark_label (rtx address, int nuses)
9526 {
9527 if (GOTOFF_P (address))
9528 {
9529 /* Extract the label or symbol. */
9530 address = XEXP (address, 0);
9531 if (GET_CODE (address) == PLUS)
9532 address = XEXP (address, 0);
9533 address = XVECEXP (address, 0, 0);
9534 }
9535 if (GET_CODE (address) == LABEL_REF
9536 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9537 LABEL_NUSES (XEXP (address, 0)) += nuses;
9538 }
9539
9540 /* Compute extra cost of moving data between one register class
9541 and another. */
9542
9543 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9544 uses this information. Hence, the general register <-> floating point
9545 register information here is not used for SFmode. */
9546
9547 int
9548 sh_register_move_cost (enum machine_mode mode,
9549 enum reg_class srcclass, enum reg_class dstclass)
9550 {
9551 if (dstclass == T_REGS || dstclass == PR_REGS)
9552 return 10;
9553
9554 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9555 return 4;
9556
9557 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9558 && REGCLASS_HAS_FP_REG (srcclass)
9559 && REGCLASS_HAS_FP_REG (dstclass))
9560 return 4;
9561
9562 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9563 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9564
9565 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9566 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9567 return 9;
9568
9569 if ((REGCLASS_HAS_FP_REG (dstclass)
9570 && REGCLASS_HAS_GENERAL_REG (srcclass))
9571 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9572 && REGCLASS_HAS_FP_REG (srcclass)))
9573 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9574 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9575
9576 if ((dstclass == FPUL_REGS
9577 && REGCLASS_HAS_GENERAL_REG (srcclass))
9578 || (srcclass == FPUL_REGS
9579 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9580 return 5;
9581
9582 if ((dstclass == FPUL_REGS
9583 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9584 || (srcclass == FPUL_REGS
9585 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9586 return 7;
9587
9588 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9589 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9590 return 20;
9591
9592 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9593 if (TARGET_SHMEDIA
9594 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9595 {
9596 if (sh_gettrcost >= 0)
9597 return sh_gettrcost;
9598 else if (!TARGET_PT_FIXED)
9599 return 100;
9600 }
9601
9602 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9603 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9604 return 4;
9605
9606 if (TARGET_SHMEDIA
9607 || (TARGET_FMOVD
9608 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9609 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9610 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9611
9612 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9613 }
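/* Worked example (illustrative): moving a DFmode value between a
   general register and an FP register on plain SH4 (no SHmedia, no
   -mfmovd) costs 12 * ((8 + 7) / 8) = 12, while the fall-through case
   of a move within the general registers costs 2 * ((8 + 3) / 4) = 4;
   reload therefore tries to avoid the cross-class move.  */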
9614
9615 static rtx emit_load_ptr (rtx, rtx);
9616
9617 static rtx
9618 emit_load_ptr (rtx reg, rtx addr)
9619 {
9620 rtx mem = gen_const_mem (ptr_mode, addr);
9621
9622 if (Pmode != ptr_mode)
9623 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9624 return emit_move_insn (reg, mem);
9625 }
9626
9627 static void
9628 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9629 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9630 tree function)
9631 {
9632 CUMULATIVE_ARGS cum;
9633 int structure_value_byref = 0;
9634 rtx this, this_value, sibcall, insns, funexp;
9635 tree funtype = TREE_TYPE (function);
9636 int simple_add = CONST_OK_FOR_ADD (delta);
9637 int did_load = 0;
9638 rtx scratch0, scratch1, scratch2;
9639 unsigned i;
9640
9641 reload_completed = 1;
9642 epilogue_completed = 1;
9643 no_new_pseudos = 1;
9644 current_function_uses_only_leaf_regs = 1;
9645 reset_block_changes ();
9646
9647 emit_note (NOTE_INSN_PROLOGUE_END);
9648
9649 /* Find the "this" pointer. We have such a wide range of ABIs for the
9650 SH that it's best to do this completely machine independently.
9651 "this" is passed as first argument, unless a structure return pointer
9652 comes first, in which case "this" comes second. */
9653 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9654 #ifndef PCC_STATIC_STRUCT_RETURN
9655 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9656 structure_value_byref = 1;
9657 #endif /* not PCC_STATIC_STRUCT_RETURN */
9658 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9659 {
9660 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9661
9662 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9663 }
9664 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9665
9666 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9667 static chain pointer (even if you can't have nested virtual functions
9668 right now, someone might implement them sometime), and the rest of the
9669 registers are used for argument passing, are callee-saved, or reserved. */
9670 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9671 -ffixed-reg has been used. */
9672 if (! call_used_regs[0] || fixed_regs[0])
9673 error ("r0 needs to be available as a call-clobbered register");
9674 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9675 if (! TARGET_SH5)
9676 {
9677 if (call_used_regs[1] && ! fixed_regs[1])
9678 scratch1 = gen_rtx_REG (ptr_mode, 1);
9679 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9680 to the location where struct values are to be returned. */
9681 if (call_used_regs[3] && ! fixed_regs[3])
9682 scratch2 = gen_rtx_REG (Pmode, 3);
9683 }
9684 else if (TARGET_SHMEDIA)
9685 {
9686 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9687 if (i != REGNO (scratch0) &&
9688 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9689 {
9690 scratch1 = gen_rtx_REG (ptr_mode, i);
9691 break;
9692 }
9693 if (scratch1 == scratch0)
9694 error ("Need a second call-clobbered general purpose register");
9695 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9696 if (call_used_regs[i] && ! fixed_regs[i])
9697 {
9698 scratch2 = gen_rtx_REG (Pmode, i);
9699 break;
9700 }
9701 if (scratch2 == scratch0)
9702 error ("Need a call-clobbered target register");
9703 }
9704
9705 this_value = plus_constant (this, delta);
9706 if (vcall_offset
9707 && (simple_add || scratch0 != scratch1)
9708 && strict_memory_address_p (ptr_mode, this_value))
9709 {
9710 emit_load_ptr (scratch0, this_value);
9711 did_load = 1;
9712 }
9713
9714 if (!delta)
9715 ; /* Do nothing. */
9716 else if (simple_add)
9717 emit_move_insn (this, this_value);
9718 else
9719 {
9720 emit_move_insn (scratch1, GEN_INT (delta));
9721 emit_insn (gen_add2_insn (this, scratch1));
9722 }
9723
9724 if (vcall_offset)
9725 {
9726 rtx offset_addr;
9727
9728 if (!did_load)
9729 emit_load_ptr (scratch0, this);
9730
9731 offset_addr = plus_constant (scratch0, vcall_offset);
9732 if (strict_memory_address_p (ptr_mode, offset_addr))
9733 ; /* Do nothing. */
9734 else if (! TARGET_SH5 && scratch0 != scratch1)
9735 {
9736 /* scratch0 != scratch1, and we have indexed loads. Get better
9737 schedule by loading the offset into r1 and using an indexed
9738 load - then the load of r1 can issue before the load from
9739 (this + delta) finishes. */
9740 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9741 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9742 }
9743 else if (CONST_OK_FOR_ADD (vcall_offset))
9744 {
9745 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9746 offset_addr = scratch0;
9747 }
9748 else if (scratch0 != scratch1)
9749 {
9750 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9751 emit_insn (gen_add2_insn (scratch0, scratch1));
9752 offset_addr = scratch0;
9753 }
9754 else
9755 gcc_unreachable (); /* FIXME */
9756 emit_load_ptr (scratch0, offset_addr);
9757
9758 if (Pmode != ptr_mode)
9759 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9760 emit_insn (gen_add2_insn (this, scratch0));
9761 }
9762
9763 /* Generate a tail call to the target function. */
9764 if (! TREE_USED (function))
9765 {
9766 assemble_external (function);
9767 TREE_USED (function) = 1;
9768 }
9769 funexp = XEXP (DECL_RTL (function), 0);
9770 /* If the function is overridden, so is the thunk, hence we don't
9771 need GOT addressing even if this is a public symbol. */
9772 #if 0
9773 if (TARGET_SH1 && ! flag_weak)
9774 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9775 else
9776 #endif
9777 if (TARGET_SH2 && flag_pic)
9778 {
9779 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9780 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9781 }
9782 else
9783 {
9784 if (TARGET_SHMEDIA && flag_pic)
9785 {
9786 funexp = gen_sym2PIC (funexp);
9787 PUT_MODE (funexp, Pmode);
9788 }
9789 emit_move_insn (scratch2, funexp);
9790 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9791 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9792 }
9793 sibcall = emit_call_insn (sibcall);
9794 SIBLING_CALL_P (sibcall) = 1;
9795 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9796 emit_barrier ();
9797
9798 /* Run just enough of rest_of_compilation to do scheduling and get
9799 the insns emitted. Note that use_thunk calls
9800 assemble_start_function and assemble_end_function. */
9801
9802 insn_locators_initialize ();
9803 insns = get_insns ();
9804
9805 if (optimize > 0)
9806 {
9807 /* Initialize the bitmap obstacks. */
9808 bitmap_obstack_initialize (NULL);
9809 bitmap_obstack_initialize (&reg_obstack);
9810 if (! cfun->cfg)
9811 init_flow ();
9812 rtl_register_cfg_hooks ();
9813 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9814 init_rtl_bb_info (EXIT_BLOCK_PTR);
9815 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9816 EXIT_BLOCK_PTR->flags |= BB_RTL;
9817 find_basic_blocks (insns);
9818
9819 if (flag_schedule_insns_after_reload)
9820 {
9821 life_analysis (dump_file, PROP_FINAL);
9822
9823 split_all_insns (1);
9824
9825 schedule_insns (dump_file);
9826 }
9827 /* We must split jmp insn in PIC case. */
9828 else if (flag_pic)
9829 split_all_insns_noflow ();
9830 }
9831
9832 sh_reorg ();
9833
9834 if (optimize > 0 && flag_delayed_branch)
9835 dbr_schedule (insns, dump_file);
9836
9837 shorten_branches (insns);
9838 final_start_function (insns, file, 1);
9839 final (insns, file, 1);
9840 final_end_function ();
9841
9842 if (optimize > 0)
9843 {
9844 /* Release all memory allocated by flow. */
9845 free_basic_block_vars ();
9846
9847 /* Release the bitmap obstacks. */
9848 bitmap_obstack_release (&reg_obstack);
9849 bitmap_obstack_release (NULL);
9850 }
9851
9852 reload_completed = 0;
9853 epilogue_completed = 0;
9854 no_new_pseudos = 0;
9855 }
9856
9857 rtx
9858 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9859 {
9860 rtx sym;
9861
9862 /* If this is not an ordinary function, the name usually comes from a
9863 string literal or an sprintf buffer. Make sure we use the same
9864 string consistently, so that cse will be able to unify address loads. */
9865 if (kind != FUNCTION_ORDINARY)
9866 name = IDENTIFIER_POINTER (get_identifier (name));
9867 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9868 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9869 if (flag_pic)
9870 switch (kind)
9871 {
9872 case FUNCTION_ORDINARY:
9873 break;
9874 case SFUNC_GOT:
9875 {
9876 rtx reg = target ? target : gen_reg_rtx (Pmode);
9877
9878 emit_insn (gen_symGOT2reg (reg, sym));
9879 sym = reg;
9880 break;
9881 }
9882 case SFUNC_STATIC:
9883 {
9884 /* ??? To allow cse to work, we use GOTOFF relocations.
9885 We could add combiner patterns to transform this into
9886 straight pc-relative calls with sym2PIC / bsrf when
9887 label load and function call are still 1:1 and in the
9888 same basic block during combine. */
9889 rtx reg = target ? target : gen_reg_rtx (Pmode);
9890
9891 emit_insn (gen_symGOTOFF2reg (reg, sym));
9892 sym = reg;
9893 break;
9894 }
9895 }
9896 if (target && sym != target)
9897 {
9898 emit_move_insn (target, sym);
9899 return target;
9900 }
9901 return sym;
9902 }
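/* Usage sketch, taken from the TARGET_HARVARD path of
   sh_initialize_trampoline above:

     emit_library_call (function_symbol (NULL, "__ic_invalidate",
                                         FUNCTION_ORDINARY),
                        0, VOIDmode, 1, tramp, SImode);

   For FUNCTION_ORDINARY no GOT indirection is added even under
   -fpic, so the bare SYMBOL_REF is returned.  */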
9903
9904 /* Find the number of a general purpose register in S. */
9905 static int
9906 scavenge_reg (HARD_REG_SET *s)
9907 {
9908 int r;
9909 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9910 if (TEST_HARD_REG_BIT (*s, r))
9911 return r;
9912 return -1;
9913 }
9914
9915 rtx
9916 sh_get_pr_initial_val (void)
9917 {
9918 rtx val;
9919
9920 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9921 PR register on SHcompact, because it might be clobbered by the prologue.
9922 We check first if that is known to be the case. */
9923 if (TARGET_SHCOMPACT
9924 && ((current_function_args_info.call_cookie
9925 & ~ CALL_COOKIE_RET_TRAMP (1))
9926 || current_function_has_nonlocal_label))
9927 return gen_frame_mem (SImode, return_address_pointer_rtx);
9928
9929 /* If we haven't finished rtl generation, there might be a nonlocal label
9930 that we haven't seen yet.
9931 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9932 is set, unless it has been called before for the same register. And even
9933 then, we end up in trouble if we didn't use the register in the same
9934 basic block before. So call get_hard_reg_initial_val now and wrap it
9935 in an unspec if we might need to replace it. */
9936 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9937 combine can put the pseudo returned by get_hard_reg_initial_val into
9938 instructions that need a general purpose register, which will fail to
9939 be recognized when the pseudo becomes allocated to PR. */
9940 val
9941 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9942 if (TARGET_SH1)
9943 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9944 return val;
9945 }
9946
9947 int
9948 sh_expand_t_scc (enum rtx_code code, rtx target)
9949 {
9950 rtx result = target;
9951 HOST_WIDE_INT val;
9952
9953 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9954 || GET_CODE (sh_compare_op1) != CONST_INT)
9955 return 0;
9956 if (GET_CODE (result) != REG)
9957 result = gen_reg_rtx (SImode);
9958 val = INTVAL (sh_compare_op1);
9959 if ((code == EQ && val == 1) || (code == NE && val == 0))
9960 emit_insn (gen_movt (result));
9961 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9962 {
9963 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9964 emit_insn (gen_subc (result, result, result));
9965 emit_insn (gen_addsi3 (result, result, const1_rtx));
9966 }
9967 else if (code == EQ || code == NE)
9968 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9969 else
9970 return 0;
9971 if (result != target)
9972 emit_move_insn (target, result);
9973 return 1;
9974 }
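/* Note on the subc sequence above (illustrative): the CLOBBER marks
   RESULT as starting out undefined; gen_subc then computes
   result = result - result - T = -T, and adding 1 yields 1 - T, i.e.
   the negation of the T bit, as needed for (EQ, 0) and (NE, 1),
   without a branch.  */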
9975
9976 /* INSN is an sfunc; return the rtx that describes the address used. */
9977 static rtx
9978 extract_sfunc_addr (rtx insn)
9979 {
9980 rtx pattern, part = NULL_RTX;
9981 int len, i;
9982
9983 pattern = PATTERN (insn);
9984 len = XVECLEN (pattern, 0);
9985 for (i = 0; i < len; i++)
9986 {
9987 part = XVECEXP (pattern, 0, i);
9988 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9989 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9990 return XEXP (part, 0);
9991 }
9992 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9993 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9994 }
9995
9996 /* Verify that the register in use_sfunc_addr still agrees with the address
9997 used in the sfunc. This prevents fill_slots_from_thread from changing
9998 use_sfunc_addr.
9999 INSN is the use_sfunc_addr instruction, and REG is the register it
10000 guards. */
10001 int
10002 check_use_sfunc_addr (rtx insn, rtx reg)
10003 {
10004 /* Search for the sfunc. It should really come right after INSN. */
10005 while ((insn = NEXT_INSN (insn)))
10006 {
10007 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10008 break;
10009 if (! INSN_P (insn))
10010 continue;
10011
10012 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10013 insn = XVECEXP (PATTERN (insn), 0, 0);
10014 if (GET_CODE (PATTERN (insn)) != PARALLEL
10015 || get_attr_type (insn) != TYPE_SFUNC)
10016 continue;
10017 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10018 }
10019 gcc_unreachable ();
10020 }
10021
10022 /* This function returns a constant rtx that represents 2**15 / pi in
10023 SFmode. It's used to scale SFmode angles, in radians, to a
10024 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10025 maps to 0x10000). */
10026
10027 static GTY(()) rtx sh_fsca_sf2int_rtx;
10028
10029 rtx
10030 sh_fsca_sf2int (void)
10031 {
10032 if (! sh_fsca_sf2int_rtx)
10033 {
10034 REAL_VALUE_TYPE rv;
10035
10036 real_from_string (&rv, "10430.378350470453");
10037 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10038 }
10039
10040 return sh_fsca_sf2int_rtx;
10041 }
10042
10043 /* This function returns a constant rtx that represents 2**15 / pi in
10044 DFmode. It's used to scale DFmode angles, in radians, to a
10045 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10046 maps to 0x10000). */
10047
10048 static GTY(()) rtx sh_fsca_df2int_rtx;
10049
10050 rtx
10051 sh_fsca_df2int (void)
10052 {
10053 if (! sh_fsca_df2int_rtx)
10054 {
10055 REAL_VALUE_TYPE rv;
10056
10057 real_from_string (&rv, "10430.378350470453");
10058 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10059 }
10060
10061 return sh_fsca_df2int_rtx;
10062 }
10063
10064 /* This function returns a constant rtx that represents pi / 2**15 in
10065 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10066 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10067 2*pi). */
10068
10069 static GTY(()) rtx sh_fsca_int2sf_rtx;
10070
10071 rtx
10072 sh_fsca_int2sf (void)
10073 {
10074 if (! sh_fsca_int2sf_rtx)
10075 {
10076 REAL_VALUE_TYPE rv;
10077
10078 real_from_string (&rv, "9.587379924285257e-5");
10079 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10080 }
10081
10082 return sh_fsca_int2sf_rtx;
10083 }
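/* Consistency check for the two scale factors (illustrative):
   2**15 / pi = 32768 / 3.14159265... = 10430.378350470453 and
   pi / 2**15 = 1 / 10430.378... = 9.587379924285257e-5.  Scaling an
   angle of 2*pi radians by the first constant gives 65536 = 0x10000,
   one full circle in the fixed-point encoding.  */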
10084
10085 /* Initialize the CUMULATIVE_ARGS structure. */
10086
10087 void
10088 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10089 tree fntype,
10090 rtx libname ATTRIBUTE_UNUSED,
10091 tree fndecl,
10092 signed int n_named_args,
10093 enum machine_mode mode)
10094 {
10095 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10096 pcum->free_single_fp_reg = 0;
10097 pcum->stack_regs = 0;
10098 pcum->byref_regs = 0;
10099 pcum->byref = 0;
10100 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10101
10102 /* XXX - Should we check TARGET_HITACHI here ??? */
10103 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10104
10105 if (fntype)
10106 {
10107 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10108 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10109 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10110 pcum->arg_count [(int) SH_ARG_INT]
10111 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10112
10113 pcum->call_cookie
10114 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10115 && pcum->arg_count [(int) SH_ARG_INT] == 0
10116 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10117 ? int_size_in_bytes (TREE_TYPE (fntype))
10118 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10119 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10120 == FIRST_RET_REG));
10121 }
10122 else
10123 {
10124 pcum->arg_count [(int) SH_ARG_INT] = 0;
10125 pcum->prototype_p = FALSE;
10126 if (mode != VOIDmode)
10127 {
10128 pcum->call_cookie =
10129 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10130 && GET_MODE_SIZE (mode) > 4
10131 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10132
10133 /* If the default ABI is the Renesas ABI then all library
10134 calls must assume that the library will be using the
10135 Renesas ABI. So if the function would return its result
10136 in memory then we must force the address of this memory
10137 block onto the stack. Ideally we would like to call
10138 targetm.calls.return_in_memory() here but we do not have
10139 the TYPE or the FNDECL available so we synthesize the
10140 contents of that function as best we can. */
10141 pcum->force_mem =
10142 (TARGET_DEFAULT & MASK_HITACHI)
10143 && (mode == BLKmode
10144 || (GET_MODE_SIZE (mode) > 4
10145 && !(mode == DFmode
10146 && TARGET_FPU_DOUBLE)));
10147 }
10148 else
10149 {
10150 pcum->call_cookie = 0;
10151 pcum->force_mem = FALSE;
10152 }
10153 }
10154 }
10155
10156 /* Determine if two hard register sets intersect.
10157 Return 1 if they do. */
10158
10159 static int
10160 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10161 {
10162 HARD_REG_SET c;
10163 COPY_HARD_REG_SET (c, *a);
10164 AND_HARD_REG_SET (c, *b);
10165 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10166 return 1;
10167 lose:
10168 return 0;
10169 }
10170
10171 #ifdef TARGET_ADJUST_UNROLL_MAX
10172 static int
10173 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10174 int max_unrolled_insns, int strength_reduce_p,
10175 int unroll_type)
10176 {
10177 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10178 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10179 {
10180 /* Throttle back loop unrolling so that the costs of using more
10181 targets than the eight target registers we have don't outweigh
10182 the benefits of unrolling. */
10183 rtx insn;
10184 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10185 int n_barriers = 0;
10186 rtx dest;
10187 int i;
10188 rtx exit_dest[8];
10189 int threshold;
10190 int unroll_benefit = 0, mem_latency = 0;
10191 int base_cost, best_cost, cost;
10192 int factor, best_factor;
10193 int n_dest;
10194 unsigned max_iterations = 32767;
10195 int n_iterations;
10196 int need_precond = 0, precond = 0;
10197 basic_block * bbs = get_loop_body (loop);
10198 struct niter_desc *desc;
10199
10200 /* Assume that all labels inside the loop are used from inside the
10201 loop. If the loop has multiple entry points, it is unlikely to
10202 be unrolled anyway.
10203 Also assume that all calls are to different functions. That is
10204 somewhat pessimistic, but if you have lots of calls, unrolling the
10205 loop is not likely to gain you much in the first place. */
10206 i = loop->num_nodes - 1;
10207 for (insn = BB_HEAD (bbs[i]); ; )
10208 {
10209 if (GET_CODE (insn) == CODE_LABEL)
10210 n_labels++;
10211 else if (GET_CODE (insn) == CALL_INSN)
10212 n_calls++;
10213 else if (GET_CODE (insn) == NOTE
10214 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10215 n_inner_loops++;
10216 else if (GET_CODE (insn) == BARRIER)
10217 n_barriers++;
10218 if (insn != BB_END (bbs[i]))
10219 insn = NEXT_INSN (insn);
10220 else if (--i >= 0)
10221 insn = BB_HEAD (bbs[i]);
10222 else
10223 break;
10224 }
10225 free (bbs);
10226 /* One label for the loop top is normal, and it won't be duplicated by
10227 unrolling. */
10228 if (n_labels <= 1)
10229 return max_unrolled_insns;
10230 if (n_inner_loops > 0)
10231 return 0;
10232 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10233 dest = LABEL_NEXTREF (dest))
10234 {
10235 for (i = n_exit_dest - 1;
10236 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10237 if (i < 0)
10238 exit_dest[n_exit_dest++] = dest;
10239 }
10240 /* If the loop top and call and exit destinations are enough to fill up
10241 the target registers, we're unlikely to do any more damage by
10242 unrolling. */
10243 if (n_calls + n_exit_dest >= 7)
10244 return max_unrolled_insns;
10245
10246 /* ??? In the new loop unroller, there is no longer any strength
10247 reduction information available. Thus, when it comes to unrolling,
10248 we know the cost of everything, but we know the value of nothing. */
10249 #if 0
10250 if (strength_reduce_p
10251 && (unroll_type == LPT_UNROLL_RUNTIME
10252 || unroll_type == LPT_UNROLL_CONSTANT
10253 || unroll_type == LPT_PEEL_COMPLETELY))
10254 {
10255 struct loop_ivs *ivs = LOOP_IVS (loop);
10256 struct iv_class *bl;
10257
10258 /* We'll save one compare-and-branch in each loop body copy
10259 but the last one. */
10260 unroll_benefit = 1;
10261 /* Assess the benefit of removing biv & giv updates. */
10262 for (bl = ivs->list; bl; bl = bl->next)
10263 {
10264 rtx increment = biv_total_increment (bl);
10265 struct induction *v;
10266
10267 if (increment && GET_CODE (increment) == CONST_INT)
10268 {
10269 unroll_benefit++;
10270 for (v = bl->giv; v; v = v->next_iv)
10271 {
10272 if (! v->ignore && v->same == 0
10273 && GET_CODE (v->mult_val) == CONST_INT)
10274 unroll_benefit++;
10275 /* If this giv uses an array, try to determine
10276 a maximum iteration count from the size of the
10277 array. This need not be correct all the time,
10278 but should not be too far off the mark too often. */
10279 while (v->giv_type == DEST_ADDR)
10280 {
10281 rtx mem = PATTERN (v->insn);
10282 tree mem_expr, type, size_tree;
10283
10284 if (GET_CODE (SET_SRC (mem)) == MEM)
10285 mem = SET_SRC (mem);
10286 else if (GET_CODE (SET_DEST (mem)) == MEM)
10287 mem = SET_DEST (mem);
10288 else
10289 break;
10290 mem_expr = MEM_EXPR (mem);
10291 if (! mem_expr)
10292 break;
10293 type = TREE_TYPE (mem_expr);
10294 if (TREE_CODE (type) != ARRAY_TYPE
10295 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10296 break;
10297 size_tree = fold (build (TRUNC_DIV_EXPR,
10298 bitsizetype,
10299 TYPE_SIZE (type),
10300 TYPE_SIZE_UNIT (type)));
10301 if (TREE_CODE (size_tree) == INTEGER_CST
10302 && ! TREE_INT_CST_HIGH (size_tree)
10303 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10304 max_iterations = TREE_INT_CST_LOW (size_tree);
10305 break;
10306 }
10307 }
10308 }
10309 }
10310 }
10311 #else /* 0 */
10312 /* Assume there is at least some benefit. */
10313 unroll_benefit = 1;
10314 #endif /* 0 */
10315
10316 desc = get_simple_loop_desc (loop);
10317 n_iterations = desc->const_iter ? desc->niter : 0;
10318 max_iterations
10319 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10320
10321 if (! strength_reduce_p || ! n_iterations)
10322 need_precond = 1;
10323 if (! n_iterations)
10324 {
10325 n_iterations
10326 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10327 if (! n_iterations)
10328 return 0;
10329 }
10330 #if 0 /* ??? See above - missing induction variable information. */
10331 while (unroll_benefit > 1) /* no loop */
10332 {
10333 /* We include the benefit of biv/giv updates. Check if some or
10334 all of these updates are likely to fit into a scheduling
10335 bubble of a load.
10336 We check for the following case:
10337 - All the insns leading to the first JUMP_INSN are in a strict
10338 dependency chain.
10339 - there is at least one memory reference in them.
10340
10341 When we find such a pattern, we assume that we can hide as many
10342 updates as the total of the load latency is, if we have an
10343 unroll factor of at least two. We might or might not also do
10344 this without unrolling, so rather than considering this as an
10345 extra unroll benefit, discount it in the unroll benefits of unroll
10346 factors higher than two. */
10347
10348 rtx set, last_set;
10349
10350 insn = next_active_insn (loop->start);
10351 last_set = single_set (insn);
10352 if (! last_set)
10353 break;
10354 if (GET_CODE (SET_SRC (last_set)) == MEM)
10355 mem_latency += 2;
10356 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10357 {
10358 if (! INSN_P (insn))
10359 continue;
10360 if (GET_CODE (insn) == JUMP_INSN)
10361 break;
10362 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10363 {
10364 /* Check if this is a to-be-reduced giv insn. */
10365 struct loop_ivs *ivs = LOOP_IVS (loop);
10366 struct iv_class *bl;
10367 struct induction *v;
10368 for (bl = ivs->list; bl; bl = bl->next)
10369 {
10370 if (bl->biv->insn == insn)
10371 goto is_biv;
10372 for (v = bl->giv; v; v = v->next_iv)
10373 if (v->insn == insn)
10374 goto is_giv;
10375 }
10376 mem_latency--;
10377 is_biv:
10378 is_giv:
10379 continue;
10380 }
10381 set = single_set (insn);
10382 if (! set)
10383 continue;
10384 if (GET_CODE (SET_SRC (set)) == MEM)
10385 mem_latency += 2;
10386 last_set = set;
10387 }
10388 if (mem_latency < 0)
10389 mem_latency = 0;
10390 else if (mem_latency > unroll_benefit - 1)
10391 mem_latency = unroll_benefit - 1;
10392 break;
10393 }
10394 #endif /* 0 */
10395 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10396 <= unroll_benefit)
10397 return max_unrolled_insns;
10398
10399 n_dest = n_labels + n_calls + n_exit_dest;
10400 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10401 best_cost = 0;
10402 best_factor = 1;
10403 if (n_barriers * 2 > n_labels - 1)
10404 n_barriers = (n_labels - 1) / 2;
10405 for (factor = 2; factor <= 8; factor++)
10406 {
10407 /* Bump up preconditioning cost for each power of two. */
10408 if (! (factor & (factor-1)))
10409 precond += 4;
10410 /* When preconditioning, only powers of two will be considered. */
10411 else if (need_precond)
10412 continue;
10413 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10414 + (n_labels - 1) * factor + n_calls + n_exit_dest
10415 - (n_barriers * factor >> 1)
10416 + need_precond);
10417 cost
10418 = ((n_dest <= 8 ? 0 : n_dest - 7)
10419 - base_cost * factor
10420 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10421 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10422 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10423 / n_iterations));
10424 if (need_precond)
10425 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10426 if (cost < best_cost)
10427 {
10428 best_cost = cost;
10429 best_factor = factor;
10430 }
10431 }
10432 threshold = best_factor * insn_count;
10433 if (max_unrolled_insns > threshold)
10434 max_unrolled_insns = threshold;
10435 }
10436 return max_unrolled_insns;
10437 }
10438 #endif /* TARGET_ADJUST_UNROLL_MAX */
10439
10440 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10441 not descend into CONST_DOUBLEs when doing the replacement.
10442
10443 Note that copying is not done so X must not be shared unless all copies
10444 are to be modified.
10445
10446 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10447 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10448 replacements[n*2+1] - and that we take mode changes into account.
10449
10450 If a replacement is ambiguous, return NULL_RTX.
10451
10452 If MODIFY is zero, don't modify any rtl in place,
10453 just return zero or nonzero for failure / success. */
10454
10455 rtx
10456 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10457 {
10458 int i, j;
10459 const char *fmt;
10460
  /* Avoid infinite looping when we replace a MEM that appears inside
     a CONST_DOUBLE with that same CONST_DOUBLE.  */
  if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
    return x;

  for (i = n_replacements - 1; i >= 0; i--)
    if (x == replacements[i*2]
        && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
      return replacements[i*2+1];

  /* Allow this function to make replacements in EXPR_LISTs.  */
  if (x == 0)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
                                    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
        {
          x = simplify_subreg (GET_MODE (x), new,
                               GET_MODE (SUBREG_REG (x)),
                               SUBREG_BYTE (x));
          if (! x)
            abort ();
        }
      else if (modify)
        SUBREG_REG (x) = new;

      return x;
    }
  else if (GET_CODE (x) == REG)
    {
      unsigned regno = REGNO (x);
      unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
                        ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
      rtx result = NULL_RTX;

      for (i = n_replacements - 1; i >= 0; i--)
        {
          rtx from = replacements[i*2];
          rtx to = replacements[i*2+1];
          unsigned from_regno, from_nregs, to_regno, new_regno;

          if (GET_CODE (from) != REG)
            continue;
          from_regno = REGNO (from);
          from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
                        ? HARD_REGNO_NREGS (from_regno, GET_MODE (from))
                        : 1);
          if (regno < from_regno + from_nregs && regno + nregs > from_regno)
            {
              /* Bail out unless X lies entirely within FROM, TO is a
                 register, and no other replacement already matched.  */
              if (regno < from_regno
                  || regno + nregs > from_regno + from_nregs
                  || GET_CODE (to) != REG
                  || result)
                return NULL_RTX;
              to_regno = REGNO (to);
              if (to_regno < FIRST_PSEUDO_REGISTER)
                {
                  new_regno = regno + to_regno - from_regno;
                  if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
                      != nregs)
                    return NULL_RTX;
                  result = gen_rtx_REG (GET_MODE (x), new_regno);
                }
              else if (GET_MODE (x) <= GET_MODE (to))
                result = gen_lowpart_common (GET_MODE (x), to);
              else
                result = gen_lowpart_SUBREG (GET_MODE (x), to);
            }
        }
      return result ? result : x;
    }
  else if (GET_CODE (x) == ZERO_EXTEND)
    {
      rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
                                    n_replacements, modify);

      if (GET_CODE (new) == CONST_INT)
        {
          x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
                                        new, GET_MODE (XEXP (x, 0)));
          if (! x)
            abort ();
        }
      else if (modify)
        XEXP (x, 0) = new;

      return x;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      rtx new;

      if (fmt[i] == 'e')
        {
          new = replace_n_hard_rtx (XEXP (x, i), replacements,
                                    n_replacements, modify);
          if (!new)
            return NULL_RTX;
          if (modify)
            XEXP (x, i) = new;
        }
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          {
            new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
                                      n_replacements, modify);
            if (!new)
              return NULL_RTX;
            if (modify)
              XVECEXP (x, i, j) = new;
          }
    }

  return x;
}
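
/* A hedged usage sketch for replace_n_hard_rtx; PAT stands for some
   insn pattern, and the register numbers are illustrative only.  To
   rename hard register 4 to hard register 6 throughout PAT, first do
   a dry run with MODIFY == 0 to check that every replacement is
   unambiguous, then commit the change in place:

     rtx repl[2];
     repl[0] = gen_rtx_REG (SImode, 4);
     repl[1] = gen_rtx_REG (SImode, 6);
     if (replace_n_hard_rtx (PAT, repl, 1, 0))
       replace_n_hard_rtx (PAT, repl, 1, 1);

   The dry run matters because a partial overlap with a multi-word
   hard register makes the replacement ambiguous, and the function
   then returns NULL_RTX without touching PAT.  */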

/* Generate an expression converting X to MODE, normally by means of a
   TRUNCATE.  Extensions are folded where possible: the result may be
   the inner value itself, a truncate of it, or a narrower extension;
   if NEED_SIGN_EXT is nonzero, only a SIGN_EXTEND may be reused as
   the conversion code.  */
rtx
sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
{
  enum rtx_code code = TRUNCATE;

  if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
    {
      rtx inner = XEXP (x, 0);
      enum machine_mode inner_mode = GET_MODE (inner);

      if (inner_mode == mode)
        return inner;
      else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
        x = inner;
      else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
               && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
        {
          code = GET_CODE (x);
          x = inner;
        }
    }
  return gen_rtx_fmt_e (code, mode, x);
}
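
/* Illustrative cases for sh_gen_truncate; the modes and registers are
   examples only:
     x = (sign_extend:DI (reg:SI r)), mode = SImode
       -> (reg:SI r), the extension is simply undone;
     x = (sign_extend:DI (reg:HI r)), mode = SImode, need_sign_ext != 0
       -> (sign_extend:SI (reg:HI r)), the narrower inner value is
          re-extended directly to the requested mode;
     x = (reg:DI r), mode = SImode
       -> (truncate:SI (reg:DI r)).  */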

/* Called via for_each_rtx after reload, to clean up truncates of
   registers that span multiple actual hard registers.  */
int
shmedia_cleanup_truncate (rtx *p, void *n_changes)
{
  rtx x = *p, reg;

  if (GET_CODE (x) != TRUNCATE)
    return 0;
  reg = XEXP (x, 0);
  if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
    {
      enum machine_mode reg_mode = GET_MODE (reg);
      XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
                                     subreg_lowpart_offset (DImode, reg_mode));
      *(int *) n_changes += 1;
      return -1;
    }
  return 0;
}
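
/* A hedged sketch of how shmedia_cleanup_truncate is meant to be
   driven; INSN stands for some insn being cleaned up after reload:

     int n_changes = 0;
     for_each_rtx (&PATTERN (INSN), shmedia_cleanup_truncate, &n_changes);
     if (n_changes)
       INSN_CODE (INSN) = -1;

   Returning -1 from the callback keeps for_each_rtx from walking into
   the subexpression just rewritten; resetting INSN_CODE forces the
   modified insn to be re-recognized.  */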

/* Load and store depend on the highpart of the address.  However,
   set_attr_alternative does not give well-defined results before reload,
   so we must look at the rtl ourselves to see if any of the feeding
   registers is used in a memref.  */

/* Called by sh_contains_memref_p via for_each_rtx.  */
static int
sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
{
  return (GET_CODE (*loc) == MEM);
}

/* Return nonzero iff INSN contains a MEM.  */
int
sh_contains_memref_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
}

/* FNADDR is the MEM expression from a call expander.  Return an address
   to use in an SHmedia insn pattern.  */
rtx
shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
{
  int is_sym;

  fnaddr = XEXP (fnaddr, 0);
  is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
  if (flag_pic && is_sym)
    {
      if (! SYMBOL_REF_LOCAL_P (fnaddr))
        {
          rtx reg = gen_reg_rtx (Pmode);

          /* We must not use GOTPLT for sibcalls, because PIC_REG
             must be restored before the PLT code gets to run.  */
          if (is_sibcall)
            emit_insn (gen_symGOT2reg (reg, fnaddr));
          else
            emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
          fnaddr = reg;
        }
      else
        {
          fnaddr = gen_sym2PIC (fnaddr);
          PUT_MODE (fnaddr, Pmode);
        }
    }
  /* If ptabs might trap, make this visible to the rest of the compiler.
     We generally assume that symbols pertain to valid locations, but
     it is possible to generate invalid symbols with asm or linker tricks.
     In a list of functions where each returns its successor, an invalid
     symbol might denote an empty list.  */
  if (!TARGET_PT_FIXED
      && (!is_sym || TARGET_INVALID_SYMBOLS)
      && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
    {
      rtx tr = gen_reg_rtx (PDImode);

      emit_insn (gen_ptabs (tr, fnaddr));
      fnaddr = tr;
    }
  else if (! target_reg_operand (fnaddr, Pmode))
    fnaddr = copy_to_mode_reg (Pmode, fnaddr);
  return fnaddr;
}
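
/* A hedged sketch of the intended use from a call expander in sh.md;
   the operand numbering and the gen_call_media name are illustrative:

     operands[0] = shmedia_prepare_call_address (operands[0], 0);
     emit_call_insn (gen_call_media (operands[0], operands[1]));

   For PIC calls to non-local symbols the address is first loaded via
   GOT or GOTPLT; when ptabs might trap, the ptabs is emitted here so
   that the possible trap is visible to the rest of the compiler.  */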

/* ??? insn-conditions.c contains the insn conditions from sh.md,
   but does not include tree.h.  This is fixed in 4.2 20060127.  */
bool
sh_cfun_trap_exit_p (void)
{
  /* True iff the current function has the trap_exit attribute.  */
  return (lookup_attribute ("trap_exit",
                            DECL_ATTRIBUTES (current_function_decl))
          != NULL_TREE);
}
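
/* An example, for illustration only, of the attribute this predicate
   tests for: on SH, trap_exit is used together with interrupt_handler
   and makes the handler return via a trapa #N rather than the normal
   return sequence:

     void handler (void)
       __attribute__ ((interrupt_handler, trap_exit (4)));  */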

enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;

/* This defines the storage for the variable part of a -mboard= option.
   It is only required when using the sh-superh-elf target.  */
#ifdef _SUPERH_H
const char * boardtype = "7750p2";
const char * osruntime = "bare";
#endif

#include "gt-sh.h"