1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
57
58
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
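/* Illustration: a DImode value held in the register pair (Rn, Rn+1) has
   its most significant word in Rn + MSW and its least significant word in
   Rn + LSW, so operand-printing code such as the 'S' and 'R' cases in
   print_operand below works unchanged for either endianness. */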
63
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
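/* For example, GEN_ADD3 (dst, a, b) emits an adddi3 on SHmedia64 and an
   addsi3 elsewhere, so prologue/epilogue code can generate pointer-sized
   moves and arithmetic without checking the ABI at each call site. */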
70
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
73
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
76
77 /* Global variables for machine-dependent things. */
78
79 /* Which cpu are we scheduling for. */
80 enum processor_type sh_cpu;
81
82 /* Definitions used in ready queue reordering for first scheduling pass. */
83
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
86
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
89
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
92
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
96
97 /* Saved operands from the last compare to use when we generate an scc
98 or bcc insn. */
99
100 rtx sh_compare_op0;
101 rtx sh_compare_op1;
102
103 /* Provides the class number of the smallest class containing
104 reg number. */
105
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
107 {
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
147 };
148
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
151
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
155
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
159
160 enum reg_class reg_class_from_letter[] =
161 {
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
169 };
170
171 int assembler_dialect;
172
173 static bool shmedia_space_reserved_for_target_registers;
174
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
222
223 static bool sh_function_ok_for_sibcall (tree, tree);
224
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
229
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
247 #endif
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
257
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
267 tree, bool);
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
269 tree, bool);
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
271 tree, bool);
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
274
275
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
279
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
285
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
291
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
294
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
297
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
300
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
305
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
310
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
313
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
316
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
319
320 /* The following hooks have been implemented to re-enable sched1. With the
321 help of these hooks we limit the movement of insns in sched1 to
322 reduce register pressure. The overall idea is to keep count of the SImode
323 and SFmode regs required by already scheduled insns. When these counts
324 cross certain threshold values, we give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with the lowest
326 LUID (original insn order); but such an insn might be sitting in the stalled
327 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
328 up to a maximum of 8 so that such insns may move from Q -> R.
329 
330 The hooks are described below:
331 
332 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
333 scheduler; it is called inside the sched_init function just after
334 the find_insn_reg_weights function call. It is used to calculate the SImode
335 and SFmode weights of insns of basic blocks; much like what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
338
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
341 (Q)->(R).
342
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
344 high; reorder the ready queue so that the insn with lowest LUID will be
345 issued next.
346
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
349
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
352
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
354
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
357
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
360
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
363
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
366
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
369
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
372
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
375
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
383
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
386
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
391
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
394
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
403
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
406
407 #ifdef HAVE_AS_TLS
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
410 #endif
411
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
418
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
423
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
440
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
448
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
451
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
454
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
457
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
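/* Both macros use index 0 for SImode and index 1 for SFmode, matching how
   regmode_weight[] and curr_regmode_pressure[] above are accessed. */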
460
461 #ifdef SYMBIAN
462
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
469
470 #endif /* SYMBIAN */
471
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
475 #endif
476
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
479
480 struct gcc_target targetm = TARGET_INITIALIZER;
481
482 /* Implement TARGET_HANDLE_OPTION. */
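/* Each architecture option below simply swaps the architecture selection:
   for example, -m2a clears the MASK_ARCH bits in target_flags and ORs in
   SELECT_SH2A. */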
483
484 static bool
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
487 {
488 switch (code)
489 {
490 case OPT_m1:
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
492 return true;
493
494 case OPT_m2:
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
496 return true;
497
498 case OPT_m2a:
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
500 return true;
501
502 case OPT_m2a_nofpu:
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
504 return true;
505
506 case OPT_m2a_single:
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
508 return true;
509
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
512 return true;
513
514 case OPT_m2e:
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
516 return true;
517
518 case OPT_m3:
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
520 return true;
521
522 case OPT_m3e:
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
524 return true;
525
526 case OPT_m4:
527 case OPT_m4_100:
528 case OPT_m4_200:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
530 return true;
531
532 case OPT_m4_nofpu:
533 case OPT_m4_400:
534 case OPT_m4_500:
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
536 return true;
537
538 case OPT_m4_single:
539 case OPT_m4_100_single:
540 case OPT_m4_200_single:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
542 return true;
543
544 case OPT_m4_single_only:
545 case OPT_m4_100_single_only:
546 case OPT_m4_200_single_only:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
548 return true;
549
550 case OPT_m4a:
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
552 return true;
553
554 case OPT_m4a_nofpu:
555 case OPT_m4al:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
557 return true;
558
559 case OPT_m4a_single:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
561 return true;
562
563 case OPT_m4a_single_only:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
565 return true;
566
567 case OPT_m5_32media:
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
569 return true;
570
571 case OPT_m5_32media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
573 return true;
574
575 case OPT_m5_64media:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
577 return true;
578
579 case OPT_m5_64media_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
581 return true;
582
583 case OPT_m5_compact:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
585 return true;
586
587 case OPT_m5_compact_nofpu:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
589 return true;
590
591 default:
592 return true;
593 }
594 }
595
596 /* Print the operand address in x to the stream. */
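/* Typical results, following the cases below: "@r4" for a plain register,
   "@(8,r4)" for base plus constant offset, "@(r0,r5)" for an indexed
   address, "@-r15" for PRE_DEC and "@r15+" for POST_INC. */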
597
598 void
599 print_operand_address (FILE *stream, rtx x)
600 {
601 switch (GET_CODE (x))
602 {
603 case REG:
604 case SUBREG:
605 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
606 break;
607
608 case PLUS:
609 {
610 rtx base = XEXP (x, 0);
611 rtx index = XEXP (x, 1);
612
613 switch (GET_CODE (index))
614 {
615 case CONST_INT:
616 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
617 reg_names[true_regnum (base)]);
618 break;
619
620 case REG:
621 case SUBREG:
622 {
623 int base_num = true_regnum (base);
624 int index_num = true_regnum (index);
625
626 fprintf (stream, "@(r0,%s)",
627 reg_names[MAX (base_num, index_num)]);
628 break;
629 }
630
631 default:
632 gcc_unreachable ();
633 }
634 }
635 break;
636
637 case PRE_DEC:
638 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
639 break;
640
641 case POST_INC:
642 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
643 break;
644
645 default:
646 x = mark_constant_pool_use (x);
647 output_addr_const (stream, x);
648 break;
649 }
650 }
651
652 /* Print operand x (an rtx) in assembler syntax to file stream
653 according to modifier code.
654
655 '.' print a .s if insn needs delay slot
656 ',' print LOCAL_LABEL_PREFIX
657 '@' print trapa, rte or rts depending on the trap_exit / interrupt_handler attributes
658 '#' output a nop if there is nothing to put in the delay slot
659 ''' print likelihood suffix (/u for unlikely).
660 '>' print branch target if -fverbose-asm
661 'O' print a constant without the #
662 'R' print the LSW of a dp value - changes if in little endian
663 'S' print the MSW of a dp value - changes if in little endian
664 'T' print the next word of a dp value - same as 'R' in big endian mode.
665 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
666 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
667 'N' print 'r63' if the operand is (const_int 0).
668 'd' print a V2SF reg as dN instead of fpN.
669 'm' print a pair `base,offset' or `base,index', for LD and ST.
670 'U' Likewise for {LD,ST}{HI,LO}.
671 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
672 'o' output an operator. */
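/* For example, the template "sts mach,%S0\n\tsts macl,%R0" (used by
   output_movedouble below) prints the register holding the most
   significant word of operand 0 for %S0 and the least significant word
   for %R0, regardless of the current endianness. */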
673
674 void
675 print_operand (FILE *stream, rtx x, int code)
676 {
677 int regno;
678 enum machine_mode mode;
679
680 switch (code)
681 {
682 tree trapa_attr;
683
684 case '.':
685 if (final_sequence
686 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
687 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
688 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
689 break;
690 case ',':
691 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
692 break;
693 case '@':
694 trapa_attr = lookup_attribute ("trap_exit",
695 DECL_ATTRIBUTES (current_function_decl));
696 if (trapa_attr)
697 fprintf (stream, "trapa #%ld",
698 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
699 else if (sh_cfun_interrupt_handler_p ())
700 fprintf (stream, "rte");
701 else
702 fprintf (stream, "rts");
703 break;
704 case '#':
705 /* Output a nop if there's nothing in the delay slot. */
706 if (dbr_sequence_length () == 0)
707 fprintf (stream, "\n\tnop");
708 break;
709 case '\'':
710 {
711 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
712
713 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
714 fputs ("/u", stream);
715 break;
716 }
717 case '>':
718 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
719 {
720 fputs ("\t! target: ", stream);
721 output_addr_const (stream, JUMP_LABEL (current_output_insn));
722 }
723 break;
724 case 'O':
725 x = mark_constant_pool_use (x);
726 output_addr_const (stream, x);
727 break;
728 /* N.B.: %R / %S / %T adjust memory addresses by four.
729 For SHMEDIA, that means they can be used to access the first and
730 second 32 bit part of a 64 bit (or larger) value that
731 might be held in floating point registers or memory.
732 While they can be used to access the 64 bit parts of a larger value
733 held in general purpose registers, that won't work with memory, nor
734 with fp registers, since the frxx names are used. */
735 case 'R':
736 if (REG_P (x) || GET_CODE (x) == SUBREG)
737 {
738 regno = true_regnum (x);
739 regno += FP_REGISTER_P (regno) ? 1 : LSW;
740 fputs (reg_names[regno], (stream));
741 }
742 else if (MEM_P (x))
743 {
744 x = adjust_address (x, SImode, 4 * LSW);
745 print_operand_address (stream, XEXP (x, 0));
746 }
747 else
748 {
749 rtx sub = NULL_RTX;
750
751 mode = GET_MODE (x);
752 if (mode == VOIDmode)
753 mode = DImode;
754 if (GET_MODE_SIZE (mode) >= 8)
755 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
756 if (sub)
757 print_operand (stream, sub, 0);
758 else
759 output_operand_lossage ("invalid operand to %%R");
760 }
761 break;
762 case 'S':
763 if (REG_P (x) || GET_CODE (x) == SUBREG)
764 {
765 regno = true_regnum (x);
766 regno += FP_REGISTER_P (regno) ? 0 : MSW;
767 fputs (reg_names[regno], (stream));
768 }
769 else if (MEM_P (x))
770 {
771 x = adjust_address (x, SImode, 4 * MSW);
772 print_operand_address (stream, XEXP (x, 0));
773 }
774 else
775 {
776 rtx sub = NULL_RTX;
777
778 mode = GET_MODE (x);
779 if (mode == VOIDmode)
780 mode = DImode;
781 if (GET_MODE_SIZE (mode) >= 8)
782 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
783 if (sub)
784 print_operand (stream, sub, 0);
785 else
786 output_operand_lossage ("invalid operand to %%S");
787 }
788 break;
789 case 'T':
790 /* Next word of a double. */
791 switch (GET_CODE (x))
792 {
793 case REG:
794 fputs (reg_names[REGNO (x) + 1], (stream));
795 break;
796 case MEM:
797 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
798 && GET_CODE (XEXP (x, 0)) != POST_INC)
799 x = adjust_address (x, SImode, 4);
800 print_operand_address (stream, XEXP (x, 0));
801 break;
802 default:
803 break;
804 }
805 break;
806 case 'o':
807 switch (GET_CODE (x))
808 {
809 case PLUS: fputs ("add", stream); break;
810 case MINUS: fputs ("sub", stream); break;
811 case MULT: fputs ("mul", stream); break;
812 case DIV: fputs ("div", stream); break;
813 case EQ: fputs ("eq", stream); break;
814 case NE: fputs ("ne", stream); break;
815 case GT: case LT: fputs ("gt", stream); break;
816 case GE: case LE: fputs ("ge", stream); break;
817 case GTU: case LTU: fputs ("gtu", stream); break;
818 case GEU: case LEU: fputs ("geu", stream); break;
819 default:
820 break;
821 }
822 break;
823 case 'M':
824 if (TARGET_SHMEDIA)
825 {
826 if (GET_CODE (x) == MEM
827 && GET_CODE (XEXP (x, 0)) == PLUS
828 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
829 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
830 fputc ('x', stream);
831 }
832 else
833 {
834 if (GET_CODE (x) == MEM)
835 {
836 switch (GET_MODE (x))
837 {
838 case QImode: fputs (".b", stream); break;
839 case HImode: fputs (".w", stream); break;
840 case SImode: fputs (".l", stream); break;
841 case SFmode: fputs (".s", stream); break;
842 case DFmode: fputs (".d", stream); break;
843 default: gcc_unreachable ();
844 }
845 }
846 }
847 break;
848
849 case 'm':
850 gcc_assert (GET_CODE (x) == MEM);
851 x = XEXP (x, 0);
852 /* Fall through. */
853 case 'U':
854 switch (GET_CODE (x))
855 {
856 case REG:
857 case SUBREG:
858 print_operand (stream, x, 0);
859 fputs (", 0", stream);
860 break;
861
862 case PLUS:
863 print_operand (stream, XEXP (x, 0), 0);
864 fputs (", ", stream);
865 print_operand (stream, XEXP (x, 1), 0);
866 break;
867
868 default:
869 gcc_unreachable ();
870 }
871 break;
872
873 case 'd':
874 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
875
876 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
877 break;
878
879 case 'N':
880 if (x == CONST0_RTX (GET_MODE (x)))
881 {
882 fprintf ((stream), "r63");
883 break;
884 }
885 goto default_output;
886 case 'u':
887 if (GET_CODE (x) == CONST_INT)
888 {
889 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
890 break;
891 }
892 /* Fall through. */
893
894 default_output:
895 default:
896 regno = 0;
897 mode = GET_MODE (x);
898
899 switch (GET_CODE (x))
900 {
901 case TRUNCATE:
902 {
903 rtx inner = XEXP (x, 0);
904 int offset = 0;
905 enum machine_mode inner_mode;
906
907 /* We might see SUBREGs with vector mode registers inside. */
908 if (GET_CODE (inner) == SUBREG
909 && (GET_MODE_SIZE (GET_MODE (inner))
910 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
911 && subreg_lowpart_p (inner))
912 inner = SUBREG_REG (inner);
913 if (GET_CODE (inner) == CONST_INT)
914 {
915 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
916 goto default_output;
917 }
918 inner_mode = GET_MODE (inner);
919 if (GET_CODE (inner) == SUBREG
920 && (GET_MODE_SIZE (GET_MODE (inner))
921 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
922 && GET_CODE (SUBREG_REG (inner)) == REG)
923 {
924 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
925 GET_MODE (SUBREG_REG (inner)),
926 SUBREG_BYTE (inner),
927 GET_MODE (inner));
928 inner = SUBREG_REG (inner);
929 }
930 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
931 abort ();
932 /* Floating point register pairs are always big endian;
933 general purpose registers are 64 bit wide. */
934 regno = REGNO (inner);
935 regno = (HARD_REGNO_NREGS (regno, inner_mode)
936 - HARD_REGNO_NREGS (regno, mode))
937 + offset;
938 x = inner;
939 goto reg;
940 }
941 case SIGN_EXTEND:
942 x = XEXP (x, 0);
943 goto reg;
944 /* FIXME: We need this on SHmedia32 because reload generates
945 some sign-extended HI or QI loads into DImode registers
946 but, because Pmode is SImode, the address ends up with a
947 subreg:SI of the DImode register. Maybe reload should be
948 fixed so as to apply alter_subreg to such loads? */
949 case IF_THEN_ELSE:
950 gcc_assert (trapping_target_operand (x, VOIDmode));
951 x = XEXP (XEXP (x, 2), 0);
952 goto default_output;
953 case SUBREG:
954 gcc_assert (SUBREG_BYTE (x) == 0
955 && GET_CODE (SUBREG_REG (x)) == REG);
956
957 x = SUBREG_REG (x);
958 /* Fall through. */
959
960 reg:
961 case REG:
962 regno += REGNO (x);
963 if (FP_REGISTER_P (regno)
964 && mode == V16SFmode)
965 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
966 else if (FP_REGISTER_P (REGNO (x))
967 && mode == V4SFmode)
968 fprintf ((stream), "fv%s", reg_names[regno] + 2);
969 else if (GET_CODE (x) == REG
970 && mode == V2SFmode)
971 fprintf ((stream), "fp%s", reg_names[regno] + 2);
972 else if (FP_REGISTER_P (REGNO (x))
973 && GET_MODE_SIZE (mode) > 4)
974 fprintf ((stream), "d%s", reg_names[regno] + 1);
975 else
976 fputs (reg_names[regno], (stream));
977 break;
978
979 case MEM:
980 output_address (XEXP (x, 0));
981 break;
982
983 case CONST:
984 if (TARGET_SHMEDIA
985 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
986 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
987 && (GET_MODE (XEXP (x, 0)) == DImode
988 || GET_MODE (XEXP (x, 0)) == SImode)
989 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
990 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
991 {
992 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
993 rtx val2 = val;
994 bool nested_expr = false;
995
996 fputc ('(', stream);
997 if (GET_CODE (val) == ASHIFTRT)
998 {
999 fputc ('(', stream);
1000 val2 = XEXP (val, 0);
1001 }
1002 if (GET_CODE (val2) == CONST
1003 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
1004 {
1005 fputc ('(', stream);
1006 nested_expr = true;
1007 }
1008 output_addr_const (stream, val2);
1009 if (nested_expr)
1010 fputc (')', stream);
1011 if (GET_CODE (val) == ASHIFTRT)
1012 {
1013 fputs (" >> ", stream);
1014 output_addr_const (stream, XEXP (val, 1));
1015 fputc (')', stream);
1016 }
1017 fputs (" & 65535)", stream);
1018 break;
1019 }
1020
1021 /* Fall through. */
1022 default:
1023 if (TARGET_SH1)
1024 fputc ('#', stream);
1025 output_addr_const (stream, x);
1026 break;
1027 }
1028 break;
1029 }
1030 }
1031
1032 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1033 static void
1034 force_into (rtx value, rtx target)
1035 {
1036 value = force_operand (value, target);
1037 if (! rtx_equal_p (value, target))
1038 emit_insn (gen_move_insn (target, value));
1039 }
1040
1041 /* Emit code to perform a block move. Choose the best method.
1042
1043 OPERANDS[0] is the destination.
1044 OPERANDS[1] is the source.
1045 OPERANDS[2] is the size.
1046 OPERANDS[3] is the alignment safe to use. */
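/* For instance, on a target without TARGET_HARD_SH4 a constant 8-byte,
   4-byte-aligned copy reaches the "bytes < 64" case below and calls the
   __movmemSI8 library routine, while a 12-byte copy on TARGET_HARD_SH4
   uses __movmemSI12_i4 instead. */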
1047
1048 int
1049 expand_block_move (rtx *operands)
1050 {
1051 int align = INTVAL (operands[3]);
1052 int constp = (GET_CODE (operands[2]) == CONST_INT);
1053 int bytes = (constp ? INTVAL (operands[2]) : 0);
1054
1055 if (! constp)
1056 return 0;
1057
1058 /* If we could use mov.l to move words and dest is word-aligned, we
1059 can use movua.l for loads and still generate a relatively short
1060 and efficient sequence. */
1061 if (TARGET_SH4A_ARCH && align < 4
1062 && MEM_ALIGN (operands[0]) >= 32
1063 && can_move_by_pieces (bytes, 32))
1064 {
1065 rtx dest = copy_rtx (operands[0]);
1066 rtx src = copy_rtx (operands[1]);
1067 /* We could use different pseudos for each copied word, but
1068 since movua can only load into r0, it's kind of
1069 pointless. */
1070 rtx temp = gen_reg_rtx (SImode);
1071 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1072 int copied = 0;
1073
1074 while (copied + 4 <= bytes)
1075 {
1076 rtx to = adjust_address (dest, SImode, copied);
1077 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1078
1079 emit_insn (gen_movua (temp, from));
1080 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1081 emit_move_insn (to, temp);
1082 copied += 4;
1083 }
1084
1085 if (copied < bytes)
1086 move_by_pieces (adjust_address (dest, BLKmode, copied),
1087 adjust_automodify_address (src, BLKmode,
1088 src_addr, copied),
1089 bytes - copied, align, 0);
1090
1091 return 1;
1092 }
1093
1094 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1095 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1096 if (align < 4 || (bytes % 4 != 0))
1097 return 0;
1098
1099 if (TARGET_HARD_SH4)
1100 {
1101 if (bytes < 12)
1102 return 0;
1103 else if (bytes == 12)
1104 {
1105 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1106 rtx r4 = gen_rtx_REG (SImode, 4);
1107 rtx r5 = gen_rtx_REG (SImode, 5);
1108
1109 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1110 force_into (XEXP (operands[0], 0), r4);
1111 force_into (XEXP (operands[1], 0), r5);
1112 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1113 return 1;
1114 }
1115 else if (! TARGET_SMALLCODE)
1116 {
1117 const char *entry_name;
1118 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1119 int dwords;
1120 rtx r4 = gen_rtx_REG (SImode, 4);
1121 rtx r5 = gen_rtx_REG (SImode, 5);
1122 rtx r6 = gen_rtx_REG (SImode, 6);
1123
1124 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1125 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1126 force_into (XEXP (operands[0], 0), r4);
1127 force_into (XEXP (operands[1], 0), r5);
1128
1129 dwords = bytes >> 3;
1130 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1131 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1132 return 1;
1133 }
1134 else
1135 return 0;
1136 }
1137 if (bytes < 64)
1138 {
1139 char entry[30];
1140 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1141 rtx r4 = gen_rtx_REG (SImode, 4);
1142 rtx r5 = gen_rtx_REG (SImode, 5);
1143
1144 sprintf (entry, "__movmemSI%d", bytes);
1145 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1146 force_into (XEXP (operands[0], 0), r4);
1147 force_into (XEXP (operands[1], 0), r5);
1148 emit_insn (gen_block_move_real (func_addr_rtx));
1149 return 1;
1150 }
1151
1152 /* This is the same number of bytes as a memcpy call, but it uses a
1153 different, less common function name, so this will occasionally use more space. */
1154 if (! TARGET_SMALLCODE)
1155 {
1156 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1157 int final_switch, while_loop;
1158 rtx r4 = gen_rtx_REG (SImode, 4);
1159 rtx r5 = gen_rtx_REG (SImode, 5);
1160 rtx r6 = gen_rtx_REG (SImode, 6);
1161
1162 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1163 force_into (XEXP (operands[0], 0), r4);
1164 force_into (XEXP (operands[1], 0), r5);
1165
1166 /* r6 controls the size of the move. 16 is decremented from it
1167 for each 64 bytes moved. Then the negative bit left over is used
1168 as an index into a list of move instructions. e.g., a 72 byte move
1169 would be set up with size(r6) = 14, for one iteration through the
1170 big while loop, and a switch of -2 for the last part. */
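/* Checking the 72-byte example against the formulas below: bytes / 4 = 18
   words, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14; subtracting 16 for
   the single 64-byte iteration leaves -2, which selects the tail moves. */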
1171
1172 final_switch = 16 - ((bytes / 4) % 16);
1173 while_loop = ((bytes / 4) / 16 - 1) * 16;
1174 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1175 emit_insn (gen_block_lump_real (func_addr_rtx));
1176 return 1;
1177 }
1178
1179 return 0;
1180 }
1181
1182 /* Prepare operands for a move define_expand; specifically, one of the
1183 operands must be in a register. */
1184
1185 int
1186 prepare_move_operands (rtx operands[], enum machine_mode mode)
1187 {
1188 if ((mode == SImode || mode == DImode)
1189 && flag_pic
1190 && ! ((mode == Pmode || mode == ptr_mode)
1191 && tls_symbolic_operand (operands[1], Pmode) != 0))
1192 {
1193 rtx temp;
1194 if (SYMBOLIC_CONST_P (operands[1]))
1195 {
1196 if (GET_CODE (operands[0]) == MEM)
1197 operands[1] = force_reg (Pmode, operands[1]);
1198 else if (TARGET_SHMEDIA
1199 && GET_CODE (operands[1]) == LABEL_REF
1200 && target_reg_operand (operands[0], mode))
1201 /* It's ok. */;
1202 else
1203 {
1204 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1205 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1206 }
1207 }
1208 else if (GET_CODE (operands[1]) == CONST
1209 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1210 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1211 {
1212 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1213 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1214 mode, temp);
1215 operands[1] = expand_binop (mode, add_optab, temp,
1216 XEXP (XEXP (operands[1], 0), 1),
1217 no_new_pseudos ? temp
1218 : gen_reg_rtx (Pmode),
1219 0, OPTAB_LIB_WIDEN);
1220 }
1221 }
1222
1223 if (! reload_in_progress && ! reload_completed)
1224 {
1225 /* Copy the source to a register if neither operand is a register. */
1226 if (! register_operand (operands[0], mode)
1227 && ! sh_register_operand (operands[1], mode))
1228 operands[1] = copy_to_mode_reg (mode, operands[1]);
1229
1230 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1231 {
1232 /* This is like change_address_1 (operands[0], mode, 0, 1),
1233 except that we can't use that function because it is static. */
1234 rtx new = change_address (operands[0], mode, 0);
1235 MEM_COPY_ATTRIBUTES (new, operands[0]);
1236 operands[0] = new;
1237 }
1238
1239 /* This case can happen while generating code to move the result
1240 of a library call to the target. Reject `st r0,@(rX,rY)' because
1241 reload will fail to find a spill register for rX, since r0 is already
1242 being used for the source. */
1243 else if (TARGET_SH1
1244 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1245 && GET_CODE (operands[0]) == MEM
1246 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1247 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1248 operands[1] = copy_to_mode_reg (mode, operands[1]);
1249 }
1250
1251 if (mode == Pmode || mode == ptr_mode)
1252 {
1253 rtx op0, op1, opc;
1254 enum tls_model tls_kind;
1255
1256 op0 = operands[0];
1257 op1 = operands[1];
1258 if (GET_CODE (op1) == CONST
1259 && GET_CODE (XEXP (op1, 0)) == PLUS
1260 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1261 {
1262 opc = XEXP (XEXP (op1, 0), 1);
1263 op1 = XEXP (XEXP (op1, 0), 0);
1264 }
1265 else
1266 opc = NULL_RTX;
1267
1268 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1269 {
1270 rtx tga_op1, tga_ret, tmp, tmp2;
1271
1272 switch (tls_kind)
1273 {
1274 case TLS_MODEL_GLOBAL_DYNAMIC:
1275 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1276 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1277 op1 = tga_ret;
1278 break;
1279
1280 case TLS_MODEL_LOCAL_DYNAMIC:
1281 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1282 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1283
1284 tmp = gen_reg_rtx (Pmode);
1285 emit_move_insn (tmp, tga_ret);
1286
1287 if (register_operand (op0, Pmode))
1288 tmp2 = op0;
1289 else
1290 tmp2 = gen_reg_rtx (Pmode);
1291
1292 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1293 op1 = tmp2;
1294 break;
1295
1296 case TLS_MODEL_INITIAL_EXEC:
1297 if (! flag_pic)
1298 {
1299 /* Don't schedule insns for getting GOT address when
1300 the first scheduling is enabled, to avoid spill
1301 failures for R0. */
1302 if (flag_schedule_insns)
1303 emit_insn (gen_blockage ());
1304 emit_insn (gen_GOTaddr2picreg ());
1305 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1306 PIC_REG)));
1307 if (flag_schedule_insns)
1308 emit_insn (gen_blockage ());
1309 }
1310 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1311 tmp = gen_sym2GOTTPOFF (op1);
1312 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1313 op1 = tga_op1;
1314 break;
1315
1316 case TLS_MODEL_LOCAL_EXEC:
1317 tmp2 = gen_reg_rtx (Pmode);
1318 emit_insn (gen_load_gbr (tmp2));
1319 tmp = gen_reg_rtx (Pmode);
1320 emit_insn (gen_symTPOFF2reg (tmp, op1));
1321
1322 if (register_operand (op0, Pmode))
1323 op1 = op0;
1324 else
1325 op1 = gen_reg_rtx (Pmode);
1326
1327 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1328 break;
1329
1330 default:
1331 gcc_unreachable ();
1332 }
1333 if (opc)
1334 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1335 operands[1] = op1;
1336 }
1337 }
1338
1339 return 0;
1340 }
1341
1342 /* Prepare the operands for an scc instruction; make sure that the
1343 compare has been done. */
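/* For example, an LT comparison is rewritten as GT with sh_compare_op0
   and sh_compare_op1 swapped, and likewise LE/LTU/LEU become GE/GTU/GEU
   (see the switch below). */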
1344 rtx
1345 prepare_scc_operands (enum rtx_code code)
1346 {
1347 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1348 enum rtx_code oldcode = code;
1349 enum machine_mode mode;
1350
1351 /* First need a compare insn. */
1352 switch (code)
1353 {
1354 case NE:
1355 /* It isn't possible to handle this case. */
1356 gcc_unreachable ();
1357 case LT:
1358 code = GT;
1359 break;
1360 case LE:
1361 code = GE;
1362 break;
1363 case LTU:
1364 code = GTU;
1365 break;
1366 case LEU:
1367 code = GEU;
1368 break;
1369 default:
1370 break;
1371 }
1372 if (code != oldcode)
1373 {
1374 rtx tmp = sh_compare_op0;
1375 sh_compare_op0 = sh_compare_op1;
1376 sh_compare_op1 = tmp;
1377 }
1378
1379 mode = GET_MODE (sh_compare_op0);
1380 if (mode == VOIDmode)
1381 mode = GET_MODE (sh_compare_op1);
1382
1383 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1384 if ((code != EQ && code != NE
1385 && (sh_compare_op1 != const0_rtx
1386 || code == GTU || code == GEU || code == LTU || code == LEU))
1387 || (mode == DImode && sh_compare_op1 != const0_rtx)
1388 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1389 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1390
1391 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1392 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1393 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1394 gen_rtx_SET (VOIDmode, t_reg,
1395 gen_rtx_fmt_ee (code, SImode,
1396 sh_compare_op0, sh_compare_op1)),
1397 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1398 else
1399 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1400 gen_rtx_fmt_ee (code, SImode,
1401 sh_compare_op0, sh_compare_op1)));
1402
1403 return t_reg;
1404 }
1405
1406 /* Called from the md file, set up the operands of a compare instruction. */
1407
1408 void
1409 from_compare (rtx *operands, int code)
1410 {
1411 enum machine_mode mode = GET_MODE (sh_compare_op0);
1412 rtx insn;
1413 if (mode == VOIDmode)
1414 mode = GET_MODE (sh_compare_op1);
1415 if (code != EQ
1416 || mode == DImode
1417 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1418 {
1419 /* Force args into regs, since we can't use constants here. */
1420 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1421 if (sh_compare_op1 != const0_rtx
1422 || code == GTU || code == GEU
1423 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1424 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1425 }
1426 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1427 {
1428 from_compare (operands, GT);
1429 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1430 }
1431 else
1432 insn = gen_rtx_SET (VOIDmode,
1433 gen_rtx_REG (SImode, T_REG),
1434 gen_rtx_fmt_ee (code, SImode,
1435 sh_compare_op0, sh_compare_op1));
1436 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1437 {
1438 insn = gen_rtx_PARALLEL (VOIDmode,
1439 gen_rtvec (2, insn,
1440 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1441 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1442 }
1443 else
1444 emit_insn (insn);
1445 }
1446
1447 /* Functions to output assembly code. */
1448
1449 /* Return a sequence of instructions to perform DI or DF move.
1450
1451 Since the SH cannot move a DI or DF in one instruction, we have
1452 to take care when we see overlapping source and dest registers. */
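/* For example, copying a register pair from r1/r2 to r2/r3 emits
   "mov %T1,%T0" (r2->r3) before "mov %1,%0" (r1->r2); the
   REGNO (src) + 1 == REGNO (dst) test below selects that ordering so the
   overlapping register is read before it is overwritten. */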
1453
1454 const char *
1455 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1456 enum machine_mode mode)
1457 {
1458 rtx dst = operands[0];
1459 rtx src = operands[1];
1460
1461 if (GET_CODE (dst) == MEM
1462 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1463 return "mov.l %T1,%0\n\tmov.l %1,%0";
1464
1465 if (register_operand (dst, mode)
1466 && register_operand (src, mode))
1467 {
1468 if (REGNO (src) == MACH_REG)
1469 return "sts mach,%S0\n\tsts macl,%R0";
1470
1471 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1472 when mov.d r1,r0 do r1->r0 then r2->r1. */
1473
1474 if (REGNO (src) + 1 == REGNO (dst))
1475 return "mov %T1,%T0\n\tmov %1,%0";
1476 else
1477 return "mov %1,%0\n\tmov %T1,%T0";
1478 }
1479 else if (GET_CODE (src) == CONST_INT)
1480 {
1481 if (INTVAL (src) < 0)
1482 output_asm_insn ("mov #-1,%S0", operands);
1483 else
1484 output_asm_insn ("mov #0,%S0", operands);
1485
1486 return "mov %1,%R0";
1487 }
1488 else if (GET_CODE (src) == MEM)
1489 {
1490 int ptrreg = -1;
1491 int dreg = REGNO (dst);
1492 rtx inside = XEXP (src, 0);
1493
1494 switch (GET_CODE (inside))
1495 {
1496 case REG:
1497 ptrreg = REGNO (inside);
1498 break;
1499
1500 case SUBREG:
1501 ptrreg = subreg_regno (inside);
1502 break;
1503
1504 case PLUS:
1505 ptrreg = REGNO (XEXP (inside, 0));
1506 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1507 an offsettable address. Unfortunately, offsettable addresses use
1508 QImode to check the offset, and a QImode offsettable address
1509 requires r0 for the other operand, which is not currently
1510 supported, so we can't use the 'o' constraint.
1511 Thus we must check for and handle r0+REG addresses here.
1512 We punt for now, since this is likely very rare. */
1513 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1514 break;
1515
1516 case LABEL_REF:
1517 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1518 case POST_INC:
1519 return "mov.l %1,%0\n\tmov.l %1,%T0";
1520 default:
1521 gcc_unreachable ();
1522 }
1523
1524 /* Work out the safe way to copy. Copy into the second half first. */
1525 if (dreg == ptrreg)
1526 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1527 }
1528
1529 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1530 }
1531
1532 /* Print an instruction which would have gone into a delay slot after
1533 another instruction, but couldn't because the other instruction expanded
1534 into a sequence where putting the slot insn at the end wouldn't work. */
1535
1536 static void
1537 print_slot (rtx insn)
1538 {
1539 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1540
1541 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1542 }
1543
1544 const char *
1545 output_far_jump (rtx insn, rtx op)
1546 {
1547 struct { rtx lab, reg, op; } this;
1548 rtx braf_base_lab = NULL_RTX;
1549 const char *jump;
1550 int far;
1551 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1552 rtx prev;
1553
1554 this.lab = gen_label_rtx ();
1555
1556 if (TARGET_SH2
1557 && offset >= -32764
1558 && offset - get_attr_length (insn) <= 32766)
1559 {
1560 far = 0;
1561 jump = "mov.w %O0,%1; braf %1";
1562 }
1563 else
1564 {
1565 far = 1;
1566 if (flag_pic)
1567 {
1568 if (TARGET_SH2)
1569 jump = "mov.l %O0,%1; braf %1";
1570 else
1571 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1572 }
1573 else
1574 jump = "mov.l %O0,%1; jmp @%1";
1575 }
1576 /* If we have a scratch register available, use it. */
1577 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1578 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1579 {
1580 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1581 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1582 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1583 output_asm_insn (jump, &this.lab);
1584 if (dbr_sequence_length ())
1585 print_slot (final_sequence);
1586 else
1587 output_asm_insn ("nop", 0);
1588 }
1589 else
1590 {
1591 /* Output the delay slot insn first if any. */
1592 if (dbr_sequence_length ())
1593 print_slot (final_sequence);
1594
1595 this.reg = gen_rtx_REG (SImode, 13);
1596 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1597 Fortunately, MACL is fixed and call-clobbered, and we never
1598 need its value across jumps, so save r13 in it instead of on
1599 the stack. */
1600 if (TARGET_SH5)
1601 output_asm_insn ("lds r13, macl", 0);
1602 else
1603 output_asm_insn ("mov.l r13,@-r15", 0);
1604 output_asm_insn (jump, &this.lab);
1605 if (TARGET_SH5)
1606 output_asm_insn ("sts macl, r13", 0);
1607 else
1608 output_asm_insn ("mov.l @r15+,r13", 0);
1609 }
1610 if (far && flag_pic && TARGET_SH2)
1611 {
1612 braf_base_lab = gen_label_rtx ();
1613 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1614 CODE_LABEL_NUMBER (braf_base_lab));
1615 }
1616 if (far)
1617 output_asm_insn (".align 2", 0);
1618 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1619 this.op = op;
1620 if (far && flag_pic)
1621 {
1622 if (TARGET_SH2)
1623 this.lab = braf_base_lab;
1624 output_asm_insn (".long %O2-%O0", &this.lab);
1625 }
1626 else
1627 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1628 return "";
1629 }
1630
1631 /* Local label counter, used for constants in the pool and inside
1632 pattern branches. */
1633
1634 static int lf = 100;
1635
1636 /* Output code for ordinary branches. */
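/* Depending on get_attr_length (insn), the cases below emit: 2 bytes, a
   plain bt/bf; 4 bytes, a conditional branch with delay slot plus a nop;
   6 or 8 bytes, an inverted conditional branch around an unconditional
   bra (the 8-byte SH2e form adds an extra nop for the annulled-branch
   hardware bug). */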
1637
1638 const char *
1639 output_branch (int logic, rtx insn, rtx *operands)
1640 {
1641 switch (get_attr_length (insn))
1642 {
1643 case 6:
1644 /* This can happen if filling the delay slot has caused a forward
1645 branch to exceed its range (we could reverse it, but only
1646 when we know we won't overextend other branches; this should
1647 best be handled by relaxation).
1648 It can also happen when other condbranches hoist delay slot insns
1649 from their destinations, thus leading to code size increase.
1650 But the branch will still be in the range -4092..+4098 bytes. */
1651
1652 if (! TARGET_RELAX)
1653 {
1654 int label = lf++;
1655 /* The call to print_slot will clobber the operands. */
1656 rtx op0 = operands[0];
1657
1658 /* If the instruction in the delay slot is annulled (true), then
1659 there is no delay slot where we can put it now. The only safe
1660 place for it is after the label. final will do that by default. */
1661
1662 if (final_sequence
1663 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1664 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1665 {
1666 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1667 ASSEMBLER_DIALECT ? "/" : ".", label);
1668 print_slot (final_sequence);
1669 }
1670 else
1671 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1672
1673 output_asm_insn ("bra\t%l0", &op0);
1674 fprintf (asm_out_file, "\tnop\n");
1675 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1676
1677 return "";
1678 }
1679 /* When relaxing, handle this like a short branch. The linker
1680 will fix it up if it still doesn't fit after relaxation. */
1681 case 2:
1682 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1683
1684 /* These are for SH2e, in which we have to account for the
1685 extra nop because of the hardware bug in annulled branches. */
1686 case 8:
1687 if (! TARGET_RELAX)
1688 {
1689 int label = lf++;
1690
1691 gcc_assert (!final_sequence
1692 || !(INSN_ANNULLED_BRANCH_P
1693 (XVECEXP (final_sequence, 0, 0))));
1694 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1695 logic ? "f" : "t",
1696 ASSEMBLER_DIALECT ? "/" : ".", label);
1697 fprintf (asm_out_file, "\tnop\n");
1698 output_asm_insn ("bra\t%l0", operands);
1699 fprintf (asm_out_file, "\tnop\n");
1700 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1701
1702 return "";
1703 }
1704 /* When relaxing, fall through. */
1705 case 4:
1706 {
1707 char buffer[10];
1708
1709 sprintf (buffer, "b%s%ss\t%%l0",
1710 logic ? "t" : "f",
1711 ASSEMBLER_DIALECT ? "/" : ".");
1712 output_asm_insn (buffer, &operands[0]);
1713 return "nop";
1714 }
1715
1716 default:
1717 /* There should be no longer branches now - that would
1718 indicate that something has destroyed the branches set
1719 up in machine_dependent_reorg. */
1720 gcc_unreachable ();
1721 }
1722 }
1723
1724 const char *
1725 output_branchy_insn (enum rtx_code code, const char *template,
1726 rtx insn, rtx *operands)
1727 {
1728 rtx next_insn = NEXT_INSN (insn);
1729
1730 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1731 {
1732 rtx src = SET_SRC (PATTERN (next_insn));
1733 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1734 {
1735 /* Following branch not taken */
1736 operands[9] = gen_label_rtx ();
1737 emit_label_after (operands[9], next_insn);
1738 INSN_ADDRESSES_NEW (operands[9],
1739 INSN_ADDRESSES (INSN_UID (next_insn))
1740 + get_attr_length (next_insn));
1741 return template;
1742 }
1743 else
1744 {
1745 int offset = (branch_dest (next_insn)
1746 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1747 if (offset >= -252 && offset <= 258)
1748 {
1749 if (GET_CODE (src) == IF_THEN_ELSE)
1750 /* branch_true */
1751 src = XEXP (src, 1);
1752 operands[9] = src;
1753 return template;
1754 }
1755 }
1756 }
1757 operands[9] = gen_label_rtx ();
1758 emit_label_after (operands[9], insn);
1759 INSN_ADDRESSES_NEW (operands[9],
1760 INSN_ADDRESSES (INSN_UID (insn))
1761 + get_attr_length (insn));
1762 return template;
1763 }
1764
1765 const char *
1766 output_ieee_ccmpeq (rtx insn, rtx *operands)
1767 {
1768 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1769 insn, operands);
1770 }
1771
1772 /* Output the start of the assembler file. */
1773
1774 static void
1775 sh_file_start (void)
1776 {
1777 default_file_start ();
1778
1779 #ifdef SYMBIAN
1780 /* Declare the .directive section before it is used. */
1781 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1782 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1783 #endif
1784
1785 if (TARGET_ELF)
1786 /* We need to show the text section with the proper
1787 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1788 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1789 will complain. We can teach GAS specifically about the
1790 default attributes for our choice of text section, but
1791 then we would have to change GAS again if/when we change
1792 the text section name. */
1793 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1794 else
1795 /* Switch to the data section so that the coffsem symbol
1796 isn't in the text section. */
1797 switch_to_section (data_section);
1798
1799 if (TARGET_LITTLE_ENDIAN)
1800 fputs ("\t.little\n", asm_out_file);
1801
1802 if (!TARGET_ELF)
1803 {
1804 if (TARGET_SHCOMPACT)
1805 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1806 else if (TARGET_SHMEDIA)
1807 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1808 TARGET_SHMEDIA64 ? 64 : 32);
1809 }
1810 }
1811
1812 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1813
1814 static bool
1815 unspec_caller_rtx_p (rtx pat)
1816 {
1817 switch (GET_CODE (pat))
1818 {
1819 case CONST:
1820 return unspec_caller_rtx_p (XEXP (pat, 0));
1821 case PLUS:
1822 case MINUS:
1823 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1824 return true;
1825 return unspec_caller_rtx_p (XEXP (pat, 1));
1826 case UNSPEC:
1827 if (XINT (pat, 1) == UNSPEC_CALLER)
1828 return true;
1829 default:
1830 break;
1831 }
1832
1833 return false;
1834 }
1835
1836 /* Indicate that INSN cannot be duplicated. This is true for insn
1837 that generates a unique label. */
1838
1839 static bool
1840 sh_cannot_copy_insn_p (rtx insn)
1841 {
1842 rtx pat;
1843
1844 if (!reload_completed || !flag_pic)
1845 return false;
1846
1847 if (GET_CODE (insn) != INSN)
1848 return false;
1849 if (asm_noperands (insn) >= 0)
1850 return false;
1851
1852 pat = PATTERN (insn);
1853 if (GET_CODE (pat) != SET)
1854 return false;
1855 pat = SET_SRC (pat);
1856
1857 if (unspec_caller_rtx_p (pat))
1858 return true;
1859
1860 return false;
1861 }
1862
1863 /* Actual number of instructions used to make a shift by N. */
1864 static const char ashiftrt_insns[] =
1865 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1866
1867 /* Left shift and logical right shift are the same. */
1868 static const char shift_insns[] =
1869 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1870
1871 /* Individual shift amounts needed to get the above length sequences.
1872 One bit right shifts clobber the T bit, so when possible, put one bit
1873 shifts in the middle of the sequence, so the ends are eligible for
1874 branch delay slots. */
1875 static const short shift_amounts[32][5] = {
1876 {0}, {1}, {2}, {2, 1},
1877 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1878 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1879 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1880 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1881 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1882 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1883 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
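/* For example, a constant shift by 13 uses shift_amounts[13] = {8, 2, 1, 2},
   i.e. four single shifts totalling 13, which matches shift_insns[13] == 4.
   A negative entry, as in {8, -2, 8} for a shift by 14, denotes a shift in
   the opposite direction; gen_ashift below flips the direction for such
   entries.  */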
1884
1885 /* Likewise, but for shift amounts < 16, up to three highmost bits
1886 might be clobbered. This is typically used when combined with some
1887 kind of sign or zero extension. */
1888
1889 static const char ext_shift_insns[] =
1890 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1891
1892 static const short ext_shift_amounts[32][4] = {
1893 {0}, {1}, {2}, {2, 1},
1894 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1895 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1896 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1897 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1898 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1899 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1900 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1901
1902 /* Assuming we have a value that has been sign-extended by at least one bit,
1903 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1904 to shift it by N without data loss, and quicker than by other means? */
1905 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
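/* (n | 8) == 15 holds exactly for n == 7 and n == 15, i.e. the
   ext_shift_amounts entries {8, -1} and {16, -1}, whose final one-bit
   right shift can be made arithmetic without losing data.  */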
1906
1907 /* This is used in length attributes in sh.md to help compute the length
1908 of arbitrary constant shift instructions. */
1909
1910 int
1911 shift_insns_rtx (rtx insn)
1912 {
1913 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1914 int shift_count = INTVAL (XEXP (set_src, 1));
1915 enum rtx_code shift_code = GET_CODE (set_src);
1916
1917 switch (shift_code)
1918 {
1919 case ASHIFTRT:
1920 return ashiftrt_insns[shift_count];
1921 case LSHIFTRT:
1922 case ASHIFT:
1923 return shift_insns[shift_count];
1924 default:
1925 gcc_unreachable ();
1926 }
1927 }
1928
1929 /* Return the cost of a shift. */
1930
1931 static inline int
1932 shiftcosts (rtx x)
1933 {
1934 int value;
1935
1936 if (TARGET_SHMEDIA)
1937 return 1;
1938
1939 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1940 {
1941 if (GET_MODE (x) == DImode
1942 && GET_CODE (XEXP (x, 1)) == CONST_INT
1943 && INTVAL (XEXP (x, 1)) == 1)
1944 return 2;
1945
1946 /* Everything else is invalid, because there is no pattern for it. */
1947 return MAX_COST;
1948 }
1949 /* If shift by a non constant, then this will be expensive. */
1950 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1951 return SH_DYNAMIC_SHIFT_COST;
1952
1953 value = INTVAL (XEXP (x, 1));
1954
1955 /* Otherwise, return the true cost in instructions. */
1956 if (GET_CODE (x) == ASHIFTRT)
1957 {
1958 int cost = ashiftrt_insns[value];
1959 /* If SH3, then we put the constant in a reg and use shad. */
1960 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1961 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1962 return cost;
1963 }
1964 else
1965 return shift_insns[value];
1966 }
1967
1968 /* Return the cost of an AND operation. */
1969
1970 static inline int
1971 andcosts (rtx x)
1972 {
1973 int i;
1974
1975 /* Anding with a register is a single cycle and instruction. */
1976 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1977 return 1;
1978
1979 i = INTVAL (XEXP (x, 1));
1980
1981 if (TARGET_SHMEDIA)
1982 {
1983 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1984 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1985 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1986 return 1;
1987 else
1988 return 1 + rtx_cost (XEXP (x, 1), AND);
1989 }
1990
1991 /* These constants are single cycle extu.[bw] instructions. */
1992 if (i == 0xff || i == 0xffff)
1993 return 1;
1994 /* Constants that can be used in an and immediate instruction in a single
1995 cycle, but this requires r0, so make it a little more expensive. */
1996 if (CONST_OK_FOR_K08 (i))
1997 return 2;
1998 /* Constants that can be loaded with a mov immediate and an and.
1999 This case is probably unnecessary. */
2000 if (CONST_OK_FOR_I08 (i))
2001 return 2;
2002 /* Any other constants requires a 2 cycle pc-relative load plus an and.
2003 This case is probably unnecessary. */
2004 return 3;
2005 }
2006
2007 /* Return the cost of an addition or a subtraction. */
2008
2009 static inline int
2010 addsubcosts (rtx x)
2011 {
2012 /* Adding a register is a single cycle insn. */
2013 if (GET_CODE (XEXP (x, 1)) == REG
2014 || GET_CODE (XEXP (x, 1)) == SUBREG)
2015 return 1;
2016
2017 /* Likewise for small constants. */
2018 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2019 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2020 return 1;
2021
2022 if (TARGET_SHMEDIA)
2023 switch (GET_CODE (XEXP (x, 1)))
2024 {
2025 case CONST:
2026 case LABEL_REF:
2027 case SYMBOL_REF:
2028 return TARGET_SHMEDIA64 ? 5 : 3;
2029
2030 case CONST_INT:
2031 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2032 return 2;
2033 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2034 return 3;
2035 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2036 return 4;
2037
2038 /* Fall through. */
2039 default:
2040 return 5;
2041 }
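/* The CONST_INT ladder above reflects how many instructions it takes to
   synthesize the constant before the add: one for a signed 16-bit
   immediate, and one more for each further 16 bits of significance --
   presumably a movi followed by shori instructions on SHmedia.  */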
2042
2043 /* Any other constant requires a 2 cycle pc-relative load plus an
2044 addition. */
2045 return 3;
2046 }
2047
2048 /* Return the cost of a multiply. */
2049 static inline int
2050 multcosts (rtx x ATTRIBUTE_UNUSED)
2051 {
2052 if (sh_multcost >= 0)
2053 return sh_multcost;
2054 if (TARGET_SHMEDIA)
2055 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2056 accept constants. Ideally, we would use a cost of one or two and
2057 add the cost of the operand, but disregard the latter when inside loops
2058 and loop invariant code motion is still to follow.
2059 Using a multiply first and splitting it later if it's a loss
2060 doesn't work because of different sign / zero extension semantics
2061 of multiplies vs. shifts. */
2062 return TARGET_SMALLCODE ? 2 : 3;
2063
2064 if (TARGET_SH2)
2065 {
2066 /* We have a mul insn, so we can never take more than the mul and the
2067 read of the mac reg, but count more because of the latency and extra
2068 reg usage. */
2069 if (TARGET_SMALLCODE)
2070 return 2;
2071 return 3;
2072 }
2073
2074 /* If we're aiming at small code, then just count the number of
2075 insns in a multiply call sequence. */
2076 if (TARGET_SMALLCODE)
2077 return 5;
2078
2079 /* Otherwise count all the insns in the routine we'd be calling too. */
2080 return 20;
2081 }
2082
2083 /* Compute a (partial) cost for rtx X. Return true if the complete
2084 cost has been computed, and false if subexpressions should be
2085 scanned. In either case, *TOTAL contains the cost result. */
2086
2087 static bool
2088 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2089 {
2090 switch (code)
2091 {
2092 case CONST_INT:
2093 if (TARGET_SHMEDIA)
2094 {
2095 if (INTVAL (x) == 0)
2096 *total = 0;
2097 else if (outer_code == AND && and_operand ((x), DImode))
2098 *total = 0;
2099 else if ((outer_code == IOR || outer_code == XOR
2100 || outer_code == PLUS)
2101 && CONST_OK_FOR_I10 (INTVAL (x)))
2102 *total = 0;
2103 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2104 *total = COSTS_N_INSNS (outer_code != SET);
2105 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2106 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2107 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2108 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2109 else
2110 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2111 return true;
2112 }
2113 if (CONST_OK_FOR_I08 (INTVAL (x)))
2114 *total = 0;
2115 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2116 && CONST_OK_FOR_K08 (INTVAL (x)))
2117 *total = 1;
2118 else
2119 *total = 8;
2120 return true;
2121
2122 case CONST:
2123 case LABEL_REF:
2124 case SYMBOL_REF:
2125 if (TARGET_SHMEDIA64)
2126 *total = COSTS_N_INSNS (4);
2127 else if (TARGET_SHMEDIA32)
2128 *total = COSTS_N_INSNS (2);
2129 else
2130 *total = 5;
2131 return true;
2132
2133 case CONST_DOUBLE:
2134 if (TARGET_SHMEDIA)
2135 *total = COSTS_N_INSNS (4);
2136 else
2137 *total = 10;
2138 return true;
2139 case CONST_VECTOR:
2140 if (x == CONST0_RTX (GET_MODE (x)))
2141 *total = 0;
2142 else if (sh_1el_vec (x, VOIDmode))
2143 *total = outer_code != SET;
2144 if (sh_rep_vec (x, VOIDmode))
2145 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2146 + (outer_code != SET));
2147 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2148 return true;
2149
2150 case PLUS:
2151 case MINUS:
2152 *total = COSTS_N_INSNS (addsubcosts (x));
2153 return true;
2154
2155 case AND:
2156 *total = COSTS_N_INSNS (andcosts (x));
2157 return true;
2158
2159 case MULT:
2160 *total = COSTS_N_INSNS (multcosts (x));
2161 return true;
2162
2163 case ASHIFT:
2164 case ASHIFTRT:
2165 case LSHIFTRT:
2166 *total = COSTS_N_INSNS (shiftcosts (x));
2167 return true;
2168
2169 case DIV:
2170 case UDIV:
2171 case MOD:
2172 case UMOD:
2173 *total = COSTS_N_INSNS (20);
2174 return true;
2175
2176 case PARALLEL:
2177 if (sh_1el_vec (x, VOIDmode))
2178 *total = outer_code != SET;
2179 if (sh_rep_vec (x, VOIDmode))
2180 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2181 + (outer_code != SET));
2182 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2183 return true;
2184
2185 case FLOAT:
2186 case FIX:
2187 *total = 100;
2188 return true;
2189
2190 default:
2191 return false;
2192 }
2193 }
2194
2195 /* Compute the cost of an address. For the SH, all valid addresses are
2196 the same cost. Use a slightly higher cost for reg + reg addressing,
2197 since it increases pressure on r0. */
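/* For example, a reg + reg address such as @(r0,rn) gets cost 1 on
   non-SHmedia targets, while every other legitimate address gets cost 0.  */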
2198
2199 static int
2200 sh_address_cost (rtx X)
2201 {
2202 return (GET_CODE (X) == PLUS
2203 && ! CONSTANT_P (XEXP (X, 1))
2204 && ! TARGET_SHMEDIA ? 1 : 0);
2205 }
2206
2207 /* Code to expand a shift. */
2208
2209 void
2210 gen_ashift (int type, int n, rtx reg)
2211 {
2212 /* Negative values here come from the shift_amounts array. */
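/* For example, gen_ashift (ASHIFT, -2, reg), as requested by the
   {8, -2, 8} entry for a shift by 14, actually emits a two-bit logical
   right shift.  */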
2213 if (n < 0)
2214 {
2215 if (type == ASHIFT)
2216 type = LSHIFTRT;
2217 else
2218 type = ASHIFT;
2219 n = -n;
2220 }
2221
2222 switch (type)
2223 {
2224 case ASHIFTRT:
2225 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2226 break;
2227 case LSHIFTRT:
2228 if (n == 1)
2229 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2230 else
2231 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2232 break;
2233 case ASHIFT:
2234 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2235 break;
2236 }
2237 }
2238
2239 /* Same for HImode */
2240
2241 void
2242 gen_ashift_hi (int type, int n, rtx reg)
2243 {
2244 /* Negative values here come from the shift_amounts array. */
2245 if (n < 0)
2246 {
2247 if (type == ASHIFT)
2248 type = LSHIFTRT;
2249 else
2250 type = ASHIFT;
2251 n = -n;
2252 }
2253
2254 switch (type)
2255 {
2256 case ASHIFTRT:
2257 case LSHIFTRT:
2258 /* We don't have HImode right shift operations because using the
2259 ordinary 32 bit shift instructions for that doesn't generate proper
2260 zero/sign extension.
2261 gen_ashift_hi is only called in contexts where we know that the
2262 sign extension works out correctly. */
2263 {
2264 int offset = 0;
2265 if (GET_CODE (reg) == SUBREG)
2266 {
2267 offset = SUBREG_BYTE (reg);
2268 reg = SUBREG_REG (reg);
2269 }
2270 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2271 break;
2272 }
2273 case ASHIFT:
2274 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2275 break;
2276 }
2277 }
2278
2279 /* Output RTL to split a constant shift into its component SH constant
2280 shift instructions. */
2281
2282 void
2283 gen_shifty_op (int code, rtx *operands)
2284 {
2285 int value = INTVAL (operands[2]);
2286 int max, i;
2287
2288 /* Truncate the shift count in case it is out of bounds. */
2289 value = value & 0x1f;
2290
2291 if (value == 31)
2292 {
2293 if (code == LSHIFTRT)
2294 {
2295 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2296 emit_insn (gen_movt (operands[0]));
2297 return;
2298 }
2299 else if (code == ASHIFT)
2300 {
2301 /* There is a two instruction sequence for 31 bit left shifts,
2302 but it requires r0. */
2303 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2304 {
2305 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2306 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2307 return;
2308 }
2309 }
2310 }
2311 else if (value == 0)
2312 {
2313 /* This can happen even when optimizing, if there were subregs before
2314 reload. Don't output a nop here, as this is never optimized away;
2315 use a no-op move instead. */
2316 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2317 return;
2318 }
2319
2320 max = shift_insns[value];
2321 for (i = 0; i < max; i++)
2322 gen_ashift (code, shift_amounts[value][i], operands[0]);
2323 }
2324
2325 /* Same as above, but optimized for values where the topmost bits don't
2326 matter. */
2327
2328 void
2329 gen_shifty_hi_op (int code, rtx *operands)
2330 {
2331 int value = INTVAL (operands[2]);
2332 int max, i;
2333 void (*gen_fun) (int, int, rtx);
2334
2335 /* This operation is used by and_shl for SImode values with a few
2336 high bits known to be cleared. */
2337 value &= 31;
2338 if (value == 0)
2339 {
2340 emit_insn (gen_nop ());
2341 return;
2342 }
2343
2344 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2345 if (code == ASHIFT)
2346 {
2347 max = ext_shift_insns[value];
2348 for (i = 0; i < max; i++)
2349 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2350 }
2351 else
2352 /* When shifting right, emit the shifts in reverse order, so that
2353 solitary negative values come first. */
2354 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2355 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2356 }
2357
2358 /* Output RTL for an arithmetic right shift. */
2359
2360 /* ??? Rewrite to use super-optimizer sequences. */
2361
2362 int
2363 expand_ashiftrt (rtx *operands)
2364 {
2365 rtx wrk;
2366 char func[18];
2367 int value;
2368
2369 if (TARGET_SH3)
2370 {
2371 if (GET_CODE (operands[2]) != CONST_INT)
2372 {
2373 rtx count = copy_to_mode_reg (SImode, operands[2]);
2374 emit_insn (gen_negsi2 (count, count));
2375 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2376 return 1;
2377 }
2378 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2379 > 1 + SH_DYNAMIC_SHIFT_COST)
2380 {
2381 rtx count
2382 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2383 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2384 return 1;
2385 }
2386 }
2387 if (GET_CODE (operands[2]) != CONST_INT)
2388 return 0;
2389
2390 value = INTVAL (operands[2]) & 31;
2391
2392 if (value == 31)
2393 {
2394 /* If we are called from abs expansion, arrange things so that we
2395 can use a single MT instruction that doesn't clobber the source,
2396 if LICM can hoist out the load of the constant zero. */
2397 if (currently_expanding_to_rtl)
2398 {
2399 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2400 operands[1]));
2401 emit_insn (gen_mov_neg_si_t (operands[0]));
2402 return 1;
2403 }
2404 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2405 return 1;
2406 }
2407 else if (value >= 16 && value <= 19)
2408 {
2409 wrk = gen_reg_rtx (SImode);
2410 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2411 value -= 16;
2412 while (value--)
2413 gen_ashift (ASHIFTRT, 1, wrk);
2414 emit_move_insn (operands[0], wrk);
2415 return 1;
2416 }
2417 /* Expand a short sequence inline, longer call a magic routine. */
2418 else if (value <= 5)
2419 {
2420 wrk = gen_reg_rtx (SImode);
2421 emit_move_insn (wrk, operands[1]);
2422 while (value--)
2423 gen_ashift (ASHIFTRT, 1, wrk);
2424 emit_move_insn (operands[0], wrk);
2425 return 1;
2426 }
2427
2428 wrk = gen_reg_rtx (Pmode);
2429
2430 /* Load the value into an arg reg and call a helper. */
2431 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2432 sprintf (func, "__ashiftrt_r4_%d", value);
2433 function_symbol (wrk, func, SFUNC_STATIC);
2434 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2435 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2436 return 1;
2437 }
2438
2439 int
2440 sh_dynamicalize_shift_p (rtx count)
2441 {
2442 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2443 }
2444
2445 /* Try to find a good way to implement the combiner pattern
2446 [(set (match_operand:SI 0 "register_operand" "r")
2447 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2448 (match_operand:SI 2 "const_int_operand" "n"))
2449 (match_operand:SI 3 "const_int_operand" "n"))) .
2450 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2451 return 0 for simple right / left or left/right shift combination.
2452 return 1 for a combination of shifts with zero_extend.
2453 return 2 for a combination of shifts with an AND that needs r0.
2454 return 3 for a combination of shifts with an AND that needs an extra
2455 scratch register, when the three highmost bits of the AND mask are clear.
2456 return 4 for a combination of shifts with an AND that needs an extra
2457 scratch register, when any of the three highmost bits of the AND mask
2458 is set.
2459 If ATTRP is set, store an initial right shift width in ATTRP[0],
2460 and the instruction length in ATTRP[1].  These values are not valid
2461 when returning 0.
2462 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2463 shift_amounts for the last shift value that is to be used before the
2464 sign extend. */
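/* For example, with LEFT_RTX == 2 and MASK_RTX == 0x3fc the cheapest
   sequence found is kind 1: a byte zero-extension followed by a two-bit
   left shift, two instructions in total.  */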
2465 int
2466 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2467 {
2468 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2469 int left = INTVAL (left_rtx), right;
2470 int best = 0;
2471 int cost, best_cost = 10000;
2472 int best_right = 0, best_len = 0;
2473 int i;
2474 int can_ext;
2475
2476 if (left < 0 || left > 31)
2477 return 0;
2478 if (GET_CODE (mask_rtx) == CONST_INT)
2479 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2480 else
2481 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2482 /* Can this be expressed as a right shift / left shift pair? */
2483 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2484 right = exact_log2 (lsb);
2485 mask2 = ~(mask + lsb - 1);
2486 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
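/* ((x ^ (x - 1)) >> 1) + 1 isolates the lowest set bit of a nonzero x,
   so LSB and LSB2 are the lowest set bits of MASK and MASK2 respectively,
   and RIGHT is the number of trailing zeros in MASK.  */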
2487 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2488 if (! mask2)
2489 best_cost = shift_insns[right] + shift_insns[right + left];
2490 /* mask has no trailing zeroes <==> ! right */
2491 else if (! right && mask2 == ~(lsb2 - 1))
2492 {
2493 int late_right = exact_log2 (lsb2);
2494 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2495 }
2496 /* Try to use zero extend. */
2497 if (mask2 == ~(lsb2 - 1))
2498 {
2499 int width, first;
2500
2501 for (width = 8; width <= 16; width += 8)
2502 {
2503 /* Can we zero-extend right away? */
2504 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2505 {
2506 cost
2507 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2508 if (cost < best_cost)
2509 {
2510 best = 1;
2511 best_cost = cost;
2512 best_right = right;
2513 best_len = cost;
2514 if (attrp)
2515 attrp[2] = -1;
2516 }
2517 continue;
2518 }
2519 /* ??? Could try to put zero extend into initial right shift,
2520 or even shift a bit left before the right shift. */
2521 /* Determine value of first part of left shift, to get to the
2522 zero extend cut-off point. */
2523 first = width - exact_log2 (lsb2) + right;
2524 if (first >= 0 && right + left - first >= 0)
2525 {
2526 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2527 + ext_shift_insns[right + left - first];
2528 if (cost < best_cost)
2529 {
2530 best = 1;
2531 best_cost = cost;
2532 best_right = right;
2533 best_len = cost;
2534 if (attrp)
2535 attrp[2] = first;
2536 }
2537 }
2538 }
2539 }
2540 /* Try to use r0 AND pattern */
2541 for (i = 0; i <= 2; i++)
2542 {
2543 if (i > right)
2544 break;
2545 if (! CONST_OK_FOR_K08 (mask >> i))
2546 continue;
2547 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2548 if (cost < best_cost)
2549 {
2550 best = 2;
2551 best_cost = cost;
2552 best_right = i;
2553 best_len = cost - 1;
2554 }
2555 }
2556 /* Try to use a scratch register to hold the AND operand. */
2557 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2558 for (i = 0; i <= 2; i++)
2559 {
2560 if (i > right)
2561 break;
2562 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2563 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2564 if (cost < best_cost)
2565 {
2566 best = 4 - can_ext;
2567 best_cost = cost;
2568 best_right = i;
2569 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2570 }
2571 }
2572
2573 if (attrp)
2574 {
2575 attrp[0] = best_right;
2576 attrp[1] = best_len;
2577 }
2578 return best;
2579 }
2580
2581 /* This is used in length attributes of the unnamed instructions
2582 corresponding to shl_and_kind return values of 1 and 2. */
2583 int
2584 shl_and_length (rtx insn)
2585 {
2586 rtx set_src, left_rtx, mask_rtx;
2587 int attributes[3];
2588
2589 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2590 left_rtx = XEXP (XEXP (set_src, 0), 1);
2591 mask_rtx = XEXP (set_src, 1);
2592 shl_and_kind (left_rtx, mask_rtx, attributes);
2593 return attributes[1];
2594 }
2595
2596 /* This is used in length attribute of the and_shl_scratch instruction. */
2597
2598 int
2599 shl_and_scr_length (rtx insn)
2600 {
2601 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2602 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2603 rtx op = XEXP (set_src, 0);
2604 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2605 op = XEXP (XEXP (op, 0), 0);
2606 return len + shift_insns[INTVAL (XEXP (op, 1))];
2607 }
2608
2609 /* Generate rtl for instructions for which shl_and_kind advised a particular
2610 method of generating them, i.e. returned zero. */
2611
2612 int
2613 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2614 {
2615 int attributes[3];
2616 unsigned HOST_WIDE_INT mask;
2617 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2618 int right, total_shift;
2619 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2620
2621 right = attributes[0];
2622 total_shift = INTVAL (left_rtx) + right;
2623 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2624 switch (kind)
2625 {
2626 default:
2627 return -1;
2628 case 1:
2629 {
2630 int first = attributes[2];
2631 rtx operands[3];
2632
2633 if (first < 0)
2634 {
2635 emit_insn ((mask << right) <= 0xff
2636 ? gen_zero_extendqisi2 (dest,
2637 gen_lowpart (QImode, source))
2638 : gen_zero_extendhisi2 (dest,
2639 gen_lowpart (HImode, source)));
2640 source = dest;
2641 }
2642 if (source != dest)
2643 emit_insn (gen_movsi (dest, source));
2644 operands[0] = dest;
2645 if (right)
2646 {
2647 operands[2] = GEN_INT (right);
2648 gen_shifty_hi_op (LSHIFTRT, operands);
2649 }
2650 if (first > 0)
2651 {
2652 operands[2] = GEN_INT (first);
2653 gen_shifty_hi_op (ASHIFT, operands);
2654 total_shift -= first;
2655 mask <<= first;
2656 }
2657 if (first >= 0)
2658 emit_insn (mask <= 0xff
2659 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2660 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2661 if (total_shift > 0)
2662 {
2663 operands[2] = GEN_INT (total_shift);
2664 gen_shifty_hi_op (ASHIFT, operands);
2665 }
2666 break;
2667 }
2668 case 4:
2669 shift_gen_fun = gen_shifty_op;
2670 case 3:
2671 /* If the topmost bit that matters is set, set the topmost bits
2672 that don't matter. This way, we might be able to get a shorter
2673 signed constant. */
2674 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2675 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2676 case 2:
2677 /* Don't expand fine-grained when combining, because that will
2678 make the pattern fail. */
2679 if (currently_expanding_to_rtl
2680 || reload_in_progress || reload_completed)
2681 {
2682 rtx operands[3];
2683
2684 /* Cases 3 and 4 should be handled by this split
2685 only while combining */
2686 gcc_assert (kind <= 2);
2687 if (right)
2688 {
2689 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2690 source = dest;
2691 }
2692 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2693 if (total_shift)
2694 {
2695 operands[0] = dest;
2696 operands[1] = dest;
2697 operands[2] = GEN_INT (total_shift);
2698 shift_gen_fun (ASHIFT, operands);
2699 }
2700 break;
2701 }
2702 else
2703 {
2704 int neg = 0;
2705 if (kind != 4 && total_shift < 16)
2706 {
2707 neg = -ext_shift_amounts[total_shift][1];
2708 if (neg > 0)
2709 neg -= ext_shift_amounts[total_shift][2];
2710 else
2711 neg = 0;
2712 }
2713 emit_insn (gen_and_shl_scratch (dest, source,
2714 GEN_INT (right),
2715 GEN_INT (mask),
2716 GEN_INT (total_shift + neg),
2717 GEN_INT (neg)));
2718 emit_insn (gen_movsi (dest, dest));
2719 break;
2720 }
2721 }
2722 return 0;
2723 }
2724
2725 /* Try to find a good way to implement the combiner pattern
2726 [(set (match_operand:SI 0 "register_operand" "=r")
2727 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2728 (match_operand:SI 2 "const_int_operand" "n")
2729 (match_operand:SI 3 "const_int_operand" "n")
2730 (const_int 0)))
2731 (clobber (reg:SI T_REG))]
2732 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2733 return 0 for simple left / right shift combination.
2734 return 1 for left shift / 8 bit sign extend / left shift.
2735 return 2 for left shift / 16 bit sign extend / left shift.
2736 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2737 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2738 return 5 for left shift / 16 bit sign extend / right shift
2739 return 6 for < 8 bit sign extend / left shift.
2740 return 7 for < 8 bit sign extend / left shift / single right shift.
2741 If COSTP is nonzero, assign the calculated cost to *COSTP. */
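/* For example, LEFT == 8 and SIZE == 16 (INSIZE == 8) is handled as
   kind 5: an 8-bit left shift followed by a 16-bit sign extension, two
   instructions in total.  */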
2742
2743 int
2744 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2745 {
2746 int left, size, insize, ext;
2747 int cost = 0, best_cost;
2748 int kind;
2749
2750 left = INTVAL (left_rtx);
2751 size = INTVAL (size_rtx);
2752 insize = size - left;
2753 gcc_assert (insize > 0);
2754 /* Default to left / right shift. */
2755 kind = 0;
2756 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2757 if (size <= 16)
2758 {
2759 /* 16 bit shift / sign extend / 16 bit shift */
2760 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2761 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2762 below, by alternative 3 or something even better. */
2763 if (cost < best_cost)
2764 {
2765 kind = 5;
2766 best_cost = cost;
2767 }
2768 }
2769 /* Try a plain sign extend between two shifts. */
2770 for (ext = 16; ext >= insize; ext -= 8)
2771 {
2772 if (ext <= size)
2773 {
2774 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2775 if (cost < best_cost)
2776 {
2777 kind = ext / (unsigned) 8;
2778 best_cost = cost;
2779 }
2780 }
2781 /* Check if we can do a sloppy shift with a final signed shift
2782 restoring the sign. */
2783 if (EXT_SHIFT_SIGNED (size - ext))
2784 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2785 /* If not, maybe it's still cheaper to do the second shift sloppy,
2786 and do a final sign extend? */
2787 else if (size <= 16)
2788 cost = ext_shift_insns[ext - insize] + 1
2789 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2790 else
2791 continue;
2792 if (cost < best_cost)
2793 {
2794 kind = ext / (unsigned) 8 + 2;
2795 best_cost = cost;
2796 }
2797 }
2798 /* Check if we can sign extend in r0 */
2799 if (insize < 8)
2800 {
2801 cost = 3 + shift_insns[left];
2802 if (cost < best_cost)
2803 {
2804 kind = 6;
2805 best_cost = cost;
2806 }
2807 /* Try the same with a final signed shift. */
2808 if (left < 31)
2809 {
2810 cost = 3 + ext_shift_insns[left + 1] + 1;
2811 if (cost < best_cost)
2812 {
2813 kind = 7;
2814 best_cost = cost;
2815 }
2816 }
2817 }
2818 if (TARGET_SH3)
2819 {
2820 /* Try to use a dynamic shift. */
2821 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2822 if (cost < best_cost)
2823 {
2824 kind = 0;
2825 best_cost = cost;
2826 }
2827 }
2828 if (costp)
2829 *costp = cost;
2830 return kind;
2831 }
2832
2833 /* Function to be used in the length attribute of the instructions
2834 implementing this pattern. */
2835
2836 int
2837 shl_sext_length (rtx insn)
2838 {
2839 rtx set_src, left_rtx, size_rtx;
2840 int cost;
2841
2842 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2843 left_rtx = XEXP (XEXP (set_src, 0), 1);
2844 size_rtx = XEXP (set_src, 1);
2845 shl_sext_kind (left_rtx, size_rtx, &cost);
2846 return cost;
2847 }
2848
2849 /* Generate rtl for this pattern */
2850
2851 int
2852 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2853 {
2854 int kind;
2855 int left, size, insize, cost;
2856 rtx operands[3];
2857
2858 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2859 left = INTVAL (left_rtx);
2860 size = INTVAL (size_rtx);
2861 insize = size - left;
2862 switch (kind)
2863 {
2864 case 1:
2865 case 2:
2866 case 3:
2867 case 4:
2868 {
2869 int ext = kind & 1 ? 8 : 16;
2870 int shift2 = size - ext;
2871
2872 /* Don't expand fine-grained when combining, because that will
2873 make the pattern fail. */
2874 if (! currently_expanding_to_rtl
2875 && ! reload_in_progress && ! reload_completed)
2876 {
2877 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2878 emit_insn (gen_movsi (dest, source));
2879 break;
2880 }
2881 if (dest != source)
2882 emit_insn (gen_movsi (dest, source));
2883 operands[0] = dest;
2884 if (ext - insize)
2885 {
2886 operands[2] = GEN_INT (ext - insize);
2887 gen_shifty_hi_op (ASHIFT, operands);
2888 }
2889 emit_insn (kind & 1
2890 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2891 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2892 if (kind <= 2)
2893 {
2894 if (shift2)
2895 {
2896 operands[2] = GEN_INT (shift2);
2897 gen_shifty_op (ASHIFT, operands);
2898 }
2899 }
2900 else
2901 {
2902 if (shift2 > 0)
2903 {
2904 if (EXT_SHIFT_SIGNED (shift2))
2905 {
2906 operands[2] = GEN_INT (shift2 + 1);
2907 gen_shifty_op (ASHIFT, operands);
2908 operands[2] = const1_rtx;
2909 gen_shifty_op (ASHIFTRT, operands);
2910 break;
2911 }
2912 operands[2] = GEN_INT (shift2);
2913 gen_shifty_hi_op (ASHIFT, operands);
2914 }
2915 else if (shift2)
2916 {
2917 operands[2] = GEN_INT (-shift2);
2918 gen_shifty_hi_op (LSHIFTRT, operands);
2919 }
2920 emit_insn (size <= 8
2921 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2922 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2923 }
2924 break;
2925 }
2926 case 5:
2927 {
2928 int i = 16 - size;
2929 if (! currently_expanding_to_rtl
2930 && ! reload_in_progress && ! reload_completed)
2931 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2932 else
2933 {
2934 operands[0] = dest;
2935 operands[2] = GEN_INT (16 - insize);
2936 gen_shifty_hi_op (ASHIFT, operands);
2937 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2938 }
2939 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2940 while (--i >= 0)
2941 gen_ashift (ASHIFTRT, 1, dest);
2942 break;
2943 }
2944 case 6:
2945 case 7:
2946 /* Don't expand fine-grained when combining, because that will
2947 make the pattern fail. */
2948 if (! currently_expanding_to_rtl
2949 && ! reload_in_progress && ! reload_completed)
2950 {
2951 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2952 emit_insn (gen_movsi (dest, source));
2953 break;
2954 }
2955 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2956 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2957 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2958 operands[0] = dest;
2959 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2960 gen_shifty_op (ASHIFT, operands);
2961 if (kind == 7)
2962 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2963 break;
2964 default:
2965 return -1;
2966 }
2967 return 0;
2968 }
2969
2970 /* Prefix a symbol_ref name with "datalabel". */
2971
2972 rtx
2973 gen_datalabel_ref (rtx sym)
2974 {
2975 const char *str;
2976
2977 if (GET_CODE (sym) == LABEL_REF)
2978 return gen_rtx_CONST (GET_MODE (sym),
2979 gen_rtx_UNSPEC (GET_MODE (sym),
2980 gen_rtvec (1, sym),
2981 UNSPEC_DATALABEL));
2982
2983 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2984
2985 str = XSTR (sym, 0);
2986 /* Share all SYMBOL_REF strings with the same value - that is important
2987 for cse. */
2988 str = IDENTIFIER_POINTER (get_identifier (str));
2989 XSTR (sym, 0) = str;
2990
2991 return sym;
2992 }
2993
2994
2995 static alloc_pool label_ref_list_pool;
2996
2997 typedef struct label_ref_list_d
2998 {
2999 rtx label;
3000 struct label_ref_list_d *next;
3001 } *label_ref_list_t;
3002
3003 /* The SH cannot load a large constant into a register, constants have to
3004 come from a pc relative load. The reference of a pc relative load
3005 instruction must be less than 1k in front of the instruction. This
3006 means that we often have to dump a constant inside a function, and
3007 generate code to branch around it.
3008
3009 It is important to minimize this, since the branches will slow things
3010 down and make things bigger.
3011
3012 Worst case code looks like:
3013
3014 mov.l L1,rn
3015 bra L2
3016 nop
3017 align
3018 L1: .long value
3019 L2:
3020 ..
3021
3022 mov.l L3,rn
3023 bra L4
3024 nop
3025 align
3026 L3: .long value
3027 L4:
3028 ..
3029
3030 We fix this by performing a scan before scheduling, which notices which
3031 instructions need to have their operands fetched from the constant table
3032 and builds the table.
3033
3034 The algorithm is:
3035
3036 scan, find an instruction which needs a pcrel move. Look forward, find the
3037 last barrier which is within MAX_COUNT bytes of the requirement.
3038 If there isn't one, make one. Process all the instructions between
3039 the find and the barrier.
3040
3041 In the above example, we can tell that L3 is within 1k of L1, so
3042 the first move can be shrunk from the 3 insn+constant sequence into
3043 just 1 insn, and the constant moved to L3 to make:
3044
3045 mov.l L1,rn
3046 ..
3047 mov.l L3,rn
3048 bra L4
3049 nop
3050 align
3051 L3:.long value
3052 L4:.long value
3053
3054 Then the second move becomes the target for the shortening process. */
3055
3056 typedef struct
3057 {
3058 rtx value; /* Value in table. */
3059 rtx label; /* Label of value. */
3060 label_ref_list_t wend; /* End of window. */
3061 enum machine_mode mode; /* Mode of value. */
3062
3063 /* True if this constant is accessed as part of a post-increment
3064 sequence. Note that HImode constants are never accessed in this way. */
3065 bool part_of_sequence_p;
3066 } pool_node;
3067
3068 /* The maximum number of constants that can fit into one pool, since
3069 constants in the range 0..510 are at least 2 bytes long, and in the
3070 range from there to 1018 at least 4 bytes. */
3071
3072 #define MAX_POOL_SIZE 372
3073 static pool_node pool_vector[MAX_POOL_SIZE];
3074 static int pool_size;
3075 static rtx pool_window_label;
3076 static int pool_window_last;
3077
3078 static int max_labelno_before_reorg;
3079
3080 /* ??? If we need a constant in HImode which is the truncated value of a
3081 constant we need in SImode, we could combine the two entries thus saving
3082 two bytes. Is this common enough to be worth the effort of implementing
3083 it? */
3084
3085 /* ??? This stuff should be done at the same time that we shorten branches.
3086 As it is now, we must assume that all branches are the maximum size, and
3087 this causes us to almost always output constant pools sooner than
3088 necessary. */
3089
3090 /* Add a constant to the pool and return its label. */
3091
3092 static rtx
3093 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3094 {
3095 int i;
3096 rtx lab, new;
3097 label_ref_list_t ref, newref;
3098
3099 /* First see if we've already got it. */
3100 for (i = 0; i < pool_size; i++)
3101 {
3102 if (x->code == pool_vector[i].value->code
3103 && mode == pool_vector[i].mode)
3104 {
3105 if (x->code == CODE_LABEL)
3106 {
3107 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3108 continue;
3109 }
3110 if (rtx_equal_p (x, pool_vector[i].value))
3111 {
3112 lab = new = 0;
3113 if (! last_value
3114 || ! i
3115 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3116 {
3117 new = gen_label_rtx ();
3118 LABEL_REFS (new) = pool_vector[i].label;
3119 pool_vector[i].label = lab = new;
3120 }
3121 if (lab && pool_window_label)
3122 {
3123 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3124 newref->label = pool_window_label;
3125 ref = pool_vector[pool_window_last].wend;
3126 newref->next = ref;
3127 pool_vector[pool_window_last].wend = newref;
3128 }
3129 if (new)
3130 pool_window_label = new;
3131 pool_window_last = i;
3132 return lab;
3133 }
3134 }
3135 }
3136
3137 /* Need a new one. */
3138 pool_vector[pool_size].value = x;
3139 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3140 {
3141 lab = 0;
3142 pool_vector[pool_size - 1].part_of_sequence_p = true;
3143 }
3144 else
3145 lab = gen_label_rtx ();
3146 pool_vector[pool_size].mode = mode;
3147 pool_vector[pool_size].label = lab;
3148 pool_vector[pool_size].wend = NULL;
3149 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3150 if (lab && pool_window_label)
3151 {
3152 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3153 newref->label = pool_window_label;
3154 ref = pool_vector[pool_window_last].wend;
3155 newref->next = ref;
3156 pool_vector[pool_window_last].wend = newref;
3157 }
3158 if (lab)
3159 pool_window_label = lab;
3160 pool_window_last = pool_size;
3161 pool_size++;
3162 return lab;
3163 }
3164
3165 /* Output the literal table. START, if nonzero, is the first instruction
3166 this table is needed for, and also indicates that there is at least one
3167 casesi_worker_2 instruction; We have to emit the operand3 labels from
3168 these insns at a 4-byte aligned position. BARRIER is the barrier
3169 after which we are to place the table. */
3170
3171 static void
3172 dump_table (rtx start, rtx barrier)
3173 {
3174 rtx scan = barrier;
3175 int i;
3176 int need_align = 1;
3177 rtx lab;
3178 label_ref_list_t ref;
3179 int have_df = 0;
3180
3181 /* Do two passes, first time dump out the HI sized constants. */
3182
3183 for (i = 0; i < pool_size; i++)
3184 {
3185 pool_node *p = &pool_vector[i];
3186
3187 if (p->mode == HImode)
3188 {
3189 if (need_align)
3190 {
3191 scan = emit_insn_after (gen_align_2 (), scan);
3192 need_align = 0;
3193 }
3194 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3195 scan = emit_label_after (lab, scan);
3196 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3197 scan);
3198 for (ref = p->wend; ref; ref = ref->next)
3199 {
3200 lab = ref->label;
3201 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3202 }
3203 }
3204 else if (p->mode == DFmode)
3205 have_df = 1;
3206 }
3207
3208 need_align = 1;
3209
3210 if (start)
3211 {
3212 scan = emit_insn_after (gen_align_4 (), scan);
3213 need_align = 0;
3214 for (; start != barrier; start = NEXT_INSN (start))
3215 if (GET_CODE (start) == INSN
3216 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3217 {
3218 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3219 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3220
3221 scan = emit_label_after (lab, scan);
3222 }
3223 }
3224 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3225 {
3226 rtx align_insn = NULL_RTX;
3227
3228 scan = emit_label_after (gen_label_rtx (), scan);
3229 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3230 need_align = 0;
3231
3232 for (i = 0; i < pool_size; i++)
3233 {
3234 pool_node *p = &pool_vector[i];
3235
3236 switch (p->mode)
3237 {
3238 case HImode:
3239 break;
3240 case SImode:
3241 case SFmode:
3242 if (align_insn && !p->part_of_sequence_p)
3243 {
3244 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3245 emit_label_before (lab, align_insn);
3246 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3247 align_insn);
3248 for (ref = p->wend; ref; ref = ref->next)
3249 {
3250 lab = ref->label;
3251 emit_insn_before (gen_consttable_window_end (lab),
3252 align_insn);
3253 }
3254 delete_insn (align_insn);
3255 align_insn = NULL_RTX;
3256 continue;
3257 }
3258 else
3259 {
3260 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3261 scan = emit_label_after (lab, scan);
3262 scan = emit_insn_after (gen_consttable_4 (p->value,
3263 const0_rtx), scan);
3264 need_align = ! need_align;
3265 }
3266 break;
3267 case DFmode:
3268 if (need_align)
3269 {
3270 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3271 align_insn = scan;
3272 need_align = 0;
3273 }
3274 case DImode:
3275 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3276 scan = emit_label_after (lab, scan);
3277 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3278 scan);
3279 break;
3280 default:
3281 gcc_unreachable ();
3282 }
3283
3284 if (p->mode != HImode)
3285 {
3286 for (ref = p->wend; ref; ref = ref->next)
3287 {
3288 lab = ref->label;
3289 scan = emit_insn_after (gen_consttable_window_end (lab),
3290 scan);
3291 }
3292 }
3293 }
3294
3295 pool_size = 0;
3296 }
3297
3298 for (i = 0; i < pool_size; i++)
3299 {
3300 pool_node *p = &pool_vector[i];
3301
3302 switch (p->mode)
3303 {
3304 case HImode:
3305 break;
3306 case SImode:
3307 case SFmode:
3308 if (need_align)
3309 {
3310 need_align = 0;
3311 scan = emit_label_after (gen_label_rtx (), scan);
3312 scan = emit_insn_after (gen_align_4 (), scan);
3313 }
3314 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3315 scan = emit_label_after (lab, scan);
3316 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3317 scan);
3318 break;
3319 case DFmode:
3320 case DImode:
3321 if (need_align)
3322 {
3323 need_align = 0;
3324 scan = emit_label_after (gen_label_rtx (), scan);
3325 scan = emit_insn_after (gen_align_4 (), scan);
3326 }
3327 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3328 scan = emit_label_after (lab, scan);
3329 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3330 scan);
3331 break;
3332 default:
3333 gcc_unreachable ();
3334 }
3335
3336 if (p->mode != HImode)
3337 {
3338 for (ref = p->wend; ref; ref = ref->next)
3339 {
3340 lab = ref->label;
3341 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3342 }
3343 }
3344 }
3345
3346 scan = emit_insn_after (gen_consttable_end (), scan);
3347 scan = emit_barrier_after (scan);
3348 pool_size = 0;
3349 pool_window_label = NULL_RTX;
3350 pool_window_last = 0;
3351 }
3352
3353 /* Return nonzero if constant would be an ok source for a
3354 mov.w instead of a mov.l. */
3355
3356 static int
3357 hi_const (rtx src)
3358 {
3359 return (GET_CODE (src) == CONST_INT
3360 && INTVAL (src) >= -32768
3361 && INTVAL (src) <= 32767);
3362 }
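/* I.e. the value fits in a signed 16-bit word, so it can be placed in the
   pool as a 2-byte entry and loaded with a sign-extending mov.w instead
   of a 4-byte mov.l entry.  */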
3363
3364 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
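/* MOVA_LABELREF extracts the label (or, for mova_const, the constant)
   operand from the UNSPEC_MOVA source of a mova instruction.  */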
3365
3366 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3367
3368 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3369 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3370 need to fix it if the input value is CONST_OK_FOR_I08. */
3371
3372 static int
3373 broken_move (rtx insn)
3374 {
3375 if (GET_CODE (insn) == INSN)
3376 {
3377 rtx pat = PATTERN (insn);
3378 if (GET_CODE (pat) == PARALLEL)
3379 pat = XVECEXP (pat, 0, 0);
3380 if (GET_CODE (pat) == SET
3381 /* We can load any 8 bit value if we don't care what the high
3382 order bits end up as. */
3383 && GET_MODE (SET_DEST (pat)) != QImode
3384 && (CONSTANT_P (SET_SRC (pat))
3385 /* Match mova_const. */
3386 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3387 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3388 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3389 && ! (TARGET_SH2E
3390 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3391 && (fp_zero_operand (SET_SRC (pat))
3392 || fp_one_operand (SET_SRC (pat)))
3393 /* ??? If this is a -m4 or -m4-single compilation, in general
3394 we don't know the current setting of fpscr, so disable fldi.
3395 There is an exception if this was a register-register move
3396 before reload - and hence it was ascertained that we have
3397 single precision setting - and in a post-reload optimization
3398 we changed this to do a constant load. In that case
3399 we don't have an r0 clobber, hence we must use fldi. */
3400 && (! TARGET_SH4 || TARGET_FMOVD
3401 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3402 == SCRATCH))
3403 && GET_CODE (SET_DEST (pat)) == REG
3404 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3405 && ! (TARGET_SH2A
3406 && GET_MODE (SET_DEST (pat)) == SImode
3407 && GET_CODE (SET_SRC (pat)) == CONST_INT
3408 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3409 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3410 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3411 return 1;
3412 }
3413
3414 return 0;
3415 }
3416
3417 static int
3418 mova_p (rtx insn)
3419 {
3420 return (GET_CODE (insn) == INSN
3421 && GET_CODE (PATTERN (insn)) == SET
3422 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3423 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3424 /* Don't match mova_const. */
3425 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3426 }
3427
3428 /* Fix up a mova from a switch that went out of range. */
3429 static void
3430 fixup_mova (rtx mova)
3431 {
3432 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3433 if (! flag_pic)
3434 {
3435 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3436 INSN_CODE (mova) = -1;
3437 }
3438 else
3439 {
3440 rtx worker = mova;
3441 rtx lab = gen_label_rtx ();
3442 rtx wpat, wpat0, wpat1, wsrc, diff;
3443
3444 do
3445 {
3446 worker = NEXT_INSN (worker);
3447 gcc_assert (worker
3448 && GET_CODE (worker) != CODE_LABEL
3449 && GET_CODE (worker) != JUMP_INSN);
3450 } while (GET_CODE (worker) == NOTE
3451 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3452 wpat = PATTERN (worker);
3453 wpat0 = XVECEXP (wpat, 0, 0);
3454 wpat1 = XVECEXP (wpat, 0, 1);
3455 wsrc = SET_SRC (wpat0);
3456 PATTERN (worker) = (gen_casesi_worker_2
3457 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3458 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3459 XEXP (wpat1, 0)));
3460 INSN_CODE (worker) = -1;
3461 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3462 gen_rtx_LABEL_REF (Pmode, lab));
3463 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3464 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3465 INSN_CODE (mova) = -1;
3466 }
3467 }
3468
3469 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3470 *num_mova, and check if the new mova is not nested within the first one.
3471 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3472 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3473 static int
3474 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3475 {
3476 int n_addr = 0; /* Initialization to shut up spurious warning. */
3477 int f_target, n_target = 0; /* Likewise. */
3478
3479 if (optimize)
3480 {
3481 /* If NEW_MOVA has no address yet, it will be handled later. */
3482 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
3483 return -1;
3484
3485 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3486 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3487 if (n_addr > n_target || n_addr + 1022 < n_target)
3488 {
3489 /* Change the mova into a load.
3490 broken_move will then return true for it. */
3491 fixup_mova (new_mova);
3492 return 1;
3493 }
3494 }
3495 if (!(*num_mova)++)
3496 {
3497 *first_mova = new_mova;
3498 return 2;
3499 }
3500 if (!optimize
3501 || ((f_target
3502 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3503 >= n_target))
3504 return -1;
3505
3506 (*num_mova)--;
3507 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3508 > n_target - n_addr)
3509 {
3510 fixup_mova (*first_mova);
3511 return 0;
3512 }
3513 else
3514 {
3515 fixup_mova (new_mova);
3516 return 1;
3517 }
3518 }
3519
3520 /* Find the last barrier from insn FROM which is close enough to hold the
3521 constant pool. If we can't find one, then create one near the end of
3522 the range. */
3523
3524 static rtx
3525 find_barrier (int num_mova, rtx mova, rtx from)
3526 {
3527 int count_si = 0;
3528 int count_hi = 0;
3529 int found_hi = 0;
3530 int found_si = 0;
3531 int found_di = 0;
3532 int hi_align = 2;
3533 int si_align = 2;
3534 int leading_mova = num_mova;
3535 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3536 int si_limit;
3537 int hi_limit;
3538 rtx orig = from;
3539
3540 /* For HImode: range is 510, add 4 because pc counts from address of
3541 second instruction after this one, subtract 2 for the jump instruction
3542 that we may need to emit before the table, subtract 2 for the instruction
3543 that fills the jump delay slot (in very rare cases, reorg will take an
3544 instruction from after the constant pool or will leave the delay slot
3545 empty). This gives 510.
3546 For SImode: range is 1020, add 4 because pc counts from address of
3547 second instruction after this one, subtract 2 in case pc is 2 byte
3548 aligned, subtract 2 for the jump instruction that we may need to emit
3549 before the table, subtract 2 for the instruction that fills the jump
3550 delay slot. This gives 1018. */
3551
3552 /* The branch will always be shortened now that the reference address for
3553 forward branches is the successor address, thus we need no longer make
3554 adjustments to the [sh]i_limit for -O0. */
3555
3556 si_limit = 1018;
3557 hi_limit = 510;
3558
3559 while (from && count_si < si_limit && count_hi < hi_limit)
3560 {
3561 int inc = get_attr_length (from);
3562 int new_align = 1;
3563
3564 /* If this is a label that existed at the time of the compute_alignments
3565 call, determine the alignment. N.B. When find_barrier recurses for
3566 an out-of-reach mova, we might see labels at the start of previously
3567 inserted constant tables. */
3568 if (GET_CODE (from) == CODE_LABEL
3569 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3570 {
3571 if (optimize)
3572 new_align = 1 << label_to_alignment (from);
3573 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3574 new_align = 1 << barrier_align (from);
3575 else
3576 new_align = 1;
3577 inc = 0;
3578 }
3579 /* In case we are scanning a constant table because of recursion, check
3580 for explicit alignments. If the table is long, we might be forced
3581 to emit the new table in front of it; the length of the alignment
3582 might be the last straw. */
3583 else if (GET_CODE (from) == INSN
3584 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3585 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3586 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3587 /* When we find the end of a constant table, paste the new constant
3588 at the end. That is better than putting it in front because
3589 this way, we don't need extra alignment for adding a 4-byte-aligned
3590 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3591 else if (GET_CODE (from) == INSN
3592 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3593 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3594 return from;
3595
3596 if (GET_CODE (from) == BARRIER)
3597 {
3598
3599 found_barrier = from;
3600
3601 /* If we are at the end of the function, or in front of an alignment
3602 instruction, we need not insert an extra alignment. We prefer
3603 this kind of barrier. */
3604 if (barrier_align (from) > 2)
3605 good_barrier = from;
3606 }
3607
3608 if (broken_move (from))
3609 {
3610 rtx pat, src, dst;
3611 enum machine_mode mode;
3612
3613 pat = PATTERN (from);
3614 if (GET_CODE (pat) == PARALLEL)
3615 pat = XVECEXP (pat, 0, 0);
3616 src = SET_SRC (pat);
3617 dst = SET_DEST (pat);
3618 mode = GET_MODE (dst);
3619
3620 /* We must explicitly check the mode, because sometimes the
3621 front end will generate code to load unsigned constants into
3622 HImode targets without properly sign extending them. */
3623 if (mode == HImode
3624 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3625 {
3626 found_hi += 2;
3627 /* We put the short constants before the long constants, so
3628 we must count the length of short constants in the range
3629 for the long constants. */
3630 /* ??? This isn't optimal, but is easy to do. */
3631 si_limit -= 2;
3632 }
3633 else
3634 {
3635 /* We dump DF/DI constants before SF/SI ones, because
3636 the limit is the same, but the alignment requirements
3637 are higher. We may waste up to 4 additional bytes
3638 for alignment, and the DF/DI constant may have
3639 another SF/SI constant placed before it. */
3640 if (TARGET_SHCOMPACT
3641 && ! found_di
3642 && (mode == DFmode || mode == DImode))
3643 {
3644 found_di = 1;
3645 si_limit -= 8;
3646 }
3647 while (si_align > 2 && found_si + si_align - 2 > count_si)
3648 si_align >>= 1;
3649 if (found_si > count_si)
3650 count_si = found_si;
3651 found_si += GET_MODE_SIZE (mode);
3652 if (num_mova)
3653 si_limit -= GET_MODE_SIZE (mode);
3654 }
3655 }
3656
3657 if (mova_p (from))
3658 {
3659 switch (untangle_mova (&num_mova, &mova, from))
3660 {
3661 case 0: return find_barrier (0, 0, mova);
3662 case 2:
3663 {
3664 leading_mova = 0;
3665 barrier_before_mova
3666 = good_barrier ? good_barrier : found_barrier;
3667 }
3668 default: break;
3669 }
3670 if (found_si > count_si)
3671 count_si = found_si;
3672 }
3673 else if (GET_CODE (from) == JUMP_INSN
3674 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3675 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3676 {
3677 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3678 || (num_mova
3679 && (prev_nonnote_insn (from)
3680 == XEXP (MOVA_LABELREF (mova), 0))))
3681 num_mova--;
3682 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3683 {
3684 /* We have just passed the barrier in front of the
3685 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3686 the ADDR_DIFF_VEC is accessed as data, just like our pool
3687 constants, this is a good opportunity to accommodate what
3688 we have gathered so far.
3689 If we waited any longer, we could end up at a barrier in
3690 front of code, which gives worse cache usage for separated
3691 instruction / data caches. */
3692 good_barrier = found_barrier;
3693 break;
3694 }
3695 else
3696 {
3697 rtx body = PATTERN (from);
3698 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3699 }
3700 }
3701 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3702 else if (GET_CODE (from) == JUMP_INSN
3703 && ! TARGET_SH2
3704 && ! TARGET_SMALLCODE)
3705 new_align = 4;
3706
3707 if (found_si)
3708 {
3709 count_si += inc;
3710 if (new_align > si_align)
3711 {
3712 si_limit -= (count_si - 1) & (new_align - si_align);
3713 si_align = new_align;
3714 }
3715 count_si = (count_si + new_align - 1) & -new_align;
3716 }
3717 if (found_hi)
3718 {
3719 count_hi += inc;
3720 if (new_align > hi_align)
3721 {
3722 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3723 hi_align = new_align;
3724 }
3725 count_hi = (count_hi + new_align - 1) & -new_align;
3726 }
3727 from = NEXT_INSN (from);
3728 }
3729
3730 if (num_mova)
3731 {
3732 if (leading_mova)
3733 {
3734 /* Try as we might, the leading mova is out of range. Change
3735 it into a load (which will become a pcload) and retry. */
3736 fixup_mova (mova);
3737 return find_barrier (0, 0, mova);
3738 }
3739 else
3740 {
3741 /* Insert the constant pool table before the mova instruction,
3742 to prevent the mova label reference from going out of range. */
3743 from = mova;
3744 good_barrier = found_barrier = barrier_before_mova;
3745 }
3746 }
3747
3748 if (found_barrier)
3749 {
3750 if (good_barrier && next_real_insn (found_barrier))
3751 found_barrier = good_barrier;
3752 }
3753 else
3754 {
3755 /* We didn't find a barrier in time to dump our stuff,
3756 so we'll make one. */
3757 rtx label = gen_label_rtx ();
3758
3759 /* If we exceeded the range, then we must back up over the last
3760 instruction we looked at. Otherwise, we just need to undo the
3761 NEXT_INSN at the end of the loop. */
3762 if (PREV_INSN (from) != orig
3763 && (count_hi > hi_limit || count_si > si_limit))
3764 from = PREV_INSN (PREV_INSN (from));
3765 else
3766 from = PREV_INSN (from);
3767
3768 /* Walk back to be just before any jump or label.
3769 Putting it before a label reduces the number of times the branch
3770 around the constant pool table will be hit. Putting it before
3771 a jump makes it more likely that the bra delay slot will be
3772 filled. */
3773 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3774 || GET_CODE (from) == CODE_LABEL)
3775 from = PREV_INSN (from);
3776
3777 from = emit_jump_insn_after (gen_jump (label), from);
3778 JUMP_LABEL (from) = label;
3779 LABEL_NUSES (label) = 1;
3780 found_barrier = emit_barrier_after (from);
3781 emit_label_after (label, found_barrier);
3782 }
3783
3784 return found_barrier;
3785 }
3786
3787 /* If the instruction INSN is implemented by a special function, and we can
3788 positively find the register that is used to call the sfunc, and this
3789 register is not used anywhere else in this instruction - except as the
3790 destination of a set, return this register; else, return 0. */
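/* Illustrative sketch of the kind of pattern this looks for (the precise
   shape is defined by the sfunc patterns in sh.md, so treat the details
   below as an example only):

     (parallel [(set (reg:SI 0) ...)
		(use (reg:SI 3))
		(clobber (reg:SI PR_REG))])

   Here the (use (reg:SI 3)) names the register holding the sfunc address,
   and sfunc_uses_reg would return (reg:SI 3), provided reg 3 appears
   nowhere else in the pattern except possibly as a SET destination.  */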
3791 rtx
3792 sfunc_uses_reg (rtx insn)
3793 {
3794 int i;
3795 rtx pattern, part, reg_part, reg;
3796
3797 if (GET_CODE (insn) != INSN)
3798 return 0;
3799 pattern = PATTERN (insn);
3800 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3801 return 0;
3802
3803 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3804 {
3805 part = XVECEXP (pattern, 0, i);
3806 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3807 reg_part = part;
3808 }
3809 if (! reg_part)
3810 return 0;
3811 reg = XEXP (reg_part, 0);
3812 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3813 {
3814 part = XVECEXP (pattern, 0, i);
3815 if (part == reg_part || GET_CODE (part) == CLOBBER)
3816 continue;
3817 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3818 && GET_CODE (SET_DEST (part)) == REG)
3819 ? SET_SRC (part) : part)))
3820 return 0;
3821 }
3822 return reg;
3823 }
3824
3825 /* See if the only way in which INSN uses REG is by calling it, or by
3826 setting it while calling it. Set *SET to a SET rtx if the register
3827 is set by INSN. */
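/* (Explanatory note:) The return value is zero when the only use of REG in
   INSN is as the called address (or as the address register of an sfunc),
   and nonzero when INSN uses REG in any other way.  */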
3828
3829 static int
3830 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3831 {
3832 rtx pattern, reg2;
3833
3834 *set = NULL_RTX;
3835
3836 reg2 = sfunc_uses_reg (insn);
3837 if (reg2 && REGNO (reg2) == REGNO (reg))
3838 {
3839 pattern = single_set (insn);
3840 if (pattern
3841 && GET_CODE (SET_DEST (pattern)) == REG
3842 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3843 *set = pattern;
3844 return 0;
3845 }
3846 if (GET_CODE (insn) != CALL_INSN)
3847 {
3848 /* We don't use rtx_equal_p because we don't care if the mode is
3849 different. */
3850 pattern = single_set (insn);
3851 if (pattern
3852 && GET_CODE (SET_DEST (pattern)) == REG
3853 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3854 {
3855 rtx par, part;
3856 int i;
3857
3858 *set = pattern;
3859 par = PATTERN (insn);
3860 if (GET_CODE (par) == PARALLEL)
3861 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3862 {
3863 part = XVECEXP (par, 0, i);
3864 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3865 return 1;
3866 }
3867 return reg_mentioned_p (reg, SET_SRC (pattern));
3868 }
3869
3870 return 1;
3871 }
3872
3873 pattern = PATTERN (insn);
3874
3875 if (GET_CODE (pattern) == PARALLEL)
3876 {
3877 int i;
3878
3879 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3880 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3881 return 1;
3882 pattern = XVECEXP (pattern, 0, 0);
3883 }
3884
3885 if (GET_CODE (pattern) == SET)
3886 {
3887 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3888 {
3889 /* We don't use rtx_equal_p, because we don't care if the
3890 mode is different. */
3891 if (GET_CODE (SET_DEST (pattern)) != REG
3892 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3893 return 1;
3894
3895 *set = pattern;
3896 }
3897
3898 pattern = SET_SRC (pattern);
3899 }
3900
3901 if (GET_CODE (pattern) != CALL
3902 || GET_CODE (XEXP (pattern, 0)) != MEM
3903 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3904 return 1;
3905
3906 return 0;
3907 }
3908
3909 /* Given X, a pattern of an insn or a part of it, return a mask of used
3910 general registers. Bits 0..15 mean that the respective registers
3911 are used as inputs in the instruction. Bits 16..31 mean that the
3912 registers 0..15, respectively, are used as outputs, or are clobbered.
3913 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
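/* For example (illustrative): for the pattern
     (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the result is 0x0002000c -- bits 2 and 3 for the two input registers,
   and bit 1 + 16 == 17 for the output register.  */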
3914 int
3915 regs_used (rtx x, int is_dest)
3916 {
3917 enum rtx_code code;
3918 const char *fmt;
3919 int i, used = 0;
3920
3921 if (! x)
3922 return used;
3923 code = GET_CODE (x);
3924 switch (code)
3925 {
3926 case REG:
3927 if (REGNO (x) < 16)
3928 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3929 << (REGNO (x) + is_dest));
3930 return 0;
3931 case SUBREG:
3932 {
3933 rtx y = SUBREG_REG (x);
3934
3935 if (GET_CODE (y) != REG)
3936 break;
3937 if (REGNO (y) < 16)
3938 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3939 << (REGNO (y) +
3940 subreg_regno_offset (REGNO (y),
3941 GET_MODE (y),
3942 SUBREG_BYTE (x),
3943 GET_MODE (x)) + is_dest));
3944 return 0;
3945 }
3946 case SET:
3947 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3948 case RETURN:
3949 /* If there was a return value, it must have been indicated with USE. */
3950 return 0x00ffff00;
3951 case CLOBBER:
3952 is_dest = 1;
3953 break;
3954 case MEM:
3955 is_dest = 0;
3956 break;
3957 case CALL:
3958 used |= 0x00ff00f0;
3959 break;
3960 default:
3961 break;
3962 }
3963
3964 fmt = GET_RTX_FORMAT (code);
3965
3966 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3967 {
3968 if (fmt[i] == 'E')
3969 {
3970 register int j;
3971 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3972 used |= regs_used (XVECEXP (x, i, j), is_dest);
3973 }
3974 else if (fmt[i] == 'e')
3975 used |= regs_used (XEXP (x, i), is_dest);
3976 }
3977 return used;
3978 }
3979
3980 /* Create an instruction that prevents redirection of a conditional branch
3981 to the destination of the JUMP with address ADDR.
3982 If the branch needs to be implemented as an indirect jump, try to find
3983 a scratch register for it.
3984 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3985 If any preceding insn that doesn't fit into a delay slot is good enough,
3986 pass 1. Pass 2 if a definite blocking insn is needed.
3987 -1 is used internally to avoid deep recursion.
3988 If a blocking instruction is made or recognized, return it. */
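/* (Explanatory note on the range checks used below:) the idiom

     INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 > 4092 + 4098

   relies on unsigned wrap-around so that a single comparison rejects
   displacements outside the window [-4092, +4098]; only then do we go
   looking for a scratch register for an indirect jump.  */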
3989
3990 static rtx
3991 gen_block_redirect (rtx jump, int addr, int need_block)
3992 {
3993 int dead = 0;
3994 rtx prev = prev_nonnote_insn (jump);
3995 rtx dest;
3996
3997 /* First, check if we already have an instruction that satisfies our need. */
3998 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3999 {
4000 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
4001 return prev;
4002 if (GET_CODE (PATTERN (prev)) == USE
4003 || GET_CODE (PATTERN (prev)) == CLOBBER
4004 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4005 prev = jump;
4006 else if ((need_block &= ~1) < 0)
4007 return prev;
4008 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
4009 need_block = 0;
4010 }
4011 if (GET_CODE (PATTERN (jump)) == RETURN)
4012 {
4013 if (! need_block)
4014 return prev;
4015 /* Reorg even does nasty things with return insns that cause branches
4016 to go out of range - see find_end_label and callers. */
4017 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
4018 }
4019 /* We can't use JUMP_LABEL here because it might be undefined
4020 when not optimizing. */
4021 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
4022 /* If the branch is out of range, try to find a scratch register for it. */
4023 if (optimize
4024 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4025 > 4092 + 4098))
4026 {
4027 rtx scan;
4028 /* Don't look for the stack pointer as a scratch register,
4029 it would cause trouble if an interrupt occurred. */
4030 unsigned try = 0x7fff, used;
4031 int jump_left = flag_expensive_optimizations + 1;
4032
4033 /* It is likely that the most recent eligible instruction is wanted for
4034 the delay slot. Therefore, find out which registers it uses, and
4035 try to avoid using them. */
4036
4037 for (scan = jump; (scan = PREV_INSN (scan)); )
4038 {
4039 enum rtx_code code;
4040
4041 if (INSN_DELETED_P (scan))
4042 continue;
4043 code = GET_CODE (scan);
4044 if (code == CODE_LABEL || code == JUMP_INSN)
4045 break;
4046 if (code == INSN
4047 && GET_CODE (PATTERN (scan)) != USE
4048 && GET_CODE (PATTERN (scan)) != CLOBBER
4049 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4050 {
4051 try &= ~regs_used (PATTERN (scan), 0);
4052 break;
4053 }
4054 }
4055 for (used = dead = 0, scan = JUMP_LABEL (jump);
4056 (scan = NEXT_INSN (scan)); )
4057 {
4058 enum rtx_code code;
4059
4060 if (INSN_DELETED_P (scan))
4061 continue;
4062 code = GET_CODE (scan);
4063 if (INSN_P (scan))
4064 {
4065 used |= regs_used (PATTERN (scan), 0);
4066 if (code == CALL_INSN)
4067 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4068 dead |= (used >> 16) & ~used;
4069 if (dead & try)
4070 {
4071 dead &= try;
4072 break;
4073 }
4074 if (code == JUMP_INSN)
4075 {
4076 if (jump_left-- && simplejump_p (scan))
4077 scan = JUMP_LABEL (scan);
4078 else
4079 break;
4080 }
4081 }
4082 }
4083 /* Mask out the stack pointer again, in case it was
4084 the only 'free' register we have found. */
4085 dead &= 0x7fff;
4086 }
4087 /* If the immediate destination is still in range, check for possible
4088 threading with a jump beyond the delay slot insn.
4089 Don't check if we are called recursively; the jump has been or will be
4090 checked in a different invocation then. */
4091
4092 else if (optimize && need_block >= 0)
4093 {
4094 rtx next = next_active_insn (next_active_insn (dest));
4095 if (next && GET_CODE (next) == JUMP_INSN
4096 && GET_CODE (PATTERN (next)) == SET
4097 && recog_memoized (next) == CODE_FOR_jump_compact)
4098 {
4099 dest = JUMP_LABEL (next);
4100 if (dest
4101 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4102 > 4092 + 4098))
4103 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4104 }
4105 }
4106
4107 if (dead)
4108 {
4109 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4110
4111 /* It would be nice if we could convert the jump into an indirect
4112 jump / far branch right now, and thus exposing all constituent
4113 instructions to further optimization. However, reorg uses
4114 simplejump_p to determine if there is an unconditional jump where
4115 it should try to schedule instructions from the target of the
4116 branch; simplejump_p fails for indirect jumps even if they have
4117 a JUMP_LABEL. */
4118 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4119 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4120 , jump);
4121 /* ??? We would like this to have the scope of the jump, but that
4122 scope will change when a delay slot insn of an inner scope is added.
4123 Hence, after delay slot scheduling, we'll have to expect
4124 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4125 the jump. */
4126
4127 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4128 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4129 return insn;
4130 }
4131 else if (need_block)
4132 /* We can't use JUMP_LABEL here because it might be undefined
4133 when not optimizing. */
4134 return emit_insn_before (gen_block_branch_redirect
4135 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4136 , jump);
4137 return prev;
4138 }
4139
4140 #define CONDJUMP_MIN -252
4141 #define CONDJUMP_MAX 262
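/* CONDJUMP_MIN / CONDJUMP_MAX give the byte displacement window within which
   a conditional branch is still considered to reach its target directly;
   split_branches below rewrites branches outside this window into a
   conditional branch around an unconditional jump (see gen_far_branch).  */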
4142 struct far_branch
4143 {
4144 /* A label (to be placed) in front of the jump
4145 that jumps to our ultimate destination. */
4146 rtx near_label;
4147 /* Where we are going to insert it if we cannot move the jump any farther,
4148 or the jump itself if we have picked up an existing jump. */
4149 rtx insert_place;
4150 /* The ultimate destination. */
4151 rtx far_label;
4152 struct far_branch *prev;
4153 /* If the branch has already been created, its address;
4154 else the address of its first prospective user. */
4155 int address;
4156 };
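/* (Explanatory note:) gen_far_branch materializes such a far branch by
   inverting the original conditional branch so that it skips over a newly
   emitted unconditional jump to FAR_LABEL; NEAR_LABEL is placed right in
   front of that unconditional jump so other out-of-range branches to the
   same destination can be redirected to it.  */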
4157
4158 static void gen_far_branch (struct far_branch *);
4159 enum mdep_reorg_phase_e mdep_reorg_phase;
4160 static void
4161 gen_far_branch (struct far_branch *bp)
4162 {
4163 rtx insn = bp->insert_place;
4164 rtx jump;
4165 rtx label = gen_label_rtx ();
4166 int ok;
4167
4168 emit_label_after (label, insn);
4169 if (bp->far_label)
4170 {
4171 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4172 LABEL_NUSES (bp->far_label)++;
4173 }
4174 else
4175 jump = emit_jump_insn_after (gen_return (), insn);
4176 /* Emit a barrier so that reorg knows that any following instructions
4177 are not reachable via a fall-through path.
4178 But don't do this when not optimizing, since we wouldn't suppress the
4179 alignment for the barrier then, and could end up with out-of-range
4180 pc-relative loads. */
4181 if (optimize)
4182 emit_barrier_after (jump);
4183 emit_label_after (bp->near_label, insn);
4184 JUMP_LABEL (jump) = bp->far_label;
4185 ok = invert_jump (insn, label, 1);
4186 gcc_assert (ok);
4187
4188 /* If we are branching around a jump (rather than a return), prevent
4189 reorg from using an insn from the jump target as the delay slot insn -
4190 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4191 and it could cause branches to go out of range. */
4192 if (bp->far_label)
4193 (emit_insn_after
4194 (gen_stuff_delay_slot
4195 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4196 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4197 insn));
4198 /* Prevent reorg from undoing our splits. */
4199 gen_block_redirect (jump, bp->address += 2, 2);
4200 }
4201
4202 /* Fix up ADDR_DIFF_VECs. */
4203 void
4204 fixup_addr_diff_vecs (rtx first)
4205 {
4206 rtx insn;
4207
4208 for (insn = first; insn; insn = NEXT_INSN (insn))
4209 {
4210 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4211
4212 if (GET_CODE (insn) != JUMP_INSN
4213 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4214 continue;
4215 pat = PATTERN (insn);
4216 vec_lab = XEXP (XEXP (pat, 0), 0);
4217
4218 /* Search the matching casesi_jump_2. */
4219 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4220 {
4221 if (GET_CODE (prev) != JUMP_INSN)
4222 continue;
4223 prevpat = PATTERN (prev);
4224 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4225 continue;
4226 x = XVECEXP (prevpat, 0, 1);
4227 if (GET_CODE (x) != USE)
4228 continue;
4229 x = XEXP (x, 0);
4230 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4231 break;
4232 }
4233 /* FIXME: This is a bug in the optimizer, but it seems harmless
4234 to just avoid panicking. */
4235 if (!prev)
4236 continue;
4237
4238 /* Emit the reference label of the braf where it belongs, right after
4239 the casesi_jump_2 (i.e. braf). */
4240 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4241 emit_label_after (braf_label, prev);
4242
4243 /* Fix up the ADDR_DIF_VEC to be relative
4244 to the reference address of the braf. */
4245 XEXP (XEXP (pat, 0), 0) = braf_label;
4246 }
4247 }
4248
4249 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4250 a barrier. Return the base 2 logarithm of the desired alignment. */
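/* E.g. a return value of 2 asks for 4-byte alignment after the barrier,
   and 0 asks for no extra alignment at all.  */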
4251 int
4252 barrier_align (rtx barrier_or_label)
4253 {
4254 rtx next = next_real_insn (barrier_or_label), pat, prev;
4255 int slot, credit, jump_to_next = 0;
4256
4257 if (! next)
4258 return 0;
4259
4260 pat = PATTERN (next);
4261
4262 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4263 return 2;
4264
4265 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4266 /* This is a barrier in front of a constant table. */
4267 return 0;
4268
4269 prev = prev_real_insn (barrier_or_label);
4270 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4271 {
4272 pat = PATTERN (prev);
4273 /* If this is a very small table, we want to keep the alignment after
4274 the table to the minimum for proper code alignment. */
4275 return ((TARGET_SMALLCODE
4276 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4277 <= (unsigned) 1 << (CACHE_LOG - 2)))
4278 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4279 }
4280
4281 if (TARGET_SMALLCODE)
4282 return 0;
4283
4284 if (! TARGET_SH2 || ! optimize)
4285 return align_jumps_log;
4286
4287 /* When fixing up pcloads, a constant table might be inserted just before
4288 the basic block that ends with the barrier. Thus, we can't trust the
4289 instruction lengths before that. */
4290 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4291 {
4292 /* Check if there is an immediately preceding branch to the insn beyond
4293 the barrier. We must weigh the cost of discarding useful information
4294 from the current cache line when executing this branch and there is
4295 an alignment, against that of fetching unneeded insns in front of the
4296 branch target when there is no alignment. */
4297
4298 /* There are two delay_slot cases to consider. One is the simple case
4299 where the preceding branch is to the insn beyond the barrier (simple
4300 delay slot filling), and the other is where the preceding branch has
4301 a delay slot that is a duplicate of the insn after the barrier
4302 (fill_eager_delay_slots) and the branch is to the insn after the insn
4303 after the barrier. */
4304
4305 /* PREV is presumed to be the JUMP_INSN for the barrier under
4306 investigation. Skip to the insn before it. */
4307 prev = prev_real_insn (prev);
4308
4309 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4310 credit >= 0 && prev && GET_CODE (prev) == INSN;
4311 prev = prev_real_insn (prev))
4312 {
4313 jump_to_next = 0;
4314 if (GET_CODE (PATTERN (prev)) == USE
4315 || GET_CODE (PATTERN (prev)) == CLOBBER)
4316 continue;
4317 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4318 {
4319 prev = XVECEXP (PATTERN (prev), 0, 1);
4320 if (INSN_UID (prev) == INSN_UID (next))
4321 {
4322 /* Delay slot was filled with insn at jump target. */
4323 jump_to_next = 1;
4324 continue;
4325 }
4326 }
4327
4328 if (slot &&
4329 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4330 slot = 0;
4331 credit -= get_attr_length (prev);
4332 }
4333 if (prev
4334 && GET_CODE (prev) == JUMP_INSN
4335 && JUMP_LABEL (prev))
4336 {
4337 rtx x;
4338 if (jump_to_next
4339 || next_real_insn (JUMP_LABEL (prev)) == next
4340 /* If relax_delay_slots() decides NEXT was redundant
4341 with some previous instruction, it will have
4342 redirected PREV's jump to the following insn. */
4343 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4344 /* There is no upper bound on redundant instructions
4345 that might have been skipped, but we must not put an
4346 alignment where none had been before. */
4347 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4348 (INSN_P (x)
4349 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4350 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4351 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4352 {
4353 rtx pat = PATTERN (prev);
4354 if (GET_CODE (pat) == PARALLEL)
4355 pat = XVECEXP (pat, 0, 0);
4356 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4357 return 0;
4358 }
4359 }
4360 }
4361
4362 return align_jumps_log;
4363 }
4364
4365 /* If we are inside a phony loop, almost any kind of label can turn up as the
4366 first one in the loop. Aligning a braf label causes incorrect switch
4367 destination addresses; we can detect braf labels because they are
4368 followed by a BARRIER.
4369 Applying loop alignment to small constant or switch tables is a waste
4370 of space, so we suppress this too. */
4371 int
4372 sh_loop_align (rtx label)
4373 {
4374 rtx next = label;
4375
4376 do
4377 next = next_nonnote_insn (next);
4378 while (next && GET_CODE (next) == CODE_LABEL);
4379
4380 if (! next
4381 || ! INSN_P (next)
4382 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4383 || recog_memoized (next) == CODE_FOR_consttable_2)
4384 return 0;
4385
4386 return align_loops_log;
4387 }
4388
4389 /* Do a final pass over the function, just before delayed branch
4390 scheduling. */
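/* Roughly, the phases driven from here are (explanatory summary):
   SH_INSERT_USES_LABELS - associate calls with the insns that load their
   target address, for -mrelax; then fixup_addr_diff_vecs and an initial
   shorten_branches pass; SH_FIXUP_PCLOAD - turn out-of-range constant moves
   into pc-relative loads and dump the constant pool tables after suitable
   barriers; SH_SHORTEN_BRANCHES1 - split_branches rewrites conditional
   branches that are out of range.  */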
4391
4392 static void
4393 sh_reorg (void)
4394 {
4395 rtx first, insn, mova = NULL_RTX;
4396 int num_mova;
4397 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4398 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4399
4400 first = get_insns ();
4401 max_labelno_before_reorg = max_label_num ();
4402
4403 /* We must split call insns before introducing `mova's. If we're
4404 optimizing, they'll have already been split. Otherwise, make
4405 sure we don't split them too late. */
4406 if (! optimize)
4407 split_all_insns_noflow ();
4408
4409 if (TARGET_SHMEDIA)
4410 return;
4411
4412 /* If relaxing, generate pseudo-ops to associate function calls with
4413 the symbols they call. It does no harm to not generate these
4414 pseudo-ops. However, when we can generate them, it enables the
4415 linker to potentially relax the jsr to a bsr, and eliminate the
4416 register load and, possibly, the constant pool entry. */
4417
4418 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4419 if (TARGET_RELAX)
4420 {
4421 /* Remove all REG_LABEL notes. We want to use them for our own
4422 purposes. This works because none of the remaining passes
4423 need to look at them.
4424
4425 ??? But it may break in the future. We should use a machine
4426 dependent REG_NOTE, or some other approach entirely. */
4427 for (insn = first; insn; insn = NEXT_INSN (insn))
4428 {
4429 if (INSN_P (insn))
4430 {
4431 rtx note;
4432
4433 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4434 remove_note (insn, note);
4435 }
4436 }
4437
4438 for (insn = first; insn; insn = NEXT_INSN (insn))
4439 {
4440 rtx pattern, reg, link, set, scan, dies, label;
4441 int rescan = 0, foundinsn = 0;
4442
4443 if (GET_CODE (insn) == CALL_INSN)
4444 {
4445 pattern = PATTERN (insn);
4446
4447 if (GET_CODE (pattern) == PARALLEL)
4448 pattern = XVECEXP (pattern, 0, 0);
4449 if (GET_CODE (pattern) == SET)
4450 pattern = SET_SRC (pattern);
4451
4452 if (GET_CODE (pattern) != CALL
4453 || GET_CODE (XEXP (pattern, 0)) != MEM)
4454 continue;
4455
4456 reg = XEXP (XEXP (pattern, 0), 0);
4457 }
4458 else
4459 {
4460 reg = sfunc_uses_reg (insn);
4461 if (! reg)
4462 continue;
4463 }
4464
4465 if (GET_CODE (reg) != REG)
4466 continue;
4467
4468 /* This is a function call via REG. If the only uses of REG
4469 between the time that it is set and the time that it dies
4470 are in function calls, then we can associate all the
4471 function calls with the setting of REG. */
4472
4473 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4474 {
4475 rtx linked_insn;
4476
4477 if (REG_NOTE_KIND (link) != 0)
4478 continue;
4479 linked_insn = XEXP (link, 0);
4480 set = single_set (linked_insn);
4481 if (set
4482 && rtx_equal_p (reg, SET_DEST (set))
4483 && ! INSN_DELETED_P (linked_insn))
4484 {
4485 link = linked_insn;
4486 break;
4487 }
4488 }
4489
4490 if (! link)
4491 {
4492 /* ??? Sometimes global register allocation will have
4493 deleted the insn pointed to by LOG_LINKS. Try
4494 scanning backward to find where the register is set. */
4495 for (scan = PREV_INSN (insn);
4496 scan && GET_CODE (scan) != CODE_LABEL;
4497 scan = PREV_INSN (scan))
4498 {
4499 if (! INSN_P (scan))
4500 continue;
4501
4502 if (! reg_mentioned_p (reg, scan))
4503 continue;
4504
4505 if (noncall_uses_reg (reg, scan, &set))
4506 break;
4507
4508 if (set)
4509 {
4510 link = scan;
4511 break;
4512 }
4513 }
4514 }
4515
4516 if (! link)
4517 continue;
4518
4519 /* The register is set at LINK. */
4520
4521 /* We can only optimize the function call if the register is
4522 being set to a symbol. In theory, we could sometimes
4523 optimize calls to a constant location, but the assembler
4524 and linker do not support that at present. */
4525 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4526 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4527 continue;
4528
4529 /* Scan forward from LINK to the place where REG dies, and
4530 make sure that the only insns which use REG are
4531 themselves function calls. */
4532
4533 /* ??? This doesn't work for call targets that were allocated
4534 by reload, since there may not be a REG_DEAD note for the
4535 register. */
4536
4537 dies = NULL_RTX;
4538 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4539 {
4540 rtx scanset;
4541
4542 /* Don't try to trace forward past a CODE_LABEL if we haven't
4543 seen INSN yet. Ordinarily, we will only find the setting insn
4544 in LOG_LINKS if it is in the same basic block. However,
4545 cross-jumping can insert code labels in between the load and
4546 the call, and can result in situations where a single call
4547 insn may have two targets depending on where we came from. */
4548
4549 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4550 break;
4551
4552 if (! INSN_P (scan))
4553 continue;
4554
4555 /* Don't try to trace forward past a JUMP. To optimize
4556 safely, we would have to check that all the
4557 instructions at the jump destination did not use REG. */
4558
4559 if (GET_CODE (scan) == JUMP_INSN)
4560 break;
4561
4562 if (! reg_mentioned_p (reg, scan))
4563 continue;
4564
4565 if (noncall_uses_reg (reg, scan, &scanset))
4566 break;
4567
4568 if (scan == insn)
4569 foundinsn = 1;
4570
4571 if (scan != insn
4572 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4573 {
4574 /* There is a function call to this register other
4575 than the one we are checking. If we optimize
4576 this call, we need to rescan again below. */
4577 rescan = 1;
4578 }
4579
4580 /* ??? We shouldn't have to worry about SCANSET here.
4581 We should just be able to check for a REG_DEAD note
4582 on a function call. However, the REG_DEAD notes are
4583 apparently not dependable around libcalls; c-torture
4584 execute/920501-2 is a test case. If SCANSET is set,
4585 then this insn sets the register, so it must have
4586 died earlier. Unfortunately, this will only handle
4587 the cases in which the register is, in fact, set in a
4588 later insn. */
4589
4590 /* ??? We shouldn't have to use FOUNDINSN here.
4591 However, the LOG_LINKS fields are apparently not
4592 entirely reliable around libcalls;
4593 newlib/libm/math/e_pow.c is a test case. Sometimes
4594 an insn will appear in LOG_LINKS even though it is
4595 not the most recent insn which sets the register. */
4596
4597 if (foundinsn
4598 && (scanset
4599 || find_reg_note (scan, REG_DEAD, reg)))
4600 {
4601 dies = scan;
4602 break;
4603 }
4604 }
4605
4606 if (! dies)
4607 {
4608 /* Either there was a branch, or some insn used REG
4609 other than as a function call address. */
4610 continue;
4611 }
4612
4613 /* Create a code label, and put it in a REG_LABEL note on
4614 the insn which sets the register, and on each call insn
4615 which uses the register. In final_prescan_insn we look
4616 for the REG_LABEL notes, and output the appropriate label
4617 or pseudo-op. */
4618
4619 label = gen_label_rtx ();
4620 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4621 REG_NOTES (link));
4622 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4623 REG_NOTES (insn));
4624 if (rescan)
4625 {
4626 scan = link;
4627 do
4628 {
4629 rtx reg2;
4630
4631 scan = NEXT_INSN (scan);
4632 if (scan != insn
4633 && ((GET_CODE (scan) == CALL_INSN
4634 && reg_mentioned_p (reg, scan))
4635 || ((reg2 = sfunc_uses_reg (scan))
4636 && REGNO (reg2) == REGNO (reg))))
4637 REG_NOTES (scan)
4638 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4639 }
4640 while (scan != dies);
4641 }
4642 }
4643 }
4644
4645 if (TARGET_SH2)
4646 fixup_addr_diff_vecs (first);
4647
4648 if (optimize)
4649 {
4650 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4651 shorten_branches (first);
4652 }
4653
4654 /* Scan the function looking for move instructions which have to be
4655 changed to pc-relative loads and insert the literal tables. */
4656 label_ref_list_pool = create_alloc_pool ("label references list",
4657 sizeof (struct label_ref_list_d),
4658 30);
4659 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4660 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4661 {
4662 if (mova_p (insn))
4663 {
4664 /* ??? basic block reordering can move a switch table dispatch
4665 below the switch table. Check if that has happened.
4666 We only have the addresses available when optimizing; but then,
4667 this check shouldn't be needed when not optimizing. */
4668 if (!untangle_mova (&num_mova, &mova, insn))
4669 {
4670 insn = mova;
4671 num_mova = 0;
4672 }
4673 }
4674 else if (GET_CODE (insn) == JUMP_INSN
4675 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4676 && num_mova
4677 /* ??? loop invariant motion can also move a mova out of a
4678 loop. Since loop does this code motion anyway, maybe we
4679 should wrap UNSPEC_MOVA into a CONST, so that reload can
4680 move it back. */
4681 && ((num_mova > 1
4682 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4683 || (prev_nonnote_insn (insn)
4684 == XEXP (MOVA_LABELREF (mova), 0))))
4685 {
4686 rtx scan;
4687 int total;
4688
4689 num_mova--;
4690
4691 /* Some code might have been inserted between the mova and
4692 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4693 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4694 total += get_attr_length (scan);
4695
4696 /* range of mova is 1020, add 4 because pc counts from address of
4697 second instruction after this one, subtract 2 in case pc is 2
4698 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4699 cancels out with alignment effects of the mova itself. */
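		  /* I.e. 1020 + 4 - 2 = 1022, the bound tested here.  */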
4700 if (total > 1022)
4701 {
4702 /* Change the mova into a load, and restart scanning
4703 there. broken_move will then return true for mova. */
4704 fixup_mova (mova);
4705 insn = mova;
4706 }
4707 }
4708 if (broken_move (insn)
4709 || (GET_CODE (insn) == INSN
4710 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4711 {
4712 rtx scan;
4713 /* Scan ahead looking for a barrier to stick the constant table
4714 behind. */
4715 rtx barrier = find_barrier (num_mova, mova, insn);
4716 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4717 int need_aligned_label = 0;
4718
4719 if (num_mova && ! mova_p (mova))
4720 {
4721 /* find_barrier had to change the first mova into a
4722 pcload; thus, we have to start with this new pcload. */
4723 insn = mova;
4724 num_mova = 0;
4725 }
4726 /* Now find all the moves between the points and modify them. */
4727 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4728 {
4729 if (GET_CODE (scan) == CODE_LABEL)
4730 last_float = 0;
4731 if (GET_CODE (scan) == INSN
4732 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4733 need_aligned_label = 1;
4734 if (broken_move (scan))
4735 {
4736 rtx *patp = &PATTERN (scan), pat = *patp;
4737 rtx src, dst;
4738 rtx lab;
4739 rtx newsrc;
4740 enum machine_mode mode;
4741
4742 if (GET_CODE (pat) == PARALLEL)
4743 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4744 src = SET_SRC (pat);
4745 dst = SET_DEST (pat);
4746 mode = GET_MODE (dst);
4747
4748 if (mode == SImode && hi_const (src)
4749 && REGNO (dst) != FPUL_REG)
4750 {
4751 int offset = 0;
4752
4753 mode = HImode;
4754 while (GET_CODE (dst) == SUBREG)
4755 {
4756 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4757 GET_MODE (SUBREG_REG (dst)),
4758 SUBREG_BYTE (dst),
4759 GET_MODE (dst));
4760 dst = SUBREG_REG (dst);
4761 }
4762 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4763 }
4764 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4765 {
4766 /* This must be an insn that clobbers r0. */
4767 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4768 XVECLEN (PATTERN (scan), 0)
4769 - 1);
4770 rtx clobber = *clobberp;
4771
4772 gcc_assert (GET_CODE (clobber) == CLOBBER
4773 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4774
4775 if (last_float
4776 && reg_set_between_p (r0_rtx, last_float_move, scan))
4777 last_float = 0;
4778 if (last_float
4779 && TARGET_SHCOMPACT
4780 && GET_MODE_SIZE (mode) != 4
4781 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4782 last_float = 0;
4783 lab = add_constant (src, mode, last_float);
4784 if (lab)
4785 emit_insn_before (gen_mova (lab), scan);
4786 else
4787 {
4788 /* There will be a REG_UNUSED note for r0 on
4789 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4790 otherwise reorg:mark_target_live_regs will not
4791 consider r0 to be used, and we would end up with a delay
4792 slot insn in front of SCAN that clobbers r0. */
4793 rtx note
4794 = find_regno_note (last_float_move, REG_UNUSED, 0);
4795
4796 /* If we are not optimizing, then there may not be
4797 a note. */
4798 if (note)
4799 PUT_MODE (note, REG_INC);
4800
4801 *last_float_addr = r0_inc_rtx;
4802 }
4803 last_float_move = scan;
4804 last_float = src;
4805 newsrc = gen_const_mem (mode,
4806 (((TARGET_SH4 && ! TARGET_FMOVD)
4807 || REGNO (dst) == FPUL_REG)
4808 ? r0_inc_rtx
4809 : r0_rtx));
4810 last_float_addr = &XEXP (newsrc, 0);
4811
4812 /* Remove the clobber of r0. */
4813 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4814 gen_rtx_SCRATCH (Pmode));
4815 }
4816 /* This is a mova needing a label. Create it. */
4817 else if (GET_CODE (src) == UNSPEC
4818 && XINT (src, 1) == UNSPEC_MOVA
4819 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4820 {
4821 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4822 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4823 newsrc = gen_rtx_UNSPEC (SImode,
4824 gen_rtvec (1, newsrc),
4825 UNSPEC_MOVA);
4826 }
4827 else
4828 {
4829 lab = add_constant (src, mode, 0);
4830 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4831 newsrc = gen_const_mem (mode, newsrc);
4832 }
4833 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4834 INSN_CODE (scan) = -1;
4835 }
4836 }
4837 dump_table (need_aligned_label ? insn : 0, barrier);
4838 insn = barrier;
4839 }
4840 }
4841 free_alloc_pool (label_ref_list_pool);
4842 for (insn = first; insn; insn = NEXT_INSN (insn))
4843 PUT_MODE (insn, VOIDmode);
4844
4845 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4846 INSN_ADDRESSES_FREE ();
4847 split_branches (first);
4848
4849 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4850 also has an effect on the register that holds the address of the sfunc.
4851 Insert an extra dummy insn in front of each sfunc that pretends to
4852 use this register. */
4853 if (flag_delayed_branch)
4854 {
4855 for (insn = first; insn; insn = NEXT_INSN (insn))
4856 {
4857 rtx reg = sfunc_uses_reg (insn);
4858
4859 if (! reg)
4860 continue;
4861 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4862 }
4863 }
4864 #if 0
4865 /* fpscr is not actually a user variable, but we pretend it is for the
4866 sake of the previous optimization passes, since we want it handled like
4867 one. However, we don't have any debugging information for it, so turn
4868 it into a non-user variable now. */
4869 if (TARGET_SH4)
4870 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4871 #endif
4872 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4873 }
4874
4875 int
4876 get_dest_uid (rtx label, int max_uid)
4877 {
4878 rtx dest = next_real_insn (label);
4879 int dest_uid;
4880 if (! dest)
4881 /* This can happen for an undefined label. */
4882 return 0;
4883 dest_uid = INSN_UID (dest);
4884 /* If this is a newly created branch redirection blocking instruction,
4885 we cannot index the branch_uid or insn_addresses arrays with its
4886 uid. But then, we won't need to, because the actual destination is
4887 the following branch. */
4888 while (dest_uid >= max_uid)
4889 {
4890 dest = NEXT_INSN (dest);
4891 dest_uid = INSN_UID (dest);
4892 }
4893 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4894 return 0;
4895 return dest_uid;
4896 }
4897
4898 /* Split condbranches that are out of range. Also add clobbers for
4899 scratch registers that are needed in far jumps.
4900 We do this before delay slot scheduling, so that it can take our
4901 newly created instructions into account. It also allows us to
4902 find branches with common targets more easily. */
4903
4904 static void
4905 split_branches (rtx first)
4906 {
4907 rtx insn;
4908 struct far_branch **uid_branch, *far_branch_list = 0;
4909 int max_uid = get_max_uid ();
4910 int ok;
4911
4912 /* Find out which branches are out of range. */
4913 shorten_branches (first);
4914
4915 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4916 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4917
4918 for (insn = first; insn; insn = NEXT_INSN (insn))
4919 if (! INSN_P (insn))
4920 continue;
4921 else if (INSN_DELETED_P (insn))
4922 {
4923 /* Shorten_branches would split this instruction again,
4924 so transform it into a note. */
4925 PUT_CODE (insn, NOTE);
4926 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4927 NOTE_SOURCE_FILE (insn) = 0;
4928 }
4929 else if (GET_CODE (insn) == JUMP_INSN
4930 /* Don't mess with ADDR_DIFF_VEC */
4931 && (GET_CODE (PATTERN (insn)) == SET
4932 || GET_CODE (PATTERN (insn)) == RETURN))
4933 {
4934 enum attr_type type = get_attr_type (insn);
4935 if (type == TYPE_CBRANCH)
4936 {
4937 rtx next, beyond;
4938
4939 if (get_attr_length (insn) > 4)
4940 {
4941 rtx src = SET_SRC (PATTERN (insn));
4942 rtx olabel = XEXP (XEXP (src, 1), 0);
4943 int addr = INSN_ADDRESSES (INSN_UID (insn));
4944 rtx label = 0;
4945 int dest_uid = get_dest_uid (olabel, max_uid);
4946 struct far_branch *bp = uid_branch[dest_uid];
4947
4948 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4949 the label if the LABEL_NUSES count drops to zero. There is
4950 always a jump_optimize pass that sets these values, but it
4951 proceeds to delete unreferenced code, and then if not
4952 optimizing, to un-delete the deleted instructions, thus
4953 leaving labels with use counts that are too low. */
4954 if (! optimize)
4955 {
4956 JUMP_LABEL (insn) = olabel;
4957 LABEL_NUSES (olabel)++;
4958 }
4959 if (! bp)
4960 {
4961 bp = (struct far_branch *) alloca (sizeof *bp);
4962 uid_branch[dest_uid] = bp;
4963 bp->prev = far_branch_list;
4964 far_branch_list = bp;
4965 bp->far_label
4966 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4967 LABEL_NUSES (bp->far_label)++;
4968 }
4969 else
4970 {
4971 label = bp->near_label;
4972 if (! label && bp->address - addr >= CONDJUMP_MIN)
4973 {
4974 rtx block = bp->insert_place;
4975
4976 if (GET_CODE (PATTERN (block)) == RETURN)
4977 block = PREV_INSN (block);
4978 else
4979 block = gen_block_redirect (block,
4980 bp->address, 2);
4981 label = emit_label_after (gen_label_rtx (),
4982 PREV_INSN (block));
4983 bp->near_label = label;
4984 }
4985 else if (label && ! NEXT_INSN (label))
4986 {
4987 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4988 bp->insert_place = insn;
4989 else
4990 gen_far_branch (bp);
4991 }
4992 }
4993 if (! label
4994 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4995 {
4996 bp->near_label = label = gen_label_rtx ();
4997 bp->insert_place = insn;
4998 bp->address = addr;
4999 }
5000 ok = redirect_jump (insn, label, 1);
5001 gcc_assert (ok);
5002 }
5003 else
5004 {
5005 /* get_attr_length (insn) == 2 */
5006 /* Check if we have a pattern where reorg wants to redirect
5007 the branch to a label from an unconditional branch that
5008 is too far away. */
5009 /* We can't use JUMP_LABEL here because it might be undefined
5010 when not optimizing. */
5011 /* A syntax error might cause beyond to be NULL_RTX. */
5012 beyond
5013 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
5014 0));
5015
5016 if (beyond
5017 && (GET_CODE (beyond) == JUMP_INSN
5018 || ((beyond = next_active_insn (beyond))
5019 && GET_CODE (beyond) == JUMP_INSN))
5020 && GET_CODE (PATTERN (beyond)) == SET
5021 && recog_memoized (beyond) == CODE_FOR_jump_compact
5022 && ((INSN_ADDRESSES
5023 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
5024 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5025 > 252 + 258 + 2))
5026 gen_block_redirect (beyond,
5027 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5028 }
5029
5030 next = next_active_insn (insn);
5031
5032 if ((GET_CODE (next) == JUMP_INSN
5033 || ((next = next_active_insn (next))
5034 && GET_CODE (next) == JUMP_INSN))
5035 && GET_CODE (PATTERN (next)) == SET
5036 && recog_memoized (next) == CODE_FOR_jump_compact
5037 && ((INSN_ADDRESSES
5038 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5039 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5040 > 252 + 258 + 2))
5041 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5042 }
5043 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5044 {
5045 int addr = INSN_ADDRESSES (INSN_UID (insn));
5046 rtx far_label = 0;
5047 int dest_uid = 0;
5048 struct far_branch *bp;
5049
5050 if (type == TYPE_JUMP)
5051 {
5052 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5053 dest_uid = get_dest_uid (far_label, max_uid);
5054 if (! dest_uid)
5055 {
5056 /* Parse errors can lead to labels outside
5057 the insn stream. */
5058 if (! NEXT_INSN (far_label))
5059 continue;
5060
5061 if (! optimize)
5062 {
5063 JUMP_LABEL (insn) = far_label;
5064 LABEL_NUSES (far_label)++;
5065 }
5066 redirect_jump (insn, NULL_RTX, 1);
5067 far_label = 0;
5068 }
5069 }
5070 bp = uid_branch[dest_uid];
5071 if (! bp)
5072 {
5073 bp = (struct far_branch *) alloca (sizeof *bp);
5074 uid_branch[dest_uid] = bp;
5075 bp->prev = far_branch_list;
5076 far_branch_list = bp;
5077 bp->near_label = 0;
5078 bp->far_label = far_label;
5079 if (far_label)
5080 LABEL_NUSES (far_label)++;
5081 }
5082 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5083 if (addr - bp->address <= CONDJUMP_MAX)
5084 emit_label_after (bp->near_label, PREV_INSN (insn));
5085 else
5086 {
5087 gen_far_branch (bp);
5088 bp->near_label = 0;
5089 }
5090 else
5091 bp->near_label = 0;
5092 bp->address = addr;
5093 bp->insert_place = insn;
5094 if (! far_label)
5095 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5096 else
5097 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5098 }
5099 }
5100 /* Generate all pending far branches,
5101 and free our references to the far labels. */
5102 while (far_branch_list)
5103 {
5104 if (far_branch_list->near_label
5105 && ! NEXT_INSN (far_branch_list->near_label))
5106 gen_far_branch (far_branch_list);
5107 if (optimize
5108 && far_branch_list->far_label
5109 && ! --LABEL_NUSES (far_branch_list->far_label))
5110 delete_insn (far_branch_list->far_label);
5111 far_branch_list = far_branch_list->prev;
5112 }
5113
5114 /* Instruction length information is no longer valid due to the new
5115 instructions that have been generated. */
5116 init_insn_lengths ();
5117 }
5118
5119 /* Dump out instruction addresses, which is useful for debugging the
5120 constant pool table stuff.
5121
5122 If relaxing, output the label and pseudo-ops used to link together
5123 calls and the instruction which set the registers. */
5124
5125 /* ??? The addresses printed by this routine for insns are nonsense for
5126 insns which are inside of a sequence where none of the inner insns have
5127 variable length. This is because the second pass of shorten_branches
5128 does not bother to update them. */
5129
5130 void
5131 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5132 int noperands ATTRIBUTE_UNUSED)
5133 {
5134 if (TARGET_DUMPISIZE)
5135 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5136
5137 if (TARGET_RELAX)
5138 {
5139 rtx note;
5140
5141 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5142 if (note)
5143 {
5144 rtx pattern;
5145
5146 pattern = PATTERN (insn);
5147 if (GET_CODE (pattern) == PARALLEL)
5148 pattern = XVECEXP (pattern, 0, 0);
5149 switch (GET_CODE (pattern))
5150 {
5151 case SET:
5152 if (GET_CODE (SET_SRC (pattern)) != CALL
5153 && get_attr_type (insn) != TYPE_SFUNC)
5154 {
5155 targetm.asm_out.internal_label
5156 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5157 break;
5158 }
5159 /* else FALLTHROUGH */
5160 case CALL:
5161 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5162 CODE_LABEL_NUMBER (XEXP (note, 0)));
5163 break;
5164
5165 default:
5166 gcc_unreachable ();
5167 }
5168 }
5169 }
5170 }
5171
5172 /* Dump out any constants accumulated in the final pass. These will
5173 only be labels. */
5174
5175 const char *
5176 output_jump_label_table (void)
5177 {
5178 int i;
5179
5180 if (pool_size)
5181 {
5182 fprintf (asm_out_file, "\t.align 2\n");
5183 for (i = 0; i < pool_size; i++)
5184 {
5185 pool_node *p = &pool_vector[i];
5186
5187 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5188 CODE_LABEL_NUMBER (p->label));
5189 output_asm_insn (".long %O0", &p->value);
5190 }
5191 pool_size = 0;
5192 }
5193
5194 return "";
5195 }
5196
5197 /* A full frame looks like:
5198
5199 arg-5
5200 arg-4
5201 [ if current_function_anonymous_args
5202 arg-3
5203 arg-2
5204 arg-1
5205 arg-0 ]
5206 saved-fp
5207 saved-r10
5208 saved-r11
5209 saved-r12
5210 saved-pr
5211 local-n
5212 ..
5213 local-1
5214 local-0 <- fp points here. */
5215
5216 /* Number of bytes pushed for anonymous args, used to pass information
5217 between expand_prologue and expand_epilogue. */
5218
5219 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5220 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5221 for an epilogue and a negative value means that it's for a sibcall
5222 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5223 all the registers that are about to be restored, and hence dead. */
5224
5225 static void
5226 output_stack_adjust (int size, rtx reg, int epilogue_p,
5227 HARD_REG_SET *live_regs_mask)
5228 {
5229 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5230 if (size)
5231 {
5232 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5233
5234 /* This test is bogus, as output_stack_adjust is used to re-align the
5235 stack. */
5236 #if 0
5237 gcc_assert (!(size % align));
5238 #endif
5239
5240 if (CONST_OK_FOR_ADD (size))
5241 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5242 /* Try to do it with two partial adjustments; however, we must make
5243 sure that the stack is properly aligned at all times, in case
5244 an interrupt occurs between the two partial adjustments. */
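      /* For illustration: with size == 200 and align == 4 this splits the
	 adjustment into (200 / 2 & -4) == 100 followed by 200 - 100 == 100;
	 both halves fit the CONST_OK_FOR_ADD immediate range and the stack
	 pointer stays aligned between the two additions.  */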
5245 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5246 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5247 {
5248 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5249 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5250 }
5251 else
5252 {
5253 rtx const_reg;
5254 rtx insn;
5255 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5256 int i;
5257
5258 /* If TEMP is invalid, we could temporarily save a general
5259 register to MACL. However, there is currently no need
5260 to handle this case, so just die when we see it. */
5261 if (epilogue_p < 0
5262 || current_function_interrupt
5263 || ! call_really_used_regs[temp] || fixed_regs[temp])
5264 temp = -1;
5265 if (temp < 0 && ! current_function_interrupt
5266 && (TARGET_SHMEDIA || epilogue_p >= 0))
5267 {
5268 HARD_REG_SET temps;
5269 COPY_HARD_REG_SET (temps, call_used_reg_set);
5270 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5271 if (epilogue_p > 0)
5272 {
5273 int nreg = 0;
5274 if (current_function_return_rtx)
5275 {
5276 enum machine_mode mode;
5277 mode = GET_MODE (current_function_return_rtx);
5278 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5279 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5280 }
5281 for (i = 0; i < nreg; i++)
5282 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5283 if (current_function_calls_eh_return)
5284 {
5285 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5286 for (i = 0; i <= 3; i++)
5287 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5288 }
5289 }
5290 if (TARGET_SHMEDIA && epilogue_p < 0)
5291 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5292 CLEAR_HARD_REG_BIT (temps, i);
5293 if (epilogue_p <= 0)
5294 {
5295 for (i = FIRST_PARM_REG;
5296 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5297 CLEAR_HARD_REG_BIT (temps, i);
5298 if (cfun->static_chain_decl != NULL)
5299 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5300 }
5301 temp = scavenge_reg (&temps);
5302 }
5303 if (temp < 0 && live_regs_mask)
5304 {
5305 HARD_REG_SET temps;
5306
5307 COPY_HARD_REG_SET (temps, *live_regs_mask);
5308 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
5309 temp = scavenge_reg (&temps);
5310 }
5311 if (temp < 0)
5312 {
5313 rtx adj_reg, tmp_reg, mem;
5314
5315 /* If we reached here, the most likely case is the (sibcall)
5316 epilogue for non SHmedia. Put a special push/pop sequence
5317 for such a case as the last resort. This looks lengthy but
5318 would not be a problem because it seems to be very
5319 rare. */
5320
5321 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5322
5323
5324 /* ??? There is still the slight possibility that r4 or
5325 r5 have been reserved as fixed registers or assigned
5326 as global registers, and they change during an
5327 interrupt. There are possible ways to handle this:
5328
5329 - If we are adjusting the frame pointer (r14), we can do
5330 with a single temp register and an ordinary push / pop
5331 on the stack.
5332 - Grab any call-used or call-saved registers (i.e. not
5333 fixed or globals) for the temps we need. We might
5334 also grab r14 if we are adjusting the stack pointer.
5335 If we can't find enough available registers, issue
5336 a diagnostic and die - the user must have reserved
5337 way too many registers.
5338 But since all this is rather unlikely to happen and
5339 would require extra testing, we just die if r4 / r5
5340 are not available. */
5341 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5342 && !global_regs[4] && !global_regs[5]);
5343
5344 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5345 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5346 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5347 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5348 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5349 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5350 emit_move_insn (mem, tmp_reg);
5351 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5352 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5353 emit_move_insn (mem, tmp_reg);
5354 emit_move_insn (reg, adj_reg);
5355 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5356 emit_move_insn (adj_reg, mem);
5357 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5358 emit_move_insn (tmp_reg, mem);
5359 /* Tell flow the insns that pop r4/r5 aren't dead. */
5360 emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
5361 emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
5362 return;
5363 }
5364 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5365
5366 /* If SIZE is negative, subtract the positive value.
5367 This sometimes allows a constant pool entry to be shared
5368 between prologue and epilogue code. */
5369 if (size < 0)
5370 {
5371 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5372 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5373 }
5374 else
5375 {
5376 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5377 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5378 }
5379 if (! epilogue_p)
5380 REG_NOTES (insn)
5381 = (gen_rtx_EXPR_LIST
5382 (REG_FRAME_RELATED_EXPR,
5383 gen_rtx_SET (VOIDmode, reg,
5384 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5385 REG_NOTES (insn)));
5386 }
5387 }
5388 }
5389
5390 static rtx
5391 frame_insn (rtx x)
5392 {
5393 x = emit_insn (x);
5394 RTX_FRAME_RELATED_P (x) = 1;
5395 return x;
5396 }
5397
5398 /* Output RTL to push register RN onto the stack. */
5399
5400 static rtx
5401 push (int rn)
5402 {
5403 rtx x;
5404 if (rn == FPUL_REG)
5405 x = gen_push_fpul ();
5406 else if (rn == FPSCR_REG)
5407 x = gen_push_fpscr ();
5408 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5409 && FP_OR_XD_REGISTER_P (rn))
5410 {
5411 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5412 return NULL_RTX;
5413 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5414 }
5415 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5416 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5417 else
5418 x = gen_push (gen_rtx_REG (SImode, rn));
5419
5420 x = frame_insn (x);
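 /* The push patterns use a pre-decrement of the stack pointer; record
 that auto-modification with a REG_INC note. */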
5421 REG_NOTES (x)
5422 = gen_rtx_EXPR_LIST (REG_INC,
5423 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5424 return x;
5425 }
5426
5427 /* Output RTL to pop register RN from the stack. */
5428
5429 static void
5430 pop (int rn)
5431 {
5432 rtx x;
5433 if (rn == FPUL_REG)
5434 x = gen_pop_fpul ();
5435 else if (rn == FPSCR_REG)
5436 x = gen_pop_fpscr ();
5437 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5438 && FP_OR_XD_REGISTER_P (rn))
5439 {
5440 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5441 return;
5442 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5443 }
5444 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5445 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5446 else
5447 x = gen_pop (gen_rtx_REG (SImode, rn));
5448
5449 x = emit_insn (x);
5450 REG_NOTES (x)
5451 = gen_rtx_EXPR_LIST (REG_INC,
5452 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5453 }
5454
5455 /* Generate code to push the regs specified in the mask. */
5456
5457 static void
5458 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5459 {
5460 int i;
5461 int skip_fpscr = 0;
5462
5463 /* Push PR last; this gives better latencies after the prologue, and
5464 candidates for the return delay slot when there are no general
5465 registers pushed. */
5466 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5467 {
5468 /* If this is an interrupt handler, and the SZ bit varies,
5469 and we have to push any floating point register, we need
5470 to switch to the correct precision first. */
5471 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5472 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5473 {
5474 HARD_REG_SET unsaved;
5475
5476 push (FPSCR_REG);
5477 COMPL_HARD_REG_SET (unsaved, *mask);
5478 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5479 skip_fpscr = 1;
5480 }
5481 if (i != PR_REG
5482 && (i != FPSCR_REG || ! skip_fpscr)
5483 && TEST_HARD_REG_BIT (*mask, i))
5484 push (i);
5485 }
5486 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5487 push (PR_REG);
5488 }
5489
5490 /* Calculate how much extra space is needed to save all callee-saved
5491 target registers.
5492 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5493
5494 static int
5495 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5496 {
5497 int reg;
5498 int stack_space = 0;
5499 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5500
5501 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5502 if ((! call_really_used_regs[reg] || interrupt_handler)
5503 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5504 /* Leave space to save this target register on the stack,
5505 in case target register allocation wants to use it. */
5506 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5507 return stack_space;
5508 }
5509
5510 /* Decide whether we should reserve space for callee-save target registers,
5511 in case target register allocation wants to use them. REGS_SAVED is
5512 the space, in bytes, that is already required for register saves.
5513 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5514
5515 static int
5516 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5517 HARD_REG_SET *live_regs_mask)
5518 {
5519 if (optimize_size)
5520 return 0;
5521 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5522 }
5523
5524 /* Decide how much space to reserve for callee-save target registers
5525 in case target register allocation wants to use them.
5526 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5527
5528 static int
5529 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5530 {
5531 if (shmedia_space_reserved_for_target_registers)
5532 return shmedia_target_regs_stack_space (live_regs_mask);
5533 else
5534 return 0;
5535 }
5536
5537 /* Work out the registers which need to be saved, both as a mask and a
5538 count of saved words. Return the count.
5539
5540 If doing a pragma interrupt function, then push all regs used by the
5541 function, and if we call another function (we can tell by looking at PR),
5542 make sure that all the regs it clobbers are safe too. */
5543
5544 static int
5545 calc_live_regs (HARD_REG_SET *live_regs_mask)
5546 {
5547 unsigned int reg;
5548 int count;
5549 tree attrs;
5550 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5551 bool nosave_low_regs;
5552 int pr_live, has_call;
5553
5554 attrs = DECL_ATTRIBUTES (current_function_decl);
5555 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5556 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5557 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5558 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5559
5560 CLEAR_HARD_REG_SET (*live_regs_mask);
5561 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5562 && regs_ever_live[FPSCR_REG])
5563 target_flags &= ~MASK_FPU_SINGLE;
5564 /* If switching to double mode would spare a lot of saves, do that. */
5565 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5566 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5567 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5568 && (! call_really_used_regs[reg]
5569 || interrupt_handler)
5570 && ++count > 2)
5571 {
5572 target_flags &= ~MASK_FPU_SINGLE;
5573 break;
5574 }
5575 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5576 knows how to use it. That means the pseudo originally allocated for
5577 the initial value can become the PR_MEDIA_REG hard register, as seen for
5578 execute/20010122-1.c:test9. */
5579 if (TARGET_SHMEDIA)
5580 /* ??? This function is called from initial_elimination_offset, hence we
5581 can't use the result of sh_media_register_for_return here. */
5582 pr_live = sh_pr_n_sets ();
5583 else
5584 {
5585 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5586 pr_live = (pr_initial
5587 ? (GET_CODE (pr_initial) != REG
5588 || REGNO (pr_initial) != (PR_REG))
5589 : regs_ever_live[PR_REG]);
5590 /* For SHcompact, if not optimizing, we end up with a memory reference
5591 using the return address pointer for __builtin_return_address even
5592 though there is no actual need to put the PR register on the stack. */
5593 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5594 }
5595 /* Force PR to be live if the prologue has to call the SHmedia
5596 argument decoder or register saver. */
5597 if (TARGET_SHCOMPACT
5598 && ((current_function_args_info.call_cookie
5599 & ~ CALL_COOKIE_RET_TRAMP (1))
5600 || current_function_has_nonlocal_label))
5601 pr_live = 1;
5602 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5603 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5604 {
5605 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5606 ? pr_live
5607 : interrupt_handler
5608 ? (/* Need to save all the regs ever live. */
5609 (regs_ever_live[reg]
5610 || (call_really_used_regs[reg]
5611 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5612 || reg == PIC_OFFSET_TABLE_REGNUM)
5613 && has_call)
5614 || (TARGET_SHMEDIA && has_call
5615 && REGISTER_NATURAL_MODE (reg) == SImode
5616 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5617 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5618 && reg != RETURN_ADDRESS_POINTER_REGNUM
5619 && reg != T_REG && reg != GBR_REG
5620 /* Push fpscr only on targets which have an FPU. */
5621 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5622 : (/* Only push those regs which are used and need to be saved. */
5623 (TARGET_SHCOMPACT
5624 && flag_pic
5625 && current_function_args_info.call_cookie
5626 && reg == PIC_OFFSET_TABLE_REGNUM)
5627 || (regs_ever_live[reg]
5628 && (!call_really_used_regs[reg]
5629 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5630 || (current_function_calls_eh_return
5631 && (reg == EH_RETURN_DATA_REGNO (0)
5632 || reg == EH_RETURN_DATA_REGNO (1)
5633 || reg == EH_RETURN_DATA_REGNO (2)
5634 || reg == EH_RETURN_DATA_REGNO (3)))
5635 || ((reg == MACL_REG || reg == MACH_REG)
5636 && regs_ever_live[reg]
5637 && sh_cfun_attr_renesas_p ())
5638 ))
5639 {
5640 SET_HARD_REG_BIT (*live_regs_mask, reg);
5641 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5642
5643 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5644 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5645 {
5646 if (FP_REGISTER_P (reg))
5647 {
5648 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5649 {
5650 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5651 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5652 }
5653 }
5654 else if (XD_REGISTER_P (reg))
5655 {
5656 /* Must switch to double mode to access these registers. */
5657 target_flags &= ~MASK_FPU_SINGLE;
5658 }
5659 }
5660 }
5661 if (nosave_low_regs && reg == R8_REG)
5662 break;
5663 }
5664 /* If we have a target register optimization pass after prologue / epilogue
5665 threading, we need to assume all target registers will be live even if
5666 they aren't now. */
5667 if (flag_branch_target_load_optimize2
5668 && TARGET_SAVE_ALL_TARGET_REGS
5669 && shmedia_space_reserved_for_target_registers)
5670 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5671 if ((! call_really_used_regs[reg] || interrupt_handler)
5672 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5673 {
5674 SET_HARD_REG_BIT (*live_regs_mask, reg);
5675 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5676 }
5677 /* If this is an interrupt handler, we don't have any call-clobbered
5678 registers we can conveniently use for target register save/restore.
5679 Make sure we save at least one general purpose register when we need
5680 to save target registers. */
5681 if (interrupt_handler
5682 && hard_regs_intersect_p (live_regs_mask,
5683 &reg_class_contents[TARGET_REGS])
5684 && ! hard_regs_intersect_p (live_regs_mask,
5685 &reg_class_contents[GENERAL_REGS]))
5686 {
5687 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5688 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5689 }
5690
5691 return count;
5692 }
5693
5694 /* Code to generate prologue and epilogue sequences */
5695
5696 /* PUSHED is the number of bytes that are being pushed on the
5697 stack for register saves. Return the frame size, padded
5698 appropriately so that the stack stays properly aligned. */
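/* For example, assuming STACK_BOUNDARY / BITS_PER_UNIT == 4, a frame
 size of 10 with PUSHED == 20 gives ((10 + 20 + 3) & -4) - 20 == 12,
 so that the padded frame plus the pushed bytes remain a multiple of
 the alignment. */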
5699 static HOST_WIDE_INT
5700 rounded_frame_size (int pushed)
5701 {
5702 HOST_WIDE_INT size = get_frame_size ();
5703 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5704
5705 return ((size + pushed + align - 1) & -align) - pushed;
5706 }
5707
5708 /* Choose a call-clobbered target-branch register that remains
5709 unchanged along the whole function. We set it up as the return
5710 value in the prologue. */
5711 int
5712 sh_media_register_for_return (void)
5713 {
5714 int regno;
5715 int tr0_used;
5716
5717 if (! current_function_is_leaf)
5718 return -1;
5719 if (lookup_attribute ("interrupt_handler",
5720 DECL_ATTRIBUTES (current_function_decl)))
5721 return -1;
5722 if (sh_cfun_interrupt_handler_p ())
5723 return -1;
5724
5725 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5726
5727 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5728 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5729 return regno;
5730
5731 return -1;
5732 }
5733
5734 /* The maximum registers we need to save are:
5735 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5736 - 32 floating point registers (for each pair, we save none,
5737 one single precision value, or a double precision value).
5738 - 8 target registers
5739 - add 1 entry for a delimiter. */
5740 #define MAX_SAVED_REGS (62+32+8)
5741
5742 typedef struct save_entry_s
5743 {
5744 unsigned char reg;
5745 unsigned char mode;
5746 short offset;
5747 } save_entry;
5748
5749 #define MAX_TEMPS 4
5750
5751 /* There will be a delimiter entry with VOIDmode both at the start and the
5752 end of a filled in schedule. The end delimiter has the offset of the
5753 save with the smallest (i.e. most negative) offset. */
5754 typedef struct save_schedule_s
5755 {
5756 save_entry entries[MAX_SAVED_REGS + 2];
5757 int temps[MAX_TEMPS+1];
5758 } save_schedule;
5759
5760 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5761 use reverse order. Returns the last entry written to (not counting
5762 the delimiter). OFFSET_BASE is a number to be added to all offset
5763 entries. */
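/* A filled-in schedule thus starts with a { -1, VOIDmode, OFFSET_BASE }
 delimiter, is followed by the 8-byte-aligned saves at decreasing
 offsets, then by the remaining 32-bit saves, and ends with another
 VOIDmode delimiter whose offset is the lowest offset used. */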
5764
5765 static save_entry *
5766 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5767 int offset_base)
5768 {
5769 int align, i;
5770 save_entry *entry = schedule->entries;
5771 int tmpx = 0;
5772 int offset;
5773
5774 if (! current_function_interrupt)
5775 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5776 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5777 && ! FUNCTION_ARG_REGNO_P (i)
5778 && i != FIRST_RET_REG
5779 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5780 && ! (current_function_calls_eh_return
5781 && (i == EH_RETURN_STACKADJ_REGNO
5782 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5783 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5784 schedule->temps[tmpx++] = i;
5785 entry->reg = -1;
5786 entry->mode = VOIDmode;
5787 entry->offset = offset_base;
5788 entry++;
5789 /* We loop twice: first, we save 8-byte-aligned registers at the
5790 higher addresses, which are known to be aligned. Then, we
5791 proceed to saving 32-bit registers that don't need 8-byte
5792 alignment.
5793 If this is an interrupt function, all registers that need saving
5794 need to be saved in full. Moreover, we need to postpone saving
5795 target registers till we have saved some general purpose registers
5796 we can then use as scratch registers. */
5797 offset = offset_base;
5798 for (align = 1; align >= 0; align--)
5799 {
5800 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5801 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5802 {
5803 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5804 int reg = i;
5805
5806 if (current_function_interrupt)
5807 {
5808 if (TARGET_REGISTER_P (i))
5809 continue;
5810 if (GENERAL_REGISTER_P (i))
5811 mode = DImode;
5812 }
5813 if (mode == SFmode && (i % 2) == 1
5814 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5815 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5816 {
5817 mode = DFmode;
5818 i--;
5819 reg--;
5820 }
5821
5822 /* If we're doing the aligned pass and this is not aligned,
5823 or we're doing the unaligned pass and this is aligned,
5824 skip it. */
5825 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5826 != align)
5827 continue;
5828
5829 if (current_function_interrupt
5830 && GENERAL_REGISTER_P (i)
5831 && tmpx < MAX_TEMPS)
5832 schedule->temps[tmpx++] = i;
5833
5834 offset -= GET_MODE_SIZE (mode);
5835 entry->reg = i;
5836 entry->mode = mode;
5837 entry->offset = offset;
5838 entry++;
5839 }
5840 if (align && current_function_interrupt)
5841 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5842 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5843 {
5844 offset -= GET_MODE_SIZE (DImode);
5845 entry->reg = i;
5846 entry->mode = DImode;
5847 entry->offset = offset;
5848 entry++;
5849 }
5850 }
5851 entry->reg = -1;
5852 entry->mode = VOIDmode;
5853 entry->offset = offset;
5854 schedule->temps[tmpx] = -1;
5855 return entry - 1;
5856 }
5857
5858 void
5859 sh_expand_prologue (void)
5860 {
5861 HARD_REG_SET live_regs_mask;
5862 int d, i;
5863 int d_rounding = 0;
5864 int save_flags = target_flags;
5865 int pretend_args;
5866 tree sp_switch_attr
5867 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5868
5869 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5870
5871 /* We have pretend args if we had an object sent partially in registers
5872 and partially on the stack, e.g. a large structure. */
5873 pretend_args = current_function_pretend_args_size;
5874 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5875 && (NPARM_REGS(SImode)
5876 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5877 pretend_args = 0;
5878 output_stack_adjust (-pretend_args
5879 - current_function_args_info.stack_regs * 8,
5880 stack_pointer_rtx, 0, NULL);
5881
5882 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5883 /* We're going to use the PIC register to load the address of the
5884 incoming-argument decoder and/or of the return trampoline from
5885 the GOT, so make sure the PIC register is preserved and
5886 initialized. */
5887 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5888
5889 if (TARGET_SHCOMPACT
5890 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5891 {
5892 int reg;
5893
5894 /* First, make all registers with incoming arguments that will
5895 be pushed onto the stack live, so that register renaming
5896 doesn't overwrite them. */
5897 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5898 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5899 >= NPARM_REGS (SImode) - reg)
5900 for (; reg < NPARM_REGS (SImode); reg++)
5901 emit_insn (gen_shcompact_preserve_incoming_args
5902 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5903 else if (CALL_COOKIE_INT_REG_GET
5904 (current_function_args_info.call_cookie, reg) == 1)
5905 emit_insn (gen_shcompact_preserve_incoming_args
5906 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5907
5908 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5909 stack_pointer_rtx);
5910 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5911 GEN_INT (current_function_args_info.call_cookie));
5912 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5913 gen_rtx_REG (SImode, R0_REG));
5914 }
5915 else if (TARGET_SHMEDIA)
5916 {
5917 int tr = sh_media_register_for_return ();
5918
5919 if (tr >= 0)
5920 {
5921 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5922 gen_rtx_REG (DImode, PR_MEDIA_REG));
5923
5924 /* ??? We should suppress saving pr when we don't need it, but this
5925 is tricky because of builtin_return_address. */
5926
5927 /* If this function only exits with sibcalls, this copy
5928 will be flagged as dead. */
5929 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5930 const0_rtx,
5931 REG_NOTES (insn));
5932 }
5933 }
5934
5935 /* Emit the code for SETUP_VARARGS. */
5936 if (current_function_stdarg)
5937 {
5938 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5939 {
5940 /* Push arg regs as if they'd been provided by the caller on the stack. */
5941 for (i = 0; i < NPARM_REGS(SImode); i++)
5942 {
5943 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5944 rtx insn;
5945
5946 if (i >= (NPARM_REGS(SImode)
5947 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5948 ))
5949 break;
5950 insn = push (rn);
5951 RTX_FRAME_RELATED_P (insn) = 0;
5952 }
5953 }
5954 }
5955
5956 /* If we're supposed to switch stacks at function entry, do so now. */
5957 if (sp_switch_attr)
5958 {
5959 /* The argument specifies a variable holding the address of the
5960 stack the interrupt function should switch to/from at entry/exit. */
5961 const char *s
5962 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5963 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5964
5965 emit_insn (gen_sp_switch_1 (sp_switch));
5966 }
5967
5968 d = calc_live_regs (&live_regs_mask);
5969 /* ??? Maybe we could save some switching if we can move a mode switch
5970 that already happens to be at the function start into the prologue. */
5971 if (target_flags != save_flags && ! current_function_interrupt)
5972 emit_insn (gen_toggle_sz ());
5973
5974 if (TARGET_SH5)
5975 {
5976 int offset_base, offset;
5977 rtx r0 = NULL_RTX;
5978 int offset_in_r0 = -1;
5979 int sp_in_r0 = 0;
5980 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5981 int total_size, save_size;
5982 save_schedule schedule;
5983 save_entry *entry;
5984 int *tmp_pnt;
5985
5986 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5987 && ! current_function_interrupt)
5988 r0 = gen_rtx_REG (Pmode, R0_REG);
5989
5990 /* D is the actual number of bytes that we need for saving registers;
5991 however, in initial_elimination_offset we have committed to using
5992 an additional TREGS_SPACE bytes - in order to keep both
5993 addresses to arguments supplied by the caller and local variables
5994 valid, we must keep this gap. Place it between the incoming
5995 arguments and the actually saved registers in a bid to optimize
5996 locality of reference. */
5997 total_size = d + tregs_space;
5998 total_size += rounded_frame_size (total_size);
5999 save_size = total_size - rounded_frame_size (d);
6000 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
6001 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6002 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
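 /* For example, with an 8-byte STACK_BOUNDARY and save_size == 20,
 d_rounding becomes 4, padding the register save area to 24 bytes. */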
6003
6004 /* If adjusting the stack in a single step costs nothing extra, do so.
6005 I.e. either if a single addi is enough, or we need a movi anyway,
6006 and we don't exceed the maximum offset range (the test for the
6007 latter is conservative for simplicity). */
6008 if (TARGET_SHMEDIA
6009 && (CONST_OK_FOR_I10 (-total_size)
6010 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
6011 && total_size <= 2044)))
6012 d_rounding = total_size - save_size;
6013
6014 offset_base = d + d_rounding;
6015
6016 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
6017 0, NULL);
6018
6019 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
6020 tmp_pnt = schedule.temps;
6021 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6022 {
6023 enum machine_mode mode = entry->mode;
6024 unsigned int reg = entry->reg;
6025 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
6026 rtx orig_reg_rtx;
6027
6028 offset = entry->offset;
6029
6030 reg_rtx = gen_rtx_REG (mode, reg);
6031
6032 mem_rtx = gen_frame_mem (mode,
6033 gen_rtx_PLUS (Pmode,
6034 stack_pointer_rtx,
6035 GEN_INT (offset)));
6036
6037 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6038
6039 gcc_assert (r0);
6040 mem_rtx = NULL_RTX;
6041
6042 try_pre_dec:
6043 do
6044 if (HAVE_PRE_DECREMENT
6045 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6046 || mem_rtx == NULL_RTX
6047 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6048 {
6049 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6050
6051 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6052 pre_dec_ok);
6053
6054 pre_dec = NULL_RTX;
6055
6056 break;
6057
6058 pre_dec_ok:
6059 mem_rtx = NULL_RTX;
6060 offset += GET_MODE_SIZE (mode);
6061 }
6062 while (0);
6063
6064 if (mem_rtx != NULL_RTX)
6065 goto addr_ok;
6066
6067 if (offset_in_r0 == -1)
6068 {
6069 emit_move_insn (r0, GEN_INT (offset));
6070 offset_in_r0 = offset;
6071 }
6072 else if (offset != offset_in_r0)
6073 {
6074 emit_move_insn (r0,
6075 gen_rtx_PLUS
6076 (Pmode, r0,
6077 GEN_INT (offset - offset_in_r0)));
6078 offset_in_r0 += offset - offset_in_r0;
6079 }
6080
6081 if (pre_dec != NULL_RTX)
6082 {
6083 if (! sp_in_r0)
6084 {
6085 emit_move_insn (r0,
6086 gen_rtx_PLUS
6087 (Pmode, r0, stack_pointer_rtx));
6088 sp_in_r0 = 1;
6089 }
6090
6091 offset -= GET_MODE_SIZE (mode);
6092 offset_in_r0 -= GET_MODE_SIZE (mode);
6093
6094 mem_rtx = pre_dec;
6095 }
6096 else if (sp_in_r0)
6097 mem_rtx = gen_frame_mem (mode, r0);
6098 else
6099 mem_rtx = gen_frame_mem (mode,
6100 gen_rtx_PLUS (Pmode,
6101 stack_pointer_rtx,
6102 r0));
6103
6104 /* We must not use an r0-based address for target-branch
6105 registers or for special registers without pre-dec
6106 memory addresses, since we store their values in r0
6107 first. */
6108 gcc_assert (!TARGET_REGISTER_P (reg)
6109 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6110 || mem_rtx == pre_dec));
6111
6112 addr_ok:
6113 orig_reg_rtx = reg_rtx;
6114 if (TARGET_REGISTER_P (reg)
6115 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6116 && mem_rtx != pre_dec))
6117 {
6118 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6119
6120 emit_move_insn (tmp_reg, reg_rtx);
6121
6122 if (REGNO (tmp_reg) == R0_REG)
6123 {
6124 offset_in_r0 = -1;
6125 sp_in_r0 = 0;
6126 gcc_assert (!refers_to_regno_p
6127 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6128 }
6129
6130 if (*++tmp_pnt <= 0)
6131 tmp_pnt = schedule.temps;
6132
6133 reg_rtx = tmp_reg;
6134 }
6135 {
6136 rtx insn;
6137
6138 /* Mark as interesting for the dwarf cfi generator. */
6139 insn = emit_move_insn (mem_rtx, reg_rtx);
6140 RTX_FRAME_RELATED_P (insn) = 1;
6141 /* If we use an intermediate register for the save, we can't
6142 describe this exactly in cfi as a copy of the to-be-saved
6143 register into the temporary register and then the temporary
6144 register on the stack, because the temporary register can
6145 have a different natural size than the to-be-saved register.
6146 Thus, we gloss over the intermediate copy and pretend we do
6147 a direct save from the to-be-saved register. */
6148 if (REGNO (reg_rtx) != reg)
6149 {
6150 rtx set, note_rtx;
6151
6152 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6153 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6154 REG_NOTES (insn));
6155 REG_NOTES (insn) = note_rtx;
6156 }
6157
6158 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6159 {
6160 rtx reg_rtx = gen_rtx_REG (mode, reg);
6161 rtx set, note_rtx;
6162 rtx mem_rtx = gen_frame_mem (mode,
6163 gen_rtx_PLUS (Pmode,
6164 stack_pointer_rtx,
6165 GEN_INT (offset)));
6166
6167 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6168 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6169 REG_NOTES (insn));
6170 REG_NOTES (insn) = note_rtx;
6171 }
6172 }
6173 }
6174
6175 gcc_assert (entry->offset == d_rounding);
6176 }
6177 else
6178 push_regs (&live_regs_mask, current_function_interrupt);
6179
6180 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6181 {
6182 rtx insn = get_last_insn ();
6183 rtx last = emit_insn (gen_GOTaddr2picreg ());
6184
6185 /* Mark these insns as possibly dead. Sometimes, flow2 may
6186 delete all uses of the PIC register. In this case, let it
6187 delete the initialization too. */
6188 do
6189 {
6190 insn = NEXT_INSN (insn);
6191
6192 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6193 const0_rtx,
6194 REG_NOTES (insn));
6195 }
6196 while (insn != last);
6197 }
6198
6199 if (SHMEDIA_REGS_STACK_ADJUST ())
6200 {
6201 /* This must NOT go through the PLT, otherwise mach and macl
6202 may be clobbered. */
6203 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6204 (TARGET_FPU_ANY
6205 ? "__GCC_push_shmedia_regs"
6206 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6207 emit_insn (gen_shmedia_save_restore_regs_compact
6208 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6209 }
6210
6211 if (target_flags != save_flags && ! current_function_interrupt)
6212 {
6213 rtx insn = emit_insn (gen_toggle_sz ());
6214
6215 /* If we're lucky, a mode switch in the function body will
6216 overwrite fpscr, turning this insn dead. Tell flow this
6217 insn is ok to delete. */
6218 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6219 const0_rtx,
6220 REG_NOTES (insn));
6221 }
6222
6223 target_flags = save_flags;
6224
6225 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6226 stack_pointer_rtx, 0, NULL);
6227
6228 if (frame_pointer_needed)
6229 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6230
6231 if (TARGET_SHCOMPACT
6232 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6233 {
6234 /* This must NOT go through the PLT, otherwise mach and macl
6235 may be clobbered. */
6236 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6237 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6238 emit_insn (gen_shcompact_incoming_args ());
6239 }
6240 }
6241
6242 void
6243 sh_expand_epilogue (bool sibcall_p)
6244 {
6245 HARD_REG_SET live_regs_mask;
6246 int d, i;
6247 int d_rounding = 0;
6248
6249 int save_flags = target_flags;
6250 int frame_size, save_size;
6251 int fpscr_deferred = 0;
6252 int e = sibcall_p ? -1 : 1;
6253
6254 d = calc_live_regs (&live_regs_mask);
6255
6256 save_size = d;
6257 frame_size = rounded_frame_size (d);
6258
6259 if (TARGET_SH5)
6260 {
6261 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6262 int total_size;
6263 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6264 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6265 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6266
6267 total_size = d + tregs_space;
6268 total_size += rounded_frame_size (total_size);
6269 save_size = total_size - frame_size;
6270
6271 /* If adjusting the stack in a single step costs nothing extra, do so.
6272 I.e. either if a single addi is enough, or we need a movi anyway,
6273 and we don't exceed the maximum offset range (the test for the
6274 latter is conservative for simplicity). */
6275 if (TARGET_SHMEDIA
6276 && ! frame_pointer_needed
6277 && (CONST_OK_FOR_I10 (total_size)
6278 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6279 && total_size <= 2044)))
6280 d_rounding = frame_size;
6281
6282 frame_size -= d_rounding;
6283 }
6284
6285 if (frame_pointer_needed)
6286 {
6287 /* We must avoid scheduling the epilogue with previous basic blocks
6288 when exception handling is enabled. See PR/18032. */
6289 if (flag_exceptions)
6290 emit_insn (gen_blockage ());
6291 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6292 &live_regs_mask);
6293
6294 /* We must avoid moving the stack pointer adjustment past code
6295 which reads from the local frame, else an interrupt could
6296 occur after the SP adjustment and clobber data in the local
6297 frame. */
6298 emit_insn (gen_blockage ());
6299 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6300 }
6301 else if (frame_size)
6302 {
6303 /* We must avoid moving the stack pointer adjustment past code
6304 which reads from the local frame, else an interrupt could
6305 occur after the SP adjustment and clobber data in the local
6306 frame. */
6307 emit_insn (gen_blockage ());
6308 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6309 }
6310
6311 if (SHMEDIA_REGS_STACK_ADJUST ())
6312 {
6313 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6314 (TARGET_FPU_ANY
6315 ? "__GCC_pop_shmedia_regs"
6316 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6317 /* This must NOT go through the PLT, otherwise mach and macl
6318 may be clobbered. */
6319 emit_insn (gen_shmedia_save_restore_regs_compact
6320 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6321 }
6322
6323 /* Pop all the registers. */
6324
6325 if (target_flags != save_flags && ! current_function_interrupt)
6326 emit_insn (gen_toggle_sz ());
6327 if (TARGET_SH5)
6328 {
6329 int offset_base, offset;
6330 int offset_in_r0 = -1;
6331 int sp_in_r0 = 0;
6332 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6333 save_schedule schedule;
6334 save_entry *entry;
6335 int *tmp_pnt;
6336
6337 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6338 offset_base = -entry[1].offset + d_rounding;
6339 tmp_pnt = schedule.temps;
6340 for (; entry->mode != VOIDmode; entry--)
6341 {
6342 enum machine_mode mode = entry->mode;
6343 int reg = entry->reg;
6344 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6345
6346 offset = offset_base + entry->offset;
6347 reg_rtx = gen_rtx_REG (mode, reg);
6348
6349 mem_rtx = gen_frame_mem (mode,
6350 gen_rtx_PLUS (Pmode,
6351 stack_pointer_rtx,
6352 GEN_INT (offset)));
6353
6354 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6355
6356 mem_rtx = NULL_RTX;
6357
6358 try_post_inc:
6359 do
6360 if (HAVE_POST_INCREMENT
6361 && (offset == offset_in_r0
6362 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6363 && mem_rtx == NULL_RTX)
6364 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6365 {
6366 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6367
6368 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6369 post_inc_ok);
6370
6371 post_inc = NULL_RTX;
6372
6373 break;
6374
6375 post_inc_ok:
6376 mem_rtx = NULL_RTX;
6377 }
6378 while (0);
6379
6380 if (mem_rtx != NULL_RTX)
6381 goto addr_ok;
6382
6383 if (offset_in_r0 == -1)
6384 {
6385 emit_move_insn (r0, GEN_INT (offset));
6386 offset_in_r0 = offset;
6387 }
6388 else if (offset != offset_in_r0)
6389 {
6390 emit_move_insn (r0,
6391 gen_rtx_PLUS
6392 (Pmode, r0,
6393 GEN_INT (offset - offset_in_r0)));
6394 offset_in_r0 += offset - offset_in_r0;
6395 }
6396
6397 if (post_inc != NULL_RTX)
6398 {
6399 if (! sp_in_r0)
6400 {
6401 emit_move_insn (r0,
6402 gen_rtx_PLUS
6403 (Pmode, r0, stack_pointer_rtx));
6404 sp_in_r0 = 1;
6405 }
6406
6407 mem_rtx = post_inc;
6408
6409 offset_in_r0 += GET_MODE_SIZE (mode);
6410 }
6411 else if (sp_in_r0)
6412 mem_rtx = gen_frame_mem (mode, r0);
6413 else
6414 mem_rtx = gen_frame_mem (mode,
6415 gen_rtx_PLUS (Pmode,
6416 stack_pointer_rtx,
6417 r0));
6418
6419 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6420 || mem_rtx == post_inc);
6421
6422 addr_ok:
6423 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6424 && mem_rtx != post_inc)
6425 {
6426 insn = emit_move_insn (r0, mem_rtx);
6427 mem_rtx = r0;
6428 }
6429 else if (TARGET_REGISTER_P (reg))
6430 {
6431 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6432
6433 /* Give the scheduler a bit of freedom by using up to
6434 MAX_TEMPS registers in a round-robin fashion. */
6435 insn = emit_move_insn (tmp_reg, mem_rtx);
6436 mem_rtx = tmp_reg;
6437 if (*++tmp_pnt < 0)
6438 tmp_pnt = schedule.temps;
6439 }
6440
6441 insn = emit_move_insn (reg_rtx, mem_rtx);
6442 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6443 /* This is dead, unless we return with a sibcall. */
6444 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6445 const0_rtx,
6446 REG_NOTES (insn));
6447 }
6448
6449 gcc_assert (entry->offset + offset_base == d + d_rounding);
6450 }
6451 else /* ! TARGET_SH5 */
6452 {
6453 save_size = 0;
6454 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6455 pop (PR_REG);
6456 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6457 {
6458 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6459
6460 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6461 && hard_regs_intersect_p (&live_regs_mask,
6462 &reg_class_contents[DF_REGS]))
6463 fpscr_deferred = 1;
6464 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6465 pop (j);
6466 if (j == FIRST_FP_REG && fpscr_deferred)
6467 pop (FPSCR_REG);
6468
6469 }
6470 }
6471 if (target_flags != save_flags && ! current_function_interrupt)
6472 emit_insn (gen_toggle_sz ());
6473 target_flags = save_flags;
6474
6475 output_stack_adjust (current_function_pretend_args_size
6476 + save_size + d_rounding
6477 + current_function_args_info.stack_regs * 8,
6478 stack_pointer_rtx, e, NULL);
6479
6480 if (current_function_calls_eh_return)
6481 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6482 EH_RETURN_STACKADJ_RTX));
6483
6484 /* Switch back to the normal stack if necessary. */
6485 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6486 emit_insn (gen_sp_switch_2 ());
6487
6488 /* Tell flow the insn that pops PR isn't dead. */
6489 /* PR_REG will never be live in SHmedia mode, and we don't need to
6490 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6491 by the return pattern. */
6492 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6493 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6494 }
6495
6496 static int sh_need_epilogue_known = 0;
6497
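/* Return nonzero if the current function needs an epilogue. The answer
 is computed by generating a trial epilogue sequence and is cached in
 sh_need_epilogue_known. */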
6498 int
6499 sh_need_epilogue (void)
6500 {
6501 if (! sh_need_epilogue_known)
6502 {
6503 rtx epilogue;
6504
6505 start_sequence ();
6506 sh_expand_epilogue (0);
6507 epilogue = get_insns ();
6508 end_sequence ();
6509 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6510 }
6511 return sh_need_epilogue_known > 0;
6512 }
6513
6514 /* Emit code to change the current function's return address to RA.
6515 TEMP is available as a scratch register, if needed. */
6516
6517 void
6518 sh_set_return_address (rtx ra, rtx tmp)
6519 {
6520 HARD_REG_SET live_regs_mask;
6521 int d;
6522 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6523 int pr_offset;
6524
6525 d = calc_live_regs (&live_regs_mask);
6526
6527 /* If pr_reg isn't live, we can set it (or the register given by
6528 sh_media_register_for_return) directly. */
6529 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6530 {
6531 rtx rr;
6532
6533 if (TARGET_SHMEDIA)
6534 {
6535 int rr_regno = sh_media_register_for_return ();
6536
6537 if (rr_regno < 0)
6538 rr_regno = pr_reg;
6539
6540 rr = gen_rtx_REG (DImode, rr_regno);
6541 }
6542 else
6543 rr = gen_rtx_REG (SImode, pr_reg);
6544
6545 emit_insn (GEN_MOV (rr, ra));
6546 /* Tell flow the register for return isn't dead. */
6547 emit_insn (gen_rtx_USE (VOIDmode, rr));
6548 return;
6549 }
6550
6551 if (TARGET_SH5)
6552 {
6553 int offset;
6554 save_schedule schedule;
6555 save_entry *entry;
6556
6557 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6558 offset = entry[1].offset;
6559 for (; entry->mode != VOIDmode; entry--)
6560 if (entry->reg == pr_reg)
6561 goto found;
6562
6563 /* We can't find the PR register. */
6564 gcc_unreachable ();
6565
6566 found:
6567 offset = entry->offset - offset;
6568 pr_offset = (rounded_frame_size (d) + offset
6569 + SHMEDIA_REGS_STACK_ADJUST ());
6570 }
6571 else
6572 pr_offset = rounded_frame_size (d);
6573
6574 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6575 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6576
6577 tmp = gen_frame_mem (Pmode, tmp);
6578 emit_insn (GEN_MOV (tmp, ra));
6579 }
6580
6581 /* Clear variables at function end. */
6582
6583 static void
6584 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6585 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6586 {
6587 sh_need_epilogue_known = 0;
6588 }
6589
6590 static rtx
6591 sh_builtin_saveregs (void)
6592 {
6593 /* First unnamed integer register. */
6594 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6595 /* Number of integer registers we need to save. */
6596 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6597 /* First unnamed SFmode float reg */
6598 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6599 /* Number of SFmode float regs to save. */
6600 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6601 rtx regbuf, fpregs;
6602 int bufsize, regno;
6603 HOST_WIDE_INT alias_set;
6604
6605 if (TARGET_SH5)
6606 {
6607 if (n_intregs)
6608 {
6609 int pushregs = n_intregs;
6610
6611 while (pushregs < NPARM_REGS (SImode) - 1
6612 && (CALL_COOKIE_INT_REG_GET
6613 (current_function_args_info.call_cookie,
6614 NPARM_REGS (SImode) - pushregs)
6615 == 1))
6616 {
6617 current_function_args_info.call_cookie
6618 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6619 - pushregs, 1);
6620 pushregs++;
6621 }
6622
6623 if (pushregs == NPARM_REGS (SImode))
6624 current_function_args_info.call_cookie
6625 |= (CALL_COOKIE_INT_REG (0, 1)
6626 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6627 else
6628 current_function_args_info.call_cookie
6629 |= CALL_COOKIE_STACKSEQ (pushregs);
6630
6631 current_function_pretend_args_size += 8 * n_intregs;
6632 }
6633 if (TARGET_SHCOMPACT)
6634 return const0_rtx;
6635 }
6636
6637 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6638 {
6639 error ("__builtin_saveregs not supported by this subtarget");
6640 return const0_rtx;
6641 }
6642
6643 if (TARGET_SHMEDIA)
6644 n_floatregs = 0;
6645
6646 /* Allocate block of memory for the regs. */
6647 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6648 Or can assign_stack_local accept a 0 SIZE argument? */
6649 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6650
6651 if (TARGET_SHMEDIA)
6652 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6653 else if (n_floatregs & 1)
6654 {
6655 rtx addr;
6656
6657 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6658 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6659 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6660 regbuf = change_address (regbuf, BLKmode, addr);
6661 }
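 /* When the stack only guarantees 4-byte alignment but we save
 double-precision registers, round the buffer address up to an
 8-byte boundary; the extra UNITS_PER_WORD allocated below leaves
 room for this adjustment. */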
6662 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6663 {
6664 rtx addr, mask;
6665
6666 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6667 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6668 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6669 emit_insn (gen_andsi3 (addr, addr, mask));
6670 regbuf = change_address (regbuf, BLKmode, addr);
6671 }
6672 else
6673 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6674 alias_set = get_varargs_alias_set ();
6675 set_mem_alias_set (regbuf, alias_set);
6676
6677 /* Save int args.
6678 This is optimized to only save the regs that are necessary. Explicitly
6679 named args need not be saved. */
6680 if (n_intregs > 0)
6681 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6682 adjust_address (regbuf, BLKmode,
6683 n_floatregs * UNITS_PER_WORD),
6684 n_intregs);
6685
6686 if (TARGET_SHMEDIA)
6687 /* Return the address of the regbuf. */
6688 return XEXP (regbuf, 0);
6689
6690 /* Save float args.
6691 This is optimized to only save the regs that are necessary. Explicitly
6692 named args need not be saved.
6693 We explicitly build a pointer to the buffer because it halves the insn
6694 count when not optimizing (otherwise the pointer is built for each reg
6695 saved).
6696 We emit the moves in reverse order so that we can use predecrement. */
6697
6698 fpregs = copy_to_mode_reg (Pmode,
6699 plus_constant (XEXP (regbuf, 0),
6700 n_floatregs * UNITS_PER_WORD));
6701 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6702 {
6703 rtx mem;
6704 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6705 {
6706 emit_insn (gen_addsi3 (fpregs, fpregs,
6707 GEN_INT (-2 * UNITS_PER_WORD)));
6708 mem = change_address (regbuf, DFmode, fpregs);
6709 emit_move_insn (mem,
6710 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6711 }
6712 regno = first_floatreg;
6713 if (regno & 1)
6714 {
6715 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6716 mem = change_address (regbuf, SFmode, fpregs);
6717 emit_move_insn (mem,
6718 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6719 - (TARGET_LITTLE_ENDIAN != 0)));
6720 }
6721 }
6722 else
6723 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6724 {
6725 rtx mem;
6726
6727 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6728 mem = change_address (regbuf, SFmode, fpregs);
6729 emit_move_insn (mem,
6730 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6731 }
6732
6733 /* Return the address of the regbuf. */
6734 return XEXP (regbuf, 0);
6735 }
6736
6737 /* Define the `__builtin_va_list' type for the ABI. */
6738
6739 static tree
6740 sh_build_builtin_va_list (void)
6741 {
6742 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6743 tree record;
6744
6745 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6746 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6747 return ptr_type_node;
6748
6749 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6750
6751 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6752 ptr_type_node);
6753 f_next_o_limit = build_decl (FIELD_DECL,
6754 get_identifier ("__va_next_o_limit"),
6755 ptr_type_node);
6756 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6757 ptr_type_node);
6758 f_next_fp_limit = build_decl (FIELD_DECL,
6759 get_identifier ("__va_next_fp_limit"),
6760 ptr_type_node);
6761 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6762 ptr_type_node);
6763
6764 DECL_FIELD_CONTEXT (f_next_o) = record;
6765 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6766 DECL_FIELD_CONTEXT (f_next_fp) = record;
6767 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6768 DECL_FIELD_CONTEXT (f_next_stack) = record;
6769
6770 TYPE_FIELDS (record) = f_next_o;
6771 TREE_CHAIN (f_next_o) = f_next_o_limit;
6772 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6773 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6774 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6775
6776 layout_type (record);
6777
6778 return record;
6779 }
6780
6781 /* Implement `va_start' for varargs and stdarg. */
6782
6783 void
6784 sh_va_start (tree valist, rtx nextarg)
6785 {
6786 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6787 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6788 tree t, u;
6789 int nfp, nint;
6790
6791 if (TARGET_SH5)
6792 {
6793 expand_builtin_saveregs ();
6794 std_expand_builtin_va_start (valist, nextarg);
6795 return;
6796 }
6797
6798 if ((! TARGET_SH2E && ! TARGET_SH4)
6799 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6800 {
6801 std_expand_builtin_va_start (valist, nextarg);
6802 return;
6803 }
6804
6805 f_next_o = TYPE_FIELDS (va_list_type_node);
6806 f_next_o_limit = TREE_CHAIN (f_next_o);
6807 f_next_fp = TREE_CHAIN (f_next_o_limit);
6808 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6809 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6810
6811 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6812 NULL_TREE);
6813 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6814 valist, f_next_o_limit, NULL_TREE);
6815 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6816 NULL_TREE);
6817 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6818 valist, f_next_fp_limit, NULL_TREE);
6819 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6820 valist, f_next_stack, NULL_TREE);
6821
6822 /* Call __builtin_saveregs. */
6823 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6824 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6825 TREE_SIDE_EFFECTS (t) = 1;
6826 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6827
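 /* NFP becomes the number of floating point argument registers that are
 still unnamed (and therefore saved by __builtin_saveregs); NINT below
 is computed the same way for the integer argument registers. */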
6828 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6829 if (nfp < 8)
6830 nfp = 8 - nfp;
6831 else
6832 nfp = 0;
6833 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6834 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6835 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6836 TREE_SIDE_EFFECTS (t) = 1;
6837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6838
6839 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6840 TREE_SIDE_EFFECTS (t) = 1;
6841 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6842
6843 nint = current_function_args_info.arg_count[SH_ARG_INT];
6844 if (nint < 4)
6845 nint = 4 - nint;
6846 else
6847 nint = 0;
6848 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6849 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6850 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6851 TREE_SIDE_EFFECTS (t) = 1;
6852 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6853
6854 u = make_tree (ptr_type_node, nextarg);
6855 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6856 TREE_SIDE_EFFECTS (t) = 1;
6857 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6858 }
6859
6860 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
6861 member, return it. */
6862 static tree
6863 find_sole_member (tree type)
6864 {
6865 tree field, member = NULL_TREE;
6866
6867 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6868 {
6869 if (TREE_CODE (field) != FIELD_DECL)
6870 continue;
6871 if (!DECL_SIZE (field))
6872 return NULL_TREE;
6873 if (integer_zerop (DECL_SIZE (field)))
6874 continue;
6875 if (member)
6876 return NULL_TREE;
6877 member = field;
6878 }
6879 return member;
6880 }
6881 /* Implement `va_arg'. */
6882
6883 static tree
6884 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6885 tree *post_p ATTRIBUTE_UNUSED)
6886 {
6887 HOST_WIDE_INT size, rsize;
6888 tree tmp, pptr_type_node;
6889 tree addr, lab_over = NULL, result = NULL;
6890 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6891 tree eff_type;
6892
6893 if (pass_by_ref)
6894 type = build_pointer_type (type);
6895
6896 size = int_size_in_bytes (type);
6897 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6898 pptr_type_node = build_pointer_type (ptr_type_node);
6899
6900 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6901 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6902 {
6903 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6904 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6905 int pass_as_float;
6906 tree lab_false;
6907 tree member;
6908
6909 f_next_o = TYPE_FIELDS (va_list_type_node);
6910 f_next_o_limit = TREE_CHAIN (f_next_o);
6911 f_next_fp = TREE_CHAIN (f_next_o_limit);
6912 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6913 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6914
6915 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6916 NULL_TREE);
6917 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6918 valist, f_next_o_limit, NULL_TREE);
6919 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6920 valist, f_next_fp, NULL_TREE);
6921 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6922 valist, f_next_fp_limit, NULL_TREE);
6923 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6924 valist, f_next_stack, NULL_TREE);
6925
6926 /* Structures with a single member with a distinct mode are passed
6927 like their member. This is relevant if the latter has a REAL_TYPE
6928 or COMPLEX_TYPE type. */
6929 eff_type = type;
6930 while (TREE_CODE (eff_type) == RECORD_TYPE
6931 && (member = find_sole_member (eff_type))
6932 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6933 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6934 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6935 {
6936 tree field_type = TREE_TYPE (member);
6937
6938 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6939 eff_type = field_type;
6940 else
6941 {
6942 gcc_assert ((TYPE_ALIGN (eff_type)
6943 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6944 || (TYPE_ALIGN (eff_type)
6945 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6946 break;
6947 }
6948 }
6949
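 /* Decide whether the argument is fetched from the floating point
 register save area: on SH4 this covers single and double floats and
 complex floats up to 16 bytes, while otherwise only 4-byte REAL_TYPEs
 are passed this way. */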
6950 if (TARGET_SH4)
6951 {
6952 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6953 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6954 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6955 && size <= 16));
6956 }
6957 else
6958 {
6959 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6960 }
6961
6962 addr = create_tmp_var (pptr_type_node, NULL);
6963 lab_false = create_artificial_label ();
6964 lab_over = create_artificial_label ();
6965
6966 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6967
6968 if (pass_as_float)
6969 {
6970 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6971 tree cmp;
6972 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6973
6974 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6975 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6976 gimplify_and_add (tmp, pre_p);
6977
6978 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6979 gimplify_and_add (tmp, pre_p);
6980 tmp = next_fp_limit;
6981 if (size > 4 && !is_double)
6982 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6983 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6984 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6985 cmp = build3 (COND_EXPR, void_type_node, tmp,
6986 build1 (GOTO_EXPR, void_type_node, lab_false),
6987 NULL_TREE);
6988 if (!is_double)
6989 gimplify_and_add (cmp, pre_p);
6990
6991 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6992 || (is_double || size == 16))
6993 {
6994 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6995 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6996 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6997 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6998 gimplify_and_add (tmp, pre_p);
6999 }
7000 if (is_double)
7001 gimplify_and_add (cmp, pre_p);
7002
7003 #ifdef FUNCTION_ARG_SCmode_WART
7004 if (TYPE_MODE (eff_type) == SCmode
7005 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7006 {
7007 tree subtype = TREE_TYPE (eff_type);
7008 tree real, imag;
7009
7010 imag
7011 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7012 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7013
7014 real
7015 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7016 real = get_initialized_tmp_var (real, pre_p, NULL);
7017
7018 result = build2 (COMPLEX_EXPR, type, real, imag);
7019 result = get_initialized_tmp_var (result, pre_p, NULL);
7020 }
7021 #endif /* FUNCTION_ARG_SCmode_WART */
7022
7023 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7024 gimplify_and_add (tmp, pre_p);
7025
7026 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7027 gimplify_and_add (tmp, pre_p);
7028
7029 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7030 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7031 gimplify_and_add (tmp, pre_p);
7032 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
7033 gimplify_and_add (tmp, pre_p);
7034
7035 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
7036 gimplify_and_add (tmp, post_p);
7037 valist = next_fp_tmp;
7038 }
7039 else
7040 {
7041 tmp = fold_convert (ptr_type_node, size_int (rsize));
7042 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7043 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7044 tmp = build3 (COND_EXPR, void_type_node, tmp,
7045 build1 (GOTO_EXPR, void_type_node, lab_false),
7046 NULL_TREE);
7047 gimplify_and_add (tmp, pre_p);
7048
7049 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7050 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7051 gimplify_and_add (tmp, pre_p);
7052
7053 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7054 gimplify_and_add (tmp, pre_p);
7055
7056 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7057 gimplify_and_add (tmp, pre_p);
7058
7059 if (size > 4 && ! TARGET_SH4)
7060 {
7061 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
7062 gimplify_and_add (tmp, pre_p);
7063 }
7064
7065 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7066 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7067 gimplify_and_add (tmp, pre_p);
7068 }
7069
7070 if (!result)
7071 {
7072 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7073 gimplify_and_add (tmp, pre_p);
7074 }
7075 }
7076
7077 /* ??? In va-sh.h, there had been code to make values larger than
7078 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7079
7080 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7081 if (result)
7082 {
7083 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
7084 gimplify_and_add (tmp, pre_p);
7085
7086 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7087 gimplify_and_add (tmp, pre_p);
7088 }
7089 else
7090 result = tmp;
7091
7092 if (pass_by_ref)
7093 result = build_va_arg_indirect_ref (result);
7094
7095 return result;
7096 }
7097
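/* Return nonzero if arguments declared with a narrow integral type in a
 prototype should still be promoted to a full word when passed; the
 Renesas / Hitachi calling convention does not do this promotion. */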
7098 bool
7099 sh_promote_prototypes (tree type)
7100 {
7101 if (TARGET_HITACHI)
7102 return 0;
7103 if (! type)
7104 return 1;
7105 return ! sh_attr_renesas_p (type);
7106 }
7107
7108 /* Whether an argument must be passed by reference. On SHcompact, we
7109 pretend arguments wider than 32 bits that would have been passed in
7110 registers are passed by reference, so that an SHmedia trampoline
7111 loads them into the full 64-bit registers. */
7112
7113 static int
7114 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7115 tree type, bool named)
7116 {
7117 unsigned HOST_WIDE_INT size;
7118
7119 if (type)
7120 size = int_size_in_bytes (type);
7121 else
7122 size = GET_MODE_SIZE (mode);
7123
7124 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7125 && (!named
7126 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7127 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7128 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7129 && size > 4
7130 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7131 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7132 return size;
7133 else
7134 return 0;
7135 }
7136
7137 static bool
7138 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7139 tree type, bool named)
7140 {
7141 if (targetm.calls.must_pass_in_stack (mode, type))
7142 return true;
7143
7144 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7145 wants to know about pass-by-reference semantics for incoming
7146 arguments. */
7147 if (! cum)
7148 return false;
7149
7150 if (TARGET_SHCOMPACT)
7151 {
7152 cum->byref = shcompact_byref (cum, mode, type, named);
7153 return cum->byref != 0;
7154 }
7155
7156 return false;
7157 }
7158
7159 static bool
7160 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7161 tree type, bool named ATTRIBUTE_UNUSED)
7162 {
7163 /* ??? How can it possibly be correct to return true only on the
7164 caller side of the equation? Is there someplace else in the
7165 sh backend that's magically producing the copies? */
7166 return (cum->outgoing
7167 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7168 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7169 }
7170
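/* Return the number of bytes of an argument that are passed in registers
when the argument is split between registers and the stack
(the TARGET_ARG_PARTIAL_BYTES hook). */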
7171 static int
7172 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7173 tree type, bool named ATTRIBUTE_UNUSED)
7174 {
7175 int words = 0;
7176
7177 if (!TARGET_SH5
7178 && PASS_IN_REG_P (*cum, mode, type)
7179 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7180 && (ROUND_REG (*cum, mode)
7181 + (mode != BLKmode
7182 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7183 : ROUND_ADVANCE (int_size_in_bytes (type)))
7184 > NPARM_REGS (mode)))
7185 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7186
7187 else if (!TARGET_SHCOMPACT
7188 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7189 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7190
7191 return words * UNITS_PER_WORD;
7192 }
7193
7194
7195 /* Define where to put the arguments to a function.
7196 Value is zero to push the argument on the stack,
7197 or a hard register in which to store the argument.
7198
7199 MODE is the argument's machine mode.
7200 TYPE is the data type of the argument (as a tree).
7201 This is null for libcalls where that information may
7202 not be available.
7203 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7204 the preceding args and about the function being called.
7205 NAMED is nonzero if this argument is a named parameter
7206 (otherwise it is an extra parameter matching an ellipsis).
7207
7208 On SH the first args are normally in registers
7209 and the rest are pushed. Any arg that starts within the first
7210 NPARM_REGS words is at least partially passed in a register unless
7211 its data type forbids. */
7212
7213
7214 rtx
7215 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7216 tree type, int named)
7217 {
7218 if (! TARGET_SH5 && mode == VOIDmode)
7219 return GEN_INT (ca->renesas_abi ? 1 : 0);
7220
7221 if (! TARGET_SH5
7222 && PASS_IN_REG_P (*ca, mode, type)
7223 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7224 {
7225 int regno;
7226
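/* An SCmode argument on little-endian SH4 is described by a PARALLEL of
two SFmode registers; the low bit of each register number is flipped
relative to the usual assignment, matching the FUNCTION_ARG_SCmode_WART
handling in the va_arg expansion above. */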
7227 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7228 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7229 {
7230 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7231 gen_rtx_REG (SFmode,
7232 BASE_ARG_REG (mode)
7233 + (ROUND_REG (*ca, mode) ^ 1)),
7234 const0_rtx);
7235 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7236 gen_rtx_REG (SFmode,
7237 BASE_ARG_REG (mode)
7238 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7239 GEN_INT (4));
7240 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
7241 }
7242
7243 /* If the alignment of a DF value causes an SF register to be
7244 skipped, we will use that skipped register for the next SF
7245 value. */
7246 if ((TARGET_HITACHI || ca->renesas_abi)
7247 && ca->free_single_fp_reg
7248 && mode == SFmode)
7249 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7250
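/* For a lone SFmode argument on little-endian SH4 under the GCC ABI, the
XOR flips the low bit of the register number, mirroring the swapped
SCmode assignment above. */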
7251 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7252 ^ (mode == SFmode && TARGET_SH4
7253 && TARGET_LITTLE_ENDIAN != 0
7254 && ! TARGET_HITACHI && ! ca->renesas_abi);
7255 return gen_rtx_REG (mode, regno);
7256
7257 }
7258
7259 if (TARGET_SH5)
7260 {
7261 if (mode == VOIDmode && TARGET_SHCOMPACT)
7262 return GEN_INT (ca->call_cookie);
7263
7264 /* The following test assumes unnamed arguments are promoted to
7265 DFmode. */
7266 if (mode == SFmode && ca->free_single_fp_reg)
7267 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7268
7269 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7270 && (named || ! ca->prototype_p)
7271 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7272 {
7273 if (! ca->prototype_p && TARGET_SHMEDIA)
7274 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7275
7276 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7277 FIRST_FP_PARM_REG
7278 + ca->arg_count[(int) SH_ARG_FLOAT]);
7279 }
7280
7281 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7282 && (! TARGET_SHCOMPACT
7283 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7284 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7285 type, named))))
7286 {
7287 return gen_rtx_REG (mode, (FIRST_PARM_REG
7288 + ca->arg_count[(int) SH_ARG_INT]));
7289 }
7290
7291 return 0;
7292 }
7293
7294 return 0;
7295 }
7296
7297 /* Update the data in CUM to advance over an argument
7298 of mode MODE and data type TYPE.
7299 (TYPE is null for libcalls where that information may not be
7300 available.) */
7301
7302 void
7303 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7304 tree type, int named)
7305 {
7306 if (ca->force_mem)
7307 ca->force_mem = 0;
7308 else if (TARGET_SH5)
7309 {
7310 tree type2 = (ca->byref && type
7311 ? TREE_TYPE (type)
7312 : type);
7313 enum machine_mode mode2 = (ca->byref && type
7314 ? TYPE_MODE (type2)
7315 : mode);
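/* DWORDS is the size of the (possibly by-reference) argument in 8-byte
quanta; for a by-reference argument ca->byref already holds the size in
bytes. NUMREGS is how many of those quanta still fit in the remaining
integer argument registers. */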
7316 int dwords = ((ca->byref
7317 ? ca->byref
7318 : mode2 == BLKmode
7319 ? int_size_in_bytes (type2)
7320 : GET_MODE_SIZE (mode2)) + 7) / 8;
7321 int numregs = MIN (dwords, NPARM_REGS (SImode)
7322 - ca->arg_count[(int) SH_ARG_INT]);
7323
7324 if (numregs)
7325 {
7326 ca->arg_count[(int) SH_ARG_INT] += numregs;
7327 if (TARGET_SHCOMPACT
7328 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7329 {
7330 ca->call_cookie
7331 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7332 - numregs, 1);
7333 /* N.B. We want this also for outgoing. */
7334 ca->stack_regs += numregs;
7335 }
7336 else if (ca->byref)
7337 {
7338 if (! ca->outgoing)
7339 ca->stack_regs += numregs;
7340 ca->byref_regs += numregs;
7341 ca->byref = 0;
7342 do
7343 ca->call_cookie
7344 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7345 - numregs, 2);
7346 while (--numregs);
7347 ca->call_cookie
7348 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7349 - 1, 1);
7350 }
7351 else if (dwords > numregs)
7352 {
7353 int pushregs = numregs;
7354
7355 if (TARGET_SHCOMPACT)
7356 ca->stack_regs += numregs;
7357 while (pushregs < NPARM_REGS (SImode) - 1
7358 && (CALL_COOKIE_INT_REG_GET
7359 (ca->call_cookie,
7360 NPARM_REGS (SImode) - pushregs)
7361 == 1))
7362 {
7363 ca->call_cookie
7364 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7365 - pushregs, 1);
7366 pushregs++;
7367 }
7368 if (numregs == NPARM_REGS (SImode))
7369 ca->call_cookie
7370 |= CALL_COOKIE_INT_REG (0, 1)
7371 | CALL_COOKIE_STACKSEQ (numregs - 1);
7372 else
7373 ca->call_cookie
7374 |= CALL_COOKIE_STACKSEQ (numregs);
7375 }
7376 }
7377 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7378 && (named || ! ca->prototype_p))
7379 {
7380 if (mode2 == SFmode && ca->free_single_fp_reg)
7381 ca->free_single_fp_reg = 0;
7382 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7383 < NPARM_REGS (SFmode))
7384 {
7385 int numfpregs
7386 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7387 NPARM_REGS (SFmode)
7388 - ca->arg_count[(int) SH_ARG_FLOAT]);
7389
7390 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7391
7392 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7393 {
7394 if (ca->outgoing && numregs > 0)
7395 do
7396 {
7397 ca->call_cookie
7398 |= (CALL_COOKIE_INT_REG
7399 (ca->arg_count[(int) SH_ARG_INT]
7400 - numregs + ((numfpregs - 2) / 2),
7401 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7402 - numfpregs) / 2));
7403 }
7404 while (numfpregs -= 2);
7405 }
7406 else if (mode2 == SFmode && (named)
7407 && (ca->arg_count[(int) SH_ARG_FLOAT]
7408 < NPARM_REGS (SFmode)))
7409 ca->free_single_fp_reg
7410 = FIRST_FP_PARM_REG - numfpregs
7411 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7412 }
7413 }
7414 return;
7415 }
7416
7417 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7418 {
7419 /* Note that we've used the skipped register. */
7420 if (mode == SFmode && ca->free_single_fp_reg)
7421 {
7422 ca->free_single_fp_reg = 0;
7423 return;
7424 }
7425 /* When we have a DF after an SF, there's an SF register that gets
7426 skipped in order to align the DF value. We note this skipped
7427 register, because the next SF value will use it, and not the
7428 SF that follows the DF. */
7429 if (mode == DFmode
7430 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7431 {
7432 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7433 + BASE_ARG_REG (mode));
7434 }
7435 }
7436
7437 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7438 || PASS_IN_REG_P (*ca, mode, type))
7439 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7440 = (ROUND_REG (*ca, mode)
7441 + (mode == BLKmode
7442 ? ROUND_ADVANCE (int_size_in_bytes (type))
7443 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7444 }
7445
7446 /* The Renesas calling convention doesn't quite fit into this scheme since
7447 the address is passed like an invisible argument, but one that is always
7448 passed in memory. */
7449 static rtx
7450 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7451 {
7452 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7453 return 0;
7454 return gen_rtx_REG (Pmode, 2);
7455 }
7456
7457 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7458
7459 static bool
7460 sh_return_in_memory (tree type, tree fndecl)
7461 {
7462 if (TARGET_SH5)
7463 {
7464 if (TYPE_MODE (type) == BLKmode)
7465 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7466 else
7467 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7468 }
7469 else
7470 {
7471 return (TYPE_MODE (type) == BLKmode
7472 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7473 && TREE_CODE (type) == RECORD_TYPE));
7474 }
7475 }
7476
7477 /* We actually emit the code in sh_expand_prologue. We used to use
7478 a static variable to flag that we need to emit this code, but that
7479 doesn't work when inlining, when functions are deferred and then emitted
7480 later. Fortunately, we already have two flags that are part of struct
7481 function that tell if a function uses varargs or stdarg. */
7482 static void
7483 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7484 enum machine_mode mode,
7485 tree type,
7486 int *pretend_arg_size,
7487 int second_time ATTRIBUTE_UNUSED)
7488 {
7489 gcc_assert (current_function_stdarg);
7490 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7491 {
7492 int named_parm_regs, anon_parm_regs;
7493
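/* Count the argument registers consumed by the named parameters; any
that remain must be saved by the prologue so that va_arg can find the
anonymous arguments. */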
7494 named_parm_regs = (ROUND_REG (*ca, mode)
7495 + (mode == BLKmode
7496 ? ROUND_ADVANCE (int_size_in_bytes (type))
7497 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7498 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7499 if (anon_parm_regs > 0)
7500 *pretend_arg_size = anon_parm_regs * 4;
7501 }
7502 }
7503
7504 static bool
7505 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7506 {
7507 return TARGET_SH5;
7508 }
7509
7510 static bool
7511 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7512 {
7513 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7514 }
7515
7516
7517 /* Define the offset between two registers, one to be eliminated, and
7518 the other its replacement, at the start of a routine. */
7519
7520 int
7521 initial_elimination_offset (int from, int to)
7522 {
7523 int regs_saved;
7524 int regs_saved_rounding = 0;
7525 int total_saved_regs_space;
7526 int total_auto_space;
7527 int save_flags = target_flags;
7528 int copy_flags;
7529 HARD_REG_SET live_regs_mask;
7530
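/* Work out how much space the prologue reserves: the callee-saved
register block (including any SHmedia target register save area),
rounded up to the stack boundary on SH5, plus the local frame. */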
7531 shmedia_space_reserved_for_target_registers = false;
7532 regs_saved = calc_live_regs (&live_regs_mask);
7533 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7534
7535 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7536 {
7537 shmedia_space_reserved_for_target_registers = true;
7538 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7539 }
7540
7541 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7542 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7543 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7544
7545 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7546 copy_flags = target_flags;
7547 target_flags = save_flags;
7548
7549 total_saved_regs_space = regs_saved + regs_saved_rounding;
7550
7551 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7552 return total_saved_regs_space + total_auto_space
7553 + current_function_args_info.byref_regs * 8;
7554
7555 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7556 return total_saved_regs_space + total_auto_space
7557 + current_function_args_info.byref_regs * 8;
7558
7559 /* Initial gap between fp and sp is 0. */
7560 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7561 return 0;
7562
7563 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7564 return rounded_frame_size (0);
7565
7566 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7567 return rounded_frame_size (0);
7568
7569 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7570 && (to == HARD_FRAME_POINTER_REGNUM
7571 || to == STACK_POINTER_REGNUM));
7572 if (TARGET_SH5)
7573 {
7574 int n = total_saved_regs_space;
7575 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7576 save_schedule schedule;
7577 save_entry *entry;
7578
7579 n += total_auto_space;
7580
7581 /* If it wasn't saved, there's not much we can do. */
7582 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7583 return n;
7584
7585 target_flags = copy_flags;
7586
7587 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7588 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7589 if (entry->reg == pr_reg)
7590 {
7591 target_flags = save_flags;
7592 return entry->offset;
7593 }
7594 gcc_unreachable ();
7595 }
7596 else
7597 return total_auto_space;
7598 }
7599
7600 /* Insert any deferred function attributes from earlier pragmas. */
7601 static void
7602 sh_insert_attributes (tree node, tree *attributes)
7603 {
7604 tree attrs;
7605
7606 if (TREE_CODE (node) != FUNCTION_DECL)
7607 return;
7608
7609 /* We are only interested in fields. */
7610 if (!DECL_P (node))
7611 return;
7612
7613 /* Append the attributes to the deferred attributes. */
7614 *sh_deferred_function_attributes_tail = *attributes;
7615 attrs = sh_deferred_function_attributes;
7616 if (!attrs)
7617 return;
7618
7619 /* Some attributes imply or require the interrupt attribute. */
7620 if (!lookup_attribute ("interrupt_handler", attrs)
7621 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7622 {
7623 /* If we have a trapa_handler, but no interrupt_handler attribute,
7624 insert an interrupt_handler attribute. */
7625 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7626 /* We can't use sh_pr_interrupt here because that's not in the
7627 java frontend. */
7628 attrs
7629 = tree_cons (get_identifier ("interrupt_handler"), NULL_TREE, attrs);
7630 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7631 interrupt attribute is missing, we ignore the attribute and warn. */
7632 else if (lookup_attribute ("sp_switch", attrs)
7633 || lookup_attribute ("trap_exit", attrs)
7634 || lookup_attribute ("nosave_low_regs", attrs))
7635 {
7636 tree *tail;
7637
7638 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7639 {
7640 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7641 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7642 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7643 warning (OPT_Wattributes,
7644 "%qs attribute only applies to interrupt functions",
7645 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7646 else
7647 {
7648 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7649 NULL_TREE);
7650 tail = &TREE_CHAIN (*tail);
7651 }
7652 }
7653 attrs = *attributes;
7654 }
7655 }
7656
7657 /* Install the processed list. */
7658 *attributes = attrs;
7659
7660 /* Clear deferred attributes. */
7661 sh_deferred_function_attributes = NULL_TREE;
7662 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7663
7664 return;
7665 }
7666
7667 /* Supported attributes:
7668
7669 interrupt_handler -- specifies this function is an interrupt handler.
7670
7671 trapa_handler -- like above, but don't save all registers.
7672
7673 sp_switch -- specifies an alternate stack for an interrupt handler
7674 to run on.
7675
7676 trap_exit -- use a trapa to exit an interrupt function instead of
7677 an rte instruction.
7678
7679 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7680 This is useful on the SH3 and upwards,
7681 which have a separate set of low regs for User and Supervisor modes.
7682 This should only be used for the lowest level of interrupts. Higher levels
7683 of interrupts must save the registers in case they themselves are
7684 interrupted.
7685
7686 renesas -- use Renesas calling/layout conventions (functions and
7687 structures).
7688
7689 */
7690
7691 const struct attribute_spec sh_attribute_table[] =
7692 {
7693 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7694 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7695 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7696 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7697 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7698 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7699 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7700 #ifdef SYMBIAN
7701 /* Symbian support adds two new attributes:
7702 dllexport - for exporting a function/variable that will live in a dll
7703 dllimport - for importing a function/variable from a dll
7704
7705 Microsoft allows multiple declspecs in one __declspec, separating
7706 them with spaces. We do NOT support this. Instead, use __declspec
7707 multiple times. */
7708 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7709 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7710 #endif
7711 { NULL, 0, 0, false, false, false, NULL }
7712 };
7713
7714 /* Handle an "interrupt_handler" attribute; arguments as in
7715 struct attribute_spec.handler. */
7716 static tree
7717 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7718 tree args ATTRIBUTE_UNUSED,
7719 int flags ATTRIBUTE_UNUSED,
7720 bool *no_add_attrs)
7721 {
7722 if (TREE_CODE (*node) != FUNCTION_DECL)
7723 {
7724 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7725 IDENTIFIER_POINTER (name));
7726 *no_add_attrs = true;
7727 }
7728 else if (TARGET_SHCOMPACT)
7729 {
7730 error ("attribute interrupt_handler is not compatible with -m5-compact");
7731 *no_add_attrs = true;
7732 }
7733
7734 return NULL_TREE;
7735 }
7736
7737 /* Handle an "sp_switch" attribute; arguments as in
7738 struct attribute_spec.handler. */
7739 static tree
7740 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7741 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7742 {
7743 if (TREE_CODE (*node) != FUNCTION_DECL)
7744 {
7745 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7746 IDENTIFIER_POINTER (name));
7747 *no_add_attrs = true;
7748 }
7749 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7750 {
7751 /* The argument must be a constant string. */
7752 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7753 IDENTIFIER_POINTER (name));
7754 *no_add_attrs = true;
7755 }
7756
7757 return NULL_TREE;
7758 }
7759
7760 /* Handle a "trap_exit" attribute; arguments as in
7761 struct attribute_spec.handler. */
7762 static tree
7763 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7764 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7765 {
7766 if (TREE_CODE (*node) != FUNCTION_DECL)
7767 {
7768 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7769 IDENTIFIER_POINTER (name));
7770 *no_add_attrs = true;
7771 }
7772 /* The argument specifies a trap number to be used in a trapa instruction
7773 at function exit (instead of an rte instruction). */
7774 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7775 {
7776 /* The argument must be a constant integer. */
7777 warning (OPT_Wattributes, "%qs attribute argument not an "
7778 "integer constant", IDENTIFIER_POINTER (name));
7779 *no_add_attrs = true;
7780 }
7781
7782 return NULL_TREE;
7783 }
7784
7785 static tree
7786 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7787 tree name ATTRIBUTE_UNUSED,
7788 tree args ATTRIBUTE_UNUSED,
7789 int flags ATTRIBUTE_UNUSED,
7790 bool *no_add_attrs ATTRIBUTE_UNUSED)
7791 {
7792 return NULL_TREE;
7793 }
7794
7795 /* True if __attribute__((renesas)) or -mrenesas. */
7796 int
7797 sh_attr_renesas_p (tree td)
7798 {
7799 if (TARGET_HITACHI)
7800 return 1;
7801 if (td == 0)
7802 return 0;
7803 if (DECL_P (td))
7804 td = TREE_TYPE (td);
7805 if (td == error_mark_node)
7806 return 0;
7807 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7808 != NULL_TREE);
7809 }
7810
7811 /* True if __attribute__((renesas)) or -mrenesas, for the current
7812 function. */
7813 int
7814 sh_cfun_attr_renesas_p (void)
7815 {
7816 return sh_attr_renesas_p (current_function_decl);
7817 }
7818
7819 int
7820 sh_cfun_interrupt_handler_p (void)
7821 {
7822 return (lookup_attribute ("interrupt_handler",
7823 DECL_ATTRIBUTES (current_function_decl))
7824 != NULL_TREE);
7825 }
7826
7827 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7828
7829 static const char *
7830 sh_check_pch_target_flags (int old_flags)
7831 {
7832 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7833 | MASK_SH_E | MASK_HARD_SH4
7834 | MASK_FPU_SINGLE | MASK_SH4))
7835 return _("created and used with different architectures / ABIs");
7836 if ((old_flags ^ target_flags) & MASK_HITACHI)
7837 return _("created and used with different ABIs");
7838 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7839 return _("created and used with different endianness");
7840 return NULL;
7841 }
7842
7843 /* Predicates used by the templates. */
7844
7845 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7846 Used only in general_movsrc_operand. */
7847
7848 int
7849 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7850 {
7851 switch (REGNO (op))
7852 {
7853 case PR_REG:
7854 case MACL_REG:
7855 case MACH_REG:
7856 return 1;
7857 }
7858 return 0;
7859 }
7860
7861 /* Nonzero if OP is a floating point value with value 0.0. */
7862
7863 int
7864 fp_zero_operand (rtx op)
7865 {
7866 REAL_VALUE_TYPE r;
7867
7868 if (GET_MODE (op) != SFmode)
7869 return 0;
7870
7871 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7872 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7873 }
7874
7875 /* Nonzero if OP is a floating point value with value 1.0. */
7876
7877 int
7878 fp_one_operand (rtx op)
7879 {
7880 REAL_VALUE_TYPE r;
7881
7882 if (GET_MODE (op) != SFmode)
7883 return 0;
7884
7885 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7886 return REAL_VALUES_EQUAL (r, dconst1);
7887 }
7888
7889 /* For -m4 and -m4-single-only, mode switching is used. If we are
7890 compiling without -mfmovd, movsf_ie isn't taken into account for
7891 mode switching. We could check in machine_dependent_reorg for
7892 cases where we know we are in single precision mode, but there is
7893 no interface to find that out during reload, so we must avoid
7894 choosing an fldi alternative during reload and thus failing to
7895 allocate a scratch register for the constant loading. */
7896 int
7897 fldi_ok (void)
7898 {
7899 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7900 }
7901
7902 int
7903 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7904 {
7905 enum rtx_code code = GET_CODE (op);
7906 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7907 }
7908
7909 /* Return the TLS type for TLS symbols, 0 otherwise. */
7910 int
7911 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7912 {
7913 if (GET_CODE (op) != SYMBOL_REF)
7914 return 0;
7915 return SYMBOL_REF_TLS_MODEL (op);
7916 }
7917
7918 /* Return the destination address of a branch. */
7919
7920 static int
7921 branch_dest (rtx branch)
7922 {
7923 rtx dest = SET_SRC (PATTERN (branch));
7924 int dest_uid;
7925
7926 if (GET_CODE (dest) == IF_THEN_ELSE)
7927 dest = XEXP (dest, 1);
7928 dest = XEXP (dest, 0);
7929 dest_uid = INSN_UID (dest);
7930 return INSN_ADDRESSES (dest_uid);
7931 }
7932
7933 /* Return nonzero if REG is not used after INSN.
7934 We assume REG is a reload reg, and therefore does
7935 not live past labels. It may live past calls or jumps though. */
7936 int
7937 reg_unused_after (rtx reg, rtx insn)
7938 {
7939 enum rtx_code code;
7940 rtx set;
7941
7942 /* If the reg is set by this instruction, then it is safe for our
7943 case. Disregard the case where this is a store to memory, since
7944 we are checking a register used in the store address. */
7945 set = single_set (insn);
7946 if (set && GET_CODE (SET_DEST (set)) != MEM
7947 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7948 return 1;
7949
7950 while ((insn = NEXT_INSN (insn)))
7951 {
7952 rtx set;
7953 if (!INSN_P (insn))
7954 continue;
7955
7956 code = GET_CODE (insn);
7957
7958 #if 0
7959 /* If this is a label that existed before reload, then the register
7960 is dead here. However, if this is a label added by reorg, then
7961 the register may still be live here. We can't tell the difference,
7962 so we just ignore labels completely. */
7963 if (code == CODE_LABEL)
7964 return 1;
7965 /* else */
7966 #endif
7967
7968 if (code == JUMP_INSN)
7969 return 0;
7970
7971 /* If this is a sequence, we must handle them all at once.
7972 We could have for instance a call that sets the target register,
7973 and an insn in a delay slot that uses the register. In this case,
7974 we must return 0. */
7975 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7976 {
7977 int i;
7978 int retval = 0;
7979
7980 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7981 {
7982 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7983 rtx set = single_set (this_insn);
7984
7985 if (GET_CODE (this_insn) == CALL_INSN)
7986 code = CALL_INSN;
7987 else if (GET_CODE (this_insn) == JUMP_INSN)
7988 {
7989 if (INSN_ANNULLED_BRANCH_P (this_insn))
7990 return 0;
7991 code = JUMP_INSN;
7992 }
7993
7994 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7995 return 0;
7996 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7997 {
7998 if (GET_CODE (SET_DEST (set)) != MEM)
7999 retval = 1;
8000 else
8001 return 0;
8002 }
8003 if (set == 0
8004 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8005 return 0;
8006 }
8007 if (retval == 1)
8008 return 1;
8009 else if (code == JUMP_INSN)
8010 return 0;
8011 }
8012
8013 set = single_set (insn);
8014 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8015 return 0;
8016 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8017 return GET_CODE (SET_DEST (set)) != MEM;
8018 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8019 return 0;
8020
8021 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8022 return 1;
8023 }
8024 return 1;
8025 }
8026
8027 #include "ggc.h"
8028
8029 static GTY(()) rtx fpscr_rtx;
8030 rtx
8031 get_fpscr_rtx (void)
8032 {
8033 if (! fpscr_rtx)
8034 {
8035 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8036 REG_USERVAR_P (fpscr_rtx) = 1;
8037 mark_user_reg (fpscr_rtx);
8038 }
8039 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8040 mark_user_reg (fpscr_rtx);
8041 return fpscr_rtx;
8042 }
8043
8044 static GTY(()) tree fpscr_values;
8045
8046 static void
8047 emit_fpu_switch (rtx scratch, int index)
8048 {
8049 rtx dst, src;
8050
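/* __fpscr_values is a two-element array expected to be provided by the
runtime library; element INDEX holds the FPSCR setting to switch to. */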
8051 if (fpscr_values == NULL)
8052 {
8053 tree t;
8054
8055 t = build_index_type (integer_one_node);
8056 t = build_array_type (integer_type_node, t);
8057 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8058 DECL_ARTIFICIAL (t) = 1;
8059 DECL_IGNORED_P (t) = 1;
8060 DECL_EXTERNAL (t) = 1;
8061 TREE_STATIC (t) = 1;
8062 TREE_PUBLIC (t) = 1;
8063 TREE_USED (t) = 1;
8064
8065 fpscr_values = t;
8066 }
8067
8068 src = DECL_RTL (fpscr_values);
8069 if (no_new_pseudos)
8070 {
8071 emit_move_insn (scratch, XEXP (src, 0));
8072 if (index != 0)
8073 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8074 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8075 }
8076 else
8077 src = adjust_address (src, PSImode, index * 4);
8078
8079 dst = get_fpscr_rtx ();
8080 emit_move_insn (dst, src);
8081 }
8082
8083 void
8084 emit_sf_insn (rtx pat)
8085 {
8086 emit_insn (pat);
8087 }
8088
8089 void
8090 emit_df_insn (rtx pat)
8091 {
8092 emit_insn (pat);
8093 }
8094
8095 void
8096 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8097 {
8098 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8099 }
8100
8101 void
8102 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8103 {
8104 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8105 get_fpscr_rtx ()));
8106 }
8107
8108 void
8109 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8110 {
8111 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8112 }
8113
8114 void
8115 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8116 {
8117 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8118 get_fpscr_rtx ()));
8119 }
8120
8121 /* ??? gcc does flow analysis strictly after common subexpression
8122 elimination. As a result, common subexpression elimination fails
8123 when there are some intervening statements setting the same register.
8124 If we did nothing about this, this would hurt the precision switching
8125 for SH4 badly. There is some cse after reload, but it is unable to
8126 undo the extra register pressure from the unused instructions, and
8127 it cannot remove auto-increment loads.
8128
8129 A C code example that shows this flow/cse weakness for (at least) SH
8130 and sparc (as of gcc ss-970706) is this:
8131
8132 double
8133 f(double a)
8134 {
8135 double d;
8136 d = 0.1;
8137 a += d;
8138 d = 1.1;
8139 d = 0.1;
8140 a *= d;
8141 return a;
8142 }
8143
8144 So we add another pass before common subexpression elimination, to
8145 remove assignments that are dead due to a following assignment in the
8146 same basic block. */
8147
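/* Helper for the dead-assignment removal described above: clear the
REG_SET_BLOCK entry of every register used by X, so that an assignment
to such a register is not treated as dead. */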
8148 static void
8149 mark_use (rtx x, rtx *reg_set_block)
8150 {
8151 enum rtx_code code;
8152
8153 if (! x)
8154 return;
8155 code = GET_CODE (x);
8156 switch (code)
8157 {
8158 case REG:
8159 {
8160 int regno = REGNO (x);
8161 int nregs = (regno < FIRST_PSEUDO_REGISTER
8162 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8163 : 1);
8164 do
8165 {
8166 reg_set_block[regno + nregs - 1] = 0;
8167 }
8168 while (--nregs);
8169 break;
8170 }
8171 case SET:
8172 {
8173 rtx dest = SET_DEST (x);
8174
8175 if (GET_CODE (dest) == SUBREG)
8176 dest = SUBREG_REG (dest);
8177 if (GET_CODE (dest) != REG)
8178 mark_use (dest, reg_set_block);
8179 mark_use (SET_SRC (x), reg_set_block);
8180 break;
8181 }
8182 case CLOBBER:
8183 break;
8184 default:
8185 {
8186 const char *fmt = GET_RTX_FORMAT (code);
8187 int i, j;
8188 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8189 {
8190 if (fmt[i] == 'e')
8191 mark_use (XEXP (x, i), reg_set_block);
8192 else if (fmt[i] == 'E')
8193 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8194 mark_use (XVECEXP (x, i, j), reg_set_block);
8195 }
8196 break;
8197 }
8198 }
8199 }
8200
8201 static rtx get_free_reg (HARD_REG_SET);
8202
8203 /* This function returns a register to use for loading the address from
8204 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8205 able to use pseudo registers after combine, or have a better mechanism
8206 for choosing a register, it should be done here. */
8207 /* REGS_LIVE is the liveness information for the point for which we
8208 need this allocation. In some bare-bones exit blocks, r1 is live at the
8209 start. We can even have all of r0..r3 being live:
8210 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8211 The INSN before which the new insns are placed will clobber the register
8212 we return. If a basic block consists only of setting the return value
8213 register to a pseudo and using that register, the return value is not
8214 live before or after this block, yet we'll insert our insns right in
8215 the middle. */
8216
8217 static rtx
8218 get_free_reg (HARD_REG_SET regs_live)
8219 {
8220 if (! TEST_HARD_REG_BIT (regs_live, 1))
8221 return gen_rtx_REG (Pmode, 1);
8222
8223 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8224 there shouldn't be anything but a jump before the function end. */
8225 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8226 return gen_rtx_REG (Pmode, 7);
8227 }
8228
8229 /* This function will set the fpscr from memory.
8230 MODE is the mode we are setting it to. */
8231 void
8232 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8233 {
8234 enum attr_fp_mode fp_mode = mode;
8235 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8236 rtx addr_reg = get_free_reg (regs_live);
8237
8238 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8239 }
8240
8241 /* Is the given character a logical line separator for the assembler? */
8242 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8243 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8244 #endif
8245
8246 int
8247 sh_insn_length_adjustment (rtx insn)
8248 {
8249 /* Instructions with unfilled delay slots take up an extra two bytes for
8250 the nop in the delay slot. */
8251 if (((GET_CODE (insn) == INSN
8252 && GET_CODE (PATTERN (insn)) != USE
8253 && GET_CODE (PATTERN (insn)) != CLOBBER)
8254 || GET_CODE (insn) == CALL_INSN
8255 || (GET_CODE (insn) == JUMP_INSN
8256 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8257 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8258 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8259 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8260 return 2;
8261
8262 /* SH2e has a bug that prevents the use of annulled branches, so if
8263 the delay slot is not filled, we'll have to put a NOP in it. */
8264 if (sh_cpu == CPU_SH2E
8265 && GET_CODE (insn) == JUMP_INSN
8266 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8267 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8268 && get_attr_type (insn) == TYPE_CBRANCH
8269 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8270 return 2;
8271
8272 /* sh-dsp parallel processing insns take four bytes instead of two. */
8273
8274 if (GET_CODE (insn) == INSN)
8275 {
8276 int sum = 0;
8277 rtx body = PATTERN (insn);
8278 const char *template;
8279 char c;
8280 int maybe_label = 1;
8281
8282 if (GET_CODE (body) == ASM_INPUT)
8283 template = XSTR (body, 0);
8284 else if (asm_noperands (body) >= 0)
8285 template
8286 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8287 else
8288 return 0;
8289 do
8290 {
8291 int ppi_adjust = 0;
8292
8293 do
8294 c = *template++;
8295 while (c == ' ' || c == '\t');
8296 /* all sh-dsp parallel-processing insns start with p.
8297 The only non-ppi sh insn starting with p is pref.
8298 The only ppi starting with pr is prnd. */
8299 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8300 ppi_adjust = 2;
8301 /* The repeat pseudo-insn expands to three insns, a total of
8302 six bytes in size. */
8303 else if ((c == 'r' || c == 'R')
8304 && ! strncasecmp ("epeat", template, 5))
8305 ppi_adjust = 4;
8306 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8307 {
8308 /* If this is a label, it is obviously not a ppi insn. */
8309 if (c == ':' && maybe_label)
8310 {
8311 ppi_adjust = 0;
8312 break;
8313 }
8314 else if (c == '\'' || c == '"')
8315 maybe_label = 0;
8316 c = *template++;
8317 }
8318 sum += ppi_adjust;
8319 maybe_label = c != ':';
8320 }
8321 while (c);
8322 return sum;
8323 }
8324 return 0;
8325 }
8326
8327 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8328 isn't protected by a PIC unspec. */
8329 int
8330 nonpic_symbol_mentioned_p (rtx x)
8331 {
8332 register const char *fmt;
8333 register int i;
8334
8335 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8336 || GET_CODE (x) == PC)
8337 return 1;
8338
8339 /* We don't want to look into the possible MEM location of a
8340 CONST_DOUBLE, since we're not going to use it, in general. */
8341 if (GET_CODE (x) == CONST_DOUBLE)
8342 return 0;
8343
8344 if (GET_CODE (x) == UNSPEC
8345 && (XINT (x, 1) == UNSPEC_PIC
8346 || XINT (x, 1) == UNSPEC_GOT
8347 || XINT (x, 1) == UNSPEC_GOTOFF
8348 || XINT (x, 1) == UNSPEC_GOTPLT
8349 || XINT (x, 1) == UNSPEC_GOTTPOFF
8350 || XINT (x, 1) == UNSPEC_DTPOFF
8351 || XINT (x, 1) == UNSPEC_PLT))
8352 return 0;
8353
8354 fmt = GET_RTX_FORMAT (GET_CODE (x));
8355 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8356 {
8357 if (fmt[i] == 'E')
8358 {
8359 register int j;
8360
8361 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8362 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8363 return 1;
8364 }
8365 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8366 return 1;
8367 }
8368
8369 return 0;
8370 }
8371
8372 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8373 @GOTOFF in `reg'. */
8374 rtx
8375 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8376 rtx reg)
8377 {
8378 if (tls_symbolic_operand (orig, Pmode))
8379 return orig;
8380
8381 if (GET_CODE (orig) == LABEL_REF
8382 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8383 {
8384 if (reg == 0)
8385 reg = gen_reg_rtx (Pmode);
8386
8387 emit_insn (gen_symGOTOFF2reg (reg, orig));
8388 return reg;
8389 }
8390 else if (GET_CODE (orig) == SYMBOL_REF)
8391 {
8392 if (reg == 0)
8393 reg = gen_reg_rtx (Pmode);
8394
8395 emit_insn (gen_symGOT2reg (reg, orig));
8396 return reg;
8397 }
8398 return orig;
8399 }
8400
8401 /* Mark the use of a constant in the literal table. If the constant
8402 has multiple labels, make it unique. */
8403 static rtx
8404 mark_constant_pool_use (rtx x)
8405 {
8406 rtx insn, lab, pattern;
8407
8408 if (x == NULL)
8409 return x;
8410
8411 switch (GET_CODE (x))
8412 {
8413 case LABEL_REF:
8414 x = XEXP (x, 0);
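/* Fall through. */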
8415 case CODE_LABEL:
8416 break;
8417 default:
8418 return x;
8419 }
8420
8421 /* Get the first label in the list of labels for the same constant
8422 and delete the other labels in the list. */
8423 lab = x;
8424 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8425 {
8426 if (GET_CODE (insn) != CODE_LABEL
8427 || LABEL_REFS (insn) != NEXT_INSN (insn))
8428 break;
8429 lab = insn;
8430 }
8431
8432 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8433 INSN_DELETED_P (insn) = 1;
8434
8435 /* Mark constants in a window. */
8436 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8437 {
8438 if (GET_CODE (insn) != INSN)
8439 continue;
8440
8441 pattern = PATTERN (insn);
8442 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8443 continue;
8444
8445 switch (XINT (pattern, 1))
8446 {
8447 case UNSPECV_CONST2:
8448 case UNSPECV_CONST4:
8449 case UNSPECV_CONST8:
8450 XVECEXP (pattern, 0, 1) = const1_rtx;
8451 break;
8452 case UNSPECV_WINDOW_END:
8453 if (XVECEXP (pattern, 0, 0) == x)
8454 return lab;
8455 break;
8456 case UNSPECV_CONST_END:
8457 return lab;
8458 default:
8459 break;
8460 }
8461 }
8462
8463 return lab;
8464 }
8465
8466 /* Return true if it's possible to redirect BRANCH1 to the destination
8467 of an unconditional jump BRANCH2. We only want to do this if the
8468 resulting branch will have a short displacement. */
8469 int
8470 sh_can_redirect_branch (rtx branch1, rtx branch2)
8471 {
8472 if (flag_expensive_optimizations && simplejump_p (branch2))
8473 {
8474 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8475 rtx insn;
8476 int distance;
8477
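/* Look for the destination within roughly 256 bytes before or after
BRANCH1, so that the redirected branch still gets a short
displacement. */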
8478 for (distance = 0, insn = NEXT_INSN (branch1);
8479 insn && distance < 256;
8480 insn = PREV_INSN (insn))
8481 {
8482 if (insn == dest)
8483 return 1;
8484 else
8485 distance += get_attr_length (insn);
8486 }
8487 for (distance = 0, insn = NEXT_INSN (branch1);
8488 insn && distance < 256;
8489 insn = NEXT_INSN (insn))
8490 {
8491 if (insn == dest)
8492 return 1;
8493 else
8494 distance += get_attr_length (insn);
8495 }
8496 }
8497 return 0;
8498 }
8499
8500 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8501 int
8502 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8503 unsigned int new_reg)
8504 {
8505 /* Interrupt functions can only use registers that have already been
8506 saved by the prologue, even if they would normally be
8507 call-clobbered. */
8508
8509 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8510 return 0;
8511
8512 return 1;
8513 }
8514
8515 /* Function to update the integer COST
8516 based on the relationship between INSN that is dependent on
8517 DEP_INSN through the dependence LINK. The default is to make no
8518 adjustment to COST. This can be used for example to specify to
8519 the scheduler that an output- or anti-dependence does not incur
8520 the same cost as a data-dependence. The return value should be
8521 the new value for COST. */
8522 static int
8523 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8524 {
8525 rtx reg, use_pat;
8526
8527 if (TARGET_SHMEDIA)
8528 {
8529 /* On SHmedia, if the dependence is an anti-dependence or
8530 output-dependence, there is no cost. */
8531 if (REG_NOTE_KIND (link) != 0)
8532 {
8533 /* However, dependencies between target register loads and
8534 uses of the register in a subsequent block that are separated
8535 by a conditional branch are not modelled - we have to make do with
8536 the anti-dependency between the target register load and the
8537 conditional branch that ends the current block. */
8538 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8539 && GET_CODE (PATTERN (dep_insn)) == SET
8540 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8541 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8542 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8543 {
8544 int orig_cost = cost;
8545 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8546 rtx target = ((! note
8547 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8548 ? insn : JUMP_LABEL (insn));
8549 /* On the likely path, the branch costs 1, on the unlikely path,
8550 it costs 3. */
8551 cost--;
8552 do
8553 target = next_active_insn (target);
8554 while (target && ! flow_dependent_p (target, dep_insn)
8555 && --cost > 0);
8556 /* If two branches are executed in immediate succession, with the
8557 first branch properly predicted, this causes a stall at the
8558 second branch, hence we won't need the target for the
8559 second branch for two cycles after the launch of the first
8560 branch. */
8561 if (cost > orig_cost - 2)
8562 cost = orig_cost - 2;
8563 }
8564 else
8565 cost = 0;
8566 }
8567
8568 else if (get_attr_is_mac_media (insn)
8569 && get_attr_is_mac_media (dep_insn))
8570 cost = 1;
8571
8572 else if (! reload_completed
8573 && GET_CODE (PATTERN (insn)) == SET
8574 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8575 && GET_CODE (PATTERN (dep_insn)) == SET
8576 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8577 && cost < 4)
8578 cost = 4;
8579 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8580 that is needed at the target. */
8581 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8582 && ! flow_dependent_p (insn, dep_insn))
8583 cost--;
8584 }
8585 else if (REG_NOTE_KIND (link) == 0)
8586 {
8587 enum attr_type dep_type, type;
8588
8589 if (recog_memoized (insn) < 0
8590 || recog_memoized (dep_insn) < 0)
8591 return cost;
8592
8593 dep_type = get_attr_type (dep_insn);
8594 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8595 cost--;
8596 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8597 && (type = get_attr_type (insn)) != TYPE_CALL
8598 && type != TYPE_SFUNC)
8599 cost--;
8600
8601 /* The only input for a call that is timing-critical is the
8602 function's address. */
8603 if (GET_CODE (insn) == CALL_INSN)
8604 {
8605 rtx call = PATTERN (insn);
8606
8607 if (GET_CODE (call) == PARALLEL)
8608 call = XVECEXP (call, 0, 0);
8609 if (GET_CODE (call) == SET)
8610 call = SET_SRC (call);
8611 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8612 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8613 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8614 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8615 cost = 0;
8616 }
8617 /* Likewise, the most timing critical input for an sfunc call
8618 is the function address. However, sfuncs typically start
8619 using their arguments pretty quickly.
8620 Assume a four cycle delay before they are needed. */
8621 /* All sfunc calls are parallels with at least four components.
8622 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8623 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8624 && XVECLEN (PATTERN (insn), 0) >= 4
8625 && (reg = sfunc_uses_reg (insn)))
8626 {
8627 if (! reg_set_p (reg, dep_insn))
8628 cost -= 4;
8629 }
8630 /* When the preceding instruction loads the shift amount of
8631 the following SHAD/SHLD, the latency of the load is increased
8632 by 1 cycle. */
8633 else if (TARGET_SH4
8634 && get_attr_type (insn) == TYPE_DYN_SHIFT
8635 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8636 && reg_overlap_mentioned_p (SET_DEST (single_set (dep_insn)),
8637 XEXP (SET_SRC (single_set (insn)),
8638 1)))
8639 cost++;
8640 /* When an LS group instruction with a latency of less than
8641 3 cycles is followed by a double-precision floating-point
8642 instruction, FIPR, or FTRV, the latency of the first
8643 instruction is increased to 3 cycles. */
8644 else if (cost < 3
8645 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8646 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8647 cost = 3;
8648 /* The lsw register of a double-precision computation is ready one
8649 cycle earlier. */
8650 else if (reload_completed
8651 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8652 && (use_pat = single_set (insn))
8653 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8654 SET_SRC (use_pat)))
8655 cost -= 1;
8656
8657 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8658 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8659 cost -= 1;
8660 }
8661 /* An anti-dependence penalty of two applies if the first insn is a double
8662 precision fadd / fsub / fmul. */
8663 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8664 && recog_memoized (dep_insn) >= 0
8665 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8666 /* A lot of alleged anti-flow dependences are fake,
8667 so check this one is real. */
8668 && flow_dependent_p (dep_insn, insn))
8669 cost = 2;
8670
8671
8672 return cost;
8673 }
8674
8675 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8676 if DEP_INSN is anti-flow dependent on INSN. */
8677 static int
8678 flow_dependent_p (rtx insn, rtx dep_insn)
8679 {
8680 rtx tmp = PATTERN (insn);
8681
8682 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8683 return tmp == NULL_RTX;
8684 }
8685
8686 /* A helper function for flow_dependent_p called through note_stores. */
8687 static void
8688 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8689 {
8690 rtx * pinsn = (rtx *) data;
8691
8692 if (*pinsn && reg_referenced_p (x, *pinsn))
8693 *pinsn = NULL_RTX;
8694 }
8695
8696 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8697 'special function' patterns (type sfunc) that clobber pr, but that
8698 do not look like function calls to leaf_function_p. Hence we must
8699 do this extra check. */
8700 static int
8701 sh_pr_n_sets (void)
8702 {
8703 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8704 }
8705
8706 /* Return where to allocate pseudo for a given hard register initial
8707 value. */
8708 static rtx
8709 sh_allocate_initial_value (rtx hard_reg)
8710 {
8711 rtx x;
8712
8713 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8714 {
8715 if (current_function_is_leaf
8716 && ! sh_pr_n_sets ()
8717 && ! (TARGET_SHCOMPACT
8718 && ((current_function_args_info.call_cookie
8719 & ~ CALL_COOKIE_RET_TRAMP (1))
8720 || current_function_has_nonlocal_label)))
8721 x = hard_reg;
8722 else
8723 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8724 }
8725 else
8726 x = NULL_RTX;
8727
8728 return x;
8729 }
8730
8731 /* Return the instruction issue rate, used by the DFA pipeline
8732 description: 2 for superscalar cores such as the SH4, 1 otherwise. */
8733 static int
8734 sh_issue_rate (void)
8735 {
8736 if (TARGET_SUPERSCALAR)
8737 return 2;
8738 else
8739 return 1;
8740 }
8741
8742 /* Functions for ready queue reordering for sched1. */
8743
8744 /* Get weight for mode for a set x. */
8745 static short
8746 find_set_regmode_weight (rtx x, enum machine_mode mode)
8747 {
8748 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8749 return 1;
8750 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8751 {
8752 if (GET_CODE (SET_DEST (x)) == REG)
8753 {
8754 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8755 return 1;
8756 else
8757 return 0;
8758 }
8759 return 1;
8760 }
8761 return 0;
8762 }
8763
8764 /* Get regmode weight for insn. */
8765 static short
8766 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8767 {
8768 short reg_weight = 0;
8769 rtx x;
8770
8771 /* Increment weight for each register born here. */
8772 x = PATTERN (insn);
8773 reg_weight += find_set_regmode_weight (x, mode);
8774 if (GET_CODE (x) == PARALLEL)
8775 {
8776 int j;
8777 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8778 {
8779 x = XVECEXP (PATTERN (insn), 0, j);
8780 reg_weight += find_set_regmode_weight (x, mode);
8781 }
8782 }
8783 /* Decrement weight for each register that dies here. */
8784 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8785 {
8786 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8787 {
8788 rtx note = XEXP (x, 0);
8789 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8790 reg_weight--;
8791 }
8792 }
8793 return reg_weight;
8794 }
8795
8796 /* Calculate regmode weights for all insns of a basic block. */
8797 static void
8798 find_regmode_weight (basic_block b, enum machine_mode mode)
8799 {
8800 rtx insn, next_tail, head, tail;
8801
8802 get_ebb_head_tail (b, b, &head, &tail);
8803 next_tail = NEXT_INSN (tail);
8804
8805 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8806 {
8807 /* Handle register life information. */
8808 if (!INSN_P (insn))
8809 continue;
8810
8811 if (mode == SFmode)
8812 INSN_REGMODE_WEIGHT (insn, mode) =
8813 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8814 else if (mode == SImode)
8815 INSN_REGMODE_WEIGHT (insn, mode) =
8816 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8817 }
8818 }
8819
8820 /* Comparison function for ready queue sorting. */
8821 static int
8822 rank_for_reorder (const void *x, const void *y)
8823 {
8824 rtx tmp = *(const rtx *) y;
8825 rtx tmp2 = *(const rtx *) x;
8826
8827 /* The insn in a schedule group should be issued first. */
8828 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8829 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8830
8831 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8832 minimizes instruction movement, thus minimizing sched's effect on
8833 register pressure. */
8834 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8835 }
8836
8837 /* Resort the array A in which only element at index N may be out of order. */
8838 static void
8839 swap_reorder (rtx *a, int n)
8840 {
8841 rtx insn = a[n - 1];
8842 int i = n - 2;
8843
8844 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8845 {
8846 a[i + 1] = a[i];
8847 i -= 1;
8848 }
8849 a[i + 1] = insn;
8850 }
8851
8852 #define SCHED_REORDER(READY, N_READY) \
8853 do \
8854 { \
8855 if ((N_READY) == 2) \
8856 swap_reorder (READY, N_READY); \
8857 else if ((N_READY) > 2) \
8858 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8859 } \
8860 while (0)
8861
8862 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8863 macro. */
8864 static void
8865 ready_reorder (rtx *ready, int nready)
8866 {
8867 SCHED_REORDER (ready, nready);
8868 }
8869
8870 /* Calculate regmode weights for all insns of all basic blocks. */
8871 static void
8872 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8873 int verbose ATTRIBUTE_UNUSED,
8874 int old_max_uid)
8875 {
8876 basic_block b;
8877
8878 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8879 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8880
8881 FOR_EACH_BB_REVERSE (b)
8882 {
8883 find_regmode_weight (b, SImode);
8884 find_regmode_weight (b, SFmode);
8885 }
8886
8887 CURR_REGMODE_PRESSURE (SImode) = 0;
8888 CURR_REGMODE_PRESSURE (SFmode) = 0;
8889
8890 }
8891
8892 /* Cleanup. */
8893 static void
8894 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8895 int verbose ATTRIBUTE_UNUSED)
8896 {
8897 if (regmode_weight[0])
8898 {
8899 free (regmode_weight[0]);
8900 regmode_weight[0] = NULL;
8901 }
8902 if (regmode_weight[1])
8903 {
8904 free (regmode_weight[1]);
8905 regmode_weight[1] = NULL;
8906 }
8907 }
8908
8909 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8910 keep count of register pressures on SImode and SFmode. */
8911 static int
8912 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8913 int sched_verbose ATTRIBUTE_UNUSED,
8914 rtx insn,
8915 int can_issue_more)
8916 {
8917 if (GET_CODE (PATTERN (insn)) != USE
8918 && GET_CODE (PATTERN (insn)) != CLOBBER)
8919 cached_can_issue_more = can_issue_more - 1;
8920 else
8921 cached_can_issue_more = can_issue_more;
8922
8923 if (reload_completed)
8924 return cached_can_issue_more;
8925
8926 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8927 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8928
8929 return cached_can_issue_more;
8930 }
8931
8932 static void
8933 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8934 int verbose ATTRIBUTE_UNUSED,
8935 int veclen ATTRIBUTE_UNUSED)
8936 {
8937 CURR_REGMODE_PRESSURE (SImode) = 0;
8938 CURR_REGMODE_PRESSURE (SFmode) = 0;
8939 }
8940
8941 /* Some magic numbers. */
8942 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8943 functions that already have high pressure on r0. */
8944 #define R0_MAX_LIFE_REGIONS 2
8945 #define R0_MAX_LIVE_LENGTH 12
8946 /* Register Pressure thresholds for SImode and SFmode registers. */
8947 #define SIMODE_MAX_WEIGHT 5
8948 #define SFMODE_MAX_WEIGHT 10
8949
8950 /* Return true if the pressure is high for MODE. */
8951 static short
8952 high_pressure (enum machine_mode mode)
8953 {
8954 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8955 functions that already have high pressure on r0. */
8956 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8957 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8958 return 1;
8959
8960 if (mode == SFmode)
8961 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8962 else
8963 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8964 }
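/* Illustrative sketch (kept disabled): with the thresholds above, an
   accumulated SImode weight of 6 (or an SFmode weight of 11) among the
   insns already scheduled counts as high pressure, which makes
   sh_reorder / sh_reorder2 below fall back to the pressure-reducing
   ready-list order.  */
#if 0
static void
high_pressure_example (void)
{
  CURR_REGMODE_PRESSURE (SImode) = 6;   /* > SIMODE_MAX_WEIGHT (5).  */
  gcc_assert (high_pressure (SImode));
}
#endif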
8965
8966 /* Reorder ready queue if register pressure is high. */
8967 static int
8968 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8969 int sched_verbose ATTRIBUTE_UNUSED,
8970 rtx *ready,
8971 int *n_readyp,
8972 int clock_var ATTRIBUTE_UNUSED)
8973 {
8974 if (reload_completed)
8975 return sh_issue_rate ();
8976
8977 if (high_pressure (SFmode) || high_pressure (SImode))
8978 {
8979 ready_reorder (ready, *n_readyp);
8980 }
8981
8982 return sh_issue_rate ();
8983 }
8984
8985 /* Skip cycles if the current register pressure is high. */
8986 static int
8987 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8988 int sched_verbose ATTRIBUTE_UNUSED,
8989 rtx *ready ATTRIBUTE_UNUSED,
8990 int *n_readyp ATTRIBUTE_UNUSED,
8991 int clock_var ATTRIBUTE_UNUSED)
8992 {
8993 if (reload_completed)
8994 return cached_can_issue_more;
8995
8996 if (high_pressure (SFmode) || high_pressure (SImode))
8997 skip_cycles = 1;
8998
8999 return cached_can_issue_more;
9000 }
9001
9002 /* Skip cycles without sorting the ready queue. This will move insns from
9003 Q->R. If this is the last cycle we are skipping, allow sorting of the
9004 ready queue by sh_reorder. */
9005
9006 /* Generally, skipping this many cycles is sufficient for all insns to move
9007 from Q -> R. */
9008 #define MAX_SKIPS 8
9009
9010 static int
9011 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9012 int sched_verbose ATTRIBUTE_UNUSED,
9013 rtx insn ATTRIBUTE_UNUSED,
9014 int last_clock_var,
9015 int clock_var,
9016 int *sort_p)
9017 {
9018 if (reload_completed)
9019 return 0;
9020
9021 if (skip_cycles)
9022 {
9023 if ((clock_var - last_clock_var) < MAX_SKIPS)
9024 {
9025 *sort_p = 0;
9026 return 1;
9027 }
9028 /* If this is the last cycle we are skipping, allow reordering of R. */
9029 if ((clock_var - last_clock_var) == MAX_SKIPS)
9030 {
9031 *sort_p = 1;
9032 return 1;
9033 }
9034 }
9035
9036 skip_cycles = 0;
9037
9038 return 0;
9039 }
9040
9041 /* SHmedia requires registers for branches, so we can't generate new
9042 branches past reload. */
9043 static bool
9044 sh_cannot_modify_jumps_p (void)
9045 {
9046 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9047 }
9048
9049 static int
9050 sh_target_reg_class (void)
9051 {
9052 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9053 }
9054
9055 static bool
9056 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9057 {
9058 HARD_REG_SET dummy;
9059 rtx insn;
9060
9061 if (! shmedia_space_reserved_for_target_registers)
9062 return 0;
9063 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9064 return 0;
9065 if (calc_live_regs (&dummy) >= 6 * 8)
9066 return 1;
9067 /* This is a borderline case. See if we have a nested loop, or a loop
9068 with a call, or a loop with more than 4 labels inside. */
9069 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9070 {
9071 if (GET_CODE (insn) == NOTE
9072 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9073 {
9074 int labels = 0;
9075
9076 do
9077 {
9078 insn = NEXT_INSN (insn);
9079 if ((GET_CODE (insn) == NOTE
9080 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9081 || GET_CODE (insn) == CALL_INSN
9082 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9083 return 1;
9084 }
9085 while (GET_CODE (insn) != NOTE
9086 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9087 }
9088 }
9089 return 0;
9090 }
9091
9092 static bool
9093 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9094 {
9095 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9096 }
9097
9098 /*
9099 On the SH1..SH4, the trampoline looks like
9100 2 0002 D202 mov.l l2,r2
9101 1 0000 D301 mov.l l1,r3
9102 3 0004 422B jmp @r2
9103 4 0006 0009 nop
9104 5 0008 00000000 l1: .long area
9105 6 000c 00000000 l2: .long function
9106
9107 SH5 (compact) uses r1 instead of r3 for the static chain. */
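/* A minimal sketch (not compiled) of the 16-byte image that the SH1..SH4
   case of sh_initialize_trampoline below stores, word by word; CXT and
   FNADDR are the arguments of that function:

     offset  0:  the two mov.l opcodes, packed into one SImode word
                 (0xd301d202 little endian, 0xd202d301 big endian)
     offset  4:  jmp @r2 / nop, likewise packed
                 (0x0009422b little endian, 0x422b0009 big endian)
     offset  8:  CXT, the static chain value (l1 above)
     offset 12:  FNADDR, the address of the function's code (l2 above)  */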
9108
9109
9110 /* Emit RTL insns to initialize the variable parts of a trampoline.
9111 FNADDR is an RTX for the address of the function's pure code.
9112 CXT is an RTX for the static chain value for the function. */
9113
9114 void
9115 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9116 {
9117 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9118
9119 if (TARGET_SHMEDIA64)
9120 {
9121 rtx tramp_templ;
9122 int fixed_len;
9123
9124 rtx movi1 = GEN_INT (0xcc000010);
9125 rtx shori1 = GEN_INT (0xc8000010);
9126 rtx src, dst;
9127
9128 /* The following trampoline works within a +- 128 KB range for cxt:
9129 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9130 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9131 gettr tr1,r1; blink tr0,r63 */
9132 /* Address rounding makes it hard to compute the exact bounds of the
9133 offset for this trampoline, but we have a rather generous offset
9134 range, so frame_offset should do fine as an upper bound. */
9135 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9136 {
9137 /* ??? could optimize this trampoline initialization
9138 by writing DImode words with two insns each. */
9139 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9140 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9141 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9142 insn = gen_rtx_AND (DImode, insn, mask);
9143 /* Or in ptb/u .,tr1 pattern */
9144 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9145 insn = force_operand (insn, NULL_RTX);
9146 insn = gen_lowpart (SImode, insn);
9147 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9148 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9149 insn = gen_rtx_AND (DImode, insn, mask);
9150 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9151 insn = gen_lowpart (SImode, insn);
9152 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9153 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9154 insn = gen_rtx_AND (DImode, insn, mask);
9155 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9156 insn = gen_lowpart (SImode, insn);
9157 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9158 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9159 insn = gen_rtx_AND (DImode, insn, mask);
9160 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9161 insn = gen_lowpart (SImode, insn);
9162 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9163 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9164 insn = gen_rtx_AND (DImode, insn, mask);
9165 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9166 insn = gen_lowpart (SImode, insn);
9167 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9168 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9169 GEN_INT (0x6bf10600));
9170 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9171 GEN_INT (0x4415fc10));
9172 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9173 GEN_INT (0x4401fff0));
9174 emit_insn (gen_ic_invalidate_line (tramp));
9175 return;
9176 }
9177 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
9178 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9179
9180 tramp_templ = gen_datalabel_ref (tramp_templ);
9181 dst = tramp_mem;
9182 src = gen_const_mem (BLKmode, tramp_templ);
9183 set_mem_align (dst, 256);
9184 set_mem_align (src, 64);
9185 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9186
9187 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9188 emit_move_insn (adjust_address (tramp_mem, Pmode,
9189 fixed_len + GET_MODE_SIZE (Pmode)),
9190 cxt);
9191 emit_insn (gen_ic_invalidate_line (tramp));
9192 return;
9193 }
9194 else if (TARGET_SHMEDIA)
9195 {
9196 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9197 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9198 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9199 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9200 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9201 rotated right by 10, with the high 16 bits of every 32 selected. */
9202 rtx movishori
9203 = force_reg (V2HImode, (simplify_gen_subreg
9204 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9205 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9206 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9207
9208 tramp = force_reg (Pmode, tramp);
9209 fnaddr = force_reg (SImode, fnaddr);
9210 cxt = force_reg (SImode, cxt);
9211 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9212 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9213 movishori));
9214 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9215 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9216 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9217 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9218 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9219 gen_rtx_SUBREG (V2HImode, cxt, 0),
9220 movishori));
9221 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9222 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9223 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9224 if (TARGET_LITTLE_ENDIAN)
9225 {
9226 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9227 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9228 }
9229 else
9230 {
9231 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9232 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9233 }
9234 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9235 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9236 emit_insn (gen_ic_invalidate_line (tramp));
9237 return;
9238 }
9239 else if (TARGET_SHCOMPACT)
9240 {
9241 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9242 return;
9243 }
9244 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9245 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9246 SImode));
9247 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9248 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9249 SImode));
9250 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9251 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9252 if (TARGET_HARVARD)
9253 {
9254 if (TARGET_USERMODE)
9255 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9256 FUNCTION_ORDINARY),
9257 0, VOIDmode, 1, tramp, SImode);
9258 else
9259 emit_insn (gen_ic_invalidate_line (tramp));
9260 }
9261 }
9262
9263 /* FIXME: This is overly conservative. A SHcompact function that
9264 receives arguments ``by reference'' will have them stored in its
9265 own stack frame, so it must not pass pointers or references to
9266 these arguments to other functions by means of sibling calls. */
9267 /* If PIC, we cannot make sibling calls to global functions
9268 because the PLT requires r12 to be live. */
9269 static bool
9270 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9271 {
9272 return (1
9273 && (! TARGET_SHCOMPACT
9274 || current_function_args_info.stack_regs == 0)
9275 && ! sh_cfun_interrupt_handler_p ()
9276 && (! flag_pic
9277 || (decl && ! TREE_PUBLIC (decl))
9278 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9279 }
9280
9281 /* Machine specific built-in functions. */
9282
9283 struct builtin_description
9284 {
9285 const enum insn_code icode;
9286 const char *const name;
9287 int signature;
9288 };
9289
9290 /* Describe number and signedness of arguments; arg[0] == result
9291 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9292 /* 9: 64-bit pointer, 10: 32-bit pointer. */
9293 static const char signature_args[][4] =
9294 {
9295 #define SH_BLTIN_V2SI2 0
9296 { 4, 4 },
9297 #define SH_BLTIN_V4HI2 1
9298 { 4, 4 },
9299 #define SH_BLTIN_V2SI3 2
9300 { 4, 4, 4 },
9301 #define SH_BLTIN_V4HI3 3
9302 { 4, 4, 4 },
9303 #define SH_BLTIN_V8QI3 4
9304 { 4, 4, 4 },
9305 #define SH_BLTIN_MAC_HISI 5
9306 { 1, 4, 4, 1 },
9307 #define SH_BLTIN_SH_HI 6
9308 { 4, 4, 1 },
9309 #define SH_BLTIN_SH_SI 7
9310 { 4, 4, 1 },
9311 #define SH_BLTIN_V4HI2V2SI 8
9312 { 4, 4, 4 },
9313 #define SH_BLTIN_V4HI2V8QI 9
9314 { 4, 4, 4 },
9315 #define SH_BLTIN_SISF 10
9316 { 4, 2 },
9317 #define SH_BLTIN_LDUA_L 11
9318 { 2, 10 },
9319 #define SH_BLTIN_LDUA_Q 12
9320 { 1, 10 },
9321 #define SH_BLTIN_STUA_L 13
9322 { 0, 10, 2 },
9323 #define SH_BLTIN_STUA_Q 14
9324 { 0, 10, 1 },
9325 #define SH_BLTIN_LDUA_L64 15
9326 { 2, 9 },
9327 #define SH_BLTIN_LDUA_Q64 16
9328 { 1, 9 },
9329 #define SH_BLTIN_STUA_L64 17
9330 { 0, 9, 2 },
9331 #define SH_BLTIN_STUA_Q64 18
9332 { 0, 9, 1 },
9333 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9334 #define SH_BLTIN_2 19
9335 #define SH_BLTIN_SU 19
9336 { 1, 2 },
9337 #define SH_BLTIN_3 20
9338 #define SH_BLTIN_SUS 20
9339 { 2, 2, 1 },
9340 #define SH_BLTIN_PSSV 21
9341 { 0, 8, 2, 2 },
9342 #define SH_BLTIN_XXUU 22
9343 #define SH_BLTIN_UUUU 22
9344 { 1, 1, 1, 1 },
9345 #define SH_BLTIN_PV 23
9346 { 0, 8 },
9347 };
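/* Illustrative sketch (kept disabled): decoding one signature row.  */
#if 0
static void
signature_args_example (void)
{
  /* SH_BLTIN_SH_HI == { 4, 4, 1 }: the result and the first argument
     take their modes from the insn pattern with signedness "don't care"
     (4), the shift count is unsigned (1), and there is no third
     argument (0).  The actual machine modes are looked up via
     insn_data[icode].operand[...].mode in sh_media_init_builtins.  */
  gcc_assert (signature_args[SH_BLTIN_SH_HI][0] == 4
              && signature_args[SH_BLTIN_SH_HI][1] == 4
              && signature_args[SH_BLTIN_SH_HI][2] == 1
              && signature_args[SH_BLTIN_SH_HI][3] == 0);
}
#endif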
9348 /* mcmv: operands considered unsigned. */
9349 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9350 /* mperm: control value considered unsigned int. */
9351 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9352 /* mshards_q: returns signed short. */
9353 /* nsb: takes long long arg, returns unsigned char. */
9354 static const struct builtin_description bdesc[] =
9355 {
9356 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9357 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9358 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9359 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9360 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9361 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9362 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9363 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9364 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9365 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9366 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9367 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9368 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9369 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9370 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9371 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9372 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9373 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9374 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9375 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9376 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9377 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9378 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9379 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9380 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9381 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9382 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9383 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9384 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9385 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9386 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9387 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9388 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9389 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9390 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9391 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9392 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9393 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9394 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9395 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9396 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9397 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9398 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9399 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9400 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9401 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9402 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9403 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9404 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9405 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9406 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9407 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9408 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9409 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9410 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9411 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9412 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9413 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9414 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9415 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9416 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9417 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9418 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9419 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9420 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9421 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9422 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9423 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9424 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9425 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9426 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9427 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9428 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9429 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9430 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9431 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9432 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9433 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9434 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9435 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9436 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9437 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9438 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9439 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9440 };
9441
9442 static void
9443 sh_media_init_builtins (void)
9444 {
9445 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9446 const struct builtin_description *d;
9447
9448 memset (shared, 0, sizeof shared);
9449 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9450 {
9451 tree type, arg_type = 0;
9452 int signature = d->signature;
9453 int i;
9454
9455 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9456 type = shared[signature];
9457 else
9458 {
9459 int has_result = signature_args[signature][0] != 0;
9460
9461 if ((signature_args[signature][1] & 8)
9462 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9463 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9464 continue;
9465 if (! TARGET_FPU_ANY
9466 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9467 continue;
9468 type = void_list_node;
9469 for (i = 3; ; i--)
9470 {
9471 int arg = signature_args[signature][i];
9472 int opno = i - 1 + has_result;
9473
9474 if (arg & 8)
9475 arg_type = ptr_type_node;
9476 else if (arg)
9477 arg_type = (*lang_hooks.types.type_for_mode)
9478 (insn_data[d->icode].operand[opno].mode,
9479 (arg & 1));
9480 else if (i)
9481 continue;
9482 else
9483 arg_type = void_type_node;
9484 if (i == 0)
9485 break;
9486 type = tree_cons (NULL_TREE, arg_type, type);
9487 }
9488 type = build_function_type (arg_type, type);
9489 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9490 shared[signature] = type;
9491 }
9492 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9493 NULL, NULL_TREE);
9494 }
9495 }
9496
9497 /* Implements target hook vector_mode_supported_p. */
9498 bool
9499 sh_vector_mode_supported_p (enum machine_mode mode)
9500 {
9501 if (TARGET_FPU_ANY
9502 && ((mode == V2SFmode)
9503 || (mode == V4SFmode)
9504 || (mode == V16SFmode)))
9505 return true;
9506
9507 else if (TARGET_SHMEDIA
9508 && ((mode == V8QImode)
9509 || (mode == V2HImode)
9510 || (mode == V4HImode)
9511 || (mode == V2SImode)))
9512 return true;
9513
9514 return false;
9515 }
9516
9517 /* Implements target hook dwarf_calling_convention. Return an enum
9518 of dwarf_calling_convention. */
9519 int
9520 sh_dwarf_calling_convention (tree func)
9521 {
9522 if (sh_attr_renesas_p (func))
9523 return DW_CC_GNU_renesas_sh;
9524
9525 return DW_CC_normal;
9526 }
9527
9528 static void
9529 sh_init_builtins (void)
9530 {
9531 if (TARGET_SHMEDIA)
9532 sh_media_init_builtins ();
9533 }
9534
9535 /* Expand an expression EXP that calls a built-in function,
9536 with result going to TARGET if that's convenient
9537 (and in mode MODE if that's convenient).
9538 SUBTARGET may be used as the target for computing one of EXP's operands.
9539 IGNORE is nonzero if the value is to be ignored. */
9540
9541 static rtx
9542 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9543 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9544 {
9545 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9546 tree arglist = TREE_OPERAND (exp, 1);
9547 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9548 const struct builtin_description *d = &bdesc[fcode];
9549 enum insn_code icode = d->icode;
9550 int signature = d->signature;
9551 enum machine_mode tmode = VOIDmode;
9552 int nop = 0, i;
9553 rtx op[4];
9554 rtx pat = 0;
9555
9556 if (signature_args[signature][0])
9557 {
9558 if (ignore)
9559 return 0;
9560
9561 tmode = insn_data[icode].operand[0].mode;
9562 if (! target
9563 || GET_MODE (target) != tmode
9564 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9565 target = gen_reg_rtx (tmode);
9566 op[nop++] = target;
9567 }
9568 else
9569 target = 0;
9570
9571 for (i = 1; i <= 3; i++, nop++)
9572 {
9573 tree arg;
9574 enum machine_mode opmode, argmode;
9575 tree optype;
9576
9577 if (! signature_args[signature][i])
9578 break;
9579 arg = TREE_VALUE (arglist);
9580 if (arg == error_mark_node)
9581 return const0_rtx;
9582 arglist = TREE_CHAIN (arglist);
9583 if (signature_args[signature][i] & 8)
9584 {
9585 opmode = ptr_mode;
9586 optype = ptr_type_node;
9587 }
9588 else
9589 {
9590 opmode = insn_data[icode].operand[nop].mode;
9591 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9592 }
9593 argmode = TYPE_MODE (TREE_TYPE (arg));
9594 if (argmode != opmode)
9595 arg = build1 (NOP_EXPR, optype, arg);
9596 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9597 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9598 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9599 }
9600
9601 switch (nop)
9602 {
9603 case 1:
9604 pat = (*insn_data[d->icode].genfun) (op[0]);
9605 break;
9606 case 2:
9607 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9608 break;
9609 case 3:
9610 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9611 break;
9612 case 4:
9613 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9614 break;
9615 default:
9616 gcc_unreachable ();
9617 }
9618 if (! pat)
9619 return 0;
9620 emit_insn (pat);
9621 return target;
9622 }
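/* Illustrative sketch (kept disabled): the shape of the expansion above
   for a simple one-argument builtin such as __builtin_absv2si2.  D,
   TARGET and ARG below are hypothetical stand-ins for the values the
   real function computes.  */
#if 0
static rtx
sh_expand_builtin_model (const struct builtin_description *d,
                         rtx target, rtx arg)
{
  /* op[0] is the result register, op[1] the (predicate-checked)
     argument; the generator from insn_data emits the single insn.  */
  rtx pat = (*insn_data[d->icode].genfun) (target, arg);
  if (pat)
    emit_insn (pat);
  return target;
}
#endif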
9623
9624 void
9625 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9626 {
9627 rtx sel0 = const0_rtx;
9628 rtx sel1 = const1_rtx;
9629 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9630 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9631
9632 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9633 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9634 }
9635
9636 void
9637 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9638 {
9639 rtx sel0 = const0_rtx;
9640 rtx sel1 = const1_rtx;
9641 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9642 = gen_binary_sf_op;
9643 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9644
9645 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9646 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9647 }
9648
9649 /* Return true if a mode change from FROM to TO is invalid for registers
9650 in CLASS. */
9651 bool
9652 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9653 enum reg_class class)
9654 {
9655 /* We want to enable the use of SUBREGs as a means to
9656 VEC_SELECT a single element of a vector. */
9657 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9658 return (reg_classes_intersect_p (GENERAL_REGS, class));
9659
9660 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9661 {
9662 if (TARGET_LITTLE_ENDIAN)
9663 {
9664 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9665 return reg_classes_intersect_p (DF_REGS, class);
9666 }
9667 else
9668 {
9669 if (GET_MODE_SIZE (from) < 8)
9670 return reg_classes_intersect_p (DF_HI_REGS, class);
9671 }
9672 }
9673 return 0;
9674 }
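/* Illustrative sketch (kept disabled): the SUBREG-as-VEC_SELECT use the
   comment above refers to, i.e. picking the SFmode lane at subreg byte 0
   out of a V2SFmode value.  This is only cheap while the vector stays in
   floating-point registers, which is why the mode change is rejected for
   classes that intersect GENERAL_REGS.  V2SF_REG below is hypothetical.  */
#if 0
static rtx
vec_lane_example (rtx v2sf_reg)
{
  return simplify_gen_subreg (SFmode, v2sf_reg, V2SFmode, 0);
}
#endif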
9675
9676
9677 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9678 that label is used. */
9679
9680 void
9681 sh_mark_label (rtx address, int nuses)
9682 {
9683 if (GOTOFF_P (address))
9684 {
9685 /* Extract the label or symbol. */
9686 address = XEXP (address, 0);
9687 if (GET_CODE (address) == PLUS)
9688 address = XEXP (address, 0);
9689 address = XVECEXP (address, 0, 0);
9690 }
9691 if (GET_CODE (address) == LABEL_REF
9692 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9693 LABEL_NUSES (XEXP (address, 0)) += nuses;
9694 }
9695
9696 /* Compute extra cost of moving data between one register class
9697 and another. */
9698
9699 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9700 uses this information. Hence, the general register <-> floating point
9701 register information here is not used for SFmode. */
9702
9703 int
9704 sh_register_move_cost (enum machine_mode mode,
9705 enum reg_class srcclass, enum reg_class dstclass)
9706 {
9707 if (dstclass == T_REGS || dstclass == PR_REGS)
9708 return 10;
9709
9710 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9711 return 4;
9712
9713 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9714 && REGCLASS_HAS_FP_REG (srcclass)
9715 && REGCLASS_HAS_FP_REG (dstclass))
9716 return 4;
9717
9718 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9719 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9720
9721 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9722 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9723 return 9;
9724
9725 if ((REGCLASS_HAS_FP_REG (dstclass)
9726 && REGCLASS_HAS_GENERAL_REG (srcclass))
9727 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9728 && REGCLASS_HAS_FP_REG (srcclass)))
9729 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9730 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9731
9732 if ((dstclass == FPUL_REGS
9733 && REGCLASS_HAS_GENERAL_REG (srcclass))
9734 || (srcclass == FPUL_REGS
9735 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9736 return 5;
9737
9738 if ((dstclass == FPUL_REGS
9739 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9740 || (srcclass == FPUL_REGS
9741 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9742 return 7;
9743
9744 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9745 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9746 return 20;
9747
9748 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9749 if (TARGET_SHMEDIA
9750 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9751 {
9752 if (sh_gettrcost >= 0)
9753 return sh_gettrcost;
9754 else if (!TARGET_PT_FIXED)
9755 return 100;
9756 }
9757
9758 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9759 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9760 return 4;
9761
9762 if (TARGET_SHMEDIA
9763 || (TARGET_FMOVD
9764 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9765 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9766 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9767
9768 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9769 }
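/* Worked example for the general <-> floating-point case above: the
   size factor (GET_MODE_SIZE (mode) + 7) / 8U is 1 for SFmode and
   DFmode, so such a copy between the two register files costs 4 on
   SHmedia, 8 with -mfmovd, and 12 otherwise; modes wider than 8 bytes
   scale the cost by their size in 8-byte units.  */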
9770
9771 static rtx emit_load_ptr (rtx, rtx);
9772
9773 static rtx
9774 emit_load_ptr (rtx reg, rtx addr)
9775 {
9776 rtx mem = gen_const_mem (ptr_mode, addr);
9777
9778 if (Pmode != ptr_mode)
9779 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9780 return emit_move_insn (reg, mem);
9781 }
9782
9783 static void
9784 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9785 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9786 tree function)
9787 {
9788 CUMULATIVE_ARGS cum;
9789 int structure_value_byref = 0;
9790 rtx this, this_value, sibcall, insns, funexp;
9791 tree funtype = TREE_TYPE (function);
9792 int simple_add = CONST_OK_FOR_ADD (delta);
9793 int did_load = 0;
9794 rtx scratch0, scratch1, scratch2;
9795 unsigned i;
9796
9797 reload_completed = 1;
9798 epilogue_completed = 1;
9799 no_new_pseudos = 1;
9800 current_function_uses_only_leaf_regs = 1;
9801 reset_block_changes ();
9802
9803 emit_note (NOTE_INSN_PROLOGUE_END);
9804
9805 /* Find the "this" pointer. We have such a wide range of ABIs for the
9806 SH that it's best to do this completely machine independently.
9807 "this" is passed as first argument, unless a structure return pointer
9808 comes first, in which case "this" comes second. */
9809 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9810 #ifndef PCC_STATIC_STRUCT_RETURN
9811 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9812 structure_value_byref = 1;
9813 #endif /* not PCC_STATIC_STRUCT_RETURN */
9814 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9815 {
9816 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9817
9818 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9819 }
9820 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9821
9822 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9823 static chain pointer (even if you can't have nested virtual functions
9824 right now, someone might implement them sometime), and the rest of the
9825 registers are used for argument passing, are callee-saved, or reserved. */
9826 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9827 -ffixed-reg has been used. */
9828 if (! call_used_regs[0] || fixed_regs[0])
9829 error ("r0 needs to be available as a call-clobbered register");
9830 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9831 if (! TARGET_SH5)
9832 {
9833 if (call_used_regs[1] && ! fixed_regs[1])
9834 scratch1 = gen_rtx_REG (ptr_mode, 1);
9835 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9836 to the location where struct values are to be returned. */
9837 if (call_used_regs[3] && ! fixed_regs[3])
9838 scratch2 = gen_rtx_REG (Pmode, 3);
9839 }
9840 else if (TARGET_SHMEDIA)
9841 {
9842 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9843 if (i != REGNO (scratch0) &&
9844 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9845 {
9846 scratch1 = gen_rtx_REG (ptr_mode, i);
9847 break;
9848 }
9849 if (scratch1 == scratch0)
9850 error ("Need a second call-clobbered general purpose register");
9851 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9852 if (call_used_regs[i] && ! fixed_regs[i])
9853 {
9854 scratch2 = gen_rtx_REG (Pmode, i);
9855 break;
9856 }
9857 if (scratch2 == scratch0)
9858 error ("Need a call-clobbered target register");
9859 }
9860
9861 this_value = plus_constant (this, delta);
9862 if (vcall_offset
9863 && (simple_add || scratch0 != scratch1)
9864 && strict_memory_address_p (ptr_mode, this_value))
9865 {
9866 emit_load_ptr (scratch0, this_value);
9867 did_load = 1;
9868 }
9869
9870 if (!delta)
9871 ; /* Do nothing. */
9872 else if (simple_add)
9873 emit_move_insn (this, this_value);
9874 else
9875 {
9876 emit_move_insn (scratch1, GEN_INT (delta));
9877 emit_insn (gen_add2_insn (this, scratch1));
9878 }
9879
9880 if (vcall_offset)
9881 {
9882 rtx offset_addr;
9883
9884 if (!did_load)
9885 emit_load_ptr (scratch0, this);
9886
9887 offset_addr = plus_constant (scratch0, vcall_offset);
9888 if (strict_memory_address_p (ptr_mode, offset_addr))
9889 ; /* Do nothing. */
9890 else if (! TARGET_SH5 && scratch0 != scratch1)
9891 {
9892 /* scratch0 != scratch1, and we have indexed loads. Get better
9893 schedule by loading the offset into r1 and using an indexed
9894 load - then the load of r1 can issue before the load from
9895 (this + delta) finishes. */
9896 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9897 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9898 }
9899 else if (CONST_OK_FOR_ADD (vcall_offset))
9900 {
9901 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9902 offset_addr = scratch0;
9903 }
9904 else if (scratch0 != scratch1)
9905 {
9906 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9907 emit_insn (gen_add2_insn (scratch0, scratch1));
9908 offset_addr = scratch0;
9909 }
9910 else
9911 gcc_unreachable (); /* FIXME */
9912 emit_load_ptr (scratch0, offset_addr);
9913
9914 if (Pmode != ptr_mode)
9915 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9916 emit_insn (gen_add2_insn (this, scratch0));
9917 }
9918
9919 /* Generate a tail call to the target function. */
9920 if (! TREE_USED (function))
9921 {
9922 assemble_external (function);
9923 TREE_USED (function) = 1;
9924 }
9925 funexp = XEXP (DECL_RTL (function), 0);
9926 /* If the function is overridden, so is the thunk, hence we don't
9927 need GOT addressing even if this is a public symbol. */
9928 #if 0
9929 if (TARGET_SH1 && ! flag_weak)
9930 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9931 else
9932 #endif
9933 if (TARGET_SH2 && flag_pic)
9934 {
9935 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9936 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9937 }
9938 else
9939 {
9940 if (TARGET_SHMEDIA && flag_pic)
9941 {
9942 funexp = gen_sym2PIC (funexp);
9943 PUT_MODE (funexp, Pmode);
9944 }
9945 emit_move_insn (scratch2, funexp);
9946 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9947 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9948 }
9949 sibcall = emit_call_insn (sibcall);
9950 SIBLING_CALL_P (sibcall) = 1;
9951 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9952 emit_barrier ();
9953
9954 /* Run just enough of rest_of_compilation to do scheduling and get
9955 the insns emitted. Note that use_thunk calls
9956 assemble_start_function and assemble_end_function. */
9957
9958 insn_locators_initialize ();
9959 insns = get_insns ();
9960
9961 if (optimize > 0)
9962 {
9963 /* Initialize the bitmap obstacks. */
9964 bitmap_obstack_initialize (NULL);
9965 bitmap_obstack_initialize (&reg_obstack);
9966 if (! cfun->cfg)
9967 init_flow ();
9968 rtl_register_cfg_hooks ();
9969 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9970 init_rtl_bb_info (EXIT_BLOCK_PTR);
9971 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9972 EXIT_BLOCK_PTR->flags |= BB_RTL;
9973 find_basic_blocks (insns);
9974
9975 if (flag_schedule_insns_after_reload)
9976 {
9977 life_analysis (PROP_FINAL);
9978
9979 split_all_insns (1);
9980
9981 schedule_insns ();
9982 }
9983 /* We must split the jmp insn in the PIC case. */
9984 else if (flag_pic)
9985 split_all_insns_noflow ();
9986 }
9987
9988 sh_reorg ();
9989
9990 if (optimize > 0 && flag_delayed_branch)
9991 dbr_schedule (insns);
9992
9993 shorten_branches (insns);
9994 final_start_function (insns, file, 1);
9995 final (insns, file, 1);
9996 final_end_function ();
9997
9998 if (optimize > 0)
9999 {
10000 /* Release all memory allocated by flow. */
10001 free_basic_block_vars ();
10002
10003 /* Release the bitmap obstacks. */
10004 bitmap_obstack_release (&reg_obstack);
10005 bitmap_obstack_release (NULL);
10006 }
10007
10008 reload_completed = 0;
10009 epilogue_completed = 0;
10010 no_new_pseudos = 0;
10011 }
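/* What the thunk emitted above computes, expressed as C for clarity.
   This is only a model: DELTA, VCALL_OFFSET and FUNCTION stand for the
   delta / vcall_offset / function parameters of sh_output_mi_thunk and
   are not real identifiers.  */
#if 0
void
thunk_model (void *this_arg /* , remaining arguments */)
{
  this_arg = (char *) this_arg + DELTA;
  if (VCALL_OFFSET)
    {
      /* Fetch the vtable pointer from the adjusted object and add the
         adjustment found at VCALL_OFFSET inside the vtable.  */
      char *vptr = *(char **) this_arg;
      this_arg = (char *) this_arg + *(ptrdiff_t *) (vptr + VCALL_OFFSET);
    }
  /* Tail call (SIBLING_CALL_P) to the real method.  */
  FUNCTION (this_arg /* , remaining arguments */);
}
#endif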
10012
10013 rtx
10014 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10015 {
10016 rtx sym;
10017
10018 /* If this is not an ordinary function, the name usually comes from a
10019 string literal or an sprintf buffer. Make sure we use the same
10020 string consistently, so that cse will be able to unify address loads. */
10021 if (kind != FUNCTION_ORDINARY)
10022 name = IDENTIFIER_POINTER (get_identifier (name));
10023 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10024 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10025 if (flag_pic)
10026 switch (kind)
10027 {
10028 case FUNCTION_ORDINARY:
10029 break;
10030 case SFUNC_GOT:
10031 {
10032 rtx reg = target ? target : gen_reg_rtx (Pmode);
10033
10034 emit_insn (gen_symGOT2reg (reg, sym));
10035 sym = reg;
10036 break;
10037 }
10038 case SFUNC_STATIC:
10039 {
10040 /* ??? To allow cse to work, we use GOTOFF relocations.
10041 We could add combiner patterns to transform this into
10042 straight pc-relative calls with sym2PIC / bsrf when
10043 label load and function call are still 1:1 and in the
10044 same basic block during combine. */
10045 rtx reg = target ? target : gen_reg_rtx (Pmode);
10046
10047 emit_insn (gen_symGOTOFF2reg (reg, sym));
10048 sym = reg;
10049 break;
10050 }
10051 }
10052 if (target && sym != target)
10053 {
10054 emit_move_insn (target, sym);
10055 return target;
10056 }
10057 return sym;
10058 }
10059
10060 /* Find the number of a general purpose register in S. */
10061 static int
10062 scavenge_reg (HARD_REG_SET *s)
10063 {
10064 int r;
10065 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10066 if (TEST_HARD_REG_BIT (*s, r))
10067 return r;
10068 return -1;
10069 }
10070
10071 rtx
10072 sh_get_pr_initial_val (void)
10073 {
10074 rtx val;
10075
10076 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10077 PR register on SHcompact, because it might be clobbered by the prologue.
10078 We check first if that is known to be the case. */
10079 if (TARGET_SHCOMPACT
10080 && ((current_function_args_info.call_cookie
10081 & ~ CALL_COOKIE_RET_TRAMP (1))
10082 || current_function_has_nonlocal_label))
10083 return gen_frame_mem (SImode, return_address_pointer_rtx);
10084
10085 /* If we haven't finished rtl generation, there might be a nonlocal label
10086 that we haven't seen yet.
10087 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10088 is set, unless it has been called before for the same register. And even
10089 then, we end up in trouble if we didn't use the register in the same
10090 basic block before. So call get_hard_reg_initial_val now and wrap it
10091 in an unspec if we might need to replace it. */
10092 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10093 combine can put the pseudo returned by get_hard_reg_initial_val into
10094 instructions that need a general purpose registers, which will fail to
10095 be recognized when the pseudo becomes allocated to PR. */
10096 val
10097 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10098 if (TARGET_SH1)
10099 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10100 return val;
10101 }
10102
10103 int
10104 sh_expand_t_scc (enum rtx_code code, rtx target)
10105 {
10106 rtx result = target;
10107 HOST_WIDE_INT val;
10108
10109 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10110 || GET_CODE (sh_compare_op1) != CONST_INT)
10111 return 0;
10112 if (GET_CODE (result) != REG)
10113 result = gen_reg_rtx (SImode);
10114 val = INTVAL (sh_compare_op1);
10115 if ((code == EQ && val == 1) || (code == NE && val == 0))
10116 emit_insn (gen_movt (result));
10117 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10118 {
10119 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10120 emit_insn (gen_subc (result, result, result));
10121 emit_insn (gen_addsi3 (result, result, const1_rtx));
10122 }
10123 else if (code == EQ || code == NE)
10124 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10125 else
10126 return 0;
10127 if (result != target)
10128 emit_move_insn (target, result);
10129 return 1;
10130 }
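/* Worked arithmetic for the branch-free T-bit negation above: after the
   clobber, subc computes RESULT = RESULT - RESULT - T = -T, and the
   following addsi3 yields -T + 1, i.e. 1 when T is 0 and 0 when T is 1,
   which is the negated T bit without a conditional branch.  */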
10131
10132 /* INSN is an sfunc; return the rtx that describes the address used. */
10133 static rtx
10134 extract_sfunc_addr (rtx insn)
10135 {
10136 rtx pattern, part = NULL_RTX;
10137 int len, i;
10138
10139 pattern = PATTERN (insn);
10140 len = XVECLEN (pattern, 0);
10141 for (i = 0; i < len; i++)
10142 {
10143 part = XVECEXP (pattern, 0, i);
10144 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10145 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10146 return XEXP (part, 0);
10147 }
10148 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10149 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10150 }
10151
10152 /* Verify that the register in use_sfunc_addr still agrees with the address
10153 used in the sfunc. This prevents fill_slots_from_thread from changing
10154 use_sfunc_addr.
10155 INSN is the use_sfunc_addr instruction, and REG is the register it
10156 guards. */
10157 int
10158 check_use_sfunc_addr (rtx insn, rtx reg)
10159 {
10160 /* Search for the sfunc. It should really come right after INSN. */
10161 while ((insn = NEXT_INSN (insn)))
10162 {
10163 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10164 break;
10165 if (! INSN_P (insn))
10166 continue;
10167
10168 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10169 insn = XVECEXP (PATTERN (insn), 0, 0);
10170 if (GET_CODE (PATTERN (insn)) != PARALLEL
10171 || get_attr_type (insn) != TYPE_SFUNC)
10172 continue;
10173 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10174 }
10175 gcc_unreachable ();
10176 }
10177
10178 /* This function returns a constant rtx that represents 2**15 / pi in
10179 SFmode. It is used to scale SFmode angles, in radians, to a
10180 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10181 maps to 0x10000). */
10182
10183 static GTY(()) rtx sh_fsca_sf2int_rtx;
10184
10185 rtx
10186 sh_fsca_sf2int (void)
10187 {
10188 if (! sh_fsca_sf2int_rtx)
10189 {
10190 REAL_VALUE_TYPE rv;
10191
10192 real_from_string (&rv, "10430.378350470453");
10193 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10194 }
10195
10196 return sh_fsca_sf2int_rtx;
10197 }
10198
10199 /* This function returns a constant rtx that represents 2**15 / pi in
10200 DFmode. It is used to scale DFmode angles, in radians, to a
10201 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10202 maps to 0x10000). */
10203
10204 static GTY(()) rtx sh_fsca_df2int_rtx;
10205
10206 rtx
10207 sh_fsca_df2int (void)
10208 {
10209 if (! sh_fsca_df2int_rtx)
10210 {
10211 REAL_VALUE_TYPE rv;
10212
10213 real_from_string (&rv, "10430.378350470453");
10214 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10215 }
10216
10217 return sh_fsca_df2int_rtx;
10218 }
10219
10220 /* This function returns a constant rtx that represents pi / 2**15 in
10221 SFmode. It is used to scale a fixed-point signed 16.16-bit fraction
10222 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10223 2*pi). */
10224
10225 static GTY(()) rtx sh_fsca_int2sf_rtx;
10226
10227 rtx
10228 sh_fsca_int2sf (void)
10229 {
10230 if (! sh_fsca_int2sf_rtx)
10231 {
10232 REAL_VALUE_TYPE rv;
10233
10234 real_from_string (&rv, "9.587379924285257e-5");
10235 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10236 }
10237
10238 return sh_fsca_int2sf_rtx;
10239 }
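/* The three constants above are 2**15 / pi and its reciprocal.  A
   minimal stand-alone check in plain C (not part of the port; assumes
   M_PI from <math.h>):  */
#if 0
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double rad_to_fsca = 32768.0 / M_PI;  /* ~10430.378350470453 */
  double fsca_to_rad = M_PI / 32768.0;  /* ~9.587379924285257e-5 */
  printf ("%.16g %.16g\n", rad_to_fsca, fsca_to_rad);
  return 0;
}
#endif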
10240
10241 /* Initialize the CUMULATIVE_ARGS structure. */
10242
10243 void
10244 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10245 tree fntype,
10246 rtx libname ATTRIBUTE_UNUSED,
10247 tree fndecl,
10248 signed int n_named_args,
10249 enum machine_mode mode)
10250 {
10251 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10252 pcum->free_single_fp_reg = 0;
10253 pcum->stack_regs = 0;
10254 pcum->byref_regs = 0;
10255 pcum->byref = 0;
10256 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10257
10258 /* XXX - Should we check TARGET_HITACHI here ??? */
10259 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10260
10261 if (fntype)
10262 {
10263 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10264 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10265 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10266 pcum->arg_count [(int) SH_ARG_INT]
10267 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10268
10269 pcum->call_cookie
10270 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10271 && pcum->arg_count [(int) SH_ARG_INT] == 0
10272 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10273 ? int_size_in_bytes (TREE_TYPE (fntype))
10274 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10275 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10276 == FIRST_RET_REG));
10277 }
10278 else
10279 {
10280 pcum->arg_count [(int) SH_ARG_INT] = 0;
10281 pcum->prototype_p = FALSE;
10282 if (mode != VOIDmode)
10283 {
10284 pcum->call_cookie =
10285 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10286 && GET_MODE_SIZE (mode) > 4
10287 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10288
10289 /* If the default ABI is the Renesas ABI then all library
10290 calls must assume that the library will be using the
10291 Renesas ABI. So if the function would return its result
10292 in memory then we must force the address of this memory
10293 block onto the stack. Ideally we would like to call
10294 targetm.calls.return_in_memory() here but we do not have
10295 the TYPE or the FNDECL available so we synthesize the
10296 contents of that function as best we can. */
10297 pcum->force_mem =
10298 (TARGET_DEFAULT & MASK_HITACHI)
10299 && (mode == BLKmode
10300 || (GET_MODE_SIZE (mode) > 4
10301 && !(mode == DFmode
10302 && TARGET_FPU_DOUBLE)));
10303 }
10304 else
10305 {
10306 pcum->call_cookie = 0;
10307 pcum->force_mem = FALSE;
10308 }
10309 }
10310 }
10311
10312 /* Determine if two hard register sets intersect.
10313 Return 1 if they do. */
10314
10315 static int
10316 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10317 {
10318 HARD_REG_SET c;
10319 COPY_HARD_REG_SET (c, *a);
10320 AND_HARD_REG_SET (c, *b);
10321 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10322 return 1;
10323 lose:
10324 return 0;
10325 }
10326
10327 #ifdef TARGET_ADJUST_UNROLL_MAX
10328 static int
10329 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10330 int max_unrolled_insns, int strength_reduce_p,
10331 int unroll_type)
10332 {
10333 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10334 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10335 {
10336 /* Throttle back loop unrolling so that the costs of using more
10337 targets than the eight target registers we have don't outweigh
10338 the benefits of unrolling. */
10339 rtx insn;
10340 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10341 int n_barriers = 0;
10342 rtx dest;
10343 int i;
10344 rtx exit_dest[8];
10345 int threshold;
10346 int unroll_benefit = 0, mem_latency = 0;
10347 int base_cost, best_cost, cost;
10348 int factor, best_factor;
10349 int n_dest;
10350 unsigned max_iterations = 32767;
10351 int n_iterations;
10352 int need_precond = 0, precond = 0;
10353 basic_block * bbs = get_loop_body (loop);
10354 struct niter_desc *desc;
10355
10356 /* Assume that all labels inside the loop are used from inside the
10357 loop. If the loop has multiple entry points, it is unlikely to
10358 be unrolled anyway.
10359 Also assume that all calls are to different functions. That is
10360 somewhat pessimistic, but if you have lots of calls, unrolling the
10361 loop is not likely to gain you much in the first place. */
10362 i = loop->num_nodes - 1;
10363 for (insn = BB_HEAD (bbs[i]); ; )
10364 {
10365 if (GET_CODE (insn) == CODE_LABEL)
10366 n_labels++;
10367 else if (GET_CODE (insn) == CALL_INSN)
10368 n_calls++;
10369 else if (GET_CODE (insn) == NOTE
10370 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10371 n_inner_loops++;
10372 else if (GET_CODE (insn) == BARRIER)
10373 n_barriers++;
10374 if (insn != BB_END (bbs[i]))
10375 insn = NEXT_INSN (insn);
10376 else if (--i >= 0)
10377 insn = BB_HEAD (bbs[i]);
10378 else
10379 break;
10380 }
10381 free (bbs);
10382 /* One label for the loop top is normal, and it won't be duplicated by
10383 unrolling. */
10384 if (n_labels <= 1)
10385 return max_unrolled_insns;
10386 if (n_inner_loops > 0)
10387 return 0;
10388 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10389 dest = LABEL_NEXTREF (dest))
10390 {
10391 for (i = n_exit_dest - 1;
10392 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10393 if (i < 0)
10394 exit_dest[n_exit_dest++] = dest;
10395 }
10396 /* If the loop top and call and exit destinations are enough to fill up
10397 the target registers, we're unlikely to do any more damage by
10398 unrolling. */
10399 if (n_calls + n_exit_dest >= 7)
10400 return max_unrolled_insns;
10401
10402 /* ??? In the new loop unroller, there is no longer any strength
10403 reduction information available. Thus, when it comes to unrolling,
10404 we know the cost of everything, but we know the value of nothing. */
10405 #if 0
10406 if (strength_reduce_p
10407 && (unroll_type == LPT_UNROLL_RUNTIME
10408 || unroll_type == LPT_UNROLL_CONSTANT
10409 || unroll_type == LPT_PEEL_COMPLETELY))
10410 {
10411 struct loop_ivs *ivs = LOOP_IVS (loop);
10412 struct iv_class *bl;
10413
10414 /* We'll save one compare-and-branch in each loop body copy
10415 but the last one. */
10416 unroll_benefit = 1;
10417 /* Assess the benefit of removing biv & giv updates. */
10418 for (bl = ivs->list; bl; bl = bl->next)
10419 {
10420 rtx increment = biv_total_increment (bl);
10421 struct induction *v;
10422
10423 if (increment && GET_CODE (increment) == CONST_INT)
10424 {
10425 unroll_benefit++;
10426 for (v = bl->giv; v; v = v->next_iv)
10427 {
10428 if (! v->ignore && v->same == 0
10429 && GET_CODE (v->mult_val) == CONST_INT)
10430 unroll_benefit++;
10431 /* If this giv uses an array, try to determine
10432 a maximum iteration count from the size of the
10433 array. This need not be correct all the time,
10434 but should not be too far off the mark too often. */
10435 while (v->giv_type == DEST_ADDR)
10436 {
10437 rtx mem = PATTERN (v->insn);
10438 tree mem_expr, type, size_tree;
10439
10440 if (GET_CODE (SET_SRC (mem)) == MEM)
10441 mem = SET_SRC (mem);
10442 else if (GET_CODE (SET_DEST (mem)) == MEM)
10443 mem = SET_DEST (mem);
10444 else
10445 break;
10446 mem_expr = MEM_EXPR (mem);
10447 if (! mem_expr)
10448 break;
10449 type = TREE_TYPE (mem_expr);
10450 if (TREE_CODE (type) != ARRAY_TYPE
10451 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10452 break;
10453 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10454 bitsizetype,
10455 TYPE_SIZE (type),
10456 TYPE_SIZE_UNIT (type));
10457 if (TREE_CODE (size_tree) == INTEGER_CST
10458 && ! TREE_INT_CST_HIGH (size_tree)
10459 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10460 max_iterations = TREE_INT_CST_LOW (size_tree);
10461 break;
10462 }
10463 }
10464 }
10465 }
10466 }
10467 #else /* 0 */
10468 /* Assume there is at least some benefit. */
10469 unroll_benefit = 1;
10470 #endif /* 0 */
10471
10472 desc = get_simple_loop_desc (loop);
10473 n_iterations = desc->const_iter ? desc->niter : 0;
10474 max_iterations
10475 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10476
10477 if (! strength_reduce_p || ! n_iterations)
10478 need_precond = 1;
10479 if (! n_iterations)
10480 {
10481 n_iterations
10482 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10483 if (! n_iterations)
10484 return 0;
10485 }
10486 #if 0 /* ??? See above - missing induction variable information. */
10487 while (unroll_benefit > 1) /* no loop */
10488 {
10489 /* We include the benefit of biv/ giv updates. Check if some or
10490 all of these updates are likely to fit into a scheduling
10491 bubble of a load.
10492 We check for the following case:
10493 - All the insns leading to the first JUMP_INSN are in a strict
10494 dependency chain.
10495 - there is at least one memory reference in them.
10496
10497 When we find such a pattern, we assume that we can hide as many
10498 updates as the total of the load latency is, if we have an
10499 unroll factor of at least two. We might or might not also do
10500 this without unrolling, so rather than considering this as an
10501 extra unroll benefit, discount it in the unroll benefits of unroll
10502 factors higher than two. */
10503
10504 rtx set, last_set;
10505
10506 insn = next_active_insn (loop->start);
10507 last_set = single_set (insn);
10508 if (! last_set)
10509 break;
10510 if (GET_CODE (SET_SRC (last_set)) == MEM)
10511 mem_latency += 2;
10512 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10513 {
10514 if (! INSN_P (insn))
10515 continue;
10516 if (GET_CODE (insn) == JUMP_INSN)
10517 break;
10518 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10519 {
10520 /* Check if this is a to-be-reduced giv insn. */
10521 struct loop_ivs *ivs = LOOP_IVS (loop);
10522 struct iv_class *bl;
10523 struct induction *v;
10524 for (bl = ivs->list; bl; bl = bl->next)
10525 {
10526 if (bl->biv->insn == insn)
10527 goto is_biv;
10528 for (v = bl->giv; v; v = v->next_iv)
10529 if (v->insn == insn)
10530 goto is_giv;
10531 }
10532 mem_latency--;
10533 is_biv:
10534 is_giv:
10535 continue;
10536 }
10537 set = single_set (insn);
10538 if (! set)
10539 continue;
10540 if (GET_CODE (SET_SRC (set)) == MEM)
10541 mem_latency += 2;
10542 last_set = set;
10543 }
10544 if (mem_latency < 0)
10545 mem_latency = 0;
10546 else if (mem_latency > unroll_benefit - 1)
10547 mem_latency = unroll_benefit - 1;
10548 break;
10549 }
10550 #endif /* 0 */
10551 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10552 <= unroll_benefit)
10553 return max_unrolled_insns;
10554
10555 n_dest = n_labels + n_calls + n_exit_dest;
10556 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10557 best_cost = 0;
10558 best_factor = 1;
10559 if (n_barriers * 2 > n_labels - 1)
10560 n_barriers = (n_labels - 1) / 2;
10561 for (factor = 2; factor <= 8; factor++)
10562 {
10563 /* Bump up preconditioning cost for each power of two. */
10564 if (! (factor & (factor-1)))
10565 precond += 4;
10566 /* When preconditioning, only powers of two will be considered. */
10567 else if (need_precond)
10568 continue;
10569 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10570 + (n_labels - 1) * factor + n_calls + n_exit_dest
10571 - (n_barriers * factor >> 1)
10572 + need_precond);
10573 cost
10574 = ((n_dest <= 8 ? 0 : n_dest - 7)
10575 - base_cost * factor
10576 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10577 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10578 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10579 / n_iterations));
10580 if (need_precond)
10581 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10582 if (cost < best_cost)
10583 {
10584 best_cost = cost;
10585 best_factor = factor;
10586 }
10587 }
10588 threshold = best_factor * insn_count;
10589 if (max_unrolled_insns > threshold)
10590 max_unrolled_insns = threshold;
10591 }
10592 return max_unrolled_insns;
10593 }
10594 #endif /* TARGET_ADJUST_UNROLL_MAX */
10595
10596 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10597 not descend into CONST_DOUBLEs during the replacement.
10598
10599 Note that copying is not done so X must not be shared unless all copies
10600 are to be modified.
10601
10602 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10603 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10604 replacements[n*2+1] - and that we take mode changes into account.
10605
10606 If a replacement is ambiguous, return NULL_RTX.
10607
10608 If MODIFY is zero, don't modify any rtl in place,
10609 just return zero or nonzero for failure / success. */
10610
10611 rtx
10612 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10613 {
10614 int i, j;
10615 const char *fmt;
10616
10617 /* The following prevents infinite loops when we would replace a MEM
10618 inside a CONST_DOUBLE with the same CONST_DOUBLE. */
10619 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10620 return x;
10621
10622 for (i = n_replacements - 1; i >= 0 ; i--)
10623 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10624 return replacements[i*2+1];
10625
10626 /* Allow this function to make replacements in EXPR_LISTs. */
10627 if (x == 0)
10628 return 0;
10629
10630 if (GET_CODE (x) == SUBREG)
10631 {
10632 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10633 n_replacements, modify);
10634
10635 if (GET_CODE (new) == CONST_INT)
10636 {
10637 x = simplify_subreg (GET_MODE (x), new,
10638 GET_MODE (SUBREG_REG (x)),
10639 SUBREG_BYTE (x));
10640 if (! x)
10641 abort ();
10642 }
10643 else if (modify)
10644 SUBREG_REG (x) = new;
10645
10646 return x;
10647 }
10648 else if (GET_CODE (x) == REG)
10649 {
10650 unsigned regno = REGNO (x);
10651 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10652 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10653 rtx result = NULL_RTX;
10654
10655 for (i = n_replacements - 1; i >= 0; i--)
10656 {
10657 rtx from = replacements[i*2];
10658 rtx to = replacements[i*2+1];
10659 unsigned from_regno, from_nregs, to_regno, new_regno;
10660
10661 if (GET_CODE (from) != REG)
10662 continue;
10663 from_regno = REGNO (from);
10664 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10665 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10666 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10667 {
10668 if (regno < from_regno
10669 || regno + nregs > from_regno + from_nregs
10670 || GET_CODE (to) != REG
10671 || result)
10672 return NULL_RTX;
10673 to_regno = REGNO (to);
10674 if (to_regno < FIRST_PSEUDO_REGISTER)
10675 {
10676 new_regno = regno + to_regno - from_regno;
10677 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10678 != nregs)
10679 return NULL_RTX;
10680 result = gen_rtx_REG (GET_MODE (x), new_regno);
10681 }
10682 else if (GET_MODE (x) <= GET_MODE (to))
10683 result = gen_lowpart_common (GET_MODE (x), to);
10684 else
10685 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10686 }
10687 }
10688 return result ? result : x;
10689 }
10690 else if (GET_CODE (x) == ZERO_EXTEND)
10691 {
10692 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10693 n_replacements, modify);
10694
10695 if (GET_CODE (new) == CONST_INT)
10696 {
10697 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10698 new, GET_MODE (XEXP (x, 0)));
10699 if (! x)
10700 abort ();
10701 }
10702 else if (modify)
10703 XEXP (x, 0) = new;
10704
10705 return x;
10706 }
10707
10708 fmt = GET_RTX_FORMAT (GET_CODE (x));
10709 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10710 {
10711 rtx new;
10712
10713 if (fmt[i] == 'e')
10714 {
10715 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10716 n_replacements, modify);
10717 if (!new)
10718 return NULL_RTX;
10719 if (modify)
10720 XEXP (x, i) = new;
10721 }
10722 else if (fmt[i] == 'E')
10723 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10724 {
10725 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10726 n_replacements, modify);
10727 if (!new)
10728 return NULL_RTX;
10729 if (modify)
10730 XVECEXP (x, i, j) = new;
10731 }
10732 }
10733
10734 return x;
10735 }
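
/* A minimal usage sketch for replace_n_hard_rtx, kept under "#if 0" like
   the other illustrative code in this file.  The register numbers, the
   SImode choice and the helper name are hypothetical, not taken from an
   actual caller; the point is only the replacements[] layout described
   above (FROM(n) at index n*2, TO(n) at index n*2+1) and the idea of
   doing a dry run before modifying the rtl in place.  */
#if 0
static rtx
example_replace_two_hard_regs (rtx pat)
{
  rtx replacements[4];

  replacements[0] = gen_rtx_REG (SImode, 4);	/* FROM(0) */
  replacements[1] = gen_rtx_REG (SImode, 5);	/* TO(0) */
  replacements[2] = gen_rtx_REG (SImode, 6);	/* FROM(1) */
  replacements[3] = gen_rtx_REG (SImode, 7);	/* TO(1) */

  /* Dry run first (MODIFY == 0): bail out if any replacement would be
     ambiguous, then modify PAT in place.  */
  if (! replace_n_hard_rtx (pat, replacements, 2, 0))
    return NULL_RTX;
  return replace_n_hard_rtx (pat, replacements, 2, 1);
}
#endif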
10736
10737 rtx
10738 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10739 {
10740 enum rtx_code code = TRUNCATE;
10741
10742 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10743 {
10744 rtx inner = XEXP (x, 0);
10745 enum machine_mode inner_mode = GET_MODE (inner);
10746
10747 if (inner_mode == mode)
10748 return inner;
10749 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10750 x = inner;
10751 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10752 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10753 {
10754 code = GET_CODE (x);
10755 x = inner;
10756 }
10757 }
10758 return gen_rtx_fmt_e (code, mode, x);
10759 }
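
/* A hedged sketch of the cases sh_gen_truncate folds; the register
   numbers are made up and the block is disabled, mirroring the "#if 0"
   style used elsewhere in this file.  */
#if 0
static void
example_sh_gen_truncate (void)
{
  rtx di_reg = gen_rtx_REG (DImode, 2);
  rtx hi_reg = gen_rtx_REG (HImode, 3);

  /* No extension to fold: yields (truncate:SI (reg:DI 2)).  */
  rtx a = sh_gen_truncate (SImode, di_reg, 0);

  /* Truncating (sign_extend:DI (reg:HI 3)) to SImode re-extends the
     narrow value instead: (sign_extend:SI (reg:HI 3)).  */
  rtx b = sh_gen_truncate (SImode, gen_rtx_SIGN_EXTEND (DImode, hi_reg), 1);

  /* Truncating (zero_extend:DI (reg:SI 4)) back to SImode simply
     returns the inner SImode register.  */
  rtx c = sh_gen_truncate (SImode,
			   gen_rtx_ZERO_EXTEND (DImode,
						gen_rtx_REG (SImode, 4)), 0);
  (void) a; (void) b; (void) c;
}
#endif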
10760
10761 /* Called via for_each_rtx after reload, to clean up truncates of
10762 registers that span multiple actual hard registers. */
10763 int
10764 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10765 {
10766 rtx x = *p, reg;
10767
10768 if (GET_CODE (x) != TRUNCATE)
10769 return 0;
10770 reg = XEXP (x, 0);
10771 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10772 {
10773 enum machine_mode reg_mode = GET_MODE (reg);
10774 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10775 subreg_lowpart_offset (DImode, reg_mode));
10776 *(int*) n_changes += 1;
10777 return -1;
10778 }
10779 return 0;
10780 }
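
/* A minimal usage sketch for shmedia_cleanup_truncate: drive it over
   every insn pattern with for_each_rtx and count the changes.  The
   surrounding walk is hypothetical, not the actual caller.  */
#if 0
static int
example_cleanup_truncates (void)
{
  int n_changes = 0;
  rtx insn;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);

  return n_changes;
}
#endif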
10781
10782 /* Load and store depend on the highpart of the address. However,
10783 set_attr_alternative does not give well-defined results before reload,
10784 so we must look at the rtl ourselves to see if any of the feeding
10785 registers is used in a memref. */
10786
10787 /* Called by sh_contains_memref_p via for_each_rtx. */
10788 static int
10789 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10790 {
10791 return (GET_CODE (*loc) == MEM);
10792 }
10793
10794 /* Return nonzero iff INSN contains a MEM. */
10795 int
10796 sh_contains_memref_p (rtx insn)
10797 {
10798 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10799 }
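
/* Hypothetical usage sketch: a predicate that lets a transformation
   skip any insn touching memory.  */
#if 0
static int
example_insn_is_mem_free (rtx insn)
{
  return INSN_P (insn) && ! sh_contains_memref_p (insn);
}
#endif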
10800
10801 /* FNADDR is the MEM expression from a call expander. Return an address
10802 to use in an SHmedia insn pattern. */
10803 rtx
10804 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10805 {
10806 int is_sym;
10807
10808 fnaddr = XEXP (fnaddr, 0);
10809 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10810 if (flag_pic && is_sym)
10811 {
10812 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10813 {
10814 rtx reg = gen_reg_rtx (Pmode);
10815
10816 /* We must not use GOTPLT for sibcalls, because PIC_REG
10817 must be restored before the PLT code gets to run. */
10818 if (is_sibcall)
10819 emit_insn (gen_symGOT2reg (reg, fnaddr));
10820 else
10821 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10822 fnaddr = reg;
10823 }
10824 else
10825 {
10826 fnaddr = gen_sym2PIC (fnaddr);
10827 PUT_MODE (fnaddr, Pmode);
10828 }
10829 }
10830 /* If ptabs might trap, make this visible to the rest of the compiler.
10831 We generally assume that symbols pertain to valid locations, but
10832 it is possible to generate invalid symbols with asm or linker tricks.
10833 In a list of functions where each returns its successor, an invalid
10834 symbol might denote an empty list. */
10835 if (!TARGET_PT_FIXED
10836 && (!is_sym || TARGET_INVALID_SYMBOLS)
10837 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10838 {
10839 rtx tr = gen_reg_rtx (PDImode);
10840
10841 emit_insn (gen_ptabs (tr, fnaddr));
10842 fnaddr = tr;
10843 }
10844 else if (! target_reg_operand (fnaddr, Pmode))
10845 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10846 return fnaddr;
10847 }
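
/* A hedged sketch of how a call expander might use
   shmedia_prepare_call_address; the operand layout and the emitted
   pattern are illustrative only, not the actual expanders in sh.md.
   FNADDR_MEM is the MEM expression of the call, RETVAL the (possibly
   NULL) value register.  */
#if 0
static void
example_expand_shmedia_call (rtx fnaddr_mem, rtx retval)
{
  rtx addr = shmedia_prepare_call_address (fnaddr_mem, /* is_sibcall */ 0);
  rtx call = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, addr), const0_rtx);

  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  emit_call_insn (call);
}
#endif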
10848
10849 enum reg_class
10850 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10851 enum machine_mode mode, secondary_reload_info *sri)
10852 {
10853 if (in_p)
10854 {
10855 if (REGCLASS_HAS_FP_REG (class)
10856 && ! TARGET_SHMEDIA
10857 && immediate_operand ((x), mode)
10858 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10859 && mode == SFmode && fldi_ok ()))
10860 switch (mode)
10861 {
10862 case SFmode:
10863 sri->icode = CODE_FOR_reload_insf__frn;
10864 return NO_REGS;
10865 case DFmode:
10866 sri->icode = CODE_FOR_reload_indf__frn;
10867 return NO_REGS;
10868 case SImode:
10869 /* ??? If we knew that we were in the appropriate mode -
10870 single precision - we could use a reload pattern directly. */
10871 return FPUL_REGS;
10872 default:
10873 abort ();
10874 }
10875 if (class == FPUL_REGS
10876 && ((GET_CODE (x) == REG
10877 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10878 || REGNO (x) == T_REG))
10879 || GET_CODE (x) == PLUS))
10880 return GENERAL_REGS;
10881 if (class == FPUL_REGS && immediate_operand (x, mode))
10882 {
10883 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10884 return GENERAL_REGS;
10885 sri->icode = CODE_FOR_reload_insi__i_fpul;
10886 return NO_REGS;
10887 }
10888 if (class == FPSCR_REGS
10889 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10890 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10891 return GENERAL_REGS;
10892 if (REGCLASS_HAS_FP_REG (class)
10893 && TARGET_SHMEDIA
10894 && immediate_operand (x, mode)
10895 && x != CONST0_RTX (GET_MODE (x))
10896 && GET_MODE (x) != V4SFmode)
10897 return GENERAL_REGS;
10898 if ((mode == QImode || mode == HImode)
10899 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10900 {
10901 sri->icode = ((mode == QImode)
10902 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10903 return NO_REGS;
10904 }
10905 if (TARGET_SHMEDIA && class == GENERAL_REGS
10906 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10907 return TARGET_REGS;
10908 } /* end of input-only processing. */
10909
10910 if (((REGCLASS_HAS_FP_REG (class)
10911 && (GET_CODE (x) == REG
10912 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10913 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10914 && TARGET_FMOVD))))
10915 || (REGCLASS_HAS_GENERAL_REG (class)
10916 && GET_CODE (x) == REG
10917 && FP_REGISTER_P (REGNO (x))))
10918 && ! TARGET_SHMEDIA
10919 && (mode == SFmode || mode == SImode))
10920 return FPUL_REGS;
10921 if ((class == FPUL_REGS
10922 || (REGCLASS_HAS_FP_REG (class)
10923 && ! TARGET_SHMEDIA && mode == SImode))
10924 && (GET_CODE (x) == MEM
10925 || (GET_CODE (x) == REG
10926 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10927 || REGNO (x) == T_REG
10928 || system_reg_operand (x, VOIDmode)))))
10929 {
10930 if (class == FPUL_REGS)
10931 return GENERAL_REGS;
10932 return FPUL_REGS;
10933 }
10934 if ((class == TARGET_REGS
10935 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10936 && !EXTRA_CONSTRAINT_Csy (x)
10937 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10938 return GENERAL_REGS;
10939 if ((class == MAC_REGS || class == PR_REGS)
10940 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10941 && class != REGNO_REG_CLASS (REGNO (x)))
10942 return GENERAL_REGS;
10943 if (class != GENERAL_REGS && GET_CODE (x) == REG
10944 && TARGET_REGISTER_P (REGNO (x)))
10945 return GENERAL_REGS;
10946 return NO_REGS;
10947 }
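
/* sh_secondary_reload is written for the TARGET_SECONDARY_RELOAD target
   hook; a minimal sketch of how such a function is usually registered
   in the target vector (the real hook initializers for this port live
   elsewhere in the file, not here):  */
#if 0
#undef  TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sh_secondary_reload
#endif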
10948
10949 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10950
10951 #include "gt-sh.h"
10952
10953 void
10954 sh_override_options (void)
10955 {
10956 int regno;
10957
10958 if (flag_finite_math_only == 2)
10959 flag_finite_math_only
10960 = !flag_signaling_nans && TARGET_SH2E && ! TARGET_IEEE;
10961 if (TARGET_SH2E && !flag_finite_math_only)
10962 target_flags |= MASK_IEEE;
10963 sh_cpu = CPU_SH1;
10964 assembler_dialect = 0;
10965 if (TARGET_SH2)
10966 sh_cpu = CPU_SH2;
10967 if (TARGET_SH2E)
10968 sh_cpu = CPU_SH2E;
10969 if (TARGET_SH2A)
10970 {
10971 sh_cpu = CPU_SH2A;
10972 if (TARGET_SH2A_DOUBLE)
10973 target_flags |= MASK_FMOVD;
10974 }
10975 if (TARGET_SH3)
10976 sh_cpu = CPU_SH3;
10977 if (TARGET_SH3E)
10978 sh_cpu = CPU_SH3E;
10979 if (TARGET_SH4)
10980 {
10981 assembler_dialect = 1;
10982 sh_cpu = CPU_SH4;
10983 }
10984 if (TARGET_SH4A_ARCH)
10985 {
10986 assembler_dialect = 1;
10987 sh_cpu = CPU_SH4A;
10988 }
10989 if (TARGET_SH5)
10990 {
10991 sh_cpu = CPU_SH5;
10992 target_flags |= MASK_ALIGN_DOUBLE;
10993 if (TARGET_SHMEDIA_FPU)
10994 target_flags |= MASK_FMOVD;
10995 if (TARGET_SHMEDIA)
10996 {
10997 /* There are no delay slots on SHmedia. */
10998 flag_delayed_branch = 0;
10999 /* Relaxation isn't yet supported for SHmedia. */
11000 target_flags &= ~MASK_RELAX;
11001 /* After reload, if-conversion does little good but can cause
11002 ICEs:
11003 - find_if_block doesn't do anything for SH because we don't
11004 have conditional execution patterns. (We use conditional
11005 move patterns, which are handled differently, and only
11006 before reload).
11007 - find_cond_trap doesn't do anything for the SH because we
11008 don't have conditional traps.
11009 - find_if_case_1 uses redirect_edge_and_branch_force in
11010 the only path that does an optimization, and this causes
11011 an ICE when branch targets are in registers.
11012 - find_if_case_2 doesn't do anything for the SHmedia after
11013 reload except when it can redirect a tablejump - and
11014 that's rather rare. */
11015 flag_if_conversion2 = 0;
11016 if (! strcmp (sh_div_str, "call"))
11017 sh_div_strategy = SH_DIV_CALL;
11018 else if (! strcmp (sh_div_str, "call2"))
11019 sh_div_strategy = SH_DIV_CALL2;
11020 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
11021 sh_div_strategy = SH_DIV_FP;
11022 else if (! strcmp (sh_div_str, "inv"))
11023 sh_div_strategy = SH_DIV_INV;
11024 else if (! strcmp (sh_div_str, "inv:minlat"))
11025 sh_div_strategy = SH_DIV_INV_MINLAT;
11026 else if (! strcmp (sh_div_str, "inv20u"))
11027 sh_div_strategy = SH_DIV_INV20U;
11028 else if (! strcmp (sh_div_str, "inv20l"))
11029 sh_div_strategy = SH_DIV_INV20L;
11030 else if (! strcmp (sh_div_str, "inv:call2"))
11031 sh_div_strategy = SH_DIV_INV_CALL2;
11032 else if (! strcmp (sh_div_str, "inv:call"))
11033 sh_div_strategy = SH_DIV_INV_CALL;
11034 else if (! strcmp (sh_div_str, "inv:fp"))
11035 {
11036 if (TARGET_FPU_ANY)
11037 sh_div_strategy = SH_DIV_INV_FP;
11038 else
11039 sh_div_strategy = SH_DIV_INV;
11040 }
11041 }
11042 /* -fprofile-arcs needs a working libgcov. In unified tree
11043 configurations with newlib, this requires configuring with
11044 --with-newlib --with-headers. But there is no way to check
11045 here that we have a working libgcov, so just assume that we do. */
11046 if (profile_flag)
11047 warning (0, "profiling is still experimental for this target");
11048 }
11049 else
11050 {
11051 /* Only the sh64-elf assembler supports .quad properly. */
11052 targetm.asm_out.aligned_op.di = NULL;
11053 targetm.asm_out.unaligned_op.di = NULL;
11054 }
11055 if (!TARGET_SH1)
11056 TARGET_PRETEND_CMOVE = 0;
11057 if (sh_divsi3_libfunc[0])
11058 ; /* User supplied - leave it alone. */
11059 else if (TARGET_HARD_SH4 && TARGET_SH2E)
11060 sh_divsi3_libfunc = "__sdivsi3_i4";
11061 else if (TARGET_SH5)
11062 {
11063 if (TARGET_FPU_ANY && TARGET_SH1)
11064 sh_divsi3_libfunc = "__sdivsi3_i4";
11065 else
11066 sh_divsi3_libfunc = "__sdivsi3_1";
11067 }
11068 else
11069 sh_divsi3_libfunc = "__sdivsi3";
11070 if (TARGET_FMOVD)
11071 reg_class_from_letter['e' - 'a'] = NO_REGS;
11072
11073 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11074 if (! VALID_REGISTER_P (regno))
11075 sh_register_names[regno][0] = '\0';
11076
11077 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
11078 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
11079 sh_additional_register_names[regno][0] = '\0';
11080
11081 if (flag_omit_frame_pointer < 0)
11082 {
11083 /* The debugging information is sufficient,
11084 but gdb doesn't implement this yet. */
11085 if (0)
11086 flag_omit_frame_pointer
11087 = (PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG);
11088 else
11089 flag_omit_frame_pointer = 0;
11090 }
11091
11092 if ((flag_pic && ! TARGET_PREFERGOT)
11093 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
11094 flag_no_function_cse = 1;
11095
11096 if (SMALL_REGISTER_CLASSES)
11097 {
11098 /* Never run scheduling before reload, since that can
11099 break global alloc, and generates slower code anyway due
11100 to the pressure on R0. */
11101 /* Enable sched1 for SH4; ready queue will be reordered by
11102 the target hooks when pressure is high. We cannot do this for
11103 SH3 and lower as they give spill failures for R0. */
11104 if (!TARGET_HARD_SH4 || flag_pic)
11105 flag_schedule_insns = 0;
11106 /* ??? Current exception handling places basic block boundaries
11107 after call_insns. This causes high pressure on R0 and gives
11108 spill failures for R0 in reload. See PR 22553 and the thread
11109 on gcc-patches
11110 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
11111 else if (flag_exceptions)
11112 {
11113 if (flag_schedule_insns == 1)
11114 warning (0, "ignoring -fschedule-insns because of exception handling bug");
11115 flag_schedule_insns = 0;
11116 }
11117 }
11118
11119 if (align_loops == 0)
11120 align_loops = 1 << (TARGET_SH5 ? 3 : 2);
11121 #if defined(OPENBSD_NATIVE) || defined(OPENBSD_CROSS)
11122 /* Do not align jump targets to cache line boundaries at -O2 */
11123 if (align_jumps == 0)
11124 align_jumps = 2;
11125 #else
11126 if (align_jumps == 0)
11127 align_jumps = 1 << CACHE_LOG;
11128 #endif
11129 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
11130 align_jumps = TARGET_SHMEDIA ? 4 : 2;
11131
11132 /* Allocation boundary (in *bytes*) for the code of a function.
11133 SH1: 32 bit alignment is faster, because instructions are always
11134 fetched as a pair from a longword boundary.
11135 SH2 .. SH5 : align to cache line start. */
11136 if (align_functions == 0)
11137 align_functions
11138 = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
11139 /* The linker relaxation code breaks when a function contains
11140 alignments that are larger than the alignment at the start of a
11141 compilation unit. */
11142 if (TARGET_RELAX)
11143 {
11144 int min_align
11145 = align_loops > align_jumps ? align_loops : align_jumps;
11146
11147 /* Also take possible .long constants / mova tables into account. */
11148 if (min_align < 4)
11149 min_align = 4;
11150 if (align_functions < min_align)
11151 align_functions = min_align;
11152 }
11153 }
11154