1 /* Output routines for GCC for Renesas / SuperH SH.
Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "ggc.h"
53
/* Exported insn code so machine-independent passes can recognize the
   pattern used for indirect jumps needing a scratch register.  */
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;

/* Index of the most / least significant SImode word within a
   double-word value; depends on target endianness.  */
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)

/* These are some macros to abstract register modes.  SHmedia allows a
   wider immediate range for add, and SHmedia64 uses DImode moves/adds
   where 32-bit targets use SImode.  */
#define CONST_OK_FOR_ADD(size) \
  (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))

/* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
int current_function_interrupt;

/* ??? The pragma interrupt support will not work for SH3.  */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
   output code for the next function appropriate for an interrupt handler.  */
int pragma_interrupt;

/* This is set by the trap_exit attribute for functions.  It specifies
   a trap number to be used in a trapa instruction at function exit
   (instead of an rte instruction).  */
int trap_exit;

/* This is used by the sp_switch attribute for functions.  It specifies
   a variable holding the address of the stack the interrupt function
   should switch to/from at entry/exit.  */
rtx sp_switch;

/* This is set by #pragma trapa, and is similar to the above, except that
   the compiler doesn't emit code to preserve all registers.  */
static int pragma_trapa;

/* This is set by #pragma nosave_low_regs.  This is useful on the SH3,
   which has a separate set of low regs for User and Supervisor modes.
   This should only be used for the lowest level of interrupts.  Higher levels
   of interrupts must save the registers in case they themselves are
   interrupted.  */
int pragma_nosave_low_regs;

/* This is used for communication between SETUP_INCOMING_VARARGS and
   sh_expand_prologue.  */
int current_function_anonymous_args;
98
/* Global variables for machine-dependent things.  */

/* Which cpu are we scheduling for.  */
enum processor_type sh_cpu;

/* Saved operands from the last compare to use when we generate an scc
   or bcc insn.  */

rtx sh_compare_op0;
rtx sh_compare_op1;

/* Provides the class number of the smallest class containing
   reg number.  */

enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  /* r0 gets its own class; r1..r63 are plain general registers.  */
  R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* Floating-point registers; fr0 is singled out like r0.  */
  FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  /* SHmedia branch-target registers, then DF register pairs, then the
     special registers (AP, PR, T, MACH/MACL, FPUL, FPSCR, frame ptr).  */
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  DF_REGS, DF_REGS, DF_REGS, DF_REGS,
  NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
  MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
  GENERAL_REGS,
};

char sh_register_names[FIRST_PSEUDO_REGISTER] \
  [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;

char sh_additional_register_names[ADDREGNAMES_SIZE] \
  [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
  = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;

/* Provide reg_class from a letter such as appears in the machine
   description.  *: target independently reserved letter.
   reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */

enum reg_class reg_class_from_letter[] =
{
  /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
  /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
  /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
  /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
  /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
  /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
  /* y */ FPUL_REGS, /* z */ R0_REGS
};

/* Which assembler syntax dialect is in use (indexes {...|...} constructs
   in output templates).  */
int assembler_dialect;

/* Nonzero if stack space has been reserved for saving SHmedia target
   registers.  */
static bool shmedia_space_reserved_for_target_registers;
181
/* Forward declarations of file-local functions and target hook
   implementations defined later in this file.  */
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_use_dfa_interface (void);
static int sh_issue_rate (void);
static bool sh_function_ok_for_sibcall (tree, tree);

static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);

static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
						struct save_schedule_s *, int);

static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
251
252
/* Initialize the GCC target structure.  Each #undef/#define pair below
   overrides a default target hook with the SH-specific implementation;
   the aggregate is materialized in `targetm' at the end.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table

/* The next two are used for debug info when compiling with -gdwarf.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"

/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes

/* Scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost

#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  sh_use_dfa_interface
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate

#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
  sh_optimize_target_register_callee_saved

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg

/* Only claim TLS support when the assembler can handle it.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list

#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p

struct gcc_target targetm = TARGET_INITIALIZER;
357
358 /* Print the operand address in x to the stream. */
359
360 void
print_operand_address(FILE * stream,rtx x)361 print_operand_address (FILE *stream, rtx x)
362 {
363 switch (GET_CODE (x))
364 {
365 case REG:
366 case SUBREG:
367 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
368 break;
369
370 case PLUS:
371 {
372 rtx base = XEXP (x, 0);
373 rtx index = XEXP (x, 1);
374
375 switch (GET_CODE (index))
376 {
377 case CONST_INT:
378 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
379 reg_names[true_regnum (base)]);
380 break;
381
382 case REG:
383 case SUBREG:
384 {
385 int base_num = true_regnum (base);
386 int index_num = true_regnum (index);
387
388 fprintf (stream, "@(r0,%s)",
389 reg_names[MAX (base_num, index_num)]);
390 break;
391 }
392
393 default:
394 debug_rtx (x);
395 abort ();
396 }
397 }
398 break;
399
400 case PRE_DEC:
401 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
402 break;
403
404 case POST_INC:
405 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
406 break;
407
408 default:
409 x = mark_constant_pool_use (x);
410 output_addr_const (stream, x);
411 break;
412 }
413 }
414
/* Print operand x (an rtx) in assembler syntax to file stream
   according to modifier code.

   '.'  print a .s if insn needs delay slot
   ','  print LOCAL_LABEL_PREFIX
   '@'  print trap, rte or rts depending upon pragma interruptness
   '#'  output a nop if there is nothing to put in the delay slot
   '''  print likelihood suffix (/u for unlikely).
   'O'  print a constant without the #
   'R'  print the LSW of a dp value - changes if in little endian
   'S'  print the MSW of a dp value - changes if in little endian
   'T'  print the next word of a dp value - same as 'R' in big endian mode.
   'M'  print an `x' if `m' will print `base,index'.
   'N'  print 'r63' if the operand is (const_int 0).
   'm'  print a pair `base,offset' or `base,index', for LD and ST.
   'u'  prints the lowest 16 bits of CONST_INT, as an unsigned value.
   'o'  output an operator.  */

void
print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '.':
      /* Emit the delay-slot suffix only when the slot holds a real
	 (non-annulled, nonzero-length) insn.  */
      if (final_sequence
	  && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	  && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
      break;
    case ',':
      fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
      break;
    case '@':
      /* Function-exit instruction: trapa #N for trap_exit functions,
	 rte for interrupt handlers, rts otherwise.  */
      if (trap_exit)
	fprintf (stream, "trapa #%d", trap_exit);
      else if (sh_cfun_interrupt_handler_p ())
	fprintf (stream, "rte");
      else
	fprintf (stream, "rts");
      break;
    case '#':
      /* Output a nop if there's nothing in the delay slot.  */
      if (dbr_sequence_length () == 0)
	fprintf (stream, "\n\tnop");
      break;
    case '\'':
      {
	rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);

	/* Branch probability below one half: mark as unlikely.  */
	if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
	  fputs ("/u", stream);
	break;
      }
    case 'O':
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      break;
    case 'R':
      fputs (reg_names[REGNO (x) + LSW], (stream));
      break;
    case 'S':
      fputs (reg_names[REGNO (x) + MSW], (stream));
      break;
    case 'T':
      /* Next word of a double.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], (stream));
	  break;
	case MEM:
	  /* For auto-inc/dec addresses the register already advances;
	     otherwise offset the address by one word.  */
	  if (GET_CODE (XEXP (x, 0)) != PRE_DEC
	      && GET_CODE (XEXP (x, 0)) != POST_INC)
	    x = adjust_address (x, SImode, 4);
	  print_operand_address (stream, XEXP (x, 0));
	  break;
	default:
	  break;
	}
      break;
    case 'o':
      /* Print the mnemonic stem for an arithmetic/comparison rtx code.  */
      switch (GET_CODE (x))
	{
	case PLUS:  fputs ("add", stream); break;
	case MINUS: fputs ("sub", stream); break;
	case MULT:  fputs ("mul", stream); break;
	case DIV:   fputs ("div", stream); break;
	case EQ:    fputs ("eq",  stream); break;
	case NE:    fputs ("ne",  stream); break;
	case GT:  case LT:  fputs ("gt",  stream); break;
	case GE:  case LE:  fputs ("ge",  stream); break;
	case GTU: case LTU: fputs ("gtu", stream); break;
	case GEU: case LEU: fputs ("geu", stream); break;
	default:
	  break;
	}
      break;
    case 'M':
      /* Emit an `x' when the corresponding 'm' would print a
	 base,index pair rather than base,offset.  */
      if (GET_CODE (x) == MEM
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
	fputc ('x', stream);
      break;

    case 'm':
      if (GET_CODE (x) != MEM)
	abort ();
      x = XEXP (x, 0);
      switch (GET_CODE (x))
	{
	case REG:
	case SUBREG:
	  /* Plain register address: print as "reg, 0".  */
	  print_operand (stream, x, 0);
	  fputs (", 0", stream);
	  break;

	case PLUS:
	  /* base, offset  or  base, index.  */
	  print_operand (stream, XEXP (x, 0), 0);
	  fputs (", ", stream);
	  print_operand (stream, XEXP (x, 1), 0);
	  break;

	default:
	  abort ();
	}
      break;

    case 'N':
      /* Zero operand: use the hard-wired zero register r63.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	{
	  fprintf ((stream), "r63");
	  break;
	}
      goto default_output;
    case 'u':
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
	  break;
	}
      /* Fall through.  */

    default_output:
    default:
      switch (GET_CODE (x))
	{
	  /* FIXME: We need this on SHmedia32 because reload generates
	     some sign-extended HI or QI loads into DImode registers
	     but, because Pmode is SImode, the address ends up with a
	     subreg:SI of the DImode register.  Maybe reload should be
	     fixed so as to apply alter_subreg to such loads?  */
	case SUBREG:
	  if (SUBREG_BYTE (x) != 0
	      || GET_CODE (SUBREG_REG (x)) != REG)
	    abort ();

	  x = SUBREG_REG (x);
	  /* Fall through.  */

	case REG:
	  /* FP registers get mode-dependent name prefixes: mtrx for
	     V16SF, fv for V4SF, fp for V2SF, d for wider-than-SF.  */
	  if (FP_REGISTER_P (REGNO (x))
	      && GET_MODE (x) == V16SFmode)
	    fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE (x) == V4SFmode)
	    fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
	  else if (GET_CODE (x) == REG
		   && GET_MODE (x) == V2SFmode)
	    fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
	  else if (FP_REGISTER_P (REGNO (x))
		   && GET_MODE_SIZE (GET_MODE (x)) > 4)
	    fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
	  else
	    fputs (reg_names[REGNO (x)], (stream));
	  break;

	case MEM:
	  output_address (XEXP (x, 0));
	  break;

	case CONST:
	  /* NOTE(review): special-cases (const (sign_extend:DI
	     (truncate:HI ...))) — presumably a datalabel low-part on
	     SHmedia; printed as an explicit mask/shift expression.  */
	  if (TARGET_SHMEDIA
	      && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	      && GET_MODE (XEXP (x, 0)) == DImode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
	      && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
	    {
	      rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);

	      fputc ('(', stream);
	      if (GET_CODE (val) == ASHIFTRT)
		{
		  fputc ('(', stream);
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, XEXP (val, 0));
		  if (GET_CODE (XEXP (val, 0)) == CONST)
		    fputc (')', stream);
		  fputs (" >> ", stream);
		  output_addr_const (stream, XEXP (val, 1));
		  fputc (')', stream);
		}
	      else
		{
		  if (GET_CODE (val) == CONST)
		    fputc ('(', stream);
		  output_addr_const (stream, val);
		  if (GET_CODE (val) == CONST)
		    fputc (')', stream);
		}
	      fputs (" & 65535)", stream);
	      break;
	    }

	  /* Fall through.  */
	default:
	  /* Immediate constants get a '#' prefix on SH1-compatible
	     targets.  */
	  if (TARGET_SH1)
	    fputc ('#', stream);
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
640
641 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
642 static void
force_into(rtx value,rtx target)643 force_into (rtx value, rtx target)
644 {
645 value = force_operand (value, target);
646 if (! rtx_equal_p (value, target))
647 emit_insn (gen_move_insn (target, value));
648 }
649
650 /* Emit code to perform a block move. Choose the best method.
651
652 OPERANDS[0] is the destination.
653 OPERANDS[1] is the source.
654 OPERANDS[2] is the size.
655 OPERANDS[3] is the alignment safe to use. */
656
657 int
expand_block_move(rtx * operands)658 expand_block_move (rtx *operands)
659 {
660 int align = INTVAL (operands[3]);
661 int constp = (GET_CODE (operands[2]) == CONST_INT);
662 int bytes = (constp ? INTVAL (operands[2]) : 0);
663
664 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
665 alignment, or if it isn't a multiple of 4 bytes, then fail. */
666 if (! constp || align < 4 || (bytes % 4 != 0))
667 return 0;
668
669 if (TARGET_HARD_SH4)
670 {
671 if (bytes < 12)
672 return 0;
673 else if (bytes == 12)
674 {
675 tree entry_name;
676 rtx sym;
677 rtx func_addr_rtx;
678 rtx r4 = gen_rtx (REG, SImode, 4);
679 rtx r5 = gen_rtx (REG, SImode, 5);
680
681 entry_name = get_identifier ("__movstrSI12_i4");
682
683 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
684 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
685 force_into (XEXP (operands[0], 0), r4);
686 force_into (XEXP (operands[1], 0), r5);
687 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
688 return 1;
689 }
690 else if (! TARGET_SMALLCODE)
691 {
692 tree entry_name;
693 rtx sym;
694 rtx func_addr_rtx;
695 int dwords;
696 rtx r4 = gen_rtx (REG, SImode, 4);
697 rtx r5 = gen_rtx (REG, SImode, 5);
698 rtx r6 = gen_rtx (REG, SImode, 6);
699
700 entry_name = get_identifier (bytes & 4
701 ? "__movstr_i4_odd"
702 : "__movstr_i4_even");
703 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
704 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
705 force_into (XEXP (operands[0], 0), r4);
706 force_into (XEXP (operands[1], 0), r5);
707
708 dwords = bytes >> 3;
709 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
710 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
711 return 1;
712 }
713 else
714 return 0;
715 }
716 if (bytes < 64)
717 {
718 char entry[30];
719 tree entry_name;
720 rtx sym;
721 rtx func_addr_rtx;
722 rtx r4 = gen_rtx_REG (SImode, 4);
723 rtx r5 = gen_rtx_REG (SImode, 5);
724
725 sprintf (entry, "__movstrSI%d", bytes);
726 entry_name = get_identifier (entry);
727 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
728 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
729 force_into (XEXP (operands[0], 0), r4);
730 force_into (XEXP (operands[1], 0), r5);
731 emit_insn (gen_block_move_real (func_addr_rtx));
732 return 1;
733 }
734
735 /* This is the same number of bytes as a memcpy call, but to a different
736 less common function name, so this will occasionally use more space. */
737 if (! TARGET_SMALLCODE)
738 {
739 tree entry_name;
740 rtx sym;
741 rtx func_addr_rtx;
742 int final_switch, while_loop;
743 rtx r4 = gen_rtx_REG (SImode, 4);
744 rtx r5 = gen_rtx_REG (SImode, 5);
745 rtx r6 = gen_rtx_REG (SImode, 6);
746
747 entry_name = get_identifier ("__movstr");
748 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
749 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
750 force_into (XEXP (operands[0], 0), r4);
751 force_into (XEXP (operands[1], 0), r5);
752
753 /* r6 controls the size of the move. 16 is decremented from it
754 for each 64 bytes moved. Then the negative bit left over is used
755 as an index into a list of move instructions. e.g., a 72 byte move
756 would be set up with size(r6) = 14, for one iteration through the
757 big while loop, and a switch of -2 for the last part. */
758
759 final_switch = 16 - ((bytes / 4) % 16);
760 while_loop = ((bytes / 4) / 16 - 1) * 16;
761 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
762 emit_insn (gen_block_lump_real (func_addr_rtx));
763 return 1;
764 }
765
766 return 0;
767 }
768
/* Prepare operands for a move define_expand; specifically, one of the
   operands must be in a register.  Rewrites OPERANDS in place (PIC
   legitimization, forcing into registers, TLS expansion) and always
   returns 0.  */

int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
  /* Under PIC, symbolic source operands must be legitimized via the
     GOT/PIC register — except TLS symbols, which are handled below.  */
  if ((mode == SImode || mode == DImode)
      && flag_pic
      && ! ((mode == Pmode || mode == ptr_mode)
	    && tls_symbolic_operand (operands[1], Pmode) != 0))
    {
      rtx temp;
      if (SYMBOLIC_CONST_P (operands[1]))
	{
	  if (GET_CODE (operands[0]) == MEM)
	    operands[1] = force_reg (Pmode, operands[1]);
	  else if (TARGET_SHMEDIA
		   && GET_CODE (operands[1]) == LABEL_REF
		   && target_reg_operand (operands[0], mode))
	    /* It's ok.  */;
	  else
	    {
	      /* After reload (no_new_pseudos) the destination itself
		 serves as the scratch register.  */
	      temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	      operands[1] = legitimize_pic_address (operands[1], mode, temp);
	    }
	}
      else if (GET_CODE (operands[1]) == CONST
	       && GET_CODE (XEXP (operands[1], 0)) == PLUS
	       && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
	{
	  /* (const (plus SYM OFFSET)): legitimize the symbol part,
	     then add the constant offset back in.  */
	  temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
	  temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
					 mode, temp);
	  operands[1] = expand_binop (mode, add_optab, temp,
				      XEXP (XEXP (operands[1], 0), 1),
				      no_new_pseudos ? temp
				      : gen_reg_rtx (Pmode),
				      0, OPTAB_LIB_WIDEN);
	}
    }

  if (! reload_in_progress && ! reload_completed)
    {
      /* Copy the source to a register if both operands aren't registers.  */
      if (! register_operand (operands[0], mode)
	  && ! sh_register_operand (operands[1], mode))
	operands[1] = copy_to_mode_reg (mode, operands[1]);

      if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
	{
	  /* This is like change_address_1 (operands[0], mode, 0, 1) ,
	     except that we can't use that function because it is static.  */
	  rtx new = change_address (operands[0], mode, 0);
	  MEM_COPY_ATTRIBUTES (new, operands[0]);
	  operands[0] = new;
	}

      /* This case can happen while generating code to move the result
	 of a library call to the target.  Reject `st r0,@(rX,rY)' because
	 reload will fail to find a spill register for rX, since r0 is already
	 being used for the source.  */
      else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
	       && GET_CODE (operands[0]) == MEM
	       && GET_CODE (XEXP (operands[0], 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
	operands[1] = copy_to_mode_reg (mode, operands[1]);
    }

  if (mode == Pmode || mode == ptr_mode)
    {
      rtx op0, op1;
      enum tls_model tls_kind;

      op0 = operands[0];
      op1 = operands[1];
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_ret, tmp, tmp2;

	  /* Expand the access sequence for the symbol's TLS model;
	     the resulting address register replaces operands[1].  */
	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      /* Call __tls_get_addr; result arrives in r0.  */
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      tga_ret = gen_rtx_REG (Pmode, R0_REG);
	      emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));

	      /* Module base in a fresh pseudo, then add the symbol's
		 DTP-relative offset.  */
	      tmp = gen_reg_rtx (Pmode);
	      emit_move_insn (tmp, tga_ret);

	      if (register_operand (op0, Pmode))
		tmp2 = op0;
	      else
		tmp2 = gen_reg_rtx (Pmode);

	      emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
	      op1 = tmp2;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      /* Needs the PIC register even in non-PIC code to address
		 the GOT entry holding the TP offset.  */
	      if (! flag_pic)
		emit_insn (gen_GOTaddr2picreg ());
	      tga_op1 = gen_reg_rtx (Pmode);
	      tmp = gen_sym2GOTTPOFF (op1);
	      emit_insn (gen_tls_initial_exec (tga_op1, tmp));
	      op1 = tga_op1;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      /* Thread pointer (GBR) plus link-time constant offset.  */
	      tmp2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_gbr (tmp2));
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_symTPOFF2reg (tmp, op1));
	      RTX_UNCHANGING_P (tmp) = 1;

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);

	      emit_insn (gen_addsi3 (op1, tmp, tmp2));
	      break;

	    default:
	      abort ();
	    }
	  operands[1] = op1;
	}
    }

  return 0;
}
906
907 /* Prepare the operands for an scc instruction; make sure that the
908 compare has been done. */
909 rtx
prepare_scc_operands(enum rtx_code code)910 prepare_scc_operands (enum rtx_code code)
911 {
912 rtx t_reg = gen_rtx_REG (SImode, T_REG);
913 enum rtx_code oldcode = code;
914 enum machine_mode mode;
915
916 /* First need a compare insn. */
917 switch (code)
918 {
919 case NE:
920 /* It isn't possible to handle this case. */
921 abort ();
922 case LT:
923 code = GT;
924 break;
925 case LE:
926 code = GE;
927 break;
928 case LTU:
929 code = GTU;
930 break;
931 case LEU:
932 code = GEU;
933 break;
934 default:
935 break;
936 }
937 if (code != oldcode)
938 {
939 rtx tmp = sh_compare_op0;
940 sh_compare_op0 = sh_compare_op1;
941 sh_compare_op1 = tmp;
942 }
943
944 mode = GET_MODE (sh_compare_op0);
945 if (mode == VOIDmode)
946 mode = GET_MODE (sh_compare_op1);
947
948 sh_compare_op0 = force_reg (mode, sh_compare_op0);
949 if ((code != EQ && code != NE
950 && (sh_compare_op1 != const0_rtx
951 || code == GTU || code == GEU || code == LTU || code == LEU))
952 || (mode == DImode && sh_compare_op1 != const0_rtx)
953 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
954 sh_compare_op1 = force_reg (mode, sh_compare_op1);
955
956 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
957 (mode == SFmode ? emit_sf_insn : emit_df_insn)
958 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
959 gen_rtx (SET, VOIDmode, t_reg,
960 gen_rtx (code, SImode,
961 sh_compare_op0, sh_compare_op1)),
962 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
963 else
964 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
965 gen_rtx (code, SImode, sh_compare_op0,
966 sh_compare_op1)));
967
968 return t_reg;
969 }
970
/* Called from the md file, set up the operands of a compare instruction.
   Emits an insn setting the T register to (CODE sh_compare_op0
   sh_compare_op1).  */

void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  rtx insn;
  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);
  if (code != EQ
      || mode == DImode
      || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh_compare_op1 != const0_rtx
	  || code == GTU || code == GEU
	  || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
	sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }
  if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
    {
      /* Float GE on SH2E: emit the GT compare recursively, then an
	 IEEE-aware equality compare (gen_ieee_ccmpeqsf_t) — presumably
	 combining into GT-or-EQ; confirm against the md pattern.  */
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    insn = gen_rtx_SET (VOIDmode,
			gen_rtx_REG (SImode, T_REG),
			gen_rtx (code, SImode, sh_compare_op0,
				 sh_compare_op1));
  if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* SH4 float compares also use FPSCR; wrap in a PARALLEL with a
	 USE of it and emit through the sf/df helper.  */
      insn = gen_rtx (PARALLEL, VOIDmode,
		      gen_rtvec (2, insn,
				 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
      (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
    }
  else
    emit_insn (insn);
}
1011
1012 /* Functions to output assembly code. */
1013
1014 /* Return a sequence of instructions to perform DI or DF move.
1015
1016 Since the SH cannot move a DI or DF in one instruction, we have
1017 to take care when we see overlapping source and dest registers. */
1018
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		   enum machine_mode mode)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  /* Push onto the stack: the PRE_DEC store naturally writes the high
     word first.  */
  if (GET_CODE (dst) == MEM
      && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dst, mode)
      && register_operand (src, mode))
    {
      /* Reading MACH implies the MACH/MACL pair.  */
      if (REGNO (src) == MACH_REG)
	return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
	 when mov.d r1,r0 do r1->r0 then r2->r1.
	 I.e. order the two single moves so an overlapping register
	 pair is not clobbered before it is read.  */

      if (REGNO (src) + 1 == REGNO (dst))
	return "mov %T1,%T0\n\tmov %1,%0";
      else
	return "mov %1,%0\n\tmov %T1,%T0";
    }
  else if (GET_CODE (src) == CONST_INT)
    {
      /* Sign-extend the 32-bit constant into the high word.  */
      if (INTVAL (src) < 0)
	output_asm_insn ("mov #-1,%S0", operands);
      else
	output_asm_insn ("mov #0,%S0", operands);

      return "mov %1,%R0";
    }
  else if (GET_CODE (src) == MEM)
    {
      /* Loading from memory: find the base register so we can detect
	 overlap between the address register and the destination.  */
      int ptrreg = -1;
      int dreg = REGNO (dst);
      rtx inside = XEXP (src, 0);

      if (GET_CODE (inside) == REG)
	ptrreg = REGNO (inside);
      else if (GET_CODE (inside) == SUBREG)
	ptrreg = subreg_regno (inside);
      else if (GET_CODE (inside) == PLUS)
	{
	  ptrreg = REGNO (XEXP (inside, 0));
	  /* ??? A r0+REG address shouldn't be possible here, because it isn't
	     an offsettable address.  Unfortunately, offsettable addresses use
	     QImode to check the offset, and a QImode offsettable address
	     requires r0 for the other operand, which is not currently
	     supported, so we can't use the 'o' constraint.
	     Thus we must check for and handle r0+REG addresses here.
	     We punt for now, since this is likely very rare.  */
	  if (GET_CODE (XEXP (inside, 1)) == REG)
	    abort ();
	}
      else if (GET_CODE (inside) == LABEL_REF)
	return "mov.l %1,%0\n\tmov.l %1+4,%T0";
      else if (GET_CODE (inside) == POST_INC)
	return "mov.l %1,%0\n\tmov.l %1,%T0";
      else
	abort ();

      /* Work out the safe way to copy.  Copy into the second half first.  */
      if (dreg == ptrreg)
	return "mov.l %T1,%T0\n\tmov.l %1,%0";
    }

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
1090
1091 /* Print an instruction which would have gone into a delay slot after
1092 another instruction, but couldn't because the other instruction expanded
1093 into a sequence where putting the slot insn at the end wouldn't work. */
1094
static void
print_slot (rtx insn)
{
  /* INSN is a SEQUENCE; element 1 is the delay-slot insn.  Output it
     here, out of line.  */
  final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);

  /* Mark it deleted so final does not emit it a second time in its
     normal position.  */
  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
1102
1103 const char *
output_far_jump(rtx insn,rtx op)1104 output_far_jump (rtx insn, rtx op)
1105 {
1106 struct { rtx lab, reg, op; } this;
1107 rtx braf_base_lab = NULL_RTX;
1108 const char *jump;
1109 int far;
1110 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1111 rtx prev;
1112
1113 this.lab = gen_label_rtx ();
1114
1115 if (TARGET_SH2
1116 && offset >= -32764
1117 && offset - get_attr_length (insn) <= 32766)
1118 {
1119 far = 0;
1120 jump = "mov.w %O0,%1; braf %1";
1121 }
1122 else
1123 {
1124 far = 1;
1125 if (flag_pic)
1126 {
1127 if (TARGET_SH2)
1128 jump = "mov.l %O0,%1; braf %1";
1129 else
1130 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1131 }
1132 else
1133 jump = "mov.l %O0,%1; jmp @%1";
1134 }
1135 /* If we have a scratch register available, use it. */
1136 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1137 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1138 {
1139 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1140 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1141 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1142 output_asm_insn (jump, &this.lab);
1143 if (dbr_sequence_length ())
1144 print_slot (final_sequence);
1145 else
1146 output_asm_insn ("nop", 0);
1147 }
1148 else
1149 {
1150 /* Output the delay slot insn first if any. */
1151 if (dbr_sequence_length ())
1152 print_slot (final_sequence);
1153
1154 this.reg = gen_rtx_REG (SImode, 13);
1155 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1156 Fortunately, MACL is fixed and call-clobbered, and we never
1157 need its value across jumps, so save r13 in it instead of in
1158 the stack. */
1159 if (TARGET_SH5)
1160 output_asm_insn ("lds r13, macl", 0);
1161 else
1162 output_asm_insn ("mov.l r13,@-r15", 0);
1163 output_asm_insn (jump, &this.lab);
1164 if (TARGET_SH5)
1165 output_asm_insn ("sts macl, r13", 0);
1166 else
1167 output_asm_insn ("mov.l @r15+,r13", 0);
1168 }
1169 if (far && flag_pic && TARGET_SH2)
1170 {
1171 braf_base_lab = gen_label_rtx ();
1172 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1173 CODE_LABEL_NUMBER (braf_base_lab));
1174 }
1175 if (far)
1176 output_asm_insn (".align 2", 0);
1177 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1178 this.op = op;
1179 if (far && flag_pic)
1180 {
1181 if (TARGET_SH2)
1182 this.lab = braf_base_lab;
1183 output_asm_insn (".long %O2-%O0", &this.lab);
1184 }
1185 else
1186 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1187 return "";
1188 }
1189
1190 /* Local label counter, used for constants in the pool and inside
1191 pattern branches. */
1192
1193 static int lf = 100;
1194
1195 /* Output code for ordinary branches. */
1196
const char *
output_branch (int logic, rtx insn, rtx *operands)
{
  /* LOGIC is nonzero for a branch-if-true (bt), zero for bf; the
     length attribute of INSN tells us which expansion is needed.  */
  switch (get_attr_length (insn))
    {
    case 6:
      /* This can happen if filling the delay slot has caused a forward
	 branch to exceed its range (we could reverse it, but only
	 when we know we won't overextend other branches; this should
	 best be handled by relaxation).
	 It can also happen when other condbranches hoist delay slot insn
	 from their destination, thus leading to code size increase.
	 But the branch will still be in the range -4092..+4098 bytes.  */

      if (! TARGET_RELAX)
	{
	  int label = lf++;
	  /* The call to print_slot will clobber the operands.  */
	  rtx op0 = operands[0];

	  /* If the instruction in the delay slot is annulled (true), then
	     there is no delay slot where we can put it now.  The only safe
	     place for it is after the label.  final will do that by default.  */

	  if (final_sequence
	      && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
	      && get_attr_length (XVECEXP (final_sequence, 0, 1)))
	    {
	      /* Emit the inverted short branch (with delay slot) over
		 an unconditional bra to the real target.  */
	      asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
			   ASSEMBLER_DIALECT ? "/" : ".", label);
	      print_slot (final_sequence);
	    }
	  else
	    asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);

	  output_asm_insn ("bra\t%l0", &op0);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label)(asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, handle this like a short branch.  The linker
	 will fix it up if it still doesn't fit after relaxation.  */
    case 2:
      return logic ? "bt%.\t%l0" : "bf%.\t%l0";

      /* These are for SH2e, in which we have to account for the
	 extra nop because of the hardware bug in annulled branches.  */
    case 8:
      if (! TARGET_RELAX)
	{
	  int label = lf++;

	  if (final_sequence
	      && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
	    abort ();
	  /* Inverted branch over a bra, with nops for the hardware
	     bug workaround.  */
	  asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
		       logic ? "f" : "t",
		       ASSEMBLER_DIALECT ? "/" : ".", label);
	  fprintf (asm_out_file, "\tnop\n");
	  output_asm_insn ("bra\t%l0", operands);
	  fprintf (asm_out_file, "\tnop\n");
	  (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);

	  return "";
	}
      /* When relaxing, fall through.  */
    case 4:
      {
	char buffer[10];

	/* Direct branch with delay slot (bt/s or bf/s).  */
	sprintf (buffer, "b%s%ss\t%%l0",
		 logic ? "t" : "f",
		 ASSEMBLER_DIALECT ? "/" : ".");
	output_asm_insn (buffer, &operands[0]);
	return "nop";
      }

    default:
      /* There should be no longer branches now - that would
	 indicate that something has destroyed the branches set
	 up in machine_dependent_reorg.  */
      abort ();
    }
}
1282
1283 const char *
output_branchy_insn(enum rtx_code code,const char * template,rtx insn,rtx * operands)1284 output_branchy_insn (enum rtx_code code, const char *template,
1285 rtx insn, rtx *operands)
1286 {
1287 rtx next_insn = NEXT_INSN (insn);
1288
1289 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1290 {
1291 rtx src = SET_SRC (PATTERN (next_insn));
1292 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1293 {
1294 /* Following branch not taken */
1295 operands[9] = gen_label_rtx ();
1296 emit_label_after (operands[9], next_insn);
1297 INSN_ADDRESSES_NEW (operands[9],
1298 INSN_ADDRESSES (INSN_UID (next_insn))
1299 + get_attr_length (next_insn));
1300 return template;
1301 }
1302 else
1303 {
1304 int offset = (branch_dest (next_insn)
1305 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1306 if (offset >= -252 && offset <= 258)
1307 {
1308 if (GET_CODE (src) == IF_THEN_ELSE)
1309 /* branch_true */
1310 src = XEXP (src, 1);
1311 operands[9] = src;
1312 return template;
1313 }
1314 }
1315 }
1316 operands[9] = gen_label_rtx ();
1317 emit_label_after (operands[9], insn);
1318 INSN_ADDRESSES_NEW (operands[9],
1319 INSN_ADDRESSES (INSN_UID (insn))
1320 + get_attr_length (insn));
1321 return template;
1322 }
1323
/* Output the IEEE compare-for-equality part of a float GE/GT compare:
   skip the fcmp/eq if the preceding compare already set T.  */

const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}
1329
1330 /* Output the start of the assembler file. */
1331
/* TARGET_ASM_FILE_START hook: output the start of the assembler file.  */

static void
sh_file_start (void)
{
  default_file_start ();

  if (TARGET_ELF)
    /* We need to show the text section with the proper
       attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
       emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
       will complain.  We can teach GAS specifically about the
       default attributes for our choice of text section, but
       then we would have to change GAS again if/when we change
       the text section name.  */
    fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
  else
    /* Switch to the data section so that the coffsem symbol
       isn't in the text section.  */
    data_section ();

  /* Tell the assembler about byte order.  */
  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  /* Non-ELF (COFF) assemblers need explicit ISA mode directives for
     SH5 targets.  */
  if (!TARGET_ELF)
    {
      if (TARGET_SHCOMPACT)
	fputs ("\t.mode\tSHcompact\n", asm_out_file);
      else if (TARGET_SHMEDIA)
	fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
		 TARGET_SHMEDIA64 ? 64 : 32);
    }
}
1363
1364 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1365
1366 static bool
unspec_caller_rtx_p(rtx pat)1367 unspec_caller_rtx_p (rtx pat)
1368 {
1369 switch (GET_CODE (pat))
1370 {
1371 case CONST:
1372 return unspec_caller_rtx_p (XEXP (pat, 0));
1373 case PLUS:
1374 case MINUS:
1375 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1376 return true;
1377 return unspec_caller_rtx_p (XEXP (pat, 1));
1378 case UNSPEC:
1379 if (XINT (pat, 1) == UNSPEC_CALLER)
1380 return true;
1381 default:
1382 break;
1383 }
1384
1385 return false;
1386 }
1387
1388 /* Indicate that INSN cannot be duplicated. This is true for insn
1389 that generates an unique label. */
1390
1391 static bool
sh_cannot_copy_insn_p(rtx insn)1392 sh_cannot_copy_insn_p (rtx insn)
1393 {
1394 rtx pat;
1395
1396 if (!reload_completed || !flag_pic)
1397 return false;
1398
1399 if (GET_CODE (insn) != INSN)
1400 return false;
1401 if (asm_noperands (insn) >= 0)
1402 return false;
1403
1404 pat = PATTERN (insn);
1405 if (GET_CODE (pat) != SET)
1406 return false;
1407 pat = SET_SRC (pat);
1408
1409 if (unspec_caller_rtx_p (pat))
1410 return true;
1411
1412 return false;
1413 }
1414
/* Actual number of instructions used to make a shift by N.
   Indexed by shift count 0..31.  */
static const char ashiftrt_insns[] =
  { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};

/* Left shift and logical right shift are the same.  */
static const char shift_insns[] =
  { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

/* Individual shift amounts needed to get the above length sequences.
   One bit right shifts clobber the T bit, so when possible, put one bit
   shifts in the middle of the sequence, so the ends are eligible for
   branch delay slots.
   Row N lists the component shifts (negative = opposite direction,
   see gen_ashift) whose sum is N; unused slots are zero.  */
static const short shift_amounts[32][5] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Likewise, but for shift amounts < 16, up to three highmost bits
   might be clobbered.  This is typically used when combined with some
   kind of sign or zero extension.  */

static const char ext_shift_insns[] =
  { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};

static const short ext_shift_amounts[32][4] = {
  {0}, {1}, {2}, {2, 1},
  {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
  {8}, {8, 1}, {8, 2}, {8, 1, 2},
  {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
  {16}, {16, 1}, {16, 2}, {16, 1, 2},
  {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
  {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
  {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};

/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
   to shift it by N without data loss, and quicker than by other means?
   True exactly for N in {7, 15}.  */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1458
1459 /* This is used in length attributes in sh.md to help compute the length
1460 of arbitrary constant shift instructions. */
1461
1462 int
shift_insns_rtx(rtx insn)1463 shift_insns_rtx (rtx insn)
1464 {
1465 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1466 int shift_count = INTVAL (XEXP (set_src, 1));
1467 enum rtx_code shift_code = GET_CODE (set_src);
1468
1469 switch (shift_code)
1470 {
1471 case ASHIFTRT:
1472 return ashiftrt_insns[shift_count];
1473 case LSHIFTRT:
1474 case ASHIFT:
1475 return shift_insns[shift_count];
1476 default:
1477 abort();
1478 }
1479 }
1480
1481 /* Return the cost of a shift. */
1482
static inline int
shiftcosts (rtx x)
{
  /* X is a shift rtx; XEXP (x, 1) is the shift count.  Returns the
     cost in instructions.  */
  int value;

  /* SHmedia has full dynamic shifts; always one insn.  */
  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      /* The only multiword shift pattern available is DImode by 1.  */
      if (GET_MODE (x) == DImode
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && INTVAL (XEXP (x, 1)) == 1)
	return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }
  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  value = INTVAL (XEXP (x, 1));

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];
      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
	cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}
1519
1520 /* Return the cost of an AND operation. */
1521
static inline int
andcosts (rtx x)
{
  /* X is an AND rtx; cost depends on the second operand.  */
  int i;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return 1;

  i = INTVAL (XEXP (x, 1));

  if (TARGET_SHMEDIA)
    {
      /* NOTE(review): XEXP (x, 1) is already known to be a CONST_INT
	 at this point (see the early return above), so the re-check
	 here is redundant but harmless.  */
      if ((GET_CODE (XEXP (x, 1)) == CONST_INT
	   && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
	return 1;
      else
	return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (i == 0xff || i == 0xffff)
    return 1;
  /* Constants that can be used in an and immediate instruction in a single
     cycle, but this requires r0, so make it a little more expensive.  */
  if (CONST_OK_FOR_K08 (i))
    return 2;
  /* Constants that can be loaded with a mov immediate and an and.
     This case is probably unnecessary.  */
  if (CONST_OK_FOR_I08 (i))
    return 2;
  /* Any other constants requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
1558
1559 /* Return the cost of an addition or a subtraction. */
1560
static inline int
addsubcosts (rtx x)
{
  /* X is a PLUS or MINUS rtx; cost depends on the second operand.  */
  /* Adding a register is a single cycle insn.  */
  if (GET_CODE (XEXP (x, 1)) == REG
      || GET_CODE (XEXP (x, 1)) == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (GET_CODE (XEXP (x, 1)) == CONST_INT
      && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
    return 1;

  /* On SHmedia, larger operands must first be materialized with one
     or more movi/shori steps.  */
  if (TARGET_SHMEDIA)
    switch (GET_CODE (XEXP (x, 1)))
      {
      case CONST:
      case LABEL_REF:
      case SYMBOL_REF:
	return TARGET_SHMEDIA64 ? 5 : 3;

      case CONST_INT:
	if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
	  return 2;
	else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
	  return 3;
	else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
	  return 4;

	/* Fall through.  */
      default:
	return 5;
      }

  /* Any other constant requires a 2 cycle pc-relative load plus an
     addition.  */
  return 3;
}
1599
1600 /* Return the cost of a multiply. */
1601 static inline int
multcosts(rtx x ATTRIBUTE_UNUSED)1602 multcosts (rtx x ATTRIBUTE_UNUSED)
1603 {
1604 if (TARGET_SHMEDIA)
1605 return 3;
1606
1607 if (TARGET_SH2)
1608 {
1609 /* We have a mul insn, so we can never take more than the mul and the
1610 read of the mac reg, but count more because of the latency and extra
1611 reg usage. */
1612 if (TARGET_SMALLCODE)
1613 return 2;
1614 return 3;
1615 }
1616
1617 /* If we're aiming at small code, then just count the number of
1618 insns in a multiply call sequence. */
1619 if (TARGET_SMALLCODE)
1620 return 5;
1621
1622 /* Otherwise count all the insns in the routine we'd be calling too. */
1623 return 20;
1624 }
1625
1626 /* Compute a (partial) cost for rtx X. Return true if the complete
1627 cost has been computed, and false if subexpressions should be
1628 scanned. In either case, *TOTAL contains the cost result. */
1629
static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  /* TARGET_RTX_COSTS hook.  CODE is GET_CODE (x); OUTER_CODE is the
     code of the containing expression.  Sets *TOTAL and returns true
     when the cost is final, false to let subexpressions be scanned.  */
  switch (code)
    {
    case CONST_INT:
      if (TARGET_SHMEDIA)
	{
	  /* Free when zero, or when usable as an immediate operand of
	     the enclosing operation; otherwise cost the number of
	     movi/shori steps needed to materialize it.  */
	  if (INTVAL (x) == 0)
	    *total = 0;
	  else if (outer_code == AND && and_operand ((x), DImode))
	    *total = 0;
	  else if ((outer_code == IOR || outer_code == XOR
		    || outer_code == PLUS)
		   && CONST_OK_FOR_I10 (INTVAL (x)))
	    *total = 0;
	  else if (CONST_OK_FOR_I16 (INTVAL (x)))
	    *total = COSTS_N_INSNS (outer_code != SET);
	  else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
	    *total = COSTS_N_INSNS (2);
	  else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
	    *total = COSTS_N_INSNS (3);
	  else
	    *total = COSTS_N_INSNS (4);
	  return true;
	}
      /* Non-SHmedia: mov #imm8 is free; logic immediates need r0.  */
      if (CONST_OK_FOR_I08 (INTVAL (x)))
	*total = 0;
      else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
	       && CONST_OK_FOR_K08 (INTVAL (x)))
	*total = 1;
      else
	*total = 8;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      /* Symbolic constants are loaded from the constant pool (or built
	 with movi/shori on SHmedia).  */
      if (TARGET_SHMEDIA64)
	*total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
	*total = COSTS_N_INSNS (2);
      else
	*total = 5;
      return true;

    case CONST_DOUBLE:
      if (TARGET_SHMEDIA)
	*total = COSTS_N_INSNS (4);
      else
	*total = 10;
      return true;

    case PLUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      *total = COSTS_N_INSNS (andcosts (x));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (shiftcosts (x));
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* Division is always a library call.  */
      *total = COSTS_N_INSNS (20);
      return true;

    case FLOAT:
    case FIX:
      *total = 100;
      return true;

    default:
      return false;
    }
}
1717
1718 /* Compute the cost of an address. For the SH, all valid addresses are
1719 the same cost. Use a slightly higher cost for reg + reg addressing,
1720 since it increases pressure on r0. */
1721
1722 static int
sh_address_cost(rtx X)1723 sh_address_cost (rtx X)
1724 {
1725 return (GET_CODE (X) == PLUS
1726 && ! CONSTANT_P (XEXP (X, 1))
1727 && ! TARGET_SHMEDIA ? 1 : 0);
1728 }
1729
1730 /* Code to expand a shift. */
1731
1732 void
gen_ashift(int type,int n,rtx reg)1733 gen_ashift (int type, int n, rtx reg)
1734 {
1735 /* Negative values here come from the shift_amounts array. */
1736 if (n < 0)
1737 {
1738 if (type == ASHIFT)
1739 type = LSHIFTRT;
1740 else
1741 type = ASHIFT;
1742 n = -n;
1743 }
1744
1745 switch (type)
1746 {
1747 case ASHIFTRT:
1748 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1749 break;
1750 case LSHIFTRT:
1751 if (n == 1)
1752 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1753 else
1754 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1755 break;
1756 case ASHIFT:
1757 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1758 break;
1759 }
1760 }
1761
1762 /* Same for HImode */
1763
void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Same as gen_ashift, but for HImode values.  */
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      if (type == ASHIFT)
	type = LSHIFTRT;
      else
	type = ASHIFT;
      n = -n;
    }

  switch (type)
    {
    case ASHIFTRT:
    case LSHIFTRT:
      /* We don't have HImode right shift operations because using the
	 ordinary 32 bit shift instructions for that doesn't generate proper
	 zero/sign extension.
	 gen_ashift_hi is only called in contexts where we know that the
	 sign extension works out correctly.  */
      {
	/* Strip a SUBREG wrapper so we can rewrap the underlying
	   register as an SImode SUBREG at the same byte offset.  */
	int offset = 0;
	if (GET_CODE (reg) == SUBREG)
	  {
	    offset = SUBREG_BYTE (reg);
	    reg = SUBREG_REG (reg);
	  }
	gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
	break;
      }
    case ASHIFT:
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      break;
    }
}
1801
1802 /* Output RTL to split a constant shift into its component SH constant
1803 shift instructions. */
1804
void
gen_shifty_op (int code, rtx *operands)
{
  /* CODE is the shift rtx code; operands[0] is shifted in place by the
     constant operands[2], using the component sequence from
     shift_amounts.  */
  int value = INTVAL (operands[2]);
  int max, i;

  /* Truncate the shift count in case it is out of bounds.  */
  value = value & 0x1f;

  if (value == 31)
    {
      /* Shift by 31 reduces to extracting one bit.  */
      if (code == LSHIFTRT)
	{
	  /* Rotate the sign bit into T, then load T into the result.  */
	  emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
	  emit_insn (gen_movt (operands[0]));
	  return;
	}
      else if (code == ASHIFT)
	{
	  /* There is a two instruction sequence for 31 bit left shifts,
	     but it requires r0.  */
	  if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
	    {
	      emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
	      emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
	      return;
	    }
	}
    }
  else if (value == 0)
    {
      /* This can happen when not optimizing.  We must output something here
	 to prevent the compiler from aborting in final.c after the try_split
	 call.  */
      emit_insn (gen_nop ());
      return;
    }

  /* General case: emit the component shifts from the table.  */
  max = shift_insns[value];
  for (i = 0; i < max; i++)
    gen_ashift (code, shift_amounts[value][i], operands[0]);
}
1847
1848 /* Same as above, but optimized for values where the topmost bits don't
1849 matter. */
1850
void
gen_shifty_hi_op (int code, rtx *operands)
{
  /* Like gen_shifty_op, but uses the ext_shift tables, which may
     clobber up to the three highmost bits.  */
  int value = INTVAL (operands[2]);
  int max, i;
  void (*gen_fun) (int, int, rtx);

  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  value &= 31;
  if (value == 0)
    {
      /* Emit something so try_split in final.c does not abort.  */
      emit_insn (gen_nop ());
      return;
    }

  /* Pick the HImode or SImode shift emitter to match operand 0.  */
  gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
  if (code == ASHIFT)
    {
      max = ext_shift_insns[value];
      for (i = 0; i < max; i++)
	gen_fun (code, ext_shift_amounts[value][i], operands[0]);
    }
  else
    /* When shifting right, emit the shifts in reverse order, so that
       solitary negative values come first.  */
    for (i = ext_shift_insns[value] - 1; i >= 0; i--)
      gen_fun (code, ext_shift_amounts[value][i], operands[0]);
}
1880
1881 /* Output RTL for an arithmetic right shift. */
1882
1883 /* ??? Rewrite to use super-optimizer sequences. */
1884
int
expand_ashiftrt (rtx *operands)
{
  /* Expand operands[0] = operands[1] >> operands[2] (arithmetic).
     Returns 1 when RTL was emitted, 0 to let the caller fall back.  */
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;

  /* SH3 has a dynamic arithmetic shift (shad), which takes a negated
     count for right shifts.  */
  if (TARGET_SH3)
    {
      if (GET_CODE (operands[2]) != CONST_INT)
	{
	  rtx count = copy_to_mode_reg (SImode, operands[2]);
	  emit_insn (gen_negsi2 (count, count));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
      else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
	       > 1 + SH_DYNAMIC_SHIFT_COST)
	{
	  /* The constant-shift sequence would be longer than loading
	     the count and using shad.  */
	  rtx count
	    = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
	  emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
	  return 1;
	}
    }
  if (GET_CODE (operands[2]) != CONST_INT)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      /* Shift by 31: result is 0 or -1, handled by its own pattern.  */
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }
  else if (value >= 16 && value <= 19)
    {
      /* Shift by 16 first, then by single bits.  */
      wrk = gen_reg_rtx (SImode);
      emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
      value -= 16;
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }
  /* Expand a short sequence inline, longer call a magic routine.  */
  else if (value <= 5)
    {
      wrk = gen_reg_rtx (SImode);
      emit_move_insn (wrk, operands[1]);
      while (value--)
	gen_ashift (ASHIFTRT, 1, wrk);
      emit_move_insn (operands[0], wrk);
      return 1;
    }

  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  The helpers
     __ashiftrt_r4_<n> take their argument and return the result in r4.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = function_symbol (IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
1955
/* Return nonzero if a constant shift by COUNT is better done with a
   dynamic shift instruction than with the component-shift sequence.
   NOTE(review): assumes COUNT is a CONST_INT whose value indexes
   shift_insns (0..31) -- TODO confirm all callers guarantee this.  */

int
sh_dynamicalize_shift_p (rtx count)
{
  return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
}
1961
1962 /* Try to find a good way to implement the combiner pattern
1963 [(set (match_operand:SI 0 "register_operand" "r")
1964 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1965 (match_operand:SI 2 "const_int_operand" "n"))
1966 (match_operand:SI 3 "const_int_operand" "n"))) .
1967 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1968 return 0 for simple right / left or left/right shift combination.
1969 return 1 for a combination of shifts with zero_extend.
1970 return 2 for a combination of shifts with an AND that needs r0.
1971 return 3 for a combination of shifts with an AND that needs an extra
1972 scratch register, when the three highmost bits of the AND mask are clear.
1973 return 4 for a combination of shifts with an AND that needs an extra
1974 scratch register, when any of the three highmost bits of the AND mask
1975 is set.
1976 If ATTRP is set, store an initial right shift width in ATTRP[0],
1977 and the instruction length in ATTRP[1] . These values are not valid
1978 when returning 0.
1979 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1980 shift_amounts for the last shift value that is to be used before the
1981 sign extend. */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
  /* Search for the cheapest way to implement (X << LEFT) & MASK; see
     the block comment above for the meaning of the return values and
     of ATTRP.  */
  unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
  int left = INTVAL (left_rtx), right;
  int best = 0;
  int cost, best_cost = 10000;
  int best_right = 0, best_len = 0;
  int i;
  int can_ext;

  if (left < 0 || left > 31)
    return 0;
  /* Work with the mask as seen before the left shift.  */
  if (GET_CODE (mask_rtx) == CONST_INT)
    mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
  else
    mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
  /* Can this be expressed as a right shift / left shift pair ?  */
  /* lsb isolates the lowest set bit of mask.  */
  lsb = ((mask ^ (mask - 1)) >> 1) + 1;
  right = exact_log2 (lsb);
  /* mask2 covers the bits above the contiguous low run of mask.  */
  mask2 = ~(mask + lsb - 1);
  lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
  /* mask has no zeroes but trailing zeroes <==> ! mask2 */
  if (! mask2)
    best_cost = shift_insns[right] + shift_insns[right + left];
  /* mask has no trailing zeroes <==> ! right */
  else if (! right && mask2 == ~(lsb2 - 1))
    {
      int late_right = exact_log2 (lsb2);
      best_cost = shift_insns[left + late_right] + shift_insns[late_right];
    }
  /* Try to use zero extend */
  if (mask2 == ~(lsb2 - 1))
    {
      int width, first;

      /* Consider both the 8-bit (extu.b) and 16-bit (extu.w) widths.  */
      for (width = 8; width <= 16; width += 8)
	{
	  /* Can we zero-extend right away?  */
	  if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
	    {
	      cost
		= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = -1;
		}
	      continue;
	    }
	  /* ??? Could try to put zero extend into initial right shift,
	     or even shift a bit left before the right shift.  */
	  /* Determine value of first part of left shift, to get to the
	     zero extend cut-off point.  */
	  first = width - exact_log2 (lsb2) + right;
	  if (first >= 0 && right + left - first >= 0)
	    {
	      cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
		+ ext_shift_insns[right + left - first];
	      if (cost < best_cost)
		{
		  best = 1;
		  best_cost = cost;
		  best_right = right;
		  best_len = cost;
		  if (attrp)
		    attrp[2] = first;
		}
	    }
	}
    }
  /* Try to use r0 AND pattern */
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      if (! CONST_OK_FOR_K08 (mask >> i))
	continue;
      cost = (i != 0) + 2 + ext_shift_insns[left + i];
      if (cost < best_cost)
	{
	  best = 2;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1;
	}
    }
  /* Try to use a scratch register to hold the AND operand.  */
  /* can_ext: the three highmost bits of the shifted mask are clear,
     so the cheaper ext_shift sequences may be used.  */
  can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
  for (i = 0; i <= 2; i++)
    {
      if (i > right)
	break;
      cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
	+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
      if (cost < best_cost)
	{
	  best = 4 - can_ext;
	  best_cost = cost;
	  best_right = i;
	  best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
	}
    }

  if (attrp)
    {
      attrp[0] = best_right;
      attrp[1] = best_len;
    }
  return best;
}
2097
2098 /* This is used in length attributes of the unnamed instructions
2099 corresponding to shl_and_kind return values of 1 and 2. */
2100 int
shl_and_length(rtx insn)2101 shl_and_length (rtx insn)
2102 {
2103 rtx set_src, left_rtx, mask_rtx;
2104 int attributes[3];
2105
2106 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2107 left_rtx = XEXP (XEXP (set_src, 0), 1);
2108 mask_rtx = XEXP (set_src, 1);
2109 shl_and_kind (left_rtx, mask_rtx, attributes);
2110 return attributes[1];
2111 }
2112
2113 /* This is used in length attribute of the and_shl_scratch instruction. */
2114
2115 int
shl_and_scr_length(rtx insn)2116 shl_and_scr_length (rtx insn)
2117 {
2118 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2119 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2120 rtx op = XEXP (set_src, 0);
2121 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2122 op = XEXP (XEXP (op, 0), 0);
2123 return len + shift_insns[INTVAL (XEXP (op, 1))];
2124 }
2125
2126 /* Generating rtl? */
2127 extern int rtx_equal_function_value_matters;
2128
2129 /* Generate rtl for instructions for which shl_and_kind advised a particular
2130 method of generating them, i.e. returned zero. */
2131
int
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx*) = gen_shifty_hi_op;

  /* attributes[0] is the initial right shift chosen by shl_and_kind.  */
  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      /* Kind 1: implement the AND with zero extensions, optionally
	 with an intermediate left shift (attributes[2]).  */
      {
	int first = attributes[2];
	rtx operands[3];

	if (first < 0)
	  {
	    /* No intermediate shift needed: zero-extend right away.  */
	    emit_insn ((mask << right) <= 0xff
		       ? gen_zero_extendqisi2(dest,
					      gen_lowpart (QImode, source))
		       : gen_zero_extendhisi2(dest,
					      gen_lowpart (HImode, source)));
	    source = dest;
	  }
	if (source != dest)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (right)
	  {
	    operands[2] = GEN_INT (right);
	    gen_shifty_hi_op (LSHIFTRT, operands);
	  }
	if (first > 0)
	  {
	    /* Shift left up to the zero-extend cut-off point, then the
	       final shift below completes TOTAL_SHIFT.  */
	    operands[2] = GEN_INT (first);
	    gen_shifty_hi_op (ASHIFT, operands);
	    total_shift -= first;
	    mask <<= first;
	  }
	if (first >= 0)
	  emit_insn (mask <= 0xff
		     ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
		     : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
	if (total_shift > 0)
	  {
	    operands[2] = GEN_INT (total_shift);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	break;
      }
    case 4:
      /* Kind 4 cannot use the zero-extending shift sequences, so use the
	 plain ones.  */
      shift_gen_fun = gen_shifty_op;
      /* FALLTHROUGH */
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
	 that don't matter.  This way, we might be able to get a shorter
	 signed constant.  */
      if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift)))
	mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
      /* FALLTHROUGH */
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (rtx_equal_function_value_matters
	  || reload_in_progress || reload_completed)
	{
	  rtx operands[3];

	  /* Cases 3 and 4 should be handled by this split
	     only while combining  */
	  if (kind > 2)
	    abort ();
	  if (right)
	    {
	      emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
	      source = dest;
	    }
	  emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
	  if (total_shift)
	    {
	      operands[0] = dest;
	      operands[1] = dest;
	      operands[2] = GEN_INT (total_shift);
	      shift_gen_fun (ASHIFT, operands);
	    }
	  break;
	}
      else
	{
	  int neg = 0;
	  if (kind != 4 && total_shift < 16)
	    {
	      /* NEG is the (negated) extra shift used by the scratch
		 pattern; taken from the ext_shift_amounts table.  */
	      neg = -ext_shift_amounts[total_shift][1];
	      if (neg > 0)
		neg -= ext_shift_amounts[total_shift][2];
	      else
		neg = 0;
	    }
	  emit_insn (gen_and_shl_scratch (dest, source,
					  GEN_INT (right),
					  GEN_INT (mask),
					  GEN_INT (total_shift + neg),
					  GEN_INT (neg)));
	  emit_insn (gen_movsi (dest, dest));
	  break;
	}
    }
  return 0;
}
2245
2246 /* Try to find a good way to implement the combiner pattern
2247 [(set (match_operand:SI 0 "register_operand" "=r")
2248 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2249 (match_operand:SI 2 "const_int_operand" "n")
2250 (match_operand:SI 3 "const_int_operand" "n")
2251 (const_int 0)))
2252 (clobber (reg:SI T_REG))]
2253 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2254 return 0 for simple left / right shift combination.
2255 return 1 for left shift / 8 bit sign extend / left shift.
2256 return 2 for left shift / 16 bit sign extend / left shift.
2257 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2258 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2259 return 5 for left shift / 16 bit sign extend / right shift
2260 return 6 for < 8 bit sign extend / left shift.
2261 return 7 for < 8 bit sign extend / left shift / single right shift.
2262 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2263
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
  int left, size, insize, ext;
  int cost = 0, best_cost;
  int kind;

  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  /* INSIZE is the width of the significant low-order field before the
     left shift; it must be positive.  */
  insize = size - left;
  if (insize <= 0)
    abort ();
  /* Default to left / right shift.  */
  kind = 0;
  best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
  if (size <= 16)
    {
      /* 16 bit shift / sign extend / 16 bit shift */
      cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
      /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
	 below, by alternative 3 or something even better.  */
      if (cost < best_cost)
	{
	  kind = 5;
	  best_cost = cost;
	}
    }
  /* Try a plain sign extend between two shifts.  */
  for (ext = 16; ext >= insize; ext -= 8)
    {
      if (ext <= size)
	{
	  /* Kinds 1 and 2: shift left, sign extend EXT bits, shift left.  */
	  cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
	  if (cost < best_cost)
	    {
	      kind = ext / (unsigned) 8;
	      best_cost = cost;
	    }
	}
      /* Check if we can do a sloppy shift with a final signed shift
	 restoring the sign.  */
      if (EXT_SHIFT_SIGNED (size - ext))
	cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
      /* If not, maybe it's still cheaper to do the second shift sloppy,
	 and do a final sign extend?  */
      else if (size <= 16)
	cost = ext_shift_insns[ext - insize] + 1
	  + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
      else
	continue;
      if (cost < best_cost)
	{
	  /* Kinds 3 and 4: shift / sign extend / shift / sign extend.  */
	  kind = ext / (unsigned) 8 + 2;
	  best_cost = cost;
	}
    }
  /* Check if we can sign extend in r0 (kinds 6 and 7).  */
  if (insize < 8)
    {
      cost = 3 + shift_insns[left];
      if (cost < best_cost)
	{
	  kind = 6;
	  best_cost = cost;
	}
      /* Try the same with a final signed shift.  */
      if (left < 31)
	{
	  cost = 3 + ext_shift_insns[left + 1] + 1;
	  if (cost < best_cost)
	    {
	      kind = 7;
	      best_cost = cost;
	    }
	}
    }
  if (TARGET_SH3)
    {
      /* Try to use a dynamic shift.  */
      cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
      if (cost < best_cost)
	{
	  kind = 0;
	  best_cost = cost;
	}
    }
  if (costp)
    /* NOTE(review): this stores the most recently computed COST rather
       than BEST_COST; the two differ whenever the last alternative tried
       is not the winner.  Confirm whether BEST_COST was intended before
       changing -- shl_sext_length relies on this value.  */
    *costp = cost;
  return kind;
}
2354
2355 /* Function to be used in the length attribute of the instructions
2356 implementing this pattern. */
2357
2358 int
shl_sext_length(rtx insn)2359 shl_sext_length (rtx insn)
2360 {
2361 rtx set_src, left_rtx, size_rtx;
2362 int cost;
2363
2364 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2365 left_rtx = XEXP (XEXP (set_src, 0), 1);
2366 size_rtx = XEXP (set_src, 1);
2367 shl_sext_kind (left_rtx, size_rtx, &cost);
2368 return cost;
2369 }
2370
2371 /* Generate rtl for this pattern */
2372
int
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  /* Let shl_sext_kind pick the cheapest strategy; COST is unused here.  */
  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      /* Kinds 1-4: shift left, sign extend EXT (8 or 16) bits, then
	 finish with either a plain shift (1/2) or a sloppy shift plus a
	 second sign extension (3/4).  */
      {
	int ext = kind & 1 ? 8 : 16;
	int shift2 = size - ext;

	/* Don't expand fine-grained when combining, because that will
	   make the pattern fail.  */
	if (! rtx_equal_function_value_matters
	    && ! reload_in_progress && ! reload_completed)
	  {
	    emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	    emit_insn (gen_movsi (dest, source));
	    break;
	  }
	if (dest != source)
	  emit_insn (gen_movsi (dest, source));
	operands[0] = dest;
	if (ext - insize)
	  {
	    operands[2] = GEN_INT (ext - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	  }
	emit_insn (kind & 1
		   ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
		   : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
	if (kind <= 2)
	  {
	    if (shift2)
	      {
		operands[2] = GEN_INT (shift2);
		gen_shifty_op (ASHIFT, operands);
	      }
	  }
	else
	  {
	    if (shift2 > 0)
	      {
		if (EXT_SHIFT_SIGNED (shift2))
		  {
		    /* Shift one bit too far left, then restore the sign
		       with an arithmetic right shift.  */
		    operands[2] = GEN_INT (shift2 + 1);
		    gen_shifty_op (ASHIFT, operands);
		    operands[2] = GEN_INT (1);
		    gen_shifty_op (ASHIFTRT, operands);
		    break;
		  }
		operands[2] = GEN_INT (shift2);
		gen_shifty_hi_op (ASHIFT, operands);
	      }
	    else if (shift2)
	      {
		operands[2] = GEN_INT (-shift2);
		gen_shifty_hi_op (LSHIFTRT, operands);
	      }
	    emit_insn (size <= 8
		       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
		       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	break;
      }
    case 5:
      /* Kind 5: shift left / 16 bit sign extend / arithmetic right
	 shifts, one bit at a time.  */
      {
	int i = 16 - size;
	if (! rtx_equal_function_value_matters
	    && ! reload_in_progress && ! reload_completed)
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	else
	  {
	    operands[0] = dest;
	    operands[2] = GEN_INT (16 - insize);
	    gen_shifty_hi_op (ASHIFT, operands);
	    emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
	  }
	/* Don't use gen_ashrsi3 because it generates new pseudos.  */
	while (--i >= 0)
	  gen_ashift (ASHIFTRT, 1, dest);
	break;
      }
    case 6:
    case 7:
      /* Kinds 6/7: sign extend a sub-8-bit field in place via
	 mask / xor / subtract, then shift left (and, for kind 7, one
	 final arithmetic right shift).  */
      /* Don't expand fine-grained when combining, because that will
	 make the pattern fail.  */
      if (! rtx_equal_function_value_matters
	  && ! reload_in_progress && ! reload_completed)
	{
	  emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
	  emit_insn (gen_movsi (dest, source));
	  break;
	}
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
	emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
      break;
    default:
      return -1;
    }
  return 0;
}
2491
2492 /* Prefix a symbol_ref name with "datalabel". */
2493
2494 rtx
gen_datalabel_ref(rtx sym)2495 gen_datalabel_ref (rtx sym)
2496 {
2497 if (GET_CODE (sym) == LABEL_REF)
2498 return gen_rtx_CONST (GET_MODE (sym),
2499 gen_rtx_UNSPEC (GET_MODE (sym),
2500 gen_rtvec (1, sym),
2501 UNSPEC_DATALABEL));
2502
2503 if (GET_CODE (sym) != SYMBOL_REF)
2504 abort ();
2505
2506 return sym;
2507 }
2508
2509
2510 /* The SH cannot load a large constant into a register, constants have to
2511 come from a pc relative load. The reference of a pc relative load
   instruction must be less than 1k in front of the instruction.  This
2513 means that we often have to dump a constant inside a function, and
2514 generate code to branch around it.
2515
2516 It is important to minimize this, since the branches will slow things
2517 down and make things bigger.
2518
2519 Worst case code looks like:
2520
2521 mov.l L1,rn
2522 bra L2
2523 nop
2524 align
2525 L1: .long value
2526 L2:
2527 ..
2528
2529 mov.l L3,rn
2530 bra L4
2531 nop
2532 align
2533 L3: .long value
2534 L4:
2535 ..
2536
2537 We fix this by performing a scan before scheduling, which notices which
2538 instructions need to have their operands fetched from the constant table
2539 and builds the table.
2540
2541 The algorithm is:
2542
2543 scan, find an instruction which needs a pcrel move. Look forward, find the
2544 last barrier which is within MAX_COUNT bytes of the requirement.
2545 If there isn't one, make one. Process all the instructions between
2546 the find and the barrier.
2547
2548 In the above example, we can tell that L3 is within 1k of L1, so
2549 the first move can be shrunk from the 3 insn+constant sequence into
2550 just 1 insn, and the constant moved to L3 to make:
2551
2552 mov.l L1,rn
2553 ..
2554 mov.l L3,rn
2555 bra L4
2556 nop
2557 align
2558 L3:.long value
2559 L4:.long value
2560
2561 Then the second move becomes the target for the shortening process. */
2562
/* One entry in the constant pool.  */
typedef struct
{
  rtx value;			/* Value in table.  */
  rtx label;			/* Label of value; chained via LABEL_REFS
				   when several insns reference it.  */
  rtx wend;			/* End of window: chain of LABEL_REFs, see
				   add_constant / gen_consttable_window_end.  */
  enum machine_mode mode;	/* Mode of value.  */

  /* True if this constant is accessed as part of a post-increment
     sequence.  Note that HImode constants are never accessed in this way.  */
  bool part_of_sequence_p;
} pool_node;
2574
/* The maximum number of constants that can fit into one pool, since
   the pc relative range is 0...1020 bytes and constants are at least 4
   bytes long.  */

#define MAX_POOL_SIZE (1020/4)
static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries of pool_vector currently in use.  */
static int pool_size;
/* Label of the current constant window, and the pool_vector index of
   the constant last added to it; maintained by add_constant and reset
   by dump_table.  */
static rtx pool_window_label;
static int pool_window_last;
2584
2585 /* ??? If we need a constant in HImode which is the truncated value of a
2586 constant we need in SImode, we could combine the two entries thus saving
2587 two bytes. Is this common enough to be worth the effort of implementing
2588 it? */
2589
2590 /* ??? This stuff should be done at the same time that we shorten branches.
2591 As it is now, we must assume that all branches are the maximum size, and
2592 this causes us to almost always output constant pools sooner than
2593 necessary. */
2594
2595 /* Add a constant to the pool and return its label. */
2596
static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
  int i;
  rtx lab, new, ref, newref;

  /* First see if we've already got it.  */
  for (i = 0; i < pool_size; i++)
    {
      if (x->code == pool_vector[i].value->code
	  && mode == pool_vector[i].mode)
	{
	  if (x->code == CODE_LABEL)
	    {
	      /* For labels, also require matching label numbers.  */
	      if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
		continue;
	    }
	  if (rtx_equal_p (x, pool_vector[i].value))
	    {
	      lab = new = 0;
	      /* Chain a fresh label onto the entry unless LAST_VALUE
		 shows this reference continues the preceding entry's
		 sequence.  */
	      if (! last_value
		  || ! i
		  || ! rtx_equal_p (last_value, pool_vector[i-1].value))
		{
		  new = gen_label_rtx ();
		  LABEL_REFS (new) = pool_vector[i].label;
		  pool_vector[i].label = lab = new;
		}
	      /* Close the previous window by chaining its label onto
		 this entry's wend list.  */
	      if (lab && pool_window_label)
		{
		  newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
		  ref = pool_vector[pool_window_last].wend;
		  LABEL_NEXTREF (newref) = ref;
		  pool_vector[pool_window_last].wend = newref;
		}
	      if (new)
		pool_window_label = new;
	      pool_window_last = i;
	      return lab;
	    }
	}
    }

  /* Need a new one.  */
  pool_vector[pool_size].value = x;
  if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
    {
      /* Second half of a multi-word sequence: no label of its own.  */
      lab = 0;
      pool_vector[pool_size - 1].part_of_sequence_p = true;
    }
  else
    lab = gen_label_rtx ();
  pool_vector[pool_size].mode = mode;
  pool_vector[pool_size].label = lab;
  pool_vector[pool_size].wend = NULL_RTX;
  pool_vector[pool_size].part_of_sequence_p = (lab == 0);
  if (lab && pool_window_label)
    {
      newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
      ref = pool_vector[pool_window_last].wend;
      LABEL_NEXTREF (newref) = ref;
      pool_vector[pool_window_last].wend = newref;
    }
  if (lab)
    pool_window_label = lab;
  pool_window_last = pool_size;
  pool_size++;
  return lab;
}
2666
2667 /* Output the literal table. START, if nonzero, is the first instruction
2668 this table is needed for, and also indicates that there is at least one
   casesi_worker_2 instruction; we have to emit the operand3 labels from
2670 these insns at a 4-byte aligned position. BARRIER is the barrier
2671 after which we are to place the table. */
2672
static void
dump_table (rtx start, rtx barrier)
{
  rtx scan = barrier;
  int i;
  int need_align = 1;
  rtx lab, ref;
  int have_df = 0;

  /* Do two passes, first time dump out the HI sized constants.  */

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      if (p->mode == HImode)
	{
	  if (need_align)
	    {
	      scan = emit_insn_after (gen_align_2 (), scan);
	      need_align = 0;
	    }
	  /* Emit every label chained to this entry, then the constant.  */
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
				  scan);
	  /* Emit the window-end markers recorded for this entry.  */
	  for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
	    {
	      lab = XEXP (ref, 0);
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
      else if (p->mode == DFmode)
	have_df = 1;
    }

  need_align = 1;

  if (start)
    {
      /* Emit the operand3 labels of any casesi_worker_2 insns in
	 [START, BARRIER) at this 4-byte aligned position.  */
      scan = emit_insn_after (gen_align_4 (), scan);
      need_align = 0;
      for (; start != barrier; start = NEXT_INSN (start))
	if (GET_CODE (start) == INSN
	    && recog_memoized (start) == CODE_FOR_casesi_worker_2)
	  {
	    rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
	    rtx lab = XEXP (XVECEXP (src, 0, 3), 0);

	    scan = emit_label_after (lab, scan);
	  }
    }
  if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
    {
      /* 8-byte aligned dump: interleave SI/SF constants into the
	 alignment padding in front of DF constants where possible.  */
      rtx align_insn = NULL_RTX;

      scan = emit_label_after (gen_label_rtx (), scan);
      scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
      need_align = 0;

      for (i = 0; i < pool_size; i++)
	{
	  pool_node *p = &pool_vector[i];

	  switch (p->mode)
	    {
	    case HImode:
	      /* Already emitted in the first pass.  */
	      break;
	    case SImode:
	    case SFmode:
	      if (align_insn && !p->part_of_sequence_p)
		{
		  /* Fill the 4-byte alignment gap recorded earlier with
		     this constant, then delete the alignment insn.  */
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    emit_label_before (lab, align_insn);
		  emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
				    align_insn);
		  for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
		    {
		      lab = XEXP (ref, 0);
		      emit_insn_before (gen_consttable_window_end (lab),
					align_insn);
		    }
		  delete_insn (align_insn);
		  align_insn = NULL_RTX;
		  continue;
		}
	      else
		{
		  for (lab = p->label; lab; lab = LABEL_REFS (lab))
		    scan = emit_label_after (lab, scan);
		  scan = emit_insn_after (gen_consttable_4 (p->value,
							    const0_rtx), scan);
		  /* Each 4-byte constant toggles the 8-byte alignment.  */
		  need_align = ! need_align;
		}
	      break;
	    case DFmode:
	      if (need_align)
		{
		  /* Remember the alignment insn; a later SI/SF constant
		     may replace it (see above).  */
		  scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
		  align_insn = scan;
		  need_align = 0;
		}
	      /* FALLTHROUGH */
	    case DImode:
	      for (lab = p->label; lab; lab = LABEL_REFS (lab))
		scan = emit_label_after (lab, scan);
	      scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				      scan);
	      break;
	    default:
	      abort ();
	      break;
	    }

	  if (p->mode != HImode)
	    {
	      for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
		{
		  lab = XEXP (ref, 0);
		  scan = emit_insn_after (gen_consttable_window_end (lab),
					  scan);
		}
	    }
	}

      /* Everything dumped; skip the plain pass below.  */
      pool_size = 0;
    }

  for (i = 0; i < pool_size; i++)
    {
      pool_node *p = &pool_vector[i];

      switch (p->mode)
	{
	case HImode:
	  /* Already emitted in the first pass.  */
	  break;
	case SImode:
	case SFmode:
	  if (need_align)
	    {
	      need_align = 0;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
				  scan);
	  break;
	case DFmode:
	case DImode:
	  if (need_align)
	    {
	      need_align = 0;
	      scan = emit_label_after (gen_label_rtx (), scan);
	      scan = emit_insn_after (gen_align_4 (), scan);
	    }
	  for (lab = p->label; lab; lab = LABEL_REFS (lab))
	    scan = emit_label_after (lab, scan);
	  scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
				  scan);
	  break;
	default:
	  abort ();
	  break;
	}

      if (p->mode != HImode)
	{
	  for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
	    {
	      lab = XEXP (ref, 0);
	      scan = emit_insn_after (gen_consttable_window_end (lab), scan);
	    }
	}
    }

  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
  /* Reset the pool for the next table.  */
  pool_size = 0;
  pool_window_label = NULL_RTX;
  pool_window_last = 0;
}
2855
2856 /* Return nonzero if constant would be an ok source for a
2857 mov.w instead of a mov.l. */
2858
2859 static int
hi_const(rtx src)2860 hi_const (rtx src)
2861 {
2862 return (GET_CODE (src) == CONST_INT
2863 && INTVAL (src) >= -32768
2864 && INTVAL (src) <= 32767);
2865 }
2866
2867 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2868
2869 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
2870 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
2871 need to fix it if the input value is CONST_OK_FOR_I08. */
2872
static int
broken_move (rtx insn)
{
  if (GET_CODE (insn) == INSN)
    {
      rtx pat = PATTERN (insn);
      /* Look at the first element of a PARALLEL (e.g. a move with an
	 attached clobber).  */
      if (GET_CODE (pat) == PARALLEL)
	pat = XVECEXP (pat, 0, 0);
      if (GET_CODE (pat) == SET
	  /* We can load any 8 bit value if we don't care what the high
	     order bits end up as.  */
	  && GET_MODE (SET_DEST (pat)) != QImode
	  && (CONSTANT_P (SET_SRC (pat))
	      /* Match mova_const.  */
	      || (GET_CODE (SET_SRC (pat)) == UNSPEC
		  && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
		  && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
	  /* Exclude FP constants 0.0 / 1.0 loadable with fldi on SH2E.  */
	  && ! (TARGET_SH2E
		&& GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
		&& (fp_zero_operand (SET_SRC (pat))
		    || fp_one_operand (SET_SRC (pat)))
		/* ??? If this is a -m4 or -m4-single compilation, in general
		   we don't know the current setting of fpscr, so disable fldi.
		   There is an exception if this was a register-register move
		   before reload - and hence it was ascertained that we have
		   single precision setting - and in a post-reload optimization
		   we changed this to do a constant load.  In that case
		   we don't have an r0 clobber, hence we must use fldi.  */
		&& (! TARGET_SH4 || TARGET_FMOVD
		    || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
			== SCRATCH))
		&& GET_CODE (SET_DEST (pat)) == REG
		&& FP_REGISTER_P (REGNO (SET_DEST (pat))))
	  /* Exclude constants small enough for an immediate move.  */
	  && (GET_CODE (SET_SRC (pat)) != CONST_INT
	      || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
	return 1;
    }

  return 0;
}
2913
2914 static int
mova_p(rtx insn)2915 mova_p (rtx insn)
2916 {
2917 return (GET_CODE (insn) == INSN
2918 && GET_CODE (PATTERN (insn)) == SET
2919 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2920 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
2921 /* Don't match mova_const. */
2922 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
2923 }
2924
2925 /* Fix up a mova from a switch that went out of range. */
static void
fixup_mova (rtx mova)
{
  if (! flag_pic)
    {
      /* Non-PIC: demote the mova to a plain constant load of the label
	 (which will become a pcload) and force re-recognition.  */
      SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      INSN_CODE (mova) = -1;
    }
  else
    {
      /* PIC: rewrite the following casesi_worker_1 into a
	 casesi_worker_2 anchored at a new label LAB, and make the mova
	 load the label difference instead.  */
      rtx worker = mova;
      rtx lab = gen_label_rtx ();
      rtx wpat, wpat0, wpat1, wsrc, diff;

      /* Find the casesi_worker_1 insn belonging to this mova; it must
	 appear before any label or jump.  */
      do
	{
	  worker = NEXT_INSN (worker);
	  if (! worker
	      || GET_CODE (worker) == CODE_LABEL
	      || GET_CODE (worker) == JUMP_INSN)
	    abort ();
	} while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
      wpat = PATTERN (worker);
      wpat0 = XVECEXP (wpat, 0, 0);
      wpat1 = XVECEXP (wpat, 0, 1);
      wsrc = SET_SRC (wpat0);
      PATTERN (worker) = (gen_casesi_worker_2
			  (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
			   XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
			   XEXP (wpat1, 0)));
      INSN_CODE (worker) = -1;
      /* The mova now loads (const (unspec [(old_label - LAB)] UNSPEC_PIC)).  */
      diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
			    gen_rtx_LABEL_REF (Pmode, lab));
      diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
      SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
      INSN_CODE (mova) = -1;
    }
}
2964
2965 /* Find the last barrier from insn FROM which is close enough to hold the
2966 constant pool. If we can't find one, then create one near the end of
2967 the range. */
2968
2969 static rtx
find_barrier(int num_mova,rtx mova,rtx from)2970 find_barrier (int num_mova, rtx mova, rtx from)
2971 {
2972 int count_si = 0;
2973 int count_hi = 0;
2974 int found_hi = 0;
2975 int found_si = 0;
2976 int found_di = 0;
2977 int hi_align = 2;
2978 int si_align = 2;
2979 int leading_mova = num_mova;
2980 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
2981 int si_limit;
2982 int hi_limit;
2983
2984 /* For HImode: range is 510, add 4 because pc counts from address of
2985 second instruction after this one, subtract 2 for the jump instruction
2986 that we may need to emit before the table, subtract 2 for the instruction
2987 that fills the jump delay slot (in very rare cases, reorg will take an
2988 instruction from after the constant pool or will leave the delay slot
2989 empty). This gives 510.
2990 For SImode: range is 1020, add 4 because pc counts from address of
2991 second instruction after this one, subtract 2 in case pc is 2 byte
2992 aligned, subtract 2 for the jump instruction that we may need to emit
2993 before the table, subtract 2 for the instruction that fills the jump
2994 delay slot. This gives 1018. */
2995
2996 /* The branch will always be shortened now that the reference address for
2997 forward branches is the successor address, thus we need no longer make
2998 adjustments to the [sh]i_limit for -O0. */
2999
3000 si_limit = 1018;
3001 hi_limit = 510;
3002
3003 while (from && count_si < si_limit && count_hi < hi_limit)
3004 {
3005 int inc = get_attr_length (from);
3006 int new_align = 1;
3007
3008 if (GET_CODE (from) == CODE_LABEL)
3009 {
3010 if (optimize)
3011 new_align = 1 << label_to_alignment (from);
3012 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3013 new_align = 1 << barrier_align (from);
3014 else
3015 new_align = 1;
3016 inc = 0;
3017 }
3018
3019 if (GET_CODE (from) == BARRIER)
3020 {
3021
3022 found_barrier = from;
3023
3024 /* If we are at the end of the function, or in front of an alignment
3025 instruction, we need not insert an extra alignment. We prefer
3026 this kind of barrier. */
3027 if (barrier_align (from) > 2)
3028 good_barrier = from;
3029 }
3030
3031 if (broken_move (from))
3032 {
3033 rtx pat, src, dst;
3034 enum machine_mode mode;
3035
3036 pat = PATTERN (from);
3037 if (GET_CODE (pat) == PARALLEL)
3038 pat = XVECEXP (pat, 0, 0);
3039 src = SET_SRC (pat);
3040 dst = SET_DEST (pat);
3041 mode = GET_MODE (dst);
3042
3043 /* We must explicitly check the mode, because sometimes the
3044 front end will generate code to load unsigned constants into
3045 HImode targets without properly sign extending them. */
3046 if (mode == HImode
3047 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3048 {
3049 found_hi += 2;
3050 /* We put the short constants before the long constants, so
3051 we must count the length of short constants in the range
3052 for the long constants. */
3053 /* ??? This isn't optimal, but is easy to do. */
3054 si_limit -= 2;
3055 }
3056 else
3057 {
3058 /* We dump DF/DI constants before SF/SI ones, because
3059 the limit is the same, but the alignment requirements
3060 are higher. We may waste up to 4 additional bytes
3061 for alignment, and the DF/DI constant may have
3062 another SF/SI constant placed before it. */
3063 if (TARGET_SHCOMPACT
3064 && ! found_di
3065 && (mode == DFmode || mode == DImode))
3066 {
3067 found_di = 1;
3068 si_limit -= 8;
3069 }
3070 while (si_align > 2 && found_si + si_align - 2 > count_si)
3071 si_align >>= 1;
3072 if (found_si > count_si)
3073 count_si = found_si;
3074 found_si += GET_MODE_SIZE (mode);
3075 if (num_mova)
3076 si_limit -= GET_MODE_SIZE (mode);
3077 }
3078
3079 /* See the code in machine_dependent_reorg, which has a similar if
3080 statement that generates a new mova insn in many cases. */
3081 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3082 inc += 2;
3083 }
3084
3085 if (mova_p (from))
3086 {
3087 if (! num_mova++)
3088 {
3089 leading_mova = 0;
3090 mova = from;
3091 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3092 }
3093 if (found_si > count_si)
3094 count_si = found_si;
3095 }
3096 else if (GET_CODE (from) == JUMP_INSN
3097 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3098 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3099 {
3100 if (num_mova)
3101 num_mova--;
3102 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3103 {
3104 /* We have just passed the barrier in front of the
3105 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3106 the ADDR_DIFF_VEC is accessed as data, just like our pool
3107 constants, this is a good opportunity to accommodate what
3108 we have gathered so far.
3109 If we waited any longer, we could end up at a barrier in
3110 front of code, which gives worse cache usage for separated
3111 instruction / data caches. */
3112 good_barrier = found_barrier;
3113 break;
3114 }
3115 else
3116 {
3117 rtx body = PATTERN (from);
3118 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3119 }
3120 }
3121 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3122 else if (GET_CODE (from) == JUMP_INSN
3123 && ! TARGET_SH2
3124 && ! TARGET_SMALLCODE)
3125 new_align = 4;
3126
3127 if (found_si)
3128 {
3129 count_si += inc;
3130 if (new_align > si_align)
3131 {
3132 si_limit -= (count_si - 1) & (new_align - si_align);
3133 si_align = new_align;
3134 }
3135 count_si = (count_si + new_align - 1) & -new_align;
3136 }
3137 if (found_hi)
3138 {
3139 count_hi += inc;
3140 if (new_align > hi_align)
3141 {
3142 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3143 hi_align = new_align;
3144 }
3145 count_hi = (count_hi + new_align - 1) & -new_align;
3146 }
3147 from = NEXT_INSN (from);
3148 }
3149
3150 if (num_mova)
3151 {
3152 if (leading_mova)
3153 {
3154 /* Try as we might, the leading mova is out of range. Change
3155 it into a load (which will become a pcload) and retry. */
3156 fixup_mova (mova);
3157 return find_barrier (0, 0, mova);
3158 }
3159 else
3160 {
3161 /* Insert the constant pool table before the mova instruction,
3162 to prevent the mova label reference from going out of range. */
3163 from = mova;
3164 good_barrier = found_barrier = barrier_before_mova;
3165 }
3166 }
3167
3168 if (found_barrier)
3169 {
3170 if (good_barrier && next_real_insn (found_barrier))
3171 found_barrier = good_barrier;
3172 }
3173 else
3174 {
3175 /* We didn't find a barrier in time to dump our stuff,
3176 so we'll make one. */
3177 rtx label = gen_label_rtx ();
3178
3179 /* If we exceeded the range, then we must back up over the last
3180 instruction we looked at. Otherwise, we just need to undo the
3181 NEXT_INSN at the end of the loop. */
3182 if (count_hi > hi_limit || count_si > si_limit)
3183 from = PREV_INSN (PREV_INSN (from));
3184 else
3185 from = PREV_INSN (from);
3186
3187 /* Walk back to be just before any jump or label.
3188 Putting it before a label reduces the number of times the branch
3189 around the constant pool table will be hit. Putting it before
3190 a jump makes it more likely that the bra delay slot will be
3191 filled. */
3192 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3193 || GET_CODE (from) == CODE_LABEL)
3194 from = PREV_INSN (from);
3195
3196 from = emit_jump_insn_after (gen_jump (label), from);
3197 JUMP_LABEL (from) = label;
3198 LABEL_NUSES (label) = 1;
3199 found_barrier = emit_barrier_after (from);
3200 emit_label_after (label, found_barrier);
3201 }
3202
3203 return found_barrier;
3204 }
3205
3206 /* If the instruction INSN is implemented by a special function, and we can
3207 positively find the register that is used to call the sfunc, and this
3208 register is not used anywhere else in this instruction - except as the
3209 destination of a set, return this register; else, return 0. */
3210 rtx
sfunc_uses_reg(rtx insn)3211 sfunc_uses_reg (rtx insn)
3212 {
3213 int i;
3214 rtx pattern, part, reg_part, reg;
3215
3216 if (GET_CODE (insn) != INSN)
3217 return 0;
3218 pattern = PATTERN (insn);
3219 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3220 return 0;
3221
3222 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3223 {
3224 part = XVECEXP (pattern, 0, i);
3225 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3226 reg_part = part;
3227 }
3228 if (! reg_part)
3229 return 0;
3230 reg = XEXP (reg_part, 0);
3231 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3232 {
3233 part = XVECEXP (pattern, 0, i);
3234 if (part == reg_part || GET_CODE (part) == CLOBBER)
3235 continue;
3236 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3237 && GET_CODE (SET_DEST (part)) == REG)
3238 ? SET_SRC (part) : part)))
3239 return 0;
3240 }
3241 return reg;
3242 }
3243
/* See if the only way in which INSN uses REG is by calling it, or by
   setting it while calling it.  Set *SET to a SET rtx if the register
   is set by INSN.
   Returns nonzero if INSN has a use of REG other than as a call address
   (or as the destination of a set), zero otherwise.  */

static int
noncall_uses_reg (rtx reg, rtx insn, rtx *set)
{
  rtx pattern, reg2;

  *set = NULL_RTX;

  /* An sfunc call through REG is a call use; record a SET of REG, if
     any, but report no non-call use.  */
  reg2 = sfunc_uses_reg (insn);
  if (reg2 && REGNO (reg2) == REGNO (reg))
    {
      pattern = single_set (insn);
      if (pattern
	  && GET_CODE (SET_DEST (pattern)) == REG
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	*set = pattern;
      return 0;
    }
  if (GET_CODE (insn) != CALL_INSN)
    {
      /* We don't use rtx_equal_p because we don't care if the mode is
	 different.  */
      pattern = single_set (insn);
      if (pattern
	  && GET_CODE (SET_DEST (pattern)) == REG
	  && REGNO (reg) == REGNO (SET_DEST (pattern)))
	{
	  rtx par, part;
	  int i;

	  /* INSN sets REG.  REG must not also appear in any non-SET
	     element of a surrounding PARALLEL, nor in the SET source.  */
	  *set = pattern;
	  par = PATTERN (insn);
	  if (GET_CODE (par) == PARALLEL)
	    for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
	      {
		part = XVECEXP (par, 0, i);
		if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
		  return 1;
	      }
	  return reg_mentioned_p (reg, SET_SRC (pattern));
	}

      /* A non-call insn that is not a simple set of REG counts as a
	 non-call use.  */
      return 1;
    }

  /* INSN is a call; peel the pattern down to the CALL rtx itself.  */
  pattern = PATTERN (insn);

  if (GET_CODE (pattern) == PARALLEL)
    {
      int i;

      /* REG must not appear in any element past the first.  */
      for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
	if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
	  return 1;
      pattern = XVECEXP (pattern, 0, 0);
    }

  if (GET_CODE (pattern) == SET)
    {
      if (reg_mentioned_p (reg, SET_DEST (pattern)))
	{
	  /* We don't use rtx_equal_p, because we don't care if the
	     mode is different.  */
	  if (GET_CODE (SET_DEST (pattern)) != REG
	      || REGNO (reg) != REGNO (SET_DEST (pattern)))
	    return 1;

	  *set = pattern;
	}

      pattern = SET_SRC (pattern);
    }

  /* Finally, the call itself must be indirect through exactly REG.  */
  if (GET_CODE (pattern) != CALL
      || GET_CODE (XEXP (pattern, 0)) != MEM
      || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
    return 1;

  return 0;
}
3327
3328 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3329 general registers. Bits 0..15 mean that the respective registers
3330 are used as inputs in the instruction. Bits 16..31 mean that the
3331 registers 0..15, respectively, are used as outputs, or are clobbered.
3332 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3333 int
regs_used(rtx x,int is_dest)3334 regs_used (rtx x, int is_dest)
3335 {
3336 enum rtx_code code;
3337 const char *fmt;
3338 int i, used = 0;
3339
3340 if (! x)
3341 return used;
3342 code = GET_CODE (x);
3343 switch (code)
3344 {
3345 case REG:
3346 if (REGNO (x) < 16)
3347 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3348 << (REGNO (x) + is_dest));
3349 return 0;
3350 case SUBREG:
3351 {
3352 rtx y = SUBREG_REG (x);
3353
3354 if (GET_CODE (y) != REG)
3355 break;
3356 if (REGNO (y) < 16)
3357 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3358 << (REGNO (y) +
3359 subreg_regno_offset (REGNO (y),
3360 GET_MODE (y),
3361 SUBREG_BYTE (x),
3362 GET_MODE (x)) + is_dest));
3363 return 0;
3364 }
3365 case SET:
3366 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3367 case RETURN:
3368 /* If there was a return value, it must have been indicated with USE. */
3369 return 0x00ffff00;
3370 case CLOBBER:
3371 is_dest = 1;
3372 break;
3373 case MEM:
3374 is_dest = 0;
3375 break;
3376 case CALL:
3377 used |= 0x00ff00f0;
3378 break;
3379 default:
3380 break;
3381 }
3382
3383 fmt = GET_RTX_FORMAT (code);
3384
3385 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3386 {
3387 if (fmt[i] == 'E')
3388 {
3389 register int j;
3390 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3391 used |= regs_used (XVECEXP (x, i, j), is_dest);
3392 }
3393 else if (fmt[i] == 'e')
3394 used |= regs_used (XEXP (x, i), is_dest);
3395 }
3396 return used;
3397 }
3398
/* Create an instruction that prevents redirection of a conditional branch
   to the destination of the JUMP with address ADDR.
   If the branch needs to be implemented as an indirect jump, try to find
   a scratch register for it.
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   If any preceding insn that doesn't fit into a delay slot is good enough,
   pass 1.  Pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
   If a blocking instruction is made or recognized, return it.  */

static rtx
gen_block_redirect (rtx jump, int addr, int need_block)
{
  int dead = 0;
  rtx prev = prev_nonnote_insn (jump);
  rtx dest;

  /* First, check if we already have an instruction that satisfies our need.  */
  if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
    {
      if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
	return prev;
      /* USEs, CLOBBERs and delay-slot-eligible insns don't block
	 redirection; fall back to looking at JUMP itself.  */
      if (GET_CODE (PATTERN (prev)) == USE
	  || GET_CODE (PATTERN (prev)) == CLOBBER
	  || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	prev = jump;
      /* Clearing bit 0 implements the "any non-delay-slot insn is good
	 enough" meaning of NEED_BLOCK == 1; a negative value (internal
	 recursion marker) returns immediately.  */
      else if ((need_block &= ~1) < 0)
	return prev;
      else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
	need_block = 0;
    }
  if (GET_CODE (PATTERN (jump)) == RETURN)
    {
      if (! need_block)
	return prev;
      /* Reorg even does nasty things with return insns that cause branches
	 to go out of range - see find_end_label and callers.  */
      return emit_insn_before (gen_block_branch_redirect (GEN_INT (0)) , jump);
    }
  /* We can't use JUMP_LABEL here because it might be undefined
     when not optimizing.  */
  dest = XEXP (SET_SRC (PATTERN (jump)), 0);
  /* If the branch is out of range, try to find a scratch register for it.
     4092/4098 bracket the reach of a branch around ADDR; the unsigned
     bias turns the two-sided range test into a single compare.  */
  if (optimize
      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
	  > 4092 + 4098))
    {
      rtx scan;
      /* Don't look for the stack pointer as a scratch register,
	 it would cause trouble if an interrupt occurred.  */
      unsigned try = 0x7fff, used;
      int jump_left = flag_expensive_optimizations + 1;

      /* It is likely that the most recent eligible instruction is wanted for
	 the delay slot.  Therefore, find out which registers it uses, and
	 try to avoid using them.  */

      for (scan = jump; (scan = PREV_INSN (scan)); )
	{
	  enum rtx_code code;

	  if (INSN_DELETED_P (scan))
	    continue;
	  code = GET_CODE (scan);
	  if (code == CODE_LABEL || code == JUMP_INSN)
	    break;
	  if (code == INSN
	      && GET_CODE (PATTERN (scan)) != USE
	      && GET_CODE (PATTERN (scan)) != CLOBBER
	      && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
	    {
	      /* Avoid the registers the prospective delay-slot insn
		 reads or writes.  */
	      try &= ~regs_used (PATTERN (scan), 0);
	      break;
	    }
	}
      /* Scan forward from the branch target, tracking registers that are
	 written (bits 16..31 of regs_used) before being read: those are
	 dead at the target and usable as scratch.  */
      for (used = dead = 0, scan = JUMP_LABEL (jump);
	   (scan = NEXT_INSN (scan)); )
	{
	  enum rtx_code code;

	  if (INSN_DELETED_P (scan))
	    continue;
	  code = GET_CODE (scan);
	  if (GET_RTX_CLASS (code) == 'i')
	    {
	      used |= regs_used (PATTERN (scan), 0);
	      if (code == CALL_INSN)
		used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
	      /* A register written before any read is dead on entry.  */
	      dead |= (used >> 16) & ~used;
	      if (dead & try)
		{
		  dead &= try;
		  break;
		}
	      if (code == JUMP_INSN)
		{
		  /* Follow a limited number of unconditional jumps, in
		     case the target itself branches on immediately.  */
		  if (jump_left-- && simplejump_p (scan))
		    scan = JUMP_LABEL (scan);
		  else
		    break;
		}
	    }
	}
      /* Mask out the stack pointer again, in case it was
	 the only 'free' register we have found.  */
      dead &= 0x7fff;
    }
  /* If the immediate destination is still in range, check for possible
     threading with a jump beyond the delay slot insn.
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation then.  */

  else if (optimize && need_block >= 0)
    {
      rtx next = next_active_insn (next_active_insn (dest));
      if (next && GET_CODE (next) == JUMP_INSN
	  && GET_CODE (PATTERN (next)) == SET
	  && recog_memoized (next) == CODE_FOR_jump_compact)
	{
	  dest = JUMP_LABEL (next);
	  /* If threading would take us out of range, block the second
	     jump too; -1 marks the recursive call.  */
	  if (dest
	      && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
		  > 4092 + 4098))
	    gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
	}
    }

  if (dead)
    {
      /* Use the lowest-numbered dead register found as the scratch.  */
      rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));

      /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, and thus exposing all constituent
	 instructions to further optimization.  However, reorg uses
	 simplejump_p to determine if there is an unconditional jump where
	 it should try to schedule instructions from the target of the
	 branch; simplejump_p fails for indirect jumps even if they have
	 a JUMP_LABEL.  */
      rtx insn = emit_insn_before (gen_indirect_jump_scratch
				   (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
				   , jump);
      /* ??? We would like this to have the scope of the jump, but that
	 scope will change when a delay slot insn of an inner scope is added.
	 Hence, after delay slot scheduling, we'll have to expect
	 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
	 the jump.  */

      INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
      INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
      return insn;
    }
  else if (need_block)
    /* We can't use JUMP_LABEL here because it might be undefined
       when not optimizing.  */
    return emit_insn_before (gen_block_branch_redirect
			     (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
			     , jump);
  return prev;
}
3558
/* Reach of an SH conditional branch, in bytes, relative to the branch
   insn (asymmetric because the PC is biased past the branch).  */
#define CONDJUMP_MIN -252
#define CONDJUMP_MAX 262
/* Bookkeeping for a conditional branch whose target is (or may go) out
   of range, to be rewritten as a branch around an unconditional jump.  */
struct far_branch
{
  /* A label (to be placed) in front of the jump
     that jumps to our ultimate destination.  */
  rtx near_label;
  /* Where we are going to insert it if we cannot move the jump any farther,
     or the jump itself if we have picked up an existing jump.  */
  rtx insert_place;
  /* The ultimate destination.  */
  rtx far_label;
  /* Link to the previously recorded far branch, forming a chain.  */
  struct far_branch *prev;
  /* If the branch has already been created, its address;
     else the address of its first prospective user.  */
  int address;
};
3576
static void gen_far_branch (struct far_branch *);
/* Which phase of machine-dependent reorg is currently running; consulted
   e.g. by barrier_align to know whether insn lengths can be trusted.  */
enum mdep_reorg_phase_e mdep_reorg_phase;

/* Materialize the far branch described by BP: invert the conditional
   branch at BP->insert_place so it skips over a newly emitted
   unconditional jump (or return) to BP->far_label.  */
static void
gen_far_branch (struct far_branch *bp)
{
  rtx insn = bp->insert_place;
  rtx jump;
  rtx label = gen_label_rtx ();

  /* LABEL is the skip target for the inverted conditional branch; it is
     emitted after INSN, then the jump after that, then the barrier, then
     the near label - i.e. in reverse final order.  */
  emit_label_after (label, insn);
  if (bp->far_label)
    {
      jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
      LABEL_NUSES (bp->far_label)++;
    }
  else
    jump = emit_jump_insn_after (gen_return (), insn);
  /* Emit a barrier so that reorg knows that any following instructions
     are not reachable via a fall-through path.
     But don't do this when not optimizing, since we wouldn't suppress the
     alignment for the barrier then, and could end up with out-of-range
     pc-relative loads.  */
  if (optimize)
    emit_barrier_after (jump);
  emit_label_after (bp->near_label, insn);
  JUMP_LABEL (jump) = bp->far_label;
  if (! invert_jump (insn, label, 1))
    abort ();
  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn -
     when reorg did this, it pessimized code (we rather hide the delay slot)
     and it could cause branches to go out of range.  */
  if (bp->far_label)
    (emit_insn_after
     (gen_stuff_delay_slot
      (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
       GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
      insn));
  /* Prevent reorg from undoing our splits.  */
  gen_block_redirect (jump, bp->address += 2, 2);
}
3618
/* Fix up ADDR_DIFF_VECs: for each one, find the matching casesi_jump_2
   (braf) insn and rebase the vector's entries on the braf's reference
   label, so the offsets match what braf actually adds to the PC.  */
void
fixup_addr_diff_vecs (rtx first)
{
  rtx insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    {
      rtx vec_lab, pat, prev, prevpat, x, braf_label;

      if (GET_CODE (insn) != JUMP_INSN
	  || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	continue;
      pat = PATTERN (insn);
      /* The vector's current base label (operand 0 of the ADDR_DIFF_VEC).  */
      vec_lab = XEXP (XEXP (pat, 0), 0);

      /* Search the matching casesi_jump_2.  It is the jump insn whose
	 second PARALLEL element is a USE of a LABEL_REF to VEC_LAB.
	 NOTE(review): this loop has no lower bound - it assumes the
	 casesi_jump_2 always exists before the label, and would walk
	 off the insn chain otherwise.  */
      for (prev = vec_lab; ; prev = PREV_INSN (prev))
	{
	  if (GET_CODE (prev) != JUMP_INSN)
	    continue;
	  prevpat = PATTERN (prev);
	  if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
	    continue;
	  x = XVECEXP (prevpat, 0, 1);
	  if (GET_CODE (x) != USE)
	    continue;
	  x = XEXP (x, 0);
	  if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
	    break;
	}

      /* Emit the reference label of the braf where it belongs, right after
	 the casesi_jump_2 (i.e. braf).  */
      braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
      emit_label_after (braf_label, prev);

      /* Fix up the ADDR_DIF_VEC to be relative
	 to the reference address of the braf.  */
      XEXP (XEXP (pat, 0), 0) = braf_label;
    }
}
3661
/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
   a barrier.  Return the base 2 logarithm of the desired alignment.  */
int
barrier_align (rtx barrier_or_label)
{
  rtx next = next_real_insn (barrier_or_label), pat, prev;
  int slot, credit, jump_to_next = 0;

  if (! next)
    return 0;

  pat = PATTERN (next);

  /* A switch table follows: align it for table access.  */
  if (GET_CODE (pat) == ADDR_DIFF_VEC)
    return 2;

  if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
    /* This is a barrier in front of a constant table.  */
    return 0;

  /* NOTE(review): prev is dereferenced without a null check - this
     presumes a barrier is never the first real insn of the function.  */
  prev = prev_real_insn (barrier_or_label);
  if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
    {
      pat = PATTERN (prev);
      /* If this is a very small table, we want to keep the alignment after
	 the table to the minimum for proper code alignment.  */
      return ((TARGET_SMALLCODE
	       || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
		   <= (unsigned)1 << (CACHE_LOG - 2)))
	      ? 1 << TARGET_SHMEDIA : align_jumps_log);
    }

  if (TARGET_SMALLCODE)
    return 0;

  if (! TARGET_SH2 || ! optimize)
    return align_jumps_log;

  /* When fixing up pcloads, a constant table might be inserted just before
     the basic block that ends with the barrier.  Thus, we can't trust the
     instruction lengths before that.  */
  if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
    {
      /* Check if there is an immediately preceding branch to the insn beyond
	 the barrier.  We must weight the cost of discarding useful information
	 from the current cache line when executing this branch and there is
	 an alignment, against that of fetching unneeded insn in front of the
	 branch target when there is no alignment.  */

      /* There are two delay_slot cases to consider.  One is the simple case
	 where the preceding branch is to the insn beyond the barrier (simple
	 delay slot filling), and the other is where the preceding branch has
	 a delay slot that is a duplicate of the insn after the barrier
	 (fill_eager_delay_slots) and the branch is to the insn after the insn
	 after the barrier.  */

      /* PREV is presumed to be the JUMP_INSN for the barrier under
	 investigation.  Skip to the insn before it.  */
      prev = prev_real_insn (prev);

      /* Walk backwards, spending a "credit" of one cache line (plus the
	 branch itself) worth of insn bytes; SLOT tracks whether a
	 delay-slot-eligible insn has been seen.  */
      for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
	   credit >= 0 && prev && GET_CODE (prev) == INSN;
	   prev = prev_real_insn (prev))
	{
	  jump_to_next = 0;
	  if (GET_CODE (PATTERN (prev)) == USE
	      || GET_CODE (PATTERN (prev)) == CLOBBER)
	    continue;
	  if (GET_CODE (PATTERN (prev)) == SEQUENCE)
	    {
	      prev = XVECEXP (PATTERN (prev), 0, 1);
	      if (INSN_UID (prev) == INSN_UID (next))
		{
		  /* Delay slot was filled with insn at jump target.  */
		  jump_to_next = 1;
		  continue;
		}
	    }

	  if (slot &&
	      get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
	    slot = 0;
	  credit -= get_attr_length (prev);
	}
      if (prev
	  && GET_CODE (prev) == JUMP_INSN
	  && JUMP_LABEL (prev))
	{
	  rtx x;
	  if (jump_to_next
	      || next_real_insn (JUMP_LABEL (prev)) == next
	      /* If relax_delay_slots() decides NEXT was redundant
		 with some previous instruction, it will have
		 redirected PREV's jump to the following insn.  */
	      || JUMP_LABEL (prev) == next_nonnote_insn (next)
	      /* There is no upper bound on redundant instructions
		 that might have been skipped, but we must not put an
		 alignment where none had been before.  */
	      || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
		  (INSN_P (x)
		   && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
		       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
		       || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
	    {
	      rtx pat = PATTERN (prev);
	      if (GET_CODE (pat) == PARALLEL)
		pat = XVECEXP (pat, 0, 0);
	      /* If enough credit remains (a branch costs 2 more than a
		 plain jump), skipping the alignment is the better deal.  */
	      if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
		return 0;
	    }
	}
    }

  return align_jumps_log;
}
3777
3778 /* If we are inside a phony loop, almost any kind of label can turn up as the
3779 first one in the loop. Aligning a braf label causes incorrect switch
3780 destination addresses; we can detect braf labels because they are
3781 followed by a BARRIER.
3782 Applying loop alignment to small constant or switch tables is a waste
3783 of space, so we suppress this too. */
3784 int
sh_loop_align(rtx label)3785 sh_loop_align (rtx label)
3786 {
3787 rtx next = label;
3788
3789 do
3790 next = next_nonnote_insn (next);
3791 while (next && GET_CODE (next) == CODE_LABEL);
3792
3793 if (! next
3794 || ! INSN_P (next)
3795 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3796 || recog_memoized (next) == CODE_FOR_consttable_2)
3797 return 0;
3798
3799 return align_loops_log;
3800 }
3801
3802 /* Do a final pass over the function, just before delayed branch
3803 scheduling. */
3804
3805 static void
sh_reorg(void)3806 sh_reorg (void)
3807 {
3808 rtx first, insn, mova = NULL_RTX;
3809 int num_mova;
3810 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3811 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3812
3813 first = get_insns ();
3814
3815 /* We must split call insns before introducing `mova's. If we're
3816 optimizing, they'll have already been split. Otherwise, make
3817 sure we don't split them too late. */
3818 if (! optimize)
3819 split_all_insns_noflow ();
3820
3821 if (TARGET_SHMEDIA)
3822 return;
3823
3824 /* If relaxing, generate pseudo-ops to associate function calls with
3825 the symbols they call. It does no harm to not generate these
3826 pseudo-ops. However, when we can generate them, it enables to
3827 linker to potentially relax the jsr to a bsr, and eliminate the
3828 register load and, possibly, the constant pool entry. */
3829
3830 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3831 if (TARGET_RELAX)
3832 {
3833 /* Remove all REG_LABEL notes. We want to use them for our own
3834 purposes. This works because none of the remaining passes
3835 need to look at them.
3836
3837 ??? But it may break in the future. We should use a machine
3838 dependent REG_NOTE, or some other approach entirely. */
3839 for (insn = first; insn; insn = NEXT_INSN (insn))
3840 {
3841 if (INSN_P (insn))
3842 {
3843 rtx note;
3844
3845 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3846 remove_note (insn, note);
3847 }
3848 }
3849
3850 for (insn = first; insn; insn = NEXT_INSN (insn))
3851 {
3852 rtx pattern, reg, link, set, scan, dies, label;
3853 int rescan = 0, foundinsn = 0;
3854
3855 if (GET_CODE (insn) == CALL_INSN)
3856 {
3857 pattern = PATTERN (insn);
3858
3859 if (GET_CODE (pattern) == PARALLEL)
3860 pattern = XVECEXP (pattern, 0, 0);
3861 if (GET_CODE (pattern) == SET)
3862 pattern = SET_SRC (pattern);
3863
3864 if (GET_CODE (pattern) != CALL
3865 || GET_CODE (XEXP (pattern, 0)) != MEM)
3866 continue;
3867
3868 reg = XEXP (XEXP (pattern, 0), 0);
3869 }
3870 else
3871 {
3872 reg = sfunc_uses_reg (insn);
3873 if (! reg)
3874 continue;
3875 }
3876
3877 if (GET_CODE (reg) != REG)
3878 continue;
3879
3880 /* This is a function call via REG. If the only uses of REG
3881 between the time that it is set and the time that it dies
3882 are in function calls, then we can associate all the
3883 function calls with the setting of REG. */
3884
3885 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3886 {
3887 if (REG_NOTE_KIND (link) != 0)
3888 continue;
3889 set = single_set (XEXP (link, 0));
3890 if (set && rtx_equal_p (reg, SET_DEST (set)))
3891 {
3892 link = XEXP (link, 0);
3893 break;
3894 }
3895 }
3896
3897 if (! link)
3898 {
3899 /* ??? Sometimes global register allocation will have
3900 deleted the insn pointed to by LOG_LINKS. Try
3901 scanning backward to find where the register is set. */
3902 for (scan = PREV_INSN (insn);
3903 scan && GET_CODE (scan) != CODE_LABEL;
3904 scan = PREV_INSN (scan))
3905 {
3906 if (! INSN_P (scan))
3907 continue;
3908
3909 if (! reg_mentioned_p (reg, scan))
3910 continue;
3911
3912 if (noncall_uses_reg (reg, scan, &set))
3913 break;
3914
3915 if (set)
3916 {
3917 link = scan;
3918 break;
3919 }
3920 }
3921 }
3922
3923 if (! link)
3924 continue;
3925
3926 /* The register is set at LINK. */
3927
3928 /* We can only optimize the function call if the register is
3929 being set to a symbol. In theory, we could sometimes
3930 optimize calls to a constant location, but the assembler
3931 and linker do not support that at present. */
3932 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
3933 && GET_CODE (SET_SRC (set)) != LABEL_REF)
3934 continue;
3935
3936 /* Scan forward from LINK to the place where REG dies, and
3937 make sure that the only insns which use REG are
3938 themselves function calls. */
3939
3940 /* ??? This doesn't work for call targets that were allocated
3941 by reload, since there may not be a REG_DEAD note for the
3942 register. */
3943
3944 dies = NULL_RTX;
3945 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
3946 {
3947 rtx scanset;
3948
3949 /* Don't try to trace forward past a CODE_LABEL if we haven't
3950 seen INSN yet. Ordinarily, we will only find the setting insn
3951 in LOG_LINKS if it is in the same basic block. However,
3952 cross-jumping can insert code labels in between the load and
3953 the call, and can result in situations where a single call
3954 insn may have two targets depending on where we came from. */
3955
3956 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
3957 break;
3958
3959 if (! INSN_P (scan))
3960 continue;
3961
3962 /* Don't try to trace forward past a JUMP. To optimize
3963 safely, we would have to check that all the
3964 instructions at the jump destination did not use REG. */
3965
3966 if (GET_CODE (scan) == JUMP_INSN)
3967 break;
3968
3969 if (! reg_mentioned_p (reg, scan))
3970 continue;
3971
3972 if (noncall_uses_reg (reg, scan, &scanset))
3973 break;
3974
3975 if (scan == insn)
3976 foundinsn = 1;
3977
3978 if (scan != insn
3979 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
3980 {
3981 /* There is a function call to this register other
3982 than the one we are checking. If we optimize
3983 this call, we need to rescan again below. */
3984 rescan = 1;
3985 }
3986
3987 /* ??? We shouldn't have to worry about SCANSET here.
3988 We should just be able to check for a REG_DEAD note
3989 on a function call. However, the REG_DEAD notes are
3990 apparently not dependable around libcalls; c-torture
3991 execute/920501-2 is a test case. If SCANSET is set,
3992 then this insn sets the register, so it must have
3993 died earlier. Unfortunately, this will only handle
3994 the cases in which the register is, in fact, set in a
3995 later insn. */
3996
3997 /* ??? We shouldn't have to use FOUNDINSN here.
3998 However, the LOG_LINKS fields are apparently not
3999 entirely reliable around libcalls;
4000 newlib/libm/math/e_pow.c is a test case. Sometimes
4001 an insn will appear in LOG_LINKS even though it is
4002 not the most recent insn which sets the register. */
4003
4004 if (foundinsn
4005 && (scanset
4006 || find_reg_note (scan, REG_DEAD, reg)))
4007 {
4008 dies = scan;
4009 break;
4010 }
4011 }
4012
4013 if (! dies)
4014 {
4015 /* Either there was a branch, or some insn used REG
4016 other than as a function call address. */
4017 continue;
4018 }
4019
4020 /* Create a code label, and put it in a REG_LABEL note on
4021 the insn which sets the register, and on each call insn
4022 which uses the register. In final_prescan_insn we look
4023 for the REG_LABEL notes, and output the appropriate label
4024 or pseudo-op. */
4025
4026 label = gen_label_rtx ();
4027 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4028 REG_NOTES (link));
4029 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4030 REG_NOTES (insn));
4031 if (rescan)
4032 {
4033 scan = link;
4034 do
4035 {
4036 rtx reg2;
4037
4038 scan = NEXT_INSN (scan);
4039 if (scan != insn
4040 && ((GET_CODE (scan) == CALL_INSN
4041 && reg_mentioned_p (reg, scan))
4042 || ((reg2 = sfunc_uses_reg (scan))
4043 && REGNO (reg2) == REGNO (reg))))
4044 REG_NOTES (scan)
4045 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4046 }
4047 while (scan != dies);
4048 }
4049 }
4050 }
4051
4052 if (TARGET_SH2)
4053 fixup_addr_diff_vecs (first);
4054
4055 if (optimize)
4056 {
4057 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4058 shorten_branches (first);
4059 }
4060 /* Scan the function looking for move instructions which have to be
4061 changed to pc-relative loads and insert the literal tables. */
4062
4063 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4064 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4065 {
4066 if (mova_p (insn))
4067 {
4068 /* ??? basic block reordering can move a switch table dispatch
4069 below the switch table. Check if that has happened.
4070 We only have the addresses available when optimizing; but then,
4071 this check shouldn't be needed when not optimizing. */
4072 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4073 if (optimize
4074 && (INSN_ADDRESSES (INSN_UID (insn))
4075 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4076 {
4077 /* Change the mova into a load.
4078 broken_move will then return true for it. */
4079 fixup_mova (insn);
4080 }
4081 else if (! num_mova++)
4082 mova = insn;
4083 }
4084 else if (GET_CODE (insn) == JUMP_INSN
4085 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4086 && num_mova)
4087 {
4088 rtx scan;
4089 int total;
4090
4091 num_mova--;
4092
4093 /* Some code might have been inserted between the mova and
4094 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4095 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4096 total += get_attr_length (scan);
4097
4098 /* range of mova is 1020, add 4 because pc counts from address of
4099 second instruction after this one, subtract 2 in case pc is 2
4100 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4101 cancels out with alignment effects of the mova itself. */
4102 if (total > 1022)
4103 {
4104 /* Change the mova into a load, and restart scanning
4105 there. broken_move will then return true for mova. */
4106 fixup_mova (mova);
4107 insn = mova;
4108 }
4109 }
4110 if (broken_move (insn)
4111 || (GET_CODE (insn) == INSN
4112 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4113 {
4114 rtx scan;
4115 /* Scan ahead looking for a barrier to stick the constant table
4116 behind. */
4117 rtx barrier = find_barrier (num_mova, mova, insn);
4118 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4119 int need_aligned_label = 0;
4120
4121 if (num_mova && ! mova_p (mova))
4122 {
4123 /* find_barrier had to change the first mova into a
4124 pcload; thus, we have to start with this new pcload. */
4125 insn = mova;
4126 num_mova = 0;
4127 }
4128 /* Now find all the moves between the points and modify them. */
4129 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4130 {
4131 if (GET_CODE (scan) == CODE_LABEL)
4132 last_float = 0;
4133 if (GET_CODE (scan) == INSN
4134 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4135 need_aligned_label = 1;
4136 if (broken_move (scan))
4137 {
4138 rtx *patp = &PATTERN (scan), pat = *patp;
4139 rtx src, dst;
4140 rtx lab;
4141 rtx newsrc;
4142 enum machine_mode mode;
4143
4144 if (GET_CODE (pat) == PARALLEL)
4145 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4146 src = SET_SRC (pat);
4147 dst = SET_DEST (pat);
4148 mode = GET_MODE (dst);
4149
4150 if (mode == SImode && hi_const (src)
4151 && REGNO (dst) != FPUL_REG)
4152 {
4153 int offset = 0;
4154
4155 mode = HImode;
4156 while (GET_CODE (dst) == SUBREG)
4157 {
4158 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4159 GET_MODE (SUBREG_REG (dst)),
4160 SUBREG_BYTE (dst),
4161 GET_MODE (dst));
4162 dst = SUBREG_REG (dst);
4163 }
4164 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4165 }
4166 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4167 {
4168 /* This must be an insn that clobbers r0. */
4169 rtx clobber = XVECEXP (PATTERN (scan), 0,
4170 XVECLEN (PATTERN (scan), 0) - 1);
4171
4172 if (GET_CODE (clobber) != CLOBBER
4173 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4174 abort ();
4175
4176 if (last_float
4177 && reg_set_between_p (r0_rtx, last_float_move, scan))
4178 last_float = 0;
4179 if (last_float
4180 && TARGET_SHCOMPACT
4181 && GET_MODE_SIZE (mode) != 4
4182 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4183 last_float = 0;
4184 lab = add_constant (src, mode, last_float);
4185 if (lab)
4186 emit_insn_before (gen_mova (lab), scan);
4187 else
4188 {
4189 /* There will be a REG_UNUSED note for r0 on
4190 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4191 lest reorg:mark_target_live_regs will not
4192 consider r0 to be used, and we end up with delay
4193 slot insn in front of SCAN that clobbers r0. */
4194 rtx note
4195 = find_regno_note (last_float_move, REG_UNUSED, 0);
4196
4197 /* If we are not optimizing, then there may not be
4198 a note. */
4199 if (note)
4200 PUT_MODE (note, REG_INC);
4201
4202 *last_float_addr = r0_inc_rtx;
4203 }
4204 last_float_move = scan;
4205 last_float = src;
4206 newsrc = gen_rtx (MEM, mode,
4207 (((TARGET_SH4 && ! TARGET_FMOVD)
4208 || REGNO (dst) == FPUL_REG)
4209 ? r0_inc_rtx
4210 : r0_rtx));
4211 last_float_addr = &XEXP (newsrc, 0);
4212
4213 /* Remove the clobber of r0. */
4214 XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
4215 RTX_UNCHANGING_P (newsrc) = 1;
4216 }
4217 /* This is a mova needing a label. Create it. */
4218 else if (GET_CODE (src) == UNSPEC
4219 && XINT (src, 1) == UNSPEC_MOVA
4220 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4221 {
4222 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4223 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4224 newsrc = gen_rtx_UNSPEC (SImode,
4225 gen_rtvec (1, newsrc),
4226 UNSPEC_MOVA);
4227 }
4228 else
4229 {
4230 lab = add_constant (src, mode, 0);
4231 newsrc = gen_rtx_MEM (mode,
4232 gen_rtx_LABEL_REF (VOIDmode, lab));
4233 RTX_UNCHANGING_P (newsrc) = 1;
4234 }
4235 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4236 INSN_CODE (scan) = -1;
4237 }
4238 }
4239 dump_table (need_aligned_label ? insn : 0, barrier);
4240 insn = barrier;
4241 }
4242 }
4243
4244 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4245 INSN_ADDRESSES_FREE ();
4246 split_branches (first);
4247
4248 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4249 also has an effect on the register that holds the address of the sfunc.
4250 Insert an extra dummy insn in front of each sfunc that pretends to
4251 use this register. */
4252 if (flag_delayed_branch)
4253 {
4254 for (insn = first; insn; insn = NEXT_INSN (insn))
4255 {
4256 rtx reg = sfunc_uses_reg (insn);
4257
4258 if (! reg)
4259 continue;
4260 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4261 }
4262 }
4263 #if 0
4264 /* fpscr is not actually a user variable, but we pretend it is for the
4265 sake of the previous optimization passes, since we want it handled like
4266 one. However, we don't have any debugging information for it, so turn
4267 it into a non-user variable now. */
4268 if (TARGET_SH4)
4269 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4270 #endif
4271 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4272 }
4273
4274 int
get_dest_uid(rtx label,int max_uid)4275 get_dest_uid (rtx label, int max_uid)
4276 {
4277 rtx dest = next_real_insn (label);
4278 int dest_uid;
4279 if (! dest)
4280 /* This can happen for an undefined label. */
4281 return 0;
4282 dest_uid = INSN_UID (dest);
4283 /* If this is a newly created branch redirection blocking instruction,
4284 we cannot index the branch_uid or insn_addresses arrays with its
4285 uid. But then, we won't need to, because the actual destination is
4286 the following branch. */
4287 while (dest_uid >= max_uid)
4288 {
4289 dest = NEXT_INSN (dest);
4290 dest_uid = INSN_UID (dest);
4291 }
4292 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4293 return 0;
4294 return dest_uid;
4295 }
4296
4297 /* Split condbranches that are out of range. Also add clobbers for
4298 scratch registers that are needed in far jumps.
4299 We do this before delay slot scheduling, so that it can take our
4300 newly created instructions into account. It also allows us to
4301 find branches with common targets more easily. */
4302
static void
split_branches (rtx first)
{
  rtx insn;
  struct far_branch **uid_branch, *far_branch_list = 0;
  int max_uid = get_max_uid ();

  /* Find out which branches are out of range.  */
  shorten_branches (first);

  /* uid_branch maps the uid of a branch destination insn to the
     far_branch record for branches to it, if any has been seen yet.  */
  uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
  memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);

  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (! INSN_P (insn))
      continue;
    else if (INSN_DELETED_P (insn))
      {
	/* Shorten_branches would split this instruction again,
	   so transform it into a note.  */
	PUT_CODE (insn, NOTE);
	NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
	NOTE_SOURCE_FILE (insn) = 0;
      }
    else if (GET_CODE (insn) == JUMP_INSN
	     /* Don't mess with ADDR_DIFF_VEC */
	     && (GET_CODE (PATTERN (insn)) == SET
		 || GET_CODE (PATTERN (insn)) == RETURN))
      {
	enum attr_type type = get_attr_type (insn);
	if (type == TYPE_CBRANCH)
	  {
	    rtx next, beyond;

	    if (get_attr_length (insn) > 4)
	      {
		/* Conditional branch that does not reach its target:
		   redirect it to a near label, creating a far_branch
		   record / stub as needed.  */
		rtx src = SET_SRC (PATTERN (insn));
		rtx olabel = XEXP (XEXP (src, 1), 0);
		int addr = INSN_ADDRESSES (INSN_UID (insn));
		rtx label = 0;
		int dest_uid = get_dest_uid (olabel, max_uid);
		struct far_branch *bp = uid_branch[dest_uid];

		/* redirect_jump needs a valid JUMP_LABEL, and it might delete
		   the label if the LABEL_NUSES count drops to zero.  There is
		   always a jump_optimize pass that sets these values, but it
		   proceeds to delete unreferenced code, and then if not
		   optimizing, to un-delete the deleted instructions, thus
		   leaving labels with too low uses counts.  */
		if (! optimize)
		  {
		    JUMP_LABEL (insn) = olabel;
		    LABEL_NUSES (olabel)++;
		  }
		if (! bp)
		  {
		    /* First out-of-range branch to this destination:
		       start a new far_branch record.  */
		    bp = (struct far_branch *) alloca (sizeof *bp);
		    uid_branch[dest_uid] = bp;
		    bp->prev = far_branch_list;
		    far_branch_list = bp;
		    bp->far_label
		      = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
		    LABEL_NUSES (bp->far_label)++;
		  }
		else
		  {
		    label = bp->near_label;
		    if (! label && bp->address - addr >= CONDJUMP_MIN)
		      {
			/* The previously recorded insert place is in range
			   of this branch; emit the near label there.  */
			rtx block = bp->insert_place;

			if (GET_CODE (PATTERN (block)) == RETURN)
			  block = PREV_INSN (block);
			else
			  block = gen_block_redirect (block,
						      bp->address, 2);
			label = emit_label_after (gen_label_rtx (),
						  PREV_INSN (block));
			bp->near_label = label;
		      }
		    else if (label && ! NEXT_INSN (label))
		      {
			/* The near label exists but is not yet placed in
			   the insn stream.  */
			if (addr + 2 - bp->address <= CONDJUMP_MAX)
			  bp->insert_place = insn;
			else
			  gen_far_branch (bp);
		      }
		  }
		if (! label
		    || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
		  {
		    /* No usable near label yet; create one and remember
		       this branch as the place to insert the stub.  */
		    bp->near_label = label = gen_label_rtx ();
		    bp->insert_place = insn;
		    bp->address = addr;
		  }
		if (! redirect_jump (insn, label, 1))
		  abort ();
	      }
	    else
	      {
		/* get_attr_length (insn) == 2 */
		/* Check if we have a pattern where reorg wants to redirect
		   the branch to a label from an unconditional branch that
		   is too far away.  */
		/* We can't use JUMP_LABEL here because it might be undefined
		   when not optimizing.  */
		/* A syntax error might cause beyond to be NULL_RTX.  */
		beyond
		  = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
					    0));

		if (beyond
		    && (GET_CODE (beyond) == JUMP_INSN
			|| ((beyond = next_active_insn (beyond))
			    && GET_CODE (beyond) == JUMP_INSN))
		    && GET_CODE (PATTERN (beyond)) == SET
		    && recog_memoized (beyond) == CODE_FOR_jump_compact
		    && ((INSN_ADDRESSES
			 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
			 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
			> 252 + 258 + 2))
		  gen_block_redirect (beyond,
				      INSN_ADDRESSES (INSN_UID (beyond)), 1);
	      }

	    next = next_active_insn (insn);

	    /* Same check for the insn following the conditional branch.  */
	    if ((GET_CODE (next) == JUMP_INSN
		 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
		&& GET_CODE (PATTERN (next)) == SET
		&& recog_memoized (next) == CODE_FOR_jump_compact
		&& ((INSN_ADDRESSES
		     (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
		     - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
		    > 252 + 258 + 2))
	      gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
	  }
	else if (type == TYPE_JUMP || type == TYPE_RETURN)
	  {
	    int addr = INSN_ADDRESSES (INSN_UID (insn));
	    rtx far_label = 0;
	    int dest_uid = 0;
	    struct far_branch *bp;

	    if (type == TYPE_JUMP)
	      {
		far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
		dest_uid = get_dest_uid (far_label, max_uid);
		if (! dest_uid)
		  {
		    /* Parse errors can lead to labels outside
		       the insn stream.  */
		    if (! NEXT_INSN (far_label))
		      continue;

		    if (! optimize)
		      {
			JUMP_LABEL (insn) = far_label;
			LABEL_NUSES (far_label)++;
		      }
		    redirect_jump (insn, NULL_RTX, 1);
		    far_label = 0;
		  }
	      }
	    bp = uid_branch[dest_uid];
	    if (! bp)
	      {
		/* First branch seen to this destination; record it so
		   later conditional branches can share the near label.  */
		bp = (struct far_branch *) alloca (sizeof *bp);
		uid_branch[dest_uid] = bp;
		bp->prev = far_branch_list;
		far_branch_list = bp;
		bp->near_label = 0;
		bp->far_label = far_label;
		if (far_label)
		  LABEL_NUSES (far_label)++;
	      }
	    else if (bp->near_label && ! NEXT_INSN (bp->near_label))
	      /* The pending near label can be placed just before this
		 jump if it is close enough; otherwise emit the stub.  */
	      if (addr - bp->address <= CONDJUMP_MAX)
		emit_label_after (bp->near_label, PREV_INSN (insn));
	      else
		{
		  gen_far_branch (bp);
		  bp->near_label = 0;
		}
	    else
	      bp->near_label = 0;
	    bp->address = addr;
	    bp->insert_place = insn;
	    if (! far_label)
	      emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
	    else
	      gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
	  }
      }
  /* Generate all pending far branches,
     and free our references to the far labels.  */
  while (far_branch_list)
    {
      if (far_branch_list->near_label
	  && ! NEXT_INSN (far_branch_list->near_label))
	gen_far_branch (far_branch_list);
      if (optimize
	  && far_branch_list->far_label
	  && ! --LABEL_NUSES (far_branch_list->far_label))
	delete_insn (far_branch_list->far_label);
      far_branch_list = far_branch_list->prev;
    }

  /* Instruction length information is no longer valid due to the new
     instructions that have been generated.  */
  init_insn_lengths ();
}
4515
4516 /* Dump out instruction addresses, which is useful for debugging the
4517 constant pool table stuff.
4518
4519 If relaxing, output the label and pseudo-ops used to link together
4520 calls and the instruction which set the registers. */
4521
4522 /* ??? The addresses printed by this routine for insns are nonsense for
4523 insns which are inside of a sequence where none of the inner insns have
4524 variable length. This is because the second pass of shorten_branches
4525 does not bother to update them. */
4526
4527 void
final_prescan_insn(rtx insn,rtx * opvec ATTRIBUTE_UNUSED,int noperands ATTRIBUTE_UNUSED)4528 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4529 int noperands ATTRIBUTE_UNUSED)
4530 {
4531 if (TARGET_DUMPISIZE)
4532 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4533
4534 if (TARGET_RELAX)
4535 {
4536 rtx note;
4537
4538 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4539 if (note)
4540 {
4541 rtx pattern;
4542
4543 pattern = PATTERN (insn);
4544 if (GET_CODE (pattern) == PARALLEL)
4545 pattern = XVECEXP (pattern, 0, 0);
4546 if (GET_CODE (pattern) == CALL
4547 || (GET_CODE (pattern) == SET
4548 && (GET_CODE (SET_SRC (pattern)) == CALL
4549 || get_attr_type (insn) == TYPE_SFUNC)))
4550 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4551 CODE_LABEL_NUMBER (XEXP (note, 0)));
4552 else if (GET_CODE (pattern) == SET)
4553 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4554 CODE_LABEL_NUMBER (XEXP (note, 0)));
4555 else
4556 abort ();
4557 }
4558 }
4559 }
4560
4561 /* Dump out any constants accumulated in the final pass. These will
4562 only be labels. */
4563
4564 const char *
output_jump_label_table(void)4565 output_jump_label_table (void)
4566 {
4567 int i;
4568
4569 if (pool_size)
4570 {
4571 fprintf (asm_out_file, "\t.align 2\n");
4572 for (i = 0; i < pool_size; i++)
4573 {
4574 pool_node *p = &pool_vector[i];
4575
4576 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4577 CODE_LABEL_NUMBER (p->label));
4578 output_asm_insn (".long %O0", &p->value);
4579 }
4580 pool_size = 0;
4581 }
4582
4583 return "";
4584 }
4585
4586 /* A full frame looks like:
4587
4588 arg-5
4589 arg-4
4590 [ if current_function_anonymous_args
4591 arg-3
4592 arg-2
4593 arg-1
4594 arg-0 ]
4595 saved-fp
4596 saved-r10
4597 saved-r11
4598 saved-r12
4599 saved-pr
4600 local-n
4601 ..
4602 local-1
4603 local-0 <- fp points here. */
4604
/* Number of bytes pushed for anonymous args, used to pass information
   between expand_prologue and expand_epilogue.  */

static int extra_push;
4609
4610 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4611 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4612 for an epilogue and a negative value means that it's for a sibcall
4613 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4614 all the registers that are about to be restored, and hence dead. */
4615
static void
output_stack_adjust (int size, rtx reg, int epilogue_p,
		     HARD_REG_SET *live_regs_mask)
{
  /* In a prologue, use frame_insn so the adjustment is marked as
     frame-related for unwind info; in an epilogue a plain insn will do.  */
  rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
  if (size)
    {
      HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;

      /* The adjustment is expected to preserve stack alignment.  */
      if (size % align)
	abort ();

      if (CONST_OK_FOR_ADD (size))
	emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
      /* Try to do it with two partial adjustments; however, we must make
	 sure that the stack is properly aligned at all times, in case
	 an interrupt occurs between the two partial adjustments. */
      else if (CONST_OK_FOR_ADD (size / 2 & -align)
	       && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
	{
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
	  emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
	}
      else
	{
	  /* The constant is too big for an immediate add: load it into a
	     temporary register first.  */
	  rtx const_reg;
	  rtx insn;
	  int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
	  int i;

	  /* If TEMP is invalid, we could temporarily save a general
	     register to MACL.  However, there is currently no need
	     to handle this case, so just abort when we see it.  */
	  if (epilogue_p < 0
	      || current_function_interrupt
	      || ! call_used_regs[temp] || fixed_regs[temp])
	    temp = -1;
	  if (temp < 0 && ! current_function_interrupt
	      && (TARGET_SHMEDIA || epilogue_p >= 0))
	    {
	      /* Scavenge a call-clobbered, non-fixed register that does
		 not hold a live value at this point.  */
	      HARD_REG_SET temps;
	      COPY_HARD_REG_SET (temps, call_used_reg_set);
	      AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
	      if (epilogue_p > 0)
		{
		  /* In an epilogue, the return value registers and the
		     eh_return registers are live.  */
		  int nreg = 0;
		  if (current_function_return_rtx)
		    {
		      enum machine_mode mode;
		      mode = GET_MODE (current_function_return_rtx);
		      if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
			nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
		    }
		  for (i = 0; i < nreg; i++)
		    CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
		  if (current_function_calls_eh_return)
		    {
		      CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
		      for (i = 0; i <= 3; i++)
			CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
		    }
		}
	      if (TARGET_SHMEDIA && epilogue_p < 0)
		for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
		  CLEAR_HARD_REG_BIT (temps, i);
	      if (epilogue_p <= 0)
		{
		  /* In a prologue, the incoming argument registers and the
		     static chain register are live.  */
		  for (i = FIRST_PARM_REG;
		       i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
		    CLEAR_HARD_REG_BIT (temps, i);
		  if (current_function_needs_context)
		    CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
		}
	      temp = scavenge_reg (&temps);
	    }
	  if (temp < 0 && live_regs_mask)
	    temp = scavenge_reg (live_regs_mask);
	  if (temp < 0)
	    {
	      /* If we reached here, the most likely case is the (sibcall)
		 epilogue for non SHmedia.  Put a special push/pop sequence
		 for such case as the last resort.  This looks lengthy but
		 would not be problem because it seems to be very rare.  */
	      if (! TARGET_SHMEDIA && epilogue_p)
		{
		  rtx adj_reg, tmp_reg, mem;

		  /* ??? There is still the slight possibility that r4 or r5
		     have been reserved as fixed registers or assigned as
		     global registers, and they change during an interrupt.
		     There are possible ways to handle this:
		     - If we are adjusting the frame pointer (r14), we can do
		       with a single temp register and an ordinary push / pop
		       on the stack.
		     - Grab any call-used or call-saved registers (i.e. not
		       fixed or globals) for the temps we need.  We might
		       also grab r14 if we are adjusting the stack pointer.
		       If we can't find enough available registers, issue
		       a diagnostic and abort - the user must have reserved
		       way too many registers.
		     But since all this is rather unlikely to happen and
		     would require extra testing, we just abort if r4 / r5
		     are not available.  */
		  if (fixed_regs[4] || fixed_regs[5]
		      || global_regs[4] || global_regs[5])
		    abort ();

		  /* Save r4/r5 on the stack, use them to do the adjustment,
		     then restore them from the adjusted stack.  */
		  adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
		  tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
		  emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
		  emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
		  emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
		  mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
		  emit_move_insn (mem, tmp_reg);
		  emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
		  mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
		  emit_move_insn (mem, tmp_reg);
		  emit_move_insn (reg, adj_reg);
		  mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
		  emit_move_insn (adj_reg, mem);
		  mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
		  emit_move_insn (tmp_reg, mem);
		  return;
		}
	      else
		abort ();
	    }
	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);

	  /* If SIZE is negative, subtract the positive value.
	     This sometimes allows a constant pool entry to be shared
	     between prologue and epilogue code.  */
	  if (size < 0)
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
	      insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
	    }
	  else
	    {
	      emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
	      insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
	    }
	  if (! epilogue_p)
	    /* Multiple insns were needed; describe the net effect of the
	       adjustment to the unwinder with a single expression.  */
	    REG_NOTES (insn)
	      = (gen_rtx_EXPR_LIST
		 (REG_FRAME_RELATED_EXPR,
		  gen_rtx_SET (VOIDmode, reg,
			       gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
		  REG_NOTES (insn)));
	}
    }
}
4768
4769 static rtx
frame_insn(rtx x)4770 frame_insn (rtx x)
4771 {
4772 x = emit_insn (x);
4773 RTX_FRAME_RELATED_P (x) = 1;
4774 return x;
4775 }
4776
4777 /* Output RTL to push register RN onto the stack. */
4778
4779 static rtx
push(int rn)4780 push (int rn)
4781 {
4782 rtx x;
4783 if (rn == FPUL_REG)
4784 x = gen_push_fpul ();
4785 else if (rn == FPSCR_REG)
4786 x = gen_push_fpscr ();
4787 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4788 && FP_OR_XD_REGISTER_P (rn))
4789 {
4790 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4791 return NULL_RTX;
4792 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4793 }
4794 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4795 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4796 else
4797 x = gen_push (gen_rtx_REG (SImode, rn));
4798
4799 x = frame_insn (x);
4800 REG_NOTES (x)
4801 = gen_rtx_EXPR_LIST (REG_INC,
4802 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4803 return x;
4804 }
4805
4806 /* Output RTL to pop register RN from the stack. */
4807
4808 static void
pop(int rn)4809 pop (int rn)
4810 {
4811 rtx x;
4812 if (rn == FPUL_REG)
4813 x = gen_pop_fpul ();
4814 else if (rn == FPSCR_REG)
4815 x = gen_pop_fpscr ();
4816 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4817 && FP_OR_XD_REGISTER_P (rn))
4818 {
4819 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4820 return;
4821 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4822 }
4823 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4824 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4825 else
4826 x = gen_pop (gen_rtx_REG (SImode, rn));
4827
4828 x = emit_insn (x);
4829 REG_NOTES (x)
4830 = gen_rtx_EXPR_LIST (REG_INC,
4831 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4832 }
4833
4834 /* Generate code to push the regs specified in the mask. */
4835
4836 static void
push_regs(HARD_REG_SET * mask,int interrupt_handler)4837 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4838 {
4839 int i;
4840 int skip_fpscr = 0;
4841
4842 /* Push PR last; this gives better latencies after the prologue, and
4843 candidates for the return delay slot when there are no general
4844 registers pushed. */
4845 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4846 {
4847 /* If this is an interrupt handler, and the SZ bit varies,
4848 and we have to push any floating point register, we need
4849 to switch to the correct precision first. */
4850 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4851 && hard_regs_intersect_p (mask, ®_class_contents[DF_REGS]))
4852 {
4853 HARD_REG_SET unsaved;
4854
4855 push (FPSCR_REG);
4856 COMPL_HARD_REG_SET(unsaved, *mask);
4857 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4858 skip_fpscr = 1;
4859 }
4860 if (i != PR_REG
4861 && (i != FPSCR_REG || ! skip_fpscr)
4862 && TEST_HARD_REG_BIT (*mask, i))
4863 push (i);
4864 }
4865 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4866 push (PR_REG);
4867 }
4868
4869 /* Calculate how much extra space is needed to save all callee-saved
4870 target registers.
4871 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4872
4873 static int
shmedia_target_regs_stack_space(HARD_REG_SET * live_regs_mask)4874 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4875 {
4876 int reg;
4877 int stack_space = 0;
4878 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4879
4880 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4881 if ((! call_used_regs[reg] || interrupt_handler)
4882 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4883 /* Leave space to save this target register on the stack,
4884 in case target register allocation wants to use it. */
4885 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4886 return stack_space;
4887 }
4888
4889 /* Decide whether we should reserve space for callee-save target registers,
4890 in case target register allocation wants to use them. REGS_SAVED is
4891 the space, in bytes, that is already required for register saves.
4892 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4893
4894 static int
shmedia_reserve_space_for_target_registers_p(int regs_saved,HARD_REG_SET * live_regs_mask)4895 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4896 HARD_REG_SET *live_regs_mask)
4897 {
4898 if (optimize_size)
4899 return 0;
4900 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4901 }
4902
4903 /* Decide how much space to reserve for callee-save target registers
4904 in case target register allocation wants to use them.
4905 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4906
4907 static int
shmedia_target_regs_stack_adjust(HARD_REG_SET * live_regs_mask)4908 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
4909 {
4910 if (shmedia_space_reserved_for_target_registers)
4911 return shmedia_target_regs_stack_space (live_regs_mask);
4912 else
4913 return 0;
4914 }
4915
4916 /* Work out the registers which need to be saved, both as a mask and a
4917 count of saved words. Return the count.
4918
4919 If doing a pragma interrupt function, then push all regs used by the
4920 function, and if we call another function (we can tell by looking at PR),
4921 make sure that all the regs it clobbers are safe too. */
4922
4923 static int
calc_live_regs(HARD_REG_SET * live_regs_mask)4924 calc_live_regs (HARD_REG_SET *live_regs_mask)
4925 {
4926 int reg;
4927 int count;
4928 int interrupt_handler;
4929 int pr_live, has_call;
4930
4931 interrupt_handler = sh_cfun_interrupt_handler_p ();
4932
4933 CLEAR_HARD_REG_SET (*live_regs_mask);
4934 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
4935 && regs_ever_live[FPSCR_REG])
4936 target_flags &= ~FPU_SINGLE_BIT;
4937 /* If we can save a lot of saves by switching to double mode, do that. */
4938 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
4939 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
4940 if (regs_ever_live[reg] && regs_ever_live[reg+1]
4941 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
4942 && ++count > 2)
4943 {
4944 target_flags &= ~FPU_SINGLE_BIT;
4945 break;
4946 }
4947 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
4948 knows how to use it. That means the pseudo originally allocated for
4949 the initial value can become the PR_MEDIA_REG hard register, as seen for
4950 execute/20010122-1.c:test9. */
4951 if (TARGET_SHMEDIA)
4952 /* ??? this function is called from initial_elimination_offset, hence we
4953 can't use the result of sh_media_register_for_return here. */
4954 pr_live = sh_pr_n_sets ();
4955 else
4956 {
4957 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
4958 pr_live = (pr_initial
4959 ? (GET_CODE (pr_initial) != REG
4960 || REGNO (pr_initial) != (PR_REG))
4961 : regs_ever_live[PR_REG]);
4962 /* For Shcompact, if not optimizing, we end up with a memory reference
4963 using the return address pointer for __builtin_return_address even
4964 though there is no actual need to put the PR register on the stack. */
4965 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
4966 }
4967 /* Force PR to be live if the prologue has to call the SHmedia
4968 argument decoder or register saver. */
4969 if (TARGET_SHCOMPACT
4970 && ((current_function_args_info.call_cookie
4971 & ~ CALL_COOKIE_RET_TRAMP (1))
4972 || current_function_has_nonlocal_label))
4973 pr_live = 1;
4974 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
4975 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
4976 {
4977 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
4978 ? pr_live
4979 : (interrupt_handler && ! pragma_trapa)
4980 ? (/* Need to save all the regs ever live. */
4981 (regs_ever_live[reg]
4982 || (call_used_regs[reg]
4983 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
4984 && has_call)
4985 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
4986 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
4987 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
4988 && reg != RETURN_ADDRESS_POINTER_REGNUM
4989 && reg != T_REG && reg != GBR_REG
4990 /* Push fpscr only on targets which have FPU */
4991 && (reg != FPSCR_REG || TARGET_FPU_ANY))
4992 : (/* Only push those regs which are used and need to be saved. */
4993 (TARGET_SHCOMPACT
4994 && flag_pic
4995 && current_function_args_info.call_cookie
4996 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
4997 || (regs_ever_live[reg] && ! call_used_regs[reg])
4998 || (current_function_calls_eh_return
4999 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5000 || reg == (int) EH_RETURN_DATA_REGNO (1)
5001 || reg == (int) EH_RETURN_DATA_REGNO (2)
5002 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5003 || ((reg == MACL_REG || reg == MACH_REG)
5004 && regs_ever_live[reg]
5005 && sh_cfun_attr_renesas_p ())
5006 ))
5007 {
5008 SET_HARD_REG_BIT (*live_regs_mask, reg);
5009 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5010
5011 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
5012 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5013 {
5014 if (FP_REGISTER_P (reg))
5015 {
5016 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5017 {
5018 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5019 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5020 }
5021 }
5022 else if (XD_REGISTER_P (reg))
5023 {
5024 /* Must switch to double mode to access these registers. */
5025 target_flags &= ~FPU_SINGLE_BIT;
5026 }
5027 }
5028 }
5029 }
5030 /* If we have a target register optimization pass after prologue / epilogue
5031 threading, we need to assume all target registers will be live even if
5032 they aren't now. */
5033 if (flag_branch_target_load_optimize2
5034 && TARGET_SAVE_ALL_TARGET_REGS
5035 && shmedia_space_reserved_for_target_registers)
5036 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5037 if ((! call_used_regs[reg] || interrupt_handler)
5038 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5039 {
5040 SET_HARD_REG_BIT (*live_regs_mask, reg);
5041 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5042 }
5043 /* If this is an interrupt handler, we don't have any call-clobbered
5044 registers we can conveniently use for target register save/restore.
5045 Make sure we save at least one general purpose register when we need
5046 to save target registers. */
5047 if (interrupt_handler
5048 && hard_regs_intersect_p (live_regs_mask,
5049 ®_class_contents[TARGET_REGS])
5050 && ! hard_regs_intersect_p (live_regs_mask,
5051 ®_class_contents[GENERAL_REGS]))
5052 {
5053 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5054 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5055 }
5056
5057 return count;
5058 }
5059
5060 /* Code to generate prologue and epilogue sequences */
5061
5062 /* PUSHED is the number of bytes that are being pushed on the
5063 stack for register saves. Return the frame size, padded
5064 appropriately so that the stack stays properly aligned. */
5065 static HOST_WIDE_INT
rounded_frame_size(int pushed)5066 rounded_frame_size (int pushed)
5067 {
5068 HOST_WIDE_INT size = get_frame_size ();
5069 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5070
5071 return ((size + pushed + align - 1) & -align) - pushed;
5072 }
5073
5074 /* Choose a call-clobbered target-branch register that remains
5075 unchanged along the whole function. We set it up as the return
5076 value in the prologue. */
5077 int
sh_media_register_for_return(void)5078 sh_media_register_for_return (void)
5079 {
5080 int regno;
5081 int tr0_used;
5082
5083 if (! current_function_is_leaf)
5084 return -1;
5085 if (lookup_attribute ("interrupt_handler",
5086 DECL_ATTRIBUTES (current_function_decl)))
5087 return -1;
5088
5089 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5090
5091 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5092 if (call_used_regs[regno] && ! regs_ever_live[regno])
5093 return regno;
5094
5095 return -1;
5096 }
5097
/* The maximum registers we need to save are:
   - 62 general purpose registers (r15 is stack pointer, r63 is zero)
   - 32 floating point registers (for each pair, we save none,
     one single precision value, or a double precision value).
   - 8 target registers.
   The delimiter entries are accounted for separately, by the "+ 2"
   in the ENTRIES array below.  */
#define MAX_SAVED_REGS (62+32+8)

/* One register save in a prologue/epilogue save schedule.  */
typedef struct save_entry_s
{
  unsigned char reg;	/* Hard register number.  */
  unsigned char mode;	/* Mode it is saved in (an enum machine_mode).  */
  short offset;		/* Stack offset of the save slot.  */
} save_entry;

/* Number of scratch registers collected in a schedule's TEMPS list.  */
#define MAX_TEMPS 4

/* There will be a delimiter entry with VOIDmode both at the start and the
   end of a filled in schedule.  The end delimiter has the offset of the
   save with the smallest (i.e. most negative) offset.  */
typedef struct save_schedule_s
{
  save_entry entries[MAX_SAVED_REGS + 2];
  int temps[MAX_TEMPS+1];	/* -1-terminated list of scratch registers.  */
} save_schedule;
5123
5124 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5125 use reverse order. Returns the last entry written to (not counting
5126 the delimiter). OFFSET_BASE is a number to be added to all offset
5127 entries. */
5128
5129 static save_entry *
sh5_schedule_saves(HARD_REG_SET * live_regs_mask,save_schedule * schedule,int offset_base)5130 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5131 int offset_base)
5132 {
5133 int align, i;
5134 save_entry *entry = schedule->entries;
5135 int tmpx = 0;
5136 int offset;
5137
5138 if (! current_function_interrupt)
5139 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5140 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5141 && ! FUNCTION_ARG_REGNO_P (i)
5142 && i != FIRST_RET_REG
5143 && ! (current_function_needs_context && i == STATIC_CHAIN_REGNUM)
5144 && ! (current_function_calls_eh_return
5145 && (i == EH_RETURN_STACKADJ_REGNO
5146 || ((unsigned)i <= EH_RETURN_DATA_REGNO (0)
5147 && (unsigned)i >= EH_RETURN_DATA_REGNO (3)))))
5148 schedule->temps[tmpx++] = i;
5149 entry->reg = -1;
5150 entry->mode = VOIDmode;
5151 entry->offset = offset_base;
5152 entry++;
5153 /* We loop twice: first, we save 8-byte aligned registers in the
5154 higher addresses, that are known to be aligned. Then, we
5155 proceed to saving 32-bit registers that don't need 8-byte
5156 alignment.
5157 If this is an interrupt function, all registers that need saving
5158 need to be saved in full. moreover, we need to postpone saving
5159 target registers till we have saved some general purpose registers
5160 we can then use as scratch registers. */
5161 offset = offset_base;
5162 for (align = 1; align >= 0; align--)
5163 {
5164 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5165 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5166 {
5167 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5168 int reg = i;
5169
5170 if (current_function_interrupt)
5171 {
5172 if (TARGET_REGISTER_P (i))
5173 continue;
5174 if (GENERAL_REGISTER_P (i))
5175 mode = DImode;
5176 }
5177 if (mode == SFmode && (i % 2) == 1
5178 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5179 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5180 {
5181 mode = DFmode;
5182 i--;
5183 reg--;
5184 }
5185
5186 /* If we're doing the aligned pass and this is not aligned,
5187 or we're doing the unaligned pass and this is aligned,
5188 skip it. */
5189 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5190 != align)
5191 continue;
5192
5193 if (current_function_interrupt
5194 && GENERAL_REGISTER_P (i)
5195 && tmpx < MAX_TEMPS)
5196 schedule->temps[tmpx++] = i;
5197
5198 offset -= GET_MODE_SIZE (mode);
5199 entry->reg = i;
5200 entry->mode = mode;
5201 entry->offset = offset;
5202 entry++;
5203 }
5204 if (align && current_function_interrupt)
5205 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5206 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5207 {
5208 offset -= GET_MODE_SIZE (DImode);
5209 entry->reg = i;
5210 entry->mode = DImode;
5211 entry->offset = offset;
5212 entry++;
5213 }
5214 }
5215 entry->reg = -1;
5216 entry->mode = VOIDmode;
5217 entry->offset = offset;
5218 schedule->temps[tmpx] = -1;
5219 return entry - 1;
5220 }
5221
/* Expand the prologue of the current function: adjust the stack for
   pretend args, set up varargs / PIC / SHmedia return registers, save
   the live registers and allocate the local frame.  */
void
sh_expand_prologue (void)
{
  HARD_REG_SET live_regs_mask;
  int d, i;
  int d_rounding = 0;
  int save_flags = target_flags;

  current_function_interrupt = sh_cfun_interrupt_handler_p ();

  /* We have pretend args if we had an object sent partially in registers
     and partially on the stack, e.g. a large structure.  */
  output_stack_adjust (-current_function_pretend_args_size
		       - current_function_args_info.stack_regs * 8,
		       stack_pointer_rtx, 0, NULL);

  /* Counts bytes pushed for anonymous args below; read back by
     sh_expand_epilogue.  */
  extra_push = 0;

  if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
    /* We're going to use the PIC register to load the address of the
       incoming-argument decoder and/or of the return trampoline from
       the GOT, so make sure the PIC register is preserved and
       initialized.  */
    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_SHCOMPACT
      && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
    {
      int reg;

      /* First, make all registers with incoming arguments that will
	 be pushed onto the stack live, so that register renaming
	 doesn't overwrite them.  */
      for (reg = 0; reg < NPARM_REGS (SImode); reg++)
	if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
	    >= NPARM_REGS (SImode) - reg)
	  for (; reg < NPARM_REGS (SImode); reg++)
	    emit_insn (gen_shcompact_preserve_incoming_args
		       (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
	else if (CALL_COOKIE_INT_REG_GET
		 (current_function_args_info.call_cookie, reg) == 1)
	  emit_insn (gen_shcompact_preserve_incoming_args
		     (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));

      /* Pass sp and the call cookie to the argument decoder in macl,
	 r0 and mach.  */
      emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
		      stack_pointer_rtx);
      emit_move_insn (gen_rtx_REG (SImode, R0_REG),
		      GEN_INT (current_function_args_info.call_cookie));
      emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
		      gen_rtx_REG (SImode, R0_REG));
    }
  else if (TARGET_SHMEDIA)
    {
      int tr = sh_media_register_for_return ();

      if (tr >= 0)
	{
	  rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
				     gen_rtx_REG (DImode, PR_MEDIA_REG));

	  /* ??? We should suppress saving pr when we don't need it, but this
	     is tricky because of builtin_return_address.  */

	  /* If this function only exits with sibcalls, this copy
	     will be flagged as dead.  */
	  REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
						const0_rtx,
						REG_NOTES (insn));
	}
    }

  /* Emit the code for SETUP_VARARGS.  */
  if (current_function_stdarg)
    {
      /* This is not used by the SH2E calling convention  */
      if (TARGET_SH1 && ! TARGET_SH2E && ! TARGET_SH5
	  && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
	{
	  /* Push arg regs as if they'd been provided by caller in stack.  */
	  for (i = 0; i < NPARM_REGS(SImode); i++)
	    {
	      int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
	      rtx insn;

	      if (i >= (NPARM_REGS(SImode)
			- current_function_args_info.arg_count[(int) SH_ARG_INT]
			))
		break;
	      insn = push (rn);
	      RTX_FRAME_RELATED_P (insn) = 0;
	      extra_push += 4;
	    }
	}
    }

  /* If we're supposed to switch stacks at function entry, do so now.  */
  if (sp_switch)
    emit_insn (gen_sp_switch_1 ());

  /* D is the number of bytes needed for the register saves.  */
  d = calc_live_regs (&live_regs_mask);
  /* ??? Maybe we could save some switching if we can move a mode switch
     that already happens to be at the function start into the prologue.  */
  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());

  if (TARGET_SH5)
    {
      int offset_base, offset;
      rtx r0 = NULL_RTX;
      int offset_in_r0 = -1;
      int sp_in_r0 = 0;
      int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
      int total_size, save_size;
      save_schedule schedule;
      save_entry *entry;
      int *tmp_pnt;

      if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
	  && ! current_function_interrupt)
	r0 = gen_rtx_REG (Pmode, R0_REG);

      /* D is the actual number of bytes that we need for saving registers,
	 however, in initial_elimination_offset we have committed to using
	 an additional TREGS_SPACE amount of bytes - in order to keep both
	 addresses to arguments supplied by the caller and local variables
	 valid, we must keep this gap.  Place it between the incoming
	 arguments and the actually saved registers in a bid to optimize
	 locality of reference.  */
      total_size = d + tregs_space;
      total_size += rounded_frame_size (total_size);
      save_size = total_size - rounded_frame_size (d);
      if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
	d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
		      - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));

      /* If adjusting the stack in a single step costs nothing extra, do so.
	 I.e. either if a single addi is enough, or we need a movi anyway,
	 and we don't exceed the maximum offset range (the test for the
	 latter is conservative for simplicity).  */
      if (TARGET_SHMEDIA
	  && (CONST_OK_FOR_I10 (-total_size)
	      || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
		  && total_size <= 2044)))
	d_rounding = total_size - save_size;

      offset_base = d + d_rounding;

      output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
			   0, NULL);

      sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
      tmp_pnt = schedule.temps;
      /* Walk the schedule, skipping the leading delimiter entry.  */
      for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
        {
	  enum machine_mode mode = entry->mode;
	  int reg = entry->reg;
	  rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;

	  offset = entry->offset;

	  reg_rtx = gen_rtx_REG (mode, reg);

	  mem_rtx = gen_rtx_MEM (mode,
				 gen_rtx_PLUS (Pmode,
					       stack_pointer_rtx,
					       GEN_INT (offset)));

	  /* Try a direct sp+offset address first; jumps past the
	     fallback when it is legitimate.  */
	  GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);

	  /* All fallback addressing forms below need r0 as scratch.  */
	  if (! r0)
	    abort ();
	  mem_rtx = NULL_RTX;

	try_pre_dec:
	  do
	    if (HAVE_PRE_DECREMENT
		&& (offset_in_r0 - offset == GET_MODE_SIZE (mode)
		    || mem_rtx == NULL_RTX
		    || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
	      {
		pre_dec = gen_rtx_MEM (mode,
				       gen_rtx_PRE_DEC (Pmode, r0));

		GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
					  pre_dec_ok);

		pre_dec = NULL_RTX;

		break;

	      pre_dec_ok:
		mem_rtx = NULL_RTX;
		offset += GET_MODE_SIZE (mode);
	      }
	  while (0);

	  if (mem_rtx != NULL_RTX)
	    goto addr_ok;

	  /* Materialize OFFSET in r0, reusing whatever value r0 already
	     tracks (OFFSET_IN_R0) when possible.  */
	  if (offset_in_r0 == -1)
	    {
	      emit_move_insn (r0, GEN_INT (offset));
	      offset_in_r0 = offset;
	    }
	  else if (offset != offset_in_r0)
	    {
	      emit_move_insn (r0,
			      gen_rtx_PLUS
			      (Pmode, r0,
			       GEN_INT (offset - offset_in_r0)));
	      offset_in_r0 += offset - offset_in_r0;
	    }

	  if (pre_dec != NULL_RTX)
	    {
	      if (! sp_in_r0)
		{
		  emit_move_insn (r0,
				  gen_rtx_PLUS
				  (Pmode, r0, stack_pointer_rtx));
		  sp_in_r0 = 1;
		}

	      offset -= GET_MODE_SIZE (mode);
	      offset_in_r0 -= GET_MODE_SIZE (mode);

	      mem_rtx = pre_dec;
	    }
	  else if (sp_in_r0)
	    mem_rtx = gen_rtx_MEM (mode, r0);
	  else
	    mem_rtx = gen_rtx_MEM (mode,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 r0));

	  /* We must not use an r0-based address for target-branch
	     registers or for special registers without pre-dec
	     memory addresses, since we store their values in r0
	     first.  */
	  if (TARGET_REGISTER_P (reg)
	      || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
		  && mem_rtx != pre_dec))
	    abort ();

	addr_ok:
	  /* Registers that can't be stored directly go through one of the
	     scratch registers collected by sh5_schedule_saves.  */
	  if (TARGET_REGISTER_P (reg)
	      || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
		  && mem_rtx != pre_dec))
	    {
	      rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);

	      emit_move_insn (tmp_reg, reg_rtx);

	      /* Using r0 as a scratch invalidates the offset tracking.  */
	      if (REGNO (tmp_reg) == R0_REG)
		{
		  offset_in_r0 = -1;
		  sp_in_r0 = 0;
		  if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
		    abort ();
		}

	      if (*++tmp_pnt <= 0)
		tmp_pnt = schedule.temps;

	      reg_rtx = tmp_reg;
	    }
	  {
	    rtx insn;

	    /* Mark as interesting for dwarf cfi generator */
	    insn = emit_move_insn (mem_rtx, reg_rtx);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
	      {
		/* Record the save as a sp+offset store for the CFI info,
		   since the emitted form went through r0.  */
		rtx reg_rtx = gen_rtx_REG (mode, reg);
		rtx set, note_rtx;
		rtx mem_rtx = gen_rtx_MEM (mode,
					   gen_rtx_PLUS (Pmode,
							 stack_pointer_rtx,
							 GEN_INT (offset)));

		set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
		note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
					      REG_NOTES (insn));
		REG_NOTES (insn) = note_rtx;
	      }
	  }
	}

      if (entry->offset != d_rounding)
	abort ();
    }
  else
    push_regs (&live_regs_mask, current_function_interrupt);

  if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
    {
      rtx insn = get_last_insn ();
      rtx last = emit_insn (gen_GOTaddr2picreg ());

      /* Mark these insns as possibly dead.  Sometimes, flow2 may
	 delete all uses of the PIC register.  In this case, let it
	 delete the initialization too.  */
      do
	{
	  insn = NEXT_INSN (insn);

	  REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
						const0_rtx,
						REG_NOTES (insn));
	}
      while (insn != last);
    }

  if (SHMEDIA_REGS_STACK_ADJUST ())
    {
      emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
		      function_symbol (TARGET_FPU_ANY
				       ? "__GCC_push_shmedia_regs"
				       : "__GCC_push_shmedia_regs_nofpu"));
      /* This must NOT go through the PLT, otherwise mach and macl
	 may be clobbered.  */
      emit_insn (gen_shmedia_save_restore_regs_compact
		 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
    }

  if (target_flags != save_flags && ! current_function_interrupt)
    {
      rtx insn = emit_insn (gen_toggle_sz ());

      /* If we're lucky, a mode switch in the function body will
	 overwrite fpscr, turning this insn dead.  Tell flow this
	 insn is ok to delete.  */
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
					    const0_rtx,
					    REG_NOTES (insn));
    }

  target_flags = save_flags;

  /* Allocate the rest of the local frame.  */
  output_stack_adjust (-rounded_frame_size (d) + d_rounding,
		       stack_pointer_rtx, 0, NULL);

  if (frame_pointer_needed)
    frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));

  if (TARGET_SHCOMPACT
      && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
    {
      /* This must NOT go through the PLT, otherwise mach and macl
	 may be clobbered.  */
      emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
		      function_symbol ("__GCC_shcompact_incoming_args"));
      emit_insn (gen_shcompact_incoming_args ());
    }
}
5580
/* Expand the epilogue of the current function: restore the saved
   registers, deallocate the frame, and undo any stack switch.  When
   SIBCALL_P is true this is the epilogue before a sibling call.  */
void
sh_expand_epilogue (bool sibcall_p)
{
  HARD_REG_SET live_regs_mask;
  int d, i;
  int d_rounding = 0;

  int save_flags = target_flags;
  int frame_size, save_size;
  int fpscr_deferred = 0;
  /* Passed as the epilogue_p argument to output_stack_adjust; -1 flags
     a sibcall epilogue.  */
  int e = sibcall_p ? -1 : 1;

  d = calc_live_regs (&live_regs_mask);

  save_size = d;
  frame_size = rounded_frame_size (d);

  if (TARGET_SH5)
    {
      /* Mirror the frame-size computation done in sh_expand_prologue so
	 the adjustments cancel out exactly.  */
      int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
      int total_size;
      if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
	d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
		      - d % (STACK_BOUNDARY / BITS_PER_UNIT));

      total_size = d + tregs_space;
      total_size += rounded_frame_size (total_size);
      save_size = total_size - frame_size;

      /* If adjusting the stack in a single step costs nothing extra, do so.
	 I.e. either if a single addi is enough, or we need a movi anyway,
	 and we don't exceed the maximum offset range (the test for the
	 latter is conservative for simplicity).  */
      if (TARGET_SHMEDIA
	  && ! frame_pointer_needed
	  && (CONST_OK_FOR_I10 (total_size)
	      || (! CONST_OK_FOR_I10 (save_size + d_rounding)
		  && total_size <= 2044)))
	d_rounding = frame_size;

      frame_size -= d_rounding;
    }

  if (frame_pointer_needed)
    {
      output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);

      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
    }
  else if (frame_size)
    {
      /* We must avoid moving the stack pointer adjustment past code
	 which reads from the local frame, else an interrupt could
	 occur after the SP adjustment and clobber data in the local
	 frame.  */
      emit_insn (gen_blockage ());
      output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
    }

  if (SHMEDIA_REGS_STACK_ADJUST ())
    {
      emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
		      function_symbol (TARGET_FPU_ANY
				       ? "__GCC_pop_shmedia_regs"
				       : "__GCC_pop_shmedia_regs_nofpu"));
      /* This must NOT go through the PLT, otherwise mach and macl
	 may be clobbered.  */
      emit_insn (gen_shmedia_save_restore_regs_compact
		 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
    }

  /* Pop all the registers.  */

  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());
  if (TARGET_SH5)
    {
      int offset_base, offset;
      int offset_in_r0 = -1;
      int sp_in_r0 = 0;
      rtx r0 = gen_rtx_REG (Pmode, R0_REG);
      save_schedule schedule;
      save_entry *entry;
      int *tmp_pnt;

      /* Rebuild the save schedule the prologue used and walk it in
	 reverse (ENTRY starts at the last real entry).  */
      entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
      offset_base = -entry[1].offset + d_rounding;
      tmp_pnt = schedule.temps;
      for (; entry->mode != VOIDmode; entry--)
	{
	  enum machine_mode mode = entry->mode;
	  int reg = entry->reg;
	  rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;

	  offset = offset_base + entry->offset;
	  reg_rtx = gen_rtx_REG (mode, reg);

	  mem_rtx = gen_rtx_MEM (mode,
				 gen_rtx_PLUS (Pmode,
					       stack_pointer_rtx,
					       GEN_INT (offset)));

	  /* Try a direct sp+offset address first; jumps past the
	     fallback when it is legitimate.  */
	  GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);

	  mem_rtx = NULL_RTX;

	try_post_inc:
	  do
	    if (HAVE_POST_INCREMENT
		&& (offset == offset_in_r0
		    || (offset + GET_MODE_SIZE (mode) != d + d_rounding
			&& mem_rtx == NULL_RTX)
		    || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
	      {
		post_inc = gen_rtx_MEM (mode,
					gen_rtx_POST_INC (Pmode, r0));

		GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
					  post_inc_ok);

		post_inc = NULL_RTX;

		break;

	      post_inc_ok:
		mem_rtx = NULL_RTX;
	      }
	  while (0);

	  if (mem_rtx != NULL_RTX)
	    goto addr_ok;

	  /* Materialize OFFSET in r0, reusing whatever value r0 already
	     tracks (OFFSET_IN_R0) when possible.  */
	  if (offset_in_r0 == -1)
	    {
	      emit_move_insn (r0, GEN_INT (offset));
	      offset_in_r0 = offset;
	    }
	  else if (offset != offset_in_r0)
	    {
	      emit_move_insn (r0,
			      gen_rtx_PLUS
			      (Pmode, r0,
			       GEN_INT (offset - offset_in_r0)));
	      offset_in_r0 += offset - offset_in_r0;
	    }

	  if (post_inc != NULL_RTX)
	    {
	      if (! sp_in_r0)
		{
		  emit_move_insn (r0,
				  gen_rtx_PLUS
				  (Pmode, r0, stack_pointer_rtx));
		  sp_in_r0 = 1;
		}

	      mem_rtx = post_inc;

	      offset_in_r0 += GET_MODE_SIZE (mode);
	    }
	  else if (sp_in_r0)
	    mem_rtx = gen_rtx_MEM (mode, r0);
	  else
	    mem_rtx = gen_rtx_MEM (mode,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 r0));

	  /* PR and special registers must come in through r0 (below),
	     so an r0-based address would be clobbered.  */
	  if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
	      && mem_rtx != post_inc)
	    abort ();

	addr_ok:
	  if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
	      && mem_rtx != post_inc)
	    {
	      insn = emit_move_insn (r0, mem_rtx);
	      mem_rtx = r0;
	    }
	  else if (TARGET_REGISTER_P (reg))
	    {
	      rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);

	      /* Give the scheduler a bit of freedom by using up to
		 MAX_TEMPS registers in a round-robin fashion.  */
	      insn = emit_move_insn (tmp_reg, mem_rtx);
	      mem_rtx = tmp_reg;
	      if (*++tmp_pnt < 0)
		tmp_pnt = schedule.temps;
	    }

	  insn = emit_move_insn (reg_rtx, mem_rtx);
	  if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
	    /* This is dead, unless we return with a sibcall.  */
	    REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
						  const0_rtx,
						  REG_NOTES (insn));
	}

      if (entry->offset + offset_base != d + d_rounding)
	abort ();
    }
  else /* ! TARGET_SH5 */
    {
      save_size = 0;
      if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
	pop (PR_REG);
      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	{
	  int j = (FIRST_PSEUDO_REGISTER - 1) - i;

	  /* In an interrupt handler with double-precision moves, FPSCR
	     must be restored after the double FP registers, so defer its
	     pop until the first FP register has been popped.  */
	  if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
	      && hard_regs_intersect_p (&live_regs_mask,
					&reg_class_contents[DF_REGS]))
	    fpscr_deferred = 1;
	  else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
	    pop (j);
	  if (j == FIRST_FP_REG && fpscr_deferred)
	    pop (FPSCR_REG);

	}
    }
  if (target_flags != save_flags && ! current_function_interrupt)
    emit_insn (gen_toggle_sz ());
  target_flags = save_flags;

  output_stack_adjust (extra_push + current_function_pretend_args_size
		       + save_size + d_rounding
		       + current_function_args_info.stack_regs * 8,
		       stack_pointer_rtx, e, NULL);

  if (current_function_calls_eh_return)
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			 EH_RETURN_STACKADJ_RTX));

  /* Switch back to the normal stack if necessary.  */
  if (sp_switch)
    emit_insn (gen_sp_switch_2 ());

  /* Tell flow the insn that pops PR isn't dead.  */
  /* PR_REG will never be live in SHmedia mode, and we don't need to
     USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
     by the return pattern.  */
  if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
    emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
}
5832
/* Cache for sh_need_epilogue: 0 = not computed yet, 1 = epilogue needed,
   -1 = no epilogue needed.  Reset in sh_output_function_epilogue.  */
static int sh_need_epilogue_known = 0;
5834
5835 int
sh_need_epilogue(void)5836 sh_need_epilogue (void)
5837 {
5838 if (! sh_need_epilogue_known)
5839 {
5840 rtx epilogue;
5841
5842 start_sequence ();
5843 sh_expand_epilogue (0);
5844 epilogue = get_insns ();
5845 end_sequence ();
5846 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5847 }
5848 return sh_need_epilogue_known > 0;
5849 }
5850
/* Emit code to change the current function's return address to RA.
   TEMP is available as a scratch register, if needed.  */

void
sh_set_return_address (rtx ra, rtx tmp)
{
  HARD_REG_SET live_regs_mask;
  int d;
  int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
  int pr_offset;

  d = calc_live_regs (&live_regs_mask);

  /* If pr_reg isn't live, we can set it (or the register given in
     sh_media_register_for_return) directly.  */
  if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
    {
      rtx rr;

      if (TARGET_SHMEDIA)
	{
	  int rr_regno = sh_media_register_for_return ();

	  if (rr_regno < 0)
	    rr_regno = pr_reg;

	  rr = gen_rtx_REG (DImode, rr_regno);
	}
      else
	rr = gen_rtx_REG (SImode, pr_reg);

      emit_insn (GEN_MOV (rr, ra));
      /* Tell flow the register for return isn't dead.  */
      emit_insn (gen_rtx_USE (VOIDmode, rr));
      return;
    }

  if (TARGET_SH5)
    {
      int offset;
      save_schedule schedule;
      save_entry *entry;

      /* Rebuild the prologue's save schedule and search it for the slot
	 holding the PR register, to compute its stack offset.  */
      entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
      offset = entry[1].offset;
      for (; entry->mode != VOIDmode; entry--)
	if (entry->reg == pr_reg)
	  goto found;

      /* We can't find pr register.  */
      abort ();

    found:
      offset = entry->offset - offset;
      pr_offset = (rounded_frame_size (d) + offset
		   + SHMEDIA_REGS_STACK_ADJUST ());
    }
  else
    pr_offset = rounded_frame_size (d);

  /* Compute the address of PR's save slot relative to the frame pointer
     and store RA there, overwriting the saved return address.  */
  emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
  emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));

  tmp = gen_rtx_MEM (Pmode, tmp);
  emit_insn (GEN_MOV (tmp, ra));
}
5917
5918 /* Clear variables at function end. */
5919
5920 static void
sh_output_function_epilogue(FILE * file ATTRIBUTE_UNUSED,HOST_WIDE_INT size ATTRIBUTE_UNUSED)5921 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5922 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5923 {
5924 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
5925 sh_need_epilogue_known = 0;
5926 sp_switch = NULL_RTX;
5927 }
5928
5929 static rtx
sh_builtin_saveregs(void)5930 sh_builtin_saveregs (void)
5931 {
5932 /* First unnamed integer register. */
5933 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
5934 /* Number of integer registers we need to save. */
5935 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
5936 /* First unnamed SFmode float reg */
5937 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
5938 /* Number of SFmode float regs to save. */
5939 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
5940 rtx regbuf, fpregs;
5941 int bufsize, regno;
5942 HOST_WIDE_INT alias_set;
5943
5944 if (TARGET_SH5)
5945 {
5946 if (n_intregs)
5947 {
5948 int pushregs = n_intregs;
5949
5950 while (pushregs < NPARM_REGS (SImode) - 1
5951 && (CALL_COOKIE_INT_REG_GET
5952 (current_function_args_info.call_cookie,
5953 NPARM_REGS (SImode) - pushregs)
5954 == 1))
5955 {
5956 current_function_args_info.call_cookie
5957 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
5958 - pushregs, 1);
5959 pushregs++;
5960 }
5961
5962 if (pushregs == NPARM_REGS (SImode))
5963 current_function_args_info.call_cookie
5964 |= (CALL_COOKIE_INT_REG (0, 1)
5965 | CALL_COOKIE_STACKSEQ (pushregs - 1));
5966 else
5967 current_function_args_info.call_cookie
5968 |= CALL_COOKIE_STACKSEQ (pushregs);
5969
5970 current_function_pretend_args_size += 8 * n_intregs;
5971 }
5972 if (TARGET_SHCOMPACT)
5973 return const0_rtx;
5974 }
5975
5976 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
5977 {
5978 error ("__builtin_saveregs not supported by this subtarget");
5979 return const0_rtx;
5980 }
5981
5982 if (TARGET_SHMEDIA)
5983 n_floatregs = 0;
5984
5985 /* Allocate block of memory for the regs. */
5986 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
5987 Or can assign_stack_local accept a 0 SIZE argument? */
5988 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
5989
5990 if (TARGET_SHMEDIA)
5991 regbuf = gen_rtx_MEM (BLKmode,
5992 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
5993 else if (n_floatregs & 1)
5994 {
5995 rtx addr;
5996
5997 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
5998 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
5999 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6000 regbuf = change_address (regbuf, BLKmode, addr);
6001 }
6002 else
6003 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6004 alias_set = get_varargs_alias_set ();
6005 set_mem_alias_set (regbuf, alias_set);
6006
6007 /* Save int args.
6008 This is optimized to only save the regs that are necessary. Explicitly
6009 named args need not be saved. */
6010 if (n_intregs > 0)
6011 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6012 adjust_address (regbuf, BLKmode,
6013 n_floatregs * UNITS_PER_WORD),
6014 n_intregs);
6015
6016 if (TARGET_SHMEDIA)
6017 /* Return the address of the regbuf. */
6018 return XEXP (regbuf, 0);
6019
6020 /* Save float args.
6021 This is optimized to only save the regs that are necessary. Explicitly
6022 named args need not be saved.
6023 We explicitly build a pointer to the buffer because it halves the insn
6024 count when not optimizing (otherwise the pointer is built for each reg
6025 saved).
6026 We emit the moves in reverse order so that we can use predecrement. */
6027
6028 fpregs = gen_reg_rtx (Pmode);
6029 emit_move_insn (fpregs, XEXP (regbuf, 0));
6030 emit_insn (gen_addsi3 (fpregs, fpregs,
6031 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6032 if (TARGET_SH4)
6033 {
6034 rtx mem;
6035 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6036 {
6037 emit_insn (gen_addsi3 (fpregs, fpregs,
6038 GEN_INT (-2 * UNITS_PER_WORD)));
6039 mem = gen_rtx_MEM (DFmode, fpregs);
6040 set_mem_alias_set (mem, alias_set);
6041 emit_move_insn (mem,
6042 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
6043 }
6044 regno = first_floatreg;
6045 if (regno & 1)
6046 {
6047 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
6048 mem = gen_rtx_MEM (SFmode, fpregs);
6049 set_mem_alias_set (mem, alias_set);
6050 emit_move_insn (mem,
6051 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
6052 - (TARGET_LITTLE_ENDIAN != 0)));
6053 }
6054 }
6055 else
6056 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6057 {
6058 rtx mem;
6059
6060 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
6061 mem = gen_rtx_MEM (SFmode, fpregs);
6062 set_mem_alias_set (mem, alias_set);
6063 emit_move_insn (mem,
6064 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6065 }
6066
6067 /* Return the address of the regbuf. */
6068 return XEXP (regbuf, 0);
6069 }
6070
/* Define the `__builtin_va_list' type for the ABI.  Returns either a
   plain pointer type or a five-field record tracking the separate
   register-save areas.  */

static tree
sh_build_builtin_va_list (void)
{
  tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
  tree record;

  /* SH5, the Renesas/Hitachi ABIs, and targets with neither SH2E nor
     SH4 use a plain pointer for va_list.  */
  if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
      || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
    return ptr_type_node;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  /* Five pointer fields: next overflow (general) argument and its
     limit, next FP argument and its limit, and the next stack-passed
     argument.  */
  f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
			 ptr_type_node);
  f_next_o_limit = build_decl (FIELD_DECL,
			       get_identifier ("__va_next_o_limit"),
			       ptr_type_node);
  f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
			  ptr_type_node);
  f_next_fp_limit = build_decl (FIELD_DECL,
				get_identifier ("__va_next_fp_limit"),
				ptr_type_node);
  f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
			     ptr_type_node);

  DECL_FIELD_CONTEXT (f_next_o) = record;
  DECL_FIELD_CONTEXT (f_next_o_limit) = record;
  DECL_FIELD_CONTEXT (f_next_fp) = record;
  DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
  DECL_FIELD_CONTEXT (f_next_stack) = record;

  /* Chain the fields in declaration order and lay the record out.  */
  TYPE_FIELDS (record) = f_next_o;
  TREE_CHAIN (f_next_o) = f_next_o_limit;
  TREE_CHAIN (f_next_o_limit) = f_next_fp;
  TREE_CHAIN (f_next_fp) = f_next_fp_limit;
  TREE_CHAIN (f_next_fp_limit) = f_next_stack;

  layout_type (record);

  return record;
}
6114
6115 /* Implement `va_start' for varargs and stdarg. */
6116
void
sh_va_start (tree valist, rtx nextarg)
{
  tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
  tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
  tree t, u;
  int nfp, nint;

  /* SH5 uses the generic single-pointer va_list, but still needs the
     register-save area set up.  */
  if (TARGET_SH5)
    {
      expand_builtin_saveregs ();
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Likewise for targets/ABIs without the structured va_list built by
     sh_build_builtin_va_list.  */
  if ((! TARGET_SH2E && ! TARGET_SH4)
      || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Fields in the order sh_build_builtin_va_list created them.  */
  f_next_o = TYPE_FIELDS (va_list_type_node);
  f_next_o_limit = TREE_CHAIN (f_next_o);
  f_next_fp = TREE_CHAIN (f_next_o_limit);
  f_next_fp_limit = TREE_CHAIN (f_next_fp);
  f_next_stack = TREE_CHAIN (f_next_fp_limit);

  next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
  next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
			valist, f_next_o_limit);
  next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp);
  next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
			 valist, f_next_fp_limit);
  next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
		      valist, f_next_stack);

  /* Call __builtin_saveregs.  Its result U is the base of the saved
     register block: FP regs first, then the integer regs.  */
  u = make_tree (ptr_type_node, expand_builtin_saveregs ());
  t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* NFP = number of FP argument registers still unused by named args.  */
  nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
  if (nfp < 8)
    nfp = 8 - nfp;
  else
    nfp = 0;
  u = fold (build (PLUS_EXPR, ptr_type_node, u,
		   build_int_2 (UNITS_PER_WORD * nfp, 0)));
  t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* The integer-register save area starts where the FP area ends.  */
  t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* NINT = number of integer argument registers still unused.  */
  nint = current_function_args_info.arg_count[SH_ARG_INT];
  if (nint < 4)
    nint = 4 - nint;
  else
    nint = 0;
  u = fold (build (PLUS_EXPR, ptr_type_node, u,
		   build_int_2 (UNITS_PER_WORD * nint, 0)));
  t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Remaining arguments live on the stack starting at NEXTARG.  */
  u = make_tree (ptr_type_node, nextarg);
  t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
6191
6192 /* Implement `va_arg'. */
6193
6194 rtx
sh_va_arg(tree valist,tree type)6195 sh_va_arg (tree valist, tree type)
6196 {
6197 HOST_WIDE_INT size, rsize;
6198 tree tmp, pptr_type_node;
6199 rtx addr_rtx, r;
6200 rtx result_ptr, result = NULL_RTX;
6201 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6202 rtx lab_over;
6203
6204 size = int_size_in_bytes (type);
6205 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6206 pptr_type_node = build_pointer_type (ptr_type_node);
6207
6208 if (pass_by_ref)
6209 type = build_pointer_type (type);
6210
6211 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6212 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6213 {
6214 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6215 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6216 int pass_as_float;
6217 rtx lab_false;
6218
6219 f_next_o = TYPE_FIELDS (va_list_type_node);
6220 f_next_o_limit = TREE_CHAIN (f_next_o);
6221 f_next_fp = TREE_CHAIN (f_next_o_limit);
6222 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6223 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6224
6225 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o);
6226 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6227 valist, f_next_o_limit);
6228 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6229 valist, f_next_fp);
6230 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6231 valist, f_next_fp_limit);
6232 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6233 valist, f_next_stack);
6234
6235 /* Structures with a single member with a distinct mode are passed
6236 like their member. This is relevant if the latter has a REAL_TYPE
6237 or COMPLEX_TYPE type. */
6238 if (TREE_CODE (type) == RECORD_TYPE
6239 && TYPE_FIELDS (type)
6240 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6241 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6242 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6243 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6244 type = TREE_TYPE (TYPE_FIELDS (type));
6245 if (TARGET_SH4)
6246 {
6247 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6248 || (TREE_CODE (type) == COMPLEX_TYPE
6249 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6250 && size <= 16));
6251 }
6252 else
6253 {
6254 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6255 }
6256
6257 addr_rtx = gen_reg_rtx (Pmode);
6258 lab_false = gen_label_rtx ();
6259 lab_over = gen_label_rtx ();
6260
6261 tmp = make_tree (pptr_type_node, addr_rtx);
6262 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6263
6264 if (pass_as_float)
6265 {
6266 int first_floatreg
6267 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6268 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6269
6270 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6271 EXPAND_NORMAL),
6272 expand_expr (next_fp_limit, NULL_RTX,
6273 Pmode, EXPAND_NORMAL),
6274 GE, const1_rtx, Pmode, 1, lab_false);
6275
6276 if (TYPE_ALIGN (type) > BITS_PER_WORD
6277 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6278 && (n_floatregs & 1)))
6279 {
6280 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6281 build_int_2 (UNITS_PER_WORD, 0));
6282 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6283 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6284 TREE_SIDE_EFFECTS (tmp) = 1;
6285 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6286 }
6287
6288 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6289 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6290 if (r != addr_rtx)
6291 emit_move_insn (addr_rtx, r);
6292
6293 #ifdef FUNCTION_ARG_SCmode_WART
6294 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6295 {
6296 rtx addr, real, imag, result_value, slot;
6297 tree subtype = TREE_TYPE (type);
6298
6299 addr = std_expand_builtin_va_arg (valist, subtype);
6300 #ifdef POINTERS_EXTEND_UNSIGNED
6301 if (GET_MODE (addr) != Pmode)
6302 addr = convert_memory_address (Pmode, addr);
6303 #endif
6304 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6305 set_mem_alias_set (imag, get_varargs_alias_set ());
6306
6307 addr = std_expand_builtin_va_arg (valist, subtype);
6308 #ifdef POINTERS_EXTEND_UNSIGNED
6309 if (GET_MODE (addr) != Pmode)
6310 addr = convert_memory_address (Pmode, addr);
6311 #endif
6312 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6313 set_mem_alias_set (real, get_varargs_alias_set ());
6314
6315 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6316 /* ??? this interface is stupid - why require a pointer? */
6317 result = gen_reg_rtx (Pmode);
6318 slot = assign_stack_temp (SCmode, 8, 0);
6319 emit_move_insn (slot, result_value);
6320 emit_move_insn (result, XEXP (slot, 0));
6321 }
6322 #endif /* FUNCTION_ARG_SCmode_WART */
6323
6324 emit_jump_insn (gen_jump (lab_over));
6325 emit_barrier ();
6326 emit_label (lab_false);
6327
6328 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6329 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6330 if (r != addr_rtx)
6331 emit_move_insn (addr_rtx, r);
6332 }
6333 else
6334 {
6335 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6336 build_int_2 (rsize, 0));
6337
6338 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6339 EXPAND_NORMAL),
6340 expand_expr (next_o_limit, NULL_RTX,
6341 Pmode, EXPAND_NORMAL),
6342 GT, const1_rtx, Pmode, 1, lab_false);
6343
6344 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6345 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6346 if (r != addr_rtx)
6347 emit_move_insn (addr_rtx, r);
6348
6349 emit_jump_insn (gen_jump (lab_over));
6350 emit_barrier ();
6351 emit_label (lab_false);
6352
6353 if (size > 4 && ! TARGET_SH4)
6354 {
6355 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6356 TREE_SIDE_EFFECTS (tmp) = 1;
6357 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6358 }
6359
6360 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6361 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6362 if (r != addr_rtx)
6363 emit_move_insn (addr_rtx, r);
6364 }
6365
6366 if (! result)
6367 emit_label (lab_over);
6368 }
6369
6370 /* ??? In va-sh.h, there had been code to make values larger than
6371 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6372
6373 result_ptr = std_expand_builtin_va_arg (valist, type);
6374 if (result)
6375 {
6376 emit_move_insn (result, result_ptr);
6377 emit_label (lab_over);
6378 }
6379 else
6380 result = result_ptr;
6381
6382 if (pass_by_ref)
6383 {
6384 #ifdef POINTERS_EXTEND_UNSIGNED
6385 if (GET_MODE (addr) != Pmode)
6386 addr = convert_memory_address (Pmode, result);
6387 #endif
6388 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6389 set_mem_alias_set (result, get_varargs_alias_set ());
6390 }
6391 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6392 argument to the varargs alias set. */
6393 return result;
6394 }
6395
6396 bool
sh_promote_prototypes(tree type)6397 sh_promote_prototypes (tree type)
6398 {
6399 if (TARGET_HITACHI)
6400 return 0;
6401 if (! type)
6402 return 1;
6403 return ! sh_attr_renesas_p (type);
6404 }
6405
6406 /* Define where to put the arguments to a function.
6407 Value is zero to push the argument on the stack,
6408 or a hard register in which to store the argument.
6409
6410 MODE is the argument's machine mode.
6411 TYPE is the data type of the argument (as a tree).
6412 This is null for libcalls where that information may
6413 not be available.
6414 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6415 the preceding args and about the function being called.
6416 NAMED is nonzero if this argument is a named parameter
6417 (otherwise it is an extra parameter matching an ellipsis).
6418
6419 On SH the first args are normally in registers
6420 and the rest are pushed. Any arg that starts within the first
6421 NPARM_REGS words is at least partially passed in a register unless
6422 its data type forbids. */
6423
6424
rtx
sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
		 tree type, int named)
{
  /* VOIDmode marks the end of the argument list; for non-SH5 the
     returned cookie says whether the Renesas ABI is in effect.  */
  if (! TARGET_SH5 && mode == VOIDmode)
    return GEN_INT (ca->renesas_abi ? 1 : 0);

  if (! TARGET_SH5
      && PASS_IN_REG_P (*ca, mode, type)
      && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
    {
      int regno;

      /* Little-endian SH4 SCmode: real and imaginary parts each go in
	 an SFmode register, with the pair order swapped (the ^ 1).  */
      if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
	  && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
	{
	  rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (SFmode,
						   BASE_ARG_REG (mode)
						   + (ROUND_REG (*ca, mode) ^ 1)),
				      const0_rtx);
	  rtx r2 = gen_rtx_EXPR_LIST(VOIDmode,
				     gen_rtx_REG (SFmode,
						  BASE_ARG_REG (mode)
						  + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
				     GEN_INT (4));
	  return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
	}

      /* If the alignment of a DF value causes an SF register to be
	 skipped, we will use that skipped register for the next SF
	 value.  */
      if ((TARGET_HITACHI || ca->renesas_abi)
	  && ca->free_single_fp_reg
	  && mode == SFmode)
	return gen_rtx_REG (mode, ca->free_single_fp_reg);

      /* On little-endian SH4 (GCC ABI), single floats are swapped
	 within a register pair — hence the XOR with 1.  */
      regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
	       ^ (mode == SFmode && TARGET_SH4
		  && TARGET_LITTLE_ENDIAN != 0
		  && ! TARGET_HITACHI && ! ca->renesas_abi);
      return gen_rtx_REG (mode, regno);

    }

  if (TARGET_SH5)
    {
      /* On SHcompact the VOIDmode end marker carries the call cookie.  */
      if (mode == VOIDmode && TARGET_SHCOMPACT)
	return GEN_INT (ca->call_cookie);

      /* The following test assumes unnamed arguments are promoted to
	 DFmode.  */
      if (mode == SFmode && ca->free_single_fp_reg)
	return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);

      if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
	  && (named || ! ca->prototype_p)
	  && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
	{
	  if (! ca->prototype_p && TARGET_SHMEDIA)
	    return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);

	  return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
					   FIRST_FP_PARM_REG
					   + ca->arg_count[(int) SH_ARG_FLOAT]);
	}

      if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
	  && (! TARGET_SHCOMPACT
	      || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
		  && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
						   type, named))))
	{
	  return gen_rtx_REG (mode, (FIRST_PARM_REG
				     + ca->arg_count[(int) SH_ARG_INT]));
	}

      /* No register available: pass on the stack.  */
      return 0;
    }

  return 0;
}
6507
6508 /* Update the data in CUM to advance over an argument
6509 of mode MODE and data type TYPE.
6510 (TYPE is null for libcalls where that information may not be
6511 available.) */
6512
void
sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
			 tree type, int named)
{
  /* A force_mem slot consumed exactly one "argument"; just clear it.  */
  if (ca->force_mem)
    ca->force_mem = 0;
  else if (TARGET_SH5)
    {
      /* For by-reference arguments, account for the pointed-to type.  */
      tree type2 = (ca->byref && type
		    ? TREE_TYPE (type)
		    : type);
      enum machine_mode mode2 = (ca->byref && type
				 ? TYPE_MODE (type2)
				 : mode);
      /* Size of the argument in 8-byte doublewords.  */
      int dwords = ((ca->byref
		     ? ca->byref
		     : mode2 == BLKmode
		     ? int_size_in_bytes (type2)
		     : GET_MODE_SIZE (mode2)) + 7) / 8;
      /* How many of those fit in remaining integer argument regs.  */
      int numregs = MIN (dwords, NPARM_REGS (SImode)
			 - ca->arg_count[(int) SH_ARG_INT]);

      if (numregs)
	{
	  ca->arg_count[(int) SH_ARG_INT] += numregs;
	  if (TARGET_SHCOMPACT
	      && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
	    {
	      ca->call_cookie
		|= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
					- numregs, 1);
	      /* N.B. We want this also for outgoing.  */
	      ca->stack_regs += numregs;
	    }
	  else if (ca->byref)
	    {
	      /* Mark each register of a by-reference argument in the
		 call cookie (kind 2), and the last one as kind 1.  */
	      if (! ca->outgoing)
		ca->stack_regs += numregs;
	      ca->byref_regs += numregs;
	      ca->byref = 0;
	      do
		ca->call_cookie
		  |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
					  - numregs, 2);
	      while (--numregs);
	      ca->call_cookie
		|= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
					- 1, 1);
	    }
	  else if (dwords > numregs)
	    {
	      /* Argument is split between registers and the stack:
		 extend any existing trailing stack sequence.  */
	      int pushregs = numregs;

	      if (TARGET_SHCOMPACT)
		ca->stack_regs += numregs;
	      while (pushregs < NPARM_REGS (SImode) - 1
		     && (CALL_COOKIE_INT_REG_GET
			 (ca->call_cookie,
			  NPARM_REGS (SImode) - pushregs)
			 == 1))
		{
		  ca->call_cookie
		    &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
					      - pushregs, 1);
		  pushregs++;
		}
	      if (numregs == NPARM_REGS (SImode))
		ca->call_cookie
		  |= CALL_COOKIE_INT_REG (0, 1)
		  | CALL_COOKIE_STACKSEQ (numregs - 1);
	      else
		ca->call_cookie
		  |= CALL_COOKIE_STACKSEQ (numregs);
	    }
	}
      /* Now account for floating-point register usage.  */
      if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
	  && (named || ! ca->prototype_p))
	{
	  if (mode2 == SFmode && ca->free_single_fp_reg)
	    ca->free_single_fp_reg = 0;
	  else if (ca->arg_count[(int) SH_ARG_FLOAT]
		   < NPARM_REGS (SFmode))
	    {
	      /* FP regs are counted in SFmode units, two per dword.  */
	      int numfpregs
		= MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
		       NPARM_REGS (SFmode)
		       - ca->arg_count[(int) SH_ARG_FLOAT]);

	      ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;

	      if (TARGET_SHCOMPACT && ! ca->prototype_p)
		{
		  /* Unprototyped SHcompact call: record in the cookie
		     which integer regs shadow these FP values.  */
		  if (ca->outgoing && numregs > 0)
		    do
		      {
			ca->call_cookie
			  |= (CALL_COOKIE_INT_REG
			      (ca->arg_count[(int) SH_ARG_INT]
			       - numregs + ((numfpregs - 2) / 2),
			       4 + (ca->arg_count[(int) SH_ARG_FLOAT]
				    - numfpregs) / 2));
		      }
		    while (numfpregs -= 2);
		}
	      else if (mode2 == SFmode && (named)
		       && (ca->arg_count[(int) SH_ARG_FLOAT]
			   < NPARM_REGS (SFmode)))
		ca->free_single_fp_reg
		  = FIRST_FP_PARM_REG - numfpregs
		  + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
	    }
	}
      return;
    }

  if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
    {
      /* Note that we've used the skipped register.  */
      if (mode == SFmode && ca->free_single_fp_reg)
	{
	  ca->free_single_fp_reg = 0;
	  return;
	}
      /* When we have a DF after an SF, there's an SF register that gets
	 skipped in order to align the DF value.  We note this skipped
	 register, because the next SF value will use it, and not the
	 SF that follows the DF.  */
      if (mode == DFmode
	  && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
	{
	  ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
				    + BASE_ARG_REG (mode));
	}
    }

  /* Advance the appropriate (int or float) register counter by the
     size of this argument in words.  */
  if (! (TARGET_SH4 || ca->renesas_abi)
      || PASS_IN_REG_P (*ca, mode, type))
    (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
     = (ROUND_REG (*ca, mode)
	+ (mode == BLKmode
	   ? ROUND_ADVANCE (int_size_in_bytes (type))
	   : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
}
6656
6657 /* If the structure value address is not passed in a register, define
6658 `STRUCT_VALUE' as an expression returning an RTX for the place
6659 where the address is passed. If it returns 0, the address is
6660 passed as an "invisible" first argument. */
6661 /* The Renesas calling convention doesn't quite fit into this scheme since
6662 the address is passed like an invisible argument, but one that is always
6663 passed in memory. */
6664 static rtx
sh_struct_value_rtx(tree fndecl,int incoming ATTRIBUTE_UNUSED)6665 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6666 {
6667 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6668 return 0;
6669 return gen_rtx_REG (Pmode, 2);
6670 }
6671
6672 static bool
sh_return_in_memory(tree type,tree fndecl)6673 sh_return_in_memory (tree type, tree fndecl)
6674 {
6675 if (TARGET_SH5)
6676 {
6677 if (TYPE_MODE (type) == BLKmode)
6678 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6679 else
6680 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6681 }
6682 else
6683 {
6684 return (TYPE_MODE (type) == BLKmode
6685 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6686 && TREE_CODE (type) == RECORD_TYPE));
6687 }
6688 }
6689
/* We actually emit the code in sh_expand_prologue.  We used to use
   a static variable to flag that we need to emit this code, but that
   doesn't work when inlining, when functions are deferred and then emitted
   later.  Fortunately, we already have two flags that are part of struct
   function that tell if a function uses varargs or stdarg.  */
6695 static void
sh_setup_incoming_varargs(CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED,enum machine_mode mode ATTRIBUTE_UNUSED,tree type ATTRIBUTE_UNUSED,int * pretend_arg_size ATTRIBUTE_UNUSED,int second_time ATTRIBUTE_UNUSED)6696 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
6697 enum machine_mode mode ATTRIBUTE_UNUSED,
6698 tree type ATTRIBUTE_UNUSED,
6699 int *pretend_arg_size ATTRIBUTE_UNUSED,
6700 int second_time ATTRIBUTE_UNUSED)
6701 {
6702 if (! current_function_stdarg)
6703 abort ();
6704 }
6705
6706 static bool
sh_strict_argument_naming(CUMULATIVE_ARGS * ca ATTRIBUTE_UNUSED)6707 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6708 {
6709 return TARGET_SH5;
6710 }
6711
6712 static bool
sh_pretend_outgoing_varargs_named(CUMULATIVE_ARGS * ca)6713 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6714 {
6715 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6716 }
6717
6718
6719 /* Define the offset between two registers, one to be eliminated, and
6720 the other its replacement, at the start of a routine. */
6721
int
initial_elimination_offset (int from, int to)
{
  int regs_saved;
  int regs_saved_rounding = 0;
  int total_saved_regs_space;
  int total_auto_space;
  /* calc_live_regs and rounded_frame_size can alter target_flags;
     save them here and restore below.  */
  int save_flags = target_flags;
  int copy_flags;
  HARD_REG_SET live_regs_mask;

  shmedia_space_reserved_for_target_registers = false;
  regs_saved = calc_live_regs (&live_regs_mask);
  regs_saved += SHMEDIA_REGS_STACK_ADJUST ();

  if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
    {
      shmedia_space_reserved_for_target_registers = true;
      regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
    }

  /* SH5 keeps the register-save area aligned to the stack boundary.  */
  if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
    regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
			   - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));

  total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
  /* Remember the possibly-modified flags for the SH5 schedule walk,
     then restore the caller's view.  */
  copy_flags = target_flags;
  target_flags = save_flags;

  total_saved_regs_space = regs_saved + regs_saved_rounding;

  /* ap->fp and ap->sp offsets are identical because the initial
     fp->sp gap is 0 (see below).  */
  if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space
	   + current_function_args_info.byref_regs * 8;

  if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return total_saved_regs_space + total_auto_space
	   + current_function_args_info.byref_regs * 8;

  /* Initial gap between fp and sp is 0.  */
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return 0;

  if (from == RETURN_ADDRESS_POINTER_REGNUM
      && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
    {
      if (TARGET_SH5)
	{
	  /* Walk the SH5 save schedule to find where PR was saved.  */
	  int n = total_saved_regs_space;
	  int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
	  save_schedule schedule;
	  save_entry *entry;

	  n += total_auto_space;

	  /* If it wasn't saved, there's not much we can do.  */
	  if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
	    return n;

	  target_flags = copy_flags;

	  sh5_schedule_saves (&live_regs_mask, &schedule, n);
	  for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
	    if (entry->reg == pr_reg)
	      {
		target_flags = save_flags;
		return entry->offset;
	      }
	  abort ();
	}
      else
	return total_auto_space;
    }

  /* Unsupported elimination pair.  */
  abort ();
}
6798
6799 /* Handle machine specific pragmas to be semi-compatible with Renesas
6800 compiler. */
6801
void
sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
{
  /* `#pragma interrupt': flag that subsequent function declarations
     should be treated as interrupt handlers (see sh_insert_attributes).  */
  pragma_interrupt = 1;
}
6807
6808 void
sh_pr_trapa(struct cpp_reader * pfile ATTRIBUTE_UNUSED)6809 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6810 {
6811 pragma_interrupt = pragma_trapa = 1;
6812 }
6813
void
sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
{
  /* `#pragma nosave_low_regs': interrupt handlers need not save
     r0..r7 (the caller-saved low registers).  */
  pragma_nosave_low_regs = 1;
}
6819
6820 /* Generate 'handle_interrupt' attribute for decls */
6821
6822 static void
sh_insert_attributes(tree node,tree * attributes)6823 sh_insert_attributes (tree node, tree *attributes)
6824 {
6825 if (! pragma_interrupt
6826 || TREE_CODE (node) != FUNCTION_DECL)
6827 return;
6828
6829 /* We are only interested in fields. */
6830 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6831 return;
6832
6833 /* Add a 'handle_interrupt' attribute. */
6834 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6835
6836 return;
6837 }
6838
6839 /* Supported attributes:
6840
6841 interrupt_handler -- specifies this function is an interrupt handler.
6842
6843 sp_switch -- specifies an alternate stack for an interrupt handler
6844 to run on.
6845
6846 trap_exit -- use a trapa to exit an interrupt function instead of
6847 an rte instruction.
6848
6849 renesas -- use Renesas calling/layout conventions (functions and
6850 structures).
6851
6852 */
6853
const struct attribute_spec sh_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
  { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
  { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
  { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
  /* Sentinel entry terminating the table.  */
  { NULL, 0, 0, false, false, false, NULL }
};
6863
6864 /* Handle an "interrupt_handler" attribute; arguments as in
6865 struct attribute_spec.handler. */
6866 static tree
sh_handle_interrupt_handler_attribute(tree * node,tree name,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)6867 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6868 tree args ATTRIBUTE_UNUSED,
6869 int flags ATTRIBUTE_UNUSED,
6870 bool *no_add_attrs)
6871 {
6872 if (TREE_CODE (*node) != FUNCTION_DECL)
6873 {
6874 warning ("`%s' attribute only applies to functions",
6875 IDENTIFIER_POINTER (name));
6876 *no_add_attrs = true;
6877 }
6878 else if (TARGET_SHCOMPACT)
6879 {
6880 error ("attribute interrupt_handler is not compatible with -m5-compact");
6881 *no_add_attrs = true;
6882 }
6883
6884 return NULL_TREE;
6885 }
6886
6887 /* Handle an "sp_switch" attribute; arguments as in
6888 struct attribute_spec.handler. */
6889 static tree
sh_handle_sp_switch_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)6890 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6891 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6892 {
6893 if (TREE_CODE (*node) != FUNCTION_DECL)
6894 {
6895 warning ("`%s' attribute only applies to functions",
6896 IDENTIFIER_POINTER (name));
6897 *no_add_attrs = true;
6898 }
6899 else if (!pragma_interrupt)
6900 {
6901 /* The sp_switch attribute only has meaning for interrupt functions. */
6902 warning ("`%s' attribute only applies to interrupt functions",
6903 IDENTIFIER_POINTER (name));
6904 *no_add_attrs = true;
6905 }
6906 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
6907 {
6908 /* The argument must be a constant string. */
6909 warning ("`%s' attribute argument not a string constant",
6910 IDENTIFIER_POINTER (name));
6911 *no_add_attrs = true;
6912 }
6913 else
6914 {
6915 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
6916 TREE_STRING_POINTER (TREE_VALUE (args)));
6917 }
6918
6919 return NULL_TREE;
6920 }
6921
/* Handle a "trap_exit" attribute; arguments as in
   struct attribute_spec.handler.  */
6924 static tree
sh_handle_trap_exit_attribute(tree * node,tree name,tree args,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs)6925 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
6926 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6927 {
6928 if (TREE_CODE (*node) != FUNCTION_DECL)
6929 {
6930 warning ("`%s' attribute only applies to functions",
6931 IDENTIFIER_POINTER (name));
6932 *no_add_attrs = true;
6933 }
6934 else if (!pragma_interrupt)
6935 {
6936 /* The trap_exit attribute only has meaning for interrupt functions. */
6937 warning ("`%s' attribute only applies to interrupt functions",
6938 IDENTIFIER_POINTER (name));
6939 *no_add_attrs = true;
6940 }
6941 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
6942 {
6943 /* The argument must be a constant integer. */
6944 warning ("`%s' attribute argument not an integer constant",
6945 IDENTIFIER_POINTER (name));
6946 *no_add_attrs = true;
6947 }
6948 else
6949 {
6950 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
6951 }
6952
6953 return NULL_TREE;
6954 }
6955
6956 static tree
sh_handle_renesas_attribute(tree * node ATTRIBUTE_UNUSED,tree name ATTRIBUTE_UNUSED,tree args ATTRIBUTE_UNUSED,int flags ATTRIBUTE_UNUSED,bool * no_add_attrs ATTRIBUTE_UNUSED)6957 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
6958 tree name ATTRIBUTE_UNUSED,
6959 tree args ATTRIBUTE_UNUSED,
6960 int flags ATTRIBUTE_UNUSED,
6961 bool *no_add_attrs ATTRIBUTE_UNUSED)
6962 {
6963 return NULL_TREE;
6964 }
6965
6966 /* True if __attribute__((renesas)) or -mrenesas. */
6967 int
sh_attr_renesas_p(tree td)6968 sh_attr_renesas_p (tree td)
6969 {
6970 if (TARGET_HITACHI)
6971 return 1;
6972 if (td == 0)
6973 return 0;
6974 if (DECL_P (td))
6975 td = TREE_TYPE (td);
6976 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
6977 != NULL_TREE);
6978 }
6979
6980 /* True if __attribute__((renesas)) or -mrenesas, for the current
6981 function. */
int
sh_cfun_attr_renesas_p (void)
{
  /* Convenience wrapper: does the function currently being compiled
     follow the Renesas ABI?  */
  return sh_attr_renesas_p (current_function_decl);
}
6987
6988 int
sh_cfun_interrupt_handler_p(void)6989 sh_cfun_interrupt_handler_p (void)
6990 {
6991 return (lookup_attribute ("interrupt_handler",
6992 DECL_ATTRIBUTES (current_function_decl))
6993 != NULL_TREE);
6994 }
6995
/* ??? target_switches in toplev.c is static, hence we have to duplicate it.  */
static const struct
{
  const char *const name;         /* Switch name without the "-m" prefix.  */
  const int value;                /* target_flags bits set (cleared if negative).  */
  const char *const description;  /* Help text; unused here.  */
}
sh_target_switches[] = TARGET_SWITCHES;
#define target_switches sh_target_switches
7005
7006 /* Like default_pch_valid_p, but take flag_mask into account. */
7007 const char *
sh_pch_valid_p(const void * data_p,size_t len)7008 sh_pch_valid_p (const void *data_p, size_t len)
7009 {
7010 const char *data = (const char *)data_p;
7011 const char *flag_that_differs = NULL;
7012 size_t i;
7013 int old_flags;
7014 int flag_mask
7015 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7016 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7017
7018 /* -fpic and -fpie also usually make a PCH invalid. */
7019 if (data[0] != flag_pic)
7020 return _("created and used with different settings of -fpic");
7021 if (data[1] != flag_pie)
7022 return _("created and used with different settings of -fpie");
7023 data += 2;
7024
7025 /* Check target_flags. */
7026 memcpy (&old_flags, data, sizeof (target_flags));
7027 if (((old_flags ^ target_flags) & flag_mask) != 0)
7028 {
7029 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7030 {
7031 int bits;
7032
7033 bits = target_switches[i].value;
7034 if (bits < 0)
7035 bits = -bits;
7036 bits &= flag_mask;
7037 if ((target_flags & bits) != (old_flags & bits))
7038 {
7039 flag_that_differs = target_switches[i].name;
7040 goto make_message;
7041 }
7042 }
7043 abort ();
7044 }
7045 data += sizeof (target_flags);
7046 len -= sizeof (target_flags);
7047
7048 /* Check string options. */
7049 #ifdef TARGET_OPTIONS
7050 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7051 {
7052 const char *str = *target_options[i].variable;
7053 size_t l;
7054 if (! str)
7055 str = "";
7056 l = strlen (str) + 1;
7057 if (len < l || memcmp (data, str, l) != 0)
7058 {
7059 flag_that_differs = target_options[i].prefix;
7060 goto make_message;
7061 }
7062 data += l;
7063 len -= l;
7064 }
7065 #endif
7066
7067 return NULL;
7068
7069 make_message:
7070 {
7071 char *r;
7072 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7073 flag_that_differs);
7074 if (r == NULL)
7075 return _("out of memory");
7076 return r;
7077 }
7078 }
7079
7080 /* Predicates used by the templates. */
7081
7082 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7083 Used only in general_movsrc_operand. */
7084
7085 int
system_reg_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7086 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7087 {
7088 switch (REGNO (op))
7089 {
7090 case PR_REG:
7091 case MACL_REG:
7092 case MACH_REG:
7093 return 1;
7094 }
7095 return 0;
7096 }
7097
7098 /* Returns 1 if OP can be source of a simple move operation.
7099 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7100 invalid as are subregs of system registers. */
7101
7102 int
general_movsrc_operand(rtx op,enum machine_mode mode)7103 general_movsrc_operand (rtx op, enum machine_mode mode)
7104 {
7105 if (GET_CODE (op) == MEM)
7106 {
7107 rtx inside = XEXP (op, 0);
7108 if (GET_CODE (inside) == CONST)
7109 inside = XEXP (inside, 0);
7110
7111 if (GET_CODE (inside) == LABEL_REF)
7112 return 1;
7113
7114 if (GET_CODE (inside) == PLUS
7115 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7116 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7117 return 1;
7118
7119 /* Only post inc allowed. */
7120 if (GET_CODE (inside) == PRE_DEC)
7121 return 0;
7122 }
7123
7124 if ((mode == QImode || mode == HImode)
7125 && (GET_CODE (op) == SUBREG
7126 && GET_CODE (XEXP (op, 0)) == REG
7127 && system_reg_operand (XEXP (op, 0), mode)))
7128 return 0;
7129
7130 return general_operand (op, mode);
7131 }
7132
7133 /* Returns 1 if OP can be a destination of a move.
7134 Same as general_operand, but no preinc allowed. */
7135
7136 int
general_movdst_operand(rtx op,enum machine_mode mode)7137 general_movdst_operand (rtx op, enum machine_mode mode)
7138 {
7139 /* Only pre dec allowed. */
7140 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7141 return 0;
7142
7143 return general_operand (op, mode);
7144 }
7145
7146 /* Returns 1 if OP is a normal arithmetic register. */
7147
7148 int
arith_reg_operand(rtx op,enum machine_mode mode)7149 arith_reg_operand (rtx op, enum machine_mode mode)
7150 {
7151 if (register_operand (op, mode))
7152 {
7153 int regno;
7154
7155 if (GET_CODE (op) == REG)
7156 regno = REGNO (op);
7157 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7158 regno = REGNO (SUBREG_REG (op));
7159 else
7160 return 1;
7161
7162 return (regno != T_REG && regno != PR_REG
7163 && ! TARGET_REGISTER_P (regno)
7164 && (regno != FPUL_REG || TARGET_SH4)
7165 && regno != MACH_REG && regno != MACL_REG);
7166 }
7167 return 0;
7168 }
7169
7170 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7171 because this would lead to missing sign extensions when truncating from
7172 DImode to SImode. */
7173 int
arith_reg_dest(rtx op,enum machine_mode mode)7174 arith_reg_dest (rtx op, enum machine_mode mode)
7175 {
7176 if (mode == DImode && GET_CODE (op) == SUBREG
7177 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7178 return 0;
7179 return arith_reg_operand (op, mode);
7180 }
7181
7182 int
int_gpr_dest(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7183 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7184 {
7185 enum machine_mode op_mode = GET_MODE (op);
7186
7187 if (GET_MODE_CLASS (op_mode) != MODE_INT
7188 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7189 return 0;
7190 if (! reload_completed)
7191 return 0;
7192 return true_regnum (op) <= LAST_GENERAL_REG;
7193 }
7194
7195 int
fp_arith_reg_operand(rtx op,enum machine_mode mode)7196 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7197 {
7198 if (register_operand (op, mode))
7199 {
7200 int regno;
7201
7202 if (GET_CODE (op) == REG)
7203 regno = REGNO (op);
7204 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7205 regno = REGNO (SUBREG_REG (op));
7206 else
7207 return 1;
7208
7209 return (regno >= FIRST_PSEUDO_REGISTER
7210 || FP_REGISTER_P (regno));
7211 }
7212 return 0;
7213 }
7214
7215 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7216
7217 int
arith_operand(rtx op,enum machine_mode mode)7218 arith_operand (rtx op, enum machine_mode mode)
7219 {
7220 if (arith_reg_operand (op, mode))
7221 return 1;
7222
7223 if (TARGET_SHMEDIA)
7224 {
7225 /* FIXME: We should be checking whether the CONST_INT fits in a
7226 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7227 attempting to transform a sequence of two 64-bit sets of the
7228 same register from literal constants into a set and an add,
7229 when the difference is too wide for an add. */
7230 if (GET_CODE (op) == CONST_INT
7231 || EXTRA_CONSTRAINT_C16 (op))
7232 return 1;
7233 else
7234 return 0;
7235 }
7236 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7237 return 1;
7238
7239 return 0;
7240 }
7241
7242 /* Returns 1 if OP is a valid source operand for a compare insn. */
7243
7244 int
arith_reg_or_0_operand(rtx op,enum machine_mode mode)7245 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7246 {
7247 if (arith_reg_operand (op, mode))
7248 return 1;
7249
7250 if (EXTRA_CONSTRAINT_Z (op))
7251 return 1;
7252
7253 return 0;
7254 }
7255
7256 /* Return 1 if OP is a valid source operand for an SHmedia operation
7257 that takes either a register or a 6-bit immediate. */
7258
7259 int
shmedia_6bit_operand(rtx op,enum machine_mode mode)7260 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7261 {
7262 return (arith_reg_operand (op, mode)
7263 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7264 }
7265
7266 /* Returns 1 if OP is a valid source operand for a logical operation. */
7267
7268 int
logical_operand(rtx op,enum machine_mode mode)7269 logical_operand (rtx op, enum machine_mode mode)
7270 {
7271 if (arith_reg_operand (op, mode))
7272 return 1;
7273
7274 if (TARGET_SHMEDIA)
7275 {
7276 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7277 return 1;
7278 else
7279 return 0;
7280 }
7281 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7282 return 1;
7283
7284 return 0;
7285 }
7286
7287 int
and_operand(rtx op,enum machine_mode mode)7288 and_operand (rtx op, enum machine_mode mode)
7289 {
7290 if (logical_operand (op, mode))
7291 return 1;
7292
7293 /* Check mshflo.l / mshflhi.l opportunities. */
7294 if (TARGET_SHMEDIA
7295 && mode == DImode
7296 && GET_CODE (op) == CONST_INT
7297 && CONST_OK_FOR_J16 (INTVAL (op)))
7298 return 1;
7299
7300 return 0;
7301 }
7302
7303 /* Nonzero if OP is a floating point value with value 0.0. */
7304
7305 int
fp_zero_operand(rtx op)7306 fp_zero_operand (rtx op)
7307 {
7308 REAL_VALUE_TYPE r;
7309
7310 if (GET_MODE (op) != SFmode)
7311 return 0;
7312
7313 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7314 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7315 }
7316
7317 /* Nonzero if OP is a floating point value with value 1.0. */
7318
7319 int
fp_one_operand(rtx op)7320 fp_one_operand (rtx op)
7321 {
7322 REAL_VALUE_TYPE r;
7323
7324 if (GET_MODE (op) != SFmode)
7325 return 0;
7326
7327 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7328 return REAL_VALUES_EQUAL (r, dconst1);
7329 }
7330
/* For -m4 and -m4-single-only, mode switching is used.  If we are
   compiling without -mfmovd, movsf_ie isn't taken into account for
   mode switching.  We could check in machine_dependent_reorg for
   cases where we know we are in single precision mode, but there is
   no interface to find that out during reload, so we must avoid
   choosing an fldi alternative during reload and thus failing to
   allocate a scratch register for the constant loading.  */
7338 int
fldi_ok(void)7339 fldi_ok (void)
7340 {
7341 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7342 }
7343
7344 int
tertiary_reload_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7345 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7346 {
7347 enum rtx_code code = GET_CODE (op);
7348 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7349 }
7350
7351 int
fpscr_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7352 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7353 {
7354 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7355 && GET_MODE (op) == PSImode);
7356 }
7357
7358 int
fpul_operand(rtx op,enum machine_mode mode)7359 fpul_operand (rtx op, enum machine_mode mode)
7360 {
7361 if (TARGET_SHMEDIA)
7362 return fp_arith_reg_operand (op, mode);
7363
7364 return (GET_CODE (op) == REG
7365 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7366 && GET_MODE (op) == mode);
7367 }
7368
7369 int
symbol_ref_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7370 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7371 {
7372 return (GET_CODE (op) == SYMBOL_REF);
7373 }
7374
7375 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7376 int
tls_symbolic_operand(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7377 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7378 {
7379 if (GET_CODE (op) != SYMBOL_REF)
7380 return 0;
7381 return SYMBOL_REF_TLS_MODEL (op);
7382 }
7383
7384 int
commutative_float_operator(rtx op,enum machine_mode mode)7385 commutative_float_operator (rtx op, enum machine_mode mode)
7386 {
7387 if (GET_MODE (op) != mode)
7388 return 0;
7389 switch (GET_CODE (op))
7390 {
7391 case PLUS:
7392 case MULT:
7393 return 1;
7394 default:
7395 break;
7396 }
7397 return 0;
7398 }
7399
7400 int
noncommutative_float_operator(rtx op,enum machine_mode mode)7401 noncommutative_float_operator (rtx op, enum machine_mode mode)
7402 {
7403 if (GET_MODE (op) != mode)
7404 return 0;
7405 switch (GET_CODE (op))
7406 {
7407 case MINUS:
7408 case DIV:
7409 return 1;
7410 default:
7411 break;
7412 }
7413 return 0;
7414 }
7415
7416 int
unary_float_operator(rtx op,enum machine_mode mode)7417 unary_float_operator (rtx op, enum machine_mode mode)
7418 {
7419 if (GET_MODE (op) != mode)
7420 return 0;
7421 switch (GET_CODE (op))
7422 {
7423 case ABS:
7424 case NEG:
7425 case SQRT:
7426 return 1;
7427 default:
7428 break;
7429 }
7430 return 0;
7431 }
7432
7433 int
binary_float_operator(rtx op,enum machine_mode mode)7434 binary_float_operator (rtx op, enum machine_mode mode)
7435 {
7436 if (GET_MODE (op) != mode)
7437 return 0;
7438 switch (GET_CODE (op))
7439 {
7440 case PLUS:
7441 case MINUS:
7442 case MULT:
7443 case DIV:
7444 return 1;
7445 default:
7446 break;
7447 }
7448 return 0;
7449 }
7450
7451 int
binary_logical_operator(rtx op,enum machine_mode mode)7452 binary_logical_operator (rtx op, enum machine_mode mode)
7453 {
7454 if (GET_MODE (op) != mode)
7455 return 0;
7456 switch (GET_CODE (op))
7457 {
7458 case IOR:
7459 case AND:
7460 case XOR:
7461 return 1;
7462 default:
7463 break;
7464 }
7465 return 0;
7466 }
7467
7468 int
equality_comparison_operator(rtx op,enum machine_mode mode)7469 equality_comparison_operator (rtx op, enum machine_mode mode)
7470 {
7471 return ((mode == VOIDmode || GET_MODE (op) == mode)
7472 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7473 }
7474
/* Return 1 if OP is a greater-than style comparison (GT, GE, GTU or
   GEU).  A MODE of VOIDmode matches any operator mode.

   Fixed: the mode guard used `GET_MODE (op) == mode', which rejected
   operators whose mode MATCHED the requested one -- the opposite of
   the check in equality_comparison_operator above.  It now rejects a
   mismatch, consistently with the other comparison predicates.  */
int greater_comparison_operator (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case GT:
    case GE:
    case GTU:
    case GEU:
      return 1;
    default:
      return 0;
    }
}
7490
/* Return 1 if OP is a less-than style comparison (LT, LE, LTU or
   LEU).  A MODE of VOIDmode matches any operator mode.

   Fixed: the mode guard used `GET_MODE (op) == mode', which rejected
   operators whose mode MATCHED the requested one -- the opposite of
   the check in equality_comparison_operator above.  It now rejects a
   mismatch, consistently with the other comparison predicates.  */
int less_comparison_operator (rtx op, enum machine_mode mode)
{
  if (mode != VOIDmode && GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case LT:
    case LE:
    case LTU:
    case LEU:
      return 1;
    default:
      return 0;
    }
}
7506
7507 /* Accept pseudos and branch target registers. */
7508 int
target_reg_operand(rtx op,enum machine_mode mode)7509 target_reg_operand (rtx op, enum machine_mode mode)
7510 {
7511 if (mode != DImode
7512 || GET_MODE (op) != DImode)
7513 return 0;
7514
7515 if (GET_CODE (op) == SUBREG)
7516 op = XEXP (op, 0);
7517
7518 if (GET_CODE (op) != REG)
7519 return 0;
7520
7521 /* We must protect ourselves from matching pseudos that are virtual
7522 register, because they will eventually be replaced with hardware
7523 registers that aren't branch-target registers. */
7524 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7525 || TARGET_REGISTER_P (REGNO (op)))
7526 return 1;
7527
7528 return 0;
7529 }
7530
7531 /* Same as target_reg_operand, except that label_refs and symbol_refs
7532 are accepted before reload. */
7533 int
target_operand(rtx op,enum machine_mode mode)7534 target_operand (rtx op, enum machine_mode mode)
7535 {
7536 if (mode != DImode)
7537 return 0;
7538
7539 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7540 && EXTRA_CONSTRAINT_Csy (op))
7541 return ! reload_completed;
7542
7543 return target_reg_operand (op, mode);
7544 }
7545
7546 int
mextr_bit_offset(rtx op,enum machine_mode mode ATTRIBUTE_UNUSED)7547 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7548 {
7549 HOST_WIDE_INT i;
7550
7551 if (GET_CODE (op) != CONST_INT)
7552 return 0;
7553 i = INTVAL (op);
7554 return i >= 1*8 && i <= 7*8 && (i & 7) == 0;
7555 }
7556
7557 int
extend_reg_operand(rtx op,enum machine_mode mode)7558 extend_reg_operand (rtx op, enum machine_mode mode)
7559 {
7560 return (GET_CODE (op) == TRUNCATE
7561 ? arith_operand
7562 : arith_reg_operand) (op, mode);
7563 }
7564
7565 int
trunc_hi_operand(rtx op,enum machine_mode mode)7566 trunc_hi_operand (rtx op, enum machine_mode mode)
7567 {
7568 enum machine_mode op_mode = GET_MODE (op);
7569
7570 if (op_mode != SImode && op_mode != DImode
7571 && op_mode != V4HImode && op_mode != V2SImode)
7572 return 0;
7573 return extend_reg_operand (op, mode);
7574 }
7575
7576 int
extend_reg_or_0_operand(rtx op,enum machine_mode mode)7577 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7578 {
7579 return (GET_CODE (op) == TRUNCATE
7580 ? arith_operand
7581 : arith_reg_or_0_operand) (op, mode);
7582 }
7583
7584 int
general_extend_operand(rtx op,enum machine_mode mode)7585 general_extend_operand (rtx op, enum machine_mode mode)
7586 {
7587 return (GET_CODE (op) == TRUNCATE
7588 ? arith_operand
7589 : nonimmediate_operand) (op, mode);
7590 }
7591
7592 int
inqhi_operand(rtx op,enum machine_mode mode)7593 inqhi_operand (rtx op, enum machine_mode mode)
7594 {
7595 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7596 return 0;
7597 op = XEXP (op, 0);
7598 /* Can't use true_regnum here because copy_cost wants to know about
7599 SECONDARY_INPUT_RELOAD_CLASS. */
7600 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7601 }
7602
/* Return 1 if V is a CONST_VECTOR or PARALLEL whose elements all
   repeat, so the value can be synthesized by replicating a single
   element (for byte-sized elements, a repeating pair of elements).
   MODE must match V's mode, or be VOIDmode to accept any mode.  */
int
sh_rep_vec (rtx v, enum machine_mode mode)
{
  int i;
  rtx x, y;

  if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
      || (GET_MODE (v) != mode && mode != VOIDmode))
    return 0;
  /* X is the last element; for byte elements, Y is its partner in the
     last pair.  */
  i = XVECLEN (v, 0) - 2;
  x = XVECEXP (v, 0, i + 1);
  if (GET_MODE_UNIT_SIZE (mode) == 1)
    {
      /* Byte elements: every pair must match the last pair.  */
      y = XVECEXP (v, 0, i);
      for (i -= 2 ; i >= 0; i -= 2)
	if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
	    || ! rtx_equal_p (XVECEXP (v, 0, i), y))
	  return 0;
    }
  else
    /* NOTE(review): this branch uses pointer equality rather than
       rtx_equal_p, which only works if equal elements are shared
       rtxes (e.g. CONST_INTs) -- confirm that is intended, given
       PARALLELs are accepted too.  */
    for (; i >= 0; i--)
      if (XVECEXP (v, 0, i) != x)
	return 0;
  return 1;
}
7628
7629 /* Determine if V is a constant vector matching MODE with only one element
7630 that is not a sign extension. Two byte-sized elements count as one. */
/* Determine if V is a constant vector matching MODE with only one element
   that is not a sign extension.  Two byte-sized elements count as one.  */
int
sh_1el_vec (rtx v, enum machine_mode mode)
{
  int unit_size;
  int i, last, least, sign_ix;
  rtx sign;

  if (GET_CODE (v) != CONST_VECTOR
      || (GET_MODE (v) != mode && mode != VOIDmode))
    return 0;
  /* Determine numbers of last and of least significant elements.  */
  last = XVECLEN (v, 0) - 1;
  least = TARGET_LITTLE_ENDIAN ? 0 : last;
  if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
    return 0;
  /* For byte-sized elements the significant value spans two elements,
     so the sign bit lives in the second-least-significant one.  */
  sign_ix = least;
  if (GET_MODE_UNIT_SIZE (mode) == 1)
    sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
  if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
    return 0;
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
  /* Expected fill value for all other elements: all-ones if the sign
     element's top bit is set, zero otherwise.  */
  sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
	  ? constm1_rtx : const0_rtx);
  i = XVECLEN (v, 0) - 1;
  do
    /* NOTE(review): the do/while exits before examining index 0; on
       big-endian targets that element is neither LEAST nor SIGN_IX --
       confirm it is intentionally left unchecked.  Element comparison
       relies on CONST_INT sharing (pointer equality with const0_rtx /
       constm1_rtx).  */
    if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
      return 0;
  while (--i);
  return 1;
}
7661
7662 int
sh_const_vec(rtx v,enum machine_mode mode)7663 sh_const_vec (rtx v, enum machine_mode mode)
7664 {
7665 int i;
7666
7667 if (GET_CODE (v) != CONST_VECTOR
7668 || (GET_MODE (v) != mode && mode != VOIDmode))
7669 return 0;
7670 i = XVECLEN (v, 0) - 1;
7671 for (; i >= 0; i--)
7672 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7673 return 0;
7674 return 1;
7675 }
7676
7677 /* Return the destination address of a branch. */
7678
7679 static int
branch_dest(rtx branch)7680 branch_dest (rtx branch)
7681 {
7682 rtx dest = SET_SRC (PATTERN (branch));
7683 int dest_uid;
7684
7685 if (GET_CODE (dest) == IF_THEN_ELSE)
7686 dest = XEXP (dest, 1);
7687 dest = XEXP (dest, 0);
7688 dest_uid = INSN_UID (dest);
7689 return INSN_ADDRESSES (dest_uid);
7690 }
7691
7692 /* Return nonzero if REG is not used after INSN.
7693 We assume REG is a reload reg, and therefore does
7694 not live past labels. It may live past calls or jumps though. */
/* Return nonzero if REG is not used after INSN.
   We assume REG is a reload reg, and therefore does
   not live past labels.  It may live past calls or jumps though.  */
int
reg_unused_after (rtx reg, rtx insn)
{
  enum rtx_code code;
  rtx set;

  /* If the reg is set by this instruction, then it is safe for our
     case.  Disregard the case where this is a store to memory, since
     we are checking a register used in the store address.  */
  set = single_set (insn);
  if (set && GET_CODE (SET_DEST (set)) != MEM
      && reg_overlap_mentioned_p (reg, SET_DEST (set)))
    return 1;

  /* Scan forward through the remaining insns.  */
  while ((insn = NEXT_INSN (insn)))
    {
      code = GET_CODE (insn);

#if 0
      /* If this is a label that existed before reload, then the register
	 is dead here.  However, if this is a label added by reorg, then
	 the register may still be live here.  We can't tell the difference,
	 so we just ignore labels completely.  */
      if (code == CODE_LABEL)
	return 1;
      /* else */
#endif

      /* Conservatively assume REG is live across a jump.  */
      if (code == JUMP_INSN)
	return 0;

      /* If this is a sequence, we must handle them all at once.
	 We could have for instance a call that sets the target register,
	 and an insn in a delay slot that uses the register.  In this case,
	 we must return 0.  */
      else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
	{
	  int i;
	  int retval = 0;

	  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
	    {
	      rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
	      rtx set = single_set (this_insn);

	      /* Remember the strongest insn kind seen in the sequence.  */
	      if (GET_CODE (this_insn) == CALL_INSN)
		code = CALL_INSN;
	      else if (GET_CODE (this_insn) == JUMP_INSN)
		{
		  if (INSN_ANNULLED_BRANCH_P (this_insn))
		    return 0;
		  code = JUMP_INSN;
		}

	      /* Any read of REG means it is live.  */
	      if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
		return 0;
	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
		{
		  /* A register set kills REG; a store whose address
		     mentions REG is a use.  */
		  if (GET_CODE (SET_DEST (set)) != MEM)
		    retval = 1;
		  else
		    return 0;
		}
	      if (set == 0
		  && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
		return 0;
	    }
	  if (retval == 1)
	    return 1;
	  else if (code == JUMP_INSN)
	    return 0;
	}
      /* Any other real insn: check for a use or a set of REG.  */
      else if (GET_RTX_CLASS (code) == 'i')
	{
	  rtx set = single_set (insn);

	  if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
	    return 0;
	  if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
	    return GET_CODE (SET_DEST (set)) != MEM;
	  if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
	    return 0;
	}

      /* A call-clobbered reload reg does not survive a call.  */
      if (code == CALL_INSN && call_used_regs[REGNO (reg)])
	return 1;
    }
  return 1;
}
7784
7785 #include "ggc.h"
7786
/* Cached REG rtx for the FPSCR register, shared by all users and
   preserved across garbage collection.  */
static GTY(()) rtx fpscr_rtx;

/* Return the (shared) PSImode REG rtx for FPSCR, creating it lazily
   on first use.  The rtx is re-marked as a user register on every
   call until reload has completed and the machine dependent reorg
   pass is past SH_AFTER_MDEP_REORG.  */
rtx
get_fpscr_rtx (void)
{
  if (! fpscr_rtx)
    {
      fpscr_rtx = gen_rtx (REG, PSImode, FPSCR_REG);
      REG_USERVAR_P (fpscr_rtx) = 1;
      mark_user_reg (fpscr_rtx);
    }
  /* Keep refreshing the user-register marking while passes that could
     clear it may still run.  */
  if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
    mark_user_reg (fpscr_rtx);
  return fpscr_rtx;
}
7801
7802 void
emit_sf_insn(rtx pat)7803 emit_sf_insn (rtx pat)
7804 {
7805 emit_insn (pat);
7806 }
7807
7808 void
emit_df_insn(rtx pat)7809 emit_df_insn (rtx pat)
7810 {
7811 emit_insn (pat);
7812 }
7813
7814 void
expand_sf_unop(rtx (* fun)(rtx,rtx,rtx),rtx * operands)7815 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7816 {
7817 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7818 }
7819
7820 void
expand_sf_binop(rtx (* fun)(rtx,rtx,rtx,rtx),rtx * operands)7821 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7822 {
7823 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7824 get_fpscr_rtx ()));
7825 }
7826
7827 void
expand_df_unop(rtx (* fun)(rtx,rtx,rtx),rtx * operands)7828 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7829 {
7830 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7831 }
7832
7833 void
expand_df_binop(rtx (* fun)(rtx,rtx,rtx,rtx),rtx * operands)7834 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7835 {
7836 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7837 get_fpscr_rtx ()));
7838 }
7839
7840 /* ??? gcc does flow analysis strictly after common subexpression
7841 elimination. As a result, common subexpression elimination fails
7842 when there are some intervening statements setting the same register.
7843 If we did nothing about this, this would hurt the precision switching
7844 for SH4 badly. There is some cse after reload, but it is unable to
7845 undo the extra register pressure from the unused instructions, and
7846 it cannot remove auto-increment loads.
7847
7848 A C code example that shows this flow/cse weakness for (at least) SH
7849 and sparc (as of gcc ss-970706) is this:
7850
7851 double
7852 f(double a)
7853 {
7854 double d;
7855 d = 0.1;
7856 a += d;
7857 d = 1.1;
7858 d = 0.1;
7859 a *= d;
7860 return a;
7861 }
7862
7863 So we add another pass before common subexpression elimination, to
7864 remove assignments that are dead due to a following assignment in the
7865 same basic block. */
7866
/* Helper for the dead-store-removal pass described above.  Walk rtx X
   and clear the REG_SET_BLOCK entry for every register X uses (reads),
   so that a pending store to such a register is no longer treated as
   dead.  */
static void
mark_use (rtx x, rtx *reg_set_block)
{
  enum rtx_code code;

  if (! x)
    return;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	int regno = REGNO (x);
	/* A hard register value may occupy several consecutive
	   registers; clear the entry for each of them.  */
	int nregs = (regno < FIRST_PSEUDO_REGISTER
		     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
		     : 1);
	do
	  {
	    reg_set_block[regno + nregs - 1] = 0;
	  }
	while (--nregs);
	break;
      }
    case SET:
      {
	rtx dest = SET_DEST (x);

	/* A register destination is a definition, not a use; but a
	   non-register destination (e.g. MEM) uses the registers in
	   its address.  */
	if (GET_CODE (dest) == SUBREG)
	  dest = SUBREG_REG (dest);
	if (GET_CODE (dest) != REG)
	  mark_use (dest, reg_set_block);
	mark_use (SET_SRC (x), reg_set_block);
	break;
      }
    case CLOBBER:
      /* A clobber is not a use.  */
      break;
    default:
      {
	/* Recurse into all sub-expressions and sub-vectors.  */
	const char *fmt = GET_RTX_FORMAT (code);
	int i, j;
	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
	  {
	    if (fmt[i] == 'e')
	      mark_use (XEXP (x, i), reg_set_block);
	    else if (fmt[i] == 'E')
	      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
		mark_use (XVECEXP (x, i, j), reg_set_block);
	  }
	break;
      }
    }
}
7919
7920 static rtx get_free_reg (HARD_REG_SET);
7921
7922 /* This function returns a register to use to load the address to load
7923 the fpscr from. Currently it always returns r1 or r7, but when we are
7924 able to use pseudo registers after combine, or have a better mechanism
7925 for choosing a register, it should be done here. */
7926 /* REGS_LIVE is the liveness information for the point for which we
7927 need this allocation. In some bare-bones exit blocks, r1 is live at the
7928 start. We can even have all of r0..r3 being live:
7929 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
   The insn before which the new insns are placed will clobber the
   register we return.  If a basic block consists only of setting the
   return value register to a pseudo and using that register, the
   return value is not live before or after this block, yet we'll
   insert our insns right in the middle.  */
7935
7936 static rtx
get_free_reg(HARD_REG_SET regs_live)7937 get_free_reg (HARD_REG_SET regs_live)
7938 {
7939 if (! TEST_HARD_REG_BIT (regs_live, 1))
7940 return gen_rtx_REG (Pmode, 1);
7941
7942 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7943 there shouldn't be anything but a jump before the function end. */
7944 if (! TEST_HARD_REG_BIT (regs_live, 7))
7945 return gen_rtx_REG (Pmode, 7);
7946
7947 abort ();
7948 }
7949
7950 /* This function will set the fpscr from memory.
7951 MODE is the mode we are setting it to. */
7952 void
fpscr_set_from_mem(int mode,HARD_REG_SET regs_live)7953 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
7954 {
7955 enum attr_fp_mode fp_mode = mode;
7956 rtx addr_reg = get_free_reg (regs_live);
7957
7958 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7959 emit_insn (gen_fpu_switch1 (addr_reg));
7960 else
7961 emit_insn (gen_fpu_switch0 (addr_reg));
7962 }
7963
7964 /* Is the given character a logical line separator for the assembler? */
7965 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7966 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7967 #endif
7968
/* Return the number of bytes to add to INSN's length beyond what its
   pattern implies: two for the nop needed in an unfilled delay slot,
   and extra bytes for sh-dsp parallel-processing instructions found
   in inline asm templates.  */
int
sh_insn_length_adjustment (rtx insn)
{
  /* Instructions with unfilled delay slots take up an extra two bytes for
     the nop in the delay slot.  */
  if (((GET_CODE (insn) == INSN
	&& GET_CODE (PATTERN (insn)) != USE
	&& GET_CODE (PATTERN (insn)) != CLOBBER)
       || GET_CODE (insn) == CALL_INSN
       || (GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC))
      && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
      && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
    return 2;

  /* SH2e has a bug that prevents the use of annulled branches, so if
     the delay slot is not filled, we'll have to put a NOP in it.  */
  if (sh_cpu == CPU_SH2E
      && GET_CODE (insn) == JUMP_INSN
      && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
      && GET_CODE (PATTERN (insn)) != ADDR_VEC
      && get_attr_type (insn) == TYPE_CBRANCH
      && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
    return 2;

  /* sh-dsp parallel processing insn take four bytes instead of two.  */

  if (GET_CODE (insn) == INSN)
    {
      int sum = 0;
      rtx body = PATTERN (insn);
      const char *template;
      char c;
      int maybe_label = 1;

      /* Only inline-asm patterns need this scan.  */
      if (GET_CODE (body) == ASM_INPUT)
	template = XSTR (body, 0);
      else if (asm_noperands (body) >= 0)
	template
	  = decode_asm_operands (body, NULL, NULL, NULL, NULL);
      else
	return 0;
      /* Scan the template one logical asm line at a time.  */
      do
	{
	  int ppi_adjust = 0;

	  /* Skip leading whitespace to the mnemonic.  */
	  do
	    c = *template++;
	  while (c == ' ' || c == '\t');
	  /* all sh-dsp parallel-processing insns start with p.
	     The only non-ppi sh insn starting with p is pref.
	     The only ppi starting with pr is prnd.  */
	  if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
	    ppi_adjust = 2;
	  /* The repeat pseudo-insn expands to three insns, a total of
	     six bytes in size.  */
	  else if ((c == 'r' || c == 'R')
		   && ! strncasecmp ("epeat", template, 5))
	    ppi_adjust = 4;
	  /* Consume the rest of this logical line, watching for labels
	     (a ':' outside of quotes cancels the adjustment).  */
	  while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
	    {
	      /* If this is a label, it is obviously not a ppi insn.  */
	      if (c == ':' && maybe_label)
		{
		  ppi_adjust = 0;
		  break;
		}
	      else if (c == '\'' || c == '"')
		maybe_label = 0;
	      c = *template++;
	    }
	  sum += ppi_adjust;
	  maybe_label = c != ':';
	}
      while (c);
      return sum;
    }
  return 0;
}
8049
8050 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8051 isn't protected by a PIC unspec. */
8052 int
nonpic_symbol_mentioned_p(rtx x)8053 nonpic_symbol_mentioned_p (rtx x)
8054 {
8055 register const char *fmt;
8056 register int i;
8057
8058 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8059 || GET_CODE (x) == PC)
8060 return 1;
8061
8062 /* We don't want to look into the possible MEM location of a
8063 CONST_DOUBLE, since we're not going to use it, in general. */
8064 if (GET_CODE (x) == CONST_DOUBLE)
8065 return 0;
8066
8067 if (GET_CODE (x) == UNSPEC
8068 && (XINT (x, 1) == UNSPEC_PIC
8069 || XINT (x, 1) == UNSPEC_GOT
8070 || XINT (x, 1) == UNSPEC_GOTOFF
8071 || XINT (x, 1) == UNSPEC_GOTPLT
8072 || XINT (x, 1) == UNSPEC_GOTTPOFF
8073 || XINT (x, 1) == UNSPEC_DTPOFF
8074 || XINT (x, 1) == UNSPEC_PLT))
8075 return 0;
8076
8077 fmt = GET_RTX_FORMAT (GET_CODE (x));
8078 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8079 {
8080 if (fmt[i] == 'E')
8081 {
8082 register int j;
8083
8084 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8085 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8086 return 1;
8087 }
8088 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8089 return 1;
8090 }
8091
8092 return 0;
8093 }
8094
8095 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8096 @GOTOFF in `reg'. */
8097 rtx
legitimize_pic_address(rtx orig,enum machine_mode mode ATTRIBUTE_UNUSED,rtx reg)8098 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8099 rtx reg)
8100 {
8101 if (tls_symbolic_operand (orig, Pmode))
8102 return orig;
8103
8104 if (GET_CODE (orig) == LABEL_REF
8105 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8106 {
8107 if (reg == 0)
8108 reg = gen_reg_rtx (Pmode);
8109
8110 emit_insn (gen_symGOTOFF2reg (reg, orig));
8111 return reg;
8112 }
8113 else if (GET_CODE (orig) == SYMBOL_REF)
8114 {
8115 if (reg == 0)
8116 reg = gen_reg_rtx (Pmode);
8117
8118 emit_insn (gen_symGOT2reg (reg, orig));
8119 return reg;
8120 }
8121 return orig;
8122 }
8123
8124 /* Mark the use of a constant in the literal table. If the constant
8125 has multiple labels, make it unique. */
/* Mark the use of a constant in the literal table.  If the constant
   has multiple labels, make it unique.  X may be a LABEL_REF or a
   CODE_LABEL; returns the canonical (first) label for the constant.  */
static rtx
mark_constant_pool_use (rtx x)
{
  rtx insn, lab, pattern;

  if (x == NULL)
    return x;

  switch (GET_CODE (x))
    {
    case LABEL_REF:
      x = XEXP (x, 0);
      /* Fall through to handle the referenced CODE_LABEL.  */
    case CODE_LABEL:
      break;
    default:
      return x;
    }

  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
  lab = x;
  for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) != CODE_LABEL
	  || LABEL_REFS (insn) != NEXT_INSN (insn))
	break;
      lab = insn;
    }

  /* Mark the redundant labels as deleted.  */
  for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
    INSN_DELETED_P (insn) = 1;

  /* Mark constants in a window.  */
  for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) != INSN)
	continue;

      pattern = PATTERN (insn);
      if (GET_CODE (pattern) != UNSPEC_VOLATILE)
	continue;

      switch (XINT (pattern, 1))
	{
	case UNSPECV_CONST2:
	case UNSPECV_CONST4:
	case UNSPECV_CONST8:
	  /* Flag the constant as used.  */
	  XVECEXP (pattern, 0, 1) = const1_rtx;
	  break;
	case UNSPECV_WINDOW_END:
	  /* Stop at the end of the window containing X.  */
	  if (XVECEXP (pattern, 0, 0) == x)
	    return lab;
	  break;
	case UNSPECV_CONST_END:
	  return lab;
	default:
	  break;
	}
    }

  return lab;
}
8188
8189 /* Return true if it's possible to redirect BRANCH1 to the destination
8190 of an unconditional jump BRANCH2. We only want to do this if the
8191 resulting branch will have a short displacement. */
/* Return true if it's possible to redirect BRANCH1 to the destination
   of an unconditional jump BRANCH2.  We only want to do this if the
   resulting branch will have a short displacement.  */
int
sh_can_redirect_branch (rtx branch1, rtx branch2)
{
  if (flag_expensive_optimizations && simplejump_p (branch2))
    {
      rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
      rtx insn;
      int distance;

      /* Scan backwards, accumulating insn lengths, to see whether
	 DEST is within short-branch reach.
	 NOTE(review): this loop starts at NEXT_INSN (branch1) yet
	 steps with PREV_INSN -- confirm the initializer should not be
	 PREV_INSN (branch1).  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = PREV_INSN (insn))
	{
	  if (insn == dest)
	    return 1;
	  else
	    distance += get_attr_length (insn);
	}
      /* Likewise scan forwards.  */
      for (distance = 0, insn = NEXT_INSN (branch1);
	   insn && distance < 256;
	   insn = NEXT_INSN (insn))
	{
	  if (insn == dest)
	    return 1;
	  else
	    distance += get_attr_length (insn);
	}
    }
  return 0;
}
8222
8223 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8224 int
sh_hard_regno_rename_ok(unsigned int old_reg ATTRIBUTE_UNUSED,unsigned int new_reg)8225 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8226 unsigned int new_reg)
8227 {
8228
8229 /* Interrupt functions can only use registers that have already been
8230 saved by the prologue, even if they would normally be
8231 call-clobbered. */
8232
8233 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8234 return 0;
8235
8236 return 1;
8237 }
8238
/* Function to update the integer COST
   based on the relationship between INSN that is dependent on
   DEP_INSN through the dependence LINK.  The default is to make no
   adjustment to COST.  This can be used for example to specify to
   the scheduler that an output- or anti-dependence does not incur
   the same cost as a data-dependence.  The return value should be
   the new value for COST.  */
static int
sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
{
  rtx reg, use_pat;

  if (TARGET_SHMEDIA)
    {
      /* On SHmedia, if the dependence is an anti-dependence or
         output-dependence, there is no cost.  */
      if (REG_NOTE_KIND (link) != 0)
	cost = 0;

      /* Back-to-back multiply-accumulate insns chain with a
	 single-cycle latency.  */
      if (get_attr_is_mac_media (insn)
	  && get_attr_is_mac_media (dep_insn))
	cost = 1;
    }
  else if (REG_NOTE_KIND (link) == 0)
    {
      /* A true (flow) dependence.  */
      enum attr_type dep_type, type;

      /* Insns the recognizer does not know have no attributes;
	 leave COST unchanged.  */
      if (recog_memoized (insn) < 0
	  || recog_memoized (dep_insn) < 0)
	return cost;

      dep_type = get_attr_type (dep_insn);
      /* FP loads (and FP loads from the constant pool) deliver their
	 result one cycle earlier than the nominal latency.  */
      if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
	cost--;
      /* Likewise for SImode loads, except when feeding a call, whose
	 address is handled separately below.  */
      if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
	  && (type = get_attr_type (insn)) != TYPE_CALL
	  && type != TYPE_SFUNC)
	cost--;

      /* The only input for a call that is timing-critical is the
	 function's address.  */
      if (GET_CODE(insn) == CALL_INSN)
	{
	  /* Dig the CALL rtx out of a possible PARALLEL/SET wrapper.  */
	  rtx call = PATTERN (insn);

	  if (GET_CODE (call) == PARALLEL)
	    call = XVECEXP (call, 0 ,0);
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);
	  if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
	      && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
	    cost = 0;
	}
      /* Likewise, the most timing critical input for an sfuncs call
	 is the function address.  However, sfuncs typically start
	 using their arguments pretty quickly.
	 Assume a four cycle delay before they are needed.  */
      /* All sfunc calls are parallels with at least four components.
	 Exploit this to avoid unnecessary calls to sfunc_uses_reg.  */
      else if (GET_CODE (PATTERN (insn)) == PARALLEL
	       && XVECLEN (PATTERN (insn), 0) >= 4
	       && (reg = sfunc_uses_reg (insn)))
	{
	  if (! reg_set_p (reg, dep_insn))
	    cost -= 4;
	}
      /* When the preceding instruction loads the shift amount of
	 the following SHAD/SHLD, the latency of the load is increased
	 by 1 cycle.  */
      else if (TARGET_SH4
	       && get_attr_type (insn) == TYPE_DYN_SHIFT
	       && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
	       && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
					   XEXP (SET_SRC (single_set(insn)),
						 1)))
	cost++;
      /* When an LS group instruction with a latency of less than
	 3 cycles is followed by a double-precision floating-point
	 instruction, FIPR, or FTRV, the latency of the first
	 instruction is increased to 3 cycles.  */
      else if (cost < 3
	       && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
	       && get_attr_dfp_comp (insn) == DFP_COMP_YES)
	cost = 3;
      /* The lsw register of a double-precision computation is ready one
	 cycle earlier.  */
      else if (reload_completed
	       && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
	       && (use_pat = single_set (insn))
	       && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
				  SET_SRC (use_pat)))
	cost -= 1;

      if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
	  && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
	cost -= 1;
    }
  /* An anti-dependence penalty of two applies if the first insn is a double
     precision fadd / fsub / fmul.  */
  else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
	   && recog_memoized (dep_insn) >= 0
	   && get_attr_type (dep_insn) == TYPE_DFP_ARITH
	   /* A lot of alleged anti-flow dependences are fake,
	      so check this one is real.  */
	   && flow_dependent_p (dep_insn, insn))
    cost = 2;


  return cost;
}
8349
8350 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8351 if DEP_INSN is anti-flow dependent on INSN. */
8352 static int
flow_dependent_p(rtx insn,rtx dep_insn)8353 flow_dependent_p (rtx insn, rtx dep_insn)
8354 {
8355 rtx tmp = PATTERN (insn);
8356
8357 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8358 return tmp == NULL_RTX;
8359 }
8360
8361 /* A helper function for flow_dependent_p called through note_stores. */
8362 static void
flow_dependent_p_1(rtx x,rtx pat ATTRIBUTE_UNUSED,void * data)8363 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8364 {
8365 rtx * pinsn = (rtx *) data;
8366
8367 if (*pinsn && reg_referenced_p (x, *pinsn))
8368 *pinsn = NULL_RTX;
8369 }
8370
8371 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8372 'special function' patterns (type sfunc) that clobber pr, but that
8373 do not look like function calls to leaf_function_p. Hence we must
8374 do this extra check. */
8375 int
sh_pr_n_sets(void)8376 sh_pr_n_sets (void)
8377 {
8378 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8379 }
8380
8381 /* This Function returns nonzero if the DFA based scheduler interface
8382 is to be used. At present this is supported for the SH4 only. */
8383 static int
sh_use_dfa_interface(void)8384 sh_use_dfa_interface(void)
8385 {
8386 if (TARGET_HARD_SH4)
8387 return 1;
8388 else
8389 return 0;
8390 }
8391
8392 /* This function returns "2" to indicate dual issue for the SH4
8393 processor. To be used by the DFA pipeline description. */
8394 static int
sh_issue_rate(void)8395 sh_issue_rate(void)
8396 {
8397 if (TARGET_SUPERSCALAR)
8398 return 2;
8399 else
8400 return 1;
8401 }
8402
8403 /* SHmedia requires registers for branches, so we can't generate new
8404 branches past reload. */
8405 static bool
sh_cannot_modify_jumps_p(void)8406 sh_cannot_modify_jumps_p (void)
8407 {
8408 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8409 }
8410
8411 static int
sh_target_reg_class(void)8412 sh_target_reg_class (void)
8413 {
8414 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8415 }
8416
8417 static bool
sh_optimize_target_register_callee_saved(bool after_prologue_epilogue_gen)8418 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8419 {
8420 return (shmedia_space_reserved_for_target_registers
8421 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8422 }
8423
8424 static bool
sh_ms_bitfield_layout_p(record_type)8425 sh_ms_bitfield_layout_p (record_type)
8426 tree record_type ATTRIBUTE_UNUSED;
8427 {
8428 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8429 }
8430
8431 /*
8432 On the SH1..SH4, the trampoline looks like
8433 2 0002 D202 mov.l l2,r2
8434 1 0000 D301 mov.l l1,r3
8435 3 0004 422B jmp @r2
8436 4 0006 0009 nop
8437 5 0008 00000000 l1: .long area
8438 6 000c 00000000 l2: .long function
8439
8440 SH5 (compact) uses r1 instead of r3 for the static chain. */
8441
8442
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.
   TRAMP is the address of the trampoline storage.  Writes the code
   words into TRAMP and flushes the insn cache where needed.  */

void
sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  if (TARGET_SHMEDIA64)
    {
      rtx tramp_templ;
      int fixed_len;

      /* Encodings of "movi 0,r0" and "shori 0,r0"; the operand field
	 is OR'ed in below.  */
      rtx movi1 = GEN_INT (0xcc000010);
      rtx shori1 = GEN_INT (0xc8000010);
      rtx src, dst;

      /* The following trampoline works within a +- 128 KB range for cxt:
	 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
	 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
	 gettr tr1,r1; blink tr0,r63  */
      /* Address rounding makes it hard to compute the exact bounds of the
	 offset for this trampoline, but we have a rather generous offset
	 range, so frame_offset should do fine as an upper bound.  */
      if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
	{
	  /* ??? could optimize this trampoline initialization
	     by writing DImode words with two insns each.  */
	  rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
	  rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
	  insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  /* Or in ptb/u .,tr1 pattern */
	  insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
	  insn = force_operand (insn, NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
	  /* movi fnaddr >> 48,r0  */
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
	  /* shori fnaddr >> 32,r0  */
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
	  /* shori fnaddr >> 16,r0  */
	  insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
			  insn);
	  /* shori fnaddr,r0  */
	  insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
	  insn = gen_rtx_AND (DImode, insn, mask);
	  insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
	  insn = gen_lowpart (SImode, insn);
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
			  insn);
	  /* ptabs/l r0,tr0  */
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
			  GEN_INT (0x6bf10600));
	  /* gettr tr1,r1  */
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
			  GEN_INT (0x4415fc10));
	  /* blink tr0,r63  */
	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
			  GEN_INT (0x4401fff0));
	  emit_insn (gen_ic_invalidate_line (tramp));
	  return;
	}
      /* Static chain out of ptb range: copy a pre-assembled template
	 and append FNADDR and CXT as trailing data words.  */
      tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
      fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);

      tramp_templ = gen_datalabel_ref (tramp_templ);
      dst = gen_rtx_MEM (BLKmode, tramp);
      src = gen_rtx_MEM (BLKmode, tramp_templ);
      set_mem_align (dst, 256);
      set_mem_align (src, 64);
      emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);

      emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
		      fnaddr);
      emit_move_insn (gen_rtx_MEM (Pmode,
				   plus_constant (tramp,
						  fixed_len
						  + GET_MODE_SIZE (Pmode))),
		      cxt);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHMEDIA)
    {
      /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
	 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63  */
      rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
      rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
      /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010  concatenated,
	 rotated 10 right, and higher 16 bit of every 32 selected.  */
      rtx movishori
	= force_reg (V2HImode, (simplify_gen_subreg
				(V2HImode, GEN_INT (0x4330432), SImode, 0)));
      rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
      rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));

      tramp = force_reg (Pmode, tramp);
      fnaddr = force_reg (SImode, fnaddr);
      cxt = force_reg (SImode, cxt);
      /* Interleave the opcode halves with the halves of FNADDR to form
	 the movi/shori pair loading the function address.  */
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
      emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
      /* Same construction for the static chain value.  */
      emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
				 gen_rtx_SUBREG (V2HImode, cxt, 0),
				 movishori));
      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
      emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
      /* Pack {ptabs, cxt-load} and {cxt-load, blink} pairs in the
	 byte order required by the current endianness.  */
      if (TARGET_LITTLE_ENDIAN)
	{
	  emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
	  emit_insn (gen_mextr4 (quad2, cxtload, blink));
	}
      else
	{
	  emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
	  emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
	}
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
      emit_insn (gen_ic_invalidate_line (tramp));
      return;
    }
  else if (TARGET_SHCOMPACT)
    {
      /* SHcompact defers the whole job to an insn pattern.  */
      emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
      return;
    }
  /* SH1..SH4: store the two instruction words followed by the static
     chain and function address literals (see layout comment above).  */
  emit_move_insn (gen_rtx_MEM (SImode, tramp),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
		  gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
				SImode));
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
		  cxt);
  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
		  fnaddr);
  if (TARGET_HARVARD)
    {
      /* With split caches the stores above only reach the data cache;
	 invalidate the corresponding insn cache line(s).  */
      if (TARGET_USERMODE)
	emit_library_call (function_symbol ("__ic_invalidate"),
			   0, VOIDmode, 1, tramp, SImode);
      else
	emit_insn (gen_ic_invalidate_line (tramp));
    }
}
8599
8600 /* FIXME: This is overly conservative. A SHcompact function that
8601 receives arguments ``by reference'' will have them stored in its
8602 own stack frame, so it must not pass pointers or references to
8603 these arguments to other functions by means of sibling calls. */
8604 static bool
sh_function_ok_for_sibcall(tree decl,tree exp ATTRIBUTE_UNUSED)8605 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8606 {
8607 return (decl
8608 && (! TARGET_SHCOMPACT
8609 || current_function_args_info.stack_regs == 0)
8610 && ! sh_cfun_interrupt_handler_p ());
8611 }
8612
/* Machine specific built-in functions.  */

/* Descriptor for one SHmedia built-in function.  */
struct builtin_description
{
  const enum insn_code icode;	/* Insn code used to expand the built-in.  */
  const char *const name;	/* User-visible __builtin_* name.  */
  int signature;		/* Index into signature_args below.  */
};
8621
/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
static const char signature_args[][4] =
{
#define SH_BLTIN_V2SI2 0
  { 4, 4 },
#define SH_BLTIN_V4HI2 1
  { 4, 4 },
#define SH_BLTIN_V2SI3 2
  { 4, 4, 4 },
#define SH_BLTIN_V4HI3 3
  { 4, 4, 4 },
#define SH_BLTIN_V8QI3 4
  { 4, 4, 4 },
#define SH_BLTIN_MAC_HISI 5
  { 1, 4, 4, 1 },
#define SH_BLTIN_SH_HI 6
  { 4, 4, 1 },
#define SH_BLTIN_SH_SI 7
  { 4, 4, 1 },
#define SH_BLTIN_V4HI2V2SI 8
  { 4, 4, 4 },
#define SH_BLTIN_V4HI2V8QI 9
  { 4, 4, 4 },
#define SH_BLTIN_SISF 10
  { 4, 2 },
#define SH_BLTIN_LDUA_L 11
  { 2, 8 },
#define SH_BLTIN_LDUA_Q 12
  { 1, 8 },
#define SH_BLTIN_STUA_L 13
  { 0, 8, 2 },
#define SH_BLTIN_STUA_Q 14
  { 0, 8, 1 },
#define SH_BLTIN_UDI 15
  { 0, 8, 1 },
/* Signatures at or above this index get per-builtin (non-shared) types.  */
#define SH_BLTIN_NUM_SHARED_SIGNATURES 16
#define SH_BLTIN_2 16
#define SH_BLTIN_SU 16
  { 1, 2 },
#define SH_BLTIN_3 17
#define SH_BLTIN_SUS 17
  { 2, 2, 1 },
#define SH_BLTIN_PSSV 18
  { 0, 8, 2, 2 },
#define SH_BLTIN_XXUU 19
#define SH_BLTIN_UUUU 19
  { 1, 1, 1, 1 },
#define SH_BLTIN_PV 20
  { 0, 8 },
};
/* mcmv: operands considered unsigned.  */
/* mmulsum_wq, msad_ubq: result considered unsigned long long.  */
/* mperm: control value considered unsigned int.  */
/* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int.  */
/* mshards_q: returns signed short.  */
/* nsb: takes long long arg, returns unsigned char.  */
/* Table of SHmedia built-ins: insn code, user name, signature index.  */
static const struct builtin_description bdesc[] =
{
  { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
  { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
  { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
#if 0
  { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
  { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
#endif
  { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
  { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
  { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
  { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
  { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
  { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
  { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
  { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
  { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
  { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
  { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
  { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
  { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
  { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
  { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
  { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
  { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
  { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
  { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
  { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
  { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
  { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
  { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
  { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
  { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
  { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
  { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
  { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
  { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
  { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
  { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
  { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
  { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
#if 0
  { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
  { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
  { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
  { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
  { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
  { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
#endif
  { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
  { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
#if 0
  { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
  { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
#endif
};
8771
/* Register the SHmedia built-in functions from bdesc, constructing a
   function type for each from its signature_args entry.  Types for
   the shared signatures (index < SH_BLTIN_NUM_SHARED_SIGNATURES) are
   built once and cached.  */
static void
sh_media_init_builtins (void)
{
  tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
  const struct builtin_description *d;

  memset (shared, 0, sizeof shared);
  for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
    {
      tree type, arg_type;
      int signature = d->signature;
      int i;

      if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
	type = shared[signature];
      else
	{
	  int has_result = signature_args[signature][0] != 0;

	  /* Skip pointer-taking variants whose operand mode does not
	     match the ABI's pointer mode, and FP built-ins when no FPU
	     is available.  */
	  if (signature_args[signature][1] == 8
	      && (insn_data[d->icode].operand[has_result].mode != Pmode))
	    continue;
	  if (! TARGET_FPU_ANY
	      && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
	    continue;
	  /* Build the argument list back-to-front; i == 0 yields the
	     result type, terminating the loop.  */
	  type = void_list_node;
	  for (i = 3; ; i--)
	    {
	      int arg = signature_args[signature][i];
	      int opno = i - 1 + has_result;

	      if (arg == 8)
		arg_type = ptr_type_node;
	      else if (arg)
		arg_type = ((*lang_hooks.types.type_for_mode)
			    (insn_data[d->icode].operand[opno].mode,
			     (arg & 1)));
	      else if (i)
		continue;	/* No argument in this slot.  */
	      else
		arg_type = void_type_node;
	      if (i == 0)
		break;
	      type = tree_cons (NULL_TREE, arg_type, type);
	    }
	  type = build_function_type (arg_type, type);
	  if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
	    shared[signature] = type;
	}
      builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
			NULL, NULL_TREE);
    }
}
8825
8826 static void
sh_init_builtins(void)8827 sh_init_builtins (void)
8828 {
8829 if (TARGET_SHMEDIA)
8830 sh_media_init_builtins ();
8831 }
8832
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		   enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d = &bdesc[fcode];
  enum insn_code icode = d->icode;
  int signature = d->signature;
  enum machine_mode tmode = VOIDmode;
  int nop = 0, i;
  rtx op[4];
  rtx pat;

  /* When the built-in has a result, operand 0 of the insn is the
     destination; make sure TARGET is acceptable to its predicate.  */
  if (signature_args[signature][0])
    {
      if (ignore)
	return 0;

      tmode = insn_data[icode].operand[0].mode;
      if (! target
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[nop++] = target;
    }
  else
    target = 0;

  /* Expand up to three arguments, converting each to the mode the
     insn operand expects and copying to a register if the operand
     predicate rejects the expanded form.  */
  for (i = 1; i <= 3; i++, nop++)
    {
      tree arg;
      enum machine_mode opmode, argmode;

      if (! signature_args[signature][i])
	break;
      arg = TREE_VALUE (arglist);
      if (arg == error_mark_node)
	return const0_rtx;
      arglist = TREE_CHAIN (arglist);
      opmode = insn_data[icode].operand[nop].mode;
      argmode = TYPE_MODE (TREE_TYPE (arg));
      if (argmode != opmode)
	arg = build1 (NOP_EXPR,
		      (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
      op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
      if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
	op[nop] = copy_to_mode_reg (opmode, op[nop]);
    }

  /* Invoke the generator with the right arity.  */
  switch (nop)
    {
    case 1:
      pat = (*insn_data[d->icode].genfun) (op[0]);
      break;
    case 2:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
      break;
    case 3:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
      break;
    case 4:
      pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
      break;
    default:
      abort ();
    }
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
8912
8913 void
sh_expand_unop_v2sf(enum rtx_code code,rtx op0,rtx op1)8914 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
8915 {
8916 rtx sel0 = const0_rtx;
8917 rtx sel1 = const1_rtx;
8918 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
8919 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
8920
8921 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
8922 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
8923 }
8924
8925 void
sh_expand_binop_v2sf(enum rtx_code code,rtx op0,rtx op1,rtx op2)8926 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
8927 {
8928 rtx sel0 = const0_rtx;
8929 rtx sel1 = const1_rtx;
8930 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
8931 = gen_binary_sf_op;
8932 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
8933
8934 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
8935 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
8936 }
8937
8938 /* Return the class of registers for which a mode change from FROM to TO
8939 is invalid. */
8940 bool
sh_cannot_change_mode_class(enum machine_mode from,enum machine_mode to,enum reg_class class)8941 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
8942 enum reg_class class)
8943 {
8944 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
8945 {
8946 if (TARGET_LITTLE_ENDIAN)
8947 {
8948 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
8949 return reg_classes_intersect_p (DF_REGS, class);
8950 }
8951 else
8952 {
8953 if (GET_MODE_SIZE (from) < 8)
8954 return reg_classes_intersect_p (DF_HI_REGS, class);
8955 }
8956 }
8957 return 0;
8958 }
8959
8960
8961 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
8962 that label is used. */
8963
8964 void
sh_mark_label(rtx address,int nuses)8965 sh_mark_label (rtx address, int nuses)
8966 {
8967 if (GOTOFF_P (address))
8968 {
8969 /* Extract the label or symbol. */
8970 address = XEXP (address, 0);
8971 if (GET_CODE (address) == PLUS)
8972 address = XEXP (address, 0);
8973 address = XVECEXP (address, 0, 0);
8974 }
8975 if (GET_CODE (address) == LABEL_REF
8976 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
8977 LABEL_NUSES (XEXP (address, 0)) += nuses;
8978 }
8979
/* Compute extra cost of moving data between one register class
   and another.  */

/* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
   uses this information.  Hence, the general register <-> floating point
   register information here is not used for SFmode.  */

int
sh_register_move_cost (enum machine_mode mode,
		       enum reg_class srcclass, enum reg_class dstclass)
{
  /* The tests below are ordered; the first match wins.  */
  if (dstclass == T_REGS || dstclass == PR_REGS)
    return 10;

  if (dstclass == MAC_REGS && srcclass == MAC_REGS)
    return 4;

  if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
      && REGCLASS_HAS_FP_REG (srcclass)
      && REGCLASS_HAS_FP_REG (dstclass))
    return 4;

  if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
      || (dstclass== MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
    return 9;

  /* General <-> FP moves go through memory or FPUL; scale the cost by
     the number of 8-byte chunks to be transferred.  */
  if ((REGCLASS_HAS_FP_REG (dstclass)
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (REGCLASS_HAS_GENERAL_REG (dstclass)
	  && REGCLASS_HAS_FP_REG (srcclass)))
    return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
	    * ((GET_MODE_SIZE (mode) + 7) / 8U));

  if ((dstclass == FPUL_REGS
       && REGCLASS_HAS_GENERAL_REG (srcclass))
      || (srcclass == FPUL_REGS
	  && REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 5;

  if ((dstclass == FPUL_REGS
       && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
      || (srcclass == FPUL_REGS
	  && (dstclass == PR_REGS || dstclass == MAC_REGS)))
    return 7;

  if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 20;

  if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
      || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
    return 4;

  if (TARGET_SHMEDIA
      || (TARGET_FMOVD
	  && ! REGCLASS_HAS_GENERAL_REG (srcclass)
	  && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
    return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);

  /* Default: cost proportional to the number of 4-byte words moved.  */
  return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
}
9041
9042 /* Like register_operand, but take into account that SHMEDIA can use
9043 the constant zero like a general register. */
9044 int
sh_register_operand(rtx op,enum machine_mode mode)9045 sh_register_operand (rtx op, enum machine_mode mode)
9046 {
9047 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9048 return 1;
9049 return register_operand (op, mode);
9050 }
9051
9052 int
cmpsi_operand(rtx op,enum machine_mode mode)9053 cmpsi_operand (rtx op, enum machine_mode mode)
9054 {
9055 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9056 && GET_MODE (op) == SImode)
9057 return 1;
9058 return arith_operand (op, mode);
9059 }
9060
9061 static rtx emit_load_ptr (rtx, rtx);
9062
9063 static rtx
emit_load_ptr(rtx reg,rtx addr)9064 emit_load_ptr (rtx reg, rtx addr)
9065 {
9066 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9067
9068 if (Pmode != ptr_mode)
9069 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9070 return emit_move_insn (reg, mem);
9071 }
9072
/* Output a thunk to FILE that adjusts the incoming `this' pointer by
   DELTA (and, when VCALL_OFFSET is nonzero, by a further offset loaded
   through the adjusted `this'), then tail-calls FUNCTION.  The RTL is
   generated here and run through just enough of the late passes to be
   emitted as assembly.  */
void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
		    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  /* Nonzero if DELTA fits an add-immediate on this target.  */
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  /* Pretend the late passes have already run: we generate RTL directly
     in terms of hard registers.  */
  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  current_function_uses_only_leaf_regs = 1;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this completely machine independently.
     "this" is passed as first argument, unless a structure return pointer
     comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      /* Skip past the struct-return pointer argument.  */
      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
	 pointing where to return struct values.  */
      scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 21);
      scratch2 = gen_rtx_REG (Pmode, TR0_REG);
    }

  this_value = plus_constant (this, delta);
  /* If we will need the vtable entry and (this + delta) is directly
     addressable, load through it now, before `this' is clobbered.  */
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  /* Apply DELTA to `this'.  */
  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
	emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
	; /* Do nothing.  */
      else if (! TARGET_SH5)
	{
	  /* scratch0 != scratch1, and we have indexed loads.  Get better
	     schedule by loading the offset into r1 and using an indexed
	     load - then the load of r1 can issue before the load from
	     (this + delta) finishes.  */
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
	}
      else if (CONST_OK_FOR_ADD (vcall_offset))
	{
	  emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
	  offset_addr = scratch0;
	}
      else if (scratch0 != scratch1)
	{
	  emit_move_insn (scratch1, GEN_INT (vcall_offset));
	  emit_insn (gen_add2_insn (scratch0, scratch1));
	  offset_addr = scratch0;
	}
      else
	abort (); /* FIXME */
      /* Load the vcall adjustment from the vtable and add it to `this'.  */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
	scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  emit_move_insn (scratch2, funexp);
  funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
  sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  insns = get_insns ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {

      find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
      life_analysis (insns, rtl_dump_file, PROP_FINAL);

      split_all_insns (1);

      schedule_insns (rtl_dump_file);
    }

  /* Machine-dependent reorg (constant pools, delay slots, etc.).  */
  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, rtl_dump_file);
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1, 0);
  final_end_function ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars (0);

      /* Release all memory held by regsets now.  */
      regset_release_memory ();
    }

  /* Restore the global state we faked at entry.  */
  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
9240
9241 rtx
function_symbol(const char * name)9242 function_symbol (const char *name)
9243 {
9244 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9245 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9246 return sym;
9247 }
9248
9249 /* Find the number of a general purpose register in S. */
9250 static int
scavenge_reg(HARD_REG_SET * s)9251 scavenge_reg (HARD_REG_SET *s)
9252 {
9253 int r;
9254 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9255 if (TEST_HARD_REG_BIT (*s, r))
9256 return r;
9257 return -1;
9258 }
9259
/* Return an rtx representing the value the PR (return address) register
   had on entry to the current function.  */
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
	   & ~ CALL_COOKIE_RET_TRAMP (1))
	  || current_function_has_nonlocal_label))
    /* Read the saved value from the stack instead.  */
    return gen_rtx_MEM (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
     is set, unless it has been called before for the same register.  And even
     then, we end in trouble if we didn't use the register in the same
     basic block before.  So call get_hard_reg_initial_val now and wrap it
     in an unspec if we might need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose registers, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}
9291
9292 int
sh_expand_t_scc(enum rtx_code code,rtx target)9293 sh_expand_t_scc (enum rtx_code code, rtx target)
9294 {
9295 rtx result = target;
9296 HOST_WIDE_INT val;
9297
9298 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9299 || GET_CODE (sh_compare_op1) != CONST_INT)
9300 return 0;
9301 if (GET_CODE (result) != REG)
9302 result = gen_reg_rtx (SImode);
9303 val = INTVAL (sh_compare_op1);
9304 if ((code == EQ && val == 1) || (code == NE && val == 0))
9305 emit_insn (gen_movt (result));
9306 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9307 {
9308 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9309 emit_insn (gen_subc (result, result, result));
9310 emit_insn (gen_addsi3 (result, result, GEN_INT (1)));
9311 }
9312 else if (code == EQ || code == NE)
9313 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9314 else
9315 return 0;
9316 if (result != target)
9317 emit_move_insn (target, result);
9318 return 1;
9319 }
9320
9321 /* INSN is an sfunc; return the rtx that describes the address used. */
9322 static rtx
extract_sfunc_addr(rtx insn)9323 extract_sfunc_addr (rtx insn)
9324 {
9325 rtx pattern, part = NULL_RTX;
9326 int len, i;
9327
9328 pattern = PATTERN (insn);
9329 len = XVECLEN (pattern, 0);
9330 for (i = 0; i < len; i++)
9331 {
9332 part = XVECEXP (pattern, 0, i);
9333 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9334 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9335 return XEXP (part, 0);
9336 }
9337 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9338 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9339 abort ();
9340 }
9341
9342 /* Verify that the register in use_sfunc_addr still agrees with the address
9343 used in the sfunc. This prevents fill_slots_from_thread from changing
9344 use_sfunc_addr.
9345 INSN is the use_sfunc_addr instruction, and REG is the register it
9346 guards. */
9347 int
check_use_sfunc_addr(rtx insn,rtx reg)9348 check_use_sfunc_addr (rtx insn, rtx reg)
9349 {
9350 /* Search for the sfunc. It should really come right after INSN. */
9351 while ((insn = NEXT_INSN (insn)))
9352 {
9353 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9354 break;
9355 if (! INSN_P (insn))
9356 continue;
9357
9358 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9359 insn = XVECEXP (PATTERN (insn), 0, 0);
9360 if (GET_CODE (PATTERN (insn)) != PARALLEL
9361 || get_attr_type (insn) != TYPE_SFUNC)
9362 continue;
9363 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9364 }
9365 abort ();
9366 }
9367
9368 #include "gt-sh.h"
9369