/* Target Code for TI C6X
   Copyright (C) 2010-2016 Free Software Foundation, Inc.
   Contributed by Andrew Jenner <andrew@codesourcery.com>
   Contributed by Bernd Schmidt <bernds@codesourcery.com>

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple-expr.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "sched-int.h"
#include "tm-constrs.h"
#include "langhooks.h"
#include "sel-sched.h"
#include "debug.h"
#include "hw-doloop.h"
#include "regrename.h"
#include "dumpfile.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Table of supported architecture variants.  */
typedef struct
{
  const char *arch;
  enum c6x_cpu_type type;
  unsigned short features;
} c6x_arch_table;

/* A list of all ISAs, mapping each one to a representative device.
   Used for -march selection.  */
static const c6x_arch_table all_isas[] =
{
#define C6X_ISA(NAME,DEVICE,FLAGS) \
  { NAME, DEVICE, FLAGS },
#include "c6x-isas.def"
#undef C6X_ISA
  { NULL, C6X_CPU_C62X, 0 }
};

/* This is the parsed result of the "-march=" option, if given.  */
enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;

/* A mask of insn types that are allowed by the architecture selected by
   the -march option.  */
unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;

/* The instruction that is being output (as obtained from
   FINAL_PRESCAN_INSN).  */
static rtx_insn *c6x_current_insn = NULL;

/* A decl we build to access __c6xabi_DSBT_base.  */
static GTY(()) tree dsbt_decl;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int c6x_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int c6x_flag_var_tracking;

/* Determines whether we use modulo scheduling.  */
static int c6x_flag_modulo_sched;

/* Record the state of flag_pic before we set it to 1 for DSBT.  */
int c6x_initial_flag_pic;

typedef struct
{
  /* We record the clock cycle for every insn during scheduling.  */
  int clock;
  /* After scheduling, we run assign_reservations to choose unit
     reservations for all insns.  These are recorded here.  */
  int reservation;
  /* Records the new condition for insns which must be made
     conditional after scheduling.  An entry of NULL_RTX means no such
     change is necessary.  */
  rtx new_cond;
  /* True for the first insn that was scheduled in an ebb.  */
  bool ebb_start;
  /* The scheduler state after the insn, transformed into a mask of UNIT_QID
     bits rather than storing the state.  Meaningful only for the last
     insn in a cycle.  */
  unsigned int unit_mask;
} c6x_sched_insn_info;


/* Record a c6x_sched_insn_info structure for every insn in the function.  */
static vec<c6x_sched_insn_info> insn_info;

#define INSN_INFO_LENGTH (insn_info).length ()
#define INSN_INFO_ENTRY(N) (insn_info[(N)])

static bool done_cfi_sections;

#define RESERVATION_FLAG_D 1
#define RESERVATION_FLAG_L 2
#define RESERVATION_FLAG_S 4
#define RESERVATION_FLAG_M 8
#define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
#define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
#define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)

/* The DFA names of the units.  */
static const char *const c6x_unit_names[] =
{
  "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
  "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
};

/* The DFA unit number for each unit in c6x_unit_names[].  */
static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];

/* Unit query IDs.  */
#define UNIT_QID_D1 0
#define UNIT_QID_L1 1
#define UNIT_QID_S1 2
#define UNIT_QID_M1 3
#define UNIT_QID_FPS1 4
#define UNIT_QID_FPL1 5
#define UNIT_QID_ADDDPS1 6
#define UNIT_QID_ADDDPL1 7
#define UNIT_QID_SIDE_OFFSET 8

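/* Reservation identifiers for the S unit on each side.  Note that the
   values coincide with the unit query IDs of "s1" and "s2" above
   (UNIT_QID_S1 and UNIT_QID_S1 + UNIT_QID_SIDE_OFFSET).  */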
#define RESERVATION_S1 2
#define RESERVATION_S2 10

/* An enum for the unit requirements we count in the UNIT_REQS table.  */
enum unitreqs
{
  UNIT_REQ_D,
  UNIT_REQ_L,
  UNIT_REQ_S,
  UNIT_REQ_M,
  UNIT_REQ_DL,
  UNIT_REQ_DS,
  UNIT_REQ_LS,
  UNIT_REQ_DLS,
  UNIT_REQ_T,
  UNIT_REQ_X,
  UNIT_REQ_MAX
};

/* A table used to count unit requirements.  Used when computing minimum
   iteration intervals.  */
typedef int unit_req_table[2][UNIT_REQ_MAX];
static unit_req_table unit_reqs;

/* Register map for debugging.  */
unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,	/* A0 - A15.  */
  37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,		/* A16 - A32.  */
  50, 51, 52,
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,		/* B0 - B15.  */
  29, 30, 31,
  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,		/* B16 - B32.  */
  66, 67, 68,
  -1, -1, -1							/* FP, ARGP, ILC.  */
};

/* Allocate a new, cleared machine_function structure.  */

static struct machine_function *
c6x_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Implement TARGET_OPTION_OVERRIDE.  */

static void
c6x_option_override (void)
{
  unsigned i;

  if (global_options_set.x_c6x_arch_option)
    {
      c6x_arch = all_isas[c6x_arch_option].type;
      c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
      c6x_insn_mask |= all_isas[c6x_arch_option].features;
    }

  c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  c6x_flag_modulo_sched = flag_modulo_sched;
  flag_modulo_sched = 0;

  init_machine_status = c6x_init_machine_status;

  for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
    c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);

  if (flag_pic && !TARGET_DSBT)
    {
      error ("-fpic and -fPIC not supported without -mdsbt on this target");
      flag_pic = 0;
    }
  c6x_initial_flag_pic = flag_pic;
  if (TARGET_DSBT && !flag_pic)
    flag_pic = 1;
}


/* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook.  */

static void
c6x_conditional_register_usage (void)
{
  int i;
  if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
    for (i = 16; i < 32; i++)
      {
	fixed_regs[i] = 1;
	fixed_regs[32 + i] = 1;
      }
  if (TARGET_INSNS_64)
    {
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
			REG_A0);
      SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
			REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
			  REG_A0);
      CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
			  REG_A0);
    }
}

static GTY(()) rtx eqdf_libfunc;
static GTY(()) rtx nedf_libfunc;
static GTY(()) rtx ledf_libfunc;
static GTY(()) rtx ltdf_libfunc;
static GTY(()) rtx gedf_libfunc;
static GTY(()) rtx gtdf_libfunc;
static GTY(()) rtx eqsf_libfunc;
static GTY(()) rtx nesf_libfunc;
static GTY(()) rtx lesf_libfunc;
static GTY(()) rtx ltsf_libfunc;
static GTY(()) rtx gesf_libfunc;
static GTY(()) rtx gtsf_libfunc;
static GTY(()) rtx strasgi_libfunc;
static GTY(()) rtx strasgi64p_libfunc;

/* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
   functions to match the C6x ABI.  */

static void
c6x_init_libfuncs (void)
{
  /* Double-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
  set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
  set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
  set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
  set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");

  /* Single-precision floating-point arithmetic.  */
  set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
  set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
  set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
  set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
  set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");

  /* Floating-point comparisons.  */
  eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
  nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
  lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
  ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
  gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
  gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
  eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
  nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
  ledf_libfunc = init_one_libfunc ("__c6xabi_led");
  ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
  gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
  gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");

  set_optab_libfunc (eq_optab, SFmode, NULL);
  set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
  set_optab_libfunc (gt_optab, SFmode, NULL);
  set_optab_libfunc (ge_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, NULL);
  set_optab_libfunc (le_optab, SFmode, NULL);
  set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
  set_optab_libfunc (eq_optab, DFmode, NULL);
  set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
  set_optab_libfunc (gt_optab, DFmode, NULL);
  set_optab_libfunc (ge_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, NULL);
  set_optab_libfunc (le_optab, DFmode, NULL);
  set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");

  /* Floating-point to integer conversions.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");

  /* Conversions between floating types.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");

  /* Integer to floating-point conversions.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");

  /* Long long.  */
  set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
  set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
  set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
  set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");

  set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
  set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
  set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
  set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
  set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
  set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
  set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
  set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
  set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
  set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
  set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");

  /* Block move.  */
  strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
  strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
}

/* Begin the assembly file.  */

static void
c6x_file_start (void)
{
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     c6x_override_options, because flag_var_tracking is finalized after
     that.  */
  c6x_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  done_cfi_sections = false;
  default_file_start ();

  /* Arrays are aligned to 8-byte boundaries.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");

  /* Stack alignment is 8 bytes.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");

#if 0 /* FIXME: Reenable when TI's tools are fixed.  */
  /* ??? Ideally we'd check flag_short_wchar somehow.  */
  asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
#endif

  /* We conform to version 1.0 of the ABI.  */
  asm_fprintf (asm_out_file,
	       "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");

}

/* The LTO frontend only enables exceptions when it sees a function that
   uses it.  This changes the return value of dwarf2out_do_frame, so we
   have to check before every function.  */

void
c6x_output_file_unwind (FILE * f)
{
  if (done_cfi_sections)
    return;

  /* Output a .cfi_sections directive.  */
  if (dwarf2out_do_frame ())
    {
      if (flag_unwind_tables || flag_exceptions)
	{
	  if (write_symbols == DWARF2_DEBUG
	      || write_symbols == VMS_AND_DWARF2_DEBUG)
	    asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
	  else
	    asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
	}
      else
	asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
      done_cfi_sections = true;
    }
}

/* Output unwind directives at the end of a function.  */

static void
c6x_output_fn_unwind (FILE * f)
{
  /* Return immediately if we are not generating unwinding tables.  */
  if (! (flag_unwind_tables || flag_exceptions))
    return;

  /* If this function will never be unwound, then mark it as such.  */
  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    fputs("\t.cantunwind\n", f);

  fputs ("\t.endp\n", f);
}


/* Stack and Calling.  */

int argument_registers[10] =
{
  REG_A4, REG_B4,
  REG_A6, REG_B6,
  REG_A8, REG_B8,
  REG_A10, REG_B10,
  REG_A12, REG_B12
};

/* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h.  */

void
c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
			  int n_named_args ATTRIBUTE_UNUSED)
{
  cum->count = 0;
  cum->nregs = 10;
  if (!libname && fntype)
    {
      /* We need to find out the number of named arguments.  Unfortunately,
	 for incoming arguments, N_NAMED_ARGS is set to -1.  */
      if (stdarg_p (fntype))
	cum->nregs = type_num_arguments (fntype) - 1;
      if (cum->nregs > 10)
	cum->nregs = 10;
    }
}

/* Implements the macro FUNCTION_ARG defined in c6x.h.  */

static rtx
c6x_function_arg (cumulative_args_t cum_v, machine_mode mode,
		  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  if (cum->count >= cum->nregs)
    return NULL_RTX;
  if (type)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
	{
	  if (size > 4)
	    {
	      rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
	      rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
	      rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				     gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	      return gen_rtx_PARALLEL (mode, vec);
	    }
	}
    }
  return gen_rtx_REG (mode, argument_registers[cum->count]);
}

static void
c6x_function_arg_advance (cumulative_args_t cum_v,
			  machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type ATTRIBUTE_UNUSED,
			  bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  cum->count++;
}


/* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
   upward rather than downward.  */

bool
c6x_block_reg_pad_upward (machine_mode mode ATTRIBUTE_UNUSED,
			  const_tree type, bool first)
{
  HOST_WIDE_INT size;

  if (!TARGET_BIG_ENDIAN)
    return true;
  if (!first)
    return true;
  if (!type)
    return true;
  size = int_size_in_bytes (type);
  return size == 3;
}

/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */

static unsigned int
c6x_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);

  if (boundary > BITS_PER_WORD)
    return 2 * BITS_PER_WORD;

  if (mode == BLKmode)
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	return 2 * BITS_PER_WORD;
      if (boundary < BITS_PER_WORD)
	{
	  if (size >= 3)
	    return BITS_PER_WORD;
	  if (size >= 2)
	    return 2 * BITS_PER_UNIT;
	}
    }
  return boundary;
}

/* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY.  */
static unsigned int
c6x_function_arg_round_boundary (machine_mode mode, const_tree type)
{
  return c6x_function_arg_boundary (mode, type);
}

/* TARGET_FUNCTION_VALUE implementation.  Returns an RTX representing the place
   where function FUNC returns or receives a value of data type TYPE.  */

static rtx
c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
		    bool outgoing ATTRIBUTE_UNUSED)
{
  /* Functions return values in register A4.  When returning aggregates, we may
     have to adjust for endianness.  */
  if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size > 4)
	{

	  rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
	  rtx reg2 = gen_rtx_REG (SImode, REG_A4);
	  rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
				 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
	  return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
	}
    }
  return gen_rtx_REG (TYPE_MODE (type), REG_A4);
}

/* Implement TARGET_LIBCALL_VALUE.  */

static rtx
c6x_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, REG_A4);
}

/* TARGET_STRUCT_VALUE_RTX implementation.  */

static rtx
c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, REG_A3);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
c6x_function_value_regno_p (const unsigned int regno)
{
  return regno == REG_A4;
}

/* Types larger than 64 bit, and variable sized types, are passed by
   reference.  The callee must copy them; see c6x_callee_copies.  */

static bool
c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		       machine_mode mode, const_tree type,
		       bool named ATTRIBUTE_UNUSED)
{
  int size = -1;
  if (type)
    size = int_size_in_bytes (type);
  else if (mode != VOIDmode)
    size = GET_MODE_SIZE (mode);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This is called by the macro
   TARGET_RETURN_IN_MEMORY.  */

static bool
c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  int size = int_size_in_bytes (type);
  return size > 2 * UNITS_PER_WORD || size == -1;
}

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
c6x_return_in_msb (const_tree valtype)
{
  HOST_WIDE_INT size = int_size_in_bytes (valtype);
  return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
}

/* Implement TARGET_CALLEE_COPIES.  */

static bool
c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
		   machine_mode mode ATTRIBUTE_UNUSED,
		   const_tree type ATTRIBUTE_UNUSED,
		   bool named ATTRIBUTE_UNUSED)
{
  return true;
}

/* Return the type to use as __builtin_va_list.  */
static tree
c6x_build_builtin_va_list (void)
{
  return build_pointer_type (char_type_node);
}

static void
c6x_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
  fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
  fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
  fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
  fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
  fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
  fprintf (f, "\t.long\t0x00000000\n"); /* nop */
}

/* Emit RTL insns to initialize the variable parts of a trampoline at
   TRAMP.  FNADDR is an RTX for the address of the function's pure
   code.  CXT is an RTX for the static chain value for the function.  */

static void
c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx t1 = copy_to_reg (fnaddr);
  rtx t2 = copy_to_reg (cxt);
  rtx mask = gen_reg_rtx (SImode);
  int i;

  emit_block_move (tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  emit_move_insn (mask, GEN_INT (0xffff << 7));

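  /* The mvkl/mvkh instructions in the template take their 16-bit constant
     in the field selected by the 0xffff << 7 mask above (bits 7..22).  The
     loop below ORs the target function address (words 0 and 2) and the
     static chain (words 1 and 3) into those fields: the low halves are
     shifted left by 7 for the mvkl words, the high halves right by
     16 - 7 = 9 for the mvkh words.  */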
  for (i = 0; i < 4; i++)
    {
      rtx mem = adjust_address (tramp, SImode, i * 4);
      rtx t = (i & 1) ? t2 : t1;
      rtx v1 = gen_reg_rtx (SImode);
      rtx v2 = gen_reg_rtx (SImode);
      emit_move_insn (v1, mem);
      if (i < 2)
	emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
      else
	emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
      emit_insn (gen_andsi3 (v2, v2, mask));
      emit_insn (gen_iorsi3 (v2, v2, v1));
      emit_move_insn (mem, v2);
    }
#ifdef CLEAR_INSN_CACHE
  tramp = XEXP (tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, tramp, Pmode,
		     plus_constant (Pmode, tramp, TRAMPOLINE_SIZE),
		     Pmode);
#endif
}

/* Determine whether c6x_output_mi_thunk can succeed.  */

static bool
c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
			 const_tree function ATTRIBUTE_UNUSED)
{
  return !TARGET_LONG_CALLS;
}

/* Output the assembler code for a thunk function.  THUNK is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[5];
  /* The this parameter is passed as the first argument.  */
  rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);

  c6x_current_insn = NULL;

  xops[4] = XEXP (DECL_RTL (function), 0);
  if (!vcall_offset)
    {
      output_asm_insn ("b .s2 \t%4", xops);
      if (!delta)
	output_asm_insn ("nop 5", xops);
    }

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_rtx;
      if (delta >= -16 && delta <= 15)
	{
	  output_asm_insn ("add .s1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= 16 && delta < 32)
	{
	  output_asm_insn ("add .d1 %0, %1, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 4", xops);
	}
      else if (delta >= -32768 && delta < 32768)
	{
	  output_asm_insn ("mvk .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
      else
	{
	  output_asm_insn ("mvkl .s1 %0, A0", xops);
	  output_asm_insn ("mvkh .s1 %0, A0", xops);
	  output_asm_insn ("add .d1 %1, A0, %1", xops);
	  if (!vcall_offset)
	    output_asm_insn ("nop 3", xops);
	}
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
      rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);

      xops[1] = a3tmp;
      xops[2] = a0tmp;
      xops[3] = gen_rtx_MEM (Pmode, a0tmp);
      output_asm_insn ("mv .s1 a4, %2", xops);
      output_asm_insn ("ldw .d1t1 %3, %2", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
						   vcall_offset));
      if (!memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mvkl .s1 %0, %1", xops);
	  output_asm_insn ("mvkh .s1 %0, %1", xops);
	  output_asm_insn ("nop 2", xops);
	  output_asm_insn ("add .d1 %2, %1, %2", xops);
	  xops[0] = gen_rtx_MEM (Pmode, a0tmp);
	}
      else
	output_asm_insn ("nop 4", xops);
      xops[2] = this_rtx;
      output_asm_insn ("ldw .d1t1 %0, %1", xops);
      output_asm_insn ("|| b .s2 \t%4", xops);
      output_asm_insn ("nop 4", xops);
      output_asm_insn ("add .d1 %2, %1, %2", xops);
    }
}

/* Return true if EXP goes in small data/bss.  */

static bool
c6x_in_small_data_p (const_tree exp)
{
  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".neardata") == 0
	  || strncmp (section, ".neardata.", 10) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".bss") == 0
	  || strncmp (section, ".bss.", 5) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
	  || strcmp (section, ".rodata") == 0
	  || strncmp (section, ".rodata.", 8) == 0
	  || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
	return true;
    }
  else
    return PLACE_IN_SDATA_P (exp);

  return false;
}

/* Return a section for X.  The only special thing we do here is to
   honor small data.  We don't have a tree type, so we can't use the
   PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
   everything sized 8 bytes or smaller into small data.  */

static section *
c6x_select_rtx_section (machine_mode mode, rtx x,
			unsigned HOST_WIDE_INT align)
{
  if (c6x_sdata_mode == C6X_SDATA_ALL
      || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
    /* ??? Consider using mergeable sdata sections.  */
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static section *
c6x_elf_select_section (tree decl, int reloc,
			unsigned HOST_WIDE_INT align)
{
  const char *sname = NULL;
  unsigned int flags = SECTION_WRITE;
  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SRODATA:
	  sname = ".rodata";
	  flags = 0;
	  break;
	case SECCAT_SDATA:
	  sname = ".neardata";
	  break;
	case SECCAT_SBSS:
	  sname = ".bss";
	  flags |= SECTION_BSS;
	default:
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".fardata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".fardata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".fardata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".fardata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".fardata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".far";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	  sname = ".const";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	default:
	  break;
	}
    }
  if (sname)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section (sname, flags, NULL);
      return get_named_section (decl, sname, reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
c6x_elf_unique_section (tree decl, int reloc)
{
  const char *prefix = NULL;
  /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
  bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

  if (c6x_in_small_data_p (decl))
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_SDATA:
	  prefix = one_only ? ".s" : ".neardata";
	  break;
	case SECCAT_SBSS:
	  prefix = one_only ? ".sb" : ".bss";
	  break;
	case SECCAT_SRODATA:
	  prefix = one_only ? ".s2" : ".rodata";
	  break;
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	case SECCAT_RODATA:
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  gcc_unreachable ();
	default:
	  /* Everything else we place into default sections and hope for the
	     best.  */
	  break;
	}
    }
  else
    {
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".fd" : ".fardata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".fb" : ".far";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".fr" : ".const";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  if (prefix)
    {
      const char *name, *linkonce;
      char *string;

      name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      name = targetm.strip_name_encoding (name);

      /* If we're using one_only, then there needs to be a .gnu.linkonce
	 prefix to the section name.  */
      linkonce = one_only ? ".gnu.linkonce" : "";

      string = ACONCAT ((linkonce, prefix, ".", name, NULL));

      set_decl_section_name (decl, string);
      return;
    }
  default_unique_section (decl, reloc);
}

static unsigned int
c6x_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".far") == 0
      || strncmp (name, ".far.", 5) == 0)
    flags |= SECTION_BSS;

  flags |= default_section_type_flags (decl, name, reloc);

  return flags;
}

/* Checks whether the given CALL_EXPR would use a caller saved
   register.  This is used to decide whether sibling call optimization
   could be performed on the respective function call.  */

static bool
c6x_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  HARD_REG_SET call_saved_regset;
  tree parameter;
  machine_mode mode;
  tree type;
  rtx parm_rtx;
  int i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
	 an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
	return true;

      type = TREE_TYPE (parameter);
      gcc_assert (type);

      mode = TYPE_MODE (type);
      gcc_assert (mode);

      if (pass_by_reference (&cum_v, mode, type, true))
	{
	  mode = Pmode;
	  type = build_pointer_type (type);
	}

      parm_rtx = c6x_function_arg (cum, mode, type, 0);

      c6x_function_arg_advance (cum, mode, type, 0);

      if (!parm_rtx)
	continue;

      if (REG_P (parm_rtx)
	  && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
				      REGNO (parm_rtx)))
	return true;
      if (GET_CODE (parm_rtx) == PARALLEL)
	{
	  int n = XVECLEN (parm_rtx, 0);
	  while (n-- > 0)
	    {
	      rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
	      if (REG_P (x)
		  && overlaps_hard_reg_set_p (call_saved_regset,
					      GET_MODE (x), REGNO (x)))
		return true;
	    }
	}
    }
  return false;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
c6x_function_ok_for_sibcall (tree decl, tree exp)
{
  /* Registers A10, A12, B10 and B12 are available as argument
     registers but unfortunately caller saved.  This makes functions
     needing these registers for arguments not suitable for
     sibcalls.  */
  if (c6x_call_saved_register_used (exp))
    return false;

  if (!flag_pic)
    return true;

  if (TARGET_DSBT)
    {
      /* When compiling for DSBT, the calling function must be local,
	 so that when we reload B14 in the sibcall epilogue, it will
	 not change its value.  */
      struct cgraph_local_info *this_func;

      if (!decl)
	/* Not enough information.  */
	return false;

      this_func = cgraph_node::local_info (current_function_decl);
      return this_func->local;
    }

  return true;
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
c6x_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
   as a long call.  */
bool
c6x_long_call_p (rtx op)
{
  tree decl;

  if (!TARGET_LONG_CALLS)
    return false;

  decl = SYMBOL_REF_DECL (op);

  /* Try to determine whether the symbol is in the same section as the current
     function.  Be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (decl != NULL_TREE
      && !flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && c6x_function_in_section_p (decl, current_function_section ()))
    return false;

  return true;
}

/* Emit the sequence for a call.  */
void
c6x_expand_call (rtx retval, rtx address, bool sibcall)
{
  rtx callee = XEXP (address, 0);
  rtx call_insn;

  if (!c6x_call_operand (callee, Pmode))
    {
      callee = force_reg (Pmode, callee);
      address = change_address (address, Pmode, callee);
    }
  call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
  if (sibcall)
    {
      call_insn = emit_call_insn (call_insn);
      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
	       gen_rtx_REG (Pmode, REG_B3));
    }
  else
    {
      if (retval == NULL_RTX)
	call_insn = emit_call_insn (call_insn);
      else
	call_insn = emit_call_insn (gen_rtx_SET (retval, call_insn));
    }
  if (flag_pic)
    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
}

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec = UNSPEC_LOAD_GOT;
      rtx tmp;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}
      if (flag_pic == 2)
	{
	  if (can_create_pseudo_p ())
	    tmp = gen_reg_rtx (Pmode);
	  else
	    tmp = reg;
	  emit_insn (gen_movsi_gotoff_high (tmp, addr));
	  emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
	  emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
	}
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
	  new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

	  emit_move_insn (reg, new_rtx);
	}
      if (picreg == pic_offset_table_rtx)
	crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (XEXP (addr, 0) == picreg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
				     base == reg ? NULL_RTX : reg,
				     picreg);

      if (GET_CODE (addr) == CONST_INT)
	{
	  gcc_assert (! reload_in_progress && ! reload_completed);
	  addr = force_reg (Pmode, addr);
	}

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
	{
	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
	  addr = XEXP (addr, 1);
	}

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}

/* Expand a move operation in mode MODE.  The operands are in OPERANDS.
   Returns true if no further code must be generated, false if the caller
   should generate an insn to move OPERANDS[1] to OPERANDS[0].  */

bool
expand_move (rtx *operands, machine_mode mode)
{
  rtx dest = operands[0];
  rtx op = operands[1];

  if ((reload_in_progress | reload_completed) == 0
      && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
    operands[1] = force_reg (mode, op);
  else if (mode == SImode && symbolic_operand (op, SImode))
    {
      if (flag_pic)
	{
	  if (sdata_symbolic_operand (op, SImode))
	    {
	      emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
	      crtl->uses_pic_offset_table = 1;
	      return true;
	    }
	  else
	    {
	      rtx temp = (reload_completed || reload_in_progress
			  ? dest : gen_reg_rtx (Pmode));

	      operands[1] = legitimize_pic_address (op, temp,
						    pic_offset_table_rtx);
	    }
	}
      else if (reload_completed
	       && !sdata_symbolic_operand (op, SImode))
	{
	  emit_insn (gen_movsi_high (dest, op));
	  emit_insn (gen_movsi_lo_sum (dest, dest, op));
	  return true;
	}
    }
  return false;
}

/* This function is called when we're about to expand an integer compare
   operation which performs COMPARISON.  It examines the second operand,
   and if it is an integer constant that cannot be used directly on the
   current machine in a comparison insn, it returns true.  */
bool
c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
{
  if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
    return false;

  if ((code == EQ || code == LT || code == GT)
      && !satisfies_constraint_Is5 (op))
    return true;
  if ((code == GTU || code == LTU)
      && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
    return true;

  return false;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  Return the comparison
   that should be used in the jump insn.  */

rtx
c6x_expand_compare (rtx comparison, machine_mode mode)
{
  enum rtx_code code = GET_CODE (comparison);
  rtx op0 = XEXP (comparison, 0);
  rtx op1 = XEXP (comparison, 1);
  rtx cmp;
  enum rtx_code jump_code = code;
  machine_mode op_mode = GET_MODE (op0);

  if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
    {
      rtx t = gen_reg_rtx (SImode);
      emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
			     gen_highpart (SImode, op0)));
      op_mode = SImode;
      cmp = t;
    }
  else if (op_mode == DImode)
    {
      rtx lo[2], high[2];
      rtx cmp1, cmp2;

      if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      split_di (&op0, 1, lo, high);
      split_di (&op1, 1, lo + 1, high + 1);

      if (c6x_force_op_for_comparison_p (code, high[1])
	  || c6x_force_op_for_comparison_p (EQ, high[1]))
	high[1] = force_reg (SImode, high[1]);

      cmp1 = gen_reg_rtx (SImode);
      cmp2 = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (cmp1, gen_rtx_fmt_ee (code, SImode,
						    high[0], high[1])));
      if (code == EQ)
	{
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_rtx_SET (cmp2, gen_rtx_fmt_ee (code, SImode,
							lo[0], lo[1])));
	  emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
	}
      else
	{
	  emit_insn (gen_rtx_SET (cmp2, gen_rtx_EQ (SImode, high[0],
						    high[1])));
	  if (code == GT)
	    code = GTU;
	  else if (code == LT)
	    code = LTU;
	  if (c6x_force_op_for_comparison_p (code, lo[1]))
	    lo[1] = force_reg (SImode, lo[1]);
	  emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
							  lo[0], lo[1]),
				    lo[0], lo[1], cmp2));
	  emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
	}
      cmp = cmp1;
    }
  else if (TARGET_FP && !flag_finite_math_only
	   && (op_mode == DFmode || op_mode == SFmode)
	   && code != EQ && code != NE && code != LT && code != GT
	   && code != UNLE && code != UNGE)
    {
      enum rtx_code code1, code2, code3;
      rtx (*fn) (rtx, rtx, rtx, rtx, rtx);

      jump_code = NE;
      code3 = UNKNOWN;
      switch (code)
	{
	case UNLT:
	case UNGT:
	  jump_code = EQ;
	  /* fall through */
	case LE:
	case GE:
	  code1 = code == LE || code == UNGT ? LT : GT;
	  code2 = EQ;
	  break;

	case UNORDERED:
	  jump_code = EQ;
	  /* fall through */
	case ORDERED:
	  code3 = EQ;
	  /* fall through */
	case LTGT:
	  code1 = LT;
	  code2 = GT;
	  break;

	case UNEQ:
	  code1 = LT;
	  code2 = GT;
	  jump_code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}

      cmp = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code1, SImode, op0, op1)));
      fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
      emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
		     op0, op1, cmp));
      if (code3 != UNKNOWN)
	emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
		       op0, op1, cmp));
    }
  else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
    cmp = op0;
  else
    {
      bool is_fp_libfunc;
      is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);

      if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
	  && !is_fp_libfunc)
	{
	  code = reverse_condition (code);
	  jump_code = EQ;
	}
      else if (code == UNGE)
	{
	  code = LT;
	  jump_code = EQ;
	}
      else if (code == UNLE)
	{
	  code = GT;
	  jump_code = EQ;
	}
      else
	jump_code = NE;

      if (is_fp_libfunc)
	{
	  rtx_insn *insns;
	  rtx libfunc;
	  switch (code)
	    {
	    case EQ:
	      libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
	      break;
	    case NE:
	      libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
	      break;
	    case GT:
	      libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
	      break;
	    case GE:
	      libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
	      break;
	    case LT:
	      libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
	      break;
	    case LE:
	      libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  start_sequence ();

	  cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2,
					 op0, op_mode, op1, op_mode);
	  insns = get_insns ();
	  end_sequence ();

	  emit_libcall_block (insns, cmp, cmp,
			      gen_rtx_fmt_ee (code, SImode, op0, op1));
	}
      else
	{
	  cmp = gen_reg_rtx (SImode);
	  if (c6x_force_op_for_comparison_p (code, op1))
	    op1 = force_reg (SImode, op1);
	  emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, SImode,
						       op0, op1)));
	}
    }

  return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
}

/* Return one word of double-word value OP.  HIGH_P is true to select the
   high part, false to select the low part.  When encountering auto-increment
   addressing, we make the assumption that the low part is going to be accessed
   first.  */

rtx
c6x_subword (rtx op, bool high_p)
{
  unsigned int byte;
  machine_mode mode;

  mode = GET_MODE (op);
  if (mode == VOIDmode)
    mode = DImode;

  if (TARGET_BIG_ENDIAN ? !high_p : high_p)
    byte = UNITS_PER_WORD;
  else
    byte = 0;

  if (MEM_P (op))
    {
      rtx addr = XEXP (op, 0);
      if (GET_CODE (addr) == PLUS || REG_P (addr))
	return adjust_address (op, word_mode, byte);
      /* FIXME: should really support autoincrement addressing for
	 multi-word modes.  */
      gcc_unreachable ();
    }

  return simplify_gen_subreg (word_mode, op, mode, byte);
}

/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      lo_half[num] = c6x_subword (op, false);
      hi_half[num] = c6x_subword (op, true);
    }
}

/* Return true if VAL is a mask valid for a clr instruction.  */
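/* Such a mask is all ones except for a single contiguous run of zeros
   (possibly empty): for example 0xff0000ff and 0x000000ff are accepted,
   while 0xf0f0f0f0 is not.  */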
bool
c6x_valid_mask_p (HOST_WIDE_INT val)
{
  int i;
  for (i = 0; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      break;
  for (; i < 32; i++)
    if (val & ((unsigned HOST_WIDE_INT)1 << i))
      break;
  for (; i < 32; i++)
    if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
      return false;
  return true;
}
1681
1682 /* Expand a block move for a movmemM pattern. */
1683
1684 bool
c6x_expand_movmem(rtx dst,rtx src,rtx count_exp,rtx align_exp,rtx expected_align_exp ATTRIBUTE_UNUSED,rtx expected_size_exp ATTRIBUTE_UNUSED)1685 c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
1686 rtx expected_align_exp ATTRIBUTE_UNUSED,
1687 rtx expected_size_exp ATTRIBUTE_UNUSED)
1688 {
1689 unsigned HOST_WIDE_INT align = 1;
1690 unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
1691 unsigned HOST_WIDE_INT count = 0, offset = 0;
1692 unsigned int biggest_move = TARGET_STDW ? 8 : 4;
1693
1694 if (CONST_INT_P (align_exp))
1695 align = INTVAL (align_exp);
1696
1697 src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
1698 dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
1699 min_mem_align = MIN (src_mem_align, dst_mem_align);
1700
1701 if (min_mem_align > align)
1702 align = min_mem_align / BITS_PER_UNIT;
1703 if (src_mem_align < align)
1704 src_mem_align = align;
1705 if (dst_mem_align < align)
1706 dst_mem_align = align;
1707
1708 if (CONST_INT_P (count_exp))
1709 count = INTVAL (count_exp);
1710 else
1711 return false;
1712
1713 /* Make sure we don't need to care about overflow later on. */
1714 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
1715 return false;
1716
1717 if (count >= 28 && (count & 3) == 0 && align >= 4)
1718 {
1719 tree dst_expr = MEM_EXPR (dst);
1720 tree src_expr = MEM_EXPR (src);
1721 rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
1722 rtx srcreg = force_reg (Pmode, XEXP (src, 0));
1723 rtx dstreg = force_reg (Pmode, XEXP (dst, 0));
1724
1725 if (src_expr)
1726 mark_addressable (src_expr);
1727 if (dst_expr)
1728 mark_addressable (dst_expr);
1729 emit_library_call (fn, LCT_NORMAL, VOIDmode, 3,
1730 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
1731 return true;
1732 }
1733
1734 if (biggest_move > align && !TARGET_INSNS_64)
1735 biggest_move = align;
1736
1737 if (count / biggest_move > 7)
1738 return false;
1739
1740 while (count > 0)
1741 {
1742 rtx reg, reg_lowpart;
1743 machine_mode srcmode, dstmode;
1744 unsigned HOST_WIDE_INT src_size, dst_size, src_left;
1745 int shift;
1746 rtx srcmem, dstmem;
1747
1748 while (biggest_move > count)
1749 biggest_move /= 2;
1750
1751 src_size = dst_size = biggest_move;
1752 if (src_size > src_mem_align && src_size == 2)
1753 src_size = 1;
1754 if (dst_size > dst_mem_align && dst_size == 2)
1755 dst_size = 1;
1756
1757 if (dst_size > src_size)
1758 dst_size = src_size;
1759
1760 srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0);
1761 dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0);
1762 if (src_size >= 4)
1763 reg_lowpart = reg = gen_reg_rtx (srcmode);
1764 else
1765 {
1766 reg = gen_reg_rtx (SImode);
1767 reg_lowpart = gen_lowpart (srcmode, reg);
1768 }
1769
1770 srcmem = adjust_address (copy_rtx (src), srcmode, offset);
1771
1772 if (src_size > src_mem_align)
1773 {
1774 enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
1775 : CODE_FOR_movmisaligndi);
1776 emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
1777 }
1778 else
1779 emit_move_insn (reg_lowpart, srcmem);
1780
1781 src_left = src_size;
1782 shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0;
1783 while (src_left > 0)
1784 {
1785 rtx dstreg = reg_lowpart;
1786
1787 if (src_size > dst_size)
1788 {
1789 rtx srcword = reg;
1790 int shift_amount = shift & (BITS_PER_WORD - 1);
1791 if (src_size > 4)
1792 srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
1793 SImode);
1794 if (shift_amount > 0)
1795 {
1796 dstreg = gen_reg_rtx (SImode);
1797 emit_insn (gen_lshrsi3 (dstreg, srcword,
1798 GEN_INT (shift_amount)));
1799 }
1800 else
1801 dstreg = srcword;
1802 dstreg = gen_lowpart (dstmode, dstreg);
1803 }
1804
1805 dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
1806 if (dst_size > dst_mem_align)
1807 {
1808 enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
1809 : CODE_FOR_movmisaligndi);
1810 emit_insn (GEN_FCN (icode) (dstmem, dstreg));
1811 }
1812 else
1813 emit_move_insn (dstmem, dstreg);
1814
1815 if (TARGET_BIG_ENDIAN)
1816 shift -= dst_size * BITS_PER_UNIT;
1817 else
1818 shift += dst_size * BITS_PER_UNIT;
1819 offset += dst_size;
1820 src_left -= dst_size;
1821 }
1822 count -= src_size;
1823 }
1824 return true;
1825 }
1826
1827 /* Subroutine of print_address_operand, print a single address offset OFF for
1828 a memory access of mode MEM_MODE, choosing between normal form and scaled
1829 form depending on the type of the insn. Misaligned memory references must
1830 use the scaled form. */
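/* For illustration: a misaligned SImode access with a byte offset of 8
   uses the scaled form and prints "[2]" (the offset divided by the
   access size), while an ordinary access with the same offset prints
   the unscaled form "(8)".  */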
1831
1832 static void
1833 print_address_offset (FILE *file, rtx off, machine_mode mem_mode)
1834 {
1835 rtx pat;
1836
1837 if (c6x_current_insn != NULL_RTX)
1838 {
1839 pat = PATTERN (c6x_current_insn);
1840 if (GET_CODE (pat) == COND_EXEC)
1841 pat = COND_EXEC_CODE (pat);
1842 if (GET_CODE (pat) == PARALLEL)
1843 pat = XVECEXP (pat, 0, 0);
1844
1845 if (GET_CODE (pat) == SET
1846 && GET_CODE (SET_SRC (pat)) == UNSPEC
1847 && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
1848 {
1849 gcc_assert (CONST_INT_P (off)
1850 && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
1851 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1852 INTVAL (off) / GET_MODE_SIZE (mem_mode));
1853 return;
1854 }
1855 }
1856 fputs ("(", file);
1857 output_address (mem_mode, off);
1858 fputs (")", file);
1859 }
1860
1861 static bool
1862 c6x_print_operand_punct_valid_p (unsigned char c)
1863 {
1864 return c == '$' || c == '.' || c == '|';
1865 }
1866
1867 static void c6x_print_operand (FILE *, rtx, int);
1868
1869 /* Subroutine of c6x_print_operand; used to print a memory reference X to FILE. */
1870
1871 static void
1872 c6x_print_address_operand (FILE *file, rtx x, machine_mode mem_mode)
1873 {
1874 rtx off;
1875 switch (GET_CODE (x))
1876 {
1877 case PRE_MODIFY:
1878 case POST_MODIFY:
1879 if (GET_CODE (x) == POST_MODIFY)
1880 output_address (mem_mode, XEXP (x, 0));
1881 off = XEXP (XEXP (x, 1), 1);
1882 if (XEXP (x, 0) == stack_pointer_rtx)
1883 {
1884 if (GET_CODE (x) == PRE_MODIFY)
1885 gcc_assert (INTVAL (off) > 0);
1886 else
1887 gcc_assert (INTVAL (off) < 0);
1888 }
1889 if (CONST_INT_P (off) && INTVAL (off) < 0)
1890 {
1891 fprintf (file, "--");
1892 off = GEN_INT (-INTVAL (off));
1893 }
1894 else
1895 fprintf (file, "++");
1896 if (GET_CODE (x) == PRE_MODIFY)
1897 output_address (mem_mode, XEXP (x, 0));
1898 print_address_offset (file, off, mem_mode);
1899 break;
1900
1901 case PLUS:
1902 off = XEXP (x, 1);
1903 if (CONST_INT_P (off) && INTVAL (off) < 0)
1904 {
1905 fprintf (file, "-");
1906 off = GEN_INT (-INTVAL (off));
1907 }
1908 else
1909 fprintf (file, "+");
1910 output_address (mem_mode, XEXP (x, 0));
1911 print_address_offset (file, off, mem_mode);
1912 break;
1913
1914 case PRE_DEC:
1915 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1916 fprintf (file, "--");
1917 output_address (mem_mode, XEXP (x, 0));
1918 fprintf (file, "[1]");
1919 break;
1920 case PRE_INC:
1921 fprintf (file, "++");
1922 output_address (mem_mode, XEXP (x, 0));
1923 fprintf (file, "[1]");
1924 break;
1925 case POST_INC:
1926 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1927 output_address (mem_mode, XEXP (x, 0));
1928 fprintf (file, "++[1]");
1929 break;
1930 case POST_DEC:
1931 output_address (mem_mode, XEXP (x, 0));
1932 fprintf (file, "--[1]");
1933 break;
1934
1935 case SYMBOL_REF:
1936 case CONST:
1937 case LABEL_REF:
1938 gcc_assert (sdata_symbolic_operand (x, Pmode));
1939 fprintf (file, "+B14(");
1940 output_addr_const (file, x);
1941 fprintf (file, ")");
1942 break;
1943
1944 case UNSPEC:
1945 switch (XINT (x, 1))
1946 {
1947 case UNSPEC_LOAD_GOT:
1948 fputs ("$GOT(", file);
1949 output_addr_const (file, XVECEXP (x, 0, 0));
1950 fputs (")", file);
1951 break;
1952 case UNSPEC_LOAD_SDATA:
1953 output_addr_const (file, XVECEXP (x, 0, 0));
1954 break;
1955 default:
1956 gcc_unreachable ();
1957 }
1958 break;
1959
1960 default:
1961 gcc_assert (GET_CODE (x) != MEM);
1962 c6x_print_operand (file, x, 0);
1963 break;
1964 }
1965 }
1966
1967 /* Return a single character ('l', 's', 'd' or 'm') identifying the
1968 functional unit used by INSN. */
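/* As an illustration of the syntax this feeds into (see
   c6x_print_unit_specifier_field below): an insn on the S unit of side
   B that uses a cross path gets the field ".s2x", while the same insn
   on side A without a cross path gets ".s1".  */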
1969
1970 char
1971 c6x_get_unit_specifier (rtx_insn *insn)
1972 {
1973 enum attr_units units;
1974
1975 if (insn_info.exists ())
1976 {
1977 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
1978 return c6x_unit_names[unit][0];
1979 }
1980
1981 units = get_attr_units (insn);
1982 switch (units)
1983 {
1984 case UNITS_D:
1985 case UNITS_DL:
1986 case UNITS_DS:
1987 case UNITS_DLS:
1988 case UNITS_D_ADDR:
1989 return 'd';
1990 break;
1991 case UNITS_L:
1992 case UNITS_LS:
1993 return 'l';
1994 break;
1995 case UNITS_S:
1996 return 's';
1997 break;
1998 case UNITS_M:
1999 return 'm';
2000 break;
2001 default:
2002 gcc_unreachable ();
2003 }
2004 }
2005
2006 /* Prints the unit specifier field. */
2007 static void
2008 c6x_print_unit_specifier_field (FILE *file, rtx_insn *insn)
2009 {
2010 enum attr_units units = get_attr_units (insn);
2011 enum attr_cross cross = get_attr_cross (insn);
2012 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
2013 int half;
2014 char unitspec;
2015
2016 if (units == UNITS_D_ADDR)
2017 {
2018 enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
2019 int t_half;
2020 gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
2021 half = arf == ADDR_REGFILE_A ? 1 : 2;
2022 t_half = rf == DEST_REGFILE_A ? 1 : 2;
2023 fprintf (file, ".d%dt%d", half, t_half);
2024 return;
2025 }
2026
2027 if (insn_info.exists ())
2028 {
2029 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
2030 fputs (".", file);
2031 fputs (c6x_unit_names[unit], file);
2032 if (cross == CROSS_Y)
2033 fputs ("x", file);
2034 return;
2035 }
2036
2037 gcc_assert (rf != DEST_REGFILE_UNKNOWN);
2038 unitspec = c6x_get_unit_specifier (insn);
2039 half = rf == DEST_REGFILE_A ? 1 : 2;
2040 fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
2041 }
2042
2043 /* Output assembly language output for the address ADDR to FILE. */
2044 static void
2045 c6x_print_operand_address (FILE *file, machine_mode mode, rtx addr)
2046 {
2047 c6x_print_address_operand (file, addr, mode);
2048 }
2049
2050 /* Print an operand, X, to FILE, with an optional modifier in CODE.
2051
2052 Meaning of CODE:
2053 $ -- print the unit specifier field for the instruction.
2054 . -- print the predicate for the instruction or an empty string for an
2055 unconditional one.
2056 | -- print "||" if the insn should be issued in parallel with the previous
2057 one.
2058
2059 C -- print an opcode suffix for a reversed condition
2060 d -- H, W or D as a suffix for ADDA, based on the factor given by the
2061 operand
2062 D -- print either B, H, W or D as a suffix for ADDA, based on the size of
2063 the operand
2064 J -- print a predicate
2065 j -- like J, but use reverse predicate
2066 k -- treat a CONST_INT as a register number and print it as a register
2067 K -- like k, but print out a doubleword register pair
2068 n -- print an integer operand, negated
2069 p -- print the low part of a DImode register
2070 P -- print the high part of a DImode register
2071 r -- print the absolute value of an integer operand, shifted right by 1
2072 R -- print the absolute value of an integer operand, shifted right by 2
2073 f -- the first clear bit in an integer operand assumed to be a mask for
2074 a clr instruction
2075 F -- the last clear bit in such a mask
2076 s -- the first set bit in an integer operand assumed to be a mask for
2077 a set instruction
2078 S -- the last set bit in such a mask
2079 U -- print either 1 or 2, depending on the side of the machine used by
2080 the operand */
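/* A few illustrative examples of the modifiers above (register numbers
   are hypothetical): for a DImode value in the pair A5:A4, %p prints
   "A4" and %P prints "A5"; for an insn predicated on B0, %. prints
   "[B0]" (or "[!B0]" for the inverted condition); %n applied to the
   constant 8 prints -8.  */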
2081
2082 static void
2083 c6x_print_operand (FILE *file, rtx x, int code)
2084 {
2085 int i;
2086 HOST_WIDE_INT v;
2087 tree t;
2088 machine_mode mode;
2089
2090 if (code == '|')
2091 {
2092 if (GET_MODE (c6x_current_insn) != TImode)
2093 fputs ("||", file);
2094 return;
2095 }
2096 if (code == '$')
2097 {
2098 c6x_print_unit_specifier_field (file, c6x_current_insn);
2099 return;
2100 }
2101
2102 if (code == '.')
2103 {
2104 x = current_insn_predicate;
2105 if (x)
2106 {
2107 unsigned int regno = REGNO (XEXP (x, 0));
2108 fputs ("[", file);
2109 if (GET_CODE (x) == EQ)
2110 fputs ("!", file);
2111 fputs (reg_names [regno], file);
2112 fputs ("]", file);
2113 }
2114 return;
2115 }
2116
2117 mode = GET_MODE (x);
2118
2119 switch (code)
2120 {
2121 case 'C':
2122 case 'c':
2123 {
2124 enum rtx_code c = GET_CODE (x);
2125 if (code == 'C')
2126 c = swap_condition (c);
2127 fputs (GET_RTX_NAME (c), file);
2128 }
2129 return;
2130
2131 case 'J':
2132 case 'j':
2133 {
2134 unsigned int regno = REGNO (XEXP (x, 0));
2135 if ((GET_CODE (x) == EQ) == (code == 'J'))
2136 fputs ("!", file);
2137 fputs (reg_names [regno], file);
2138 }
2139 return;
2140
2141 case 'k':
2142 gcc_assert (GET_CODE (x) == CONST_INT);
2143 v = INTVAL (x);
2144 fprintf (file, "%s", reg_names[v]);
2145 return;
2146 case 'K':
2147 gcc_assert (GET_CODE (x) == CONST_INT);
2148 v = INTVAL (x);
2149 gcc_assert ((v & 1) == 0);
2150 fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
2151 return;
2152
2153 case 's':
2154 case 'S':
2155 case 'f':
2156 case 'F':
2157 gcc_assert (GET_CODE (x) == CONST_INT);
2158 v = INTVAL (x);
2159 for (i = 0; i < 32; i++)
2160 {
2161 HOST_WIDE_INT tst = v & 1;
2162 if (((code == 'f' || code == 'F') && !tst)
2163 || ((code == 's' || code == 'S') && tst))
2164 break;
2165 v >>= 1;
2166 }
2167 if (code == 'f' || code == 's')
2168 {
2169 fprintf (file, "%d", i);
2170 return;
2171 }
2172 for (;i < 32; i++)
2173 {
2174 HOST_WIDE_INT tst = v & 1;
2175 if ((code == 'F' && tst) || (code == 'S' && !tst))
2176 break;
2177 v >>= 1;
2178 }
2179 fprintf (file, "%d", i - 1);
2180 return;
2181
2182 case 'n':
2183 gcc_assert (GET_CODE (x) == CONST_INT);
2184 output_addr_const (file, GEN_INT (-INTVAL (x)));
2185 return;
2186
2187 case 'r':
2188 gcc_assert (GET_CODE (x) == CONST_INT);
2189 v = INTVAL (x);
2190 if (v < 0)
2191 v = -v;
2192 output_addr_const (file, GEN_INT (v >> 1));
2193 return;
2194
2195 case 'R':
2196 gcc_assert (GET_CODE (x) == CONST_INT);
2197 v = INTVAL (x);
2198 if (v < 0)
2199 v = -v;
2200 output_addr_const (file, GEN_INT (v >> 2));
2201 return;
2202
2203 case 'd':
2204 gcc_assert (GET_CODE (x) == CONST_INT);
2205 v = INTVAL (x);
2206 fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
2207 return;
2208
2209 case 'p':
2210 case 'P':
2211 gcc_assert (GET_CODE (x) == REG);
2212 v = REGNO (x);
2213 if (code == 'P')
2214 v++;
2215 fputs (reg_names[v], file);
2216 return;
2217
2218 case 'D':
2219 v = 0;
2220 if (GET_CODE (x) == CONST)
2221 {
2222 x = XEXP (x, 0);
2223 gcc_assert (GET_CODE (x) == PLUS);
2224 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
2225 v = INTVAL (XEXP (x, 1));
2226 x = XEXP (x, 0);
2227
2228 }
2229 gcc_assert (GET_CODE (x) == SYMBOL_REF);
2230
2231 t = SYMBOL_REF_DECL (x);
2232 if (DECL_P (t))
2233 v |= DECL_ALIGN_UNIT (t);
2234 else
2235 v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
2236 if (v & 1)
2237 fputs ("b", file);
2238 else if (v & 2)
2239 fputs ("h", file);
2240 else
2241 fputs ("w", file);
2242 return;
2243
2244 case 'U':
2245 if (MEM_P (x))
2246 {
2247 x = XEXP (x, 0);
2248 if (GET_CODE (x) == PLUS
2249 || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2250 x = XEXP (x, 0);
2251 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
2252 {
2253 gcc_assert (sdata_symbolic_operand (x, Pmode));
2254 fputs ("2", file);
2255 return;
2256 }
2257 }
2258 gcc_assert (REG_P (x));
2259 if (A_REGNO_P (REGNO (x)))
2260 fputs ("1", file);
2261 if (B_REGNO_P (REGNO (x)))
2262 fputs ("2", file);
2263 return;
2264
2265 default:
2266 switch (GET_CODE (x))
2267 {
2268 case REG:
2269 if (GET_MODE_SIZE (mode) == 8)
2270 fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
2271 reg_names[REGNO (x)]);
2272 else
2273 fprintf (file, "%s", reg_names[REGNO (x)]);
2274 break;
2275
2276 case MEM:
2277 fputc ('*', file);
2278 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
2279 c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
2280 break;
2281
2282 case SYMBOL_REF:
2283 fputc ('(', file);
2284 output_addr_const (file, x);
2285 fputc (')', file);
2286 break;
2287
2288 case CONST_INT:
2289 output_addr_const (file, x);
2290 break;
2291
2292 case CONST_DOUBLE:
2293 output_operand_lossage ("invalid const_double operand");
2294 break;
2295
2296 default:
2297 output_addr_const (file, x);
2298 }
2299 }
2300 }
2301
2302 /* Return TRUE if OP is a valid memory address with a base register of
2303 class C. If SMALL_OFFSET is true, we disallow memory references which would
2304 require a long offset with B14/B15. */
2305
2306 bool
2307 c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
2308 {
2309 machine_mode mode = GET_MODE (op);
2310 rtx base = XEXP (op, 0);
2311 switch (GET_CODE (base))
2312 {
2313 case REG:
2314 break;
2315 case PLUS:
2316 if (small_offset
2317 && (XEXP (base, 0) == stack_pointer_rtx
2318 || XEXP (base, 0) == pic_offset_table_rtx))
2319 {
2320 if (!c6x_legitimate_address_p_1 (mode, base, true, true))
2321 return false;
2322 }
2323
2324 /* fall through */
2325 case PRE_INC:
2326 case PRE_DEC:
2327 case PRE_MODIFY:
2328 case POST_INC:
2329 case POST_DEC:
2330 case POST_MODIFY:
2331 base = XEXP (base, 0);
2332 break;
2333
2334 case CONST:
2335 case LABEL_REF:
2336 case SYMBOL_REF:
2337 gcc_assert (sdata_symbolic_operand (base, Pmode));
2338 return !small_offset && c == B_REGS;
2339
2340 default:
2341 return false;
2342 }
2343 return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
2344 }
2345
2346 /* Returns true if X is a valid address for use in a memory reference
2347 of mode MODE. If STRICT is true, we do not allow pseudo registers
2348 in the address. NO_LARGE_OFFSET is true if we are examining an
2349 address for use in a load or store misaligned instruction, or
2350 recursively examining an operand inside a PRE/POST_MODIFY. */
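/* Illustrative examples (SImode, so offsets are scaled by 4): a base
   plus constant such as (plus (reg A4) (const_int 16)) is accepted,
   since 16/4 = 4 fits the scaled +/-31 range; (plus (reg A4)
   (const_int 200)) is rejected, unless the base is the stack pointer,
   which permits the larger long-offset range checked at the end of the
   PLUS case below.  */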
2351
2352 bool
2353 c6x_legitimate_address_p_1 (machine_mode mode, rtx x, bool strict,
2354 bool no_large_offset)
2355 {
2356 int size, size1;
2357 HOST_WIDE_INT off;
2358 enum rtx_code code = GET_CODE (x);
2359
2360 switch (code)
2361 {
2362 case PRE_MODIFY:
2363 case POST_MODIFY:
2364 /* We can't split these into word-sized pieces yet. */
2365 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2366 return false;
2367 if (GET_CODE (XEXP (x, 1)) != PLUS)
2368 return false;
2369 if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
2370 return false;
2371 if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
2372 return false;
2373
2374 /* fall through */
2375 case PRE_INC:
2376 case PRE_DEC:
2377 case POST_INC:
2378 case POST_DEC:
2379 /* We can't split these into word-sized pieces yet. */
2380 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2381 return false;
2382 x = XEXP (x, 0);
2383 if (!REG_P (x))
2384 return false;
2385
2386 /* fall through */
2387 case REG:
2388 if (strict)
2389 return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
2390 else
2391 return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));
2392
2393 case PLUS:
2394 if (!REG_P (XEXP (x, 0))
2395 || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
2396 return false;
2397 /* We currently cannot ensure that both registers end up in the
2398 same register file. */
2399 if (REG_P (XEXP (x, 1)))
2400 return false;
2401
2402 if (mode == BLKmode)
2403 size = 4;
2404 else if (mode == VOIDmode)
2405 /* ??? This can happen during ivopts. */
2406 size = 1;
2407 else
2408 size = GET_MODE_SIZE (mode);
2409
2410 if (flag_pic
2411 && GET_CODE (XEXP (x, 1)) == UNSPEC
2412 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
2413 && XEXP (x, 0) == pic_offset_table_rtx
2414 && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
2415 return !no_large_offset && size <= 4;
2416 if (flag_pic == 1
2417 && mode == Pmode
2418 && GET_CODE (XEXP (x, 1)) == UNSPEC
2419 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
2420 && XEXP (x, 0) == pic_offset_table_rtx
2421 && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
2422 || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
2423 return !no_large_offset;
2424 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2425 return false;
2426
2427 off = INTVAL (XEXP (x, 1));
2428
2429 /* If the machine does not have doubleword load/stores, we'll use
2430 word size accesses. */
2431 size1 = size;
2432 if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
2433 size = UNITS_PER_WORD;
2434
2435 if (((HOST_WIDE_INT)size1 - 1) & off)
2436 return false;
2437 off /= size;
2438 if (off > -32 && off < (size1 == size ? 32 : 28))
2439 return true;
2440 if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
2441 || size1 > UNITS_PER_WORD)
2442 return false;
2443 return off >= 0 && off < 32768;
2444
2445 case CONST:
2446 case SYMBOL_REF:
2447 case LABEL_REF:
2448 return (!no_large_offset
2449 /* With -fpic, we must wrap it in an unspec to show the B14
2450 dependency. */
2451 && !flag_pic
2452 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
2453 && sdata_symbolic_operand (x, Pmode));
2454
2455 default:
2456 return false;
2457 }
2458 }
2459
2460 static bool
2461 c6x_legitimate_address_p (machine_mode mode, rtx x, bool strict)
2462 {
2463 return c6x_legitimate_address_p_1 (mode, x, strict, false);
2464 }
2465
2466 static bool
2467 c6x_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
2468 rtx x ATTRIBUTE_UNUSED)
2469 {
2470 return true;
2471 }
2472
2473 /* Implements TARGET_PREFERRED_RENAME_CLASS. */
2474 static reg_class_t
2475 c6x_preferred_rename_class (reg_class_t cl)
2476 {
2477 if (cl == A_REGS)
2478 return NONPREDICATE_A_REGS;
2479 if (cl == B_REGS)
2480 return NONPREDICATE_B_REGS;
2481 if (cl == ALL_REGS || cl == GENERAL_REGS)
2482 return NONPREDICATE_REGS;
2483 return NO_REGS;
2484 }
2485
2486 /* Implements FINAL_PRESCAN_INSN. */
2487 void
2488 c6x_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
2489 int noperands ATTRIBUTE_UNUSED)
2490 {
2491 c6x_current_insn = insn;
2492 }
2493
2494 /* A structure to describe the stack layout of a function. The layout is
2495 as follows:
2496
2497 [saved frame pointer (or possibly padding0)]
2498 --> incoming stack pointer, new hard frame pointer
2499 [saved call-used regs]
2500 [optional padding1]
2501 --> soft frame pointer
2502 [frame]
2503 [outgoing arguments]
2504 [optional padding2]
2505
2506 The structure members are laid out in this order. */
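/* A purely illustrative instance of the layout above: for a function
   saving two call-used registers with a 24-byte local frame and 8
   bytes of outgoing arguments, the frame pointer save occupies the
   word addressed by the incoming SP (technically part of the caller's
   frame), the two register saves follow below it, padding1 aligns the
   soft frame pointer, and padding2 rounds the final to_allocate value
   computed by c6x_compute_frame_layout.  */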
2507
2508 struct c6x_frame
2509 {
2510 int padding0;
2511 /* Number of registers to save. */
2512 int nregs;
2513 int padding1;
2514 HOST_WIDE_INT frame;
2515 int outgoing_arguments_size;
2516 int padding2;
2517
2518 HOST_WIDE_INT to_allocate;
2519 /* The offsets relative to the incoming stack pointer (which
2520 becomes HARD_FRAME_POINTER). */
2521 HOST_WIDE_INT frame_pointer_offset;
2522 HOST_WIDE_INT b3_offset;
2523
2524 /* True if we should call push_rts/pop_rts to save and restore
2525 registers. */
2526 bool push_rts;
2527 };
2528
2529 /* Return true if we need to save and modify the PIC register in the
2530 prologue. */
2531
2532 static bool
2533 must_reload_pic_reg_p (void)
2534 {
2535 struct cgraph_local_info *i = NULL;
2536
2537 if (!TARGET_DSBT)
2538 return false;
2539
2540 i = cgraph_node::local_info (current_function_decl);
2541
2542 if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local)
2543 return true;
2544 return false;
2545 }
2546
2547 /* Return 1 if we need to save REGNO. */
2548 static int
2549 c6x_save_reg (unsigned int regno)
2550 {
2551 return ((df_regs_ever_live_p (regno)
2552 && !call_used_regs[regno]
2553 && !fixed_regs[regno])
2554 || (regno == RETURN_ADDR_REGNO
2555 && (df_regs_ever_live_p (regno)
2556 || !crtl->is_leaf))
2557 || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2558 }
2559
2560 /* Examine the number of regs NREGS we've determined we must save.
2561 Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2562 prologue and epilogue. */
2563
2564 static bool
2565 use_push_rts_p (int nregs)
2566 {
2567 if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2568 && !cfun->machine->contains_sibcall
2569 && !cfun->returns_struct
2570 && !TARGET_LONG_CALLS
2571 && nregs >= 6 && !frame_pointer_needed)
2572 return true;
2573 return false;
2574 }
2575
2576 /* Return the number of saved general-purpose registers. */
2577
2578 int
2579 c6x_nsaved_regs (void)
2580 {
2581 int nregs = 0;
2582 int regno;
2583
2584 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2585 if (c6x_save_reg (regno))
2586 nregs++;
2587 return nregs;
2588 }
2589
2590 /* The safe debug order mandated by the ABI. */
2591 static unsigned reg_save_order[] =
2592 {
2593 REG_A10, REG_A11, REG_A12, REG_A13,
2594 REG_A14, REG_B3,
2595 REG_B10, REG_B11, REG_B12, REG_B13,
2596 REG_B14, REG_A15
2597 };
2598
2599 #define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2600
2601 /* Compute the layout of the stack frame and store it in FRAME. */
2602
2603 static void
2604 c6x_compute_frame_layout (struct c6x_frame *frame)
2605 {
2606 HOST_WIDE_INT size = get_frame_size ();
2607 HOST_WIDE_INT offset;
2608 int nregs;
2609
2610 /* We use the four bytes which are technically inside the caller's frame,
2611 usually to save the frame pointer. */
2612 offset = -4;
2613 frame->padding0 = 0;
2614 nregs = c6x_nsaved_regs ();
2615 frame->push_rts = false;
2616 frame->b3_offset = 0;
2617 if (use_push_rts_p (nregs))
2618 {
2619 frame->push_rts = true;
2620 frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2621 nregs = 14;
2622 }
2623 else if (c6x_save_reg (REG_B3))
2624 {
2625 int idx;
2626 for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2627 {
2628 if (c6x_save_reg (reg_save_order[idx]))
2629 frame->b3_offset -= 4;
2630 }
2631 }
2632 frame->nregs = nregs;
2633
2634 if (size == 0 && nregs == 0)
2635 {
2636 frame->padding0 = 4;
2637 frame->padding1 = frame->padding2 = 0;
2638 frame->frame_pointer_offset = frame->to_allocate = 0;
2639 frame->outgoing_arguments_size = 0;
2640 return;
2641 }
2642
2643 if (!frame->push_rts)
2644 offset += frame->nregs * 4;
2645
2646 if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
2647 && !crtl->is_leaf)
2648 /* Don't use the bottom of the caller's frame if we have no
2649 allocation of our own and call other functions. */
2650 frame->padding0 = frame->padding1 = 4;
2651 else if (offset & 4)
2652 frame->padding1 = 4;
2653 else
2654 frame->padding1 = 0;
2655
2656 offset += frame->padding0 + frame->padding1;
2657 frame->frame_pointer_offset = offset;
2658 offset += size;
2659
2660 frame->outgoing_arguments_size = crtl->outgoing_args_size;
2661 offset += frame->outgoing_arguments_size;
2662
2663 if ((offset & 4) == 0)
2664 frame->padding2 = 8;
2665 else
2666 frame->padding2 = 4;
2667 frame->to_allocate = offset + frame->padding2;
2668 }
2669
2670 /* Return the offset between two registers, one to be eliminated, and the other
2671 its replacement, at the start of a routine. */
2672
2673 HOST_WIDE_INT
2674 c6x_initial_elimination_offset (int from, int to)
2675 {
2676 struct c6x_frame frame;
2677 c6x_compute_frame_layout (&frame);
2678
2679 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2680 return 0;
2681 else if (from == FRAME_POINTER_REGNUM
2682 && to == HARD_FRAME_POINTER_REGNUM)
2683 return -frame.frame_pointer_offset;
2684 else
2685 {
2686 gcc_assert (to == STACK_POINTER_REGNUM);
2687
2688 if (from == ARG_POINTER_REGNUM)
2689 return frame.to_allocate + (frame.push_rts ? 56 : 0);
2690
2691 gcc_assert (from == FRAME_POINTER_REGNUM);
2692 return frame.to_allocate - frame.frame_pointer_offset;
2693 }
2694 }
2695
2696 /* Given FROM and TO register numbers, say whether this elimination is
2697 allowed. Frame pointer elimination is automatically handled. */
2698
2699 static bool
2700 c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2701 {
2702 if (to == STACK_POINTER_REGNUM)
2703 return !frame_pointer_needed;
2704 return true;
2705 }
2706
2707 /* Emit insns to increment the stack pointer by OFFSET. If
2708 FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2709 Does nothing if the offset is zero. */
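/* For instance: an offset of 40000 does not fit the signed 16-bit
   immediate range, so the constant is built in A0 with a
   movsi_high/movsi_lo_sum pair and then added to the stack pointer;
   a small offset such as -16 is handled by a single addsi3.  */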
2710
2711 static void
2712 emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2713 {
2714 rtx to_add = GEN_INT (offset);
2715 rtx orig_to_add = to_add;
2716 rtx_insn *insn;
2717
2718 if (offset == 0)
2719 return;
2720
2721 if (offset < -32768 || offset > 32767)
2722 {
2723 rtx reg = gen_rtx_REG (SImode, REG_A0);
2724 rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2725
2726 insn = emit_insn (gen_movsi_high (reg, low));
2727 if (frame_related_p)
2728 RTX_FRAME_RELATED_P (insn) = 1;
2729 insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2730 if (frame_related_p)
2731 RTX_FRAME_RELATED_P (insn) = 1;
2732 to_add = reg;
2733 }
2734 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2735 to_add));
2736 if (frame_related_p)
2737 {
2738 if (REG_P (to_add))
2739 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2740 gen_rtx_SET (stack_pointer_rtx,
2741 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2742 orig_to_add)));
2743
2744 RTX_FRAME_RELATED_P (insn) = 1;
2745 }
2746 }
2747
2748 /* Prologue and epilogue. */
2749 void
2750 c6x_expand_prologue (void)
2751 {
2752 struct c6x_frame frame;
2753 rtx_insn *insn;
2754 rtx mem;
2755 int nsaved = 0;
2756 HOST_WIDE_INT initial_offset, off, added_already;
2757
2758 c6x_compute_frame_layout (&frame);
2759
2760 if (flag_stack_usage_info)
2761 current_function_static_stack_size = frame.to_allocate;
2762
2763 initial_offset = -frame.to_allocate;
2764 if (frame.push_rts)
2765 {
2766 emit_insn (gen_push_rts ());
2767 nsaved = frame.nregs;
2768 }
2769
2770 /* If the offsets would be too large for the memory references we will
2771 create to save registers, do the stack allocation in two parts.
2772 Ensure by subtracting 8 that we don't store to the word pointed to
2773 by the stack pointer. */
2774 if (initial_offset < -32768)
2775 initial_offset = -frame.frame_pointer_offset - 8;
2776
2777 if (frame.to_allocate > 0)
2778 gcc_assert (initial_offset != 0);
2779
2780 off = -initial_offset + 4 - frame.padding0;
2781
2782 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2783
2784 added_already = 0;
2785 if (frame_pointer_needed)
2786 {
2787 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2788 /* We go through some contortions here to both follow the ABI's
2789 recommendation that FP == incoming SP, and to avoid writing or
2790 reading the word pointed to by the stack pointer. */
2791 rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2792 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2793 GEN_INT (-8)));
2794 insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2795 RTX_FRAME_RELATED_P (insn) = 1;
2796 nsaved++;
2797 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2798 GEN_INT (8)));
2799 RTX_FRAME_RELATED_P (insn) = 1;
2800 off -= 4;
2801 added_already = -8;
2802 }
2803
2804 emit_add_sp_const (initial_offset - added_already, true);
2805
2806 if (nsaved < frame.nregs)
2807 {
2808 unsigned i;
2809
2810 for (i = 0; i < N_SAVE_ORDER; i++)
2811 {
2812 int idx = N_SAVE_ORDER - i - 1;
2813 unsigned regno = reg_save_order[idx];
2814 rtx reg;
2815 machine_mode save_mode = SImode;
2816
2817 if (regno == REG_A15 && frame_pointer_needed)
2818 /* Already saved. */
2819 continue;
2820 if (!c6x_save_reg (regno))
2821 continue;
2822
2823 if (TARGET_STDW && (off & 4) == 0 && off <= 256
2824 && (regno & 1) == 1
2825 && i + 1 < N_SAVE_ORDER
2826 && reg_save_order[idx - 1] == regno - 1
2827 && c6x_save_reg (regno - 1))
2828 {
2829 save_mode = DImode;
2830 regno--;
2831 i++;
2832 }
2833 reg = gen_rtx_REG (save_mode, regno);
2834 off -= GET_MODE_SIZE (save_mode);
2835
2836 insn = emit_move_insn (adjust_address (mem, save_mode, off),
2837 reg);
2838 RTX_FRAME_RELATED_P (insn) = 1;
2839
2840 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2841 }
2842 }
2843 gcc_assert (nsaved == frame.nregs);
2844 emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2845 if (must_reload_pic_reg_p ())
2846 {
2847 if (dsbt_decl == NULL)
2848 {
2849 tree t;
2850
2851 t = build_index_type (integer_one_node);
2852 t = build_array_type (integer_type_node, t);
2853 t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2854 get_identifier ("__c6xabi_DSBT_BASE"), t);
2855 DECL_ARTIFICIAL (t) = 1;
2856 DECL_IGNORED_P (t) = 1;
2857 DECL_EXTERNAL (t) = 1;
2858 TREE_STATIC (t) = 1;
2859 TREE_PUBLIC (t) = 1;
2860 TREE_USED (t) = 1;
2861
2862 dsbt_decl = t;
2863 }
2864 emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2865 XEXP (DECL_RTL (dsbt_decl), 0)));
2866 }
2867 }
2868
2869 void
2870 c6x_expand_epilogue (bool sibcall)
2871 {
2872 unsigned i;
2873 struct c6x_frame frame;
2874 rtx mem;
2875 HOST_WIDE_INT off;
2876 int nsaved = 0;
2877
2878 c6x_compute_frame_layout (&frame);
2879
2880 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2881
2882 /* Insert a dummy set/use of the stack pointer. This creates a
2883 scheduler barrier between the prologue saves and epilogue restores. */
2884 emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2885
2886 /* If the offsets would be too large for the memory references we will
2887 create to restore registers, do a preliminary stack adjustment here. */
2888 off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2889 if (frame.push_rts)
2890 {
2891 nsaved = frame.nregs;
2892 }
2893 else
2894 {
2895 if (frame.to_allocate > 32768)
2896 {
2897 /* Don't add the entire offset so that we leave an unused word
2898 above the stack pointer. */
2899 emit_add_sp_const ((off - 16) & ~7, false);
2900 off &= 7;
2901 off += 16;
2902 }
2903 for (i = 0; i < N_SAVE_ORDER; i++)
2904 {
2905 unsigned regno = reg_save_order[i];
2906 rtx reg;
2907 machine_mode save_mode = SImode;
2908
2909 if (!c6x_save_reg (regno))
2910 continue;
2911 if (regno == REG_A15 && frame_pointer_needed)
2912 continue;
2913
2914 if (TARGET_STDW && (off & 4) == 0 && off < 256
2915 && (regno & 1) == 0
2916 && i + 1 < N_SAVE_ORDER
2917 && reg_save_order[i + 1] == regno + 1
2918 && c6x_save_reg (regno + 1))
2919 {
2920 save_mode = DImode;
2921 i++;
2922 }
2923 reg = gen_rtx_REG (save_mode, regno);
2924
2925 emit_move_insn (reg, adjust_address (mem, save_mode, off));
2926
2927 off += GET_MODE_SIZE (save_mode);
2928 nsaved += HARD_REGNO_NREGS (regno, save_mode);
2929 }
2930 }
2931 if (!frame_pointer_needed)
2932 emit_add_sp_const (off + frame.padding0 - 4, false);
2933 else
2934 {
2935 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2936 rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2937 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2938 GEN_INT (8)));
2939 emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2940 GEN_INT (-8)));
2941 emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2942 nsaved++;
2943 }
2944 gcc_assert (nsaved == frame.nregs);
2945 if (!sibcall)
2946 {
2947 if (frame.push_rts)
2948 emit_jump_insn (gen_pop_rts ());
2949 else
2950 emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2951 RETURN_ADDR_REGNO)));
2952 }
2953 }
2954
2955 /* Return the value of the return address for the frame COUNT steps up
2956 from the current frame, after the prologue.
2957 We punt for everything but the current frame by returning const0_rtx. */
2958
2959 rtx
2960 c6x_return_addr_rtx (int count)
2961 {
2962 if (count != 0)
2963 return const0_rtx;
2964
2965 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2966 }
2967
2968 /* Return true iff TYPE is one of the shadow types. */
2969 static bool
2970 shadow_type_p (enum attr_type type)
2971 {
2972 return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2973 || type == TYPE_MULT_SHADOW);
2974 }
2975
2976 /* Return true iff INSN is a shadow pattern. */
2977 static bool
2978 shadow_p (rtx_insn *insn)
2979 {
2980 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2981 return false;
2982 return shadow_type_p (get_attr_type (insn));
2983 }
2984
2985 /* Return true iff INSN is a shadow or blockage pattern. */
2986 static bool
2987 shadow_or_blockage_p (rtx_insn *insn)
2988 {
2989 enum attr_type type;
2990 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2991 return false;
2992 type = get_attr_type (insn);
2993 return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2994 }
2995
2996 /* Translate UNITS into a bitmask of units we can reserve for this
2997 insn. */
2998 static int
2999 get_reservation_flags (enum attr_units units)
3000 {
3001 switch (units)
3002 {
3003 case UNITS_D:
3004 case UNITS_D_ADDR:
3005 return RESERVATION_FLAG_D;
3006 case UNITS_L:
3007 return RESERVATION_FLAG_L;
3008 case UNITS_S:
3009 return RESERVATION_FLAG_S;
3010 case UNITS_M:
3011 return RESERVATION_FLAG_M;
3012 case UNITS_LS:
3013 return RESERVATION_FLAG_LS;
3014 case UNITS_DL:
3015 return RESERVATION_FLAG_DL;
3016 case UNITS_DS:
3017 return RESERVATION_FLAG_DS;
3018 case UNITS_DLS:
3019 return RESERVATION_FLAG_DLS;
3020 default:
3021 return 0;
3022 }
3023 }
3024
3025 /* Compute the side of the machine used by INSN, which reserves UNITS.
3026 This must match the reservations in the scheduling description. */
3027 static int
3028 get_insn_side (rtx_insn *insn, enum attr_units units)
3029 {
3030 if (units == UNITS_D_ADDR)
3031 return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3032 else
3033 {
3034 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3035 if (rf == DEST_REGFILE_ANY)
3036 return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3037 else
3038 return rf == DEST_REGFILE_A ? 0 : 1;
3039 }
3040 }
3041
3042 /* After scheduling, walk the insns between HEAD and END and assign unit
3043 reservations. */
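/* Roughly, the loop below works per execute packet (the insns sharing
   one cycle, starting at an insn with TImode): it takes the unit mask
   the scheduler recorded for the packet, assigns insns that can only
   use a single unit first, and then distributes the more flexible
   DL/DS/LS/DLS requests over the least-contended remaining units.  */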
3044 static void
3045 assign_reservations (rtx_insn *head, rtx_insn *end)
3046 {
3047 rtx_insn *insn;
3048 for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3049 {
3050 unsigned int sched_mask, reserved;
3051 rtx_insn *within, *last;
3052 int pass;
3053 int rsrv[2];
3054 int rsrv_count[2][4];
3055 int i;
3056
3057 if (GET_MODE (insn) != TImode)
3058 continue;
3059
3060 reserved = 0;
3061 last = NULL;
3062 /* Find the last insn in the packet. It has a state recorded for it,
3063 which we can use to determine the units we should be using. */
3064 for (within = insn;
3065 (within != NEXT_INSN (end)
3066 && (within == insn || GET_MODE (within) != TImode));
3067 within = NEXT_INSN (within))
3068 {
3069 int icode;
3070 if (!NONDEBUG_INSN_P (within))
3071 continue;
3072 icode = recog_memoized (within);
3073 if (icode < 0)
3074 continue;
3075 if (shadow_p (within))
3076 continue;
3077 if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3078 reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3079 last = within;
3080 }
3081 if (last == NULL_RTX)
3082 continue;
3083
3084 sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3085 sched_mask &= ~reserved;
3086
3087 memset (rsrv_count, 0, sizeof rsrv_count);
3088 rsrv[0] = rsrv[1] = ~0;
3089 for (i = 0; i < 8; i++)
3090 {
3091 int side = i / 4;
3092 int unit = i & 3;
3093 unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3094 /* Clear the bits which we expect to reserve in the following loop,
3095 leaving the ones set which aren't present in the scheduler's
3096 state and shouldn't be reserved. */
3097 if (sched_mask & unit_bit)
3098 rsrv[i / 4] &= ~(1 << unit);
3099 }
3100
3101 /* Walk through the insns that occur in the same cycle. We use multiple
3102 passes to assign units, assigning for insns with the most specific
3103 requirements first. */
3104 for (pass = 0; pass < 4; pass++)
3105 for (within = insn;
3106 (within != NEXT_INSN (end)
3107 && (within == insn || GET_MODE (within) != TImode));
3108 within = NEXT_INSN (within))
3109 {
3110 int uid = INSN_UID (within);
3111 int this_rsrv, side;
3112 int icode;
3113 enum attr_units units;
3114 enum attr_type type;
3115 int j;
3116
3117 if (!NONDEBUG_INSN_P (within))
3118 continue;
3119 icode = recog_memoized (within);
3120 if (icode < 0)
3121 continue;
3122 if (INSN_INFO_ENTRY (uid).reservation != 0)
3123 continue;
3124 units = get_attr_units (within);
3125 type = get_attr_type (within);
3126 this_rsrv = get_reservation_flags (units);
3127 if (this_rsrv == 0)
3128 continue;
3129 side = get_insn_side (within, units);
3130
3131 /* Certain floating point instructions are treated specially. If
3132 an insn can choose between units it can reserve, and its
3133 reservation spans more than one cycle, the reservation contains
3134 special markers in the first cycle to help us reconstruct what
3135 the automaton chose. */
3136 if ((type == TYPE_ADDDP || type == TYPE_FP4)
3137 && units == UNITS_LS)
3138 {
3139 int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3140 + side * UNIT_QID_SIDE_OFFSET);
3141 int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3142 + side * UNIT_QID_SIDE_OFFSET);
3143 if ((sched_mask & (1 << test1_code)) != 0)
3144 {
3145 this_rsrv = RESERVATION_FLAG_L;
3146 sched_mask &= ~(1 << test1_code);
3147 }
3148 else if ((sched_mask & (1 << test2_code)) != 0)
3149 {
3150 this_rsrv = RESERVATION_FLAG_S;
3151 sched_mask &= ~(1 << test2_code);
3152 }
3153 }
3154
3155 if ((this_rsrv & (this_rsrv - 1)) == 0)
3156 {
3157 int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
3158 rsrv[side] |= this_rsrv;
3159 INSN_INFO_ENTRY (uid).reservation = t;
3160 continue;
3161 }
3162
3163 if (pass == 1)
3164 {
3165 for (j = 0; j < 4; j++)
3166 if (this_rsrv & (1 << j))
3167 rsrv_count[side][j]++;
3168 continue;
3169 }
3170 if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3171 || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3172 {
3173 int best = -1, best_cost = INT_MAX;
3174 for (j = 0; j < 4; j++)
3175 if ((this_rsrv & (1 << j))
3176 && !(rsrv[side] & (1 << j))
3177 && rsrv_count[side][j] < best_cost)
3178 {
3179 best_cost = rsrv_count[side][j];
3180 best = j;
3181 }
3182 gcc_assert (best != -1);
3183 rsrv[side] |= 1 << best;
3184 for (j = 0; j < 4; j++)
3185 if ((this_rsrv & (1 << j)) && j != best)
3186 rsrv_count[side][j]--;
3187
3188 INSN_INFO_ENTRY (uid).reservation
3189 = best + side * UNIT_QID_SIDE_OFFSET;
3190 }
3191 }
3192 }
3193 }
3194
3195 /* Return a factor by which to weight unit imbalances for a reservation
3196 R. */
3197 static int
3198 unit_req_factor (enum unitreqs r)
3199 {
3200 switch (r)
3201 {
3202 case UNIT_REQ_D:
3203 case UNIT_REQ_L:
3204 case UNIT_REQ_S:
3205 case UNIT_REQ_M:
3206 case UNIT_REQ_X:
3207 case UNIT_REQ_T:
3208 return 1;
3209 case UNIT_REQ_DL:
3210 case UNIT_REQ_LS:
3211 case UNIT_REQ_DS:
3212 return 2;
3213 case UNIT_REQ_DLS:
3214 return 3;
3215 default:
3216 gcc_unreachable ();
3217 }
3218 }
3219
3220 /* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3221 requirements. Returns zero if INSN can't be handled, otherwise
3222 either one or two to show how many of the two pairs are in use.
3223 REQ1 is always used; it holds what is normally thought of as the
3224 instruction's reservation, e.g. UNIT_REQ_DL. REQ2 is used to either
3225 describe a cross path, or for loads/stores, the T unit. */
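/* For example: a load or store (UNITS_D_ADDR) yields REQ1 = UNIT_REQ_D
   on the side of its address register and REQ2 = UNIT_REQ_T, placed on
   the opposite side when a cross path is used; any other insn with a
   cross path instead reports REQ2 = UNIT_REQ_X on its own side.  */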
3226 static int
3227 get_unit_reqs (rtx_insn *insn, int *req1, int *side1, int *req2, int *side2)
3228 {
3229 enum attr_units units;
3230 enum attr_cross cross;
3231 int side, req;
3232
3233 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3234 return 0;
3235 units = get_attr_units (insn);
3236 if (units == UNITS_UNKNOWN)
3237 return 0;
3238 side = get_insn_side (insn, units);
3239 cross = get_attr_cross (insn);
3240
3241 req = (units == UNITS_D ? UNIT_REQ_D
3242 : units == UNITS_D_ADDR ? UNIT_REQ_D
3243 : units == UNITS_DL ? UNIT_REQ_DL
3244 : units == UNITS_DS ? UNIT_REQ_DS
3245 : units == UNITS_L ? UNIT_REQ_L
3246 : units == UNITS_LS ? UNIT_REQ_LS
3247 : units == UNITS_S ? UNIT_REQ_S
3248 : units == UNITS_M ? UNIT_REQ_M
3249 : units == UNITS_DLS ? UNIT_REQ_DLS
3250 : -1);
3251 gcc_assert (req != -1);
3252 *req1 = req;
3253 *side1 = side;
3254 if (units == UNITS_D_ADDR)
3255 {
3256 *req2 = UNIT_REQ_T;
3257 *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3258 return 2;
3259 }
3260 else if (cross == CROSS_Y)
3261 {
3262 *req2 = UNIT_REQ_X;
3263 *side2 = side;
3264 return 2;
3265 }
3266 return 1;
3267 }
3268
3269 /* Walk the insns between and including HEAD and TAIL, and mark the
3270 resource requirements in the unit_reqs table. */
3271 static void
3272 count_unit_reqs (unit_req_table reqs, rtx_insn *head, rtx_insn *tail)
3273 {
3274 rtx_insn *insn;
3275
3276 memset (reqs, 0, sizeof (unit_req_table));
3277
3278 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3279 {
3280 int side1, side2, req1, req2;
3281
3282 switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3283 {
3284 case 2:
3285 reqs[side2][req2]++;
3286 /* fall through */
3287 case 1:
3288 reqs[side1][req1]++;
3289 break;
3290 }
3291 }
3292 }
3293
3294 /* Update the table REQS by merging more specific unit reservations into
3295 more general ones, i.e. counting (for example) UNIT_REQ_D also in
3296 UNIT_REQ_DL, DS, and DLS. */
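/* A small worked example: if one side has D = 3, L = 2 and S = 0, with
   no combined requests counted yet, then after merging DL = 5, DS = 3,
   LS = 2 and DLS = 5, i.e. each specific request is also counted
   against every group that could satisfy it.  */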
3297 static void
3298 merge_unit_reqs (unit_req_table reqs)
3299 {
3300 int side;
3301 for (side = 0; side < 2; side++)
3302 {
3303 int d = reqs[side][UNIT_REQ_D];
3304 int l = reqs[side][UNIT_REQ_L];
3305 int s = reqs[side][UNIT_REQ_S];
3306 int dl = reqs[side][UNIT_REQ_DL];
3307 int ls = reqs[side][UNIT_REQ_LS];
3308 int ds = reqs[side][UNIT_REQ_DS];
3309
3310 reqs[side][UNIT_REQ_DL] += d;
3311 reqs[side][UNIT_REQ_DL] += l;
3312 reqs[side][UNIT_REQ_DS] += d;
3313 reqs[side][UNIT_REQ_DS] += s;
3314 reqs[side][UNIT_REQ_LS] += l;
3315 reqs[side][UNIT_REQ_LS] += s;
3316 reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
3317 }
3318 }
3319
3320 /* Examine the table REQS and return a measure of unit imbalance by comparing
3321 the two sides of the machine. If, for example, D1 is used twice and D2
3322 used not at all, the return value should be 1 in the absence of other
3323 imbalances. */
3324 static int
3325 unit_req_imbalance (unit_req_table reqs)
3326 {
3327 int val = 0;
3328 int i;
3329
3330 for (i = 0; i < UNIT_REQ_MAX; i++)
3331 {
3332 int factor = unit_req_factor ((enum unitreqs) i);
3333 int diff = abs (reqs[0][i] - reqs[1][i]);
3334 val += (diff + factor - 1) / factor / 2;
3335 }
3336 return val;
3337 }
3338
3339 /* Return the resource-constrained minimum iteration interval given the
3340 data in the REQS table. This must have been processed with
3341 merge_unit_reqs already. */
3342 static int
3343 res_mii (unit_req_table reqs)
3344 {
3345 int side, req;
3346 int worst = 1;
3347 for (side = 0; side < 2; side++)
3348 for (req = 0; req < UNIT_REQ_MAX; req++)
3349 {
3350 int factor = unit_req_factor ((enum unitreqs) req);
3351 worst = MAX ((reqs[side][UNIT_REQ_D] + factor - 1) / factor, worst);
3352 }
3353
3354 return worst;
3355 }
3356
3357 /* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3358 the operands that are involved in the (up to) two reservations, as
3359 found by get_unit_reqs. Return true if we did this successfully, false
3360 if we couldn't identify what to do with INSN. */
3361 static bool
3362 get_unit_operand_masks (rtx_insn *insn, unsigned int *pmask1,
3363 unsigned int *pmask2)
3364 {
3365 enum attr_op_pattern op_pat;
3366
3367 if (recog_memoized (insn) < 0)
3368 return false;
3369 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3370 return false;
3371 extract_insn (insn);
3372 op_pat = get_attr_op_pattern (insn);
3373 if (op_pat == OP_PATTERN_DT)
3374 {
3375 gcc_assert (recog_data.n_operands == 2);
3376 *pmask1 = 1 << 0;
3377 *pmask2 = 1 << 1;
3378 return true;
3379 }
3380 else if (op_pat == OP_PATTERN_TD)
3381 {
3382 gcc_assert (recog_data.n_operands == 2);
3383 *pmask1 = 1 << 1;
3384 *pmask2 = 1 << 0;
3385 return true;
3386 }
3387 else if (op_pat == OP_PATTERN_SXS)
3388 {
3389 gcc_assert (recog_data.n_operands == 3);
3390 *pmask1 = (1 << 0) | (1 << 2);
3391 *pmask2 = 1 << 1;
3392 return true;
3393 }
3394 else if (op_pat == OP_PATTERN_SX)
3395 {
3396 gcc_assert (recog_data.n_operands == 2);
3397 *pmask1 = 1 << 0;
3398 *pmask2 = 1 << 1;
3399 return true;
3400 }
3401 else if (op_pat == OP_PATTERN_SSX)
3402 {
3403 gcc_assert (recog_data.n_operands == 3);
3404 *pmask1 = (1 << 0) | (1 << 1);
3405 *pmask2 = 1 << 2;
3406 return true;
3407 }
3408 return false;
3409 }
3410
3411 /* Try to replace a register in INSN, which has corresponding rename info
3412 from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information
3413 about the operands that must be renamed and the side they are on.
3414 REQS is the table of unit reservations in the loop between HEAD and TAIL.
3415 We recompute this information locally after our transformation, and keep
3416 it only if we managed to improve the balance. */
3417 static void
3418 try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs,
3419 rtx insn,
3420 insn_rr_info *info, unsigned int op_mask, int orig_side)
3421 {
3422 enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3423 HARD_REG_SET unavailable;
3424 du_head_p this_head;
3425 struct du_chain *chain;
3426 int i;
3427 unsigned tmp_mask;
3428 int best_reg, old_reg;
3429 vec<du_head_p> involved_chains = vNULL;
3430 unit_req_table new_reqs;
3431 bool ok;
3432
3433 for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3434 {
3435 du_head_p op_chain;
3436 if ((tmp_mask & (1 << i)) == 0)
3437 continue;
3438 if (info->op_info[i].n_chains != 1)
3439 goto out_fail;
3440 op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
3441 involved_chains.safe_push (op_chain);
3442 tmp_mask &= ~(1 << i);
3443 }
3444
3445 if (involved_chains.length () > 1)
3446 goto out_fail;
3447
3448 this_head = involved_chains[0];
3449 if (this_head->cannot_rename)
3450 goto out_fail;
3451
3452 for (chain = this_head->first; chain; chain = chain->next_use)
3453 {
3454 unsigned int mask1, mask2, mask_changed;
3455 int count, side1, side2, req1, req2;
3456 insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)];
3457
3458 count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3459
3460 if (count == 0)
3461 goto out_fail;
3462
3463 if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3464 goto out_fail;
3465
3466 extract_insn (chain->insn);
3467
3468 mask_changed = 0;
3469 for (i = 0; i < recog_data.n_operands; i++)
3470 {
3471 int j;
3472 int n_this_op = this_rr->op_info[i].n_chains;
3473 for (j = 0; j < n_this_op; j++)
3474 {
3475 du_head_p other = this_rr->op_info[i].heads[j];
3476 if (regrename_chain_from_id (other->id) == this_head)
3477 break;
3478 }
3479 if (j == n_this_op)
3480 continue;
3481
3482 if (n_this_op != 1)
3483 goto out_fail;
3484 mask_changed |= 1 << i;
3485 }
3486 gcc_assert (mask_changed != 0);
3487 if (mask_changed != mask1 && mask_changed != mask2)
3488 goto out_fail;
3489 }
3490
3491 /* If we get here, we can do the renaming. */
3492 COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3493
3494 old_reg = this_head->regno;
3495 best_reg =
3496 find_rename_reg (this_head, super_class, &unavailable, old_reg, true);
3497
3498 ok = regrename_do_replace (this_head, best_reg);
3499 gcc_assert (ok);
3500
3501 count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3502 merge_unit_reqs (new_reqs);
3503 if (dump_file)
3504 {
3505 fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3506 "original side %d, new reg %d\n",
3507 INSN_UID (insn), op_mask, orig_side, best_reg);
3508 fprintf (dump_file, " imbalance %d -> %d\n",
3509 unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3510 }
3511 if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3512 {
3513 ok = regrename_do_replace (this_head, old_reg);
3514 gcc_assert (ok);
3515 }
3516 else
3517 memcpy (reqs, new_reqs, sizeof (unit_req_table));
3518
3519 out_fail:
3520 involved_chains.release ();
3521 }
3522
3523 /* Find insns in LOOP which would, if shifted to the other side
3524 of the machine, reduce an imbalance in the unit reservations. */
3525 static void
3526 reshuffle_units (basic_block loop)
3527 {
3528 rtx_insn *head = BB_HEAD (loop);
3529 rtx_insn *tail = BB_END (loop);
3530 rtx_insn *insn;
3531 unit_req_table reqs;
3532 edge e;
3533 edge_iterator ei;
3534 bitmap_head bbs;
3535
3536 count_unit_reqs (reqs, head, PREV_INSN (tail));
3537 merge_unit_reqs (reqs);
3538
3539 regrename_init (true);
3540
3541 bitmap_initialize (&bbs, &bitmap_default_obstack);
3542
3543 FOR_EACH_EDGE (e, ei, loop->preds)
3544 bitmap_set_bit (&bbs, e->src->index);
3545
3546 bitmap_set_bit (&bbs, loop->index);
3547 regrename_analyze (&bbs);
3548
3549 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3550 {
3551 enum attr_units units;
3552 int count, side1, side2, req1, req2;
3553 unsigned int mask1, mask2;
3554 insn_rr_info *info;
3555
3556 if (!NONDEBUG_INSN_P (insn))
3557 continue;
3558
3559 count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3560
3561 if (count == 0)
3562 continue;
3563
3564 if (!get_unit_operand_masks (insn, &mask1, &mask2))
3565 continue;
3566
3567 info = &insn_rr[INSN_UID (insn)];
3568 if (info->op_info == NULL)
3569 continue;
3570
3571 if (reqs[side1][req1] > 1
3572 && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3573 {
3574 try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3575 }
3576
3577 units = get_attr_units (insn);
3578 if (units == UNITS_D_ADDR)
3579 {
3580 gcc_assert (count == 2);
3581 if (reqs[side2][req2] > 1
3582 && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3583 {
3584 try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3585 }
3586 }
3587 }
3588 regrename_finish ();
3589 }
3590
3591 /* Backend scheduling state. */
3592 typedef struct c6x_sched_context
3593 {
3594 /* The current scheduler clock, saved in the sched_reorder hook. */
3595 int curr_sched_clock;
3596
3597 /* Number of insns issued so far in this cycle. */
3598 int issued_this_cycle;
3599
3600 /* We record the time at which each jump occurs in JUMP_CYCLES. The
3601 theoretical maximum for number of jumps in flight is 12: 2 every
3602 cycle, with a latency of 6 cycles each. This is a circular
3603 buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier
3604 jumps have a higher index. This array should be accessed through
3605 the jump_cycle function. */
3606 int jump_cycles[12];
3607 int jump_cycle_index;
3608
3609 /* In parallel with jump_cycles, this array records the opposite of
3610 the condition used in each pending jump. This is used to
3611 predicate insns that are scheduled in the jump's delay slots. If
3612 this is NULL_RTX no such predication happens. */
3613 rtx jump_cond[12];
3614
3615 /* Similar to the jump_cycles mechanism, but here we take into
3616 account all insns with delay slots, to avoid scheduling asms into
3617 the delay slots. */
3618 int delays_finished_at;
3619
3620 /* The following variable value is the last issued insn. */
3621 rtx_insn *last_scheduled_insn;
3622 /* The last issued insn that isn't a shadow of another. */
3623 rtx_insn *last_scheduled_iter0;
3624
3625 /* The following variable value is DFA state before issuing the
3626 first insn in the current clock cycle. We do not use this member
3627 of the structure directly; we copy the data in and out of
3628 prev_cycle_state. */
3629 state_t prev_cycle_state_ctx;
3630
3631 int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3632 int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3633 int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3634
3635 int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3636 int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3637 } *c6x_sched_context_t;
3638
3639 /* The current scheduling state. */
3640 static struct c6x_sched_context ss;
3641
3642 /* The following variable value is DFA state before issuing the first insn
3643 in the current clock cycle. This is used in c6x_variable_issue for
3644 comparison with the state after issuing the last insn in a cycle. */
3645 static state_t prev_cycle_state;
3646
3647 /* Set when we discover while processing an insn that it would lead to too
3648 many accesses of the same register. */
3649 static bool reg_access_stall;
3650
3651 /* The highest insn uid after delayed insns were split, but before loop bodies
3652 were copied by the modulo scheduling code. */
3653 static int sploop_max_uid_iter0;
3654
3655 /* Look up the jump cycle with index N. For an out-of-bounds N, we return 0,
3656 so the caller does not specifically have to test for it. */
3657 static int
3658 get_jump_cycle (int n)
3659 {
3660 if (n >= 12)
3661 return 0;
3662 n += ss.jump_cycle_index;
3663 if (n >= 12)
3664 n -= 12;
3665 return ss.jump_cycles[n];
3666 }
3667
3668 /* Look up the jump condition with index N. */
3669 static rtx
3670 get_jump_cond (int n)
3671 {
3672 if (n >= 12)
3673 return NULL_RTX;
3674 n += ss.jump_cycle_index;
3675 if (n >= 12)
3676 n -= 12;
3677 return ss.jump_cond[n];
3678 }
3679
3680 /* Return the index of the first jump that occurs after CLOCK_VAR. If no jump
3681 has delay slots beyond CLOCK_VAR, return -1. */
3682 static int
3683 first_jump_index (int clock_var)
3684 {
3685 int retval = -1;
3686 int n = 0;
3687 for (;;)
3688 {
3689 int t = get_jump_cycle (n);
3690 if (t <= clock_var)
3691 break;
3692 retval = n;
3693 n++;
3694 }
3695 return retval;
3696 }
3697
3698 /* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3699 and has the opposite condition of COND. */
3700 static void
3701 record_jump (int cycle, rtx cond)
3702 {
3703 if (ss.jump_cycle_index == 0)
3704 ss.jump_cycle_index = 11;
3705 else
3706 ss.jump_cycle_index--;
3707 ss.jump_cycles[ss.jump_cycle_index] = cycle;
3708 ss.jump_cond[ss.jump_cycle_index] = cond;
3709 }
3710
3711 /* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in
3712 new_conditions. */
3713 static void
3714 insn_set_clock (rtx insn, int cycle)
3715 {
3716 unsigned uid = INSN_UID (insn);
3717
3718 if (uid >= INSN_INFO_LENGTH)
3719 insn_info.safe_grow (uid * 5 / 4 + 10);
3720
3721 INSN_INFO_ENTRY (uid).clock = cycle;
3722 INSN_INFO_ENTRY (uid).new_cond = NULL;
3723 INSN_INFO_ENTRY (uid).reservation = 0;
3724 INSN_INFO_ENTRY (uid).ebb_start = false;
3725 }
3726
3727 /* Return the clock cycle we set for the insn with uid UID. */
3728 static int
3729 insn_uid_get_clock (int uid)
3730 {
3731 return INSN_INFO_ENTRY (uid).clock;
3732 }
3733
3734 /* Return the clock cycle we set for INSN. */
3735 static int
3736 insn_get_clock (rtx insn)
3737 {
3738 return insn_uid_get_clock (INSN_UID (insn));
3739 }
3740
3741 /* Examine INSN, and if it is a conditional jump of any kind, return
3742 the opposite of the condition in which it branches. Otherwise,
3743 return NULL_RTX. */
3744 static rtx
3745 condjump_opposite_condition (rtx insn)
3746 {
3747 rtx pat = PATTERN (insn);
3748 int icode = INSN_CODE (insn);
3749 rtx x = NULL;
3750
3751 if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3752 {
3753 x = XEXP (SET_SRC (pat), 0);
3754 if (icode == CODE_FOR_br_false)
3755 return x;
3756 }
3757 if (GET_CODE (pat) == COND_EXEC)
3758 {
3759 rtx t = COND_EXEC_CODE (pat);
3760 if ((GET_CODE (t) == PARALLEL
3761 && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3762 || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3763 || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3764 x = COND_EXEC_TEST (pat);
3765 }
3766
3767 if (x != NULL_RTX)
3768 {
3769 enum rtx_code code = GET_CODE (x);
3770 x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3771 GET_MODE (x), XEXP (x, 0),
3772 XEXP (x, 1));
3773 }
3774 return x;
3775 }
3776
3777 /* Return true iff COND1 and COND2 are exactly opposite conditions,
3778 one of them NE and the other EQ. */
3779 static bool
3780 conditions_opposite_p (rtx cond1, rtx cond2)
3781 {
3782 return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3783 && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3784 && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
3785 }
3786
3787 /* Return true if we can add a predicate COND to INSN, or if INSN
3788 already has that predicate. If DOIT is true, also perform the
3789 modification. */
3790 static bool
3791 predicate_insn (rtx_insn *insn, rtx cond, bool doit)
3792 {
3793 int icode;
3794 if (cond == NULL_RTX)
3795 {
3796 gcc_assert (!doit);
3797 return false;
3798 }
3799
3800 if (get_attr_predicable (insn) == PREDICABLE_YES
3801 && GET_CODE (PATTERN (insn)) != COND_EXEC)
3802 {
3803 if (doit)
3804 {
3805 rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3806 PATTERN (insn) = newpat;
3807 INSN_CODE (insn) = -1;
3808 }
3809 return true;
3810 }
3811 if (GET_CODE (PATTERN (insn)) == COND_EXEC
3812 && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3813 return true;
3814 icode = INSN_CODE (insn);
3815 if (icode == CODE_FOR_real_jump
3816 || icode == CODE_FOR_jump
3817 || icode == CODE_FOR_indirect_jump)
3818 {
3819 rtx pat = PATTERN (insn);
3820 rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3821 : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3822 : SET_SRC (pat));
3823 if (doit)
3824 {
3825 rtx newpat;
3826 if (REG_P (dest))
3827 newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3828 else
3829 newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3830 PATTERN (insn) = newpat;
3831 INSN_CODE (insn) = -1;
3832 }
3833 return true;
3834 }
3835 if (INSN_CODE (insn) == CODE_FOR_br_true)
3836 {
3837 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3838 return rtx_equal_p (br_cond, cond);
3839 }
3840 if (INSN_CODE (insn) == CODE_FOR_br_false)
3841 {
3842 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3843 return conditions_opposite_p (br_cond, cond);
3844 }
3845 return false;
3846 }
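/* For example, with DOIT true and a predicable (set (reg) (const_int 1)),
   the pattern becomes (cond_exec COND (set (reg) (const_int 1))) and
   INSN_CODE is reset to -1 so that the insn is re-recognized later.  */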
3847
3848 /* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */
3849 static void
3850 init_sched_state (c6x_sched_context_t sc)
3851 {
3852 sc->last_scheduled_insn = NULL;
3853 sc->last_scheduled_iter0 = NULL;
3854 sc->issued_this_cycle = 0;
3855 memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3856 memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3857 sc->jump_cycle_index = 0;
3858 sc->delays_finished_at = 0;
3859 sc->curr_sched_clock = 0;
3860
3861 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3862
3863 memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3864 memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3865 memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
3866
3867 state_reset (sc->prev_cycle_state_ctx);
3868 }
3869
3870 /* Allocate store for new scheduling context. */
3871 static void *
3872 c6x_alloc_sched_context (void)
3873 {
3874 return xmalloc (sizeof (struct c6x_sched_context));
3875 }
3876
3877 /* If CLEAN_P is true, initialize _SC with clean data;
3878 otherwise initialize it from the global scheduling context. */
3879 static void
3880 c6x_init_sched_context (void *_sc, bool clean_p)
3881 {
3882 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3883
3884 if (clean_p)
3885 {
3886 init_sched_state (sc);
3887 }
3888 else
3889 {
3890 *sc = ss;
3891 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3892 memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3893 }
3894 }
3895
3896 /* Sets the global scheduling context to the one pointed to by _SC. */
3897 static void
3898 c6x_set_sched_context (void *_sc)
3899 {
3900 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3901
3902 gcc_assert (sc != NULL);
3903 ss = *sc;
3904 memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3905 }
3906
3907 /* Clear data in _SC. */
3908 static void
3909 c6x_clear_sched_context (void *_sc)
3910 {
3911 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3912 gcc_assert (_sc != NULL);
3913
3914 free (sc->prev_cycle_state_ctx);
3915 }
3916
3917 /* Free _SC. */
3918 static void
3919 c6x_free_sched_context (void *_sc)
3920 {
3921 free (_sc);
3922 }
3923
3924 /* True if we are currently performing a preliminary scheduling
3925 pass before modulo scheduling; we can't allow the scheduler to
3926 modify instruction patterns using packetization assumptions,
3927 since there will be another scheduling pass later if modulo
3928 scheduling fails. */
3929 static bool in_hwloop;
3930
3931 /* Provide information about speculation capabilities, and set the
3932 DO_BACKTRACKING flag. */
3933 static void
3934 c6x_set_sched_flags (spec_info_t spec_info)
3935 {
3936 unsigned int *flags = &(current_sched_info->flags);
3937
3938 if (*flags & SCHED_EBB)
3939 {
3940 *flags |= DO_BACKTRACKING | DO_PREDICATION;
3941 }
3942 if (in_hwloop)
3943 *flags |= DONT_BREAK_DEPENDENCIES;
3944
3945 spec_info->mask = 0;
3946 }
3947
3948 /* Implement the TARGET_SCHED_ISSUE_RATE hook. */
3949
3950 static int
3951 c6x_issue_rate (void)
3952 {
3953 return 8;
3954 }
3955
3956 /* Used together with the collapse_ndfa option, this ensures that we reach a
3957 deterministic automaton state before trying to advance a cycle.
3958 With collapse_ndfa, genautomata creates advance cycle arcs only for
3959 such deterministic states. */
3960
3961 static rtx
3962 c6x_sched_dfa_pre_cycle_insn (void)
3963 {
3964 return const0_rtx;
3965 }
3966
3967 /* We're beginning a new block. Initialize data structures as necessary. */
3968
3969 static void
3970 c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3971 int sched_verbose ATTRIBUTE_UNUSED,
3972 int max_ready ATTRIBUTE_UNUSED)
3973 {
3974 if (prev_cycle_state == NULL)
3975 {
3976 prev_cycle_state = xmalloc (dfa_state_size);
3977 }
3978 init_sched_state (&ss);
3979 state_reset (prev_cycle_state);
3980 }
3981
3982 /* We are about to begin issuing INSN. Return nonzero if we cannot
3983 issue it on given cycle CLOCK and return zero if we should not sort
3984 the ready queue on the next clock start.
3985 For C6X, we use this function just to copy the previous DFA state
3986 for comparison purposes. */
3987
3988 static int
3989 c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3990 rtx_insn *insn ATTRIBUTE_UNUSED,
3991 int last_clock ATTRIBUTE_UNUSED,
3992 int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3993 {
3994 if (clock != last_clock)
3995 memcpy (prev_cycle_state, curr_state, dfa_state_size);
3996 return 0;
3997 }
3998
3999 static void
4000 c6x_mark_regno_read (int regno, bool cross)
4001 {
4002 int t = ++ss.tmp_reg_n_accesses[regno];
4003
4004 if (t > 4)
4005 reg_access_stall = true;
4006
4007 if (cross)
4008 {
4009 int set_cycle = ss.reg_set_in_cycle[regno];
4010 /* This must be done in this way rather than by tweaking things in
4011 adjust_cost, since the stall occurs even for insns with opposite
4012 predicates, and the scheduler may not even see a dependency. */
4013 if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
4014 reg_access_stall = true;
4015 /* This doesn't quite do anything yet as we're only modeling one
4016 x unit. */
4017 ++ss.tmp_reg_n_xaccesses[regno];
4018 }
4019 }
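/* For example, once a register has been read more than four times in the
   current cycle (counting all insns issued so far plus the candidate insn),
   reg_access_stall is set and the insn must wait for a later cycle.  The
   same happens for a cross-path read of a register written in this very
   cycle.  */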
4020
4021 /* Note that REG is read in the insn being examined. If CROSS, it
4022 means the access is through a cross path. Update the temporary reg
4023 access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4024 in the current cycle. */
4025
4026 static void
4027 c6x_mark_reg_read (rtx reg, bool cross)
4028 {
4029 unsigned regno = REGNO (reg);
4030 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4031
4032 while (nregs-- > 0)
4033 c6x_mark_regno_read (regno + nregs, cross);
4034 }
4035
4036 /* Note that register REG is written in cycle CYCLES. */
4037
4038 static void
4039 c6x_mark_reg_written (rtx reg, int cycles)
4040 {
4041 unsigned regno = REGNO (reg);
4042 unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4043
4044 while (nregs-- > 0)
4045 ss.reg_set_in_cycle[regno + nregs] = cycles;
4046 }
4047
4048 /* Update the register state information for the instruction INSN.
4049 Return true if the instruction has to be delayed until the
4050 next cycle. */
4051
4052 static bool
4053 c6x_registers_update (rtx_insn *insn)
4054 {
4055 enum attr_cross cross;
4056 enum attr_dest_regfile destrf;
4057 int i, nops;
4058 rtx x;
4059
4060 if (!reload_completed || recog_memoized (insn) < 0)
4061 return false;
4062
4063 reg_access_stall = false;
4064 memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4065 sizeof ss.tmp_reg_n_accesses);
4066 memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4067 sizeof ss.tmp_reg_n_xaccesses);
4068
4069 extract_insn (insn);
4070
4071 cross = get_attr_cross (insn);
4072 destrf = get_attr_dest_regfile (insn);
4073
4074 nops = recog_data.n_operands;
4075 x = PATTERN (insn);
4076 if (GET_CODE (x) == COND_EXEC)
4077 {
4078 c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4079 nops -= 2;
4080 }
4081
4082 for (i = 0; i < nops; i++)
4083 {
4084 rtx op = recog_data.operand[i];
4085 if (recog_data.operand_type[i] == OP_OUT)
4086 continue;
4087 if (REG_P (op))
4088 {
4089 bool this_cross = cross;
4090 if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4091 this_cross = false;
4092 if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4093 this_cross = false;
4094 c6x_mark_reg_read (op, this_cross);
4095 }
4096 else if (MEM_P (op))
4097 {
4098 op = XEXP (op, 0);
4099 switch (GET_CODE (op))
4100 {
4101 case POST_INC:
4102 case PRE_INC:
4103 case POST_DEC:
4104 case PRE_DEC:
4105 op = XEXP (op, 0);
4106 /* fall through */
4107 case REG:
4108 c6x_mark_reg_read (op, false);
4109 break;
4110 case POST_MODIFY:
4111 case PRE_MODIFY:
4112 op = XEXP (op, 1);
4113 gcc_assert (GET_CODE (op) == PLUS);
4114 /* fall through */
4115 case PLUS:
4116 c6x_mark_reg_read (XEXP (op, 0), false);
4117 if (REG_P (XEXP (op, 1)))
4118 c6x_mark_reg_read (XEXP (op, 1), false);
4119 break;
4120 case SYMBOL_REF:
4121 case LABEL_REF:
4122 case CONST:
4123 c6x_mark_regno_read (REG_B14, false);
4124 break;
4125 default:
4126 gcc_unreachable ();
4127 }
4128 }
4129 else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4130 gcc_unreachable ();
4131 }
4132 return reg_access_stall;
4133 }
4134
4135 /* Helper function for the TARGET_SCHED_REORDER and
4136 TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe
4137 in the current cycle, move it down in the ready list and return the
4138 number of non-unsafe insns. */
4139
4140 static int
4141 c6x_sched_reorder_1 (rtx_insn **ready, int *pn_ready, int clock_var)
4142 {
4143 int n_ready = *pn_ready;
4144 rtx_insn **e_ready = ready + n_ready;
4145 rtx_insn **insnp;
4146 int first_jump;
4147
4148 /* Keep track of conflicts due to a limited number of register accesses,
4149 and due to stalls incurred by too early accesses of registers using
4150 cross paths. */
4151
4152 for (insnp = ready; insnp < e_ready; insnp++)
4153 {
4154 rtx_insn *insn = *insnp;
4155 int icode = recog_memoized (insn);
4156 bool is_asm = (icode < 0
4157 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4158 || asm_noperands (PATTERN (insn)) >= 0));
4159 bool no_parallel = (is_asm || icode == CODE_FOR_sploop
4160 || (icode >= 0
4161 && get_attr_type (insn) == TYPE_ATOMIC));
4162
4163 /* We delay asm insns until all delay slots are exhausted. We can't
4164 accurately tell how many cycles an asm takes, and the main scheduling
4165 code always assumes at least 1 cycle, which may be wrong. */
4166 if ((no_parallel
4167 && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
4168 || c6x_registers_update (insn)
4169 || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
4170 {
4171 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4172 *ready = insn;
4173 n_ready--;
4174 ready++;
4175 }
4176 else if (shadow_p (insn))
4177 {
4178 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4179 *ready = insn;
4180 }
4181 }
4182
4183 /* Ensure that no other jump is scheduled in jump delay slots, since
4184 it would put the machine into the wrong state. Also, we must
4185 avoid scheduling insns that have a latency longer than the
4186 remaining jump delay slots, as the code at the jump destination
4187 won't be prepared for it.
4188
4189 However, we can relax this condition somewhat. The rest of the
4190 scheduler will automatically avoid scheduling an insn on which
4191 the jump shadow depends so late that its side effect happens
4192 after the jump. This means that if we see an insn with a longer
4193 latency here, it can safely be scheduled if we can ensure that it
4194 has a predicate opposite of the previous jump: the side effect
4195 will happen in what we think of as the same basic block. In
4196 c6x_variable_issue, we will record the necessary predicate in
4197 new_conditions, and after scheduling is finished, we will modify
4198 the insn.
4199
4200 Special care must be taken whenever there is more than one jump
4201 in flight. */
4202
4203 first_jump = first_jump_index (clock_var);
4204 if (first_jump != -1)
4205 {
4206 int first_cycle = get_jump_cycle (first_jump);
4207 rtx first_cond = get_jump_cond (first_jump);
4208 int second_cycle = 0;
4209
4210 if (first_jump > 0)
4211 second_cycle = get_jump_cycle (first_jump - 1);
4212
4213 for (insnp = ready; insnp < e_ready; insnp++)
4214 {
4215 rtx_insn *insn = *insnp;
4216 int icode = recog_memoized (insn);
4217 bool is_asm = (icode < 0
4218 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4219 || asm_noperands (PATTERN (insn)) >= 0));
4220 int this_cycles, rsrv_cycles;
4221 enum attr_type type;
4222
4223 gcc_assert (!is_asm);
4224 if (icode < 0)
4225 continue;
4226 this_cycles = get_attr_cycles (insn);
4227 rsrv_cycles = get_attr_reserve_cycles (insn);
4228 type = get_attr_type (insn);
4229 /* Treat branches specially; there is also a hazard if two jumps
4230 end at the same cycle. */
4231 if (type == TYPE_BRANCH || type == TYPE_CALL)
4232 this_cycles++;
4233 if (clock_var + this_cycles <= first_cycle)
4234 continue;
4235 if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
4236 || clock_var + rsrv_cycles > first_cycle
4237 || !predicate_insn (insn, first_cond, false))
4238 {
4239 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4240 *ready = insn;
4241 n_ready--;
4242 ready++;
4243 }
4244 }
4245 }
4246
4247 return n_ready;
4248 }
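/* As a worked example of the delay slot check above: with a jump recorded as
   ending at cycle 12 and clock_var == 9, an insn taking 5 cycles would finish
   at cycle 14, beyond the jump; it stays issuable only if predicate_insn says
   it can be given the jump's recorded condition, otherwise it is moved to the
   front of the queue and excluded from the count we return, so it cannot
   issue this cycle.  */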
4249
4250 /* Implement the TARGET_SCHED_REORDER hook. We save the current clock
4251 for later and clear the register access information for the new
4252 cycle. We also move asm statements out of the way if they would be
4253 scheduled in a delay slot. */
4254
4255 static int
4256 c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4257 int sched_verbose ATTRIBUTE_UNUSED,
4258 rtx_insn **ready ATTRIBUTE_UNUSED,
4259 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4260 {
4261 ss.curr_sched_clock = clock_var;
4262 ss.issued_this_cycle = 0;
4263 memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4264 memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4265
4266 if (ready == NULL)
4267 return 0;
4268
4269 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4270 }
4271
4272 /* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock
4273 cycle for every insn. */
4274
4275 static int
4276 c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4277 int sched_verbose ATTRIBUTE_UNUSED,
4278 rtx_insn **ready ATTRIBUTE_UNUSED,
4279 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4280 {
4281 /* FIXME: the assembler rejects labels inside an execute packet.
4282 This can occur if prologue insns are scheduled in parallel with
4283 others, so we avoid this here. Also make sure that nothing is
4284 scheduled in parallel with a TYPE_ATOMIC insn or after a jump. */
4285 if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4286 || JUMP_P (ss.last_scheduled_insn)
4287 || (recog_memoized (ss.last_scheduled_insn) >= 0
4288 && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4289 {
4290 int n_ready = *pn_ready;
4291 rtx_insn **e_ready = ready + n_ready;
4292 rtx_insn **insnp;
4293
4294 for (insnp = ready; insnp < e_ready; insnp++)
4295 {
4296 rtx_insn *insn = *insnp;
4297 if (!shadow_p (insn))
4298 {
4299 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4300 *ready = insn;
4301 n_ready--;
4302 ready++;
4303 }
4304 }
4305 return n_ready;
4306 }
4307
4308 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4309 }
4310
4311 /* Subroutine of maybe_clobber_cond, called through note_stores. */
4312
4313 static void
4314 clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4315 {
4316 rtx *cond = (rtx *)data1;
4317 if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4318 *cond = NULL_RTX;
4319 }
4320
4321 /* Examine INSN, and if it destroys the conditions we have recorded for
4322 any of the jumps in flight, clear that condition so that we don't
4323 predicate any more insns. CLOCK_VAR helps us limit the search to
4324 only those jumps which are still in flight. */
4325
4326 static void
4327 maybe_clobber_cond (rtx insn, int clock_var)
4328 {
4329 int n, idx;
4330 idx = ss.jump_cycle_index;
4331 for (n = 0; n < 12; n++, idx++)
4332 {
4333 rtx cond, link;
4334 int cycle;
4335
4336 if (idx >= 12)
4337 idx -= 12;
4338 cycle = ss.jump_cycles[idx];
4339 if (cycle <= clock_var)
4340 return;
4341
4342 cond = ss.jump_cond[idx];
4343 if (cond == NULL_RTX)
4344 continue;
4345
4346 if (CALL_P (insn))
4347 {
4348 ss.jump_cond[idx] = NULL_RTX;
4349 continue;
4350 }
4351
4352 note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4353 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4354 if (REG_NOTE_KIND (link) == REG_INC)
4355 clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4356 }
4357 }
4358
4359 /* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to
4360 issue INSN. Return the number of insns left on the ready queue
4361 that can be issued this cycle.
4362 We use this hook to record clock cycles and reservations for every insn. */
4363
4364 static int
4365 c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4366 int sched_verbose ATTRIBUTE_UNUSED,
4367 rtx_insn *insn, int can_issue_more ATTRIBUTE_UNUSED)
4368 {
4369 ss.last_scheduled_insn = insn;
4370 if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4371 ss.last_scheduled_iter0 = insn;
4372 if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4373 ss.issued_this_cycle++;
4374 if (insn_info.exists ())
4375 {
4376 state_t st_after = alloca (dfa_state_size);
4377 int curr_clock = ss.curr_sched_clock;
4378 int uid = INSN_UID (insn);
4379 int icode = recog_memoized (insn);
4380 rtx first_cond;
4381 int first, first_cycle;
4382 unsigned int mask;
4383 int i;
4384
4385 insn_set_clock (insn, curr_clock);
4386 INSN_INFO_ENTRY (uid).ebb_start
4387 = curr_clock == 0 && ss.issued_this_cycle == 1;
4388
4389 first = first_jump_index (ss.curr_sched_clock);
4390 if (first == -1)
4391 {
4392 first_cycle = 0;
4393 first_cond = NULL_RTX;
4394 }
4395 else
4396 {
4397 first_cycle = get_jump_cycle (first);
4398 first_cond = get_jump_cond (first);
4399 }
4400 if (icode >= 0
4401 && first_cycle > curr_clock
4402 && first_cond != NULL_RTX
4403 && (curr_clock + get_attr_cycles (insn) > first_cycle
4404 || get_attr_type (insn) == TYPE_BRANCH
4405 || get_attr_type (insn) == TYPE_CALL))
4406 INSN_INFO_ENTRY (uid).new_cond = first_cond;
4407
4408 memcpy (st_after, curr_state, dfa_state_size);
4409 state_transition (st_after, const0_rtx);
4410
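/* Record which functional units are reserved after this insn (with the cycle
   advanced) but were free at the start of the cycle; this difference becomes
   the insn's unit_mask below.  */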
4411 mask = 0;
4412 for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4413 if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4414 && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4415 mask |= 1 << i;
4416 INSN_INFO_ENTRY (uid).unit_mask = mask;
4417
4418 maybe_clobber_cond (insn, curr_clock);
4419
4420 if (icode >= 0)
4421 {
4422 int i, cycles;
4423
4424 c6x_registers_update (insn);
4425 memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4426 sizeof ss.reg_n_accesses);
4427 memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_accesses,
4428 sizeof ss.reg_n_xaccesses);
4429
4430 cycles = get_attr_cycles (insn);
4431 if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4432 ss.delays_finished_at = ss.curr_sched_clock + cycles;
4433 if (get_attr_type (insn) == TYPE_BRANCH
4434 || get_attr_type (insn) == TYPE_CALL)
4435 {
4436 rtx opposite = condjump_opposite_condition (insn);
4437 record_jump (ss.curr_sched_clock + cycles, opposite);
4438 }
4439
4440 /* Mark the cycles in which the destination registers are written.
4441 This is used for calculating stalls when using cross units. */
4442 extract_insn (insn);
4443 /* Cross-path stalls don't apply to results of load insns. */
4444 if (get_attr_type (insn) == TYPE_LOAD
4445 || get_attr_type (insn) == TYPE_LOADN
4446 || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4447 cycles--;
4448 for (i = 0; i < recog_data.n_operands; i++)
4449 {
4450 rtx op = recog_data.operand[i];
4451 if (MEM_P (op))
4452 {
4453 rtx addr = XEXP (op, 0);
4454 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4455 c6x_mark_reg_written (XEXP (addr, 0),
4456 insn_uid_get_clock (uid) + 1);
4457 }
4458 if (recog_data.operand_type[i] != OP_IN
4459 && REG_P (op))
4460 {
4461 c6x_mark_reg_written (op,
4462 insn_uid_get_clock (uid) + cycles);
4463 }
4464 }
4465 }
4466 }
4467 return can_issue_more;
4468 }
4469
4470 /* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for
4471 anti- and output dependencies. */
4472
4473 static int
4474 c6x_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4475 {
4476 enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4477 int dep_insn_code_number, insn_code_number;
4478 int shadow_bonus = 0;
4479 enum reg_note kind;
4480 dep_insn_code_number = recog_memoized (dep_insn);
4481 insn_code_number = recog_memoized (insn);
4482
4483 if (dep_insn_code_number >= 0)
4484 dep_insn_type = get_attr_type (dep_insn);
4485
4486 if (insn_code_number >= 0)
4487 insn_type = get_attr_type (insn);
4488
4489 kind = REG_NOTE_KIND (link);
4490 if (kind == 0)
4491 {
4492 /* If we have a dependency on a load, and it's not for the result of
4493 the load, it must be for an autoincrement. Reduce the cost in that
4494 case. */
4495 if (dep_insn_type == TYPE_LOAD)
4496 {
4497 rtx set = PATTERN (dep_insn);
4498 if (GET_CODE (set) == COND_EXEC)
4499 set = COND_EXEC_CODE (set);
4500 if (GET_CODE (set) == UNSPEC)
4501 cost = 1;
4502 else
4503 {
4504 gcc_assert (GET_CODE (set) == SET);
4505 if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4506 cost = 1;
4507 }
4508 }
4509 }
4510
4511 /* A jump shadow needs to have its latency decreased by one. Conceptually,
4512 it occurs in between two cycles, but we schedule it at the end of the
4513 first cycle. */
4514 if (shadow_type_p (insn_type))
4515 shadow_bonus = 1;
4516
4517 /* Anti and output dependencies usually have zero cost, but we want
4518 to insert a stall after a jump, and after certain floating point
4519 insns that take more than one cycle to read their inputs. In the
4520 future, we should try to find a better algorithm for scheduling
4521 jumps. */
4522 if (kind != 0)
4523 {
4524 /* We can get anti-dependencies against shadow insns. Treat these
4525 like output dependencies, so that the insn is entirely finished
4526 before the branch takes place. */
4527 if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4528 kind = REG_DEP_OUTPUT;
4529 switch (dep_insn_type)
4530 {
4531 case TYPE_CALLP:
4532 return 1;
4533 case TYPE_BRANCH:
4534 case TYPE_CALL:
4535 if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4536 /* This is a real_jump/real_call insn. These don't have
4537 outputs, and ensuring the validity of scheduling things
4538 in the delay slot is the job of
4539 c6x_sched_reorder_1. */
4540 return 0;
4541 /* Unsplit calls can happen - e.g. for divide insns. */
4542 return 6;
4543 case TYPE_LOAD:
4544 case TYPE_LOADN:
4545 case TYPE_INTDP:
4546 if (kind == REG_DEP_OUTPUT)
4547 return 5 - shadow_bonus;
4548 return 0;
4549 case TYPE_MPY4:
4550 case TYPE_FP4:
4551 if (kind == REG_DEP_OUTPUT)
4552 return 4 - shadow_bonus;
4553 return 0;
4554 case TYPE_MPY2:
4555 if (kind == REG_DEP_OUTPUT)
4556 return 2 - shadow_bonus;
4557 return 0;
4558 case TYPE_CMPDP:
4559 if (kind == REG_DEP_OUTPUT)
4560 return 2 - shadow_bonus;
4561 return 2;
4562 case TYPE_ADDDP:
4563 case TYPE_MPYSPDP:
4564 if (kind == REG_DEP_OUTPUT)
4565 return 7 - shadow_bonus;
4566 return 2;
4567 case TYPE_MPYSP2DP:
4568 if (kind == REG_DEP_OUTPUT)
4569 return 5 - shadow_bonus;
4570 return 2;
4571 case TYPE_MPYI:
4572 if (kind == REG_DEP_OUTPUT)
4573 return 9 - shadow_bonus;
4574 return 4;
4575 case TYPE_MPYID:
4576 case TYPE_MPYDP:
4577 if (kind == REG_DEP_OUTPUT)
4578 return 10 - shadow_bonus;
4579 return 4;
4580
4581 default:
4582 if (insn_type == TYPE_SPKERNEL)
4583 return 0;
4584 if (kind == REG_DEP_OUTPUT)
4585 return 1 - shadow_bonus;
4586
4587 return 0;
4588 }
4589 }
4590
4591 return cost - shadow_bonus;
4592 }
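/* For example, an output dependency against a TYPE_LOAD producer costs 5
   cycles, but only 4 when the dependent insn is a jump shadow, which is
   scheduled at the end of the preceding cycle as described above.  */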
4593
4594 /* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4595 are N_FILLED. REAL_FIRST identifies the slot of the insn that appears
4596 first in the original stream. */
4597
4598 static void
4599 gen_one_bundle (rtx_insn **slot, int n_filled, int real_first)
4600 {
4601 rtx seq;
4602 rtx_insn *bundle;
4603 rtx_insn *t;
4604 int i;
4605
4606 seq = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4607 bundle = make_insn_raw (seq);
4608 BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
4609 INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]);
4610 SET_PREV_INSN (bundle) = SET_PREV_INSN (slot[real_first]);
4611
4612 t = NULL;
4613
4614 for (i = 0; i < n_filled; i++)
4615 {
4616 rtx_insn *insn = slot[i];
4617 remove_insn (insn);
4618 SET_PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4619 if (t != NULL_RTX)
4620 SET_NEXT_INSN (t) = insn;
4621 t = insn;
4622 if (i > 0)
4623 INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle);
4624 }
4625
4626 SET_NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4627 SET_NEXT_INSN (t) = NEXT_INSN (bundle);
4628 SET_NEXT_INSN (PREV_INSN (bundle)) = bundle;
4629 SET_PREV_INSN (NEXT_INSN (bundle)) = bundle;
4630 }
4631
4632 /* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4633 try to insert labels in the middle. */
4634
4635 static void
4636 c6x_gen_bundles (void)
4637 {
4638 basic_block bb;
4639 rtx_insn *insn, *next, *last_call;
4640
4641 FOR_EACH_BB_FN (bb, cfun)
4642 {
4643 rtx_insn *insn, *next;
4644 /* The machine is eight insns wide. We can have up to six shadow
4645 insns, plus an extra slot for merging the jump shadow. */
4646 rtx_insn *slot[15];
4647 int n_filled = 0;
4648 int first_slot = 0;
4649
4650 for (insn = BB_HEAD (bb);; insn = next)
4651 {
4652 int at_end;
4653 rtx delete_this = NULL_RTX;
4654
4655 if (NONDEBUG_INSN_P (insn))
4656 {
4657 /* Put calls at the start of the sequence. */
4658 if (CALL_P (insn))
4659 {
4660 first_slot++;
4661 if (n_filled)
4662 {
4663 memmove (&slot[1], &slot[0],
4664 n_filled * sizeof (slot[0]));
4665 }
4666 if (!shadow_p (insn))
4667 {
4668 PUT_MODE (insn, TImode);
4669 if (n_filled)
4670 PUT_MODE (slot[1], VOIDmode);
4671 }
4672 n_filled++;
4673 slot[0] = insn;
4674 }
4675 else
4676 {
4677 slot[n_filled++] = insn;
4678 }
4679 }
4680
4681 next = NEXT_INSN (insn);
4682 while (next && insn != BB_END (bb)
4683 && !(NONDEBUG_INSN_P (next)
4684 && GET_CODE (PATTERN (next)) != USE
4685 && GET_CODE (PATTERN (next)) != CLOBBER))
4686 {
4687 insn = next;
4688 next = NEXT_INSN (insn);
4689 }
4690
4691 at_end = insn == BB_END (bb);
4692 if (delete_this == NULL_RTX
4693 && (at_end || (GET_MODE (next) == TImode
4694 && !(shadow_p (next) && CALL_P (next)))))
4695 {
4696 if (n_filled >= 2)
4697 gen_one_bundle (slot, n_filled, first_slot);
4698
4699 n_filled = 0;
4700 first_slot = 0;
4701 }
4702 if (at_end)
4703 break;
4704 }
4705 }
4706 /* Bundling, and emitting nops, can separate
4707 NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls. Fix
4708 that up here. */
4709 last_call = NULL;
4710 for (insn = get_insns (); insn; insn = next)
4711 {
4712 next = NEXT_INSN (insn);
4713 if (CALL_P (insn)
4714 || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
4715 && CALL_P (XVECEXP (PATTERN (insn), 0, 0))))
4716 last_call = insn;
4717 if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
4718 continue;
4719 if (NEXT_INSN (last_call) == insn)
4720 continue;
4721 SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
4722 SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
4723 SET_PREV_INSN (insn) = last_call;
4724 SET_NEXT_INSN (insn) = NEXT_INSN (last_call);
4725 SET_PREV_INSN (NEXT_INSN (insn)) = insn;
4726 SET_NEXT_INSN (PREV_INSN (insn)) = insn;
4727 last_call = insn;
4728 }
4729 }
4730
4731 /* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */
4732
4733 static rtx_insn *
4734 emit_nop_after (int cycles, rtx after)
4735 {
4736 rtx_insn *insn;
4737
4738 /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4739 operation. We don't need the extra NOP since in this case, the hardware
4740 will automatically insert the required stall. */
4741 if (cycles == 10)
4742 cycles--;
4743
4744 gcc_assert (cycles < 10);
4745
4746 insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4747 PUT_MODE (insn, TImode);
4748
4749 return insn;
4750 }
4751
4752 /* Determine whether INSN is a call that needs to have a return label
4753 placed. */
4754
4755 static bool
4756 returning_call_p (rtx_insn *insn)
4757 {
4758 if (CALL_P (insn))
4759 return (!SIBLING_CALL_P (insn)
4760 && get_attr_type (insn) != TYPE_CALLP
4761 && get_attr_type (insn) != TYPE_SHADOW);
4762 if (recog_memoized (insn) < 0)
4763 return false;
4764 if (get_attr_type (insn) == TYPE_CALL)
4765 return true;
4766 return false;
4767 }
4768
4769 /* Determine whether INSN's pattern can be converted to use callp. */
4770 static bool
4771 can_use_callp (rtx_insn *insn)
4772 {
4773 int icode = recog_memoized (insn);
4774 if (!TARGET_INSNS_64PLUS
4775 || icode < 0
4776 || GET_CODE (PATTERN (insn)) == COND_EXEC)
4777 return false;
4778
4779 return ((icode == CODE_FOR_real_call
4780 || icode == CODE_FOR_call_internal
4781 || icode == CODE_FOR_call_value_internal)
4782 && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4783 }
4784
4785 /* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */
4786 static void
4787 convert_to_callp (rtx_insn *insn)
4788 {
4789 rtx lab;
4790 extract_insn (insn);
4791 if (GET_CODE (PATTERN (insn)) == SET)
4792 {
4793 rtx dest = recog_data.operand[0];
4794 lab = recog_data.operand[1];
4795 PATTERN (insn) = gen_callp_value (dest, lab);
4796 INSN_CODE (insn) = CODE_FOR_callp_value;
4797 }
4798 else
4799 {
4800 lab = recog_data.operand[0];
4801 PATTERN (insn) = gen_callp (lab);
4802 INSN_CODE (insn) = CODE_FOR_callp;
4803 }
4804 }
4805
4806 /* Scan forwards from INSN until we find the next insn that has mode TImode
4807 (indicating it starts a new cycle), and occurs in cycle CLOCK.
4808 Return it if we find such an insn, NULL_RTX otherwise. */
4809 static rtx
4810 find_next_cycle_insn (rtx insn, int clock)
4811 {
4812 rtx t = insn;
4813 if (GET_MODE (t) == TImode)
4814 t = next_real_insn (t);
4815 while (t && GET_MODE (t) != TImode)
4816 t = next_real_insn (t);
4817
4818 if (t && insn_get_clock (t) == clock)
4819 return t;
4820 return NULL_RTX;
4821 }
4822
4823 /* If COND_INSN has a COND_EXEC condition, wrap the same condition
4824 around PAT. Return PAT either unchanged or modified in this
4825 way. */
4826 static rtx
4827 duplicate_cond (rtx pat, rtx cond_insn)
4828 {
4829 rtx cond_pat = PATTERN (cond_insn);
4830 if (GET_CODE (cond_pat) == COND_EXEC)
4831 pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4832 pat);
4833 return pat;
4834 }
4835
4836 /* Walk forward from INSN to find the last insn that issues in the same clock
4837 cycle. */
4838 static rtx
4839 find_last_same_clock (rtx insn)
4840 {
4841 rtx retval = insn;
4842 rtx_insn *t = next_real_insn (insn);
4843
4844 while (t && GET_MODE (t) != TImode)
4845 {
4846 if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4847 retval = t;
4848 t = next_real_insn (t);
4849 }
4850 return retval;
4851 }
4852
4853 /* For every call insn in the function, emit code to load the return
4854 address. For each call we create a return label and store it in
4855 CALL_LABELS. If we are not scheduling, we emit the labels here,
4856 otherwise the caller will do it later.
4857 This function is called after final insn scheduling, but before creating
4858 the SEQUENCEs that represent execute packets. */
4859
4860 static void
4861 reorg_split_calls (rtx *call_labels)
4862 {
4863 unsigned int reservation_mask = 0;
4864 rtx_insn *insn = get_insns ();
4865 gcc_assert (NOTE_P (insn));
4866 insn = next_real_insn (insn);
4867 while (insn)
4868 {
4869 int uid;
4870 rtx_insn *next = next_real_insn (insn);
4871
4872 if (DEBUG_INSN_P (insn))
4873 goto done;
4874
4875 if (GET_MODE (insn) == TImode)
4876 reservation_mask = 0;
4877 uid = INSN_UID (insn);
4878 if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4879 reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4880
4881 if (returning_call_p (insn))
4882 {
4883 rtx label = gen_label_rtx ();
4884 rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4885 rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4886
4887 LABEL_NUSES (label) = 2;
4888 if (!c6x_flag_schedule_insns2)
4889 {
4890 if (can_use_callp (insn))
4891 convert_to_callp (insn);
4892 else
4893 {
4894 rtx t;
4895 rtx_insn *slot[4];
4896 emit_label_after (label, insn);
4897
4898 /* Bundle the call and its delay slots into a single
4899 SEQUENCE. While these do not issue in parallel
4900 we need to group them into a single EH region. */
4901 slot[0] = insn;
4902 PUT_MODE (insn, TImode);
4903 if (TARGET_INSNS_64)
4904 {
4905 t = gen_addkpc (reg, labelref, GEN_INT (4));
4906 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4907 insn);
4908 PUT_MODE (slot[1], TImode);
4909 gen_one_bundle (slot, 2, 0);
4910 }
4911 else
4912 {
4913 slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4914 insn);
4915 PUT_MODE (slot[3], TImode);
4916 t = gen_movsi_lo_sum (reg, reg, labelref);
4917 slot[2] = emit_insn_after (duplicate_cond (t, insn),
4918 insn);
4919 PUT_MODE (slot[2], TImode);
4920 t = gen_movsi_high (reg, labelref);
4921 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4922 insn);
4923 PUT_MODE (slot[1], TImode);
4924 gen_one_bundle (slot, 4, 0);
4925 }
4926 }
4927 }
4928 else
4929 {
4930 /* If we scheduled, we reserved the .S2 unit for one or two
4931 cycles after the call. Emit the insns in these slots,
4932 unless it's possible to create a CALLP insn.
4933 Note that this works because the dependencies ensure that
4934 no insn setting/using B3 is scheduled in the delay slots of
4935 a call. */
4936 int this_clock = insn_get_clock (insn);
4937 rtx last_same_clock;
4938 rtx after1;
4939
4940 call_labels[INSN_UID (insn)] = label;
4941
4942 last_same_clock = find_last_same_clock (insn);
4943
4944 if (can_use_callp (insn))
4945 {
4946 /* Find the first insn of the next execute packet. If it
4947 is the shadow insn corresponding to this call, we may
4948 use a CALLP insn. */
4949 rtx_insn *shadow =
4950 next_nonnote_nondebug_insn (last_same_clock);
4951
4952 if (CALL_P (shadow)
4953 && insn_get_clock (shadow) == this_clock + 5)
4954 {
4955 convert_to_callp (shadow);
4956 insn_set_clock (shadow, this_clock);
4957 INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4958 = RESERVATION_S2;
4959 INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4960 = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
4961 if (GET_MODE (insn) == TImode)
4962 {
4963 rtx_insn *new_cycle_first = NEXT_INSN (insn);
4964 while (!NONDEBUG_INSN_P (new_cycle_first)
4965 || GET_CODE (PATTERN (new_cycle_first)) == USE
4966 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4967 new_cycle_first = NEXT_INSN (new_cycle_first);
4968 PUT_MODE (new_cycle_first, TImode);
4969 if (new_cycle_first != shadow)
4970 PUT_MODE (shadow, VOIDmode);
4971 INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4972 = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4973 }
4974 else
4975 PUT_MODE (shadow, VOIDmode);
4976 delete_insn (insn);
4977 goto done;
4978 }
4979 }
4980 after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4981 if (after1 == NULL_RTX)
4982 after1 = last_same_clock;
4983 else
4984 after1 = find_last_same_clock (after1);
4985 if (TARGET_INSNS_64)
4986 {
4987 rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4988 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4989 insn_set_clock (x1, this_clock + 1);
4990 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4991 if (after1 == last_same_clock)
4992 PUT_MODE (x1, TImode);
4993 else
4994 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4995 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4996 }
4997 else
4998 {
4999 rtx x1, x2;
5000 rtx after2 = find_next_cycle_insn (after1, this_clock + 2);
5001 if (after2 == NULL_RTX)
5002 after2 = after1;
5003 x2 = gen_movsi_lo_sum (reg, reg, labelref);
5004 x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
5005 x1 = gen_movsi_high (reg, labelref);
5006 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
5007 insn_set_clock (x1, this_clock + 1);
5008 insn_set_clock (x2, this_clock + 2);
5009 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
5010 INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
5011 if (after1 == last_same_clock)
5012 PUT_MODE (x1, TImode);
5013 else
5014 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
5015 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
5016 if (after1 == after2)
5017 PUT_MODE (x2, TImode);
5018 else
5019 INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
5020 = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
5021 }
5022 }
5023 }
5024 done:
5025 insn = next;
5026 }
5027 }
5028
5029 /* Called as part of c6x_reorg. This function emits multi-cycle NOP
5030 insns as required for correctness. CALL_LABELS is the array that
5031 holds the return labels for call insns; we emit these here if
5032 scheduling was run earlier. */
5033
5034 static void
5035 reorg_emit_nops (rtx *call_labels)
5036 {
5037 bool first;
5038 rtx last_call;
5039 rtx_insn *prev;
5040 int prev_clock, earliest_bb_end;
5041 int prev_implicit_nops;
5042 rtx_insn *insn = get_insns ();
5043
5044 /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5045 its issue time in PREV_CLOCK for the next iteration. If there is a gap in
5046 clocks, we must insert a NOP.
5047 EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5048 current basic block will finish. We must not allow the next basic block to
5049 begin before this cycle.
5050 PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains
5051 a multi-cycle nop. The code is scheduled such that subsequent insns will
5052 show the cycle gap, but we needn't insert a real NOP instruction. */
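/* For example, if the previous insn issued at clock 3 and the next real insn
   at clock 7 with no implicit nops in between, the gap is 4 cycles and we
   emit a three-cycle NOP after the previous insn to fill it.  */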
5053 insn = next_real_insn (insn);
5054 last_call = prev = NULL;
5055 prev_clock = -1;
5056 earliest_bb_end = 0;
5057 prev_implicit_nops = 0;
5058 first = true;
5059 while (insn)
5060 {
5061 int this_clock = -1;
5062 rtx_insn *next;
5063 int max_cycles = 0;
5064
5065 next = next_real_insn (insn);
5066
5067 if (DEBUG_INSN_P (insn)
5068 || GET_CODE (PATTERN (insn)) == USE
5069 || GET_CODE (PATTERN (insn)) == CLOBBER
5070 || shadow_or_blockage_p (insn)
5071 || JUMP_TABLE_DATA_P (insn))
5072 goto next_insn;
5073
5074 if (!c6x_flag_schedule_insns2)
5075 /* No scheduling; ensure that no parallel issue happens. */
5076 PUT_MODE (insn, TImode);
5077 else
5078 {
5079 int cycles;
5080
5081 this_clock = insn_get_clock (insn);
5082 if (this_clock != prev_clock)
5083 {
5084 PUT_MODE (insn, TImode);
5085
5086 if (!first)
5087 {
5088 cycles = this_clock - prev_clock;
5089
5090 cycles -= prev_implicit_nops;
5091 if (cycles > 1)
5092 {
5093 rtx nop = emit_nop_after (cycles - 1, prev);
5094 insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5095 }
5096 }
5097 prev_clock = this_clock;
5098
5099 if (last_call
5100 && insn_get_clock (last_call) + 6 <= this_clock)
5101 {
5102 emit_label_before (call_labels[INSN_UID (last_call)], insn);
5103 last_call = NULL_RTX;
5104 }
5105 prev_implicit_nops = 0;
5106 }
5107 }
5108
5109 /* Examine how many cycles the current insn takes, and adjust
5110 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */
5111 if (recog_memoized (insn) >= 0
5112 /* If not scheduling, we've emitted NOPs after calls already. */
5113 && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5114 {
5115 max_cycles = get_attr_cycles (insn);
5116 if (get_attr_type (insn) == TYPE_CALLP)
5117 prev_implicit_nops = 5;
5118 }
5119 else
5120 max_cycles = 1;
5121 if (returning_call_p (insn))
5122 last_call = insn;
5123
5124 if (c6x_flag_schedule_insns2)
5125 {
5126 gcc_assert (this_clock >= 0);
5127 if (earliest_bb_end < this_clock + max_cycles)
5128 earliest_bb_end = this_clock + max_cycles;
5129 }
5130 else if (max_cycles > 1)
5131 emit_nop_after (max_cycles - 1, insn);
5132
5133 prev = insn;
5134 first = false;
5135
5136 next_insn:
5137 if (c6x_flag_schedule_insns2
5138 && (next == NULL_RTX
5139 || (GET_MODE (next) == TImode
5140 && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5141 && earliest_bb_end > 0)
5142 {
5143 int cycles = earliest_bb_end - prev_clock;
5144 if (cycles > 1)
5145 {
5146 prev = emit_nop_after (cycles - 1, prev);
5147 insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5148 }
5149 earliest_bb_end = 0;
5150 prev_clock = -1;
5151 first = true;
5152
5153 if (last_call)
5154 emit_label_after (call_labels[INSN_UID (last_call)], prev);
5155 last_call = NULL_RTX;
5156 }
5157 insn = next;
5158 }
5159 }
5160
5161 /* If possible, split INSN, which we know is either a jump or a call, into a real
5162 insn and its shadow. */
5163 static void
5164 split_delayed_branch (rtx_insn *insn)
5165 {
5166 int code = recog_memoized (insn);
5167 rtx_insn *i1;
5168 rtx newpat;
5169 rtx pat = PATTERN (insn);
5170
5171 if (GET_CODE (pat) == COND_EXEC)
5172 pat = COND_EXEC_CODE (pat);
5173
5174 if (CALL_P (insn))
5175 {
5176 rtx src = pat, dest = NULL_RTX;
5177 rtx callee;
5178 if (GET_CODE (pat) == SET)
5179 {
5180 dest = SET_DEST (pat);
5181 src = SET_SRC (pat);
5182 }
5183 callee = XEXP (XEXP (src, 0), 0);
5184 if (SIBLING_CALL_P (insn))
5185 {
5186 if (REG_P (callee))
5187 newpat = gen_indirect_sibcall_shadow ();
5188 else
5189 newpat = gen_sibcall_shadow (callee);
5190 pat = gen_real_jump (callee);
5191 }
5192 else if (dest != NULL_RTX)
5193 {
5194 if (REG_P (callee))
5195 newpat = gen_indirect_call_value_shadow (dest);
5196 else
5197 newpat = gen_call_value_shadow (dest, callee);
5198 pat = gen_real_call (callee);
5199 }
5200 else
5201 {
5202 if (REG_P (callee))
5203 newpat = gen_indirect_call_shadow ();
5204 else
5205 newpat = gen_call_shadow (callee);
5206 pat = gen_real_call (callee);
5207 }
5208 pat = duplicate_cond (pat, insn);
5209 newpat = duplicate_cond (newpat, insn);
5210 }
5211 else
5212 {
5213 rtx src, op;
5214 if (GET_CODE (pat) == PARALLEL
5215 && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5216 {
5217 newpat = gen_return_shadow ();
5218 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5219 newpat = duplicate_cond (newpat, insn);
5220 }
5221 else
5222 switch (code)
5223 {
5224 case CODE_FOR_br_true:
5225 case CODE_FOR_br_false:
5226 src = SET_SRC (pat);
5227 op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5228 newpat = gen_condjump_shadow (op);
5229 pat = gen_real_jump (op);
5230 if (code == CODE_FOR_br_true)
5231 pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5232 else
5233 pat = gen_rtx_COND_EXEC (VOIDmode,
5234 reversed_comparison (XEXP (src, 0),
5235 VOIDmode),
5236 pat);
5237 break;
5238
5239 case CODE_FOR_jump:
5240 op = SET_SRC (pat);
5241 newpat = gen_jump_shadow (op);
5242 break;
5243
5244 case CODE_FOR_indirect_jump:
5245 newpat = gen_indirect_jump_shadow ();
5246 break;
5247
5248 case CODE_FOR_return_internal:
5249 newpat = gen_return_shadow ();
5250 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5251 break;
5252
5253 default:
5254 return;
5255 }
5256 }
5257 i1 = emit_insn_before (pat, insn);
5258 PATTERN (insn) = newpat;
5259 INSN_CODE (insn) = -1;
5260 record_delay_slot_pair (i1, insn, 5, 0);
5261 }
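/* For example, a conditional branch matched by br_true is replaced by a
   real_jump emitted just before it (wrapped in a COND_EXEC with the same
   condition) while the original insn becomes a condjump_shadow;
   record_delay_slot_pair then keeps the two exactly five cycles apart,
   matching the branch delay slots.  */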
5262
5263 /* If INSN is a multi-cycle insn that should be handled properly in
5264 modulo-scheduling, split it into a real insn and a shadow.
5265 Return true if we made a change.
5266
5267 It is valid for us to fail to split an insn; the caller has to deal
5268 with the possibility. Currently we handle loads and most mpy2 and
5269 mpy4 insns. */
5270 static bool
5271 split_delayed_nonbranch (rtx_insn *insn)
5272 {
5273 int code = recog_memoized (insn);
5274 enum attr_type type;
5275 rtx_insn *i1;
5276 rtx newpat, src, dest;
5277 rtx pat = PATTERN (insn);
5278 rtvec rtv;
5279 int delay;
5280
5281 if (GET_CODE (pat) == COND_EXEC)
5282 pat = COND_EXEC_CODE (pat);
5283
5284 if (code < 0 || GET_CODE (pat) != SET)
5285 return false;
5286 src = SET_SRC (pat);
5287 dest = SET_DEST (pat);
5288 if (!REG_P (dest))
5289 return false;
5290
5291 type = get_attr_type (insn);
5292 if (code >= 0
5293 && (type == TYPE_LOAD
5294 || type == TYPE_LOADN))
5295 {
5296 if (!MEM_P (src)
5297 && (GET_CODE (src) != ZERO_EXTEND
5298 || !MEM_P (XEXP (src, 0))))
5299 return false;
5300
5301 if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5302 && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5303 return false;
5304
5305 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5306 SET_SRC (pat));
5307 newpat = gen_load_shadow (SET_DEST (pat));
5308 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5309 delay = 4;
5310 }
5311 else if (code >= 0
5312 && (type == TYPE_MPY2
5313 || type == TYPE_MPY4))
5314 {
5315 /* We don't handle floating point multiplies yet. */
5316 if (GET_MODE (dest) == SFmode)
5317 return false;
5318
5319 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5320 SET_SRC (pat));
5321 newpat = gen_mult_shadow (SET_DEST (pat));
5322 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5323 delay = type == TYPE_MPY2 ? 1 : 3;
5324 }
5325 else
5326 return false;
5327
5328 pat = duplicate_cond (pat, insn);
5329 newpat = duplicate_cond (newpat, insn);
5330 i1 = emit_insn_before (pat, insn);
5331 PATTERN (insn) = newpat;
5332 INSN_CODE (insn) = -1;
5333 recog_memoized (insn);
5334 recog_memoized (i1);
5335 record_delay_slot_pair (i1, insn, delay, 0);
5336 return true;
5337 }
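/* For example, a plain register load becomes an UNSPEC_REAL_LOAD insn
   followed by a load_shadow that performs the actual set of the destination;
   the pair is recorded with a delay of 4, matching the four load delay
   slots.  */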
5338
5339 /* Examine if INSN is the result of splitting a load or multiply into a real
5340 insn and a shadow, and if so, undo the transformation. */
5341 static void
5342 undo_split_delayed_nonbranch (rtx_insn *insn)
5343 {
5344 int icode = recog_memoized (insn);
5345 enum attr_type type;
5346 rtx prev_pat, insn_pat;
5347 rtx_insn *prev;
5348
5349 if (icode < 0)
5350 return;
5351 type = get_attr_type (insn);
5352 if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5353 return;
5354 prev = PREV_INSN (insn);
5355 prev_pat = PATTERN (prev);
5356 insn_pat = PATTERN (insn);
5357 if (GET_CODE (prev_pat) == COND_EXEC)
5358 {
5359 prev_pat = COND_EXEC_CODE (prev_pat);
5360 insn_pat = COND_EXEC_CODE (insn_pat);
5361 }
5362
5363 gcc_assert (GET_CODE (prev_pat) == UNSPEC
5364 && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5365 && type == TYPE_LOAD_SHADOW)
5366 || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5367 && type == TYPE_MULT_SHADOW)));
5368 insn_pat = gen_rtx_SET (SET_DEST (insn_pat),
5369 XVECEXP (prev_pat, 0, 1));
5370 insn_pat = duplicate_cond (insn_pat, prev);
5371 PATTERN (insn) = insn_pat;
5372 INSN_CODE (insn) = -1;
5373 delete_insn (prev);
5374 }
5375
5376 /* Split every insn (i.e. jumps and calls) which can have delay slots into
5377 two parts: the first one is scheduled normally and emits the instruction,
5378 while the second one is a shadow insn which shows the side effect taking
5379 place. The second one is placed in the right cycle by the scheduler, but
5380 not emitted as an assembly instruction. */
5381
5382 static void
5383 split_delayed_insns (void)
5384 {
5385 rtx_insn *insn;
5386 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5387 {
5388 if (JUMP_P (insn) || CALL_P (insn))
5389 split_delayed_branch (insn);
5390 }
5391 }
5392
5393 /* For every insn that has an entry in the new_conditions vector, give it
5394 the appropriate predicate. */
5395 static void
5396 conditionalize_after_sched (void)
5397 {
5398 basic_block bb;
5399 rtx_insn *insn;
5400 FOR_EACH_BB_FN (bb, cfun)
5401 FOR_BB_INSNS (bb, insn)
5402 {
5403 unsigned uid = INSN_UID (insn);
5404 rtx cond;
5405 if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5406 continue;
5407 cond = INSN_INFO_ENTRY (uid).new_cond;
5408 if (cond == NULL_RTX)
5409 continue;
5410 if (dump_file)
5411 fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5412 predicate_insn (insn, cond, true);
5413 }
5414 }
5415
5416 /* A callback for the hw-doloop pass. This function examines INSN; if
5417 it is a loop_end pattern we recognize, return the reg rtx for the
5418 loop counter. Otherwise, return NULL_RTX. */
5419
5420 static rtx
5421 hwloop_pattern_reg (rtx_insn *insn)
5422 {
5423 rtx pat, reg;
5424
5425 if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5426 return NULL_RTX;
5427
5428 pat = PATTERN (insn);
5429 reg = SET_DEST (XVECEXP (pat, 0, 1));
5430 if (!REG_P (reg))
5431 return NULL_RTX;
5432 return reg;
5433 }
5434
5435 /* Return the number of cycles taken by BB, as computed by scheduling,
5436 including the latencies of all insns with delay slots. IGNORE is
5437 an insn we should ignore in the calculation, usually the final
5438 branch. */
5439 static int
5440 bb_earliest_end_cycle (basic_block bb, rtx ignore)
5441 {
5442 int earliest = 0;
5443 rtx_insn *insn;
5444
5445 FOR_BB_INSNS (bb, insn)
5446 {
5447 int cycles, this_clock;
5448
5449 if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5450 || GET_CODE (PATTERN (insn)) == USE
5451 || GET_CODE (PATTERN (insn)) == CLOBBER
5452 || insn == ignore)
5453 continue;
5454
5455 this_clock = insn_get_clock (insn);
5456 cycles = get_attr_cycles (insn);
5457
5458 if (earliest < this_clock + cycles)
5459 earliest = this_clock + cycles;
5460 }
5461 return earliest;
5462 }
5463
5464 /* Examine the insns in BB and remove all which have a uid greater than or
5465 equal to MAX_UID. */
5466 static void
5467 filter_insns_above (basic_block bb, int max_uid)
5468 {
5469 rtx_insn *insn, *next;
5470 bool prev_ti = false;
5471 int prev_cycle = -1;
5472
5473 FOR_BB_INSNS_SAFE (bb, insn, next)
5474 {
5475 int this_cycle;
5476 if (!NONDEBUG_INSN_P (insn))
5477 continue;
5478 if (insn == BB_END (bb))
5479 return;
5480 this_cycle = insn_get_clock (insn);
5481 if (prev_ti && this_cycle == prev_cycle)
5482 {
5483 gcc_assert (GET_MODE (insn) != TImode);
5484 PUT_MODE (insn, TImode);
5485 }
5486 prev_ti = false;
5487 if (INSN_UID (insn) >= max_uid)
5488 {
5489 if (GET_MODE (insn) == TImode)
5490 {
5491 prev_ti = true;
5492 prev_cycle = this_cycle;
5493 }
5494 delete_insn (insn);
5495 }
5496 }
5497 }
5498
5499 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
5500
5501 static void
5502 c6x_asm_emit_except_personality (rtx personality)
5503 {
5504 fputs ("\t.personality\t", asm_out_file);
5505 output_addr_const (asm_out_file, personality);
5506 fputc ('\n', asm_out_file);
5507 }
5508
5509 /* Use a special assembly directive rather than a regular section for
5510 unwind table data. */
5511
5512 static void
5513 c6x_asm_init_sections (void)
5514 {
5515 exception_section = get_unnamed_section (0, output_section_asm_op,
5516 "\t.handlerdata");
5517 }
5518
5519 /* A callback for the hw-doloop pass. Called to optimize LOOP in a
5520 machine-specific fashion; returns true if successful and false if
5521 the hwloop_fail function should be called. */
5522
5523 static bool
5524 hwloop_optimize (hwloop_info loop)
5525 {
5526 basic_block entry_bb, bb;
5527 rtx_insn *seq, *insn, *prev, *entry_after, *end_packet;
5528 rtx_insn *head_insn, *tail_insn, *new_insns, *last_insn;
5529 int loop_earliest;
5530 int n_execute_packets;
5531 edge entry_edge;
5532 unsigned ix;
5533 int max_uid_before, delayed_splits;
5534 int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5535 rtx_insn **orig_vec;
5536 rtx_insn **copies;
5537 rtx_insn ***insn_copies;
5538
5539 if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5540 || !TARGET_INSNS_64PLUS)
5541 return false;
5542
5543 if (loop->iter_reg_used || loop->depth > 1)
5544 return false;
5545 if (loop->has_call || loop->has_asm)
5546 return false;
5547
5548 if (loop->head != loop->tail)
5549 return false;
5550
5551 gcc_assert (loop->incoming_dest == loop->head);
5552
5553 entry_edge = NULL;
5554 FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
5555 if (entry_edge->flags & EDGE_FALLTHRU)
5556 break;
5557 if (entry_edge == NULL)
5558 return false;
5559
5560 reshuffle_units (loop->head);
5561
5562 in_hwloop = true;
5563 schedule_ebbs_init ();
5564 schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5565 schedule_ebbs_finish ();
5566 in_hwloop = false;
5567
5568 bb = loop->head;
5569 loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5570
5571 max_uid_before = get_max_uid ();
5572
5573 /* Split all multi-cycle operations, such as loads. For normal
5574 scheduling, we only do this for branches, as the generated code
5575 would otherwise not be interrupt-safe. When using sploop, it is
5576 safe and beneficial to split them. If any multi-cycle operations
5577 remain after splitting (because we don't handle them yet), we
5578 cannot pipeline the loop. */
5579 delayed_splits = 0;
5580 FOR_BB_INSNS (bb, insn)
5581 {
5582 if (NONDEBUG_INSN_P (insn))
5583 {
5584 recog_memoized (insn);
5585 if (split_delayed_nonbranch (insn))
5586 delayed_splits++;
5587 else if (INSN_CODE (insn) >= 0
5588 && get_attr_cycles (insn) > 1)
5589 goto undo_splits;
5590 }
5591 }
5592
5593 /* Count the number of insns as well as the number of real insns, and save
5594 the original sequence of insns in case we must restore it later. */
5595 n_insns = n_real_insns = 0;
5596 FOR_BB_INSNS (bb, insn)
5597 {
5598 n_insns++;
5599 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5600 n_real_insns++;
5601 }
5602 orig_vec = XNEWVEC (rtx_insn *, n_insns);
5603 n_insns = 0;
5604 FOR_BB_INSNS (bb, insn)
5605 orig_vec[n_insns++] = insn;
5606
5607 /* Count the unit reservations, and compute a minimum II from that
5608 table. */
5609 count_unit_reqs (unit_reqs, loop->start_label,
5610 PREV_INSN (loop->loop_end));
5611 merge_unit_reqs (unit_reqs);
5612
5613 min_ii = res_mii (unit_reqs);
5614 max_ii = loop_earliest < 15 ? loop_earliest : 14;
5615
5616 /* Make copies of the loop body, up to a maximum number of stages we want
5617 to handle. */
5618 max_parallel = loop_earliest / min_ii + 1;
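  /* Worked example (illustrative numbers): with loop_earliest == 10 cycles
     and a resource-based min_ii of 3, max_ii becomes 10 and
     max_parallel == 10 / 3 + 1 == 4, so four extra copies of the loop body
     are generated below in addition to iteration 0.  */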
5619
5620 copies = XCNEWVEC (rtx_insn *, (max_parallel + 1) * n_real_insns);
5621 insn_copies = XNEWVEC (rtx_insn **, max_parallel + 1);
5622 for (i = 0; i < max_parallel + 1; i++)
5623 insn_copies[i] = copies + i * n_real_insns;
5624
5625 head_insn = next_nonnote_nondebug_insn (loop->start_label);
5626 tail_insn = prev_real_insn (BB_END (bb));
5627
5628 i = 0;
5629 FOR_BB_INSNS (bb, insn)
5630 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5631 insn_copies[0][i++] = insn;
5632
5633 sploop_max_uid_iter0 = get_max_uid ();
5634
5635 /* Generate the copies of the loop body, and save them in the
5636 INSN_COPIES array. */
5637 start_sequence ();
5638 for (i = 0; i < max_parallel; i++)
5639 {
5640 int j;
5641 rtx_insn *this_iter;
5642
5643 this_iter = duplicate_insn_chain (head_insn, tail_insn);
5644 j = 0;
5645 while (this_iter)
5646 {
5647 rtx_insn *prev_stage_insn = insn_copies[i][j];
5648 gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5649
5650 if (INSN_CODE (this_iter) >= 0
5651 && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5652 || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5653 {
5654 rtx_insn *prev = PREV_INSN (this_iter);
5655 record_delay_slot_pair (prev, this_iter,
5656 get_attr_cycles (prev) - 1, 0);
5657 }
5658 else
5659 record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5660
5661 insn_copies[i + 1][j] = this_iter;
5662 j++;
5663 this_iter = next_nonnote_nondebug_insn (this_iter);
5664 }
5665 }
5666 new_insns = get_insns ();
5667 last_insn = insn_copies[max_parallel][n_real_insns - 1];
5668 end_sequence ();
5669 emit_insn_before (new_insns, BB_END (bb));
5670
5671 /* Try to schedule the loop using varying initiation intervals,
5672 starting with the smallest possible and incrementing it
5673 on failure. */
5674 for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5675 {
5676 basic_block tmp_bb;
5677 if (dump_file)
5678 fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5679
5680 df_clear_flags (DF_LR_RUN_DCE);
5681
5682 schedule_ebbs_init ();
5683 set_modulo_params (sp_ii, max_parallel, n_real_insns,
5684 sploop_max_uid_iter0);
5685 tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5686 schedule_ebbs_finish ();
5687
5688 if (tmp_bb)
5689 {
5690 if (dump_file)
5691 fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5692 break;
5693 }
5694 }
5695
5696 discard_delay_pairs_above (max_uid_before);
5697
5698 if (sp_ii > max_ii)
5699 goto restore_loop;
5700
5701 stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5702
5703 if (stages == 1 && sp_ii > 5)
5704 goto restore_loop;
5705
5706 /* At this point, we know we've been successful, unless we find later that
5707 there are too many execute packets for the loop buffer to hold. */
5708
5709 /* Assign reservations to the instructions in the loop. We must find
5710 the stage that contains the full loop kernel, and transfer the
5711 reservations of the instructions contained in it to the corresponding
5712 instructions from iteration 0, which are the only ones we'll keep. */
5713 assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
5714 SET_PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5715 SET_NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5716 filter_insns_above (bb, sploop_max_uid_iter0);
5717
5718 for (i = 0; i < n_real_insns; i++)
5719 {
5720 rtx insn = insn_copies[0][i];
5721 int uid = INSN_UID (insn);
5722 int stage = insn_uid_get_clock (uid) / sp_ii;
5723
5724 if (stage + 1 < stages)
5725 {
5726 int copy_uid;
5727 stage = stages - stage - 1;
5728 copy_uid = INSN_UID (insn_copies[stage][i]);
5729 INSN_INFO_ENTRY (uid).reservation
5730 = INSN_INFO_ENTRY (copy_uid).reservation;
5731 }
5732 }
5733 if (stages == 1)
5734 stages++;
5735
5736 /* Compute the number of execute packets the pipelined form of the loop will
5737 require. */
5738 prev = NULL;
5739 n_execute_packets = 0;
5740 for (insn = loop->start_label;
5741 insn != loop->loop_end;
5742 insn = NEXT_INSN (insn))
5743 {
5744 if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5745 && !shadow_p (insn))
5746 {
5747 n_execute_packets++;
5748 if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5749 /* We need an extra NOP instruction. */
5750 n_execute_packets++;
5751
5752 prev = insn;
5753 }
5754 }
5755
5756 end_packet = ss.last_scheduled_iter0;
5757 while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5758 end_packet = PREV_INSN (end_packet);
5759
5760 /* The earliest cycle in which we can emit the SPKERNEL instruction. */
5761 loop_earliest = (stages - 1) * sp_ii;
5762 if (loop_earliest > insn_get_clock (end_packet))
5763 {
5764 n_execute_packets++;
5765 end_packet = loop->loop_end;
5766 }
5767 else
5768 loop_earliest = insn_get_clock (end_packet);
5769
5770 if (n_execute_packets > 14)
5771 goto restore_loop;
5772
5773 /* Generate the spkernel instruction, and place it at the appropriate
5774 spot. */
5775 PUT_MODE (end_packet, VOIDmode);
5776
5777 insn = emit_jump_insn_before (
5778 gen_spkernel (GEN_INT (stages - 1),
5779 const0_rtx, JUMP_LABEL (loop->loop_end)),
5780 end_packet);
5781 JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5782 insn_set_clock (insn, loop_earliest);
5783 PUT_MODE (insn, TImode);
5784 INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5785 delete_insn (loop->loop_end);
5786
5787 /* Place the mvc and sploop instructions before the loop. */
5788 entry_bb = entry_edge->src;
5789
5790 start_sequence ();
5791
5792 insn = emit_insn (gen_mvilc (loop->iter_reg));
5793 insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5794
5795 seq = get_insns ();
5796
5797 if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
5798 {
5799 basic_block new_bb;
5800 edge e;
5801 edge_iterator ei;
5802
5803 emit_insn_before (seq, BB_HEAD (loop->head));
5804 seq = emit_label_before (gen_label_rtx (), seq);
5805
5806 new_bb = create_basic_block (seq, insn, entry_bb);
5807 FOR_EACH_EDGE (e, ei, loop->incoming)
5808 {
5809 if (!(e->flags & EDGE_FALLTHRU))
5810 redirect_edge_and_branch_force (e, new_bb);
5811 else
5812 redirect_edge_succ (e, new_bb);
5813 }
5814 make_edge (new_bb, loop->head, 0);
5815 }
5816 else
5817 {
5818 entry_after = BB_END (entry_bb);
5819 while (DEBUG_INSN_P (entry_after)
5820 || (NOTE_P (entry_after)
5821 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5822 entry_after = PREV_INSN (entry_after);
5823 emit_insn_after (seq, entry_after);
5824 }
5825
5826 end_sequence ();
5827
5828 /* Make sure we don't try to schedule this loop again. */
5829 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
5830 bb->flags |= BB_DISABLE_SCHEDULE;
5831
5832 return true;
5833
5834 restore_loop:
5835 if (dump_file)
5836 fprintf (dump_file, "Unable to pipeline loop.\n");
5837
5838 for (i = 1; i < n_insns; i++)
5839 {
5840 SET_NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5841 SET_PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5842 }
5843 SET_PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5844 SET_NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5845 SET_NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5846 SET_PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5847 BB_HEAD (bb) = orig_vec[0];
5848 BB_END (bb) = orig_vec[n_insns - 1];
5849 undo_splits:
5850 free_delay_pairs ();
5851 FOR_BB_INSNS (bb, insn)
5852 if (NONDEBUG_INSN_P (insn))
5853 undo_split_delayed_nonbranch (insn);
5854 return false;
5855 }
5856
5857 /* A callback for the hw-doloop pass. Called when a loop we have discovered
5858 turns out not to be optimizable; we have to split the doloop_end pattern
5859 into a subtract and a test. */
5860 static void
5861 hwloop_fail (hwloop_info loop)
5862 {
5863 rtx insn, test, testreg;
5864
5865 if (dump_file)
5866 fprintf (dump_file, "splitting doloop insn %d\n",
5867 INSN_UID (loop->loop_end));
5868 insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5869 /* See if we can emit the add at the head of the loop rather than at the
5870 end. */
5871 if (loop->head == NULL
5872 || loop->iter_reg_used_outside
5873 || loop->iter_reg_used
5874 || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5875 || loop->incoming_dest != loop->head
5876 || EDGE_COUNT (loop->head->preds) != 2)
5877 emit_insn_before (insn, loop->loop_end);
5878 else
5879 {
5880 rtx_insn *t = loop->start_label;
5881 while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5882 t = NEXT_INSN (t);
5883 emit_insn_after (insn, t);
5884 }
5885
5886 testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5887 if (GET_CODE (testreg) == SCRATCH)
5888 testreg = loop->iter_reg;
5889 else
5890 emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5891
5892 test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5893 insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5894 loop->start_label),
5895 loop->loop_end);
5896
5897 JUMP_LABEL (insn) = loop->start_label;
5898 LABEL_NUSES (loop->start_label)++;
5899 delete_insn (loop->loop_end);
5900 }
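/* Illustrative sketch (not from the original sources): the replacement
   emitted by hwloop_fail is conceptually

       iter = iter - 1;                   // gen_addsi3 with constm1_rtx
       if (iter != 0) goto start_label;   // gen_cbranchsi4 with an NE test

   with the decrement hoisted to the loop head when that is safe.  */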
5901
5902 static struct hw_doloop_hooks c6x_doloop_hooks =
5903 {
5904 hwloop_pattern_reg,
5905 hwloop_optimize,
5906 hwloop_fail
5907 };
5908
5909 /* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5910 doloop_end patterns where such optimizations are impossible. */
5911 static void
5912 c6x_hwloops (void)
5913 {
5914 if (optimize)
5915 reorg_loops (true, &c6x_doloop_hooks);
5916 }
5917
5918 /* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here
5919 into a sequence that loads the return register and performs the call,
5920 and emit the return label.
5921 If scheduling after reload is requested, it happens here. */
5922
5923 static void
5924 c6x_reorg (void)
5925 {
5926 basic_block bb;
5927 rtx *call_labels;
5928 bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5929 && !maybe_skip_selective_scheduling ());
5930
5931 /* We are freeing block_for_insn in the toplev to keep compatibility
5932 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5933 compute_bb_for_insn ();
5934
5935 df_clear_flags (DF_LR_RUN_DCE);
5936 df_note_add_problem ();
5937
5938 /* If optimizing, we'll have split before scheduling. */
5939 if (optimize == 0)
5940 split_all_insns ();
5941
5942 df_analyze ();
5943
5944 if (c6x_flag_schedule_insns2)
5945 {
5946 int sz = get_max_uid () * 3 / 2 + 1;
5947
5948 insn_info.create (sz);
5949 }
5950
5951 /* Make sure the real-jump insns we create are not deleted. When modulo-
5952 scheduling, situations where a reg is only stored in a loop can also
5953 cause dead code when doing the initial unrolling. */
5954 sched_no_dce = true;
5955
5956 c6x_hwloops ();
5957
5958 if (c6x_flag_schedule_insns2)
5959 {
5960 split_delayed_insns ();
5961 timevar_push (TV_SCHED2);
5962 if (do_selsched)
5963 run_selective_scheduling ();
5964 else
5965 schedule_ebbs ();
5966 conditionalize_after_sched ();
5967 timevar_pop (TV_SCHED2);
5968
5969 free_delay_pairs ();
5970 }
5971 sched_no_dce = false;
5972
5973 call_labels = XCNEWVEC (rtx, get_max_uid () + 1);
5974
5975 reorg_split_calls (call_labels);
5976
5977 if (c6x_flag_schedule_insns2)
5978 {
5979 FOR_EACH_BB_FN (bb, cfun)
5980 if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5981 assign_reservations (BB_HEAD (bb), BB_END (bb));
5982 }
5983
5984 if (c6x_flag_var_tracking)
5985 {
5986 timevar_push (TV_VAR_TRACKING);
5987 variable_tracking_main ();
5988 timevar_pop (TV_VAR_TRACKING);
5989 }
5990
5991 reorg_emit_nops (call_labels);
5992
5993 /* Post-process the schedule to move parallel insns into SEQUENCEs. */
5994 if (c6x_flag_schedule_insns2)
5995 {
5996 free_delay_pairs ();
5997 c6x_gen_bundles ();
5998 }
5999
6000 df_finish_pass (false);
6001 }
6002
6003 /* Called when a function has been assembled. It should perform all the
6004 tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
6005 tasks.
6006 We free the reservation (and other scheduling) information here now that
6007 all insns have been output. */
6008 void
6009 c6x_function_end (FILE *file, const char *fname)
6010 {
6011 c6x_output_fn_unwind (file);
6012
6013 insn_info.release ();
6014
6015 if (!flag_inhibit_size_directive)
6016 ASM_OUTPUT_MEASURED_SIZE (file, fname);
6017 }
6018
6019 /* Determine whether X is a shift with code CODE and an integer amount
6020 AMOUNT. */
6021 static bool
6022 shift_p (rtx x, enum rtx_code code, int amount)
6023 {
6024 return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
6025 && INTVAL (XEXP (x, 1)) == amount);
6026 }
6027
6028 /* Compute a (partial) cost for rtx X. Return true if the complete
6029 cost has been computed, and false if subexpressions should be
6030 scanned. In either case, *TOTAL contains the cost result. */
6031
6032 static bool
6033 c6x_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
6034 bool speed)
6035 {
6036 int cost2 = COSTS_N_INSNS (1);
6037 rtx op0, op1;
6038 int code = GET_CODE (x);
6039
6040 switch (code)
6041 {
6042 case CONST_INT:
6043 if (outer_code == SET || outer_code == PLUS)
6044 *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6045 else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6046 || outer_code == MINUS)
6047 *total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6048 else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6049 || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6050 *total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6051 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6052 || outer_code == LSHIFTRT)
6053 *total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6054 else
6055 *total = cost2;
6056 return true;
6057
6058 case CONST:
6059 case LABEL_REF:
6060 case SYMBOL_REF:
6061 case CONST_DOUBLE:
6062 *total = COSTS_N_INSNS (2);
6063 return true;
6064
6065 case TRUNCATE:
6066 /* Recognize a mult_highpart operation. */
6067 if ((mode == HImode || mode == SImode)
6068 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6069 && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (mode)
6070 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6071 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6072 && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (mode))
6073 {
6074 rtx mul = XEXP (XEXP (x, 0), 0);
6075 rtx op0 = XEXP (mul, 0);
6076 rtx op1 = XEXP (mul, 1);
6077 enum rtx_code code0 = GET_CODE (op0);
6078 enum rtx_code code1 = GET_CODE (op1);
6079
6080 if ((code0 == code1
6081 && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6082 || (mode == HImode
6083 && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6084 {
6085 if (mode == HImode)
6086 *total = COSTS_N_INSNS (2);
6087 else
6088 *total = COSTS_N_INSNS (12);
6089 mode = GET_MODE (XEXP (op0, 0));
6090 *total += rtx_cost (XEXP (op0, 0), mode, code0, 0, speed);
6091 *total += rtx_cost (XEXP (op1, 0), mode, code1, 0, speed);
6092 return true;
6093 }
6094 }
6095 return false;
6096
6097 case ASHIFT:
6098 case ASHIFTRT:
6099 case LSHIFTRT:
6100 if (mode == DImode)
6101 *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6102 else
6103 *total = COSTS_N_INSNS (1);
6104 return false;
6105
6106 case PLUS:
6107 case MINUS:
6108 *total = COSTS_N_INSNS (1);
6109 op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6110 op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6111 if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6112 && INTEGRAL_MODE_P (mode)
6113 && GET_CODE (op0) == MULT
6114 && GET_CODE (XEXP (op0, 1)) == CONST_INT
6115 && (INTVAL (XEXP (op0, 1)) == 2
6116 || INTVAL (XEXP (op0, 1)) == 4
6117 || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6118 {
6119 *total += rtx_cost (XEXP (op0, 0), mode, ASHIFT, 0, speed);
6120 *total += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
6121 return true;
6122 }
6123 return false;
6124
6125 case MULT:
6126 op0 = XEXP (x, 0);
6127 op1 = XEXP (x, 1);
6128 if (mode == DFmode)
6129 {
6130 if (TARGET_FP)
6131 *total = COSTS_N_INSNS (speed ? 10 : 1);
6132 else
6133 *total = COSTS_N_INSNS (speed ? 200 : 4);
6134 }
6135 else if (mode == SFmode)
6136 {
6137 if (TARGET_FP)
6138 *total = COSTS_N_INSNS (speed ? 4 : 1);
6139 else
6140 *total = COSTS_N_INSNS (speed ? 100 : 4);
6141 }
6142 else if (mode == DImode)
6143 {
6144 if (TARGET_MPY32
6145 && GET_CODE (op0) == GET_CODE (op1)
6146 && (GET_CODE (op0) == ZERO_EXTEND
6147 || GET_CODE (op0) == SIGN_EXTEND))
6148 {
6149 *total = COSTS_N_INSNS (speed ? 2 : 1);
6150 op0 = XEXP (op0, 0);
6151 op1 = XEXP (op1, 0);
6152 }
6153 else
6154 /* Maybe improve this later. */
6155 *total = COSTS_N_INSNS (20);
6156 }
6157 else if (mode == SImode)
6158 {
6159 if (((GET_CODE (op0) == ZERO_EXTEND
6160 || GET_CODE (op0) == SIGN_EXTEND
6161 || shift_p (op0, LSHIFTRT, 16))
6162 && (GET_CODE (op1) == SIGN_EXTEND
6163 || GET_CODE (op1) == ZERO_EXTEND
6164 || scst5_operand (op1, SImode)
6165 || shift_p (op1, ASHIFTRT, 16)
6166 || shift_p (op1, LSHIFTRT, 16)))
6167 || (shift_p (op0, ASHIFTRT, 16)
6168 && (GET_CODE (op1) == SIGN_EXTEND
6169 || shift_p (op1, ASHIFTRT, 16))))
6170 {
6171 *total = COSTS_N_INSNS (speed ? 2 : 1);
6172 op0 = XEXP (op0, 0);
6173 if (scst5_operand (op1, SImode))
6174 op1 = NULL_RTX;
6175 else
6176 op1 = XEXP (op1, 0);
6177 }
6178 else if (!speed)
6179 *total = COSTS_N_INSNS (1);
6180 else if (TARGET_MPY32)
6181 *total = COSTS_N_INSNS (4);
6182 else
6183 *total = COSTS_N_INSNS (6);
6184 }
6185 else if (mode == HImode)
6186 *total = COSTS_N_INSNS (speed ? 2 : 1);
6187
6188 if (GET_CODE (op0) != REG
6189 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
6190 *total += rtx_cost (op0, mode, MULT, 0, speed);
6191 if (op1 && GET_CODE (op1) != REG
6192 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
6193 *total += rtx_cost (op1, mode, MULT, 1, speed);
6194 return true;
6195
6196 case UDIV:
6197 case DIV:
6198 /* This is a bit random; assuming on average there'll be 16 leading
6199 zeros. FIXME: estimate better for constant dividends. */
6200 *total = COSTS_N_INSNS (6 + 3 * 16);
6201 return false;
6202
6203 case IF_THEN_ELSE:
6204 /* Recognize the cmp_and/ior patterns. */
6205 op0 = XEXP (x, 0);
6206 if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6207 && REG_P (XEXP (op0, 0))
6208 && XEXP (op0, 1) == const0_rtx
6209 && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6210 {
6211 *total = rtx_cost (XEXP (x, 1), VOIDmode, (enum rtx_code) outer_code,
6212 opno, speed);
6213 return false;
6214 }
6215 return false;
6216
6217 default:
6218 return false;
6219 }
6220 }
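/* Illustrative example (assumed C source): the TRUNCATE case above matches
   high-part multiplies such as

       short mulhi (short a, short b)
       {
         return ((int) a * (int) b) >> 16;
       }

   which appear in RTL as (truncate:HI (lshiftrt:SI (mult:SI (sign_extend a)
   (sign_extend b)) 16)) and are costed as two insns in HImode here.  */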
6221
6222 /* Implements target hook vector_mode_supported_p. */
6223
6224 static bool
6225 c6x_vector_mode_supported_p (machine_mode mode)
6226 {
6227 switch (mode)
6228 {
6229 case V2HImode:
6230 case V4QImode:
6231 case V2SImode:
6232 case V4HImode:
6233 case V8QImode:
6234 return true;
6235 default:
6236 return false;
6237 }
6238 }
6239
6240 /* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6241 static machine_mode
6242 c6x_preferred_simd_mode (machine_mode mode)
6243 {
6244 switch (mode)
6245 {
6246 case HImode:
6247 return V2HImode;
6248 case QImode:
6249 return V4QImode;
6250
6251 default:
6252 return word_mode;
6253 }
6254 }
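/* Illustrative note (assumed example): with this hook the vectorizer packs
   two shorts per V2HImode vector, so a loop such as

       void vadd (short *a, short *b, int n)
       {
         for (int i = 0; i < n; i++)
           a[i] += b[i];
       }

   can be handled two elements at a time by the v2hi add patterns.  */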
6255
6256 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6257
6258 static bool
6259 c6x_scalar_mode_supported_p (machine_mode mode)
6260 {
6261 if (ALL_FIXED_POINT_MODE_P (mode)
6262 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6263 return true;
6264
6265 return default_scalar_mode_supported_p (mode);
6266 }
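/* Illustrative note: in addition to the default scalar modes, this accepts
   the embedded-C fixed-point modes (e.g. the modes behind _Fract and _Accum)
   whose precision fits in two words; the saturating builtins below use some
   of these modes (SQmode, HQmode) internally.  */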
6267
6268 /* Output a reference from a function exception table to the type_info
6269 object X. Output these via a special assembly directive. */
6270
6271 static bool
6272 c6x_output_ttype (rtx x)
6273 {
6274 /* Use special relocations for symbol references. */
6275 if (GET_CODE (x) != CONST_INT)
6276 fputs ("\t.ehtype\t", asm_out_file);
6277 else
6278 fputs ("\t.word\t", asm_out_file);
6279 output_addr_const (asm_out_file, x);
6280 fputc ('\n', asm_out_file);
6281
6282 return TRUE;
6283 }
6284
6285 /* Modify the return address of the current function. */
6286
6287 void
6288 c6x_set_return_address (rtx source, rtx scratch)
6289 {
6290 struct c6x_frame frame;
6291 rtx addr;
6292 HOST_WIDE_INT offset;
6293
6294 c6x_compute_frame_layout (&frame);
6295 if (! c6x_save_reg (RETURN_ADDR_REGNO))
6296 emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6297 else
6298 {
6299
6300 if (frame_pointer_needed)
6301 {
6302 addr = hard_frame_pointer_rtx;
6303 offset = frame.b3_offset;
6304 }
6305 else
6306 {
6307 addr = stack_pointer_rtx;
6308 offset = frame.to_allocate - frame.b3_offset;
6309 }
6310
6311 /* TODO: Use base+offset loads where possible. */
6312 if (offset)
6313 {
6314 HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6315
6316 emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6317 if (low != offset)
6318 emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset)));
6319 emit_insn (gen_addsi3 (scratch, addr, scratch));
6320 addr = scratch;
6321 }
6322
6323 emit_move_insn (gen_frame_mem (Pmode, addr), source);
6324 }
6325 }
6326
6327 /* We save pairs of registers using a DImode store. Describe the component
6328 registers for DWARF generation code. */
6329
6330 static rtx
6331 c6x_dwarf_register_span (rtx rtl)
6332 {
6333 unsigned regno;
6334 unsigned real_regno;
6335 int nregs;
6336 int i;
6337 rtx p;
6338
6339 regno = REGNO (rtl);
6340 nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl));
6341 if (nregs == 1)
6342 return NULL_RTX;
6343
6344 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs));
6345 for (i = 0; i < nregs; i++)
6346 {
6347 if (TARGET_BIG_ENDIAN)
6348 real_regno = regno + nregs - (i + 1);
6349 else
6350 real_regno = regno + i;
6351
6352 XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
6353 }
6354
6355 return p;
6356 }
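/* Illustrative example (assumed registers): a DImode save of the pair
   starting at A10 is described to the DWARF unwinder as the parallel
   (A10, A11) in little-endian mode, and as (A11, A10) when TARGET_BIG_ENDIAN,
   so each 32-bit half is restored from the correct slot.  */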
6357
6358 /* Codes for all the C6X builtins. */
6359 enum c6x_builtins
6360 {
6361 C6X_BUILTIN_SADD,
6362 C6X_BUILTIN_SSUB,
6363 C6X_BUILTIN_ADD2,
6364 C6X_BUILTIN_SUB2,
6365 C6X_BUILTIN_ADD4,
6366 C6X_BUILTIN_SUB4,
6367 C6X_BUILTIN_SADD2,
6368 C6X_BUILTIN_SSUB2,
6369 C6X_BUILTIN_SADDU4,
6370
6371 C6X_BUILTIN_SMPY,
6372 C6X_BUILTIN_SMPYH,
6373 C6X_BUILTIN_SMPYHL,
6374 C6X_BUILTIN_SMPYLH,
6375 C6X_BUILTIN_MPY2,
6376 C6X_BUILTIN_SMPY2,
6377
6378 C6X_BUILTIN_CLRR,
6379 C6X_BUILTIN_EXTR,
6380 C6X_BUILTIN_EXTRU,
6381
6382 C6X_BUILTIN_SSHL,
6383 C6X_BUILTIN_SUBC,
6384 C6X_BUILTIN_ABS,
6385 C6X_BUILTIN_ABS2,
6386 C6X_BUILTIN_AVG2,
6387 C6X_BUILTIN_AVGU4,
6388
6389 C6X_BUILTIN_MAX
6390 };
6391
6392
6393 static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6394
6395 /* Return the C6X builtin for CODE. */
6396 static tree
6397 c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6398 {
6399 if (code >= C6X_BUILTIN_MAX)
6400 return error_mark_node;
6401
6402 return c6x_builtin_decls[code];
6403 }
6404
6405 #define def_builtin(NAME, TYPE, CODE) \
6406 do { \
6407 tree bdecl; \
6408 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
6409 NULL, NULL_TREE); \
6410 c6x_builtin_decls[CODE] = bdecl; \
6411 } while (0)
6412
6413 /* Set up all builtin functions for this target. */
6414 static void
6415 c6x_init_builtins (void)
6416 {
6417 tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6418 tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6419 tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6420 tree int_ftype_int
6421 = build_function_type_list (integer_type_node, integer_type_node,
6422 NULL_TREE);
6423 tree int_ftype_int_int
6424 = build_function_type_list (integer_type_node, integer_type_node,
6425 integer_type_node, NULL_TREE);
6426 tree v2hi_ftype_v2hi
6427 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6428 tree v4qi_ftype_v4qi_v4qi
6429 = build_function_type_list (V4QI_type_node, V4QI_type_node,
6430 V4QI_type_node, NULL_TREE);
6431 tree v2hi_ftype_v2hi_v2hi
6432 = build_function_type_list (V2HI_type_node, V2HI_type_node,
6433 V2HI_type_node, NULL_TREE);
6434 tree v2si_ftype_v2hi_v2hi
6435 = build_function_type_list (V2SI_type_node, V2HI_type_node,
6436 V2HI_type_node, NULL_TREE);
6437
6438 def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6439 C6X_BUILTIN_SADD);
6440 def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6441 C6X_BUILTIN_SSUB);
6442 def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6443 C6X_BUILTIN_ADD2);
6444 def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6445 C6X_BUILTIN_SUB2);
6446 def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6447 C6X_BUILTIN_ADD4);
6448 def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6449 C6X_BUILTIN_SUB4);
6450 def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6451 C6X_BUILTIN_MPY2);
6452 def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6453 C6X_BUILTIN_SADD2);
6454 def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6455 C6X_BUILTIN_SSUB2);
6456 def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6457 C6X_BUILTIN_SADDU4);
6458 def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6459 C6X_BUILTIN_SMPY2);
6460
6461 def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6462 C6X_BUILTIN_SMPY);
6463 def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6464 C6X_BUILTIN_SMPYH);
6465 def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6466 C6X_BUILTIN_SMPYHL);
6467 def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6468 C6X_BUILTIN_SMPYLH);
6469
6470 def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6471 C6X_BUILTIN_SSHL);
6472 def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6473 C6X_BUILTIN_SUBC);
6474
6475 def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6476 C6X_BUILTIN_AVG2);
6477 def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6478 C6X_BUILTIN_AVGU4);
6479
6480 def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6481 C6X_BUILTIN_CLRR);
6482 def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6483 C6X_BUILTIN_EXTR);
6484 def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6485 C6X_BUILTIN_EXTRU);
6486
6487 def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6488 def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
6489 }
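/* Usage sketch (illustrative, not part of this file): from C these builtins
   are called directly and expand through the patterns listed in bdesc_2arg
   below, e.g.

       typedef short v2hi __attribute__ ((vector_size (4)));

       int saturating_sum (int a, int b)
       {
         return __builtin_c6x_sadd (a, b);    // CODE_FOR_saddsi3
       }

       v2hi pairwise_sadd (v2hi a, v2hi b)
       {
         return __builtin_c6x_sadd2 (a, b);   // CODE_FOR_ss_addv2hi3
       }
 */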
6490
6491
6492 struct builtin_description
6493 {
6494 const enum insn_code icode;
6495 const char *const name;
6496 const enum c6x_builtins code;
6497 };
6498
6499 static const struct builtin_description bdesc_2arg[] =
6500 {
6501 { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6502 { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6503 { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6504 { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6505 { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6506 { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6507 { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6508 { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6509 { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6510
6511 { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6512 { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6513
6514 { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6515 { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6516
6517 { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6518 { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6519 { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6520 { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6521
6522 { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6523
6524 { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6525 { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6526 { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6527 };
6528
6529 static const struct builtin_description bdesc_1arg[] =
6530 {
6531 { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6532 { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6533 };
6534
6535 /* Errors in the source file can cause expand_expr to return const0_rtx
6536 where we expect a vector. To avoid crashing, use one of the vector
6537 clear instructions. */
6538 static rtx
6539 safe_vector_operand (rtx x, machine_mode mode)
6540 {
6541 if (x != const0_rtx)
6542 return x;
6543 x = gen_reg_rtx (SImode);
6544
6545 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6546 return gen_lowpart (mode, x);
6547 }
6548
6549 /* Subroutine of c6x_expand_builtin to take care of binop insns.  MATCH_OP is
6550    true if the insn's first input operand must match the output operand.  */
6551
6552 static rtx
6553 c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6554 bool match_op)
6555 {
6556 int offs = match_op ? 1 : 0;
6557 rtx pat;
6558 tree arg0 = CALL_EXPR_ARG (exp, 0);
6559 tree arg1 = CALL_EXPR_ARG (exp, 1);
6560 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6561 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6562 machine_mode op0mode = GET_MODE (op0);
6563 machine_mode op1mode = GET_MODE (op1);
6564 machine_mode tmode = insn_data[icode].operand[0].mode;
6565 machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6566 machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6567 rtx ret = target;
6568
6569 if (VECTOR_MODE_P (mode0))
6570 op0 = safe_vector_operand (op0, mode0);
6571 if (VECTOR_MODE_P (mode1))
6572 op1 = safe_vector_operand (op1, mode1);
6573
6574 if (! target
6575 || GET_MODE (target) != tmode
6576 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6577 {
6578 if (tmode == SQmode || tmode == V2SQmode)
6579 {
6580 ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6581 target = gen_lowpart (tmode, ret);
6582 }
6583 else
6584 target = gen_reg_rtx (tmode);
6585 }
6586
6587 if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6588 && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6589 {
6590 op0mode = mode0;
6591 op0 = gen_lowpart (mode0, op0);
6592 }
6593 if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6594 && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6595 {
6596 op1mode = mode1;
6597 op1 = gen_lowpart (mode1, op1);
6598 }
6599 /* In case the insn wants input operands in modes different from
6600 the result, abort. */
6601 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6602 && (op1mode == mode1 || op1mode == VOIDmode));
6603
6604 if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6605 op0 = copy_to_mode_reg (mode0, op0);
6606 if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6607 op1 = copy_to_mode_reg (mode1, op1);
6608
6609 if (match_op)
6610 pat = GEN_FCN (icode) (target, target, op0, op1);
6611 else
6612 pat = GEN_FCN (icode) (target, op0, op1);
6613
6614 if (! pat)
6615 return 0;
6616
6617 emit_insn (pat);
6618
6619 return ret;
6620 }
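/* Illustrative note: __builtin_c6x_clrr is the one builtin expanded with
   MATCH_OP set (see c6x_expand_builtin below), so its pattern receives the
   target twice, as GEN_FCN (icode) (target, target, op0, op1), matching an
   insn whose output operand must also appear among the inputs.  */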
6621
6622 /* Subroutine of c6x_expand_builtin to take care of unop insns. */
6623
6624 static rtx
6625 c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6626 rtx target)
6627 {
6628 rtx pat;
6629 tree arg0 = CALL_EXPR_ARG (exp, 0);
6630 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6631 machine_mode op0mode = GET_MODE (op0);
6632 machine_mode tmode = insn_data[icode].operand[0].mode;
6633 machine_mode mode0 = insn_data[icode].operand[1].mode;
6634
6635 if (! target
6636 || GET_MODE (target) != tmode
6637 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6638 target = gen_reg_rtx (tmode);
6639
6640 if (VECTOR_MODE_P (mode0))
6641 op0 = safe_vector_operand (op0, mode0);
6642
6643 if (op0mode == SImode && mode0 == HImode)
6644 {
6645 op0mode = HImode;
6646 op0 = gen_lowpart (HImode, op0);
6647 }
6648 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6649
6650 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6651 op0 = copy_to_mode_reg (mode0, op0);
6652
6653 pat = GEN_FCN (icode) (target, op0);
6654 if (! pat)
6655 return 0;
6656 emit_insn (pat);
6657 return target;
6658 }
6659
6660 /* Expand an expression EXP that calls a built-in function,
6661 with result going to TARGET if that's convenient
6662 (and in mode MODE if that's convenient).
6663 SUBTARGET may be used as the target for computing one of EXP's operands.
6664 IGNORE is nonzero if the value is to be ignored. */
6665
6666 static rtx
6667 c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6668 rtx subtarget ATTRIBUTE_UNUSED,
6669 machine_mode mode ATTRIBUTE_UNUSED,
6670 int ignore ATTRIBUTE_UNUSED)
6671 {
6672 size_t i;
6673 const struct builtin_description *d;
6674 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6675 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6676
6677 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6678 if (d->code == fcode)
6679 return c6x_expand_binop_builtin (d->icode, exp, target,
6680 fcode == C6X_BUILTIN_CLRR);
6681
6682 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6683 if (d->code == fcode)
6684 return c6x_expand_unop_builtin (d->icode, exp, target);
6685
6686 gcc_unreachable ();
6687 }
6688
6689 /* Target unwind frame info is generated from dwarf CFI directives, so
6690 always output dwarf2 unwind info. */
6691
6692 static enum unwind_info_type
6693 c6x_debug_unwind_info (void)
6694 {
6695 if (flag_unwind_tables || flag_exceptions)
6696 return UI_DWARF2;
6697
6698 return default_debug_unwind_info ();
6699 }
6700
6701 /* Target Structure. */
6702
6703 /* Initialize the GCC target structure. */
6704 #undef TARGET_FUNCTION_ARG
6705 #define TARGET_FUNCTION_ARG c6x_function_arg
6706 #undef TARGET_FUNCTION_ARG_ADVANCE
6707 #define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6708 #undef TARGET_FUNCTION_ARG_BOUNDARY
6709 #define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6710 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6711 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6712 c6x_function_arg_round_boundary
6713 #undef TARGET_FUNCTION_VALUE_REGNO_P
6714 #define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6715 #undef TARGET_FUNCTION_VALUE
6716 #define TARGET_FUNCTION_VALUE c6x_function_value
6717 #undef TARGET_LIBCALL_VALUE
6718 #define TARGET_LIBCALL_VALUE c6x_libcall_value
6719 #undef TARGET_RETURN_IN_MEMORY
6720 #define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6721 #undef TARGET_RETURN_IN_MSB
6722 #define TARGET_RETURN_IN_MSB c6x_return_in_msb
6723 #undef TARGET_PASS_BY_REFERENCE
6724 #define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6725 #undef TARGET_CALLEE_COPIES
6726 #define TARGET_CALLEE_COPIES c6x_callee_copies
6727 #undef TARGET_STRUCT_VALUE_RTX
6728 #define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6729 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6730 #define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6731
6732 #undef TARGET_ASM_OUTPUT_MI_THUNK
6733 #define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6734 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6735 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6736
6737 #undef TARGET_BUILD_BUILTIN_VA_LIST
6738 #define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6739
6740 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6741 #define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6742 #undef TARGET_TRAMPOLINE_INIT
6743 #define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6744
6745 #undef TARGET_LEGITIMATE_CONSTANT_P
6746 #define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6747 #undef TARGET_LEGITIMATE_ADDRESS_P
6748 #define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6749
6750 #undef TARGET_IN_SMALL_DATA_P
6751 #define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6752 #undef TARGET_ASM_SELECT_RTX_SECTION
6753 #define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section
6754 #undef TARGET_ASM_SELECT_SECTION
6755 #define TARGET_ASM_SELECT_SECTION c6x_elf_select_section
6756 #undef TARGET_ASM_UNIQUE_SECTION
6757 #define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section
6758 #undef TARGET_SECTION_TYPE_FLAGS
6759 #define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags
6760 #undef TARGET_HAVE_SRODATA_SECTION
6761 #define TARGET_HAVE_SRODATA_SECTION true
6762 #undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6763 #define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6764
6765 #undef TARGET_OPTION_OVERRIDE
6766 #define TARGET_OPTION_OVERRIDE c6x_option_override
6767 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6768 #define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6769
6770 #undef TARGET_INIT_LIBFUNCS
6771 #define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6772 #undef TARGET_LIBFUNC_GNU_PREFIX
6773 #define TARGET_LIBFUNC_GNU_PREFIX true
6774
6775 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6776 #define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6777 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6778 #define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6779 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6780 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6781
6782 #undef TARGET_RTX_COSTS
6783 #define TARGET_RTX_COSTS c6x_rtx_costs
6784
6785 #undef TARGET_SCHED_INIT
6786 #define TARGET_SCHED_INIT c6x_sched_init
6787 #undef TARGET_SCHED_SET_SCHED_FLAGS
6788 #define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6789 #undef TARGET_SCHED_ADJUST_COST
6790 #define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6791 #undef TARGET_SCHED_ISSUE_RATE
6792 #define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6793 #undef TARGET_SCHED_VARIABLE_ISSUE
6794 #define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6795 #undef TARGET_SCHED_REORDER
6796 #define TARGET_SCHED_REORDER c6x_sched_reorder
6797 #undef TARGET_SCHED_REORDER2
6798 #define TARGET_SCHED_REORDER2 c6x_sched_reorder2
6799 #undef TARGET_SCHED_DFA_NEW_CYCLE
6800 #define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6801 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6802 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
6803 #undef TARGET_SCHED_EXPOSED_PIPELINE
6804 #define TARGET_SCHED_EXPOSED_PIPELINE true
6805
6806 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6807 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6808 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
6809 #define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6810 #undef TARGET_SCHED_SET_SCHED_CONTEXT
6811 #define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
6812 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6813 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
6814 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
6815 #define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6816
6817 #undef TARGET_CAN_ELIMINATE
6818 #define TARGET_CAN_ELIMINATE c6x_can_eliminate
6819
6820 #undef TARGET_PREFERRED_RENAME_CLASS
6821 #define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6822
6823 #undef TARGET_MACHINE_DEPENDENT_REORG
6824 #define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6825
6826 #undef TARGET_ASM_FILE_START
6827 #define TARGET_ASM_FILE_START c6x_file_start
6828
6829 #undef TARGET_PRINT_OPERAND
6830 #define TARGET_PRINT_OPERAND c6x_print_operand
6831 #undef TARGET_PRINT_OPERAND_ADDRESS
6832 #define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6833 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
6834 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6835
6836 /* C6x unwinding tables use a different format for the typeinfo tables. */
6837 #undef TARGET_ASM_TTYPE
6838 #define TARGET_ASM_TTYPE c6x_output_ttype
6839
6840 /* The C6x ABI follows the ARM EABI exception handling rules. */
6841 #undef TARGET_ARM_EABI_UNWINDER
6842 #define TARGET_ARM_EABI_UNWINDER true
6843
6844 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6845 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6846
6847 #undef TARGET_ASM_INIT_SECTIONS
6848 #define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6849
6850 #undef TARGET_DEBUG_UNWIND_INFO
6851 #define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info
6852
6853 #undef TARGET_DWARF_REGISTER_SPAN
6854 #define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6855
6856 #undef TARGET_INIT_BUILTINS
6857 #define TARGET_INIT_BUILTINS c6x_init_builtins
6858 #undef TARGET_EXPAND_BUILTIN
6859 #define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6860 #undef TARGET_BUILTIN_DECL
6861 #define TARGET_BUILTIN_DECL c6x_builtin_decl
6862
6863 struct gcc_target targetm = TARGET_INITIALIZER;
6864
6865 #include "gt-c6x.h"
6866