1 /* Target Code for TI C6X
2 Copyright (C) 2010-2018 Free Software Foundation, Inc.
3 Contributed by Andrew Jenner <andrew@codesourcery.com>
4 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple-expr.h"
32 #include "cfghooks.h"
33 #include "df.h"
34 #include "memmodel.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "stor-layout.h"
45 #include "varasm.h"
46 #include "calls.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "tm-constrs.h"
54 #include "langhooks.h"
55 #include "sel-sched.h"
56 #include "debug.h"
57 #include "hw-doloop.h"
58 #include "regrename.h"
59 #include "dumpfile.h"
60 #include "builtins.h"
61
62 /* This file should be included last. */
63 #include "target-def.h"
64
65 /* Table of supported architecture variants. */
66 typedef struct
67 {
68 const char *arch;
69 enum c6x_cpu_type type;
70 unsigned short features;
71 } c6x_arch_table;
72
73 /* A list of all ISAs, mapping each one to a representative device.
74 Used for -march selection. */
75 static const c6x_arch_table all_isas[] =
76 {
77 #define C6X_ISA(NAME,DEVICE,FLAGS) \
78 { NAME, DEVICE, FLAGS },
79 #include "c6x-isas.def"
80 #undef C6X_ISA
81 { NULL, C6X_CPU_C62X, 0 }
82 };
83
84 /* This is the parsed result of the "-march=" option, if given. */
85 enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;
86
87 /* A mask of insn types that are allowed by the architecture selected by
88 the -march option. */
89 unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;
90
91 /* The instruction that is being output (as obtained from FINAL_PRESCAN_INSN).
92 */
93 static rtx_insn *c6x_current_insn = NULL;
94
95 /* A decl we build to access __c6xabi_DSBT_base. */
96 static GTY(()) tree dsbt_decl;
97
98 /* Determines whether we run our final scheduling pass or not. We always
99 avoid the normal second scheduling pass. */
100 static int c6x_flag_schedule_insns2;
101
102 /* Determines whether we run variable tracking in machine dependent
103 reorganization. */
104 static int c6x_flag_var_tracking;
105
106 /* Determines whether we use modulo scheduling. */
107 static int c6x_flag_modulo_sched;
108
109 /* Record the state of flag_pic before we set it to 1 for DSBT. */
110 int c6x_initial_flag_pic;
111
112 typedef struct
113 {
114 /* We record the clock cycle for every insn during scheduling. */
115 int clock;
116 /* After scheduling, we run assign_reservations to choose unit
117 reservations for all insns. These are recorded here. */
118 int reservation;
119 /* Records the new condition for insns which must be made
120 conditional after scheduling. An entry of NULL_RTX means no such
121 change is necessary. */
122 rtx new_cond;
123 /* True for the first insn that was scheduled in an ebb. */
124 bool ebb_start;
125 /* The scheduler state after the insn, transformed into a mask of UNIT_QID
126 bits rather than storing the state. Meaningful only for the last
127 insn in a cycle. */
128 unsigned int unit_mask;
129 } c6x_sched_insn_info;
130
131
132 /* Record a c6x_sched_insn_info structure for every insn in the function. */
133 static vec<c6x_sched_insn_info> insn_info;
134
135 #define INSN_INFO_LENGTH (insn_info).length ()
136 #define INSN_INFO_ENTRY(N) (insn_info[(N)])
137
138 static bool done_cfi_sections;
139
140 #define RESERVATION_FLAG_D 1
141 #define RESERVATION_FLAG_L 2
142 #define RESERVATION_FLAG_S 4
143 #define RESERVATION_FLAG_M 8
144 #define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
145 #define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
146 #define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
147 #define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)
148
149 /* The DFA names of the units. */
150 static const char *const c6x_unit_names[] =
151 {
152 "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
153 "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
154 };
155
156 /* The DFA unit number for each unit in c6x_unit_names[]. */
157 static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];
158
159 /* Unit query IDs. */
160 #define UNIT_QID_D1 0
161 #define UNIT_QID_L1 1
162 #define UNIT_QID_S1 2
163 #define UNIT_QID_M1 3
164 #define UNIT_QID_FPS1 4
165 #define UNIT_QID_FPL1 5
166 #define UNIT_QID_ADDDPS1 6
167 #define UNIT_QID_ADDDPL1 7
168 #define UNIT_QID_SIDE_OFFSET 8
169
170 #define RESERVATION_S1 2
171 #define RESERVATION_S2 10
172
173 /* An enum for the unit requirements we count in the UNIT_REQS table. */
174 enum unitreqs
175 {
176 UNIT_REQ_D,
177 UNIT_REQ_L,
178 UNIT_REQ_S,
179 UNIT_REQ_M,
180 UNIT_REQ_DL,
181 UNIT_REQ_DS,
182 UNIT_REQ_LS,
183 UNIT_REQ_DLS,
184 UNIT_REQ_T,
185 UNIT_REQ_X,
186 UNIT_REQ_MAX
187 };
188
189 /* A table used to count unit requirements. Used when computing minimum
190 iteration intervals. */
191 typedef int unit_req_table[2][UNIT_REQ_MAX];
192 static unit_req_table unit_reqs;
193
194 /* Register map for debugging. */
195 unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] =
196 {
197 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* A0 - A15. */
198 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, /* A16 - A31. */
199 50, 51, 52,
200 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, /* B0 - B15. */
201 29, 30, 31,
202 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, /* B16 - B31. */
203 66, 67, 68,
204 -1, -1, -1 /* FP, ARGP, ILC. */
205 };
206
207 /* Allocate a new, cleared machine_function structure. */
208
209 static struct machine_function *
210 c6x_init_machine_status (void)
211 {
212 return ggc_cleared_alloc<machine_function> ();
213 }
214
215 /* Implement TARGET_OPTION_OVERRIDE. */
216
217 static void
218 c6x_option_override (void)
219 {
220 unsigned i;
221
222 if (global_options_set.x_c6x_arch_option)
223 {
224 c6x_arch = all_isas[c6x_arch_option].type;
225 c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
226 c6x_insn_mask |= all_isas[c6x_arch_option].features;
227 }
228
229 c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
230 flag_schedule_insns_after_reload = 0;
231
232 c6x_flag_modulo_sched = flag_modulo_sched;
233 flag_modulo_sched = 0;
234
235 init_machine_status = c6x_init_machine_status;
236
237 for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
238 c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);
239
240 if (flag_pic && !TARGET_DSBT)
241 {
242 error ("-fpic and -fPIC not supported without -mdsbt on this target");
243 flag_pic = 0;
244 }
245 c6x_initial_flag_pic = flag_pic;
246 if (TARGET_DSBT && !flag_pic)
247 flag_pic = 1;
248 }
249
250
251 /* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook. */
252
253 static void
254 c6x_conditional_register_usage (void)
255 {
256 int i;
257 if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
258 for (i = 16; i < 32; i++)
259 {
260 fixed_regs[i] = 1;
261 fixed_regs[32 + i] = 1;
262 }
263 if (TARGET_INSNS_64)
264 {
265 SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
266 REG_A0);
267 SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
268 REG_A0);
269 CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
270 REG_A0);
271 CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
272 REG_A0);
273 }
274 }
275
276 static GTY(()) rtx eqdf_libfunc;
277 static GTY(()) rtx nedf_libfunc;
278 static GTY(()) rtx ledf_libfunc;
279 static GTY(()) rtx ltdf_libfunc;
280 static GTY(()) rtx gedf_libfunc;
281 static GTY(()) rtx gtdf_libfunc;
282 static GTY(()) rtx eqsf_libfunc;
283 static GTY(()) rtx nesf_libfunc;
284 static GTY(()) rtx lesf_libfunc;
285 static GTY(()) rtx ltsf_libfunc;
286 static GTY(()) rtx gesf_libfunc;
287 static GTY(()) rtx gtsf_libfunc;
288 static GTY(()) rtx strasgi_libfunc;
289 static GTY(()) rtx strasgi64p_libfunc;
290
291 /* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
292 functions to match the C6x ABI. */
293
294 static void
295 c6x_init_libfuncs (void)
296 {
297 /* Double-precision floating-point arithmetic. */
298 set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
299 set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
300 set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
301 set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
302 set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");
303
304 /* Single-precision floating-point arithmetic. */
305 set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
306 set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
307 set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
308 set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
309 set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");
310
311 /* Floating-point comparisons. */
312 eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
313 nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
314 lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
315 ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
316 gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
317 gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
318 eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
319 nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
320 ledf_libfunc = init_one_libfunc ("__c6xabi_led");
321 ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
322 gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
323 gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");
324
325 set_optab_libfunc (eq_optab, SFmode, NULL);
326 set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
327 set_optab_libfunc (gt_optab, SFmode, NULL);
328 set_optab_libfunc (ge_optab, SFmode, NULL);
329 set_optab_libfunc (lt_optab, SFmode, NULL);
330 set_optab_libfunc (le_optab, SFmode, NULL);
331 set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
332 set_optab_libfunc (eq_optab, DFmode, NULL);
333 set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
334 set_optab_libfunc (gt_optab, DFmode, NULL);
335 set_optab_libfunc (ge_optab, DFmode, NULL);
336 set_optab_libfunc (lt_optab, DFmode, NULL);
337 set_optab_libfunc (le_optab, DFmode, NULL);
338 set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");
339
340 /* Floating-point to integer conversions. */
341 set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
342 set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
343 set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
344 set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
345 set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
346 set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
347 set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
348 set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");
349
350 /* Conversions between floating types. */
351 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
352 set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");
353
354 /* Integer to floating-point conversions. */
355 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
356 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
357 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
358 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
359 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
360 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
361 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
362 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");
363
364 /* Long long. */
365 set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
366 set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
367 set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
368 set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");
369
370 set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
371 set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
372 set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
373 set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
374 set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
375 set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
376 set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
377 set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
378 set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
379 set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
380 set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");
381
382 /* Block move. */
383 strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
384 strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
385 }
386
387 /* Begin the assembly file. */
388
389 static void
390 c6x_file_start (void)
391 {
392 /* Variable tracking should be run after all optimizations which change order
393 of insns. It also needs a valid CFG. This can't be done in
394 c6x_override_options, because flag_var_tracking is finalized after
395 that. */
396 c6x_flag_var_tracking = flag_var_tracking;
397 flag_var_tracking = 0;
398
399 done_cfi_sections = false;
400 default_file_start ();
401
402 /* Arrays are aligned to 8-byte boundaries. */
403 asm_fprintf (asm_out_file,
404 "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
405 asm_fprintf (asm_out_file,
406 "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");
407
408 /* Stack alignment is 8 bytes. */
409 asm_fprintf (asm_out_file,
410 "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
411 asm_fprintf (asm_out_file,
412 "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");
413
414 #if 0 /* FIXME: Reenable when TI's tools are fixed. */
415 /* ??? Ideally we'd check flag_short_wchar somehow. */
416 asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
417 #endif
418
419 /* We conform to version 1.0 of the ABI. */
420 asm_fprintf (asm_out_file,
421 "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");
422
423 }
424
425 /* The LTO frontend only enables exceptions when it sees a function that
426 uses them. This changes the return value of dwarf2out_do_frame, so we
427 have to check before every function. */
428
429 void
430 c6x_output_file_unwind (FILE * f)
431 {
432 if (done_cfi_sections)
433 return;
434
435 /* Output a .cfi_sections directive. */
436 if (dwarf2out_do_frame ())
437 {
438 if (flag_unwind_tables || flag_exceptions)
439 {
440 if (write_symbols == DWARF2_DEBUG
441 || write_symbols == VMS_AND_DWARF2_DEBUG)
442 asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
443 else
444 asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
445 }
446 else
447 asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
448 done_cfi_sections = true;
449 }
450 }
451
452 /* Output unwind directives at the end of a function. */
453
454 static void
455 c6x_output_fn_unwind (FILE * f)
456 {
457 /* Return immediately if we are not generating unwinding tables. */
458 if (! (flag_unwind_tables || flag_exceptions))
459 return;
460
461 /* If this function will never be unwound, then mark it as such. */
462 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
463 && (TREE_NOTHROW (current_function_decl)
464 || crtl->all_throwers_are_sibcalls))
465 fputs("\t.cantunwind\n", f);
466
467 fputs ("\t.endp\n", f);
468 }
469
470
471 /* Stack and Calling. */
472
473 int argument_registers[10] =
474 {
475 REG_A4, REG_B4,
476 REG_A6, REG_B6,
477 REG_A8, REG_B8,
478 REG_A10, REG_B10,
479 REG_A12, REG_B12
480 };
481
482 /* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h. */
483
484 void
485 c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
486 int n_named_args ATTRIBUTE_UNUSED)
487 {
488 cum->count = 0;
489 cum->nregs = 10;
490 if (!libname && fntype)
491 {
492 /* We need to find out the number of named arguments. Unfortunately,
493 for incoming arguments, N_NAMED_ARGS is set to -1. */
494 if (stdarg_p (fntype))
495 cum->nregs = type_num_arguments (fntype) - 1;
496 if (cum->nregs > 10)
497 cum->nregs = 10;
498 }
499 }
500
501 /* Implements the macro FUNCTION_ARG defined in c6x.h. */
502
503 static rtx
504 c6x_function_arg (cumulative_args_t cum_v, machine_mode mode,
505 const_tree type, bool named ATTRIBUTE_UNUSED)
506 {
507 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
508 if (cum->count >= cum->nregs)
509 return NULL_RTX;
510 if (type)
511 {
512 HOST_WIDE_INT size = int_size_in_bytes (type);
513 if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
514 {
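/* Big-endian aggregates larger than one word are passed in a register
   pair with the halves swapped: the higher-numbered register holds
   bytes 0..3 and the lower-numbered register bytes 4..7, so describe
   that layout with a PARALLEL.  */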
515 if (size > 4)
516 {
517 rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
518 rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
519 rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
520 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
521 return gen_rtx_PARALLEL (mode, vec);
522 }
523 }
524 }
525 return gen_rtx_REG (mode, argument_registers[cum->count]);
526 }
527
528 static void
529 c6x_function_arg_advance (cumulative_args_t cum_v,
530 machine_mode mode ATTRIBUTE_UNUSED,
531 const_tree type ATTRIBUTE_UNUSED,
532 bool named ATTRIBUTE_UNUSED)
533 {
534 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
535 cum->count++;
536 }
537
538
539 /* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
540 upward rather than downward. */
541
542 bool
543 c6x_block_reg_pad_upward (machine_mode mode ATTRIBUTE_UNUSED,
544 const_tree type, bool first)
545 {
546 HOST_WIDE_INT size;
547
548 if (!TARGET_BIG_ENDIAN)
549 return true;
550 if (!first)
551 return true;
552 if (!type)
553 return true;
554 size = int_size_in_bytes (type);
555 return size == 3;
556 }
557
558 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
559
560 static unsigned int
561 c6x_function_arg_boundary (machine_mode mode, const_tree type)
562 {
563 unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
564
565 if (boundary > BITS_PER_WORD)
566 return 2 * BITS_PER_WORD;
567
568 if (mode == BLKmode)
569 {
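/* BLKmode arguments larger than a word are given double-word alignment;
   smaller ones whose natural alignment is below a word get word
   alignment if they occupy 3 or 4 bytes and halfword alignment if
   they occupy 2.  */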
570 HOST_WIDE_INT size = int_size_in_bytes (type);
571 if (size > 4)
572 return 2 * BITS_PER_WORD;
573 if (boundary < BITS_PER_WORD)
574 {
575 if (size >= 3)
576 return BITS_PER_WORD;
577 if (size >= 2)
578 return 2 * BITS_PER_UNIT;
579 }
580 }
581 return boundary;
582 }
583
584 /* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY. */
585 static unsigned int
586 c6x_function_arg_round_boundary (machine_mode mode, const_tree type)
587 {
588 return c6x_function_arg_boundary (mode, type);
589 }
590
591 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
592 where function FUNC returns or receives a value of data type TYPE. */
593
594 static rtx
595 c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
596 bool outgoing ATTRIBUTE_UNUSED)
597 {
598 /* Functions return values in register A4. When returning aggregates, we may
599 have to adjust for endianness. */
600 if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
601 {
602 HOST_WIDE_INT size = int_size_in_bytes (type);
603 if (size > 4)
604 {
605
606 rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
607 rtx reg2 = gen_rtx_REG (SImode, REG_A4);
608 rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
609 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
610 return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
611 }
612 }
613 return gen_rtx_REG (TYPE_MODE (type), REG_A4);
614 }
615
616 /* Implement TARGET_LIBCALL_VALUE. */
617
618 static rtx
619 c6x_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
620 {
621 return gen_rtx_REG (mode, REG_A4);
622 }
623
624 /* TARGET_STRUCT_VALUE_RTX implementation. */
625
626 static rtx
627 c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
628 {
629 return gen_rtx_REG (Pmode, REG_A3);
630 }
631
632 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
633
634 static bool
635 c6x_function_value_regno_p (const unsigned int regno)
636 {
637 return regno == REG_A4;
638 }
639
640 /* Types larger than 64 bits, and variable-sized types, are passed by
641 reference. The callee must copy them; see c6x_callee_copies. */
642
643 static bool
644 c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
645 machine_mode mode, const_tree type,
646 bool named ATTRIBUTE_UNUSED)
647 {
648 int size = -1;
649 if (type)
650 size = int_size_in_bytes (type);
651 else if (mode != VOIDmode)
652 size = GET_MODE_SIZE (mode);
653 return size > 2 * UNITS_PER_WORD || size == -1;
654 }
655
656 /* Decide whether a type should be returned in memory (true)
657 or in a register (false). This is called by the macro
658 TARGET_RETURN_IN_MEMORY. */
659
660 static bool
661 c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
662 {
663 int size = int_size_in_bytes (type);
664 return size > 2 * UNITS_PER_WORD || size == -1;
665 }
666
667 /* Values which must be returned in the most-significant end of the return
668 register. */
669
670 static bool
671 c6x_return_in_msb (const_tree valtype)
672 {
673 HOST_WIDE_INT size = int_size_in_bytes (valtype);
674 return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
675 }
676
677 /* Implement TARGET_CALLEE_COPIES. */
678
679 static bool
680 c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
681 machine_mode mode ATTRIBUTE_UNUSED,
682 const_tree type ATTRIBUTE_UNUSED,
683 bool named ATTRIBUTE_UNUSED)
684 {
685 return true;
686 }
687
688 /* Return the type to use as __builtin_va_list. */
689 static tree
690 c6x_build_builtin_va_list (void)
691 {
692 return build_pointer_type (char_type_node);
693 }
694
695 static void
696 c6x_asm_trampoline_template (FILE *f)
697 {
698 fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
699 fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
700 fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
701 fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
702 fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
703 fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
704 fprintf (f, "\t.long\t0x00000000\n"); /* nop */
705 fprintf (f, "\t.long\t0x00000000\n"); /* nop */
706 }
707
708 /* Emit RTL insns to initialize the variable parts of a trampoline at
709 TRAMP. FNADDR is an RTX for the address of the function's pure
710 code. CXT is an RTX for the static chain value for the function. */
711
712 static void
713 c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
714 {
715 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
716 rtx t1 = copy_to_reg (fnaddr);
717 rtx t2 = copy_to_reg (cxt);
718 rtx mask = gen_reg_rtx (SImode);
719 int i;
720
721 emit_block_move (tramp, assemble_trampoline_template (),
722 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
723
724 emit_move_insn (mask, GEN_INT (0xffff << 7));
725
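/* Patch the 16-bit constant field (bits 7..22, selected by MASK) of each
   instruction word in the template: words 0 and 2 receive the function
   address, words 1 and 3 the static chain.  The mvkl words take the low
   half-word (shifted left by 7 into the field) and the mvkh words the
   high half-word (shifted right by 9).  */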
726 for (i = 0; i < 4; i++)
727 {
728 rtx mem = adjust_address (tramp, SImode, i * 4);
729 rtx t = (i & 1) ? t2 : t1;
730 rtx v1 = gen_reg_rtx (SImode);
731 rtx v2 = gen_reg_rtx (SImode);
732 emit_move_insn (v1, mem);
733 if (i < 2)
734 emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
735 else
736 emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
737 emit_insn (gen_andsi3 (v2, v2, mask));
738 emit_insn (gen_iorsi3 (v2, v2, v1));
739 emit_move_insn (mem, v2);
740 }
741 #ifdef CLEAR_INSN_CACHE
742 tramp = XEXP (tramp, 0);
743 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
744 LCT_NORMAL, VOIDmode, tramp, Pmode,
745 plus_constant (Pmode, tramp, TRAMPOLINE_SIZE), Pmode);
746 #endif
747 }
748
749 /* Determine whether c6x_output_mi_thunk can succeed. */
750
751 static bool
752 c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
753 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
754 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
755 const_tree function ATTRIBUTE_UNUSED)
756 {
757 return !TARGET_LONG_CALLS;
758 }
759
760 /* Output the assembler code for a thunk function. THUNK is the
761 declaration for the thunk function itself, FUNCTION is the decl for
762 the target function. DELTA is an immediate constant offset to be
763 added to THIS. If VCALL_OFFSET is nonzero, the word at
764 *(*this + vcall_offset) should be added to THIS. */
765
766 static void
767 c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
768 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
769 HOST_WIDE_INT vcall_offset, tree function)
770 {
771 rtx xops[5];
772 /* The this parameter is passed as the first argument. */
773 rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);
774
775 c6x_current_insn = NULL;
776
777 xops[4] = XEXP (DECL_RTL (function), 0);
778 if (!vcall_offset)
779 {
780 output_asm_insn ("b .s2 \t%4", xops);
781 if (!delta)
782 output_asm_insn ("nop 5", xops);
783 }
784
785 /* Adjust the this parameter by a fixed constant. */
786 if (delta)
787 {
788 xops[0] = GEN_INT (delta);
789 xops[1] = this_rtx;
790 if (delta >= -16 && delta <= 15)
791 {
792 output_asm_insn ("add .s1 %0, %1, %1", xops);
793 if (!vcall_offset)
794 output_asm_insn ("nop 4", xops);
795 }
796 else if (delta >= 16 && delta < 32)
797 {
798 output_asm_insn ("add .d1 %0, %1, %1", xops);
799 if (!vcall_offset)
800 output_asm_insn ("nop 4", xops);
801 }
802 else if (delta >= -32768 && delta < 32768)
803 {
804 output_asm_insn ("mvk .s1 %0, A0", xops);
805 output_asm_insn ("add .d1 %1, A0, %1", xops);
806 if (!vcall_offset)
807 output_asm_insn ("nop 3", xops);
808 }
809 else
810 {
811 output_asm_insn ("mvkl .s1 %0, A0", xops);
812 output_asm_insn ("mvkh .s1 %0, A0", xops);
813 output_asm_insn ("add .d1 %1, A0, %1", xops);
814 if (!vcall_offset)
815 output_asm_insn ("nop 3", xops);
816 }
817 }
818
819 /* Adjust the this parameter by a value stored in the vtable. */
820 if (vcall_offset)
821 {
822 rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
823 rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);
824
825 xops[1] = a3tmp;
826 xops[2] = a0tmp;
827 xops[3] = gen_rtx_MEM (Pmode, a0tmp);
828 output_asm_insn ("mv .s1 a4, %2", xops);
829 output_asm_insn ("ldw .d1t1 %3, %2", xops);
830
831 /* Adjust the this parameter. */
832 xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
833 vcall_offset));
834 if (!memory_operand (xops[0], Pmode))
835 {
836 rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
837 xops[0] = GEN_INT (vcall_offset);
838 xops[1] = tmp2;
839 output_asm_insn ("mvkl .s1 %0, %1", xops);
840 output_asm_insn ("mvkh .s1 %0, %1", xops);
841 output_asm_insn ("nop 2", xops);
842 output_asm_insn ("add .d1 %2, %1, %2", xops);
843 xops[0] = gen_rtx_MEM (Pmode, a0tmp);
844 }
845 else
846 output_asm_insn ("nop 4", xops);
847 xops[2] = this_rtx;
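/* The branch is issued in parallel with the final load; the nop covers
   the load's delay slots, and the closing add that adjusts the this
   pointer executes in the branch's delay slots.  */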
848 output_asm_insn ("ldw .d1t1 %0, %1", xops);
849 output_asm_insn ("|| b .s2 \t%4", xops);
850 output_asm_insn ("nop 4", xops);
851 output_asm_insn ("add .d1 %2, %1, %2", xops);
852 }
853 }
854
855 /* Return true if EXP goes in small data/bss. */
856
857 static bool
858 c6x_in_small_data_p (const_tree exp)
859 {
860 /* We want to merge strings, so we never consider them small data. */
861 if (TREE_CODE (exp) == STRING_CST)
862 return false;
863
864 /* Functions are never small data. */
865 if (TREE_CODE (exp) == FUNCTION_DECL)
866 return false;
867
868 if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
869 return false;
870
871 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
872 {
873 const char *section = DECL_SECTION_NAME (exp);
874
875 if (strcmp (section, ".neardata") == 0
876 || strncmp (section, ".neardata.", 10) == 0
877 || strncmp (section, ".gnu.linkonce.s.", 16) == 0
878 || strcmp (section, ".bss") == 0
879 || strncmp (section, ".bss.", 5) == 0
880 || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
881 || strcmp (section, ".rodata") == 0
882 || strncmp (section, ".rodata.", 8) == 0
883 || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
884 return true;
885 }
886 else
887 return PLACE_IN_SDATA_P (exp);
888
889 return false;
890 }
891
892 /* Return a section for X. The only special thing we do here is to
893 honor small data. We don't have a tree type, so we can't use the
894 PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
895 everything sized 8 bytes or smaller into small data. */
896
897 static section *
898 c6x_select_rtx_section (machine_mode mode, rtx x,
899 unsigned HOST_WIDE_INT align)
900 {
901 if (c6x_sdata_mode == C6X_SDATA_ALL
902 || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
903 /* ??? Consider using mergeable sdata sections. */
904 return sdata_section;
905 else
906 return default_elf_select_rtx_section (mode, x, align);
907 }
908
909 static section *
910 c6x_elf_select_section (tree decl, int reloc,
911 unsigned HOST_WIDE_INT align)
912 {
913 const char *sname = NULL;
914 unsigned int flags = SECTION_WRITE;
915 if (c6x_in_small_data_p (decl))
916 {
917 switch (categorize_decl_for_section (decl, reloc))
918 {
919 case SECCAT_SRODATA:
920 sname = ".rodata";
921 flags = 0;
922 break;
923 case SECCAT_SDATA:
924 sname = ".neardata";
925 break;
926 case SECCAT_SBSS:
927 sname = ".bss";
928 flags |= SECTION_BSS;
929 default:
930 break;
931 }
932 }
933 else
934 {
935 switch (categorize_decl_for_section (decl, reloc))
936 {
937 case SECCAT_DATA:
938 sname = ".fardata";
939 break;
940 case SECCAT_DATA_REL:
941 sname = ".fardata.rel";
942 break;
943 case SECCAT_DATA_REL_LOCAL:
944 sname = ".fardata.rel.local";
945 break;
946 case SECCAT_DATA_REL_RO:
947 sname = ".fardata.rel.ro";
948 break;
949 case SECCAT_DATA_REL_RO_LOCAL:
950 sname = ".fardata.rel.ro.local";
951 break;
952 case SECCAT_BSS:
953 sname = ".far";
954 flags |= SECTION_BSS;
955 break;
956 case SECCAT_RODATA:
957 sname = ".const";
958 flags = 0;
959 break;
960 case SECCAT_SRODATA:
961 case SECCAT_SDATA:
962 case SECCAT_SBSS:
963 gcc_unreachable ();
964 default:
965 break;
966 }
967 }
968 if (sname)
969 {
970 /* We might get called with string constants, but get_named_section
971 doesn't like them as they are not DECLs. Also, we need to set
972 flags in that case. */
973 if (!DECL_P (decl))
974 return get_section (sname, flags, NULL);
975 return get_named_section (decl, sname, reloc);
976 }
977
978 return default_elf_select_section (decl, reloc, align);
979 }
980
981 /* Build up a unique section name, expressed as a
982 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
983 RELOC indicates whether the initial value of EXP requires
984 link-time relocations. */
985
986 static void ATTRIBUTE_UNUSED
987 c6x_elf_unique_section (tree decl, int reloc)
988 {
989 const char *prefix = NULL;
990 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
991 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
992
993 if (c6x_in_small_data_p (decl))
994 {
995 switch (categorize_decl_for_section (decl, reloc))
996 {
997 case SECCAT_SDATA:
998 prefix = one_only ? ".s" : ".neardata";
999 break;
1000 case SECCAT_SBSS:
1001 prefix = one_only ? ".sb" : ".bss";
1002 break;
1003 case SECCAT_SRODATA:
1004 prefix = one_only ? ".s2" : ".rodata";
1005 break;
1006 case SECCAT_RODATA_MERGE_STR:
1007 case SECCAT_RODATA_MERGE_STR_INIT:
1008 case SECCAT_RODATA_MERGE_CONST:
1009 case SECCAT_RODATA:
1010 case SECCAT_DATA:
1011 case SECCAT_DATA_REL:
1012 case SECCAT_DATA_REL_LOCAL:
1013 case SECCAT_DATA_REL_RO:
1014 case SECCAT_DATA_REL_RO_LOCAL:
1015 gcc_unreachable ();
1016 default:
1017 /* Everything else we place into default sections and hope for the
1018 best. */
1019 break;
1020 }
1021 }
1022 else
1023 {
1024 switch (categorize_decl_for_section (decl, reloc))
1025 {
1026 case SECCAT_DATA:
1027 case SECCAT_DATA_REL:
1028 case SECCAT_DATA_REL_LOCAL:
1029 case SECCAT_DATA_REL_RO:
1030 case SECCAT_DATA_REL_RO_LOCAL:
1031 prefix = one_only ? ".fd" : ".fardata";
1032 break;
1033 case SECCAT_BSS:
1034 prefix = one_only ? ".fb" : ".far";
1035 break;
1036 case SECCAT_RODATA:
1037 case SECCAT_RODATA_MERGE_STR:
1038 case SECCAT_RODATA_MERGE_STR_INIT:
1039 case SECCAT_RODATA_MERGE_CONST:
1040 prefix = one_only ? ".fr" : ".const";
1041 break;
1042 case SECCAT_SRODATA:
1043 case SECCAT_SDATA:
1044 case SECCAT_SBSS:
1045 gcc_unreachable ();
1046 default:
1047 break;
1048 }
1049 }
1050
1051 if (prefix)
1052 {
1053 const char *name, *linkonce;
1054 char *string;
1055
1056 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1057 name = targetm.strip_name_encoding (name);
1058
1059 /* If we're using one_only, then there needs to be a .gnu.linkonce
1060 prefix to the section name. */
1061 linkonce = one_only ? ".gnu.linkonce" : "";
1062
1063 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
1064
1065 set_decl_section_name (decl, string);
1066 return;
1067 }
1068 default_unique_section (decl, reloc);
1069 }
1070
1071 static unsigned int
1072 c6x_section_type_flags (tree decl, const char *name, int reloc)
1073 {
1074 unsigned int flags = 0;
1075
1076 if (strcmp (name, ".far") == 0
1077 || strncmp (name, ".far.", 5) == 0)
1078 flags |= SECTION_BSS;
1079
1080 flags |= default_section_type_flags (decl, name, reloc);
1081
1082 return flags;
1083 }
1084
1085 /* Checks whether the given CALL_EXPR would use a call-saved
1086 register. This is used to decide whether sibling call optimization
1087 could be performed on the respective function call. */
1088
1089 static bool
1090 c6x_call_saved_register_used (tree call_expr)
1091 {
1092 CUMULATIVE_ARGS cum_v;
1093 cumulative_args_t cum;
1094 HARD_REG_SET call_saved_regset;
1095 tree parameter;
1096 machine_mode mode;
1097 tree type;
1098 rtx parm_rtx;
1099 int i;
1100
1101 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
1102 cum = pack_cumulative_args (&cum_v);
1103
1104 COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
1105 for (i = 0; i < call_expr_nargs (call_expr); i++)
1106 {
1107 parameter = CALL_EXPR_ARG (call_expr, i);
1108 gcc_assert (parameter);
1109
1110 /* For an undeclared variable passed as parameter we will get
1111 an ERROR_MARK node here. */
1112 if (TREE_CODE (parameter) == ERROR_MARK)
1113 return true;
1114
1115 type = TREE_TYPE (parameter);
1116 gcc_assert (type);
1117
1118 mode = TYPE_MODE (type);
1119 gcc_assert (mode);
1120
1121 if (pass_by_reference (&cum_v, mode, type, true))
1122 {
1123 mode = Pmode;
1124 type = build_pointer_type (type);
1125 }
1126
1127 parm_rtx = c6x_function_arg (cum, mode, type, 0);
1128
1129 c6x_function_arg_advance (cum, mode, type, 0);
1130
1131 if (!parm_rtx)
1132 continue;
1133
1134 if (REG_P (parm_rtx)
1135 && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
1136 REGNO (parm_rtx)))
1137 return true;
1138 if (GET_CODE (parm_rtx) == PARALLEL)
1139 {
1140 int n = XVECLEN (parm_rtx, 0);
1141 while (n-- > 0)
1142 {
1143 rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
1144 if (REG_P (x)
1145 && overlaps_hard_reg_set_p (call_saved_regset,
1146 GET_MODE (x), REGNO (x)))
1147 return true;
1148 }
1149 }
1150 }
1151 return false;
1152 }
1153
1154 /* Decide whether we can make a sibling call to a function. DECL is the
1155 declaration of the function being targeted by the call and EXP is the
1156 CALL_EXPR representing the call. */
1157
1158 static bool
1159 c6x_function_ok_for_sibcall (tree decl, tree exp)
1160 {
1161 /* Registers A10, A12, B10 and B12 are available as argument
1162 registers but are unfortunately call-saved. This makes functions
1163 needing these registers for arguments not suitable for
1164 sibcalls. */
1165 if (c6x_call_saved_register_used (exp))
1166 return false;
1167
1168 if (!flag_pic)
1169 return true;
1170
1171 if (TARGET_DSBT)
1172 {
1173 /* When compiling for DSBT, the calling function must be local,
1174 so that when we reload B14 in the sibcall epilogue, it will
1175 not change its value. */
1176 struct cgraph_local_info *this_func;
1177
1178 if (!decl)
1179 /* Not enough information. */
1180 return false;
1181
1182 this_func = cgraph_node::local_info (current_function_decl);
1183 return this_func->local;
1184 }
1185
1186 return true;
1187 }
1188
1189 /* Return true if DECL is known to be linked into section SECTION. */
1190
1191 static bool
1192 c6x_function_in_section_p (tree decl, section *section)
1193 {
1194 /* We can only be certain about functions defined in the same
1195 compilation unit. */
1196 if (!TREE_STATIC (decl))
1197 return false;
1198
1199 /* Make sure that SYMBOL always binds to the definition in this
1200 compilation unit. */
1201 if (!targetm.binds_local_p (decl))
1202 return false;
1203
1204 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
1205 if (!DECL_SECTION_NAME (decl))
1206 {
1207 /* Make sure that we will not create a unique section for DECL. */
1208 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
1209 return false;
1210 }
1211
1212 return function_section (decl) == section;
1213 }
1214
1215 /* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
1216 as a long call. */
1217 bool
1218 c6x_long_call_p (rtx op)
1219 {
1220 tree decl;
1221
1222 if (!TARGET_LONG_CALLS)
1223 return false;
1224
1225 decl = SYMBOL_REF_DECL (op);
1226
1227 /* Try to determine whether the symbol is in the same section as the current
1228 function. Be conservative, and only cater for cases in which the
1229 whole of the current function is placed in the same section. */
1230 if (decl != NULL_TREE
1231 && !flag_reorder_blocks_and_partition
1232 && TREE_CODE (decl) == FUNCTION_DECL
1233 && c6x_function_in_section_p (decl, current_function_section ()))
1234 return false;
1235
1236 return true;
1237 }
1238
1239 /* Emit the sequence for a call. */
1240 void
1241 c6x_expand_call (rtx retval, rtx address, bool sibcall)
1242 {
1243 rtx callee = XEXP (address, 0);
1244 rtx call_insn;
1245
1246 if (!c6x_call_operand (callee, Pmode))
1247 {
1248 callee = force_reg (Pmode, callee);
1249 address = change_address (address, Pmode, callee);
1250 }
1251 call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
1252 if (sibcall)
1253 {
1254 call_insn = emit_call_insn (call_insn);
1255 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
1256 gen_rtx_REG (Pmode, REG_B3));
1257 }
1258 else
1259 {
1260 if (retval == NULL_RTX)
1261 call_insn = emit_call_insn (call_insn);
1262 else
1263 call_insn = emit_call_insn (gen_rtx_SET (retval, call_insn));
1264 }
1265 if (flag_pic)
1266 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
1267 }
1268
1269 /* Legitimize PIC addresses. If the address is already position-independent,
1270 we return ORIG. Newly generated position-independent addresses go into a
1271 reg. This is REG if nonzero, otherwise we allocate register(s) as
1272 necessary. PICREG is the register holding the pointer to the PIC offset
1273 table. */
1274
1275 static rtx
1276 legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
1277 {
1278 rtx addr = orig;
1279 rtx new_rtx = orig;
1280
1281 if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
1282 {
1283 int unspec = UNSPEC_LOAD_GOT;
1284 rtx tmp;
1285
1286 if (reg == 0)
1287 {
1288 gcc_assert (can_create_pseudo_p ());
1289 reg = gen_reg_rtx (Pmode);
1290 }
1291 if (flag_pic == 2)
1292 {
1293 if (can_create_pseudo_p ())
1294 tmp = gen_reg_rtx (Pmode);
1295 else
1296 tmp = reg;
1297 emit_insn (gen_movsi_gotoff_high (tmp, addr));
1298 emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
1299 emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
1300 }
1301 else
1302 {
1303 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
1304 new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
1305
1306 emit_move_insn (reg, new_rtx);
1307 }
1308 if (picreg == pic_offset_table_rtx)
1309 crtl->uses_pic_offset_table = 1;
1310 return reg;
1311 }
1312
1313 else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
1314 {
1315 rtx base;
1316
1317 if (GET_CODE (addr) == CONST)
1318 {
1319 addr = XEXP (addr, 0);
1320 gcc_assert (GET_CODE (addr) == PLUS);
1321 }
1322
1323 if (XEXP (addr, 0) == picreg)
1324 return orig;
1325
1326 if (reg == 0)
1327 {
1328 gcc_assert (can_create_pseudo_p ());
1329 reg = gen_reg_rtx (Pmode);
1330 }
1331
1332 base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
1333 addr = legitimize_pic_address (XEXP (addr, 1),
1334 base == reg ? NULL_RTX : reg,
1335 picreg);
1336
1337 if (GET_CODE (addr) == CONST_INT)
1338 {
1339 gcc_assert (! reload_in_progress && ! reload_completed);
1340 addr = force_reg (Pmode, addr);
1341 }
1342
1343 if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
1344 {
1345 base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
1346 addr = XEXP (addr, 1);
1347 }
1348
1349 return gen_rtx_PLUS (Pmode, base, addr);
1350 }
1351
1352 return new_rtx;
1353 }
1354
1355 /* Expand a move operation in mode MODE. The operands are in OPERANDS.
1356 Returns true if no further code must be generated, false if the caller
1357 should generate an insn to move OPERANDS[1] to OPERANDS[0]. */
1358
1359 bool
1360 expand_move (rtx *operands, machine_mode mode)
1361 {
1362 rtx dest = operands[0];
1363 rtx op = operands[1];
1364
1365 if ((reload_in_progress | reload_completed) == 0
1366 && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
1367 operands[1] = force_reg (mode, op);
1368 else if (mode == SImode && symbolic_operand (op, SImode))
1369 {
1370 if (flag_pic)
1371 {
1372 if (sdata_symbolic_operand (op, SImode))
1373 {
1374 emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
1375 crtl->uses_pic_offset_table = 1;
1376 return true;
1377 }
1378 else
1379 {
1380 rtx temp = (reload_completed || reload_in_progress
1381 ? dest : gen_reg_rtx (Pmode));
1382
1383 operands[1] = legitimize_pic_address (op, temp,
1384 pic_offset_table_rtx);
1385 }
1386 }
1387 else if (reload_completed
1388 && !sdata_symbolic_operand (op, SImode))
1389 {
1390 emit_insn (gen_movsi_high (dest, op));
1391 emit_insn (gen_movsi_lo_sum (dest, dest, op));
1392 return true;
1393 }
1394 }
1395 return false;
1396 }
1397
1398 /* This function is called when we're about to expand an integer compare
1399 operation which performs COMPARISON. It examines the second operand,
1400 and if it is an integer constant that cannot be used directly on the
1401 current machine in a comparison insn, it returns true. */
1402 bool
1403 c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
1404 {
1405 if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
1406 return false;
1407
1408 if ((code == EQ || code == LT || code == GT)
1409 && !satisfies_constraint_Is5 (op))
1410 return true;
1411 if ((code == GTU || code == LTU)
1412 && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
1413 return true;
1414
1415 return false;
1416 }
1417
1418 /* Emit comparison instruction if necessary, returning the expression
1419 that holds the compare result in the proper mode. Return the comparison
1420 that should be used in the jump insn. */
1421
1422 rtx
1423 c6x_expand_compare (rtx comparison, machine_mode mode)
1424 {
1425 enum rtx_code code = GET_CODE (comparison);
1426 rtx op0 = XEXP (comparison, 0);
1427 rtx op1 = XEXP (comparison, 1);
1428 rtx cmp;
1429 enum rtx_code jump_code = code;
1430 machine_mode op_mode = GET_MODE (op0);
1431
1432 if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
1433 {
1434 rtx t = gen_reg_rtx (SImode);
1435 emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
1436 gen_highpart (SImode, op0)));
1437 op_mode = SImode;
1438 cmp = t;
1439 }
1440 else if (op_mode == DImode)
1441 {
1442 rtx lo[2], high[2];
1443 rtx cmp1, cmp2;
1444
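/* Split the 64-bit comparison into 32-bit pieces.  Conditions without a
   direct compare instruction are first reversed so that only EQ, GT, LT,
   GTU and LTU remain.  The result is then the high-word comparison ORed
   with an unsigned low-word comparison qualified by the high words being
   equal, or, for equality, the AND of the two word comparisons.  */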
1445 if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
1446 {
1447 code = reverse_condition (code);
1448 jump_code = EQ;
1449 }
1450 else
1451 jump_code = NE;
1452
1453 split_di (&op0, 1, lo, high);
1454 split_di (&op1, 1, lo + 1, high + 1);
1455
1456 if (c6x_force_op_for_comparison_p (code, high[1])
1457 || c6x_force_op_for_comparison_p (EQ, high[1]))
1458 high[1] = force_reg (SImode, high[1]);
1459
1460 cmp1 = gen_reg_rtx (SImode);
1461 cmp2 = gen_reg_rtx (SImode);
1462 emit_insn (gen_rtx_SET (cmp1, gen_rtx_fmt_ee (code, SImode,
1463 high[0], high[1])));
1464 if (code == EQ)
1465 {
1466 if (c6x_force_op_for_comparison_p (code, lo[1]))
1467 lo[1] = force_reg (SImode, lo[1]);
1468 emit_insn (gen_rtx_SET (cmp2, gen_rtx_fmt_ee (code, SImode,
1469 lo[0], lo[1])));
1470 emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
1471 }
1472 else
1473 {
1474 emit_insn (gen_rtx_SET (cmp2, gen_rtx_EQ (SImode, high[0],
1475 high[1])));
1476 if (code == GT)
1477 code = GTU;
1478 else if (code == LT)
1479 code = LTU;
1480 if (c6x_force_op_for_comparison_p (code, lo[1]))
1481 lo[1] = force_reg (SImode, lo[1]);
1482 emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
1483 lo[0], lo[1]),
1484 lo[0], lo[1], cmp2));
1485 emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
1486 }
1487 cmp = cmp1;
1488 }
1489 else if (TARGET_FP && !flag_finite_math_only
1490 && (op_mode == DFmode || op_mode == SFmode)
1491 && code != EQ && code != NE && code != LT && code != GT
1492 && code != UNLE && code != UNGE)
1493 {
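/* Without -ffinite-math-only, the remaining float comparisons are built
   by ORing together two or three of the primitive LT, GT and EQ
   comparisons; the UNORDERED and UN* forms then invert the sense of the
   jump instead of testing the unordered case directly.  */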
1494 enum rtx_code code1, code2, code3;
1495 rtx (*fn) (rtx, rtx, rtx, rtx, rtx);
1496
1497 jump_code = NE;
1498 code3 = UNKNOWN;
1499 switch (code)
1500 {
1501 case UNLT:
1502 case UNGT:
1503 jump_code = EQ;
1504 /* fall through */
1505 case LE:
1506 case GE:
1507 code1 = code == LE || code == UNGT ? LT : GT;
1508 code2 = EQ;
1509 break;
1510
1511 case UNORDERED:
1512 jump_code = EQ;
1513 /* fall through */
1514 case ORDERED:
1515 code3 = EQ;
1516 /* fall through */
1517 case LTGT:
1518 code1 = LT;
1519 code2 = GT;
1520 break;
1521
1522 case UNEQ:
1523 code1 = LT;
1524 code2 = GT;
1525 jump_code = EQ;
1526 break;
1527
1528 default:
1529 gcc_unreachable ();
1530 }
1531
1532 cmp = gen_reg_rtx (SImode);
1533 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code1, SImode, op0, op1)));
1534 fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
1535 emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
1536 op0, op1, cmp));
1537 if (code3 != UNKNOWN)
1538 emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
1539 op0, op1, cmp));
1540 }
1541 else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
1542 cmp = op0;
1543 else
1544 {
1545 bool is_fp_libfunc;
1546 is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);
1547
1548 if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
1549 && !is_fp_libfunc)
1550 {
1551 code = reverse_condition (code);
1552 jump_code = EQ;
1553 }
1554 else if (code == UNGE)
1555 {
1556 code = LT;
1557 jump_code = EQ;
1558 }
1559 else if (code == UNLE)
1560 {
1561 code = GT;
1562 jump_code = EQ;
1563 }
1564 else
1565 jump_code = NE;
1566
1567 if (is_fp_libfunc)
1568 {
1569 rtx_insn *insns;
1570 rtx libfunc;
1571 switch (code)
1572 {
1573 case EQ:
1574 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
1575 break;
1576 case NE:
1577 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
1578 break;
1579 case GT:
1580 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
1581 break;
1582 case GE:
1583 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
1584 break;
1585 case LT:
1586 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
1587 break;
1588 case LE:
1589 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
1590 break;
1591 default:
1592 gcc_unreachable ();
1593 }
1594 start_sequence ();
1595
1596 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
1597 op0, op_mode, op1, op_mode);
1598 insns = get_insns ();
1599 end_sequence ();
1600
1601 emit_libcall_block (insns, cmp, cmp,
1602 gen_rtx_fmt_ee (code, SImode, op0, op1));
1603 }
1604 else
1605 {
1606 cmp = gen_reg_rtx (SImode);
1607 if (c6x_force_op_for_comparison_p (code, op1))
1608 op1 = force_reg (SImode, op1);
1609 emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, SImode,
1610 op0, op1)));
1611 }
1612 }
1613
1614 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
1615 }
1616
1617 /* Return one word of double-word value OP. HIGH_P is true to select the
1618 high part, false to select the low part. When encountering auto-increment
1619 addressing, we make the assumption that the low part is going to be accessed
1620 first. */
1621
1622 rtx
1623 c6x_subword (rtx op, bool high_p)
1624 {
1625 unsigned int byte;
1626 machine_mode mode;
1627
1628 mode = GET_MODE (op);
1629 if (mode == VOIDmode)
1630 mode = DImode;
1631
1632 if (TARGET_BIG_ENDIAN ? !high_p : high_p)
1633 byte = UNITS_PER_WORD;
1634 else
1635 byte = 0;
1636
1637 if (MEM_P (op))
1638 {
1639 rtx addr = XEXP (op, 0);
1640 if (GET_CODE (addr) == PLUS || REG_P (addr))
1641 return adjust_address (op, word_mode, byte);
1642 /* FIXME: should really support autoincrement addressing for
1643 multi-word modes. */
1644 gcc_unreachable ();
1645 }
1646
1647 return simplify_gen_subreg (word_mode, op, mode, byte);
1648 }
1649
1650 /* Split one or more DImode RTL references into pairs of SImode
1651 references. The RTL can be REG, offsettable MEM, integer constant, or
1652 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
1653 split and "num" is its length. lo_half and hi_half are output arrays
1654 that parallel "operands". */
1655
1656 void
1657 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
1658 {
1659 while (num--)
1660 {
1661 rtx op = operands[num];
1662
1663 lo_half[num] = c6x_subword (op, false);
1664 hi_half[num] = c6x_subword (op, true);
1665 }
1666 }
1667
1668 /* Return true if VAL is a mask valid for a clr instruction. */
1669 bool
1670 c6x_valid_mask_p (HOST_WIDE_INT val)
1671 {
1672 int i;
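/* A valid mask is a run of ones, followed by a run of zeros, followed by
   ones up to bit 31 (either outer run may be empty): the complement of a
   single contiguous bit field.  */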
1673 for (i = 0; i < 32; i++)
1674 if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1675 break;
1676 for (; i < 32; i++)
1677 if (val & ((unsigned HOST_WIDE_INT)1 << i))
1678 break;
1679 for (; i < 32; i++)
1680 if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1681 return false;
1682 return true;
1683 }
1684
1685 /* Expand a block move for a movmemM pattern. */
1686
1687 bool
1688 c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
1689 rtx expected_align_exp ATTRIBUTE_UNUSED,
1690 rtx expected_size_exp ATTRIBUTE_UNUSED)
1691 {
1692 unsigned HOST_WIDE_INT align = 1;
1693 unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
1694 unsigned HOST_WIDE_INT count = 0, offset = 0;
1695 unsigned int biggest_move = TARGET_STDW ? 8 : 4;
1696
1697 if (CONST_INT_P (align_exp))
1698 align = INTVAL (align_exp);
1699
1700 src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
1701 dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
1702 min_mem_align = MIN (src_mem_align, dst_mem_align);
1703
1704 if (min_mem_align > align)
1705 align = min_mem_align / BITS_PER_UNIT;
1706 if (src_mem_align < align)
1707 src_mem_align = align;
1708 if (dst_mem_align < align)
1709 dst_mem_align = align;
1710
1711 if (CONST_INT_P (count_exp))
1712 count = INTVAL (count_exp);
1713 else
1714 return false;
1715
1716 /* Make sure we don't need to care about overflow later on. */
1717 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
1718 return false;
1719
1720 if (count >= 28 && (count & 3) == 0 && align >= 4)
1721 {
1722 tree dst_expr = MEM_EXPR (dst);
1723 tree src_expr = MEM_EXPR (src);
1724 rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
1725 rtx srcreg = force_reg (Pmode, XEXP (src, 0));
1726 rtx dstreg = force_reg (Pmode, XEXP (dst, 0));
1727
1728 if (src_expr)
1729 mark_addressable (src_expr);
1730 if (dst_expr)
1731 mark_addressable (dst_expr);
1732 emit_library_call (fn, LCT_NORMAL, VOIDmode,
1733 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
1734 return true;
1735 }
1736
1737 if (biggest_move > align && !TARGET_INSNS_64)
1738 biggest_move = align;
1739
1740 if (count / biggest_move > 7)
1741 return false;
1742
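/* Copy the block in pieces: load the largest chunk the source alignment
   allows (using a misaligned move insn where needed), then store it to
   the destination in equal or smaller chunks, shifting out sub-words as
   needed for the narrower stores.  */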
1743 while (count > 0)
1744 {
1745 rtx reg, reg_lowpart;
1746 machine_mode srcmode, dstmode;
1747 unsigned HOST_WIDE_INT src_size, dst_size, src_left;
1748 int shift;
1749 rtx srcmem, dstmem;
1750
1751 while (biggest_move > count)
1752 biggest_move /= 2;
1753
1754 src_size = dst_size = biggest_move;
1755 if (src_size > src_mem_align && src_size == 2)
1756 src_size = 1;
1757 if (dst_size > dst_mem_align && dst_size == 2)
1758 dst_size = 1;
1759
1760 if (dst_size > src_size)
1761 dst_size = src_size;
1762
1763 srcmode = int_mode_for_size (src_size * BITS_PER_UNIT, 0).require ();
1764 dstmode = int_mode_for_size (dst_size * BITS_PER_UNIT, 0).require ();
1765 if (src_size >= 4)
1766 reg_lowpart = reg = gen_reg_rtx (srcmode);
1767 else
1768 {
1769 reg = gen_reg_rtx (SImode);
1770 reg_lowpart = gen_lowpart (srcmode, reg);
1771 }
1772
1773 srcmem = adjust_address (copy_rtx (src), srcmode, offset);
1774
1775 if (src_size > src_mem_align)
1776 {
1777 enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
1778 : CODE_FOR_movmisaligndi);
1779 emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
1780 }
1781 else
1782 emit_move_insn (reg_lowpart, srcmem);
1783
1784 src_left = src_size;
1785 shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT : 0;
1786 while (src_left > 0)
1787 {
1788 rtx dstreg = reg_lowpart;
1789
1790 if (src_size > dst_size)
1791 {
1792 rtx srcword = reg;
1793 int shift_amount = shift & (BITS_PER_WORD - 1);
1794 if (src_size > 4)
1795 srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
1796 SImode);
1797 if (shift_amount > 0)
1798 {
1799 dstreg = gen_reg_rtx (SImode);
1800 emit_insn (gen_lshrsi3 (dstreg, srcword,
1801 GEN_INT (shift_amount)));
1802 }
1803 else
1804 dstreg = srcword;
1805 dstreg = gen_lowpart (dstmode, dstreg);
1806 }
1807
1808 dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
1809 if (dst_size > dst_mem_align)
1810 {
1811 enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
1812 : CODE_FOR_movmisaligndi);
1813 emit_insn (GEN_FCN (icode) (dstmem, dstreg));
1814 }
1815 else
1816 emit_move_insn (dstmem, dstreg);
1817
1818 if (TARGET_BIG_ENDIAN)
1819 shift -= dst_size * BITS_PER_UNIT;
1820 else
1821 shift += dst_size * BITS_PER_UNIT;
1822 offset += dst_size;
1823 src_left -= dst_size;
1824 }
1825 count -= src_size;
1826 }
1827 return true;
1828 }
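
/* For illustration (an added note, not from the original sources): assuming
   a copy of count == 6 bytes with both operands known to be 4-byte aligned,
   the loop above first shrinks biggest_move to 4 (since 8 > 6), emits one
   SImode load/store pair for bytes 0-3, then shrinks to 2 and emits one
   HImode pair for bytes 4-5, leaving count == 0.  */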
1829
1830 /* Subroutine of print_address_operand, print a single address offset OFF for
1831 a memory access of mode MEM_MODE, choosing between normal form and scaled
1832 form depending on the type of the insn. Misaligned memory references must
1833 use the scaled form. */
1834
1835 static void
1836 print_address_offset (FILE *file, rtx off, machine_mode mem_mode)
1837 {
1838 rtx pat;
1839
1840 if (c6x_current_insn != NULL_RTX)
1841 {
1842 pat = PATTERN (c6x_current_insn);
1843 if (GET_CODE (pat) == COND_EXEC)
1844 pat = COND_EXEC_CODE (pat);
1845 if (GET_CODE (pat) == PARALLEL)
1846 pat = XVECEXP (pat, 0, 0);
1847
1848 if (GET_CODE (pat) == SET
1849 && GET_CODE (SET_SRC (pat)) == UNSPEC
1850 && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
1851 {
1852 gcc_assert (CONST_INT_P (off)
1853 && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
1854 fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1855 INTVAL (off) / GET_MODE_SIZE (mem_mode));
1856 return;
1857 }
1858 }
1859 fputs ("(", file);
1860 output_address (mem_mode, off);
1861 fputs (")", file);
1862 }
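
/* Illustrative note (an assumption added for clarity): for a misaligned
   SImode access with a constant offset of 8, the UNSPEC check above selects
   the scaled form and prints "[2]" (8 divided by the 4-byte access size);
   an ordinary access instead prints the offset through output_address,
   e.g. "(8)".  */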
1863
1864 static bool
1865 c6x_print_operand_punct_valid_p (unsigned char c)
1866 {
1867 return c == '$' || c == '.' || c == '|';
1868 }
1869
1870 static void c6x_print_operand (FILE *, rtx, int);
1871
1872 /* Subroutine of c6x_print_operand; used to print a memory reference X to FILE. */
1873
1874 static void
1875 c6x_print_address_operand (FILE *file, rtx x, machine_mode mem_mode)
1876 {
1877 rtx off;
1878 switch (GET_CODE (x))
1879 {
1880 case PRE_MODIFY:
1881 case POST_MODIFY:
1882 if (GET_CODE (x) == POST_MODIFY)
1883 output_address (mem_mode, XEXP (x, 0));
1884 off = XEXP (XEXP (x, 1), 1);
1885 if (XEXP (x, 0) == stack_pointer_rtx)
1886 {
1887 if (GET_CODE (x) == PRE_MODIFY)
1888 gcc_assert (INTVAL (off) > 0);
1889 else
1890 gcc_assert (INTVAL (off) < 0);
1891 }
1892 if (CONST_INT_P (off) && INTVAL (off) < 0)
1893 {
1894 fprintf (file, "--");
1895 off = GEN_INT (-INTVAL (off));
1896 }
1897 else
1898 fprintf (file, "++");
1899 if (GET_CODE (x) == PRE_MODIFY)
1900 output_address (mem_mode, XEXP (x, 0));
1901 print_address_offset (file, off, mem_mode);
1902 break;
1903
1904 case PLUS:
1905 off = XEXP (x, 1);
1906 if (CONST_INT_P (off) && INTVAL (off) < 0)
1907 {
1908 fprintf (file, "-");
1909 off = GEN_INT (-INTVAL (off));
1910 }
1911 else
1912 fprintf (file, "+");
1913 output_address (mem_mode, XEXP (x, 0));
1914 print_address_offset (file, off, mem_mode);
1915 break;
1916
1917 case PRE_DEC:
1918 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1919 fprintf (file, "--");
1920 output_address (mem_mode, XEXP (x, 0));
1921 fprintf (file, "[1]");
1922 break;
1923 case PRE_INC:
1924 fprintf (file, "++");
1925 output_address (mem_mode, XEXP (x, 0));
1926 fprintf (file, "[1]");
1927 break;
1928 case POST_INC:
1929 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1930 output_address (mem_mode, XEXP (x, 0));
1931 fprintf (file, "++[1]");
1932 break;
1933 case POST_DEC:
1934 output_address (mem_mode, XEXP (x, 0));
1935 fprintf (file, "--[1]");
1936 break;
1937
1938 case SYMBOL_REF:
1939 case CONST:
1940 case LABEL_REF:
1941 gcc_assert (sdata_symbolic_operand (x, Pmode));
1942 fprintf (file, "+B14(");
1943 output_addr_const (file, x);
1944 fprintf (file, ")");
1945 break;
1946
1947 case UNSPEC:
1948 switch (XINT (x, 1))
1949 {
1950 case UNSPEC_LOAD_GOT:
1951 fputs ("$GOT(", file);
1952 output_addr_const (file, XVECEXP (x, 0, 0));
1953 fputs (")", file);
1954 break;
1955 case UNSPEC_LOAD_SDATA:
1956 output_addr_const (file, XVECEXP (x, 0, 0));
1957 break;
1958 default:
1959 gcc_unreachable ();
1960 }
1961 break;
1962
1963 default:
1964 gcc_assert (GET_CODE (x) != MEM);
1965 c6x_print_operand (file, x, 0);
1966 break;
1967 }
1968 }
1969
1970 /* Return a single character, either 'l', 's', 'd' or 'm', which specifies
1971 the functional unit used by INSN. */
1972
1973 char
1974 c6x_get_unit_specifier (rtx_insn *insn)
1975 {
1976 enum attr_units units;
1977
1978 if (insn_info.exists ())
1979 {
1980 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
1981 return c6x_unit_names[unit][0];
1982 }
1983
1984 units = get_attr_units (insn);
1985 switch (units)
1986 {
1987 case UNITS_D:
1988 case UNITS_DL:
1989 case UNITS_DS:
1990 case UNITS_DLS:
1991 case UNITS_D_ADDR:
1992 return 'd';
1993 case UNITS_L:
1994 case UNITS_LS:
1995 return 'l';
1996 case UNITS_S:
1997 return 's';
1998 case UNITS_M:
1999 return 'm';
2000 default:
2001 gcc_unreachable ();
2002 }
2003 }
2004
2005 /* Print the unit specifier field for INSN to FILE. */
2006 static void
2007 c6x_print_unit_specifier_field (FILE *file, rtx_insn *insn)
2008 {
2009 enum attr_units units = get_attr_units (insn);
2010 enum attr_cross cross = get_attr_cross (insn);
2011 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
2012 int half;
2013 char unitspec;
2014
2015 if (units == UNITS_D_ADDR)
2016 {
2017 enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
2018 int t_half;
2019 gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
2020 half = arf == ADDR_REGFILE_A ? 1 : 2;
2021 t_half = rf == DEST_REGFILE_A ? 1 : 2;
2022 fprintf (file, ".d%dt%d", half, t_half);
2023 return;
2024 }
2025
2026 if (insn_info.exists ())
2027 {
2028 int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
2029 fputs (".", file);
2030 fputs (c6x_unit_names[unit], file);
2031 if (cross == CROSS_Y)
2032 fputs ("x", file);
2033 return;
2034 }
2035
2036 gcc_assert (rf != DEST_REGFILE_UNKNOWN);
2037 unitspec = c6x_get_unit_specifier (insn);
2038 half = rf == DEST_REGFILE_A ? 1 : 2;
2039 fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
2040 }
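
/* Worked example (hypothetical, for illustration only): a D-unit
   address-generating insn (UNITS_D_ADDR) whose address register is in file A
   and whose data register is in file B prints ".d1t2"; a multiply with a
   B-side destination using a cross path prints ".m2x".  */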
2041
2042 /* Output assembly language for the address ADDR to FILE. */
2043 static void
2044 c6x_print_operand_address (FILE *file, machine_mode mode, rtx addr)
2045 {
2046 c6x_print_address_operand (file, addr, mode);
2047 }
2048
2049 /* Print an operand, X, to FILE, with an optional modifier in CODE.
2050
2051 Meaning of CODE:
2052 $ -- print the unit specifier field for the instruction.
2053 . -- print the predicate for the instruction or an empty string for an
2054 unconditional one.
2055 | -- print "||" if the insn should be issued in parallel with the previous
2056 one.
2057
2058 C -- print an opcode suffix for a reversed condition
2059 d -- H, W or D as a suffix for ADDA, based on the factor given by the
2060 operand
2061 D -- print either B, H, W or D as a suffix for ADDA, based on the size of
2062 the operand
2063 J -- print a predicate
2064 j -- like J, but use reverse predicate
2065 k -- treat a CONST_INT as a register number and print it as a register
2066 K -- like k, but print out a doubleword register
2067 n -- print an integer operand, negated
2068 p -- print the low part of a DImode register
2069 P -- print the high part of a DImode register
2070 r -- print the absolute value of an integer operand, shifted right by 1
2071 R -- print the absolute value of an integer operand, shifted right by 2
2072 f -- the first clear bit in an integer operand assumed to be a mask for
2073 a clr instruction
2074 F -- the last clear bit in such a mask
2075 s -- the first set bit in an integer operand assumed to be a mask for
2076 a set instruction
2077 S -- the last set bit in such a mask
2078 U -- print either 1 or 2, depending on the side of the machine used by
2079 the operand */
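
/* As a worked example of the mask modifiers above (illustrative only):
   for the CONST_INT 0xffffff0f, 'f' prints 4 (the first clear bit) and
   'F' prints 7 (the last clear bit of that run); for 0x00000ff0, 's'
   prints 4 and 'S' prints 11.  */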
2080
2081 static void
2082 c6x_print_operand (FILE *file, rtx x, int code)
2083 {
2084 int i;
2085 HOST_WIDE_INT v;
2086 tree t;
2087 machine_mode mode;
2088
2089 if (code == '|')
2090 {
2091 if (GET_MODE (c6x_current_insn) != TImode)
2092 fputs ("||", file);
2093 return;
2094 }
2095 if (code == '$')
2096 {
2097 c6x_print_unit_specifier_field (file, c6x_current_insn);
2098 return;
2099 }
2100
2101 if (code == '.')
2102 {
2103 x = current_insn_predicate;
2104 if (x)
2105 {
2106 unsigned int regno = REGNO (XEXP (x, 0));
2107 fputs ("[", file);
2108 if (GET_CODE (x) == EQ)
2109 fputs ("!", file);
2110 fputs (reg_names [regno], file);
2111 fputs ("]", file);
2112 }
2113 return;
2114 }
2115
2116 mode = GET_MODE (x);
2117
2118 switch (code)
2119 {
2120 case 'C':
2121 case 'c':
2122 {
2123 enum rtx_code c = GET_CODE (x);
2124 if (code == 'C')
2125 c = swap_condition (c);
2126 fputs (GET_RTX_NAME (c), file);
2127 }
2128 return;
2129
2130 case 'J':
2131 case 'j':
2132 {
2133 unsigned int regno = REGNO (XEXP (x, 0));
2134 if ((GET_CODE (x) == EQ) == (code == 'J'))
2135 fputs ("!", file);
2136 fputs (reg_names [regno], file);
2137 }
2138 return;
2139
2140 case 'k':
2141 gcc_assert (GET_CODE (x) == CONST_INT);
2142 v = INTVAL (x);
2143 fprintf (file, "%s", reg_names[v]);
2144 return;
2145 case 'K':
2146 gcc_assert (GET_CODE (x) == CONST_INT);
2147 v = INTVAL (x);
2148 gcc_assert ((v & 1) == 0);
2149 fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
2150 return;
2151
2152 case 's':
2153 case 'S':
2154 case 'f':
2155 case 'F':
2156 gcc_assert (GET_CODE (x) == CONST_INT);
2157 v = INTVAL (x);
2158 for (i = 0; i < 32; i++)
2159 {
2160 HOST_WIDE_INT tst = v & 1;
2161 if (((code == 'f' || code == 'F') && !tst)
2162 || ((code == 's' || code == 'S') && tst))
2163 break;
2164 v >>= 1;
2165 }
2166 if (code == 'f' || code == 's')
2167 {
2168 fprintf (file, "%d", i);
2169 return;
2170 }
2171 for (;i < 32; i++)
2172 {
2173 HOST_WIDE_INT tst = v & 1;
2174 if ((code == 'F' && tst) || (code == 'S' && !tst))
2175 break;
2176 v >>= 1;
2177 }
2178 fprintf (file, "%d", i - 1);
2179 return;
2180
2181 case 'n':
2182 gcc_assert (GET_CODE (x) == CONST_INT);
2183 output_addr_const (file, GEN_INT (-INTVAL (x)));
2184 return;
2185
2186 case 'r':
2187 gcc_assert (GET_CODE (x) == CONST_INT);
2188 v = INTVAL (x);
2189 if (v < 0)
2190 v = -v;
2191 output_addr_const (file, GEN_INT (v >> 1));
2192 return;
2193
2194 case 'R':
2195 gcc_assert (GET_CODE (x) == CONST_INT);
2196 v = INTVAL (x);
2197 if (v < 0)
2198 v = -v;
2199 output_addr_const (file, GEN_INT (v >> 2));
2200 return;
2201
2202 case 'd':
2203 gcc_assert (GET_CODE (x) == CONST_INT);
2204 v = INTVAL (x);
2205 fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
2206 return;
2207
2208 case 'p':
2209 case 'P':
2210 gcc_assert (GET_CODE (x) == REG);
2211 v = REGNO (x);
2212 if (code == 'P')
2213 v++;
2214 fputs (reg_names[v], file);
2215 return;
2216
2217 case 'D':
2218 v = 0;
2219 if (GET_CODE (x) == CONST)
2220 {
2221 x = XEXP (x, 0);
2222 gcc_assert (GET_CODE (x) == PLUS);
2223 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
2224 v = INTVAL (XEXP (x, 1));
2225 x = XEXP (x, 0);
2226
2227 }
2228 gcc_assert (GET_CODE (x) == SYMBOL_REF);
2229
2230 t = SYMBOL_REF_DECL (x);
2231 if (DECL_P (t))
2232 v |= DECL_ALIGN_UNIT (t);
2233 else
2234 v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
2235 if (v & 1)
2236 fputs ("b", file);
2237 else if (v & 2)
2238 fputs ("h", file);
2239 else
2240 fputs ("w", file);
2241 return;
2242
2243 case 'U':
2244 if (MEM_P (x))
2245 {
2246 x = XEXP (x, 0);
2247 if (GET_CODE (x) == PLUS
2248 || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2249 x = XEXP (x, 0);
2250 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
2251 {
2252 gcc_assert (sdata_symbolic_operand (x, Pmode));
2253 fputs ("2", file);
2254 return;
2255 }
2256 }
2257 gcc_assert (REG_P (x));
2258 if (A_REGNO_P (REGNO (x)))
2259 fputs ("1", file);
2260 if (B_REGNO_P (REGNO (x)))
2261 fputs ("2", file);
2262 return;
2263
2264 default:
2265 switch (GET_CODE (x))
2266 {
2267 case REG:
2268 if (GET_MODE_SIZE (mode) == 8)
2269 fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
2270 reg_names[REGNO (x)]);
2271 else
2272 fprintf (file, "%s", reg_names[REGNO (x)]);
2273 break;
2274
2275 case MEM:
2276 fputc ('*', file);
2277 gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
2278 c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
2279 break;
2280
2281 case SYMBOL_REF:
2282 fputc ('(', file);
2283 output_addr_const (file, x);
2284 fputc (')', file);
2285 break;
2286
2287 case CONST_INT:
2288 output_addr_const (file, x);
2289 break;
2290
2291 case CONST_DOUBLE:
2292 output_operand_lossage ("invalid const_double operand");
2293 break;
2294
2295 default:
2296 output_addr_const (file, x);
2297 }
2298 }
2299 }
2300
2301 /* Return TRUE if OP is a valid memory address with a base register of
2302 class C. If SMALL_OFFSET is true, we disallow memory references which would
2303 require a long offset with B14/B15. */
2304
2305 bool
2306 c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
2307 {
2308 machine_mode mode = GET_MODE (op);
2309 rtx base = XEXP (op, 0);
2310 switch (GET_CODE (base))
2311 {
2312 case REG:
2313 break;
2314 case PLUS:
2315 if (small_offset
2316 && (XEXP (base, 0) == stack_pointer_rtx
2317 || XEXP (base, 0) == pic_offset_table_rtx))
2318 {
2319 if (!c6x_legitimate_address_p_1 (mode, base, true, true))
2320 return false;
2321 }
2322
2323 /* fall through */
2324 case PRE_INC:
2325 case PRE_DEC:
2326 case PRE_MODIFY:
2327 case POST_INC:
2328 case POST_DEC:
2329 case POST_MODIFY:
2330 base = XEXP (base, 0);
2331 break;
2332
2333 case CONST:
2334 case LABEL_REF:
2335 case SYMBOL_REF:
2336 gcc_assert (sdata_symbolic_operand (base, Pmode));
2337 return !small_offset && c == B_REGS;
2338
2339 default:
2340 return false;
2341 }
2342 return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
2343 }
2344
2345 /* Returns true if X is a valid address for use in a memory reference
2346 of mode MODE. If STRICT is true, we do not allow pseudo registers
2347 in the address. NO_LARGE_OFFSET is true if we are examining an
2348 address for use in a load or store misaligned instruction, or
2349 recursively examining an operand inside a PRE/POST_MODIFY. */
2350
2351 bool
2352 c6x_legitimate_address_p_1 (machine_mode mode, rtx x, bool strict,
2353 bool no_large_offset)
2354 {
2355 int size, size1;
2356 HOST_WIDE_INT off;
2357 enum rtx_code code = GET_CODE (x);
2358
2359 switch (code)
2360 {
2361 case PRE_MODIFY:
2362 case POST_MODIFY:
2363 /* We can't split these into word-sized pieces yet. */
2364 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2365 return false;
2366 if (GET_CODE (XEXP (x, 1)) != PLUS)
2367 return false;
2368 if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
2369 return false;
2370 if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
2371 return false;
2372
2373 /* fall through */
2374 case PRE_INC:
2375 case PRE_DEC:
2376 case POST_INC:
2377 case POST_DEC:
2378 /* We can't split these into word-sized pieces yet. */
2379 if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2380 return false;
2381 x = XEXP (x, 0);
2382 if (!REG_P (x))
2383 return false;
2384
2385 /* fall through */
2386 case REG:
2387 if (strict)
2388 return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
2389 else
2390 return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));
2391
2392 case PLUS:
2393 if (!REG_P (XEXP (x, 0))
2394 || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
2395 return false;
2396 /* We cannot ensure currently that both registers end up in the
2397 same register file. */
2398 if (REG_P (XEXP (x, 1)))
2399 return false;
2400
2401 if (mode == BLKmode)
2402 size = 4;
2403 else if (mode == VOIDmode)
2404 /* ??? This can happen during ivopts. */
2405 size = 1;
2406 else
2407 size = GET_MODE_SIZE (mode);
2408
2409 if (flag_pic
2410 && GET_CODE (XEXP (x, 1)) == UNSPEC
2411 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
2412 && XEXP (x, 0) == pic_offset_table_rtx
2413 && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
2414 return !no_large_offset && size <= 4;
2415 if (flag_pic == 1
2416 && mode == Pmode
2417 && GET_CODE (XEXP (x, 1)) == UNSPEC
2418 && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
2419 && XEXP (x, 0) == pic_offset_table_rtx
2420 && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
2421 || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
2422 return !no_large_offset;
2423 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2424 return false;
2425
2426 off = INTVAL (XEXP (x, 1));
2427
2428 /* If the machine does not have doubleword load/stores, we'll use
2429 word size accesses. */
2430 size1 = size;
2431 if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
2432 size = UNITS_PER_WORD;
2433
2434 if (((HOST_WIDE_INT)size1 - 1) & off)
2435 return false;
2436 off /= size;
2437 if (off > -32 && off < (size1 == size ? 32 : 28))
2438 return true;
2439 if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
2440 || size1 > UNITS_PER_WORD)
2441 return false;
2442 return off >= 0 && off < 32768;
2443
2444 case CONST:
2445 case SYMBOL_REF:
2446 case LABEL_REF:
2447 return (!no_large_offset
2448 /* With -fpic, we must wrap it in an unspec to show the B14
2449 dependency. */
2450 && !flag_pic
2451 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
2452 && sdata_symbolic_operand (x, Pmode));
2453
2454 default:
2455 return false;
2456 }
2457 }
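
/* Illustrative summary of the PLUS case above (an added note, not part of
   the original sources): for an SImode access the constant offset must be a
   multiple of 4 and, once scaled, lie strictly between -32 and 32, i.e. byte
   offsets -124 through 124; larger scaled offsets up to 32767 are accepted
   only when NO_LARGE_OFFSET is false, the base is the stack pointer and the
   access is word-sized or smaller.  */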
2458
2459 static bool
2460 c6x_legitimate_address_p (machine_mode mode, rtx x, bool strict)
2461 {
2462 return c6x_legitimate_address_p_1 (mode, x, strict, false);
2463 }
2464
2465 static bool
2466 c6x_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
2467 rtx x ATTRIBUTE_UNUSED)
2468 {
2469 return true;
2470 }
2471
2472 /* Implements TARGET_PREFERRED_RENAME_CLASS. */
2473 static reg_class_t
2474 c6x_preferred_rename_class (reg_class_t cl)
2475 {
2476 if (cl == A_REGS)
2477 return NONPREDICATE_A_REGS;
2478 if (cl == B_REGS)
2479 return NONPREDICATE_B_REGS;
2480 if (cl == ALL_REGS || cl == GENERAL_REGS)
2481 return NONPREDICATE_REGS;
2482 return NO_REGS;
2483 }
2484
2485 /* Implements FINAL_PRESCAN_INSN. */
2486 void
2487 c6x_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
2488 int noperands ATTRIBUTE_UNUSED)
2489 {
2490 c6x_current_insn = insn;
2491 }
2492
2493 /* A structure to describe the stack layout of a function. The layout is
2494 as follows:
2495
2496 [saved frame pointer (or possibly padding0)]
2497 --> incoming stack pointer, new hard frame pointer
2498 [saved call-used regs]
2499 [optional padding1]
2500 --> soft frame pointer
2501 [frame]
2502 [outgoing arguments]
2503 [optional padding2]
2504
2505 The structure members are laid out in this order. */
2506
2507 struct c6x_frame
2508 {
2509 int padding0;
2510 /* Number of registers to save. */
2511 int nregs;
2512 int padding1;
2513 HOST_WIDE_INT frame;
2514 int outgoing_arguments_size;
2515 int padding2;
2516
2517 HOST_WIDE_INT to_allocate;
2518 /* The offsets relative to the incoming stack pointer (which
2519 becomes HARD_FRAME_POINTER). */
2520 HOST_WIDE_INT frame_pointer_offset;
2521 HOST_WIDE_INT b3_offset;
2522
2523 /* True if we should call push_rts/pop_rts to save and restore
2524 registers. */
2525 bool push_rts;
2526 };
2527
2528 /* Return true if we need to save and modify the PIC register in the
2529 prologue. */
2530
2531 static bool
2532 must_reload_pic_reg_p (void)
2533 {
2534 struct cgraph_local_info *i = NULL;
2535
2536 if (!TARGET_DSBT)
2537 return false;
2538
2539 i = cgraph_node::local_info (current_function_decl);
2540
2541 if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local)
2542 return true;
2543 return false;
2544 }
2545
2546 /* Return 1 if we need to save REGNO. */
2547 static int
2548 c6x_save_reg (unsigned int regno)
2549 {
2550 return ((df_regs_ever_live_p (regno)
2551 && !call_used_regs[regno]
2552 && !fixed_regs[regno])
2553 || (regno == RETURN_ADDR_REGNO
2554 && (df_regs_ever_live_p (regno)
2555 || !crtl->is_leaf))
2556 || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2557 }
2558
2559 /* Examine the number of regs NREGS we've determined we must save.
2560 Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2561 prologue and epilogue. */
2562
2563 static bool
2564 use_push_rts_p (int nregs)
2565 {
2566 if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2567 && !cfun->machine->contains_sibcall
2568 && !cfun->returns_struct
2569 && !TARGET_LONG_CALLS
2570 && nregs >= 6 && !frame_pointer_needed)
2571 return true;
2572 return false;
2573 }
2574
2575 /* Return the number of saved general purpose registers. */
2576
2577 int
2578 c6x_nsaved_regs (void)
2579 {
2580 int nregs = 0;
2581 int regno;
2582
2583 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2584 if (c6x_save_reg (regno))
2585 nregs++;
2586 return nregs;
2587 }
2588
2589 /* The safe debug order mandated by the ABI. */
2590 static unsigned reg_save_order[] =
2591 {
2592 REG_A10, REG_A11, REG_A12, REG_A13,
2593 REG_A14, REG_B3,
2594 REG_B10, REG_B11, REG_B12, REG_B13,
2595 REG_B14, REG_A15
2596 };
2597
2598 #define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2599
2600 /* Compute the layout of the stack frame and store it in FRAME. */
2601
2602 static void
2603 c6x_compute_frame_layout (struct c6x_frame *frame)
2604 {
2605 HOST_WIDE_INT size = get_frame_size ();
2606 HOST_WIDE_INT offset;
2607 int nregs;
2608
2609 /* We use the four bytes which are technically inside the caller's frame,
2610 usually to save the frame pointer. */
2611 offset = -4;
2612 frame->padding0 = 0;
2613 nregs = c6x_nsaved_regs ();
2614 frame->push_rts = false;
2615 frame->b3_offset = 0;
2616 if (use_push_rts_p (nregs))
2617 {
2618 frame->push_rts = true;
2619 frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2620 nregs = 14;
2621 }
2622 else if (c6x_save_reg (REG_B3))
2623 {
2624 int idx;
2625 for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2626 {
2627 if (c6x_save_reg (reg_save_order[idx]))
2628 frame->b3_offset -= 4;
2629 }
2630 }
2631 frame->nregs = nregs;
2632
2633 if (size == 0 && nregs == 0)
2634 {
2635 frame->padding0 = 4;
2636 frame->padding1 = frame->padding2 = 0;
2637 frame->frame_pointer_offset = frame->to_allocate = 0;
2638 frame->outgoing_arguments_size = 0;
2639 return;
2640 }
2641
2642 if (!frame->push_rts)
2643 offset += frame->nregs * 4;
2644
2645 if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
2646 && !crtl->is_leaf)
2647 /* Don't use the bottom of the caller's frame if we have no
2648 allocation of our own and call other functions. */
2649 frame->padding0 = frame->padding1 = 4;
2650 else if (offset & 4)
2651 frame->padding1 = 4;
2652 else
2653 frame->padding1 = 0;
2654
2655 offset += frame->padding0 + frame->padding1;
2656 frame->frame_pointer_offset = offset;
2657 offset += size;
2658
2659 frame->outgoing_arguments_size = crtl->outgoing_args_size;
2660 offset += frame->outgoing_arguments_size;
2661
2662 if ((offset & 4) == 0)
2663 frame->padding2 = 8;
2664 else
2665 frame->padding2 = 4;
2666 frame->to_allocate = offset + frame->padding2;
2667 }
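
/* Worked example (hypothetical numbers, for illustration only): with two
   saved registers, a 16-byte local frame, 8 bytes of outgoing arguments and
   no push_rts, the running offset starts at -4, becomes 4 after the register
   saves, and picks up 4 bytes of padding1, giving frame_pointer_offset == 8;
   adding the 16-byte frame brings the offset to 24, the outgoing arguments
   to 32, and padding2 == 8 yields to_allocate == 40, keeping the stack
   pointer 8-byte aligned.  */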
2668
2669 /* Return the offset between two registers, one to be eliminated, and the other
2670 its replacement, at the start of a routine. */
2671
2672 HOST_WIDE_INT
2673 c6x_initial_elimination_offset (int from, int to)
2674 {
2675 struct c6x_frame frame;
2676 c6x_compute_frame_layout (&frame);
2677
2678 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2679 return 0;
2680 else if (from == FRAME_POINTER_REGNUM
2681 && to == HARD_FRAME_POINTER_REGNUM)
2682 return -frame.frame_pointer_offset;
2683 else
2684 {
2685 gcc_assert (to == STACK_POINTER_REGNUM);
2686
2687 if (from == ARG_POINTER_REGNUM)
2688 return frame.to_allocate + (frame.push_rts ? 56 : 0);
2689
2690 gcc_assert (from == FRAME_POINTER_REGNUM);
2691 return frame.to_allocate - frame.frame_pointer_offset;
2692 }
2693 }
2694
2695 /* Given FROM and TO register numbers, say whether this elimination is
2696 allowed. Frame pointer elimination is automatically handled. */
2697
2698 static bool
2699 c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2700 {
2701 if (to == STACK_POINTER_REGNUM)
2702 return !frame_pointer_needed;
2703 return true;
2704 }
2705
2706 /* Emit insns to increment the stack pointer by OFFSET. If
2707 FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2708 Does nothing if the offset is zero. */
2709
2710 static void
2711 emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2712 {
2713 rtx to_add = GEN_INT (offset);
2714 rtx orig_to_add = to_add;
2715 rtx_insn *insn;
2716
2717 if (offset == 0)
2718 return;
2719
2720 if (offset < -32768 || offset > 32767)
2721 {
2722 rtx reg = gen_rtx_REG (SImode, REG_A0);
2723 rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2724
2725 insn = emit_insn (gen_movsi_high (reg, low));
2726 if (frame_related_p)
2727 RTX_FRAME_RELATED_P (insn) = 1;
2728 insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2729 if (frame_related_p)
2730 RTX_FRAME_RELATED_P (insn) = 1;
2731 to_add = reg;
2732 }
2733 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2734 to_add));
2735 if (frame_related_p)
2736 {
2737 if (REG_P (to_add))
2738 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2739 gen_rtx_SET (stack_pointer_rtx,
2740 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2741 orig_to_add)));
2742
2743 RTX_FRAME_RELATED_P (insn) = 1;
2744 }
2745 }
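
/* Illustrative note (an assumption about typical use): an adjustment such as
   -40 fits the signed 16-bit range tested above and becomes a single add to
   the stack pointer, while an adjustment outside [-32768, 32767] is first
   built in register A0 with the high/lo-sum move pair and then added; for
   frame-related adds the true constant is preserved in a
   REG_FRAME_RELATED_EXPR note for the unwinder.  */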
2746
2747 /* Prologue and epilogue. */
2748 void
2749 c6x_expand_prologue (void)
2750 {
2751 struct c6x_frame frame;
2752 rtx_insn *insn;
2753 rtx mem;
2754 int nsaved = 0;
2755 HOST_WIDE_INT initial_offset, off, added_already;
2756
2757 c6x_compute_frame_layout (&frame);
2758
2759 if (flag_stack_usage_info)
2760 current_function_static_stack_size = frame.to_allocate;
2761
2762 initial_offset = -frame.to_allocate;
2763 if (frame.push_rts)
2764 {
2765 emit_insn (gen_push_rts ());
2766 nsaved = frame.nregs;
2767 }
2768
2769 /* If the offsets would be too large for the memory references we will
2770 create to save registers, do the stack allocation in two parts.
2771 Ensure by subtracting 8 that we don't store to the word pointed to
2772 by the stack pointer. */
2773 if (initial_offset < -32768)
2774 initial_offset = -frame.frame_pointer_offset - 8;
2775
2776 if (frame.to_allocate > 0)
2777 gcc_assert (initial_offset != 0);
2778
2779 off = -initial_offset + 4 - frame.padding0;
2780
2781 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2782
2783 added_already = 0;
2784 if (frame_pointer_needed)
2785 {
2786 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2787 /* We go through some contortions here to both follow the ABI's
2788 recommendation that FP == incoming SP, and to avoid writing or
2789 reading the word pointed to by the stack pointer. */
2790 rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2791 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2792 GEN_INT (-8)));
2793 insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2794 RTX_FRAME_RELATED_P (insn) = 1;
2795 nsaved++;
2796 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2797 GEN_INT (8)));
2798 RTX_FRAME_RELATED_P (insn) = 1;
2799 off -= 4;
2800 added_already = -8;
2801 }
2802
2803 emit_add_sp_const (initial_offset - added_already, true);
2804
2805 if (nsaved < frame.nregs)
2806 {
2807 unsigned i;
2808
2809 for (i = 0; i < N_SAVE_ORDER; i++)
2810 {
2811 int idx = N_SAVE_ORDER - i - 1;
2812 unsigned regno = reg_save_order[idx];
2813 rtx reg;
2814 machine_mode save_mode = SImode;
2815
2816 if (regno == REG_A15 && frame_pointer_needed)
2817 /* Already saved. */
2818 continue;
2819 if (!c6x_save_reg (regno))
2820 continue;
2821
2822 if (TARGET_STDW && (off & 4) == 0 && off <= 256
2823 && (regno & 1) == 1
2824 && i + 1 < N_SAVE_ORDER
2825 && reg_save_order[idx - 1] == regno - 1
2826 && c6x_save_reg (regno - 1))
2827 {
2828 save_mode = DImode;
2829 regno--;
2830 i++;
2831 }
2832 reg = gen_rtx_REG (save_mode, regno);
2833 off -= GET_MODE_SIZE (save_mode);
2834
2835 insn = emit_move_insn (adjust_address (mem, save_mode, off),
2836 reg);
2837 RTX_FRAME_RELATED_P (insn) = 1;
2838
2839 nsaved += hard_regno_nregs (regno, save_mode);
2840 }
2841 }
2842 gcc_assert (nsaved == frame.nregs);
2843 emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2844 if (must_reload_pic_reg_p ())
2845 {
2846 if (dsbt_decl == NULL)
2847 {
2848 tree t;
2849
2850 t = build_index_type (integer_one_node);
2851 t = build_array_type (integer_type_node, t);
2852 t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2853 get_identifier ("__c6xabi_DSBT_BASE"), t);
2854 DECL_ARTIFICIAL (t) = 1;
2855 DECL_IGNORED_P (t) = 1;
2856 DECL_EXTERNAL (t) = 1;
2857 TREE_STATIC (t) = 1;
2858 TREE_PUBLIC (t) = 1;
2859 TREE_USED (t) = 1;
2860
2861 dsbt_decl = t;
2862 }
2863 emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2864 XEXP (DECL_RTL (dsbt_decl), 0)));
2865 }
2866 }
2867
2868 void
2869 c6x_expand_epilogue (bool sibcall)
2870 {
2871 unsigned i;
2872 struct c6x_frame frame;
2873 rtx mem;
2874 HOST_WIDE_INT off;
2875 int nsaved = 0;
2876
2877 c6x_compute_frame_layout (&frame);
2878
2879 mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2880
2881 /* Insert a dummy set/use of the stack pointer. This creates a
2882 scheduler barrier between the prologue saves and epilogue restores. */
2883 emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2884
2885 /* If the offsets would be too large for the memory references we will
2886 create to restore registers, do a preliminary stack adjustment here. */
2887 off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2888 if (frame.push_rts)
2889 {
2890 nsaved = frame.nregs;
2891 }
2892 else
2893 {
2894 if (frame.to_allocate > 32768)
2895 {
2896 /* Don't add the entire offset so that we leave an unused word
2897 above the stack pointer. */
2898 emit_add_sp_const ((off - 16) & ~7, false);
2899 off &= 7;
2900 off += 16;
2901 }
2902 for (i = 0; i < N_SAVE_ORDER; i++)
2903 {
2904 unsigned regno = reg_save_order[i];
2905 rtx reg;
2906 machine_mode save_mode = SImode;
2907
2908 if (!c6x_save_reg (regno))
2909 continue;
2910 if (regno == REG_A15 && frame_pointer_needed)
2911 continue;
2912
2913 if (TARGET_STDW && (off & 4) == 0 && off < 256
2914 && (regno & 1) == 0
2915 && i + 1 < N_SAVE_ORDER
2916 && reg_save_order[i + 1] == regno + 1
2917 && c6x_save_reg (regno + 1))
2918 {
2919 save_mode = DImode;
2920 i++;
2921 }
2922 reg = gen_rtx_REG (save_mode, regno);
2923
2924 emit_move_insn (reg, adjust_address (mem, save_mode, off));
2925
2926 off += GET_MODE_SIZE (save_mode);
2927 nsaved += hard_regno_nregs (regno, save_mode);
2928 }
2929 }
2930 if (!frame_pointer_needed)
2931 emit_add_sp_const (off + frame.padding0 - 4, false);
2932 else
2933 {
2934 rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2935 rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2936 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2937 GEN_INT (8)));
2938 emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2939 GEN_INT (-8)));
2940 emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2941 nsaved++;
2942 }
2943 gcc_assert (nsaved == frame.nregs);
2944 if (!sibcall)
2945 {
2946 if (frame.push_rts)
2947 emit_jump_insn (gen_pop_rts ());
2948 else
2949 emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2950 RETURN_ADDR_REGNO)));
2951 }
2952 }
2953
2954 /* Return the value of the return address for the frame COUNT steps up
2955 from the current frame, after the prologue.
2956 We punt for everything but the current frame by returning const0_rtx. */
2957
2958 rtx
2959 c6x_return_addr_rtx (int count)
2960 {
2961 if (count != 0)
2962 return const0_rtx;
2963
2964 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2965 }
2966
2967 /* Return true iff TYPE is one of the shadow types. */
2968 static bool
2969 shadow_type_p (enum attr_type type)
2970 {
2971 return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2972 || type == TYPE_MULT_SHADOW);
2973 }
2974
2975 /* Return true iff INSN is a shadow pattern. */
2976 static bool
2977 shadow_p (rtx_insn *insn)
2978 {
2979 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2980 return false;
2981 return shadow_type_p (get_attr_type (insn));
2982 }
2983
2984 /* Return true iff INSN is a shadow or blockage pattern. */
2985 static bool
2986 shadow_or_blockage_p (rtx_insn *insn)
2987 {
2988 enum attr_type type;
2989 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2990 return false;
2991 type = get_attr_type (insn);
2992 return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2993 }
2994
2995 /* Translate UNITS into a bitmask of units we can reserve for this
2996 insn. */
2997 static int
2998 get_reservation_flags (enum attr_units units)
2999 {
3000 switch (units)
3001 {
3002 case UNITS_D:
3003 case UNITS_D_ADDR:
3004 return RESERVATION_FLAG_D;
3005 case UNITS_L:
3006 return RESERVATION_FLAG_L;
3007 case UNITS_S:
3008 return RESERVATION_FLAG_S;
3009 case UNITS_M:
3010 return RESERVATION_FLAG_M;
3011 case UNITS_LS:
3012 return RESERVATION_FLAG_LS;
3013 case UNITS_DL:
3014 return RESERVATION_FLAG_DL;
3015 case UNITS_DS:
3016 return RESERVATION_FLAG_DS;
3017 case UNITS_DLS:
3018 return RESERVATION_FLAG_DLS;
3019 default:
3020 return 0;
3021 }
3022 }
3023
3024 /* Compute the side of the machine used by INSN, which reserves UNITS.
3025 This must match the reservations in the scheduling description. */
3026 static int
3027 get_insn_side (rtx_insn *insn, enum attr_units units)
3028 {
3029 if (units == UNITS_D_ADDR)
3030 return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3031 else
3032 {
3033 enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3034 if (rf == DEST_REGFILE_ANY)
3035 return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3036 else
3037 return rf == DEST_REGFILE_A ? 0 : 1;
3038 }
3039 }
3040
3041 /* After scheduling, walk the insns between HEAD and END and assign unit
3042 reservations. */
3043 static void
3044 assign_reservations (rtx_insn *head, rtx_insn *end)
3045 {
3046 rtx_insn *insn;
3047 for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3048 {
3049 unsigned int sched_mask, reserved;
3050 rtx_insn *within, *last;
3051 int pass;
3052 int rsrv[2];
3053 int rsrv_count[2][4];
3054 int i;
3055
3056 if (GET_MODE (insn) != TImode)
3057 continue;
3058
3059 reserved = 0;
3060 last = NULL;
3061 /* Find the last insn in the packet. It has a state recorded for it,
3062 which we can use to determine the units we should be using. */
3063 for (within = insn;
3064 (within != NEXT_INSN (end)
3065 && (within == insn || GET_MODE (within) != TImode));
3066 within = NEXT_INSN (within))
3067 {
3068 int icode;
3069 if (!NONDEBUG_INSN_P (within))
3070 continue;
3071 icode = recog_memoized (within);
3072 if (icode < 0)
3073 continue;
3074 if (shadow_p (within))
3075 continue;
3076 if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3077 reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3078 last = within;
3079 }
3080 if (last == NULL_RTX)
3081 continue;
3082
3083 sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3084 sched_mask &= ~reserved;
3085
3086 memset (rsrv_count, 0, sizeof rsrv_count);
3087 rsrv[0] = rsrv[1] = ~0;
3088 for (i = 0; i < 8; i++)
3089 {
3090 int side = i / 4;
3091 int unit = i & 3;
3092 unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3093 /* Clear the bits which we expect to reserve in the following loop,
3094 leaving the ones set which aren't present in the scheduler's
3095 state and shouldn't be reserved. */
3096 if (sched_mask & unit_bit)
3097 rsrv[i / 4] &= ~(1 << unit);
3098 }
3099
3100 /* Walk through the insns that occur in the same cycle. We use multiple
3101 passes to assign units, assigning for insns with the most specific
3102 requirements first. */
3103 for (pass = 0; pass < 4; pass++)
3104 for (within = insn;
3105 (within != NEXT_INSN (end)
3106 && (within == insn || GET_MODE (within) != TImode));
3107 within = NEXT_INSN (within))
3108 {
3109 int uid = INSN_UID (within);
3110 int this_rsrv, side;
3111 int icode;
3112 enum attr_units units;
3113 enum attr_type type;
3114 int j;
3115
3116 if (!NONDEBUG_INSN_P (within))
3117 continue;
3118 icode = recog_memoized (within);
3119 if (icode < 0)
3120 continue;
3121 if (INSN_INFO_ENTRY (uid).reservation != 0)
3122 continue;
3123 units = get_attr_units (within);
3124 type = get_attr_type (within);
3125 this_rsrv = get_reservation_flags (units);
3126 if (this_rsrv == 0)
3127 continue;
3128 side = get_insn_side (within, units);
3129
3130 /* Certain floating point instructions are treated specially. If
3131 an insn can choose between units it can reserve, and its
3132 reservation spans more than one cycle, the reservation contains
3133 special markers in the first cycle to help us reconstruct what
3134 the automaton chose. */
3135 if ((type == TYPE_ADDDP || type == TYPE_FP4)
3136 && units == UNITS_LS)
3137 {
3138 int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3139 + side * UNIT_QID_SIDE_OFFSET);
3140 int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3141 + side * UNIT_QID_SIDE_OFFSET);
3142 if ((sched_mask & (1 << test1_code)) != 0)
3143 {
3144 this_rsrv = RESERVATION_FLAG_L;
3145 sched_mask &= ~(1 << test1_code);
3146 }
3147 else if ((sched_mask & (1 << test2_code)) != 0)
3148 {
3149 this_rsrv = RESERVATION_FLAG_S;
3150 sched_mask &= ~(1 << test2_code);
3151 }
3152 }
3153
3154 if ((this_rsrv & (this_rsrv - 1)) == 0)
3155 {
3156 int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
3157 rsrv[side] |= this_rsrv;
3158 INSN_INFO_ENTRY (uid).reservation = t;
3159 continue;
3160 }
3161
3162 if (pass == 1)
3163 {
3164 for (j = 0; j < 4; j++)
3165 if (this_rsrv & (1 << j))
3166 rsrv_count[side][j]++;
3167 continue;
3168 }
3169 if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3170 || (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3171 {
3172 int best = -1, best_cost = INT_MAX;
3173 for (j = 0; j < 4; j++)
3174 if ((this_rsrv & (1 << j))
3175 && !(rsrv[side] & (1 << j))
3176 && rsrv_count[side][j] < best_cost)
3177 {
3178 best_cost = rsrv_count[side][j];
3179 best = j;
3180 }
3181 gcc_assert (best != -1);
3182 rsrv[side] |= 1 << best;
3183 for (j = 0; j < 4; j++)
3184 if ((this_rsrv & (1 << j)) && j != best)
3185 rsrv_count[side][j]--;
3186
3187 INSN_INFO_ENTRY (uid).reservation
3188 = best + side * UNIT_QID_SIDE_OFFSET;
3189 }
3190 }
3191 }
3192 }
3193
3194 /* Return a factor by which to weight unit imbalances for a reservation
3195 R. */
3196 static int
3197 unit_req_factor (enum unitreqs r)
3198 {
3199 switch (r)
3200 {
3201 case UNIT_REQ_D:
3202 case UNIT_REQ_L:
3203 case UNIT_REQ_S:
3204 case UNIT_REQ_M:
3205 case UNIT_REQ_X:
3206 case UNIT_REQ_T:
3207 return 1;
3208 case UNIT_REQ_DL:
3209 case UNIT_REQ_LS:
3210 case UNIT_REQ_DS:
3211 return 2;
3212 case UNIT_REQ_DLS:
3213 return 3;
3214 default:
3215 gcc_unreachable ();
3216 }
3217 }
3218
3219 /* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3220 requirements. Returns zero if INSN can't be handled, otherwise
3221 either one or two to show how many of the two pairs are in use.
3222 REQ1 is always used; it holds what is normally thought of as the
3223 instruction's reservation, e.g. UNIT_REQ_DL. REQ2 is used to either
3224 describe a cross path, or for loads/stores, the T unit. */
3225 static int
3226 get_unit_reqs (rtx_insn *insn, int *req1, int *side1, int *req2, int *side2)
3227 {
3228 enum attr_units units;
3229 enum attr_cross cross;
3230 int side, req;
3231
3232 if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3233 return 0;
3234 units = get_attr_units (insn);
3235 if (units == UNITS_UNKNOWN)
3236 return 0;
3237 side = get_insn_side (insn, units);
3238 cross = get_attr_cross (insn);
3239
3240 req = (units == UNITS_D ? UNIT_REQ_D
3241 : units == UNITS_D_ADDR ? UNIT_REQ_D
3242 : units == UNITS_DL ? UNIT_REQ_DL
3243 : units == UNITS_DS ? UNIT_REQ_DS
3244 : units == UNITS_L ? UNIT_REQ_L
3245 : units == UNITS_LS ? UNIT_REQ_LS
3246 : units == UNITS_S ? UNIT_REQ_S
3247 : units == UNITS_M ? UNIT_REQ_M
3248 : units == UNITS_DLS ? UNIT_REQ_DLS
3249 : -1);
3250 gcc_assert (req != -1);
3251 *req1 = req;
3252 *side1 = side;
3253 if (units == UNITS_D_ADDR)
3254 {
3255 *req2 = UNIT_REQ_T;
3256 *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3257 return 2;
3258 }
3259 else if (cross == CROSS_Y)
3260 {
3261 *req2 = UNIT_REQ_X;
3262 *side2 = side;
3263 return 2;
3264 }
3265 return 1;
3266 }
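
/* Example of the two-requirement case (illustrative): a load or store
   (UNITS_D_ADDR) with its address register on side A and a cross path to the
   other register file reports UNIT_REQ_D on side 0 and UNIT_REQ_T on side 1,
   since the T (data) path is counted on the side of the data register.  */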
3267
3268 /* Walk the insns between and including HEAD and TAIL, and mark the
3269 resource requirements in the unit_reqs table. */
3270 static void
3271 count_unit_reqs (unit_req_table reqs, rtx_insn *head, rtx_insn *tail)
3272 {
3273 rtx_insn *insn;
3274
3275 memset (reqs, 0, sizeof (unit_req_table));
3276
3277 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3278 {
3279 int side1, side2, req1, req2;
3280
3281 switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3282 {
3283 case 2:
3284 reqs[side2][req2]++;
3285 /* fall through */
3286 case 1:
3287 reqs[side1][req1]++;
3288 break;
3289 }
3290 }
3291 }
3292
3293 /* Update the table REQS by merging more specific unit reservations into
3294 more general ones, i.e. counting (for example) UNIT_REQ_D also in
3295 UNIT_REQ_DL, DS, and DLS. */
3296 static void
3297 merge_unit_reqs (unit_req_table reqs)
3298 {
3299 int side;
3300 for (side = 0; side < 2; side++)
3301 {
3302 int d = reqs[side][UNIT_REQ_D];
3303 int l = reqs[side][UNIT_REQ_L];
3304 int s = reqs[side][UNIT_REQ_S];
3305 int dl = reqs[side][UNIT_REQ_DL];
3306 int ls = reqs[side][UNIT_REQ_LS];
3307 int ds = reqs[side][UNIT_REQ_DS];
3308
3309 reqs[side][UNIT_REQ_DL] += d;
3310 reqs[side][UNIT_REQ_DL] += l;
3311 reqs[side][UNIT_REQ_DS] += d;
3312 reqs[side][UNIT_REQ_DS] += s;
3313 reqs[side][UNIT_REQ_LS] += l;
3314 reqs[side][UNIT_REQ_LS] += s;
3315 reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
3316 }
3317 }
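
/* Worked example (hypothetical counts): with D == 2, L == 1, S == 0 and no
   combined reservations on a side, merging gives DL == 3, DS == 2, LS == 1
   and DLS == 3, i.e. every specific requirement is also counted against each
   group that could have satisfied it.  */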
3318
3319 /* Examine the table REQS and return a measure of unit imbalance by comparing
3320 the two sides of the machine. If, for example, D1 is used twice and D2
3321 used not at all, the return value should be 1 in the absence of other
3322 imbalances. */
3323 static int
3324 unit_req_imbalance (unit_req_table reqs)
3325 {
3326 int val = 0;
3327 int i;
3328
3329 for (i = 0; i < UNIT_REQ_MAX; i++)
3330 {
3331 int factor = unit_req_factor ((enum unitreqs) i);
3332 int diff = abs (reqs[0][i] - reqs[1][i]);
3333 val += (diff + factor - 1) / factor / 2;
3334 }
3335 return val;
3336 }
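
/* Worked example (illustrative): if side A needs a D unit twice and side B
   not at all, diff == 2 with factor == 1 contributes 1 to the result,
   matching the comment above; a difference of 5 in the three-way DLS
   requirement (factor 3) likewise contributes 1, the division by two
   reflecting that moving one insn to the other side reduces the difference
   by two.  */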
3337
3338 /* Return the resource-constrained minimum iteration interval given the
3339 data in the REQS table. This must have been processed with
3340 merge_unit_reqs already. */
3341 static int
3342 res_mii (unit_req_table reqs)
3343 {
3344 int side, req;
3345 int worst = 1;
3346 for (side = 0; side < 2; side++)
3347 for (req = 0; req < UNIT_REQ_MAX; req++)
3348 {
3349 int factor = unit_req_factor ((enum unitreqs) req);
3350 worst = MAX ((reqs[side][req] + factor - 1) / factor, worst);
3351 }
3352
3353 return worst;
3354 }
3355
3356 /* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3357 the operands that are involved in the (up to) two reservations, as
3358 found by get_unit_reqs. Return true if we did this successfully, false
3359 if we couldn't identify what to do with INSN. */
3360 static bool
3361 get_unit_operand_masks (rtx_insn *insn, unsigned int *pmask1,
3362 unsigned int *pmask2)
3363 {
3364 enum attr_op_pattern op_pat;
3365
3366 if (recog_memoized (insn) < 0)
3367 return false;
3368 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3369 return false;
3370 extract_insn (insn);
3371 op_pat = get_attr_op_pattern (insn);
3372 if (op_pat == OP_PATTERN_DT)
3373 {
3374 gcc_assert (recog_data.n_operands == 2);
3375 *pmask1 = 1 << 0;
3376 *pmask2 = 1 << 1;
3377 return true;
3378 }
3379 else if (op_pat == OP_PATTERN_TD)
3380 {
3381 gcc_assert (recog_data.n_operands == 2);
3382 *pmask1 = 1 << 1;
3383 *pmask2 = 1 << 0;
3384 return true;
3385 }
3386 else if (op_pat == OP_PATTERN_SXS)
3387 {
3388 gcc_assert (recog_data.n_operands == 3);
3389 *pmask1 = (1 << 0) | (1 << 2);
3390 *pmask2 = 1 << 1;
3391 return true;
3392 }
3393 else if (op_pat == OP_PATTERN_SX)
3394 {
3395 gcc_assert (recog_data.n_operands == 2);
3396 *pmask1 = 1 << 0;
3397 *pmask2 = 1 << 1;
3398 return true;
3399 }
3400 else if (op_pat == OP_PATTERN_SSX)
3401 {
3402 gcc_assert (recog_data.n_operands == 3);
3403 *pmask1 = (1 << 0) | (1 << 1);
3404 *pmask2 = 1 << 2;
3405 return true;
3406 }
3407 return false;
3408 }
3409
3410 /* Try to replace a register in INSN, which has corresponding rename info
3411 from regrename_analyze in INFO. OP_MASK and ORIG_SIDE provide information
3412 about the operands that must be renamed and the side they are on.
3413 REQS is the table of unit reservations in the loop between HEAD and TAIL.
3414 We recompute this information locally after our transformation, and keep
3415 it only if we managed to improve the balance. */
3416 static void
3417 try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs,
3418 rtx insn,
3419 insn_rr_info *info, unsigned int op_mask, int orig_side)
3420 {
3421 enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3422 HARD_REG_SET unavailable;
3423 du_head_p this_head;
3424 struct du_chain *chain;
3425 int i;
3426 unsigned tmp_mask;
3427 int best_reg, old_reg;
3428 vec<du_head_p> involved_chains = vNULL;
3429 unit_req_table new_reqs;
3430 bool ok;
3431
3432 for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3433 {
3434 du_head_p op_chain;
3435 if ((tmp_mask & (1 << i)) == 0)
3436 continue;
3437 if (info->op_info[i].n_chains != 1)
3438 goto out_fail;
3439 op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
3440 involved_chains.safe_push (op_chain);
3441 tmp_mask &= ~(1 << i);
3442 }
3443
3444 if (involved_chains.length () > 1)
3445 goto out_fail;
3446
3447 this_head = involved_chains[0];
3448 if (this_head->cannot_rename)
3449 goto out_fail;
3450
3451 for (chain = this_head->first; chain; chain = chain->next_use)
3452 {
3453 unsigned int mask1, mask2, mask_changed;
3454 int count, side1, side2, req1, req2;
3455 insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)];
3456
3457 count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3458
3459 if (count == 0)
3460 goto out_fail;
3461
3462 if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3463 goto out_fail;
3464
3465 extract_insn (chain->insn);
3466
3467 mask_changed = 0;
3468 for (i = 0; i < recog_data.n_operands; i++)
3469 {
3470 int j;
3471 int n_this_op = this_rr->op_info[i].n_chains;
3472 for (j = 0; j < n_this_op; j++)
3473 {
3474 du_head_p other = this_rr->op_info[i].heads[j];
3475 if (regrename_chain_from_id (other->id) == this_head)
3476 break;
3477 }
3478 if (j == n_this_op)
3479 continue;
3480
3481 if (n_this_op != 1)
3482 goto out_fail;
3483 mask_changed |= 1 << i;
3484 }
3485 gcc_assert (mask_changed != 0);
3486 if (mask_changed != mask1 && mask_changed != mask2)
3487 goto out_fail;
3488 }
3489
3490 /* If we get here, we can do the renaming. */
3491 COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3492
3493 old_reg = this_head->regno;
3494 best_reg =
3495 find_rename_reg (this_head, super_class, &unavailable, old_reg, true);
3496
3497 ok = regrename_do_replace (this_head, best_reg);
3498 gcc_assert (ok);
3499
3500 count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3501 merge_unit_reqs (new_reqs);
3502 if (dump_file)
3503 {
3504 fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3505 "original side %d, new reg %d\n",
3506 INSN_UID (insn), op_mask, orig_side, best_reg);
3507 fprintf (dump_file, " imbalance %d -> %d\n",
3508 unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3509 }
3510 if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3511 {
3512 ok = regrename_do_replace (this_head, old_reg);
3513 gcc_assert (ok);
3514 }
3515 else
3516 memcpy (reqs, new_reqs, sizeof (unit_req_table));
3517
3518 out_fail:
3519 involved_chains.release ();
3520 }
3521
3522 /* Find insns in LOOP which would, if shifted to the other side
3523 of the machine, reduce an imbalance in the unit reservations. */
3524 static void
3525 reshuffle_units (basic_block loop)
3526 {
3527 rtx_insn *head = BB_HEAD (loop);
3528 rtx_insn *tail = BB_END (loop);
3529 rtx_insn *insn;
3530 unit_req_table reqs;
3531 edge e;
3532 edge_iterator ei;
3533 bitmap_head bbs;
3534
3535 count_unit_reqs (reqs, head, PREV_INSN (tail));
3536 merge_unit_reqs (reqs);
3537
3538 regrename_init (true);
3539
3540 bitmap_initialize (&bbs, &bitmap_default_obstack);
3541
3542 FOR_EACH_EDGE (e, ei, loop->preds)
3543 bitmap_set_bit (&bbs, e->src->index);
3544
3545 bitmap_set_bit (&bbs, loop->index);
3546 regrename_analyze (&bbs);
3547
3548 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3549 {
3550 enum attr_units units;
3551 int count, side1, side2, req1, req2;
3552 unsigned int mask1, mask2;
3553 insn_rr_info *info;
3554
3555 if (!NONDEBUG_INSN_P (insn))
3556 continue;
3557
3558 count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3559
3560 if (count == 0)
3561 continue;
3562
3563 if (!get_unit_operand_masks (insn, &mask1, &mask2))
3564 continue;
3565
3566 info = &insn_rr[INSN_UID (insn)];
3567 if (info->op_info == NULL)
3568 continue;
3569
3570 if (reqs[side1][req1] > 1
3571 && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3572 {
3573 try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3574 }
3575
3576 units = get_attr_units (insn);
3577 if (units == UNITS_D_ADDR)
3578 {
3579 gcc_assert (count == 2);
3580 if (reqs[side2][req2] > 1
3581 && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3582 {
3583 try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3584 }
3585 }
3586 }
3587 regrename_finish ();
3588 }
3589
3590 /* Backend scheduling state. */
3591 typedef struct c6x_sched_context
3592 {
3593 /* The current scheduler clock, saved in the sched_reorder hook. */
3594 int curr_sched_clock;
3595
3596 /* Number of insns issued so far in this cycle. */
3597 int issued_this_cycle;
3598
3599 /* We record the time at which each jump occurs in JUMP_CYCLES. The
3600 theoretical maximum for number of jumps in flight is 12: 2 every
3601 cycle, with a latency of 6 cycles each. This is a circular
3602 buffer; JUMP_CYCLE_INDEX is the pointer to the start. Earlier
3603 jumps have a higher index. This array should be accessed through
3604 the jump_cycle function. */
3605 int jump_cycles[12];
3606 int jump_cycle_index;
3607
3608 /* In parallel with jump_cycles, this array records the opposite of
3609 the condition used in each pending jump. This is used to
3610 predicate insns that are scheduled in the jump's delay slots. If
3611 this is NULL_RTX no such predication happens. */
3612 rtx jump_cond[12];
3613
3614 /* Similar to the jump_cycles mechanism, but here we take into
3615 account all insns with delay slots, to avoid scheduling asms into
3616 the delay slots. */
3617 int delays_finished_at;
3618
3619 /* The following variable value is the last issued insn. */
3620 rtx_insn *last_scheduled_insn;
3621 /* The last issued insn that isn't a shadow of another. */
3622 rtx_insn *last_scheduled_iter0;
3623
3624 /* The following variable value is DFA state before issuing the
3625 first insn in the current clock cycle. We do not use this member
3626 of the structure directly; we copy the data in and out of
3627 prev_cycle_state. */
3628 state_t prev_cycle_state_ctx;
3629
3630 int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3631 int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3632 int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3633
3634 int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3635 int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3636 } *c6x_sched_context_t;
3637
3638 /* The current scheduling state. */
3639 static struct c6x_sched_context ss;
3640
3641 /* The following variable value is DFA state before issuing the first insn
3642 in the current clock cycle. This is used in c6x_variable_issue for
3643 comparison with the state after issuing the last insn in a cycle. */
3644 static state_t prev_cycle_state;
3645
3646 /* Set when we discover while processing an insn that it would lead to too
3647 many accesses of the same register. */
3648 static bool reg_access_stall;
3649
3650 /* The highest insn uid after delayed insns were split, but before loop bodies
3651 were copied by the modulo scheduling code. */
3652 static int sploop_max_uid_iter0;
3653
3654 /* Look up the jump cycle with index N. For an out-of-bounds N, we return 0,
3655 so the caller does not specifically have to test for it. */
3656 static int
3657 get_jump_cycle (int n)
3658 {
3659 if (n >= 12)
3660 return 0;
3661 n += ss.jump_cycle_index;
3662 if (n >= 12)
3663 n -= 12;
3664 return ss.jump_cycles[n];
3665 }
3666
3667 /* Look up the jump condition with index N. */
3668 static rtx
3669 get_jump_cond (int n)
3670 {
3671 if (n >= 12)
3672 return NULL_RTX;
3673 n += ss.jump_cycle_index;
3674 if (n >= 12)
3675 n -= 12;
3676 return ss.jump_cond[n];
3677 }
3678
3679 /* Return the index of the first jump that occurs after CLOCK_VAR. If no jump
3680 has delay slots beyond CLOCK_VAR, return -1. */
3681 static int
3682 first_jump_index (int clock_var)
3683 {
3684 int retval = -1;
3685 int n = 0;
3686 for (;;)
3687 {
3688 int t = get_jump_cycle (n);
3689 if (t <= clock_var)
3690 break;
3691 retval = n;
3692 n++;
3693 }
3694 return retval;
3695 }
3696
3697 /* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3698 and has the opposite condition of COND. */
3699 static void
3700 record_jump (int cycle, rtx cond)
3701 {
3702 if (ss.jump_cycle_index == 0)
3703 ss.jump_cycle_index = 11;
3704 else
3705 ss.jump_cycle_index--;
3706 ss.jump_cycles[ss.jump_cycle_index] = cycle;
3707 ss.jump_cond[ss.jump_cycle_index] = cond;
3708 }
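
/* Illustrative note on the circular buffer (added for clarity): each call to
   record_jump steps jump_cycle_index backwards, wrapping from 0 to 11, so
   the most recently recorded jump is the one returned by get_jump_cycle (0)
   and get_jump_cond (0), while older jumps appear at higher indices, as
   described for the jump_cycles array above.  */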
3709
3710 /* Set the clock cycle of INSN to CYCLE. Also clears the insn's entry in
3711 new_conditions. */
3712 static void
3713 insn_set_clock (rtx insn, int cycle)
3714 {
3715 unsigned uid = INSN_UID (insn);
3716
3717 if (uid >= INSN_INFO_LENGTH)
3718 insn_info.safe_grow (uid * 5 / 4 + 10);
3719
3720 INSN_INFO_ENTRY (uid).clock = cycle;
3721 INSN_INFO_ENTRY (uid).new_cond = NULL;
3722 INSN_INFO_ENTRY (uid).reservation = 0;
3723 INSN_INFO_ENTRY (uid).ebb_start = false;
3724 }
3725
3726 /* Return the clock cycle we set for the insn with uid UID. */
3727 static int
3728 insn_uid_get_clock (int uid)
3729 {
3730 return INSN_INFO_ENTRY (uid).clock;
3731 }
3732
3733 /* Return the clock cycle we set for INSN. */
3734 static int
3735 insn_get_clock (rtx insn)
3736 {
3737 return insn_uid_get_clock (INSN_UID (insn));
3738 }
3739
3740 /* Examine INSN, and if it is a conditional jump of any kind, return
3741 the opposite of the condition in which it branches. Otherwise,
3742 return NULL_RTX. */
3743 static rtx
3744 condjump_opposite_condition (rtx insn)
3745 {
3746 rtx pat = PATTERN (insn);
3747 int icode = INSN_CODE (insn);
3748 rtx x = NULL;
3749
3750 if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3751 {
3752 x = XEXP (SET_SRC (pat), 0);
3753 if (icode == CODE_FOR_br_false)
3754 return x;
3755 }
3756 if (GET_CODE (pat) == COND_EXEC)
3757 {
3758 rtx t = COND_EXEC_CODE (pat);
3759 if ((GET_CODE (t) == PARALLEL
3760 && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3761 || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3762 || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3763 x = COND_EXEC_TEST (pat);
3764 }
3765
3766 if (x != NULL_RTX)
3767 {
3768 enum rtx_code code = GET_CODE (x);
3769 x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3770 GET_MODE (x), XEXP (x, 0),
3771 XEXP (x, 1));
3772 }
3773 return x;
3774 }
3775
3776 /* Return true iff COND1 and COND2 are exactly opposite conditions,
3777 one of them NE and the other EQ. */
3778 static bool
3779 conditions_opposite_p (rtx cond1, rtx cond2)
3780 {
3781 return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3782 && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3783 && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
3784 }
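/* For illustration, (ne (reg A1) (const_int 0)) and
   (eq (reg A1) (const_int 0)) are opposite in this sense, while two NE
   comparisons of the same operands are not.  */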
3785
3786 /* Return true if we can add a predicate COND to INSN, or if INSN
3787 already has that predicate. If DOIT is true, also perform the
3788 modification. */
3789 static bool
3790 predicate_insn (rtx_insn *insn, rtx cond, bool doit)
3791 {
3792 int icode;
3793 if (cond == NULL_RTX)
3794 {
3795 gcc_assert (!doit);
3796 return false;
3797 }
3798
3799 if (get_attr_predicable (insn) == PREDICABLE_YES
3800 && GET_CODE (PATTERN (insn)) != COND_EXEC)
3801 {
3802 if (doit)
3803 {
3804 cond = copy_rtx (cond);
3805 rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3806 PATTERN (insn) = newpat;
3807 INSN_CODE (insn) = -1;
3808 }
3809 return true;
3810 }
3811 if (GET_CODE (PATTERN (insn)) == COND_EXEC
3812 && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3813 return true;
3814 icode = INSN_CODE (insn);
3815 if (icode == CODE_FOR_real_jump
3816 || icode == CODE_FOR_jump
3817 || icode == CODE_FOR_indirect_jump)
3818 {
3819 rtx pat = PATTERN (insn);
3820 rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3821 : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3822 : SET_SRC (pat));
3823 if (doit)
3824 {
3825 rtx newpat;
3826 if (REG_P (dest))
3827 newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3828 else
3829 newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3830 PATTERN (insn) = newpat;
3831 INSN_CODE (insn) = -1;
3832 }
3833 return true;
3834 }
3835 if (INSN_CODE (insn) == CODE_FOR_br_true)
3836 {
3837 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3838 return rtx_equal_p (br_cond, cond);
3839 }
3840 if (INSN_CODE (insn) == CODE_FOR_br_false)
3841 {
3842 rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3843 return conditions_opposite_p (br_cond, cond);
3844 }
3845 return false;
3846 }
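/* As an illustration of the DOIT case for a predicable insn: with COND
   equal to (ne (reg A1) (const_int 0)), a pattern such as
   (set (reg:SI 5) (reg:SI 6)) is rewritten as
   (cond_exec (ne (reg A1) (const_int 0)) (set (reg:SI 5) (reg:SI 6))),
   and INSN_CODE is reset so the insn is re-recognized later.  */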
3847
3848 /* Initialize SC. Used by c6x_init_sched_context and c6x_sched_init. */
3849 static void
3850 init_sched_state (c6x_sched_context_t sc)
3851 {
3852 sc->last_scheduled_insn = NULL;
3853 sc->last_scheduled_iter0 = NULL;
3854 sc->issued_this_cycle = 0;
3855 memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3856 memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3857 sc->jump_cycle_index = 0;
3858 sc->delays_finished_at = 0;
3859 sc->curr_sched_clock = 0;
3860
3861 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3862
3863 memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3864 memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3865 memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
3866
3867 state_reset (sc->prev_cycle_state_ctx);
3868 }
3869
3870 /* Allocate store for new scheduling context. */
3871 static void *
3872 c6x_alloc_sched_context (void)
3873 {
3874 return xmalloc (sizeof (struct c6x_sched_context));
3875 }
3876
3877 /* If CLEAN_P is true, initialize _SC with clean data;
3878 otherwise initialize it from the global context. */
3879 static void
3880 c6x_init_sched_context (void *_sc, bool clean_p)
3881 {
3882 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3883
3884 if (clean_p)
3885 {
3886 init_sched_state (sc);
3887 }
3888 else
3889 {
3890 *sc = ss;
3891 sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3892 memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3893 }
3894 }
3895
3896 /* Sets the global scheduling context to the one pointed to by _SC. */
3897 static void
3898 c6x_set_sched_context (void *_sc)
3899 {
3900 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3901
3902 gcc_assert (sc != NULL);
3903 ss = *sc;
3904 memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3905 }
3906
3907 /* Clear data in _SC. */
3908 static void
3909 c6x_clear_sched_context (void *_sc)
3910 {
3911 c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3912 gcc_assert (_sc != NULL);
3913
3914 free (sc->prev_cycle_state_ctx);
3915 }
3916
3917 /* Free _SC. */
3918 static void
3919 c6x_free_sched_context (void *_sc)
3920 {
3921 free (_sc);
3922 }
3923
3924 /* True if we are currently performing a preliminary scheduling
3925 pass before modulo scheduling; we can't allow the scheduler to
3926 modify instruction patterns using packetization assumptions,
3927 since there will be another scheduling pass later if modulo
3928 scheduling fails. */
3929 static bool in_hwloop;
3930
3931 /* Provide information about speculation capabilities, and set the
3932 DO_BACKTRACKING flag. */
3933 static void
3934 c6x_set_sched_flags (spec_info_t spec_info)
3935 {
3936 unsigned int *flags = &(current_sched_info->flags);
3937
3938 if (*flags & SCHED_EBB)
3939 {
3940 *flags |= DO_BACKTRACKING | DO_PREDICATION;
3941 }
3942 if (in_hwloop)
3943 *flags |= DONT_BREAK_DEPENDENCIES;
3944
3945 spec_info->mask = 0;
3946 }
3947
3948 /* Implement the TARGET_SCHED_ISSUE_RATE hook. */
3949
3950 static int
3951 c6x_issue_rate (void)
3952 {
3953 return 8;
3954 }
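/* The issue rate of 8 reflects the eight functional units (.L1, .S1,
   .M1, .D1 and their B-side counterparts): an execute packet can hold
   at most one insn per unit, so at most eight insns per cycle.  */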
3955
3956 /* Used together with the collapse_ndfa option, this ensures that we reach a
3957 deterministic automaton state before trying to advance a cycle.
3958 With collapse_ndfa, genautomata creates advance cycle arcs only for
3959 such deterministic states. */
3960
3961 static rtx
3962 c6x_sched_dfa_pre_cycle_insn (void)
3963 {
3964 return const0_rtx;
3965 }
3966
3967 /* We're beginning a new block. Initialize data structures as necessary. */
3968
3969 static void
3970 c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3971 int sched_verbose ATTRIBUTE_UNUSED,
3972 int max_ready ATTRIBUTE_UNUSED)
3973 {
3974 if (prev_cycle_state == NULL)
3975 {
3976 prev_cycle_state = xmalloc (dfa_state_size);
3977 }
3978 init_sched_state (&ss);
3979 state_reset (prev_cycle_state);
3980 }
3981
3982 /* We are about to begin issuing INSN. Return nonzero if we cannot
3983 issue it on the given cycle CLOCK, and return zero if we should not sort
3984 the ready queue on the next clock start.
3985 For C6X, we use this function just to copy the previous DFA state
3986 for comparison purposes. */
3987
3988 static int
3989 c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3990 rtx_insn *insn ATTRIBUTE_UNUSED,
3991 int last_clock ATTRIBUTE_UNUSED,
3992 int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3993 {
3994 if (clock != last_clock)
3995 memcpy (prev_cycle_state, curr_state, dfa_state_size);
3996 return 0;
3997 }
3998
3999 static void
4000 c6x_mark_regno_read (int regno, bool cross)
4001 {
4002 int t = ++ss.tmp_reg_n_accesses[regno];
4003
4004 if (t > 4)
4005 reg_access_stall = true;
4006
4007 if (cross)
4008 {
4009 int set_cycle = ss.reg_set_in_cycle[regno];
4010 /* This must be done in this way rather than by tweaking things in
4011 adjust_cost, since the stall occurs even for insns with opposite
4012 predicates, and the scheduler may not even see a dependency. */
4013 if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
4014 reg_access_stall = true;
4015 /* This doesn't quite do anything yet as we're only modeling one
4016 x unit. */
4017 ++ss.tmp_reg_n_xaccesses[regno];
4018 }
4019 }
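/* Worked example (illustration only): the model above allows at most
   four reads of the same register per cycle, so if the insns issued so
   far this cycle already read A4 four times, a fifth reader of A4 sets
   reg_access_stall and c6x_sched_reorder_1 defers it to the next
   cycle.  */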
4020
4021 /* Note that REG is read in the insn being examined. If CROSS, it
4022 means the access is through a cross path. Update the temporary reg
4023 access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4024 in the current cycle. */
4025
4026 static void
4027 c6x_mark_reg_read (rtx reg, bool cross)
4028 {
4029 unsigned regno = REGNO (reg);
4030 unsigned nregs = REG_NREGS (reg);
4031
4032 while (nregs-- > 0)
4033 c6x_mark_regno_read (regno + nregs, cross);
4034 }
4035
4036 /* Note that register REG is written in cycle CYCLES. */
4037
4038 static void
4039 c6x_mark_reg_written (rtx reg, int cycles)
4040 {
4041 unsigned regno = REGNO (reg);
4042 unsigned nregs = REG_NREGS (reg);
4043
4044 while (nregs-- > 0)
4045 ss.reg_set_in_cycle[regno + nregs] = cycles;
4046 }
4047
4048 /* Update the register state information for the instruction INSN.
4049 Return true if the instruction has to be delayed until the
4050 next cycle. */
4051
4052 static bool
4053 c6x_registers_update (rtx_insn *insn)
4054 {
4055 enum attr_cross cross;
4056 enum attr_dest_regfile destrf;
4057 int i, nops;
4058 rtx x;
4059
4060 if (!reload_completed || recog_memoized (insn) < 0)
4061 return false;
4062
4063 reg_access_stall = false;
4064 memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4065 sizeof ss.tmp_reg_n_accesses);
4066 memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4067 sizeof ss.tmp_reg_n_xaccesses);
4068
4069 extract_insn (insn);
4070
4071 cross = get_attr_cross (insn);
4072 destrf = get_attr_dest_regfile (insn);
4073
4074 nops = recog_data.n_operands;
4075 x = PATTERN (insn);
4076 if (GET_CODE (x) == COND_EXEC)
4077 {
4078 c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4079 nops -= 2;
4080 }
4081
4082 for (i = 0; i < nops; i++)
4083 {
4084 rtx op = recog_data.operand[i];
4085 if (recog_data.operand_type[i] == OP_OUT)
4086 continue;
4087 if (REG_P (op))
4088 {
4089 bool this_cross = cross;
4090 if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4091 this_cross = false;
4092 if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4093 this_cross = false;
4094 c6x_mark_reg_read (op, this_cross);
4095 }
4096 else if (MEM_P (op))
4097 {
4098 op = XEXP (op, 0);
4099 switch (GET_CODE (op))
4100 {
4101 case POST_INC:
4102 case PRE_INC:
4103 case POST_DEC:
4104 case PRE_DEC:
4105 op = XEXP (op, 0);
4106 /* fall through */
4107 case REG:
4108 c6x_mark_reg_read (op, false);
4109 break;
4110 case POST_MODIFY:
4111 case PRE_MODIFY:
4112 op = XEXP (op, 1);
4113 gcc_assert (GET_CODE (op) == PLUS);
4114 /* fall through */
4115 case PLUS:
4116 c6x_mark_reg_read (XEXP (op, 0), false);
4117 if (REG_P (XEXP (op, 1)))
4118 c6x_mark_reg_read (XEXP (op, 1), false);
4119 break;
4120 case SYMBOL_REF:
4121 case LABEL_REF:
4122 case CONST:
4123 c6x_mark_regno_read (REG_B14, false);
4124 break;
4125 default:
4126 gcc_unreachable ();
4127 }
4128 }
4129 else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4130 gcc_unreachable ();
4131 }
4132 return reg_access_stall;
4133 }
4134
4135 /* Helper function for the TARGET_SCHED_REORDER and
4136 TARGET_SCHED_REORDER2 hooks. If scheduling an insn would be unsafe
4137 in the current cycle, move it down in the ready list and return the
4138 number of non-unsafe insns. */
4139
4140 static int
4141 c6x_sched_reorder_1 (rtx_insn **ready, int *pn_ready, int clock_var)
4142 {
4143 int n_ready = *pn_ready;
4144 rtx_insn **e_ready = ready + n_ready;
4145 rtx_insn **insnp;
4146 int first_jump;
4147
4148 /* Keep track of conflicts due to a limited number of register accesses,
4149 and due to stalls incurred by too early accesses of registers using
4150 cross paths. */
4151
4152 for (insnp = ready; insnp < e_ready; insnp++)
4153 {
4154 rtx_insn *insn = *insnp;
4155 int icode = recog_memoized (insn);
4156 bool is_asm = (icode < 0
4157 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4158 || asm_noperands (PATTERN (insn)) >= 0));
4159 bool no_parallel = (is_asm || icode == CODE_FOR_sploop
4160 || (icode >= 0
4161 && get_attr_type (insn) == TYPE_ATOMIC));
4162
4163 /* We delay asm insns until all delay slots are exhausted. We can't
4164 accurately tell how many cycles an asm takes, and the main scheduling
4165 code always assumes at least 1 cycle, which may be wrong. */
4166 if ((no_parallel
4167 && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
4168 || c6x_registers_update (insn)
4169 || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
4170 {
4171 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4172 *ready = insn;
4173 n_ready--;
4174 ready++;
4175 }
4176 else if (shadow_p (insn))
4177 {
4178 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4179 *ready = insn;
4180 }
4181 }
4182
4183 /* Ensure that no other jump is scheduled in jump delay slots, since
4184 it would put the machine into the wrong state. Also, we must
4185 avoid scheduling insns that have a latency longer than the
4186 remaining jump delay slots, as the code at the jump destination
4187 won't be prepared for it.
4188
4189 However, we can relax this condition somewhat. The rest of the
4190 scheduler will automatically avoid scheduling an insn on which
4191 the jump shadow depends so late that its side effect happens
4192 after the jump. This means that if we see an insn with a longer
4193 latency here, it can safely be scheduled if we can ensure that it
4194 has a predicate opposite of the previous jump: the side effect
4195 will happen in what we think of as the same basic block. In
4196 c6x_variable_issue, we will record the necessary predicate in
4197 new_conditions, and after scheduling is finished, we will modify
4198 the insn.
4199
4200 Special care must be taken whenever there is more than one jump
4201 in flight. */
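  /* A hedged numeric illustration: suppose a single branch issued
     earlier has its side effect at first_cycle == 16 and we are at
     clock_var == 12.  An insn with a 2-cycle latency (12 + 2 <= 16) is
     left alone; a 6-cycle insn is deferred unless its unit reservation
     still fits before cycle 16 and it can be predicated with the
     branch's opposite condition.  */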
4202
4203 first_jump = first_jump_index (clock_var);
4204 if (first_jump != -1)
4205 {
4206 int first_cycle = get_jump_cycle (first_jump);
4207 rtx first_cond = get_jump_cond (first_jump);
4208 int second_cycle = 0;
4209
4210 if (first_jump > 0)
4211 second_cycle = get_jump_cycle (first_jump - 1);
4212
4213 for (insnp = ready; insnp < e_ready; insnp++)
4214 {
4215 rtx_insn *insn = *insnp;
4216 int icode = recog_memoized (insn);
4217 bool is_asm = (icode < 0
4218 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4219 || asm_noperands (PATTERN (insn)) >= 0));
4220 int this_cycles, rsrv_cycles;
4221 enum attr_type type;
4222
4223 gcc_assert (!is_asm);
4224 if (icode < 0)
4225 continue;
4226 this_cycles = get_attr_cycles (insn);
4227 rsrv_cycles = get_attr_reserve_cycles (insn);
4228 type = get_attr_type (insn);
4229 /* Treat branches specially; there is also a hazard if two jumps
4230 end at the same cycle. */
4231 if (type == TYPE_BRANCH || type == TYPE_CALL)
4232 this_cycles++;
4233 if (clock_var + this_cycles <= first_cycle)
4234 continue;
4235 if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
4236 || clock_var + rsrv_cycles > first_cycle
4237 || !predicate_insn (insn, first_cond, false))
4238 {
4239 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4240 *ready = insn;
4241 n_ready--;
4242 ready++;
4243 }
4244 }
4245 }
4246
4247 return n_ready;
4248 }
4249
4250 /* Implement the TARGET_SCHED_REORDER hook. We save the current clock
4251 for later and clear the register access information for the new
4252 cycle. We also move asm statements out of the way if they would be
4253 scheduled in a delay slot. */
4254
4255 static int
4256 c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4257 int sched_verbose ATTRIBUTE_UNUSED,
4258 rtx_insn **ready ATTRIBUTE_UNUSED,
4259 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4260 {
4261 ss.curr_sched_clock = clock_var;
4262 ss.issued_this_cycle = 0;
4263 memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4264 memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4265
4266 if (ready == NULL)
4267 return 0;
4268
4269 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4270 }
4271
4272 /* Implement the TARGET_SCHED_REORDER2 hook. We use this to record the clock
4273 cycle for every insn. */
4274
4275 static int
4276 c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4277 int sched_verbose ATTRIBUTE_UNUSED,
4278 rtx_insn **ready ATTRIBUTE_UNUSED,
4279 int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4280 {
4281 /* FIXME: the assembler rejects labels inside an execute packet.
4282 This can occur if prologue insns are scheduled in parallel with
4283 others, so we avoid this here. Also make sure that nothing is
4284 scheduled in parallel with a TYPE_ATOMIC insn or after a jump. */
4285 if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4286 || JUMP_P (ss.last_scheduled_insn)
4287 || (recog_memoized (ss.last_scheduled_insn) >= 0
4288 && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4289 {
4290 int n_ready = *pn_ready;
4291 rtx_insn **e_ready = ready + n_ready;
4292 rtx_insn **insnp;
4293
4294 for (insnp = ready; insnp < e_ready; insnp++)
4295 {
4296 rtx_insn *insn = *insnp;
4297 if (!shadow_p (insn))
4298 {
4299 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4300 *ready = insn;
4301 n_ready--;
4302 ready++;
4303 }
4304 }
4305 return n_ready;
4306 }
4307
4308 return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4309 }
4310
4311 /* Subroutine of maybe_clobber_cond, called through note_stores. */
4312
4313 static void
4314 clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4315 {
4316 rtx *cond = (rtx *)data1;
4317 if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4318 *cond = NULL_RTX;
4319 }
4320
4321 /* Examine INSN, and if it destroys the conditions we have recorded for
4322 any of the jumps in flight, clear that condition so that we don't
4323 predicate any more insns. CLOCK_VAR helps us limit the search to
4324 only those jumps which are still in flight. */
4325
4326 static void
4327 maybe_clobber_cond (rtx insn, int clock_var)
4328 {
4329 int n, idx;
4330 idx = ss.jump_cycle_index;
4331 for (n = 0; n < 12; n++, idx++)
4332 {
4333 rtx cond, link;
4334 int cycle;
4335
4336 if (idx >= 12)
4337 idx -= 12;
4338 cycle = ss.jump_cycles[idx];
4339 if (cycle <= clock_var)
4340 return;
4341
4342 cond = ss.jump_cond[idx];
4343 if (cond == NULL_RTX)
4344 continue;
4345
4346 if (CALL_P (insn))
4347 {
4348 ss.jump_cond[idx] = NULL_RTX;
4349 continue;
4350 }
4351
4352 note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4353 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4354 if (REG_NOTE_KIND (link) == REG_INC)
4355 clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4356 }
4357 }
4358
4359 /* Implement the TARGET_SCHED_VARIABLE_ISSUE hook. We are about to
4360 issue INSN. Return the number of insns left on the ready queue
4361 that can be issued this cycle.
4362 We use this hook to record clock cycles and reservations for every insn. */
4363
4364 static int
4365 c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4366 int sched_verbose ATTRIBUTE_UNUSED,
4367 rtx_insn *insn, int can_issue_more ATTRIBUTE_UNUSED)
4368 {
4369 ss.last_scheduled_insn = insn;
4370 if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4371 ss.last_scheduled_iter0 = insn;
4372 if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4373 ss.issued_this_cycle++;
4374 if (insn_info.exists ())
4375 {
4376 state_t st_after = alloca (dfa_state_size);
4377 int curr_clock = ss.curr_sched_clock;
4378 int uid = INSN_UID (insn);
4379 int icode = recog_memoized (insn);
4380 rtx first_cond;
4381 int first, first_cycle;
4382 unsigned int mask;
4383 int i;
4384
4385 insn_set_clock (insn, curr_clock);
4386 INSN_INFO_ENTRY (uid).ebb_start
4387 = curr_clock == 0 && ss.issued_this_cycle == 1;
4388
4389 first = first_jump_index (ss.curr_sched_clock);
4390 if (first == -1)
4391 {
4392 first_cycle = 0;
4393 first_cond = NULL_RTX;
4394 }
4395 else
4396 {
4397 first_cycle = get_jump_cycle (first);
4398 first_cond = get_jump_cond (first);
4399 }
4400 if (icode >= 0
4401 && first_cycle > curr_clock
4402 && first_cond != NULL_RTX
4403 && (curr_clock + get_attr_cycles (insn) > first_cycle
4404 || get_attr_type (insn) == TYPE_BRANCH
4405 || get_attr_type (insn) == TYPE_CALL))
4406 INSN_INFO_ENTRY (uid).new_cond = first_cond;
4407
4408 memcpy (st_after, curr_state, dfa_state_size);
4409 state_transition (st_after, const0_rtx);
4410
4411 mask = 0;
4412 for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4413 if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4414 && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4415 mask |= 1 << i;
4416 INSN_INFO_ENTRY (uid).unit_mask = mask;
4417
4418 maybe_clobber_cond (insn, curr_clock);
4419
4420 if (icode >= 0)
4421 {
4422 int i, cycles;
4423
4424 c6x_registers_update (insn);
4425 memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4426 sizeof ss.reg_n_accesses);
4427 memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_xaccesses,
4428 sizeof ss.reg_n_xaccesses);
4429
4430 cycles = get_attr_cycles (insn);
4431 if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4432 ss.delays_finished_at = ss.curr_sched_clock + cycles;
4433 if (get_attr_type (insn) == TYPE_BRANCH
4434 || get_attr_type (insn) == TYPE_CALL)
4435 {
4436 rtx opposite = condjump_opposite_condition (insn);
4437 record_jump (ss.curr_sched_clock + cycles, opposite);
4438 }
4439
4440 /* Mark the cycles in which the destination registers are written.
4441 This is used for calculating stalls when using cross units. */
4442 extract_insn (insn);
4443 /* Cross-path stalls don't apply to results of load insns. */
4444 if (get_attr_type (insn) == TYPE_LOAD
4445 || get_attr_type (insn) == TYPE_LOADN
4446 || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4447 cycles--;
4448 for (i = 0; i < recog_data.n_operands; i++)
4449 {
4450 rtx op = recog_data.operand[i];
4451 if (MEM_P (op))
4452 {
4453 rtx addr = XEXP (op, 0);
4454 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4455 c6x_mark_reg_written (XEXP (addr, 0),
4456 insn_uid_get_clock (uid) + 1);
4457 }
4458 if (recog_data.operand_type[i] != OP_IN
4459 && REG_P (op))
4460 {
4461 c6x_mark_reg_written (op,
4462 insn_uid_get_clock (uid) + cycles);
4463 }
4464 }
4465 }
4466 }
4467 return can_issue_more;
4468 }
4469
4470 /* Implement the TARGET_SCHED_ADJUST_COST hook. We need special handling for
4471 anti- and output dependencies. */
4472
4473 static int
4474 c6x_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4475 unsigned int)
4476 {
4477 enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4478 int dep_insn_code_number, insn_code_number;
4479 int shadow_bonus = 0;
4480 enum reg_note kind;
4481 dep_insn_code_number = recog_memoized (dep_insn);
4482 insn_code_number = recog_memoized (insn);
4483
4484 if (dep_insn_code_number >= 0)
4485 dep_insn_type = get_attr_type (dep_insn);
4486
4487 if (insn_code_number >= 0)
4488 insn_type = get_attr_type (insn);
4489
4490 kind = (reg_note) dep_type;
4491 if (kind == 0)
4492 {
4493 /* If we have a dependency on a load, and it's not for the result of
4494 the load, it must be for an autoincrement. Reduce the cost in that
4495 case. */
4496 if (dep_insn_type == TYPE_LOAD)
4497 {
4498 rtx set = PATTERN (dep_insn);
4499 if (GET_CODE (set) == COND_EXEC)
4500 set = COND_EXEC_CODE (set);
4501 if (GET_CODE (set) == UNSPEC)
4502 cost = 1;
4503 else
4504 {
4505 gcc_assert (GET_CODE (set) == SET);
4506 if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4507 cost = 1;
4508 }
4509 }
4510 }
4511
4512 /* A jump shadow needs to have its latency decreased by one. Conceptually,
4513 it occurs in between two cycles, but we schedule it at the end of the
4514 first cycle. */
4515 if (shadow_type_p (insn_type))
4516 shadow_bonus = 1;
4517
4518 /* Anti and output dependencies usually have zero cost, but we want
4519 to insert a stall after a jump, and after certain floating point
4520 insns that take more than one cycle to read their inputs. In the
4521 future, we should try to find a better algorithm for scheduling
4522 jumps. */
4523 if (kind != 0)
4524 {
4525 /* We can get anti-dependencies against shadow insns. Treat these
4526 like output dependencies, so that the insn is entirely finished
4527 before the branch takes place. */
4528 if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4529 kind = REG_DEP_OUTPUT;
4530 switch (dep_insn_type)
4531 {
4532 case TYPE_CALLP:
4533 return 1;
4534 case TYPE_BRANCH:
4535 case TYPE_CALL:
4536 if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4537 /* This is a real_jump/real_call insn. These don't have
4538 outputs, and ensuring the validity of scheduling things
4539 in the delay slot is the job of
4540 c6x_sched_reorder_1. */
4541 return 0;
4542 /* Unsplit calls can happen - e.g. for divide insns. */
4543 return 6;
4544 case TYPE_LOAD:
4545 case TYPE_LOADN:
4546 case TYPE_INTDP:
4547 if (kind == REG_DEP_OUTPUT)
4548 return 5 - shadow_bonus;
4549 return 0;
4550 case TYPE_MPY4:
4551 case TYPE_FP4:
4552 if (kind == REG_DEP_OUTPUT)
4553 return 4 - shadow_bonus;
4554 return 0;
4555 case TYPE_MPY2:
4556 if (kind == REG_DEP_OUTPUT)
4557 return 2 - shadow_bonus;
4558 return 0;
4559 case TYPE_CMPDP:
4560 if (kind == REG_DEP_OUTPUT)
4561 return 2 - shadow_bonus;
4562 return 2;
4563 case TYPE_ADDDP:
4564 case TYPE_MPYSPDP:
4565 if (kind == REG_DEP_OUTPUT)
4566 return 7 - shadow_bonus;
4567 return 2;
4568 case TYPE_MPYSP2DP:
4569 if (kind == REG_DEP_OUTPUT)
4570 return 5 - shadow_bonus;
4571 return 2;
4572 case TYPE_MPYI:
4573 if (kind == REG_DEP_OUTPUT)
4574 return 9 - shadow_bonus;
4575 return 4;
4576 case TYPE_MPYID:
4577 case TYPE_MPYDP:
4578 if (kind == REG_DEP_OUTPUT)
4579 return 10 - shadow_bonus;
4580 return 4;
4581
4582 default:
4583 if (insn_type == TYPE_SPKERNEL)
4584 return 0;
4585 if (kind == REG_DEP_OUTPUT)
4586 return 1 - shadow_bonus;
4587
4588 return 0;
4589 }
4590 }
4591
4592 return cost - shadow_bonus;
4593 }
4594
4595 /* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4596 are N_FILLED. REAL_FIRST identifies the slot of the insn that appears
4597 first in the original stream. */
4598
4599 static void
4600 gen_one_bundle (rtx_insn **slot, int n_filled, int real_first)
4601 {
4602 rtx seq;
4603 rtx_insn *bundle;
4604 rtx_insn *t;
4605 int i;
4606
4607 seq = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4608 bundle = make_insn_raw (seq);
4609 BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
4610 INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]);
4611 SET_PREV_INSN (bundle) = SET_PREV_INSN (slot[real_first]);
4612
4613 t = NULL;
4614
4615 for (i = 0; i < n_filled; i++)
4616 {
4617 rtx_insn *insn = slot[i];
4618 remove_insn (insn);
4619 SET_PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4620 if (t != NULL_RTX)
4621 SET_NEXT_INSN (t) = insn;
4622 t = insn;
4623 if (i > 0)
4624 INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle);
4625 }
4626
4627 SET_NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4628 SET_NEXT_INSN (t) = NEXT_INSN (bundle);
4629 SET_NEXT_INSN (PREV_INSN (bundle)) = bundle;
4630 SET_PREV_INSN (NEXT_INSN (bundle)) = bundle;
4631 }
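/* For illustration, two insns scheduled in the same cycle end up as a
   single insn whose pattern is (sequence [insn_1 insn_2]); later passes
   then treat the execute packet as one unit and cannot emit a label
   between its members.  */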
4632
4633 /* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4634 try to insert labels in the middle. */
4635
4636 static void
4637 c6x_gen_bundles (void)
4638 {
4639 basic_block bb;
4640
4641 FOR_EACH_BB_FN (bb, cfun)
4642 {
4643 rtx_insn *insn, *next;
4644 /* The machine is eight insns wide. We can have up to six shadow
4645 insns, plus an extra slot for merging the jump shadow. */
4646 rtx_insn *slot[15];
4647 int n_filled = 0;
4648 int first_slot = 0;
4649
4650 for (insn = BB_HEAD (bb);; insn = next)
4651 {
4652 int at_end;
4653 rtx delete_this = NULL_RTX;
4654
4655 if (NONDEBUG_INSN_P (insn))
4656 {
4657 /* Put calls at the start of the sequence. */
4658 if (CALL_P (insn))
4659 {
4660 first_slot++;
4661 if (n_filled)
4662 {
4663 memmove (&slot[1], &slot[0],
4664 n_filled * sizeof (slot[0]));
4665 }
4666 if (!shadow_p (insn))
4667 {
4668 PUT_MODE (insn, TImode);
4669 if (n_filled)
4670 PUT_MODE (slot[1], VOIDmode);
4671 }
4672 n_filled++;
4673 slot[0] = insn;
4674 }
4675 else
4676 {
4677 slot[n_filled++] = insn;
4678 }
4679 }
4680
4681 next = NEXT_INSN (insn);
4682 while (next && insn != BB_END (bb)
4683 && !(NONDEBUG_INSN_P (next)
4684 && GET_CODE (PATTERN (next)) != USE
4685 && GET_CODE (PATTERN (next)) != CLOBBER))
4686 {
4687 insn = next;
4688 next = NEXT_INSN (insn);
4689 }
4690
4691 at_end = insn == BB_END (bb);
4692 if (delete_this == NULL_RTX
4693 && (at_end || (GET_MODE (next) == TImode
4694 && !(shadow_p (next) && CALL_P (next)))))
4695 {
4696 if (n_filled >= 2)
4697 gen_one_bundle (slot, n_filled, first_slot);
4698
4699 n_filled = 0;
4700 first_slot = 0;
4701 }
4702 if (at_end)
4703 break;
4704 }
4705 }
4706 }
4707
4708 /* Emit a NOP instruction for CYCLES cycles after insn AFTER. Return it. */
4709
4710 static rtx_insn *
4711 emit_nop_after (int cycles, rtx_insn *after)
4712 {
4713 rtx_insn *insn;
4714
4715 /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4716 operation. We don't need the extra NOP since in this case, the hardware
4717 will automatically insert the required stall. */
4718 if (cycles == 10)
4719 cycles--;
4720
4721 gcc_assert (cycles < 10);
4722
4723 insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4724 PUT_MODE (insn, TImode);
4725
4726 return insn;
4727 }
4728
4729 /* Determine whether INSN is a call that needs to have a return label
4730 placed. */
4731
4732 static bool
4733 returning_call_p (rtx_insn *insn)
4734 {
4735 if (CALL_P (insn))
4736 return (!SIBLING_CALL_P (insn)
4737 && get_attr_type (insn) != TYPE_CALLP
4738 && get_attr_type (insn) != TYPE_SHADOW);
4739 if (recog_memoized (insn) < 0)
4740 return false;
4741 if (get_attr_type (insn) == TYPE_CALL)
4742 return true;
4743 return false;
4744 }
4745
4746 /* Determine whether INSN's pattern can be converted to use callp. */
4747 static bool
4748 can_use_callp (rtx_insn *insn)
4749 {
4750 int icode = recog_memoized (insn);
4751 if (!TARGET_INSNS_64PLUS
4752 || icode < 0
4753 || GET_CODE (PATTERN (insn)) == COND_EXEC)
4754 return false;
4755
4756 return ((icode == CODE_FOR_real_call
4757 || icode == CODE_FOR_call_internal
4758 || icode == CODE_FOR_call_value_internal)
4759 && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4760 }
4761
4762 /* Convert the pattern of INSN, which must be a CALL_INSN, into a callp. */
4763 static void
4764 convert_to_callp (rtx_insn *insn)
4765 {
4766 rtx lab;
4767 extract_insn (insn);
4768 if (GET_CODE (PATTERN (insn)) == SET)
4769 {
4770 rtx dest = recog_data.operand[0];
4771 lab = recog_data.operand[1];
4772 PATTERN (insn) = gen_callp_value (dest, lab);
4773 INSN_CODE (insn) = CODE_FOR_callp_value;
4774 }
4775 else
4776 {
4777 lab = recog_data.operand[0];
4778 PATTERN (insn) = gen_callp (lab);
4779 INSN_CODE (insn) = CODE_FOR_callp;
4780 }
4781 }
4782
4783 /* Scan forwards from INSN until we find the next insn that has mode TImode
4784 (indicating it starts a new cycle), and occurs in cycle CLOCK.
4785 Return it if we find such an insn, NULL_RTX otherwise. */
4786 static rtx_insn *
4787 find_next_cycle_insn (rtx_insn *insn, int clock)
4788 {
4789 rtx_insn *t = insn;
4790 if (GET_MODE (t) == TImode)
4791 t = next_real_insn (t);
4792 while (t && GET_MODE (t) != TImode)
4793 t = next_real_insn (t);
4794
4795 if (t && insn_get_clock (t) == clock)
4796 return t;
4797 return NULL;
4798 }
4799
4800 /* If COND_INSN has a COND_EXEC condition, wrap the same condition
4801 around PAT. Return PAT either unchanged or modified in this
4802 way. */
4803 static rtx
4804 duplicate_cond (rtx pat, rtx cond_insn)
4805 {
4806 rtx cond_pat = PATTERN (cond_insn);
4807 if (GET_CODE (cond_pat) == COND_EXEC)
4808 pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4809 pat);
4810 return pat;
4811 }
4812
4813 /* Walk forward from INSN to find the last insn that issues in the same clock
4814 cycle. */
4815 static rtx_insn *
4816 find_last_same_clock (rtx_insn *insn)
4817 {
4818 rtx_insn *retval = insn;
4819 rtx_insn *t = next_real_insn (insn);
4820
4821 while (t && GET_MODE (t) != TImode)
4822 {
4823 if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4824 retval = t;
4825 t = next_real_insn (t);
4826 }
4827 return retval;
4828 }
4829
4830 /* For every call insn in the function, emit code to load the return
4831 address. For each call we create a return label and store it in
4832 CALL_LABELS. If we are not scheduling, we emit the labels here,
4833 otherwise the caller will do it later.
4834 This function is called after final insn scheduling, but before creating
4835 the SEQUENCEs that represent execute packets. */
4836
4837 static void
4838 reorg_split_calls (rtx_insn **call_labels)
4839 {
4840 unsigned int reservation_mask = 0;
4841 rtx_insn *insn = get_insns ();
4842 gcc_assert (NOTE_P (insn));
4843 insn = next_real_insn (insn);
4844 while (insn)
4845 {
4846 int uid;
4847 rtx_insn *next = next_real_insn (insn);
4848
4849 if (DEBUG_INSN_P (insn))
4850 goto done;
4851
4852 if (GET_MODE (insn) == TImode)
4853 reservation_mask = 0;
4854 uid = INSN_UID (insn);
4855 if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4856 reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4857
4858 if (returning_call_p (insn))
4859 {
4860 rtx_code_label *label = gen_label_rtx ();
4861 rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4862 rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4863
4864 LABEL_NUSES (label) = 2;
4865 if (!c6x_flag_schedule_insns2)
4866 {
4867 if (can_use_callp (insn))
4868 convert_to_callp (insn);
4869 else
4870 {
4871 rtx t;
4872 rtx_insn *slot[4];
4873 emit_label_after (label, insn);
4874
4875 /* Bundle the call and its delay slots into a single
4876 SEQUENCE. While these do not issue in parallel,
4877 we need to group them into a single EH region. */
4878 slot[0] = insn;
4879 PUT_MODE (insn, TImode);
4880 if (TARGET_INSNS_64)
4881 {
4882 t = gen_addkpc (reg, labelref, GEN_INT (4));
4883 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4884 insn);
4885 PUT_MODE (slot[1], TImode);
4886 gen_one_bundle (slot, 2, 0);
4887 }
4888 else
4889 {
4890 slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4891 insn);
4892 PUT_MODE (slot[3], TImode);
4893 t = gen_movsi_lo_sum (reg, reg, labelref);
4894 slot[2] = emit_insn_after (duplicate_cond (t, insn),
4895 insn);
4896 PUT_MODE (slot[2], TImode);
4897 t = gen_movsi_high (reg, labelref);
4898 slot[1] = emit_insn_after (duplicate_cond (t, insn),
4899 insn);
4900 PUT_MODE (slot[1], TImode);
4901 gen_one_bundle (slot, 4, 0);
4902 }
4903 }
4904 }
4905 else
4906 {
4907 /* If we scheduled, we reserved the .S2 unit for one or two
4908 cycles after the call. Emit the insns in these slots,
4909 unless it's possible to create a CALLP insn.
4910 Note that this works because the dependencies ensure that
4911 no insn setting/using B3 is scheduled in the delay slots of
4912 a call. */
4913 int this_clock = insn_get_clock (insn);
4914 rtx_insn *after1;
4915
4916 call_labels[INSN_UID (insn)] = label;
4917
4918 rtx_insn *last_same_clock = find_last_same_clock (insn);
4919
4920 if (can_use_callp (insn))
4921 {
4922 /* Find the first insn of the next execute packet. If it
4923 is the shadow insn corresponding to this call, we may
4924 use a CALLP insn. */
4925 rtx_insn *shadow =
4926 next_nonnote_nondebug_insn (last_same_clock);
4927
4928 if (CALL_P (shadow)
4929 && insn_get_clock (shadow) == this_clock + 5)
4930 {
4931 convert_to_callp (shadow);
4932 insn_set_clock (shadow, this_clock);
4933 INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4934 = RESERVATION_S2;
4935 INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4936 = INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
4937 if (GET_MODE (insn) == TImode)
4938 {
4939 rtx_insn *new_cycle_first = NEXT_INSN (insn);
4940 while (!NONDEBUG_INSN_P (new_cycle_first)
4941 || GET_CODE (PATTERN (new_cycle_first)) == USE
4942 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4943 new_cycle_first = NEXT_INSN (new_cycle_first);
4944 PUT_MODE (new_cycle_first, TImode);
4945 if (new_cycle_first != shadow)
4946 PUT_MODE (shadow, VOIDmode);
4947 INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4948 = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4949 }
4950 else
4951 PUT_MODE (shadow, VOIDmode);
4952 delete_insn (insn);
4953 goto done;
4954 }
4955 }
4956 after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4957 if (after1 == NULL_RTX)
4958 after1 = last_same_clock;
4959 else
4960 after1 = find_last_same_clock (after1);
4961 if (TARGET_INSNS_64)
4962 {
4963 rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4964 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4965 insn_set_clock (x1, this_clock + 1);
4966 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4967 if (after1 == last_same_clock)
4968 PUT_MODE (x1, TImode);
4969 else
4970 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4971 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4972 }
4973 else
4974 {
4975 rtx x1, x2;
4976 rtx_insn *after2 = find_next_cycle_insn (after1,
4977 this_clock + 2);
4978 if (after2 == NULL_RTX)
4979 after2 = after1;
4980 x2 = gen_movsi_lo_sum (reg, reg, labelref);
4981 x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
4982 x1 = gen_movsi_high (reg, labelref);
4983 x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4984 insn_set_clock (x1, this_clock + 1);
4985 insn_set_clock (x2, this_clock + 2);
4986 INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4987 INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
4988 if (after1 == last_same_clock)
4989 PUT_MODE (x1, TImode);
4990 else
4991 INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4992 = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4993 if (after1 == after2)
4994 PUT_MODE (x2, TImode);
4995 else
4996 INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
4997 = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
4998 }
4999 }
5000 }
5001 done:
5002 insn = next;
5003 }
5004 }
5005
5006 /* Called as part of c6x_reorg. This function emits multi-cycle NOP
5007 insns as required for correctness. CALL_LABELS is the array that
5008 holds the return labels for call insns; we emit these here if
5009 scheduling was run earlier. */
5010
5011 static void
5012 reorg_emit_nops (rtx_insn **call_labels)
5013 {
5014 bool first;
5015 rtx last_call;
5016 rtx_insn *prev;
5017 int prev_clock, earliest_bb_end;
5018 int prev_implicit_nops;
5019 rtx_insn *insn = get_insns ();
5020
5021 /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5022 its issue time in PREV_CLOCK for the next iteration. If there is a gap in
5023 clocks, we must insert a NOP.
5024 EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5025 current basic block will finish. We must not allow the next basic block to
5026 begin before this cycle.
5027 PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains
5028 a multi-cycle nop. The code is scheduled such that subsequent insns will
5029 show the cycle gap, but we needn't insert a real NOP instruction. */
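  /* A hedged worked example of this bookkeeping: if the previous packet
     issued at clock 3 and the next one at clock 7 with no implicit NOPs
     pending, the gap is 4 cycles and we emit a "NOP 3" after the
     previous packet; if the previous insn was a CALLP, its five implicit
     NOP cycles are subtracted first, so a gap of up to six cycles needs
     no explicit NOP at all.  */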
5030 insn = next_real_insn (insn);
5031 last_call = prev = NULL;
5032 prev_clock = -1;
5033 earliest_bb_end = 0;
5034 prev_implicit_nops = 0;
5035 first = true;
5036 while (insn)
5037 {
5038 int this_clock = -1;
5039 rtx_insn *next;
5040 int max_cycles = 0;
5041
5042 next = next_real_insn (insn);
5043
5044 if (DEBUG_INSN_P (insn)
5045 || GET_CODE (PATTERN (insn)) == USE
5046 || GET_CODE (PATTERN (insn)) == CLOBBER
5047 || shadow_or_blockage_p (insn)
5048 || JUMP_TABLE_DATA_P (insn))
5049 goto next_insn;
5050
5051 if (!c6x_flag_schedule_insns2)
5052 /* No scheduling; ensure that no parallel issue happens. */
5053 PUT_MODE (insn, TImode);
5054 else
5055 {
5056 int cycles;
5057
5058 this_clock = insn_get_clock (insn);
5059 if (this_clock != prev_clock)
5060 {
5061 PUT_MODE (insn, TImode);
5062
5063 if (!first)
5064 {
5065 cycles = this_clock - prev_clock;
5066
5067 cycles -= prev_implicit_nops;
5068 if (cycles > 1)
5069 {
5070 rtx nop = emit_nop_after (cycles - 1, prev);
5071 insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5072 }
5073 }
5074 prev_clock = this_clock;
5075
5076 if (last_call
5077 && insn_get_clock (last_call) + 6 <= this_clock)
5078 {
5079 emit_label_before (call_labels[INSN_UID (last_call)], insn);
5080 last_call = NULL_RTX;
5081 }
5082 prev_implicit_nops = 0;
5083 }
5084 }
5085
5086 /* Examine how many cycles the current insn takes, and adjust
5087 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS. */
5088 if (recog_memoized (insn) >= 0
5089 /* If not scheduling, we've emitted NOPs after calls already. */
5090 && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5091 {
5092 max_cycles = get_attr_cycles (insn);
5093 if (get_attr_type (insn) == TYPE_CALLP)
5094 prev_implicit_nops = 5;
5095 }
5096 else
5097 max_cycles = 1;
5098 if (returning_call_p (insn))
5099 last_call = insn;
5100
5101 if (c6x_flag_schedule_insns2)
5102 {
5103 gcc_assert (this_clock >= 0);
5104 if (earliest_bb_end < this_clock + max_cycles)
5105 earliest_bb_end = this_clock + max_cycles;
5106 }
5107 else if (max_cycles > 1)
5108 emit_nop_after (max_cycles - 1, insn);
5109
5110 prev = insn;
5111 first = false;
5112
5113 next_insn:
5114 if (c6x_flag_schedule_insns2
5115 && (next == NULL_RTX
5116 || (GET_MODE (next) == TImode
5117 && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5118 && earliest_bb_end > 0)
5119 {
5120 int cycles = earliest_bb_end - prev_clock;
5121 if (cycles > 1)
5122 {
5123 prev = emit_nop_after (cycles - 1, prev);
5124 insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5125 }
5126 earliest_bb_end = 0;
5127 prev_clock = -1;
5128 first = true;
5129
5130 if (last_call)
5131 emit_label_after (call_labels[INSN_UID (last_call)], prev);
5132 last_call = NULL_RTX;
5133 }
5134 insn = next;
5135 }
5136 }
5137
5138 /* If possible, split INSN, which we know is either a jump or a call, into a real
5139 insn and its shadow. */
5140 static void
5141 split_delayed_branch (rtx_insn *insn)
5142 {
5143 int code = recog_memoized (insn);
5144 rtx_insn *i1;
5145 rtx newpat;
5146 rtx pat = PATTERN (insn);
5147
5148 if (GET_CODE (pat) == COND_EXEC)
5149 pat = COND_EXEC_CODE (pat);
5150
5151 if (CALL_P (insn))
5152 {
5153 rtx src = pat, dest = NULL_RTX;
5154 rtx callee;
5155 if (GET_CODE (pat) == SET)
5156 {
5157 dest = SET_DEST (pat);
5158 src = SET_SRC (pat);
5159 }
5160 callee = XEXP (XEXP (src, 0), 0);
5161 if (SIBLING_CALL_P (insn))
5162 {
5163 if (REG_P (callee))
5164 newpat = gen_indirect_sibcall_shadow ();
5165 else
5166 newpat = gen_sibcall_shadow (callee);
5167 pat = gen_real_jump (callee);
5168 }
5169 else if (dest != NULL_RTX)
5170 {
5171 if (REG_P (callee))
5172 newpat = gen_indirect_call_value_shadow (dest);
5173 else
5174 newpat = gen_call_value_shadow (dest, callee);
5175 pat = gen_real_call (callee);
5176 }
5177 else
5178 {
5179 if (REG_P (callee))
5180 newpat = gen_indirect_call_shadow ();
5181 else
5182 newpat = gen_call_shadow (callee);
5183 pat = gen_real_call (callee);
5184 }
5185 pat = duplicate_cond (pat, insn);
5186 newpat = duplicate_cond (newpat, insn);
5187 }
5188 else
5189 {
5190 rtx src, op;
5191 if (GET_CODE (pat) == PARALLEL
5192 && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5193 {
5194 newpat = gen_return_shadow ();
5195 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5196 newpat = duplicate_cond (newpat, insn);
5197 }
5198 else
5199 switch (code)
5200 {
5201 case CODE_FOR_br_true:
5202 case CODE_FOR_br_false:
5203 src = SET_SRC (pat);
5204 op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5205 newpat = gen_condjump_shadow (op);
5206 pat = gen_real_jump (op);
5207 if (code == CODE_FOR_br_true)
5208 pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5209 else
5210 pat = gen_rtx_COND_EXEC (VOIDmode,
5211 reversed_comparison (XEXP (src, 0),
5212 VOIDmode),
5213 pat);
5214 break;
5215
5216 case CODE_FOR_jump:
5217 op = SET_SRC (pat);
5218 newpat = gen_jump_shadow (op);
5219 break;
5220
5221 case CODE_FOR_indirect_jump:
5222 newpat = gen_indirect_jump_shadow ();
5223 break;
5224
5225 case CODE_FOR_return_internal:
5226 newpat = gen_return_shadow ();
5227 pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5228 break;
5229
5230 default:
5231 return;
5232 }
5233 }
5234 i1 = emit_insn_before (pat, insn);
5235 PATTERN (insn) = newpat;
5236 INSN_CODE (insn) = -1;
5237 record_delay_slot_pair (i1, insn, 5, 0);
5238 }
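/* For illustration, a br_true conditional jump is split by emitting a
   real_jump in its place, wrapped in a COND_EXEC with the branch
   condition, while the original insn is rewritten as a condjump_shadow;
   record_delay_slot_pair then ties the pair together five cycles apart,
   matching the C6X branch delay.  */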
5239
5240 /* If INSN is a multi-cycle insn that should be handled properly in
5241 modulo-scheduling, split it into a real insn and a shadow.
5242 Return true if we made a change.
5243
5244 It is valid for us to fail to split an insn; the caller has to deal
5245 with the possibility. Currently we handle loads and most mpy2 and
5246 mpy4 insns. */
5247 static bool
5248 split_delayed_nonbranch (rtx_insn *insn)
5249 {
5250 int code = recog_memoized (insn);
5251 enum attr_type type;
5252 rtx_insn *i1;
5253 rtx newpat, src, dest;
5254 rtx pat = PATTERN (insn);
5255 rtvec rtv;
5256 int delay;
5257
5258 if (GET_CODE (pat) == COND_EXEC)
5259 pat = COND_EXEC_CODE (pat);
5260
5261 if (code < 0 || GET_CODE (pat) != SET)
5262 return false;
5263 src = SET_SRC (pat);
5264 dest = SET_DEST (pat);
5265 if (!REG_P (dest))
5266 return false;
5267
5268 type = get_attr_type (insn);
5269 if (code >= 0
5270 && (type == TYPE_LOAD
5271 || type == TYPE_LOADN))
5272 {
5273 if (!MEM_P (src)
5274 && (GET_CODE (src) != ZERO_EXTEND
5275 || !MEM_P (XEXP (src, 0))))
5276 return false;
5277
5278 if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5279 && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5280 return false;
5281
5282 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5283 SET_SRC (pat));
5284 newpat = gen_load_shadow (SET_DEST (pat));
5285 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5286 delay = 4;
5287 }
5288 else if (code >= 0
5289 && (type == TYPE_MPY2
5290 || type == TYPE_MPY4))
5291 {
5292 /* We don't handle floating point multiplies yet. */
5293 if (GET_MODE (dest) == SFmode)
5294 return false;
5295
5296 rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5297 SET_SRC (pat));
5298 newpat = gen_mult_shadow (SET_DEST (pat));
5299 pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5300 delay = type == TYPE_MPY2 ? 1 : 3;
5301 }
5302 else
5303 return false;
5304
5305 pat = duplicate_cond (pat, insn);
5306 newpat = duplicate_cond (newpat, insn);
5307 i1 = emit_insn_before (pat, insn);
5308 PATTERN (insn) = newpat;
5309 INSN_CODE (insn) = -1;
5310 recog_memoized (insn);
5311 recog_memoized (i1);
5312 record_delay_slot_pair (i1, insn, delay, 0);
5313 return true;
5314 }
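/* For illustration, a simple load (set (reg) (mem ...)) is split into an
   UNSPEC_REAL_LOAD insn carrying the destination register number and the
   memory source, followed by a load_shadow that actually sets the
   register; the pair is recorded four cycles apart, matching the four
   delay slots of a C6X load.  Multiplies get a delay of one (mpy2) or
   three (mpy4) cycles instead.  */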
5315
5316 /* Examine if INSN is the result of splitting a load into a real load and a
5317 shadow, and if so, undo the transformation. */
5318 static void
5319 undo_split_delayed_nonbranch (rtx_insn *insn)
5320 {
5321 int icode = recog_memoized (insn);
5322 enum attr_type type;
5323 rtx prev_pat, insn_pat;
5324 rtx_insn *prev;
5325
5326 if (icode < 0)
5327 return;
5328 type = get_attr_type (insn);
5329 if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5330 return;
5331 prev = PREV_INSN (insn);
5332 prev_pat = PATTERN (prev);
5333 insn_pat = PATTERN (insn);
5334 if (GET_CODE (prev_pat) == COND_EXEC)
5335 {
5336 prev_pat = COND_EXEC_CODE (prev_pat);
5337 insn_pat = COND_EXEC_CODE (insn_pat);
5338 }
5339
5340 gcc_assert (GET_CODE (prev_pat) == UNSPEC
5341 && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5342 && type == TYPE_LOAD_SHADOW)
5343 || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5344 && type == TYPE_MULT_SHADOW)));
5345 insn_pat = gen_rtx_SET (SET_DEST (insn_pat),
5346 XVECEXP (prev_pat, 0, 1));
5347 insn_pat = duplicate_cond (insn_pat, prev);
5348 PATTERN (insn) = insn_pat;
5349 INSN_CODE (insn) = -1;
5350 delete_insn (prev);
5351 }
5352
5353 /* Split every insn (i.e. jumps and calls) which can have delay slots into
5354 two parts: the first one is scheduled normally and emits the instruction,
5355 while the second one is a shadow insn which shows the side effect taking
5356 place. The second one is placed in the right cycle by the scheduler, but
5357 not emitted as an assembly instruction. */
5358
5359 static void
5360 split_delayed_insns (void)
5361 {
5362 rtx_insn *insn;
5363 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5364 {
5365 if (JUMP_P (insn) || CALL_P (insn))
5366 split_delayed_branch (insn);
5367 }
5368 }
5369
5370 /* For every insn that has an entry in the new_conditions vector, give it
5371 the appropriate predicate. */
5372 static void
5373 conditionalize_after_sched (void)
5374 {
5375 basic_block bb;
5376 rtx_insn *insn;
5377 FOR_EACH_BB_FN (bb, cfun)
5378 FOR_BB_INSNS (bb, insn)
5379 {
5380 unsigned uid = INSN_UID (insn);
5381 rtx cond;
5382 if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5383 continue;
5384 cond = INSN_INFO_ENTRY (uid).new_cond;
5385 if (cond == NULL_RTX)
5386 continue;
5387 if (dump_file)
5388 fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5389 predicate_insn (insn, cond, true);
5390 }
5391 }
5392
5393 /* A callback for the hw-doloop pass. This function examines INSN; if
5394 it is a loop_end pattern we recognize, return the reg rtx for the
5395 loop counter. Otherwise, return NULL_RTX. */
5396
5397 static rtx
5398 hwloop_pattern_reg (rtx_insn *insn)
5399 {
5400 rtx pat, reg;
5401
5402 if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5403 return NULL_RTX;
5404
5405 pat = PATTERN (insn);
5406 reg = SET_DEST (XVECEXP (pat, 0, 1));
5407 if (!REG_P (reg))
5408 return NULL_RTX;
5409 return reg;
5410 }
5411
5412 /* Return the number of cycles taken by BB, as computed by scheduling,
5413 including the latencies of all insns with delay slots. IGNORE is
5414 an insn we should ignore in the calculation, usually the final
5415 branch. */
5416 static int
5417 bb_earliest_end_cycle (basic_block bb, rtx ignore)
5418 {
5419 int earliest = 0;
5420 rtx_insn *insn;
5421
5422 FOR_BB_INSNS (bb, insn)
5423 {
5424 int cycles, this_clock;
5425
5426 if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5427 || GET_CODE (PATTERN (insn)) == USE
5428 || GET_CODE (PATTERN (insn)) == CLOBBER
5429 || insn == ignore)
5430 continue;
5431
5432 this_clock = insn_get_clock (insn);
5433 cycles = get_attr_cycles (insn);
5434
5435 if (earliest < this_clock + cycles)
5436 earliest = this_clock + cycles;
5437 }
5438 return earliest;
5439 }
5440
5441 /* Examine the insns in BB and remove all which have a uid greater or
5442 equal to MAX_UID. */
5443 static void
5444 filter_insns_above (basic_block bb, int max_uid)
5445 {
5446 rtx_insn *insn, *next;
5447 bool prev_ti = false;
5448 int prev_cycle = -1;
5449
5450 FOR_BB_INSNS_SAFE (bb, insn, next)
5451 {
5452 int this_cycle;
5453 if (!NONDEBUG_INSN_P (insn))
5454 continue;
5455 if (insn == BB_END (bb))
5456 return;
5457 this_cycle = insn_get_clock (insn);
5458 if (prev_ti && this_cycle == prev_cycle)
5459 {
5460 gcc_assert (GET_MODE (insn) != TImode);
5461 PUT_MODE (insn, TImode);
5462 }
5463 prev_ti = false;
5464 if (INSN_UID (insn) >= max_uid)
5465 {
5466 if (GET_MODE (insn) == TImode)
5467 {
5468 prev_ti = true;
5469 prev_cycle = this_cycle;
5470 }
5471 delete_insn (insn);
5472 }
5473 }
5474 }
5475
5476 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
5477
5478 static void
5479 c6x_asm_emit_except_personality (rtx personality)
5480 {
5481 fputs ("\t.personality\t", asm_out_file);
5482 output_addr_const (asm_out_file, personality);
5483 fputc ('\n', asm_out_file);
5484 }
5485
5486 /* Use a special assembly directive rather than a regular section for
5487 unwind table data. */
5488
5489 static void
5490 c6x_asm_init_sections (void)
5491 {
5492 exception_section = get_unnamed_section (0, output_section_asm_op,
5493 "\t.handlerdata");
5494 }
5495
5496 /* A callback for the hw-doloop pass. Called to optimize LOOP in a
5497 machine-specific fashion; returns true if successful and false if
5498 the hwloop_fail function should be called. */
5499
5500 static bool
5501 hwloop_optimize (hwloop_info loop)
5502 {
5503 basic_block entry_bb, bb;
5504 rtx_insn *seq, *insn, *prev, *entry_after, *end_packet;
5505 rtx_insn *head_insn, *tail_insn, *new_insns, *last_insn;
5506 int loop_earliest;
5507 int n_execute_packets;
5508 edge entry_edge;
5509 unsigned ix;
5510 int max_uid_before, delayed_splits;
5511 int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5512 rtx_insn **orig_vec;
5513 rtx_insn **copies;
5514 rtx_insn ***insn_copies;
5515
5516 if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5517 || !TARGET_INSNS_64PLUS)
5518 return false;
5519
5520 if (loop->iter_reg_used || loop->depth > 1)
5521 return false;
5522 if (loop->has_call || loop->has_asm)
5523 return false;
5524
5525 if (loop->head != loop->tail)
5526 return false;
5527
5528 gcc_assert (loop->incoming_dest == loop->head);
5529
5530 entry_edge = NULL;
5531 FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
5532 if (entry_edge->flags & EDGE_FALLTHRU)
5533 break;
5534 if (entry_edge == NULL)
5535 return false;
5536
5537 reshuffle_units (loop->head);
5538
5539 in_hwloop = true;
5540 schedule_ebbs_init ();
5541 schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5542 schedule_ebbs_finish ();
5543 in_hwloop = false;
5544
5545 bb = loop->head;
5546 loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5547
5548 max_uid_before = get_max_uid ();
5549
5550 /* Split all multi-cycle operations, such as loads. For normal
5551 scheduling, we only do this for branches, as the generated code
5552 would otherwise not be interrupt-safe. When using sploop, it is
5553 safe and beneficial to split them. If any multi-cycle operations
5554 remain after splitting (because we don't handle them yet), we
5555 cannot pipeline the loop. */
5556 delayed_splits = 0;
5557 FOR_BB_INSNS (bb, insn)
5558 {
5559 if (NONDEBUG_INSN_P (insn))
5560 {
5561 recog_memoized (insn);
5562 if (split_delayed_nonbranch (insn))
5563 delayed_splits++;
5564 else if (INSN_CODE (insn) >= 0
5565 && get_attr_cycles (insn) > 1)
5566 goto undo_splits;
5567 }
5568 }
5569
/* Count the number of insns as well as the number of real insns, and save
   the original sequence of insns in case we must restore it later. */
5572 n_insns = n_real_insns = 0;
5573 FOR_BB_INSNS (bb, insn)
5574 {
5575 n_insns++;
5576 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5577 n_real_insns++;
5578 }
5579 orig_vec = XNEWVEC (rtx_insn *, n_insns);
5580 n_insns = 0;
5581 FOR_BB_INSNS (bb, insn)
5582 orig_vec[n_insns++] = insn;
5583
5584 /* Count the unit reservations, and compute a minimum II from that
5585 table. */
5586 count_unit_reqs (unit_reqs, loop->start_label,
5587 PREV_INSN (loop->loop_end));
5588 merge_unit_reqs (unit_reqs);
5589
5590 min_ii = res_mii (unit_reqs);
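/* The SPLOOP buffer can hold at most 14 execute packets, which bounds the
   initiation interval we are willing to try. */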
5591 max_ii = loop_earliest < 15 ? loop_earliest : 14;
5592
5593 /* Make copies of the loop body, up to a maximum number of stages we want
5594 to handle. */
5595 max_parallel = loop_earliest / min_ii + 1;
5596
5597 copies = XCNEWVEC (rtx_insn *, (max_parallel + 1) * n_real_insns);
5598 insn_copies = XNEWVEC (rtx_insn **, max_parallel + 1);
5599 for (i = 0; i < max_parallel + 1; i++)
5600 insn_copies[i] = copies + i * n_real_insns;
5601
5602 head_insn = next_nonnote_nondebug_insn (loop->start_label);
5603 tail_insn = prev_real_insn (BB_END (bb));
5604
5605 i = 0;
5606 FOR_BB_INSNS (bb, insn)
5607 if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5608 insn_copies[0][i++] = insn;
5609
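/* Remember the highest uid in the original loop body; every insn emitted
   for the copies below will have a larger uid. */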
5610 sploop_max_uid_iter0 = get_max_uid ();
5611
5612 /* Generate the copies of the loop body, and save them in the
5613 INSN_COPIES array. */
5614 start_sequence ();
5615 for (i = 0; i < max_parallel; i++)
5616 {
5617 int j;
5618 rtx_insn *this_iter;
5619
5620 this_iter = duplicate_insn_chain (head_insn, tail_insn);
5621 j = 0;
5622 while (this_iter)
5623 {
5624 rtx_insn *prev_stage_insn = insn_copies[i][j];
5625 gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5626
5627 if (INSN_CODE (this_iter) >= 0
5628 && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5629 || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5630 {
5631 rtx_insn *prev = PREV_INSN (this_iter);
5632 record_delay_slot_pair (prev, this_iter,
5633 get_attr_cycles (prev) - 1, 0);
5634 }
5635 else
5636 record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5637
5638 insn_copies[i + 1][j] = this_iter;
5639 j++;
5640 this_iter = next_nonnote_nondebug_insn (this_iter);
5641 }
5642 }
5643 new_insns = get_insns ();
5644 last_insn = insn_copies[max_parallel][n_real_insns - 1];
5645 end_sequence ();
5646 emit_insn_before (new_insns, BB_END (bb));
5647
5648 /* Try to schedule the loop using varying initiation intervals,
5649 starting with the smallest possible and incrementing it
5650 on failure. */
5651 for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5652 {
5653 basic_block tmp_bb;
5654 if (dump_file)
5655 fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5656
5657 df_clear_flags (DF_LR_RUN_DCE);
5658
5659 schedule_ebbs_init ();
5660 set_modulo_params (sp_ii, max_parallel, n_real_insns,
5661 sploop_max_uid_iter0);
5662 tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5663 schedule_ebbs_finish ();
5664
5665 if (tmp_bb)
5666 {
5667 if (dump_file)
5668 fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5669 break;
5670 }
5671 }
5672
5673 discard_delay_pairs_above (max_uid_before);
5674
5675 if (sp_ii > max_ii)
5676 goto restore_loop;
5677
5678 stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5679
5680 if (stages == 1 && sp_ii > 5)
5681 goto restore_loop;
5682
5683 /* At this point, we know we've been successful, unless we find later that
5684 there are too many execute packets for the loop buffer to hold. */
5685
5686 /* Assign reservations to the instructions in the loop. We must find
5687 the stage that contains the full loop kernel, and transfer the
5688 reservations of the instructions contained in it to the corresponding
5689 instructions from iteration 0, which are the only ones we'll keep. */
5690 assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
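/* Splice out everything after the last insn of iteration 0, then remove
   any remaining copies from later iterations; only iteration 0's insns
   (and the end of the block) are kept. */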
5691 SET_PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5692 SET_NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5693 filter_insns_above (bb, sploop_max_uid_iter0);
5694
5695 for (i = 0; i < n_real_insns; i++)
5696 {
5697 rtx insn = insn_copies[0][i];
5698 int uid = INSN_UID (insn);
5699 int stage = insn_uid_get_clock (uid) / sp_ii;
5700
5701 if (stage + 1 < stages)
5702 {
5703 int copy_uid;
5704 stage = stages - stage - 1;
5705 copy_uid = INSN_UID (insn_copies[stage][i]);
5706 INSN_INFO_ENTRY (uid).reservation
5707 = INSN_INFO_ENTRY (copy_uid).reservation;
5708 }
5709 }
5710 if (stages == 1)
5711 stages++;
5712
5713 /* Compute the number of execute packets the pipelined form of the loop will
5714 require. */
5715 prev = NULL;
5716 n_execute_packets = 0;
5717 for (insn = loop->start_label;
5718 insn != loop->loop_end;
5719 insn = NEXT_INSN (insn))
5720 {
5721 if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5722 && !shadow_p (insn))
5723 {
5724 n_execute_packets++;
5725 if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5726 /* We need an extra NOP instruction. */
5727 n_execute_packets++;
5728
5729 prev = insn;
5730 }
5731 }
5732
5733 end_packet = ss.last_scheduled_iter0;
5734 while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5735 end_packet = PREV_INSN (end_packet);
5736
5737 /* The earliest cycle in which we can emit the SPKERNEL instruction. */
5738 loop_earliest = (stages - 1) * sp_ii;
5739 if (loop_earliest > insn_get_clock (end_packet))
5740 {
5741 n_execute_packets++;
5742 end_packet = loop->loop_end;
5743 }
5744 else
5745 loop_earliest = insn_get_clock (end_packet);
5746
5747 if (n_execute_packets > 14)
5748 goto restore_loop;
5749
5750 /* Generate the spkernel instruction, and place it at the appropriate
5751 spot. */
5752 PUT_MODE (end_packet, VOIDmode);
5753
5754 insn = emit_jump_insn_before (
5755 gen_spkernel (GEN_INT (stages - 1),
5756 const0_rtx, JUMP_LABEL (loop->loop_end)),
5757 end_packet);
5758 JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5759 insn_set_clock (insn, loop_earliest);
5760 PUT_MODE (insn, TImode);
5761 INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5762 delete_insn (loop->loop_end);
5763
5764 /* Place the mvc and sploop instructions before the loop. */
5765 entry_bb = entry_edge->src;
5766
5767 start_sequence ();
5768
5769 insn = emit_insn (gen_mvilc (loop->iter_reg));
5770 if (loop->iter_reg_used_outside)
5771 insn = emit_move_insn (loop->iter_reg, const0_rtx);
5772 insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5773 seq = get_insns ();
5774
5775 if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
5776 {
5777 basic_block new_bb;
5778 edge e;
5779 edge_iterator ei;
5780
5781 emit_insn_before (seq, BB_HEAD (loop->head));
5782 seq = emit_label_before (gen_label_rtx (), seq);
5783
5784 new_bb = create_basic_block (seq, insn, entry_bb);
5785 FOR_EACH_EDGE (e, ei, loop->incoming)
5786 {
5787 if (!(e->flags & EDGE_FALLTHRU))
5788 redirect_edge_and_branch_force (e, new_bb);
5789 else
5790 redirect_edge_succ (e, new_bb);
5791 }
5792 make_edge (new_bb, loop->head, 0);
5793 }
5794 else
5795 {
5796 entry_after = BB_END (entry_bb);
5797 while (DEBUG_INSN_P (entry_after)
5798 || (NOTE_P (entry_after)
5799 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5800 entry_after = PREV_INSN (entry_after);
5801 emit_insn_after (seq, entry_after);
5802 }
5803
5804 end_sequence ();
5805
5806 /* Make sure we don't try to schedule this loop again. */
5807 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
5808 bb->flags |= BB_DISABLE_SCHEDULE;
5809
5810 return true;
5811
5812 restore_loop:
5813 if (dump_file)
5814 fprintf (dump_file, "Unable to pipeline loop.\n");
5815
5816 for (i = 1; i < n_insns; i++)
5817 {
5818 SET_NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5819 SET_PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5820 }
5821 SET_PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5822 SET_NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5823 SET_NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5824 SET_PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5825 BB_HEAD (bb) = orig_vec[0];
5826 BB_END (bb) = orig_vec[n_insns - 1];
5827 undo_splits:
5828 free_delay_pairs ();
5829 FOR_BB_INSNS (bb, insn)
5830 if (NONDEBUG_INSN_P (insn))
5831 undo_split_delayed_nonbranch (insn);
5832 return false;
5833 }
5834
5835 /* A callback for the hw-doloop pass. Called when a loop we have discovered
5836 turns out not to be optimizable; we have to split the doloop_end pattern
5837 into a subtract and a test. */
5838 static void
hwloop_fail (hwloop_info loop)
5840 {
5841 rtx insn, test, testreg;
5842
5843 if (dump_file)
5844 fprintf (dump_file, "splitting doloop insn %d\n",
5845 INSN_UID (loop->loop_end));
5846 insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5847 /* See if we can emit the add at the head of the loop rather than at the
5848 end. */
5849 if (loop->head == NULL
5850 || loop->iter_reg_used_outside
5851 || loop->iter_reg_used
5852 || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5853 || loop->incoming_dest != loop->head
5854 || EDGE_COUNT (loop->head->preds) != 2)
5855 emit_insn_before (insn, loop->loop_end);
5856 else
5857 {
5858 rtx_insn *t = loop->start_label;
5859 while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5860 t = NEXT_INSN (t);
5861 emit_insn_after (insn, t);
5862 }
5863
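/* The doloop_end pattern supplies a register in which to test the counter;
   if it is only a SCRATCH, test the iteration register directly, otherwise
   copy the counter into it first. */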
5864 testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5865 if (GET_CODE (testreg) == SCRATCH)
5866 testreg = loop->iter_reg;
5867 else
5868 emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5869
5870 test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5871 insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5872 loop->start_label),
5873 loop->loop_end);
5874
5875 JUMP_LABEL (insn) = loop->start_label;
5876 LABEL_NUSES (loop->start_label)++;
5877 delete_insn (loop->loop_end);
5878 }
5879
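/* Callbacks for the hw-doloop pass, used by c6x_hwloops below. */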
5880 static struct hw_doloop_hooks c6x_doloop_hooks =
5881 {
5882 hwloop_pattern_reg,
5883 hwloop_optimize,
5884 hwloop_fail
5885 };
5886
5887 /* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5888 doloop_end patterns where such optimizations are impossible. */
5889 static void
c6x_hwloops (void)
5891 {
5892 if (optimize)
5893 reorg_loops (true, &c6x_doloop_hooks);
5894 }
5895
5896 /* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. We split call insns here
5897 into a sequence that loads the return register and performs the call,
5898 and emit the return label.
5899 If scheduling after reload is requested, it happens here. */
5900
5901 static void
c6x_reorg (void)
5903 {
5904 basic_block bb;
5905 bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5906 && !maybe_skip_selective_scheduling ());
5907
5908 /* We are freeing block_for_insn in the toplev to keep compatibility
5909 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5910 compute_bb_for_insn ();
5911
5912 df_clear_flags (DF_LR_RUN_DCE);
5913 df_note_add_problem ();
5914
5915 /* If optimizing, we'll have split before scheduling. */
5916 if (optimize == 0)
5917 split_all_insns ();
5918
5919 df_analyze ();
5920
5921 if (c6x_flag_schedule_insns2)
5922 {
5923 int sz = get_max_uid () * 3 / 2 + 1;
5924
5925 insn_info.create (sz);
5926 }
5927
5928 /* Make sure the real-jump insns we create are not deleted. When modulo-
5929 scheduling, situations where a reg is only stored in a loop can also
5930 cause dead code when doing the initial unrolling. */
5931 sched_no_dce = true;
5932
5933 c6x_hwloops ();
5934
5935 if (c6x_flag_schedule_insns2)
5936 {
5937 split_delayed_insns ();
5938 timevar_push (TV_SCHED2);
5939 if (do_selsched)
5940 run_selective_scheduling ();
5941 else
5942 schedule_ebbs ();
5943 conditionalize_after_sched ();
5944 timevar_pop (TV_SCHED2);
5945
5946 free_delay_pairs ();
5947 }
5948 sched_no_dce = false;
5949
5950 rtx_insn **call_labels = XCNEWVEC (rtx_insn *, get_max_uid () + 1);
5951
5952 reorg_split_calls (call_labels);
5953
5954 if (c6x_flag_schedule_insns2)
5955 {
5956 FOR_EACH_BB_FN (bb, cfun)
5957 if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5958 assign_reservations (BB_HEAD (bb), BB_END (bb));
5959 }
5960
5961 if (c6x_flag_var_tracking)
5962 {
5963 timevar_push (TV_VAR_TRACKING);
5964 variable_tracking_main ();
5965 timevar_pop (TV_VAR_TRACKING);
5966 }
5967
5968 reorg_emit_nops (call_labels);
5969
5970 /* Post-process the schedule to move parallel insns into SEQUENCEs. */
5971 if (c6x_flag_schedule_insns2)
5972 {
5973 free_delay_pairs ();
5974 c6x_gen_bundles ();
5975 }
5976
5977 df_finish_pass (false);
5978 }
5979
5980 /* Called when a function has been assembled. It should perform all the
5981 tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
5982 tasks.
5983 We free the reservation (and other scheduling) information here now that
5984 all insns have been output. */
5985 void
c6x_function_end (FILE *file, const char *fname)
5987 {
5988 c6x_output_fn_unwind (file);
5989
5990 insn_info.release ();
5991
5992 if (!flag_inhibit_size_directive)
5993 ASM_OUTPUT_MEASURED_SIZE (file, fname);
5994 }
5995
5996 /* Determine whether X is a shift with code CODE and an integer amount
5997 AMOUNT. */
5998 static bool
shift_p (rtx x, enum rtx_code code, int amount)
6000 {
6001 return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
6002 && INTVAL (XEXP (x, 1)) == amount);
6003 }
6004
6005 /* Compute a (partial) cost for rtx X. Return true if the complete
6006 cost has been computed, and false if subexpressions should be
6007 scanned. In either case, *TOTAL contains the cost result. */
6008
6009 static bool
c6x_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
6011 bool speed)
6012 {
6013 int cost2 = COSTS_N_INSNS (1);
6014 rtx op0, op1;
6015 int code = GET_CODE (x);
6016
6017 switch (code)
6018 {
6019 case CONST_INT:
6020 if (outer_code == SET || outer_code == PLUS)
6021 *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6022 else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6023 || outer_code == MINUS)
6024 *total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6025 else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6026 || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6027 *total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6028 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6029 || outer_code == LSHIFTRT)
6030 *total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6031 else
6032 *total = cost2;
6033 return true;
6034
6035 case CONST:
6036 case LABEL_REF:
6037 case SYMBOL_REF:
6038 case CONST_DOUBLE:
6039 *total = COSTS_N_INSNS (2);
6040 return true;
6041
6042 case TRUNCATE:
6043 /* Recognize a mult_highpart operation. */
6044 if ((mode == HImode || mode == SImode)
6045 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6046 && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (mode).require ()
6047 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6048 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6049 && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (mode))
6050 {
6051 rtx mul = XEXP (XEXP (x, 0), 0);
6052 rtx op0 = XEXP (mul, 0);
6053 rtx op1 = XEXP (mul, 1);
6054 enum rtx_code code0 = GET_CODE (op0);
6055 enum rtx_code code1 = GET_CODE (op1);
6056
6057 if ((code0 == code1
6058 && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6059 || (mode == HImode
6060 && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6061 {
6062 if (mode == HImode)
6063 *total = COSTS_N_INSNS (2);
6064 else
6065 *total = COSTS_N_INSNS (12);
6066 mode = GET_MODE (XEXP (op0, 0));
6067 *total += rtx_cost (XEXP (op0, 0), mode, code0, 0, speed);
6068 *total += rtx_cost (XEXP (op1, 0), mode, code1, 0, speed);
6069 return true;
6070 }
6071 }
6072 return false;
6073
6074 case ASHIFT:
6075 case ASHIFTRT:
6076 case LSHIFTRT:
6077 if (mode == DImode)
6078 *total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6079 else
6080 *total = COSTS_N_INSNS (1);
6081 return false;
6082
6083 case PLUS:
6084 case MINUS:
6085 *total = COSTS_N_INSNS (1);
6086 op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6087 op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6088 if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6089 && INTEGRAL_MODE_P (mode)
6090 && GET_CODE (op0) == MULT
6091 && GET_CODE (XEXP (op0, 1)) == CONST_INT
6092 && (INTVAL (XEXP (op0, 1)) == 2
6093 || INTVAL (XEXP (op0, 1)) == 4
6094 || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6095 {
6096 *total += rtx_cost (XEXP (op0, 0), mode, ASHIFT, 0, speed);
6097 *total += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
6098 return true;
6099 }
6100 return false;
6101
6102 case MULT:
6103 op0 = XEXP (x, 0);
6104 op1 = XEXP (x, 1);
6105 if (mode == DFmode)
6106 {
6107 if (TARGET_FP)
6108 *total = COSTS_N_INSNS (speed ? 10 : 1);
6109 else
6110 *total = COSTS_N_INSNS (speed ? 200 : 4);
6111 }
6112 else if (mode == SFmode)
6113 {
6114 if (TARGET_FP)
6115 *total = COSTS_N_INSNS (speed ? 4 : 1);
6116 else
6117 *total = COSTS_N_INSNS (speed ? 100 : 4);
6118 }
6119 else if (mode == DImode)
6120 {
6121 if (TARGET_MPY32
6122 && GET_CODE (op0) == GET_CODE (op1)
6123 && (GET_CODE (op0) == ZERO_EXTEND
6124 || GET_CODE (op0) == SIGN_EXTEND))
6125 {
6126 *total = COSTS_N_INSNS (speed ? 2 : 1);
6127 op0 = XEXP (op0, 0);
6128 op1 = XEXP (op1, 0);
6129 }
6130 else
/* Maybe improve this later. */
6132 *total = COSTS_N_INSNS (20);
6133 }
6134 else if (mode == SImode)
6135 {
6136 if (((GET_CODE (op0) == ZERO_EXTEND
6137 || GET_CODE (op0) == SIGN_EXTEND
6138 || shift_p (op0, LSHIFTRT, 16))
6139 && (GET_CODE (op1) == SIGN_EXTEND
6140 || GET_CODE (op1) == ZERO_EXTEND
6141 || scst5_operand (op1, SImode)
6142 || shift_p (op1, ASHIFTRT, 16)
6143 || shift_p (op1, LSHIFTRT, 16)))
6144 || (shift_p (op0, ASHIFTRT, 16)
6145 && (GET_CODE (op1) == SIGN_EXTEND
6146 || shift_p (op1, ASHIFTRT, 16))))
6147 {
6148 *total = COSTS_N_INSNS (speed ? 2 : 1);
6149 op0 = XEXP (op0, 0);
6150 if (scst5_operand (op1, SImode))
6151 op1 = NULL_RTX;
6152 else
6153 op1 = XEXP (op1, 0);
6154 }
6155 else if (!speed)
6156 *total = COSTS_N_INSNS (1);
6157 else if (TARGET_MPY32)
6158 *total = COSTS_N_INSNS (4);
6159 else
6160 *total = COSTS_N_INSNS (6);
6161 }
6162 else if (mode == HImode)
6163 *total = COSTS_N_INSNS (speed ? 2 : 1);
6164
6165 if (GET_CODE (op0) != REG
6166 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
6167 *total += rtx_cost (op0, mode, MULT, 0, speed);
6168 if (op1 && GET_CODE (op1) != REG
6169 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
6170 *total += rtx_cost (op1, mode, MULT, 1, speed);
6171 return true;
6172
6173 case UDIV:
6174 case DIV:
6175 /* This is a bit random; assuming on average there'll be 16 leading
6176 zeros. FIXME: estimate better for constant dividends. */
6177 *total = COSTS_N_INSNS (6 + 3 * 16);
6178 return false;
6179
6180 case IF_THEN_ELSE:
6181 /* Recognize the cmp_and/ior patterns. */
6182 op0 = XEXP (x, 0);
6183 if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6184 && REG_P (XEXP (op0, 0))
6185 && XEXP (op0, 1) == const0_rtx
6186 && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6187 {
6188 *total = rtx_cost (XEXP (x, 1), VOIDmode, (enum rtx_code) outer_code,
6189 opno, speed);
6190 return false;
6191 }
6192 return false;
6193
6194 default:
6195 return false;
6196 }
6197 }
6198
6199 /* Implements target hook vector_mode_supported_p. */
6200
6201 static bool
c6x_vector_mode_supported_p (machine_mode mode)
6203 {
6204 switch (mode)
6205 {
6206 case E_V2HImode:
6207 case E_V4QImode:
6208 case E_V2SImode:
6209 case E_V4HImode:
6210 case E_V8QImode:
6211 return true;
6212 default:
6213 return false;
6214 }
6215 }
6216
6217 /* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6218 static machine_mode
c6x_preferred_simd_mode (scalar_mode mode)
6220 {
6221 switch (mode)
6222 {
6223 case E_HImode:
6224 return V2HImode;
6225 case E_QImode:
6226 return V4QImode;
6227
6228 default:
6229 return word_mode;
6230 }
6231 }
6232
6233 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6234
6235 static bool
c6x_scalar_mode_supported_p (scalar_mode mode)
6237 {
6238 if (ALL_FIXED_POINT_MODE_P (mode)
6239 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6240 return true;
6241
6242 return default_scalar_mode_supported_p (mode);
6243 }
6244
6245 /* Output a reference from a function exception table to the type_info
6246 object X. Output these via a special assembly directive. */
6247
6248 static bool
c6x_output_ttype (rtx x)
6250 {
6251 /* Use special relocations for symbol references. */
6252 if (GET_CODE (x) != CONST_INT)
6253 fputs ("\t.ehtype\t", asm_out_file);
6254 else
6255 fputs ("\t.word\t", asm_out_file);
6256 output_addr_const (asm_out_file, x);
6257 fputc ('\n', asm_out_file);
6258
6259 return TRUE;
6260 }
6261
6262 /* Modify the return address of the current function. */
6263
6264 void
c6x_set_return_address (rtx source, rtx scratch)
6266 {
6267 struct c6x_frame frame;
6268 rtx addr;
6269 HOST_WIDE_INT offset;
6270
6271 c6x_compute_frame_layout (&frame);
6272 if (! c6x_save_reg (RETURN_ADDR_REGNO))
6273 emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6274 else
6275 {
6276
6277 if (frame_pointer_needed)
6278 {
6279 addr = hard_frame_pointer_rtx;
6280 offset = frame.b3_offset;
6281 }
6282 else
6283 {
6284 addr = stack_pointer_rtx;
6285 offset = frame.to_allocate - frame.b3_offset;
6286 }
6287
6288 /* TODO: Use base+offset loads where possible. */
6289 if (offset)
6290 {
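/* Build the constant offset in SCRATCH, then add the base address. */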
6291 HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6292
6293 emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6294 if (low != offset)
6295 emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset)));
6296 emit_insn (gen_addsi3 (scratch, addr, scratch));
6297 addr = scratch;
6298 }
6299
6300 emit_move_insn (gen_frame_mem (Pmode, addr), source);
6301 }
6302 }
6303
6304 /* We save pairs of registers using a DImode store. Describe the component
6305 registers for DWARF generation code. */
6306
6307 static rtx
c6x_dwarf_register_span (rtx rtl)
6309 {
6310 unsigned regno;
6311 unsigned real_regno;
6312 int nregs;
6313 int i;
6314 rtx p;
6315
6316 regno = REGNO (rtl);
6317 nregs = REG_NREGS (rtl);
6318 if (nregs == 1)
6319 return NULL_RTX;
6320
6321 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs));
6322 for (i = 0; i < nregs; i++)
6323 {
6324 if (TARGET_BIG_ENDIAN)
6325 real_regno = regno + nregs - (i + 1);
6326 else
6327 real_regno = regno + i;
6328
6329 XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
6330 }
6331
6332 return p;
6333 }
6334
6335 /* Codes for all the C6X builtins. */
6336 enum c6x_builtins
6337 {
6338 C6X_BUILTIN_SADD,
6339 C6X_BUILTIN_SSUB,
6340 C6X_BUILTIN_ADD2,
6341 C6X_BUILTIN_SUB2,
6342 C6X_BUILTIN_ADD4,
6343 C6X_BUILTIN_SUB4,
6344 C6X_BUILTIN_SADD2,
6345 C6X_BUILTIN_SSUB2,
6346 C6X_BUILTIN_SADDU4,
6347
6348 C6X_BUILTIN_SMPY,
6349 C6X_BUILTIN_SMPYH,
6350 C6X_BUILTIN_SMPYHL,
6351 C6X_BUILTIN_SMPYLH,
6352 C6X_BUILTIN_MPY2,
6353 C6X_BUILTIN_SMPY2,
6354
6355 C6X_BUILTIN_CLRR,
6356 C6X_BUILTIN_EXTR,
6357 C6X_BUILTIN_EXTRU,
6358
6359 C6X_BUILTIN_SSHL,
6360 C6X_BUILTIN_SUBC,
6361 C6X_BUILTIN_ABS,
6362 C6X_BUILTIN_ABS2,
6363 C6X_BUILTIN_AVG2,
6364 C6X_BUILTIN_AVGU4,
6365
6366 C6X_BUILTIN_MAX
6367 };
6368
6369
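/* The decls for the builtin functions defined below, indexed by
   C6X_BUILTIN_* code. */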
6370 static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6371
6372 /* Return the C6X builtin for CODE. */
6373 static tree
c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6375 {
6376 if (code >= C6X_BUILTIN_MAX)
6377 return error_mark_node;
6378
6379 return c6x_builtin_decls[code];
6380 }
6381
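/* Register a single builtin function and remember its decl. */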
6382 #define def_builtin(NAME, TYPE, CODE) \
6383 do { \
6384 tree bdecl; \
6385 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
6386 NULL, NULL_TREE); \
6387 c6x_builtin_decls[CODE] = bdecl; \
6388 } while (0)
6389
6390 /* Set up all builtin functions for this target. */
6391 static void
c6x_init_builtins (void)
6393 {
6394 tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6395 tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6396 tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6397 tree int_ftype_int
6398 = build_function_type_list (integer_type_node, integer_type_node,
6399 NULL_TREE);
6400 tree int_ftype_int_int
6401 = build_function_type_list (integer_type_node, integer_type_node,
6402 integer_type_node, NULL_TREE);
6403 tree v2hi_ftype_v2hi
6404 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6405 tree v4qi_ftype_v4qi_v4qi
6406 = build_function_type_list (V4QI_type_node, V4QI_type_node,
6407 V4QI_type_node, NULL_TREE);
6408 tree v2hi_ftype_v2hi_v2hi
6409 = build_function_type_list (V2HI_type_node, V2HI_type_node,
6410 V2HI_type_node, NULL_TREE);
6411 tree v2si_ftype_v2hi_v2hi
6412 = build_function_type_list (V2SI_type_node, V2HI_type_node,
6413 V2HI_type_node, NULL_TREE);
6414
6415 def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6416 C6X_BUILTIN_SADD);
6417 def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6418 C6X_BUILTIN_SSUB);
6419 def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6420 C6X_BUILTIN_ADD2);
6421 def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6422 C6X_BUILTIN_SUB2);
6423 def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6424 C6X_BUILTIN_ADD4);
6425 def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6426 C6X_BUILTIN_SUB4);
6427 def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6428 C6X_BUILTIN_MPY2);
6429 def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6430 C6X_BUILTIN_SADD2);
6431 def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6432 C6X_BUILTIN_SSUB2);
6433 def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6434 C6X_BUILTIN_SADDU4);
6435 def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6436 C6X_BUILTIN_SMPY2);
6437
6438 def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6439 C6X_BUILTIN_SMPY);
6440 def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6441 C6X_BUILTIN_SMPYH);
6442 def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6443 C6X_BUILTIN_SMPYHL);
6444 def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6445 C6X_BUILTIN_SMPYLH);
6446
6447 def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6448 C6X_BUILTIN_SSHL);
6449 def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6450 C6X_BUILTIN_SUBC);
6451
6452 def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6453 C6X_BUILTIN_AVG2);
6454 def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6455 C6X_BUILTIN_AVGU4);
6456
6457 def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6458 C6X_BUILTIN_CLRR);
6459 def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6460 C6X_BUILTIN_EXTR);
6461 def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6462 C6X_BUILTIN_EXTRU);
6463
6464 def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6465 def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
6466 }
6467
6468
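/* Describes a builtin that can be expanded directly from a named insn
   pattern. */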
6469 struct builtin_description
6470 {
6471 const enum insn_code icode;
6472 const char *const name;
6473 const enum c6x_builtins code;
6474 };
6475
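/* Builtins that take two operands.  For example, __builtin_c6x_sadd
   expands through this table to the saddsi3 pattern. */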
6476 static const struct builtin_description bdesc_2arg[] =
6477 {
6478 { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6479 { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6480 { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6481 { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6482 { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6483 { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6484 { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6485 { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6486 { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6487
6488 { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6489 { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6490
6491 { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6492 { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6493
6494 { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6495 { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6496 { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6497 { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6498
6499 { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6500
6501 { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6502 { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6503 { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6504 };
6505
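/* Builtins that take a single operand. */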
6506 static const struct builtin_description bdesc_1arg[] =
6507 {
6508 { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6509 { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6510 };
6511
6512 /* Errors in the source file can cause expand_expr to return const0_rtx
6513 where we expect a vector. To avoid crashing, use one of the vector
6514 clear instructions. */
6515 static rtx
safe_vector_operand (rtx x, machine_mode mode)
6517 {
6518 if (x != const0_rtx)
6519 return x;
6520 x = gen_reg_rtx (SImode);
6521
6522 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6523 return gen_lowpart (mode, x);
6524 }
6525
/* Subroutine of c6x_expand_builtin to take care of binop insns.  MATCH_OP
   is true if the insn pattern ties its output operand to its first input,
   so the real input operands start one position later. */
6528
6529 static rtx
c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6531 bool match_op)
6532 {
6533 int offs = match_op ? 1 : 0;
6534 rtx pat;
6535 tree arg0 = CALL_EXPR_ARG (exp, 0);
6536 tree arg1 = CALL_EXPR_ARG (exp, 1);
6537 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6538 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6539 machine_mode op0mode = GET_MODE (op0);
6540 machine_mode op1mode = GET_MODE (op1);
6541 machine_mode tmode = insn_data[icode].operand[0].mode;
6542 machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6543 machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6544 rtx ret = target;
6545
6546 if (VECTOR_MODE_P (mode0))
6547 op0 = safe_vector_operand (op0, mode0);
6548 if (VECTOR_MODE_P (mode1))
6549 op1 = safe_vector_operand (op1, mode1);
6550
6551 if (! target
6552 || GET_MODE (target) != tmode
6553 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6554 {
6555 if (tmode == SQmode || tmode == V2SQmode)
6556 {
6557 ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6558 target = gen_lowpart (tmode, ret);
6559 }
6560 else
6561 target = gen_reg_rtx (tmode);
6562 }
6563
6564 if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6565 && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6566 {
6567 op0mode = mode0;
6568 op0 = gen_lowpart (mode0, op0);
6569 }
6570 if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6571 && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6572 {
6573 op1mode = mode1;
6574 op1 = gen_lowpart (mode1, op1);
6575 }
6576 /* In case the insn wants input operands in modes different from
6577 the result, abort. */
6578 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6579 && (op1mode == mode1 || op1mode == VOIDmode));
6580
6581 if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6582 op0 = copy_to_mode_reg (mode0, op0);
6583 if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6584 op1 = copy_to_mode_reg (mode1, op1);
6585
6586 if (match_op)
6587 pat = GEN_FCN (icode) (target, target, op0, op1);
6588 else
6589 pat = GEN_FCN (icode) (target, op0, op1);
6590
6591 if (! pat)
6592 return 0;
6593
6594 emit_insn (pat);
6595
6596 return ret;
6597 }
6598
6599 /* Subroutine of c6x_expand_builtin to take care of unop insns. */
6600
6601 static rtx
c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6603 rtx target)
6604 {
6605 rtx pat;
6606 tree arg0 = CALL_EXPR_ARG (exp, 0);
6607 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6608 machine_mode op0mode = GET_MODE (op0);
6609 machine_mode tmode = insn_data[icode].operand[0].mode;
6610 machine_mode mode0 = insn_data[icode].operand[1].mode;
6611
6612 if (! target
6613 || GET_MODE (target) != tmode
6614 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6615 target = gen_reg_rtx (tmode);
6616
6617 if (VECTOR_MODE_P (mode0))
6618 op0 = safe_vector_operand (op0, mode0);
6619
6620 if (op0mode == SImode && mode0 == HImode)
6621 {
6622 op0mode = HImode;
6623 op0 = gen_lowpart (HImode, op0);
6624 }
6625 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6626
6627 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6628 op0 = copy_to_mode_reg (mode0, op0);
6629
6630 pat = GEN_FCN (icode) (target, op0);
6631 if (! pat)
6632 return 0;
6633 emit_insn (pat);
6634 return target;
6635 }
6636
6637 /* Expand an expression EXP that calls a built-in function,
6638 with result going to TARGET if that's convenient
6639 (and in mode MODE if that's convenient).
6640 SUBTARGET may be used as the target for computing one of EXP's operands.
6641 IGNORE is nonzero if the value is to be ignored. */
6642
6643 static rtx
c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6645 rtx subtarget ATTRIBUTE_UNUSED,
6646 machine_mode mode ATTRIBUTE_UNUSED,
6647 int ignore ATTRIBUTE_UNUSED)
6648 {
6649 size_t i;
6650 const struct builtin_description *d;
6651 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6652 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6653
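/* Among the two-operand builtins, only clrr uses a pattern whose output
   operand must match an input, hence the extra argument below. */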
6654 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6655 if (d->code == fcode)
6656 return c6x_expand_binop_builtin (d->icode, exp, target,
6657 fcode == C6X_BUILTIN_CLRR);
6658
6659 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6660 if (d->code == fcode)
6661 return c6x_expand_unop_builtin (d->icode, exp, target);
6662
6663 gcc_unreachable ();
6664 }
6665
6666 /* Target unwind frame info is generated from dwarf CFI directives, so
6667 always output dwarf2 unwind info. */
6668
6669 static enum unwind_info_type
c6x_debug_unwind_info (void)
6671 {
6672 if (flag_unwind_tables || flag_exceptions)
6673 return UI_DWARF2;
6674
6675 return default_debug_unwind_info ();
6676 }
6677
6678 /* Implement TARGET_HARD_REGNO_MODE_OK. */
6679
6680 static bool
c6x_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
6682 {
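/* Values larger than a word must live in an even/odd register pair, so
   they can only start at an even register number. */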
6683 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD || (regno & 1) == 0;
6684 }
6685
6686 /* Implement TARGET_MODES_TIEABLE_P. */
6687
6688 static bool
c6x_modes_tieable_p (machine_mode mode1, machine_mode mode2)
6690 {
6691 return (mode1 == mode2
6692 || (GET_MODE_SIZE (mode1) <= UNITS_PER_WORD
6693 && GET_MODE_SIZE (mode2) <= UNITS_PER_WORD));
6694 }
6695
6696
6697 /* Target Structure. */
6698
6699 /* Initialize the GCC target structure. */
6700 #undef TARGET_FUNCTION_ARG
6701 #define TARGET_FUNCTION_ARG c6x_function_arg
6702 #undef TARGET_FUNCTION_ARG_ADVANCE
6703 #define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6704 #undef TARGET_FUNCTION_ARG_BOUNDARY
6705 #define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6706 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6707 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6708 c6x_function_arg_round_boundary
6709 #undef TARGET_FUNCTION_VALUE_REGNO_P
6710 #define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6711 #undef TARGET_FUNCTION_VALUE
6712 #define TARGET_FUNCTION_VALUE c6x_function_value
6713 #undef TARGET_LIBCALL_VALUE
6714 #define TARGET_LIBCALL_VALUE c6x_libcall_value
6715 #undef TARGET_RETURN_IN_MEMORY
6716 #define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6717 #undef TARGET_RETURN_IN_MSB
6718 #define TARGET_RETURN_IN_MSB c6x_return_in_msb
6719 #undef TARGET_PASS_BY_REFERENCE
6720 #define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6721 #undef TARGET_CALLEE_COPIES
6722 #define TARGET_CALLEE_COPIES c6x_callee_copies
6723 #undef TARGET_STRUCT_VALUE_RTX
6724 #define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6725 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6726 #define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6727
6728 #undef TARGET_ASM_OUTPUT_MI_THUNK
6729 #define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6730 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6731 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6732
6733 #undef TARGET_BUILD_BUILTIN_VA_LIST
6734 #define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6735
6736 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6737 #define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6738 #undef TARGET_TRAMPOLINE_INIT
6739 #define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6740
6741 #undef TARGET_LEGITIMATE_CONSTANT_P
6742 #define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6743 #undef TARGET_LEGITIMATE_ADDRESS_P
6744 #define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6745
6746 #undef TARGET_LRA_P
6747 #define TARGET_LRA_P hook_bool_void_false
6748
6749 #undef TARGET_IN_SMALL_DATA_P
6750 #define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6751 #undef TARGET_ASM_SELECT_RTX_SECTION
6752 #define TARGET_ASM_SELECT_RTX_SECTION c6x_select_rtx_section
6753 #undef TARGET_ASM_SELECT_SECTION
6754 #define TARGET_ASM_SELECT_SECTION c6x_elf_select_section
6755 #undef TARGET_ASM_UNIQUE_SECTION
6756 #define TARGET_ASM_UNIQUE_SECTION c6x_elf_unique_section
6757 #undef TARGET_SECTION_TYPE_FLAGS
6758 #define TARGET_SECTION_TYPE_FLAGS c6x_section_type_flags
6759 #undef TARGET_HAVE_SRODATA_SECTION
6760 #define TARGET_HAVE_SRODATA_SECTION true
6761 #undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6762 #define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6763
6764 #undef TARGET_OPTION_OVERRIDE
6765 #define TARGET_OPTION_OVERRIDE c6x_option_override
6766 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6767 #define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6768
6769 #undef TARGET_INIT_LIBFUNCS
6770 #define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6771 #undef TARGET_LIBFUNC_GNU_PREFIX
6772 #define TARGET_LIBFUNC_GNU_PREFIX true
6773
6774 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6775 #define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6776 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6777 #define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6778 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6779 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6780
6781 #undef TARGET_RTX_COSTS
6782 #define TARGET_RTX_COSTS c6x_rtx_costs
6783
6784 #undef TARGET_SCHED_INIT
6785 #define TARGET_SCHED_INIT c6x_sched_init
6786 #undef TARGET_SCHED_SET_SCHED_FLAGS
6787 #define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6788 #undef TARGET_SCHED_ADJUST_COST
6789 #define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6790 #undef TARGET_SCHED_ISSUE_RATE
6791 #define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6792 #undef TARGET_SCHED_VARIABLE_ISSUE
6793 #define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6794 #undef TARGET_SCHED_REORDER
6795 #define TARGET_SCHED_REORDER c6x_sched_reorder
6796 #undef TARGET_SCHED_REORDER2
6797 #define TARGET_SCHED_REORDER2 c6x_sched_reorder2
6798 #undef TARGET_SCHED_DFA_NEW_CYCLE
6799 #define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6800 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6801 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
6802 #undef TARGET_SCHED_EXPOSED_PIPELINE
6803 #define TARGET_SCHED_EXPOSED_PIPELINE true
6804
6805 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6806 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6807 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
6808 #define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6809 #undef TARGET_SCHED_SET_SCHED_CONTEXT
6810 #define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
6811 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6812 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
6813 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
6814 #define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6815
6816 #undef TARGET_CAN_ELIMINATE
6817 #define TARGET_CAN_ELIMINATE c6x_can_eliminate
6818
6819 #undef TARGET_PREFERRED_RENAME_CLASS
6820 #define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6821
6822 #undef TARGET_MACHINE_DEPENDENT_REORG
6823 #define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6824
6825 #undef TARGET_ASM_FILE_START
6826 #define TARGET_ASM_FILE_START c6x_file_start
6827
6828 #undef TARGET_PRINT_OPERAND
6829 #define TARGET_PRINT_OPERAND c6x_print_operand
6830 #undef TARGET_PRINT_OPERAND_ADDRESS
6831 #define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6832 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
6833 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6834
6835 /* C6x unwinding tables use a different format for the typeinfo tables. */
6836 #undef TARGET_ASM_TTYPE
6837 #define TARGET_ASM_TTYPE c6x_output_ttype
6838
6839 /* The C6x ABI follows the ARM EABI exception handling rules. */
6840 #undef TARGET_ARM_EABI_UNWINDER
6841 #define TARGET_ARM_EABI_UNWINDER true
6842
6843 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6844 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6845
6846 #undef TARGET_ASM_INIT_SECTIONS
6847 #define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6848
6849 #undef TARGET_DEBUG_UNWIND_INFO
6850 #define TARGET_DEBUG_UNWIND_INFO c6x_debug_unwind_info
6851
6852 #undef TARGET_DWARF_REGISTER_SPAN
6853 #define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6854
6855 #undef TARGET_INIT_BUILTINS
6856 #define TARGET_INIT_BUILTINS c6x_init_builtins
6857 #undef TARGET_EXPAND_BUILTIN
6858 #define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6859 #undef TARGET_BUILTIN_DECL
6860 #define TARGET_BUILTIN_DECL c6x_builtin_decl
6861
6862 #undef TARGET_HARD_REGNO_MODE_OK
6863 #define TARGET_HARD_REGNO_MODE_OK c6x_hard_regno_mode_ok
6864 #undef TARGET_MODES_TIEABLE_P
6865 #define TARGET_MODES_TIEABLE_P c6x_modes_tieable_p
6866
6867 struct gcc_target targetm = TARGET_INITIALIZER;
6868
6869 #include "gt-c6x.h"
6870