1 /* Target Code for TI C6X
2    Copyright (C) 2010-2016 Free Software Foundation, Inc.
3    Contributed by Andrew Jenner <andrew@codesourcery.com>
4    Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 
6    This file is part of GCC.
7 
8    GCC is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published
10    by the Free Software Foundation; either version 3, or (at your
11    option) any later version.
12 
13    GCC is distributed in the hope that it will be useful, but WITHOUT
14    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16    License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with GCC; see the file COPYING3.  If not see
20    <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple-expr.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "stor-layout.h"
41 #include "varasm.h"
42 #include "calls.h"
43 #include "output.h"
44 #include "insn-attr.h"
45 #include "explow.h"
46 #include "expr.h"
47 #include "cfgrtl.h"
48 #include "sched-int.h"
49 #include "tm-constrs.h"
50 #include "langhooks.h"
51 #include "sel-sched.h"
52 #include "debug.h"
53 #include "hw-doloop.h"
54 #include "regrename.h"
55 #include "dumpfile.h"
56 #include "builtins.h"
57 
58 /* This file should be included last.  */
59 #include "target-def.h"
60 
61 /* Table of supported architecture variants.  */
62 typedef struct
63 {
64   const char *arch;
65   enum c6x_cpu_type type;
66   unsigned short features;
67 } c6x_arch_table;
68 
69 /* A list of all ISAs, mapping each one to a representative device.
70    Used for -march selection.  */
71 static const c6x_arch_table all_isas[] =
72 {
73 #define C6X_ISA(NAME,DEVICE,FLAGS) \
74   { NAME, DEVICE, FLAGS },
75 #include "c6x-isas.def"
76 #undef C6X_ISA
77   { NULL, C6X_CPU_C62X, 0 }
78 };
79 
80 /* This is the parsed result of the "-march=" option, if given.  */
81 enum c6x_cpu_type c6x_arch = C6X_DEFAULT_ARCH;
82 
83 /* A mask of insn types that are allowed by the architecture selected by
84    the -march option.  */
85 unsigned long c6x_insn_mask = C6X_DEFAULT_INSN_MASK;
86 
/* The instruction that is being output (as obtained from
   FINAL_PRESCAN_INSN).  */
89 static rtx_insn *c6x_current_insn = NULL;
90 
91 /* A decl we build to access __c6xabi_DSBT_base.  */
92 static GTY(()) tree dsbt_decl;
93 
94 /* Determines whether we run our final scheduling pass or not.  We always
95    avoid the normal second scheduling pass.  */
96 static int c6x_flag_schedule_insns2;
97 
98 /* Determines whether we run variable tracking in machine dependent
99    reorganization.  */
100 static int c6x_flag_var_tracking;
101 
102 /* Determines whether we use modulo scheduling.  */
103 static int c6x_flag_modulo_sched;
104 
105 /* Record the state of flag_pic before we set it to 1 for DSBT.  */
106 int c6x_initial_flag_pic;
107 
108 typedef struct
109 {
110   /* We record the clock cycle for every insn during scheduling.  */
111   int clock;
112   /* After scheduling, we run assign_reservations to choose unit
113      reservations for all insns.  These are recorded here.  */
114   int reservation;
115   /* Records the new condition for insns which must be made
116      conditional after scheduling.  An entry of NULL_RTX means no such
117      change is necessary.  */
118   rtx new_cond;
119   /* True for the first insn that was scheduled in an ebb.  */
120   bool ebb_start;
121   /* The scheduler state after the insn, transformed into a mask of UNIT_QID
122      bits rather than storing the state.  Meaningful only for the last
123      insn in a cycle.  */
124   unsigned int unit_mask;
125 } c6x_sched_insn_info;
126 
127 
128 /* Record a c6x_sched_insn_info structure for every insn in the function.  */
129 static vec<c6x_sched_insn_info> insn_info;
130 
131 #define INSN_INFO_LENGTH (insn_info).length ()
132 #define INSN_INFO_ENTRY(N) (insn_info[(N)])
133 
134 static bool done_cfi_sections;
135 
136 #define RESERVATION_FLAG_D 1
137 #define RESERVATION_FLAG_L 2
138 #define RESERVATION_FLAG_S 4
139 #define RESERVATION_FLAG_M 8
140 #define RESERVATION_FLAG_DL (RESERVATION_FLAG_D | RESERVATION_FLAG_L)
141 #define RESERVATION_FLAG_DS (RESERVATION_FLAG_D | RESERVATION_FLAG_S)
142 #define RESERVATION_FLAG_LS (RESERVATION_FLAG_L | RESERVATION_FLAG_S)
143 #define RESERVATION_FLAG_DLS (RESERVATION_FLAG_D | RESERVATION_FLAG_LS)
144 
145 /* The DFA names of the units.  */
146 static const char *const c6x_unit_names[] =
147 {
148   "d1", "l1", "s1", "m1", "fps1", "fpl1", "adddps1", "adddpl1",
149   "d2", "l2", "s2", "m2", "fps2", "fpl2", "adddps2", "adddpl2"
150 };
151 
152 /* The DFA unit number for each unit in c6x_unit_names[].  */
153 static int c6x_unit_codes[ARRAY_SIZE (c6x_unit_names)];
154 
155 /* Unit query IDs.  */
156 #define UNIT_QID_D1 0
157 #define UNIT_QID_L1 1
158 #define UNIT_QID_S1 2
159 #define UNIT_QID_M1 3
160 #define UNIT_QID_FPS1 4
161 #define UNIT_QID_FPL1 5
162 #define UNIT_QID_ADDDPS1 6
163 #define UNIT_QID_ADDDPL1 7
164 #define UNIT_QID_SIDE_OFFSET 8
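/* Unit query IDs for the side-2 (B) units are the side-1 values plus
   UNIT_QID_SIDE_OFFSET, following the order of c6x_unit_names above.  */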
165 
166 #define RESERVATION_S1 2
167 #define RESERVATION_S2 10
168 
169 /* An enum for the unit requirements we count in the UNIT_REQS table.  */
170 enum unitreqs
171 {
172   UNIT_REQ_D,
173   UNIT_REQ_L,
174   UNIT_REQ_S,
175   UNIT_REQ_M,
176   UNIT_REQ_DL,
177   UNIT_REQ_DS,
178   UNIT_REQ_LS,
179   UNIT_REQ_DLS,
180   UNIT_REQ_T,
181   UNIT_REQ_X,
182   UNIT_REQ_MAX
183 };
184 
185 /* A table used to count unit requirements.  Used when computing minimum
186    iteration intervals.  */
187 typedef int unit_req_table[2][UNIT_REQ_MAX];
188 static unit_req_table unit_reqs;
189 
190 /* Register map for debugging.  */
191 unsigned const dbx_register_map[FIRST_PSEUDO_REGISTER] =
192 {
193   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,	/* A0 - A15.  */
  37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,	/* A16 - A31.  */
195   50, 51, 52,
196   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,	/* B0 - B15.  */
197   29, 30, 31,
  53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,	/* B16 - B31.  */
199   66, 67, 68,
200   -1, -1, -1						/* FP, ARGP, ILC.  */
201 };
202 
203 /* Allocate a new, cleared machine_function structure.  */
204 
205 static struct machine_function *
c6x_init_machine_status (void)
207 {
208   return ggc_cleared_alloc<machine_function> ();
209 }
210 
211 /* Implement TARGET_OPTION_OVERRIDE.  */
212 
213 static void
c6x_option_override (void)
215 {
216   unsigned i;
217 
218   if (global_options_set.x_c6x_arch_option)
219     {
220       c6x_arch = all_isas[c6x_arch_option].type;
221       c6x_insn_mask &= ~C6X_INSNS_ALL_CPU_BITS;
222       c6x_insn_mask |= all_isas[c6x_arch_option].features;
223     }
224 
225   c6x_flag_schedule_insns2 = flag_schedule_insns_after_reload;
226   flag_schedule_insns_after_reload = 0;
227 
228   c6x_flag_modulo_sched = flag_modulo_sched;
229   flag_modulo_sched = 0;
230 
231   init_machine_status = c6x_init_machine_status;
232 
233   for (i = 0; i < ARRAY_SIZE (c6x_unit_names); i++)
234     c6x_unit_codes[i] = get_cpu_unit_code (c6x_unit_names[i]);
235 
236   if (flag_pic && !TARGET_DSBT)
237     {
238       error ("-fpic and -fPIC not supported without -mdsbt on this target");
239       flag_pic = 0;
240     }
241   c6x_initial_flag_pic = flag_pic;
242   if (TARGET_DSBT && !flag_pic)
243     flag_pic = 1;
244 }
245 
246 
247 /* Implement the TARGET_CONDITIONAL_REGISTER_USAGE hook.  */
248 
249 static void
c6x_conditional_register_usage (void)
251 {
252   int i;
253   if (c6x_arch == C6X_CPU_C62X || c6x_arch == C6X_CPU_C67X)
254     for (i = 16; i < 32; i++)
255       {
256 	fixed_regs[i] = 1;
257 	fixed_regs[32 + i] = 1;
258       }
259   if (TARGET_INSNS_64)
260     {
261       SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_A_REGS],
262 			REG_A0);
263       SET_HARD_REG_BIT (reg_class_contents[(int)PREDICATE_REGS],
264 			REG_A0);
265       CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_A_REGS],
266 			  REG_A0);
267       CLEAR_HARD_REG_BIT (reg_class_contents[(int)NONPREDICATE_REGS],
268 			  REG_A0);
269     }
270 }
271 
272 static GTY(()) rtx eqdf_libfunc;
273 static GTY(()) rtx nedf_libfunc;
274 static GTY(()) rtx ledf_libfunc;
275 static GTY(()) rtx ltdf_libfunc;
276 static GTY(()) rtx gedf_libfunc;
277 static GTY(()) rtx gtdf_libfunc;
278 static GTY(()) rtx eqsf_libfunc;
279 static GTY(()) rtx nesf_libfunc;
280 static GTY(()) rtx lesf_libfunc;
281 static GTY(()) rtx ltsf_libfunc;
282 static GTY(()) rtx gesf_libfunc;
283 static GTY(()) rtx gtsf_libfunc;
284 static GTY(()) rtx strasgi_libfunc;
285 static GTY(()) rtx strasgi64p_libfunc;
286 
287 /* Implement the TARGET_INIT_LIBFUNCS macro.  We use this to rename library
288    functions to match the C6x ABI.  */
289 
290 static void
c6x_init_libfuncs (void)
292 {
293   /* Double-precision floating-point arithmetic.  */
294   set_optab_libfunc (add_optab, DFmode, "__c6xabi_addd");
295   set_optab_libfunc (sdiv_optab, DFmode, "__c6xabi_divd");
296   set_optab_libfunc (smul_optab, DFmode, "__c6xabi_mpyd");
297   set_optab_libfunc (neg_optab, DFmode, "__c6xabi_negd");
298   set_optab_libfunc (sub_optab, DFmode, "__c6xabi_subd");
299 
300   /* Single-precision floating-point arithmetic.  */
301   set_optab_libfunc (add_optab, SFmode, "__c6xabi_addf");
302   set_optab_libfunc (sdiv_optab, SFmode, "__c6xabi_divf");
303   set_optab_libfunc (smul_optab, SFmode, "__c6xabi_mpyf");
304   set_optab_libfunc (neg_optab, SFmode, "__c6xabi_negf");
305   set_optab_libfunc (sub_optab, SFmode, "__c6xabi_subf");
306 
307   /* Floating-point comparisons.  */
308   eqsf_libfunc = init_one_libfunc ("__c6xabi_eqf");
309   nesf_libfunc = init_one_libfunc ("__c6xabi_neqf");
310   lesf_libfunc = init_one_libfunc ("__c6xabi_lef");
311   ltsf_libfunc = init_one_libfunc ("__c6xabi_ltf");
312   gesf_libfunc = init_one_libfunc ("__c6xabi_gef");
313   gtsf_libfunc = init_one_libfunc ("__c6xabi_gtf");
314   eqdf_libfunc = init_one_libfunc ("__c6xabi_eqd");
315   nedf_libfunc = init_one_libfunc ("__c6xabi_neqd");
316   ledf_libfunc = init_one_libfunc ("__c6xabi_led");
317   ltdf_libfunc = init_one_libfunc ("__c6xabi_ltd");
318   gedf_libfunc = init_one_libfunc ("__c6xabi_ged");
319   gtdf_libfunc = init_one_libfunc ("__c6xabi_gtd");
320 
321   set_optab_libfunc (eq_optab, SFmode, NULL);
322   set_optab_libfunc (ne_optab, SFmode, "__c6xabi_neqf");
323   set_optab_libfunc (gt_optab, SFmode, NULL);
324   set_optab_libfunc (ge_optab, SFmode, NULL);
325   set_optab_libfunc (lt_optab, SFmode, NULL);
326   set_optab_libfunc (le_optab, SFmode, NULL);
327   set_optab_libfunc (unord_optab, SFmode, "__c6xabi_unordf");
328   set_optab_libfunc (eq_optab, DFmode, NULL);
329   set_optab_libfunc (ne_optab, DFmode, "__c6xabi_neqd");
330   set_optab_libfunc (gt_optab, DFmode, NULL);
331   set_optab_libfunc (ge_optab, DFmode, NULL);
332   set_optab_libfunc (lt_optab, DFmode, NULL);
333   set_optab_libfunc (le_optab, DFmode, NULL);
334   set_optab_libfunc (unord_optab, DFmode, "__c6xabi_unordd");
335 
336   /* Floating-point to integer conversions.  */
337   set_conv_libfunc (sfix_optab, SImode, DFmode, "__c6xabi_fixdi");
338   set_conv_libfunc (ufix_optab, SImode, DFmode, "__c6xabi_fixdu");
339   set_conv_libfunc (sfix_optab, DImode, DFmode, "__c6xabi_fixdlli");
340   set_conv_libfunc (ufix_optab, DImode, DFmode, "__c6xabi_fixdull");
341   set_conv_libfunc (sfix_optab, SImode, SFmode, "__c6xabi_fixfi");
342   set_conv_libfunc (ufix_optab, SImode, SFmode, "__c6xabi_fixfu");
343   set_conv_libfunc (sfix_optab, DImode, SFmode, "__c6xabi_fixflli");
344   set_conv_libfunc (ufix_optab, DImode, SFmode, "__c6xabi_fixfull");
345 
346   /* Conversions between floating types.  */
347   set_conv_libfunc (trunc_optab, SFmode, DFmode, "__c6xabi_cvtdf");
348   set_conv_libfunc (sext_optab, DFmode, SFmode, "__c6xabi_cvtfd");
349 
350   /* Integer to floating-point conversions.  */
351   set_conv_libfunc (sfloat_optab, DFmode, SImode, "__c6xabi_fltid");
352   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__c6xabi_fltud");
353   set_conv_libfunc (sfloat_optab, DFmode, DImode, "__c6xabi_fltllid");
354   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__c6xabi_fltulld");
355   set_conv_libfunc (sfloat_optab, SFmode, SImode, "__c6xabi_fltif");
356   set_conv_libfunc (ufloat_optab, SFmode, SImode, "__c6xabi_fltuf");
357   set_conv_libfunc (sfloat_optab, SFmode, DImode, "__c6xabi_fltllif");
358   set_conv_libfunc (ufloat_optab, SFmode, DImode, "__c6xabi_fltullf");
359 
360   /* Long long.  */
361   set_optab_libfunc (smul_optab, DImode, "__c6xabi_mpyll");
362   set_optab_libfunc (ashl_optab, DImode, "__c6xabi_llshl");
363   set_optab_libfunc (lshr_optab, DImode, "__c6xabi_llshru");
364   set_optab_libfunc (ashr_optab, DImode, "__c6xabi_llshr");
365 
366   set_optab_libfunc (sdiv_optab, SImode, "__c6xabi_divi");
367   set_optab_libfunc (udiv_optab, SImode, "__c6xabi_divu");
368   set_optab_libfunc (smod_optab, SImode, "__c6xabi_remi");
369   set_optab_libfunc (umod_optab, SImode, "__c6xabi_remu");
370   set_optab_libfunc (sdivmod_optab, SImode, "__c6xabi_divremi");
371   set_optab_libfunc (udivmod_optab, SImode, "__c6xabi_divremu");
372   set_optab_libfunc (sdiv_optab, DImode, "__c6xabi_divlli");
373   set_optab_libfunc (udiv_optab, DImode, "__c6xabi_divull");
374   set_optab_libfunc (smod_optab, DImode, "__c6xabi_remlli");
375   set_optab_libfunc (umod_optab, DImode, "__c6xabi_remull");
376   set_optab_libfunc (udivmod_optab, DImode, "__c6xabi_divremull");
377 
378   /* Block move.  */
379   strasgi_libfunc = init_one_libfunc ("__c6xabi_strasgi");
380   strasgi64p_libfunc = init_one_libfunc ("__c6xabi_strasgi_64plus");
381 }
382 
383 /* Begin the assembly file.  */
384 
385 static void
c6x_file_start (void)
387 {
  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     c6x_option_override, because flag_var_tracking is finalized after
     that.  */
392   c6x_flag_var_tracking = flag_var_tracking;
393   flag_var_tracking = 0;
394 
395   done_cfi_sections = false;
396   default_file_start ();
397 
398   /* Arrays are aligned to 8-byte boundaries.  */
399   asm_fprintf (asm_out_file,
400 	       "\t.c6xabi_attribute Tag_ABI_array_object_alignment, 0\n");
401   asm_fprintf (asm_out_file,
402 	       "\t.c6xabi_attribute Tag_ABI_array_object_align_expected, 0\n");
403 
404   /* Stack alignment is 8 bytes.  */
405   asm_fprintf (asm_out_file,
406 	       "\t.c6xabi_attribute Tag_ABI_stack_align_needed, 0\n");
407   asm_fprintf (asm_out_file,
408 	       "\t.c6xabi_attribute Tag_ABI_stack_align_preserved, 0\n");
409 
410 #if 0 /* FIXME: Reenable when TI's tools are fixed.  */
411   /* ??? Ideally we'd check flag_short_wchar somehow.  */
412   asm_fprintf (asm_out_file, "\t.c6xabi_attribute Tag_ABI_wchar_t, %d\n", 2);
413 #endif
414 
415   /* We conform to version 1.0 of the ABI.  */
416   asm_fprintf (asm_out_file,
417 	       "\t.c6xabi_attribute Tag_ABI_conformance, \"1.0\"\n");
418 
419 }
420 
421 /* The LTO frontend only enables exceptions when it sees a function that
   uses them.  This changes the return value of dwarf2out_do_frame, so we
423    have to check before every function.  */
424 
425 void
c6x_output_file_unwind (FILE * f)
427 {
428   if (done_cfi_sections)
429     return;
430 
431   /* Output a .cfi_sections directive.  */
432   if (dwarf2out_do_frame ())
433     {
434       if (flag_unwind_tables || flag_exceptions)
435 	{
436 	  if (write_symbols == DWARF2_DEBUG
437 	      || write_symbols == VMS_AND_DWARF2_DEBUG)
438 	    asm_fprintf (f, "\t.cfi_sections .debug_frame, .c6xabi.exidx\n");
439 	  else
440 	    asm_fprintf (f, "\t.cfi_sections .c6xabi.exidx\n");
441 	}
442       else
443 	asm_fprintf (f, "\t.cfi_sections .debug_frame\n");
444       done_cfi_sections = true;
445     }
446 }
447 
448 /* Output unwind directives at the end of a function.  */
449 
450 static void
c6x_output_fn_unwind (FILE * f)
452 {
453   /* Return immediately if we are not generating unwinding tables.  */
454   if (! (flag_unwind_tables || flag_exceptions))
455     return;
456 
457   /* If this function will never be unwound, then mark it as such.  */
458   if (!(flag_unwind_tables || crtl->uses_eh_lsda)
459       && (TREE_NOTHROW (current_function_decl)
460 	  || crtl->all_throwers_are_sibcalls))
461     fputs("\t.cantunwind\n", f);
462 
463   fputs ("\t.endp\n", f);
464 }
465 
466 
467 /* Stack and Calling.  */
468 
469 int argument_registers[10] =
470 {
471   REG_A4, REG_B4,
472   REG_A6, REG_B6,
473   REG_A8, REG_B8,
474   REG_A10, REG_B10,
475   REG_A12, REG_B12
476 };
477 
478 /* Implements the macro INIT_CUMULATIVE_ARGS defined in c6x.h.  */
479 
480 void
c6x_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, rtx libname,
482 			  int n_named_args ATTRIBUTE_UNUSED)
483 {
484   cum->count = 0;
485   cum->nregs = 10;
486   if (!libname && fntype)
487     {
488       /* We need to find out the number of named arguments.  Unfortunately,
489 	 for incoming arguments, N_NAMED_ARGS is set to -1.  */
490       if (stdarg_p (fntype))
491 	cum->nregs = type_num_arguments (fntype) - 1;
492       if (cum->nregs > 10)
493 	cum->nregs = 10;
494     }
495 }
496 
/* Implement TARGET_FUNCTION_ARG.  */
498 
499 static rtx
c6x_function_arg (cumulative_args_t cum_v, machine_mode mode,
501 		  const_tree type, bool named ATTRIBUTE_UNUSED)
502 {
503   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
504   if (cum->count >= cum->nregs)
505     return NULL_RTX;
506   if (type)
507     {
508       HOST_WIDE_INT size = int_size_in_bytes (type);
509       if (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (type))
510 	{
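	  /* A big-endian aggregate larger than one word occupies a register
	     pair with the most-significant word in the higher-numbered
	     register, so describe it explicitly with a PARALLEL.  */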
511 	  if (size > 4)
512 	    {
513 	      rtx reg1 = gen_rtx_REG (SImode, argument_registers[cum->count] + 1);
514 	      rtx reg2 = gen_rtx_REG (SImode, argument_registers[cum->count]);
515 	      rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
516 				     gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
517 	      return gen_rtx_PARALLEL (mode, vec);
518 	    }
519 	}
520     }
521   return gen_rtx_REG (mode, argument_registers[cum->count]);
522 }
523 
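/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Each argument advances the
   cursor by a single register slot.  */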
524 static void
c6x_function_arg_advance (cumulative_args_t cum_v,
526 			  machine_mode mode ATTRIBUTE_UNUSED,
527 			  const_tree type ATTRIBUTE_UNUSED,
528 			  bool named ATTRIBUTE_UNUSED)
529 {
530   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
531   cum->count++;
532 }
533 
534 
535 /* Return true if BLOCK_REG_PADDING (MODE, TYPE, FIRST) should return
536    upward rather than downward.  */
537 
538 bool
c6x_block_reg_pad_upward (machine_mode mode ATTRIBUTE_UNUSED,
540 			  const_tree type, bool first)
541 {
542   HOST_WIDE_INT size;
543 
544   if (!TARGET_BIG_ENDIAN)
545     return true;
546   if (!first)
547     return true;
548   if (!type)
549     return true;
550   size = int_size_in_bytes (type);
551   return size == 3;
552 }
553 
554 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */
555 
556 static unsigned int
c6x_function_arg_boundary (machine_mode mode, const_tree type)
558 {
559   unsigned int boundary = type ? TYPE_ALIGN (type) : GET_MODE_BITSIZE (mode);
560 
561   if (boundary > BITS_PER_WORD)
562     return 2 * BITS_PER_WORD;
563 
564   if (mode == BLKmode)
565     {
566       HOST_WIDE_INT size = int_size_in_bytes (type);
567       if (size > 4)
568 	return 2 * BITS_PER_WORD;
569       if (boundary < BITS_PER_WORD)
570 	{
571 	  if (size >= 3)
572 	    return BITS_PER_WORD;
573 	  if (size >= 2)
574 	    return 2 * BITS_PER_UNIT;
575 	}
576     }
577   return boundary;
578 }
579 
580 /* Implement TARGET_FUNCTION_ARG_ROUND_BOUNDARY.  */
581 static unsigned int
c6x_function_arg_round_boundary (machine_mode mode, const_tree type)
583 {
584   return c6x_function_arg_boundary (mode, type);
585 }
586 
587 /* TARGET_FUNCTION_VALUE implementation.  Returns an RTX representing the place
588    where function FUNC returns or receives a value of data type TYPE.  */
589 
590 static rtx
c6x_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED,
592 		    bool outgoing ATTRIBUTE_UNUSED)
593 {
594   /* Functions return values in register A4.  When returning aggregates, we may
595      have to adjust for endianness.  */
596   if (TARGET_BIG_ENDIAN && type && AGGREGATE_TYPE_P (type))
597     {
598       HOST_WIDE_INT size = int_size_in_bytes (type);
599       if (size > 4)
600 	{
601 
602 	  rtx reg1 = gen_rtx_REG (SImode, REG_A4 + 1);
603 	  rtx reg2 = gen_rtx_REG (SImode, REG_A4);
604 	  rtvec vec = gen_rtvec (2, gen_rtx_EXPR_LIST (VOIDmode, reg1, const0_rtx),
605 				 gen_rtx_EXPR_LIST (VOIDmode, reg2, GEN_INT (4)));
606 	  return gen_rtx_PARALLEL (TYPE_MODE (type), vec);
607 	}
608     }
609   return gen_rtx_REG (TYPE_MODE (type), REG_A4);
610 }
611 
612 /* Implement TARGET_LIBCALL_VALUE.  */
613 
614 static rtx
c6x_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
616 {
617   return gen_rtx_REG (mode, REG_A4);
618 }
619 
620 /* TARGET_STRUCT_VALUE_RTX implementation.  */
621 
622 static rtx
c6x_struct_value_rtx (tree type ATTRIBUTE_UNUSED, int incoming ATTRIBUTE_UNUSED)
624 {
625   return gen_rtx_REG (Pmode, REG_A3);
626 }
627 
628 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */
629 
630 static bool
c6x_function_value_regno_p (const unsigned int regno)
632 {
633   return regno == REG_A4;
634 }
635 
/* Types larger than 64 bits, and variable-sized types, are passed by
637    reference.  The callee must copy them; see c6x_callee_copies.  */
638 
639 static bool
c6x_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
641 		       machine_mode mode, const_tree type,
642 		       bool named ATTRIBUTE_UNUSED)
643 {
644   int size = -1;
645   if (type)
646     size = int_size_in_bytes (type);
647   else if (mode != VOIDmode)
648     size = GET_MODE_SIZE (mode);
649   return size > 2 * UNITS_PER_WORD || size == -1;
650 }
651 
/* Decide whether a type should be returned in memory (true)
   or in a register (false).  This implements the
   TARGET_RETURN_IN_MEMORY hook.  */
655 
656 static bool
c6x_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
658 {
659   int size = int_size_in_bytes (type);
660   return size > 2 * UNITS_PER_WORD || size == -1;
661 }
662 
/* Return true for values which must be returned in the most-significant
   end of the return register.  */
665 
666 static bool
c6x_return_in_msb (const_tree valtype)
668 {
669   HOST_WIDE_INT size = int_size_in_bytes (valtype);
670   return TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype) && size == 3;
671 }
672 
673 /* Implement TARGET_CALLEE_COPIES.  */
674 
675 static bool
c6x_callee_copies (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
677 		   machine_mode mode ATTRIBUTE_UNUSED,
678 		   const_tree type ATTRIBUTE_UNUSED,
679 		   bool named ATTRIBUTE_UNUSED)
680 {
681   return true;
682 }
683 
684 /* Return the type to use as __builtin_va_list.  */
685 static tree
c6x_build_builtin_va_list (void)
687 {
688   return build_pointer_type (char_type_node);
689 }
690 
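/* Output the fixed part of a trampoline: mvkl/mvkh pairs that load the
   target function address into B0 and the static chain value into A2,
   followed by a branch through B0.  The constant fields are filled in
   later by c6x_initialize_trampoline.  */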
691 static void
c6x_asm_trampoline_template (FILE *f)
693 {
694   fprintf (f, "\t.long\t0x0000002b\n"); /* mvkl .s2 fnlow,B0 */
695   fprintf (f, "\t.long\t0x01000028\n"); /* || mvkl .s1 sclow,A2 */
696   fprintf (f, "\t.long\t0x0000006b\n"); /* mvkh .s2 fnhigh,B0 */
697   fprintf (f, "\t.long\t0x01000068\n"); /* || mvkh .s1 schigh,A2 */
698   fprintf (f, "\t.long\t0x00000362\n"); /* b .s2 B0 */
699   fprintf (f, "\t.long\t0x00008000\n"); /* nop 5 */
700   fprintf (f, "\t.long\t0x00000000\n"); /* nop */
701   fprintf (f, "\t.long\t0x00000000\n"); /* nop */
702 }
703 
/* Emit RTL insns to initialize the variable parts of a trampoline at
   TRAMP.  FNDECL is the declaration of the target function, whose address
   is patched into the trampoline.  CXT is an RTX for the static chain
   value for the function.  */
707 
708 static void
c6x_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
710 {
711   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
712   rtx t1 = copy_to_reg (fnaddr);
713   rtx t2 = copy_to_reg (cxt);
714   rtx mask = gen_reg_rtx (SImode);
715   int i;
716 
717   emit_block_move (tramp, assemble_trampoline_template (),
718 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
719 
720   emit_move_insn (mask, GEN_INT (0xffff << 7));
721 
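  /* Patch the 16-bit constant field of each of the four mvkl/mvkh words
     in the template.  The mask places the field at bits 7..22, so the
     mvkl words (i < 2) take the value shifted left by 7 and the mvkh
     words take it shifted right by 9.  */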
722   for (i = 0; i < 4; i++)
723     {
724       rtx mem = adjust_address (tramp, SImode, i * 4);
725       rtx t = (i & 1) ? t2 : t1;
726       rtx v1 = gen_reg_rtx (SImode);
727       rtx v2 = gen_reg_rtx (SImode);
728       emit_move_insn (v1, mem);
729       if (i < 2)
730 	emit_insn (gen_ashlsi3 (v2, t, GEN_INT (7)));
731       else
732 	emit_insn (gen_lshrsi3 (v2, t, GEN_INT (9)));
733       emit_insn (gen_andsi3 (v2, v2, mask));
734       emit_insn (gen_iorsi3 (v2, v2, v1));
735       emit_move_insn (mem, v2);
736     }
737 #ifdef CLEAR_INSN_CACHE
738   tramp = XEXP (tramp, 0);
739   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__gnu_clear_cache"),
740 		     LCT_NORMAL, VOIDmode, 2, tramp, Pmode,
741 		     plus_constant (Pmode, tramp, TRAMPOLINE_SIZE),
742 		     Pmode);
743 #endif
744 }
745 
746 /* Determine whether c6x_output_mi_thunk can succeed.  */
747 
748 static bool
c6x_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
750 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
751 			 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
752 			 const_tree function ATTRIBUTE_UNUSED)
753 {
754   return !TARGET_LONG_CALLS;
755 }
756 
757 /* Output the assembler code for a thunk function.  THUNK is the
758    declaration for the thunk function itself, FUNCTION is the decl for
759    the target function.  DELTA is an immediate constant offset to be
760    added to THIS.  If VCALL_OFFSET is nonzero, the word at
761    *(*this + vcall_offset) should be added to THIS.  */
762 
763 static void
c6x_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
765 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
766 		     HOST_WIDE_INT vcall_offset, tree function)
767 {
768   rtx xops[5];
769   /* The this parameter is passed as the first argument.  */
770   rtx this_rtx = gen_rtx_REG (Pmode, REG_A4);
771 
772   c6x_current_insn = NULL;
773 
774   xops[4] = XEXP (DECL_RTL (function), 0);
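  /* With no vtable adjustment, the branch to FUNCTION is issued first and
     its delay slots are filled by the `this' adjustment and nops below.  */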
775   if (!vcall_offset)
776     {
777       output_asm_insn ("b .s2 \t%4", xops);
778       if (!delta)
779 	output_asm_insn ("nop 5", xops);
780     }
781 
782   /* Adjust the this parameter by a fixed constant.  */
783   if (delta)
784     {
785       xops[0] = GEN_INT (delta);
786       xops[1] = this_rtx;
787       if (delta >= -16 && delta <= 15)
788 	{
789 	  output_asm_insn ("add .s1 %0, %1, %1", xops);
790 	  if (!vcall_offset)
791 	    output_asm_insn ("nop 4", xops);
792 	}
793       else if (delta >= 16 && delta < 32)
794 	{
795 	  output_asm_insn ("add .d1 %0, %1, %1", xops);
796 	  if (!vcall_offset)
797 	    output_asm_insn ("nop 4", xops);
798 	}
799       else if (delta >= -32768 && delta < 32768)
800 	{
801 	  output_asm_insn ("mvk .s1 %0, A0", xops);
802 	  output_asm_insn ("add .d1 %1, A0, %1", xops);
803 	  if (!vcall_offset)
804 	    output_asm_insn ("nop 3", xops);
805 	}
806       else
807 	{
808 	  output_asm_insn ("mvkl .s1 %0, A0", xops);
809 	  output_asm_insn ("mvkh .s1 %0, A0", xops);
810 	  output_asm_insn ("add .d1 %1, A0, %1", xops);
811 	  if (!vcall_offset)
812 	    output_asm_insn ("nop 3", xops);
813 	}
814     }
815 
816   /* Adjust the this parameter by a value stored in the vtable.  */
817   if (vcall_offset)
818     {
819       rtx a0tmp = gen_rtx_REG (Pmode, REG_A0);
820       rtx a3tmp = gen_rtx_REG (Pmode, REG_A3);
821 
822       xops[1] = a3tmp;
823       xops[2] = a0tmp;
824       xops[3] = gen_rtx_MEM (Pmode, a0tmp);
825       output_asm_insn ("mv .s1 a4, %2", xops);
826       output_asm_insn ("ldw .d1t1 %3, %2", xops);
827 
828       /* Adjust the this parameter.  */
829       xops[0] = gen_rtx_MEM (Pmode, plus_constant (Pmode, a0tmp,
830 						   vcall_offset));
831       if (!memory_operand (xops[0], Pmode))
832 	{
833 	  rtx tmp2 = gen_rtx_REG (Pmode, REG_A1);
834 	  xops[0] = GEN_INT (vcall_offset);
835 	  xops[1] = tmp2;
836 	  output_asm_insn ("mvkl .s1 %0, %1", xops);
837 	  output_asm_insn ("mvkh .s1 %0, %1", xops);
838 	  output_asm_insn ("nop 2", xops);
839 	  output_asm_insn ("add .d1 %2, %1, %2", xops);
840 	  xops[0] = gen_rtx_MEM (Pmode, a0tmp);
841 	}
842       else
843 	output_asm_insn ("nop 4", xops);
844       xops[2] = this_rtx;
845       output_asm_insn ("ldw .d1t1 %0, %1", xops);
846       output_asm_insn ("|| b .s2 \t%4", xops);
847       output_asm_insn ("nop 4", xops);
848       output_asm_insn ("add .d1 %2, %1, %2", xops);
849     }
850 }
851 
852 /* Return true if EXP goes in small data/bss.  */
853 
854 static bool
c6x_in_small_data_p (const_tree exp)
856 {
857   /* We want to merge strings, so we never consider them small data.  */
858   if (TREE_CODE (exp) == STRING_CST)
859     return false;
860 
861   /* Functions are never small data.  */
862   if (TREE_CODE (exp) == FUNCTION_DECL)
863     return false;
864 
865   if (TREE_CODE (exp) == VAR_DECL && DECL_WEAK (exp))
866     return false;
867 
868   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
869     {
870       const char *section = DECL_SECTION_NAME (exp);
871 
872       if (strcmp (section, ".neardata") == 0
873 	  || strncmp (section, ".neardata.", 10) == 0
874 	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
875 	  || strcmp (section, ".bss") == 0
876 	  || strncmp (section, ".bss.", 5) == 0
877 	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0
878 	  || strcmp (section, ".rodata") == 0
879 	  || strncmp (section, ".rodata.", 8) == 0
880 	  || strncmp (section, ".gnu.linkonce.s2.", 17) == 0)
881 	return true;
882     }
883   else
884     return PLACE_IN_SDATA_P (exp);
885 
886   return false;
887 }
888 
889 /* Return a section for X.  The only special thing we do here is to
890    honor small data.  We don't have a tree type, so we can't use the
891    PLACE_IN_SDATA_P macro we use everywhere else; we choose to place
892    everything sized 8 bytes or smaller into small data.  */
893 
894 static section *
c6x_select_rtx_section (machine_mode mode, rtx x,
896 			unsigned HOST_WIDE_INT align)
897 {
898   if (c6x_sdata_mode == C6X_SDATA_ALL
899       || (c6x_sdata_mode != C6X_SDATA_NONE && GET_MODE_SIZE (mode) <= 8))
900     /* ??? Consider using mergeable sdata sections.  */
901     return sdata_section;
902   else
903     return default_elf_select_rtx_section (mode, x, align);
904 }
905 
906 static section *
c6x_elf_select_section (tree decl, int reloc,
908 			unsigned HOST_WIDE_INT align)
909 {
910   const char *sname = NULL;
911   unsigned int flags = SECTION_WRITE;
912   if (c6x_in_small_data_p (decl))
913     {
914       switch (categorize_decl_for_section (decl, reloc))
915 	{
916 	case SECCAT_SRODATA:
917 	  sname = ".rodata";
918 	  flags = 0;
919 	  break;
920 	case SECCAT_SDATA:
921 	  sname = ".neardata";
922 	  break;
923 	case SECCAT_SBSS:
924 	  sname = ".bss";
925 	  flags |= SECTION_BSS;
926 	default:
927 	  break;
928 	}
929     }
930   else
931     {
932       switch (categorize_decl_for_section (decl, reloc))
933 	{
934 	case SECCAT_DATA:
935 	  sname = ".fardata";
936 	  break;
937 	case SECCAT_DATA_REL:
938 	  sname = ".fardata.rel";
939 	  break;
940 	case SECCAT_DATA_REL_LOCAL:
941 	  sname = ".fardata.rel.local";
942 	  break;
943 	case SECCAT_DATA_REL_RO:
944 	  sname = ".fardata.rel.ro";
945 	  break;
946 	case SECCAT_DATA_REL_RO_LOCAL:
947 	  sname = ".fardata.rel.ro.local";
948 	  break;
949 	case SECCAT_BSS:
950 	  sname = ".far";
951 	  flags |= SECTION_BSS;
952 	  break;
953 	case SECCAT_RODATA:
954 	  sname = ".const";
955 	  flags = 0;
956 	  break;
957 	case SECCAT_SRODATA:
958 	case SECCAT_SDATA:
959 	case SECCAT_SBSS:
960 	  gcc_unreachable ();
961 	default:
962 	  break;
963 	}
964     }
965   if (sname)
966     {
967       /* We might get called with string constants, but get_named_section
968 	 doesn't like them as they are not DECLs.  Also, we need to set
969 	 flags in that case.  */
970       if (!DECL_P (decl))
971 	return get_section (sname, flags, NULL);
972       return get_named_section (decl, sname, reloc);
973     }
974 
975   return default_elf_select_section (decl, reloc, align);
976 }
977 
978 /* Build up a unique section name, expressed as a
979    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of DECL requires
981    link-time relocations.  */
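/* For example, a small-data variable `foo' ends up in `.neardata.foo',
   or in `.gnu.linkonce.s.foo' when only one copy of it may be kept.  */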
982 
983 static void ATTRIBUTE_UNUSED
c6x_elf_unique_section (tree decl, int reloc)
985 {
986   const char *prefix = NULL;
987   /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
988   bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
989 
990   if (c6x_in_small_data_p (decl))
991     {
992       switch (categorize_decl_for_section (decl, reloc))
993 	{
994 	case SECCAT_SDATA:
995           prefix = one_only ? ".s" : ".neardata";
996 	  break;
997 	case SECCAT_SBSS:
998           prefix = one_only ? ".sb" : ".bss";
999 	  break;
1000 	case SECCAT_SRODATA:
1001           prefix = one_only ? ".s2" : ".rodata";
1002 	  break;
1003 	case SECCAT_RODATA_MERGE_STR:
1004 	case SECCAT_RODATA_MERGE_STR_INIT:
1005 	case SECCAT_RODATA_MERGE_CONST:
1006 	case SECCAT_RODATA:
1007 	case SECCAT_DATA:
1008 	case SECCAT_DATA_REL:
1009 	case SECCAT_DATA_REL_LOCAL:
1010 	case SECCAT_DATA_REL_RO:
1011 	case SECCAT_DATA_REL_RO_LOCAL:
1012 	  gcc_unreachable ();
1013 	default:
1014 	  /* Everything else we place into default sections and hope for the
1015 	     best.  */
1016 	  break;
1017 	}
1018     }
1019   else
1020     {
1021       switch (categorize_decl_for_section (decl, reloc))
1022 	{
1023 	case SECCAT_DATA:
1024 	case SECCAT_DATA_REL:
1025 	case SECCAT_DATA_REL_LOCAL:
1026 	case SECCAT_DATA_REL_RO:
1027 	case SECCAT_DATA_REL_RO_LOCAL:
1028           prefix = one_only ? ".fd" : ".fardata";
1029 	  break;
1030 	case SECCAT_BSS:
1031           prefix = one_only ? ".fb" : ".far";
1032 	  break;
1033 	case SECCAT_RODATA:
1034 	case SECCAT_RODATA_MERGE_STR:
1035 	case SECCAT_RODATA_MERGE_STR_INIT:
1036 	case SECCAT_RODATA_MERGE_CONST:
1037           prefix = one_only ? ".fr" : ".const";
1038 	  break;
1039 	case SECCAT_SRODATA:
1040 	case SECCAT_SDATA:
1041 	case SECCAT_SBSS:
1042 	  gcc_unreachable ();
1043 	default:
1044 	  break;
1045 	}
1046     }
1047 
1048   if (prefix)
1049     {
1050       const char *name, *linkonce;
1051       char *string;
1052 
1053       name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1054       name = targetm.strip_name_encoding (name);
1055 
1056       /* If we're using one_only, then there needs to be a .gnu.linkonce
1057 	 prefix to the section name.  */
1058       linkonce = one_only ? ".gnu.linkonce" : "";
1059 
1060       string = ACONCAT ((linkonce, prefix, ".", name, NULL));
1061 
1062       set_decl_section_name (decl, string);
1063       return;
1064     }
1065   default_unique_section (decl, reloc);
1066 }
1067 
1068 static unsigned int
c6x_section_type_flags (tree decl, const char *name, int reloc)
1070 {
1071   unsigned int flags = 0;
1072 
1073   if (strcmp (name, ".far") == 0
1074       || strncmp (name, ".far.", 5) == 0)
1075     flags |= SECTION_BSS;
1076 
1077   flags |= default_section_type_flags (decl, name, reloc);
1078 
1079   return flags;
1080 }
1081 
/* Check whether the given CALL_EXPR would use a call-saved (callee-saved)
   register to pass an argument.  This is used to decide whether sibling
   call optimization can be performed on the respective function call.  */
1085 
1086 static bool
c6x_call_saved_register_used (tree call_expr)
1088 {
1089   CUMULATIVE_ARGS cum_v;
1090   cumulative_args_t cum;
1091   HARD_REG_SET call_saved_regset;
1092   tree parameter;
1093   machine_mode mode;
1094   tree type;
1095   rtx parm_rtx;
1096   int i;
1097 
1098   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
1099   cum = pack_cumulative_args (&cum_v);
1100 
1101   COMPL_HARD_REG_SET (call_saved_regset, call_used_reg_set);
1102   for (i = 0; i < call_expr_nargs (call_expr); i++)
1103     {
1104       parameter = CALL_EXPR_ARG (call_expr, i);
1105       gcc_assert (parameter);
1106 
      /* For an undeclared variable passed as a parameter we will get
	 an ERROR_MARK node here.  */
1109       if (TREE_CODE (parameter) == ERROR_MARK)
1110 	return true;
1111 
1112       type = TREE_TYPE (parameter);
1113       gcc_assert (type);
1114 
1115       mode = TYPE_MODE (type);
1116       gcc_assert (mode);
1117 
1118       if (pass_by_reference (&cum_v, mode, type, true))
1119  	{
1120  	  mode = Pmode;
1121  	  type = build_pointer_type (type);
1122  	}
1123 
1124        parm_rtx = c6x_function_arg (cum, mode, type, 0);
1125 
1126        c6x_function_arg_advance (cum, mode, type, 0);
1127 
1128        if (!parm_rtx)
1129 	 continue;
1130 
1131        if (REG_P (parm_rtx)
1132 	   && overlaps_hard_reg_set_p (call_saved_regset, GET_MODE (parm_rtx),
1133 				       REGNO (parm_rtx)))
1134 	 return true;
1135        if (GET_CODE (parm_rtx) == PARALLEL)
1136 	 {
1137 	   int n = XVECLEN (parm_rtx, 0);
1138 	   while (n-- > 0)
1139 	     {
1140 	       rtx x = XEXP (XVECEXP (parm_rtx, 0, n), 0);
1141 	       if (REG_P (x)
1142 		   && overlaps_hard_reg_set_p (call_saved_regset,
1143 					       GET_MODE (x), REGNO (x)))
1144 		 return true;
1145 	     }
1146 	 }
1147     }
1148   return false;
1149 }
1150 
1151 /* Decide whether we can make a sibling call to a function.  DECL is the
1152    declaration of the function being targeted by the call and EXP is the
1153    CALL_EXPR representing the call.  */
1154 
1155 static bool
c6x_function_ok_for_sibcall (tree decl, tree exp)
1157 {
  /* Registers A10, A12, B10 and B12 are available as argument
     registers, but they are callee-saved.  This makes functions that
     need these registers for arguments unsuitable for sibcalls.  */
1162   if (c6x_call_saved_register_used (exp))
1163     return false;
1164 
1165   if (!flag_pic)
1166     return true;
1167 
1168   if (TARGET_DSBT)
1169     {
1170       /* When compiling for DSBT, the calling function must be local,
1171 	 so that when we reload B14 in the sibcall epilogue, it will
1172 	 not change its value.  */
1173       struct cgraph_local_info *this_func;
1174 
1175       if (!decl)
1176 	/* Not enough information.  */
1177 	return false;
1178 
1179       this_func = cgraph_node::local_info (current_function_decl);
1180       return this_func->local;
1181     }
1182 
1183   return true;
1184 }
1185 
1186 /* Return true if DECL is known to be linked into section SECTION.  */
1187 
1188 static bool
c6x_function_in_section_p (tree decl, section *section)
1190 {
1191   /* We can only be certain about functions defined in the same
1192      compilation unit.  */
1193   if (!TREE_STATIC (decl))
1194     return false;
1195 
1196   /* Make sure that SYMBOL always binds to the definition in this
1197      compilation unit.  */
1198   if (!targetm.binds_local_p (decl))
1199     return false;
1200 
1201   /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
1202   if (!DECL_SECTION_NAME (decl))
1203     {
1204       /* Make sure that we will not create a unique section for DECL.  */
1205       if (flag_function_sections || DECL_COMDAT_GROUP (decl))
1206 	return false;
1207     }
1208 
1209   return function_section (decl) == section;
1210 }
1211 
1212 /* Return true if a call to OP, which is a SYMBOL_REF, must be expanded
1213    as a long call.  */
1214 bool
c6x_long_call_p (rtx op)
1216 {
1217   tree decl;
1218 
1219   if (!TARGET_LONG_CALLS)
1220     return false;
1221 
1222   decl = SYMBOL_REF_DECL (op);
1223 
1224   /* Try to determine whether the symbol is in the same section as the current
1225      function.  Be conservative, and only cater for cases in which the
1226      whole of the current function is placed in the same section.  */
1227   if (decl != NULL_TREE
1228       && !flag_reorder_blocks_and_partition
1229       && TREE_CODE (decl) == FUNCTION_DECL
1230       && c6x_function_in_section_p (decl, current_function_section ()))
1231     return false;
1232 
1233   return true;
1234 }
1235 
1236 /* Emit the sequence for a call.  */
1237 void
c6x_expand_call (rtx retval, rtx address, bool sibcall)
1239 {
1240   rtx callee = XEXP (address, 0);
1241   rtx call_insn;
1242 
1243   if (!c6x_call_operand (callee, Pmode))
1244     {
1245       callee = force_reg (Pmode, callee);
1246       address = change_address (address, Pmode, callee);
1247     }
1248   call_insn = gen_rtx_CALL (VOIDmode, address, const0_rtx);
1249   if (sibcall)
1250     {
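      /* A sibling call still returns through B3, the return address
	 register; record it as used so that it stays live up to the
	 branch.  */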
1251       call_insn = emit_call_insn (call_insn);
1252       use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
1253 	       gen_rtx_REG (Pmode, REG_B3));
1254     }
1255   else
1256     {
1257       if (retval == NULL_RTX)
1258 	call_insn = emit_call_insn (call_insn);
1259       else
1260 	call_insn = emit_call_insn (gen_rtx_SET (retval, call_insn));
1261     }
1262   if (flag_pic)
1263     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
1264 }
1265 
1266 /* Legitimize PIC addresses.  If the address is already position-independent,
1267    we return ORIG.  Newly generated position-independent addresses go into a
1268    reg.  This is REG if nonzero, otherwise we allocate register(s) as
1269    necessary.  PICREG is the register holding the pointer to the PIC offset
1270    table.  */
1271 
1272 static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
1274 {
1275   rtx addr = orig;
1276   rtx new_rtx = orig;
1277 
1278   if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
1279     {
1280       int unspec = UNSPEC_LOAD_GOT;
1281       rtx tmp;
1282 
1283       if (reg == 0)
1284 	{
1285 	  gcc_assert (can_create_pseudo_p ());
1286 	  reg = gen_reg_rtx (Pmode);
1287 	}
1288       if (flag_pic == 2)
1289 	{
1290 	  if (can_create_pseudo_p ())
1291 	    tmp = gen_reg_rtx (Pmode);
1292 	  else
1293 	    tmp = reg;
1294 	  emit_insn (gen_movsi_gotoff_high (tmp, addr));
1295 	  emit_insn (gen_movsi_gotoff_lo_sum (tmp, tmp, addr));
1296 	  emit_insn (gen_load_got_gotoff (reg, picreg, tmp));
1297 	}
1298       else
1299 	{
1300 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
1301 	  new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));
1302 
1303 	  emit_move_insn (reg, new_rtx);
1304 	}
1305       if (picreg == pic_offset_table_rtx)
1306 	crtl->uses_pic_offset_table = 1;
1307       return reg;
1308     }
1309 
1310   else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
1311     {
1312       rtx base;
1313 
1314       if (GET_CODE (addr) == CONST)
1315 	{
1316 	  addr = XEXP (addr, 0);
1317 	  gcc_assert (GET_CODE (addr) == PLUS);
1318 	}
1319 
1320       if (XEXP (addr, 0) == picreg)
1321 	return orig;
1322 
1323       if (reg == 0)
1324 	{
1325 	  gcc_assert (can_create_pseudo_p ());
1326 	  reg = gen_reg_rtx (Pmode);
1327 	}
1328 
1329       base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
1330       addr = legitimize_pic_address (XEXP (addr, 1),
1331 				     base == reg ? NULL_RTX : reg,
1332 				     picreg);
1333 
1334       if (GET_CODE (addr) == CONST_INT)
1335 	{
1336 	  gcc_assert (! reload_in_progress && ! reload_completed);
1337 	  addr = force_reg (Pmode, addr);
1338 	}
1339 
1340       if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
1341 	{
1342 	  base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
1343 	  addr = XEXP (addr, 1);
1344 	}
1345 
1346       return gen_rtx_PLUS (Pmode, base, addr);
1347     }
1348 
1349   return new_rtx;
1350 }
1351 
1352 /* Expand a move operation in mode MODE.  The operands are in OPERANDS.
1353    Returns true if no further code must be generated, false if the caller
1354    should generate an insn to move OPERANDS[1] to OPERANDS[0].  */
1355 
1356 bool
expand_move (rtx *operands, machine_mode mode)
1358 {
1359   rtx dest = operands[0];
1360   rtx op = operands[1];
1361 
1362   if ((reload_in_progress | reload_completed) == 0
1363       && GET_CODE (dest) == MEM && GET_CODE (op) != REG)
1364     operands[1] = force_reg (mode, op);
1365   else if (mode == SImode && symbolic_operand (op, SImode))
1366     {
1367       if (flag_pic)
1368 	{
1369 	  if (sdata_symbolic_operand (op, SImode))
1370 	    {
1371 	      emit_insn (gen_load_sdata_pic (dest, pic_offset_table_rtx, op));
1372 	      crtl->uses_pic_offset_table = 1;
1373 	      return true;
1374 	    }
1375 	  else
1376 	    {
1377 	      rtx temp = (reload_completed || reload_in_progress
1378 			  ? dest : gen_reg_rtx (Pmode));
1379 
1380 	      operands[1] = legitimize_pic_address (op, temp,
1381 						    pic_offset_table_rtx);
1382 	    }
1383 	}
1384       else if (reload_completed
1385 	       && !sdata_symbolic_operand (op, SImode))
1386 	{
1387 	  emit_insn (gen_movsi_high (dest, op));
1388 	  emit_insn (gen_movsi_lo_sum (dest, dest, op));
1389 	  return true;
1390 	}
1391     }
1392   return false;
1393 }
1394 
1395 /* This function is called when we're about to expand an integer compare
1396    operation which performs COMPARISON.  It examines the second operand,
1397    and if it is an integer constant that cannot be used directly on the
1398    current machine in a comparison insn, it returns true.  */
1399 bool
c6x_force_op_for_comparison_p (enum rtx_code code, rtx op)
1401 {
1402   if (!CONST_INT_P (op) || satisfies_constraint_Iu4 (op))
1403     return false;
1404 
1405   if ((code == EQ || code == LT || code == GT)
1406        && !satisfies_constraint_Is5 (op))
1407     return true;
1408   if ((code == GTU || code == LTU)
1409       && (!TARGET_INSNS_64 || !satisfies_constraint_Iu5 (op)))
1410     return true;
1411 
1412   return false;
1413 }
1414 
1415 /* Emit comparison instruction if necessary, returning the expression
1416    that holds the compare result in the proper mode.  Return the comparison
1417    that should be used in the jump insn.  */
1418 
1419 rtx
c6x_expand_compare (rtx comparison, machine_mode mode)
1421 {
1422   enum rtx_code code = GET_CODE (comparison);
1423   rtx op0 = XEXP (comparison, 0);
1424   rtx op1 = XEXP (comparison, 1);
1425   rtx cmp;
1426   enum rtx_code jump_code = code;
1427   machine_mode op_mode = GET_MODE (op0);
1428 
1429   if (op_mode == DImode && (code == NE || code == EQ) && op1 == const0_rtx)
1430     {
1431       rtx t = gen_reg_rtx (SImode);
1432       emit_insn (gen_iorsi3 (t, gen_lowpart (SImode, op0),
1433 			     gen_highpart (SImode, op0)));
1434       op_mode = SImode;
1435       cmp = t;
1436     }
1437   else if (op_mode == DImode)
1438     {
1439       rtx lo[2], high[2];
1440       rtx cmp1, cmp2;
1441 
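      /* Split the 64-bit comparison into SImode halves.  The result is
	 the comparison of the high parts, combined with a comparison of
	 the low parts that only matters when the high parts are equal:
	 ANDed for EQ, otherwise ORed with (high0 == high1) AND the
	 unsigned comparison of the low parts.  */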
1442       if (code == NE || code == GEU || code == LEU || code == GE || code == LE)
1443 	{
1444 	  code = reverse_condition (code);
1445 	  jump_code = EQ;
1446 	}
1447       else
1448 	jump_code = NE;
1449 
1450       split_di (&op0, 1, lo, high);
1451       split_di (&op1, 1, lo + 1, high + 1);
1452 
1453       if (c6x_force_op_for_comparison_p (code, high[1])
1454 	  || c6x_force_op_for_comparison_p (EQ, high[1]))
1455 	high[1] = force_reg (SImode, high[1]);
1456 
1457       cmp1 = gen_reg_rtx (SImode);
1458       cmp2 = gen_reg_rtx (SImode);
1459       emit_insn (gen_rtx_SET (cmp1, gen_rtx_fmt_ee (code, SImode,
1460 						    high[0], high[1])));
1461       if (code == EQ)
1462 	{
1463 	  if (c6x_force_op_for_comparison_p (code, lo[1]))
1464 	    lo[1] = force_reg (SImode, lo[1]);
1465 	  emit_insn (gen_rtx_SET (cmp2, gen_rtx_fmt_ee (code, SImode,
1466 							lo[0], lo[1])));
1467 	  emit_insn (gen_andsi3 (cmp1, cmp1, cmp2));
1468 	}
1469       else
1470 	{
1471 	  emit_insn (gen_rtx_SET (cmp2, gen_rtx_EQ (SImode, high[0],
1472 						    high[1])));
1473 	  if (code == GT)
1474 	    code = GTU;
1475 	  else if (code == LT)
1476 	    code = LTU;
1477 	  if (c6x_force_op_for_comparison_p (code, lo[1]))
1478 	    lo[1] = force_reg (SImode, lo[1]);
1479 	  emit_insn (gen_cmpsi_and (cmp2, gen_rtx_fmt_ee (code, SImode,
1480 							  lo[0], lo[1]),
1481 				    lo[0], lo[1], cmp2));
1482 	  emit_insn (gen_iorsi3 (cmp1, cmp1, cmp2));
1483 	}
1484       cmp = cmp1;
1485     }
1486   else if (TARGET_FP && !flag_finite_math_only
1487 	   && (op_mode == DFmode || op_mode == SFmode)
1488 	   && code != EQ && code != NE && code != LT && code != GT
1489 	   && code != UNLE && code != UNGE)
1490     {
1491       enum rtx_code code1, code2, code3;
1492       rtx (*fn) (rtx, rtx, rtx, rtx, rtx);
1493 
1494       jump_code = NE;
1495       code3 = UNKNOWN;
1496       switch (code)
1497 	{
1498 	case UNLT:
1499 	case UNGT:
1500 	  jump_code = EQ;
1501 	  /* fall through */
1502 	case LE:
1503 	case GE:
1504 	  code1 = code == LE || code == UNGT ? LT : GT;
1505 	  code2 = EQ;
1506 	  break;
1507 
1508 	case UNORDERED:
1509 	  jump_code = EQ;
1510 	  /* fall through */
1511 	case ORDERED:
1512 	  code3 = EQ;
1513 	  /* fall through */
1514 	case LTGT:
1515 	  code1 = LT;
1516 	  code2 = GT;
1517 	  break;
1518 
1519 	case UNEQ:
1520 	  code1 = LT;
1521 	  code2 = GT;
1522 	  jump_code = EQ;
1523 	  break;
1524 
1525 	default:
1526 	  gcc_unreachable ();
1527 	}
1528 
1529       cmp = gen_reg_rtx (SImode);
1530       emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code1, SImode, op0, op1)));
1531       fn = op_mode == DFmode ? gen_cmpdf_ior : gen_cmpsf_ior;
1532       emit_insn (fn (cmp, gen_rtx_fmt_ee (code2, SImode, op0, op1),
1533 		     op0, op1, cmp));
1534       if (code3 != UNKNOWN)
1535 	emit_insn (fn (cmp, gen_rtx_fmt_ee (code3, SImode, op0, op1),
1536 		       op0, op1, cmp));
1537     }
1538   else if (op_mode == SImode && (code == NE || code == EQ) && op1 == const0_rtx)
1539     cmp = op0;
1540   else
1541     {
1542       bool is_fp_libfunc;
1543       is_fp_libfunc = !TARGET_FP && (op_mode == DFmode || op_mode == SFmode);
1544 
1545       if ((code == NE || code == GEU || code == LEU || code == GE || code == LE)
1546 	  && !is_fp_libfunc)
1547 	{
1548 	  code = reverse_condition (code);
1549 	  jump_code = EQ;
1550 	}
1551       else if (code == UNGE)
1552 	{
1553 	  code = LT;
1554 	  jump_code = EQ;
1555 	}
1556       else if (code == UNLE)
1557 	{
1558 	  code = GT;
1559 	  jump_code = EQ;
1560 	}
1561       else
1562 	jump_code = NE;
1563 
1564       if (is_fp_libfunc)
1565 	{
1566 	  rtx_insn *insns;
1567 	  rtx libfunc;
1568 	  switch (code)
1569 	    {
1570 	    case EQ:
1571 	      libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
1572 	      break;
1573 	    case NE:
1574 	      libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
1575 	      break;
1576 	    case GT:
1577 	      libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
1578 	      break;
1579 	    case GE:
1580 	      libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
1581 	      break;
1582 	    case LT:
1583 	      libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
1584 	      break;
1585 	    case LE:
1586 	      libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
1587 	      break;
1588 	    default:
1589 	      gcc_unreachable ();
1590 	    }
1591 	  start_sequence ();
1592 
1593 	  cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode, 2,
1594 					 op0, op_mode, op1, op_mode);
1595 	  insns = get_insns ();
1596 	  end_sequence ();
1597 
1598 	  emit_libcall_block (insns, cmp, cmp,
1599 			      gen_rtx_fmt_ee (code, SImode, op0, op1));
1600 	}
1601       else
1602 	{
1603 	  cmp = gen_reg_rtx (SImode);
1604 	  if (c6x_force_op_for_comparison_p (code, op1))
1605 	    op1 = force_reg (SImode, op1);
1606 	  emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, SImode,
1607 						       op0, op1)));
1608 	}
1609     }
1610 
1611   return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
1612 }
1613 
1614 /* Return one word of double-word value OP.  HIGH_P is true to select the
1615    high part, false to select the low part.  When encountering auto-increment
1616    addressing, we make the assumption that the low part is going to be accessed
1617    first.  */
1618 
1619 rtx
c6x_subword (rtx op, bool high_p)
1621 {
1622   unsigned int byte;
1623   machine_mode mode;
1624 
1625   mode = GET_MODE (op);
1626   if (mode == VOIDmode)
1627     mode = DImode;
1628 
1629   if (TARGET_BIG_ENDIAN ? !high_p : high_p)
1630     byte = UNITS_PER_WORD;
1631   else
1632     byte = 0;
1633 
1634   if (MEM_P (op))
1635     {
1636       rtx addr = XEXP (op, 0);
1637       if (GET_CODE (addr) == PLUS || REG_P (addr))
1638 	return adjust_address (op, word_mode, byte);
1639       /* FIXME: should really support autoincrement addressing for
1640 	 multi-word modes.  */
1641       gcc_unreachable ();
1642     }
1643 
1644   return simplify_gen_subreg (word_mode, op, mode, byte);
1645 }
1646 
1647 /* Split one or more DImode RTL references into pairs of SImode
1648    references.  The RTL can be REG, offsettable MEM, integer constant, or
1649    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
1650    split and "num" is its length.  lo_half and hi_half are output arrays
1651    that parallel "operands".  */
1652 
1653 void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
1655 {
1656   while (num--)
1657     {
1658       rtx op = operands[num];
1659 
1660       lo_half[num] = c6x_subword (op, false);
1661       hi_half[num] = c6x_subword (op, true);
1662     }
1663 }
1664 
1665 /* Return true if VAL is a mask valid for a clr instruction.  */
1666 bool
1667 c6x_valid_mask_p (HOST_WIDE_INT val)
1668 {
1669   int i;
1670   for (i = 0; i < 32; i++)
1671     if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1672       break;
1673   for (; i < 32; i++)
1674     if (val & ((unsigned HOST_WIDE_INT)1 << i))
1675       break;
1676   for (; i < 32; i++)
1677     if (!(val & ((unsigned HOST_WIDE_INT)1 << i)))
1678       return false;
1679   return true;
1680 }
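
/* Two illustrative values: 0xFFFF00FF is a valid clr mask, because its
   clear bits (8..15) form a single contiguous run; 0xFF00FF00 is not,
   because bits 0..7 and 16..23 are two separate clear runs.  */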
1681 
1682 /* Expand a block move for a movmemM pattern.  */
1683 
1684 bool
1685 c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
1686 		   rtx expected_align_exp ATTRIBUTE_UNUSED,
1687 		   rtx expected_size_exp ATTRIBUTE_UNUSED)
1688 {
1689   unsigned HOST_WIDE_INT align = 1;
1690   unsigned HOST_WIDE_INT src_mem_align, dst_mem_align, min_mem_align;
1691   unsigned HOST_WIDE_INT count = 0, offset = 0;
1692   unsigned int biggest_move = TARGET_STDW ? 8 : 4;
1693 
1694   if (CONST_INT_P (align_exp))
1695     align = INTVAL (align_exp);
1696 
1697   src_mem_align = MEM_ALIGN (src) / BITS_PER_UNIT;
1698   dst_mem_align = MEM_ALIGN (dst) / BITS_PER_UNIT;
1699   min_mem_align = MIN (src_mem_align, dst_mem_align);
1700 
1701   if (min_mem_align > align)
1702     align = min_mem_align / BITS_PER_UNIT;
1703   if (src_mem_align < align)
1704     src_mem_align = align;
1705   if (dst_mem_align < align)
1706     dst_mem_align = align;
1707 
1708   if (CONST_INT_P (count_exp))
1709     count = INTVAL (count_exp);
1710   else
1711     return false;
1712 
1713   /* Make sure we don't need to care about overflow later on.  */
1714   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
1715     return false;
1716 
1717   if (count >= 28 && (count & 3) == 0 && align >= 4)
1718     {
1719       tree dst_expr = MEM_EXPR (dst);
1720       tree src_expr = MEM_EXPR (src);
1721       rtx fn = TARGET_INSNS_64PLUS ? strasgi64p_libfunc : strasgi_libfunc;
1722       rtx srcreg = force_reg (Pmode, XEXP (src, 0));
1723       rtx dstreg = force_reg (Pmode, XEXP (dst, 0));
1724 
1725       if (src_expr)
1726 	mark_addressable (src_expr);
1727       if (dst_expr)
1728 	mark_addressable (dst_expr);
1729       emit_library_call (fn, LCT_NORMAL, VOIDmode, 3,
1730 			 dstreg, Pmode, srcreg, Pmode, count_exp, SImode);
1731       return true;
1732     }
1733 
1734   if (biggest_move > align && !TARGET_INSNS_64)
1735     biggest_move = align;
1736 
1737   if (count / biggest_move > 7)
1738     return false;
1739 
1740   while (count > 0)
1741     {
1742       rtx reg, reg_lowpart;
1743       machine_mode srcmode, dstmode;
1744       unsigned HOST_WIDE_INT src_size, dst_size, src_left;
1745       int shift;
1746       rtx srcmem, dstmem;
1747 
1748       while (biggest_move > count)
1749 	biggest_move /= 2;
1750 
1751       src_size = dst_size = biggest_move;
1752       if (src_size > src_mem_align && src_size == 2)
1753 	src_size = 1;
1754       if (dst_size > dst_mem_align && dst_size == 2)
1755 	dst_size = 1;
1756 
1757       if (dst_size > src_size)
1758 	dst_size = src_size;
1759 
1760       srcmode = mode_for_size (src_size * BITS_PER_UNIT, MODE_INT, 0);
1761       dstmode = mode_for_size (dst_size * BITS_PER_UNIT, MODE_INT, 0);
1762       if (src_size >= 4)
1763 	reg_lowpart = reg = gen_reg_rtx (srcmode);
1764       else
1765 	{
1766 	  reg = gen_reg_rtx (SImode);
1767 	  reg_lowpart = gen_lowpart (srcmode, reg);
1768 	}
1769 
1770       srcmem = adjust_address (copy_rtx (src), srcmode, offset);
1771 
1772       if (src_size > src_mem_align)
1773 	{
1774 	  enum insn_code icode = (srcmode == SImode ? CODE_FOR_movmisalignsi
1775 				  : CODE_FOR_movmisaligndi);
1776 	  emit_insn (GEN_FCN (icode) (reg_lowpart, srcmem));
1777 	}
1778       else
1779 	emit_move_insn (reg_lowpart, srcmem);
1780 
1781       src_left = src_size;
1782       shift = TARGET_BIG_ENDIAN ? (src_size - dst_size) * BITS_PER_UNIT  : 0;
1783       while (src_left > 0)
1784 	{
1785 	  rtx dstreg = reg_lowpart;
1786 
1787 	  if (src_size > dst_size)
1788 	    {
1789 	      rtx srcword = reg;
1790 	      int shift_amount = shift & (BITS_PER_WORD - 1);
1791 	      if (src_size > 4)
1792 		srcword = operand_subword_force (srcword, src_left >= 4 ? 0 : 4,
1793 						 SImode);
1794 	      if (shift_amount > 0)
1795 		{
1796 		  dstreg = gen_reg_rtx (SImode);
1797 		  emit_insn (gen_lshrsi3 (dstreg, srcword,
1798 					  GEN_INT (shift_amount)));
1799 		}
1800 	      else
1801 		dstreg = srcword;
1802 	      dstreg = gen_lowpart (dstmode, dstreg);
1803 	    }
1804 
1805 	  dstmem = adjust_address (copy_rtx (dst), dstmode, offset);
1806 	  if (dst_size > dst_mem_align)
1807 	    {
1808 	      enum insn_code icode = (dstmode == SImode ? CODE_FOR_movmisalignsi
1809 				      : CODE_FOR_movmisaligndi);
1810 	      emit_insn (GEN_FCN (icode) (dstmem, dstreg));
1811 	    }
1812 	  else
1813 	    emit_move_insn (dstmem, dstreg);
1814 
1815 	  if (TARGET_BIG_ENDIAN)
1816 	    shift -= dst_size * BITS_PER_UNIT;
1817 	  else
1818 	    shift += dst_size * BITS_PER_UNIT;
1819 	  offset += dst_size;
1820 	  src_left -= dst_size;
1821 	}
1822       count -= src_size;
1823     }
1824   return true;
1825 }
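
/* A worked example of the copy loop above (assuming !TARGET_STDW and both
   operands known to be 4-byte aligned): a constant 6-byte copy becomes one
   SImode load/store pair followed by one HImode load/store pair, with
   OFFSET advancing by 4 and then by 2.  */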
1826 
1827 /* Subroutine of print_address_operand, print a single address offset OFF for
1828    a memory access of mode MEM_MODE, choosing between normal form and scaled
1829    form depending on the type of the insn.  Misaligned memory references must
1830    use the scaled form.  */
1831 
1832 static void
1833 print_address_offset (FILE *file, rtx off, machine_mode mem_mode)
1834 {
1835   rtx pat;
1836 
1837   if (c6x_current_insn != NULL_RTX)
1838     {
1839       pat = PATTERN (c6x_current_insn);
1840       if (GET_CODE (pat) == COND_EXEC)
1841 	pat = COND_EXEC_CODE (pat);
1842       if (GET_CODE (pat) == PARALLEL)
1843 	pat = XVECEXP (pat, 0, 0);
1844 
1845       if (GET_CODE (pat) == SET
1846 	  && GET_CODE (SET_SRC (pat)) == UNSPEC
1847 	  && XINT (SET_SRC (pat), 1) == UNSPEC_MISALIGNED_ACCESS)
1848 	{
1849 	  gcc_assert (CONST_INT_P (off)
1850 		      && (INTVAL (off) & (GET_MODE_SIZE (mem_mode) - 1)) == 0);
1851 	  fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC "]",
1852 		   INTVAL (off) / GET_MODE_SIZE (mem_mode));
1853 	  return;
1854 	}
1855     }
1856   fputs ("(", file);
1857   output_address (mem_mode, off);
1858   fputs (")", file);
1859 }
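
/* For example, an offset of 8 on an SImode access is printed as "(8)" in
   the normal case, but as the scaled form "[2]" when the current insn is a
   misaligned-access unspec.  */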
1860 
1861 static bool
1862 c6x_print_operand_punct_valid_p (unsigned char c)
1863 {
1864   return c == '$' || c == '.' || c == '|';
1865 }
1866 
1867 static void c6x_print_operand (FILE *, rtx, int);
1868 
1869 /* Subroutine of c6x_print_operand; used to print a memory reference X to FILE.  */
1870 
1871 static void
1872 c6x_print_address_operand (FILE *file, rtx x, machine_mode mem_mode)
1873 {
1874   rtx off;
1875   switch (GET_CODE (x))
1876     {
1877     case PRE_MODIFY:
1878     case POST_MODIFY:
1879       if (GET_CODE (x) == POST_MODIFY)
1880 	output_address (mem_mode, XEXP (x, 0));
1881       off = XEXP (XEXP (x, 1), 1);
1882       if (XEXP (x, 0) == stack_pointer_rtx)
1883 	{
1884 	  if (GET_CODE (x) == PRE_MODIFY)
1885 	    gcc_assert (INTVAL (off) > 0);
1886 	  else
1887 	    gcc_assert (INTVAL (off) < 0);
1888 	}
1889       if (CONST_INT_P (off) && INTVAL (off) < 0)
1890 	{
1891 	  fprintf (file, "--");
1892 	  off = GEN_INT (-INTVAL (off));
1893 	}
1894       else
1895 	fprintf (file, "++");
1896       if (GET_CODE (x) == PRE_MODIFY)
1897 	output_address (mem_mode, XEXP (x, 0));
1898       print_address_offset (file, off, mem_mode);
1899       break;
1900 
1901     case PLUS:
1902       off = XEXP (x, 1);
1903       if (CONST_INT_P (off) && INTVAL (off) < 0)
1904 	{
1905 	  fprintf (file, "-");
1906 	  off = GEN_INT (-INTVAL (off));
1907 	}
1908       else
1909 	fprintf (file, "+");
1910       output_address (mem_mode, XEXP (x, 0));
1911       print_address_offset (file, off, mem_mode);
1912       break;
1913 
1914     case PRE_DEC:
1915       gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1916       fprintf (file, "--");
1917       output_address (mem_mode, XEXP (x, 0));
1918       fprintf (file, "[1]");
1919       break;
1920     case PRE_INC:
1921       fprintf (file, "++");
1922       output_address (mem_mode, XEXP (x, 0));
1923       fprintf (file, "[1]");
1924       break;
1925     case POST_INC:
1926       gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
1927       output_address (mem_mode, XEXP (x, 0));
1928       fprintf (file, "++[1]");
1929       break;
1930     case POST_DEC:
1931       output_address (mem_mode, XEXP (x, 0));
1932       fprintf (file, "--[1]");
1933       break;
1934 
1935     case SYMBOL_REF:
1936     case CONST:
1937     case LABEL_REF:
1938       gcc_assert (sdata_symbolic_operand (x, Pmode));
1939       fprintf (file, "+B14(");
1940       output_addr_const (file, x);
1941       fprintf (file, ")");
1942       break;
1943 
1944     case UNSPEC:
1945       switch (XINT (x, 1))
1946 	{
1947 	case UNSPEC_LOAD_GOT:
1948 	  fputs ("$GOT(", file);
1949 	  output_addr_const (file, XVECEXP (x, 0, 0));
1950 	  fputs (")", file);
1951 	  break;
1952 	case UNSPEC_LOAD_SDATA:
1953 	  output_addr_const (file, XVECEXP (x, 0, 0));
1954 	  break;
1955 	default:
1956 	  gcc_unreachable ();
1957 	}
1958       break;
1959 
1960     default:
1961       gcc_assert (GET_CODE (x) != MEM);
1962       c6x_print_operand (file, x, 0);
1963       break;
1964     }
1965 }
1966 
1967 /* Return a single character, either 'l', 's', 'd' or 'm', specifying the
1968    functional unit used by INSN.  */
1969 
1970 char
1971 c6x_get_unit_specifier (rtx_insn *insn)
1972 {
1973   enum attr_units units;
1974 
1975   if (insn_info.exists ())
1976     {
1977       int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
1978       return c6x_unit_names[unit][0];
1979     }
1980 
1981   units = get_attr_units (insn);
1982   switch (units)
1983     {
1984     case UNITS_D:
1985     case UNITS_DL:
1986     case UNITS_DS:
1987     case UNITS_DLS:
1988     case UNITS_D_ADDR:
1989       return 'd';
1990       break;
1991     case UNITS_L:
1992     case UNITS_LS:
1993       return 'l';
1994       break;
1995     case UNITS_S:
1996       return 's';
1997       break;
1998     case UNITS_M:
1999       return 'm';
2000       break;
2001     default:
2002       gcc_unreachable ();
2003     }
2004 }
2005 
2006 /* Prints the unit specifier field.  */
2007 static void
2008 c6x_print_unit_specifier_field (FILE *file, rtx_insn *insn)
2009 {
2010   enum attr_units units = get_attr_units (insn);
2011   enum attr_cross cross = get_attr_cross (insn);
2012   enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
2013   int half;
2014   char unitspec;
2015 
2016   if (units == UNITS_D_ADDR)
2017     {
2018       enum attr_addr_regfile arf = get_attr_addr_regfile (insn);
2019       int t_half;
2020       gcc_assert (arf != ADDR_REGFILE_UNKNOWN);
2021       half = arf == ADDR_REGFILE_A ? 1 : 2;
2022       t_half = rf == DEST_REGFILE_A ? 1 : 2;
2023       fprintf (file, ".d%dt%d", half, t_half);
2024       return;
2025     }
2026 
2027   if (insn_info.exists ())
2028     {
2029       int unit = INSN_INFO_ENTRY (INSN_UID (insn)).reservation;
2030       fputs (".", file);
2031       fputs (c6x_unit_names[unit], file);
2032       if (cross == CROSS_Y)
2033 	fputs ("x", file);
2034       return;
2035     }
2036 
2037   gcc_assert (rf != DEST_REGFILE_UNKNOWN);
2038   unitspec = c6x_get_unit_specifier (insn);
2039   half = rf == DEST_REGFILE_A ? 1 : 2;
2040   fprintf (file, ".%c%d%s", unitspec, half, cross == CROSS_Y ? "x" : "");
2041 }
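
/* Sample outputs (for illustration): ".d1t2" for a load or store whose
   address is computed on side A while its data register lives on side B,
   and ".s2x" for an S-unit insn writing side B and reading one operand
   over the cross path.  */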
2042 
2043 /* Output assembly language for the address ADDR to FILE.  */
2044 static void
2045 c6x_print_operand_address (FILE *file, machine_mode mode, rtx addr)
2046 {
2047   c6x_print_address_operand (file, addr, mode);
2048 }
2049 
2050 /* Print an operand, X, to FILE, with an optional modifier in CODE.
2051 
2052    Meaning of CODE:
2053    $ -- print the unit specifier field for the instruction.
2054    . -- print the predicate for the instruction or an empty string for an
2055         unconditional one.
2056    | -- print "||" if the insn should be issued in parallel with the previous
2057         one.
2058 
2059    C -- print an opcode suffix for a reversed condition
2060    d -- H, W or D as a suffix for ADDA, based on the factor given by the
2061         operand
2062    D -- print either B, H, W or D as a suffix for ADDA, based on the size of
2063         the operand
2064    J -- print a predicate
2065    j -- like J, but use reverse predicate
2066    k -- treat a CONST_INT as a register number and print it as a register
2067    K -- like k, but print out a doubleword register
2068    n -- print an integer operand, negated
2069    p -- print the low part of a DImode register
2070    P -- print the high part of a DImode register
2071    r -- print the absolute value of an integer operand, shifted right by 1
2072    R -- print the absolute value of an integer operand, shifted right by 2
2073    f -- the first clear bit in an integer operand assumed to be a mask for
2074         a clr instruction
2075    F -- the last clear bit in such a mask
2076    s -- the first set bit in an integer operand assumed to be a mask for
2077         a set instruction
2078    S -- the last set bit in such a mask
2079    U -- print either 1 or 2, depending on the side of the machine used by
2080         the operand  */
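
/* A few worked examples of the bit-field modifiers (illustrative values
   only): with a clr mask of 0xFFFF00FF, '%f' prints 8 and '%F' prints 15;
   with a set mask of 0x00FFFF00, '%s' prints 8 and '%S' prints 23, i.e.
   the first and last bit numbers of the field that the clr/set instruction
   operates on.  */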
2081 
2082 static void
2083 c6x_print_operand (FILE *file, rtx x, int code)
2084 {
2085   int i;
2086   HOST_WIDE_INT v;
2087   tree t;
2088   machine_mode mode;
2089 
2090   if (code == '|')
2091     {
2092       if (GET_MODE (c6x_current_insn) != TImode)
2093 	fputs ("||", file);
2094       return;
2095     }
2096   if (code == '$')
2097     {
2098       c6x_print_unit_specifier_field (file, c6x_current_insn);
2099       return;
2100     }
2101 
2102   if (code == '.')
2103     {
2104       x = current_insn_predicate;
2105       if (x)
2106 	{
2107 	  unsigned int regno = REGNO (XEXP (x, 0));
2108 	  fputs ("[", file);
2109  	  if (GET_CODE (x) == EQ)
2110 	    fputs ("!", file);
2111 	  fputs (reg_names [regno], file);
2112 	  fputs ("]", file);
2113 	}
2114       return;
2115     }
2116 
2117   mode = GET_MODE (x);
2118 
2119   switch (code)
2120     {
2121     case 'C':
2122     case 'c':
2123       {
2124 	enum rtx_code c = GET_CODE (x);
2125 	if (code == 'C')
2126 	  c = swap_condition (c);
2127 	fputs (GET_RTX_NAME (c), file);
2128       }
2129       return;
2130 
2131     case 'J':
2132     case 'j':
2133       {
2134 	unsigned int regno = REGNO (XEXP (x, 0));
2135 	if ((GET_CODE (x) == EQ) == (code == 'J'))
2136 	  fputs ("!", file);
2137         fputs (reg_names [regno], file);
2138       }
2139       return;
2140 
2141     case 'k':
2142       gcc_assert (GET_CODE (x) == CONST_INT);
2143       v = INTVAL (x);
2144       fprintf (file, "%s", reg_names[v]);
2145       return;
2146     case 'K':
2147       gcc_assert (GET_CODE (x) == CONST_INT);
2148       v = INTVAL (x);
2149       gcc_assert ((v & 1) == 0);
2150       fprintf (file, "%s:%s", reg_names[v + 1], reg_names[v]);
2151       return;
2152 
2153     case 's':
2154     case 'S':
2155     case 'f':
2156     case 'F':
2157       gcc_assert (GET_CODE (x) == CONST_INT);
2158       v = INTVAL (x);
2159       for (i = 0; i < 32; i++)
2160 	{
2161 	  HOST_WIDE_INT tst = v & 1;
2162 	  if (((code == 'f' || code == 'F') && !tst)
2163 	      || ((code == 's' || code == 'S') && tst))
2164 	    break;
2165 	  v >>= 1;
2166 	}
2167       if (code == 'f' || code == 's')
2168 	{
2169 	  fprintf (file, "%d", i);
2170 	  return;
2171 	}
2172       for (;i < 32; i++)
2173 	{
2174 	  HOST_WIDE_INT tst = v & 1;
2175 	  if ((code == 'F' && tst) || (code == 'S' && !tst))
2176 	    break;
2177 	  v >>= 1;
2178 	}
2179       fprintf (file, "%d", i - 1);
2180       return;
2181 
2182     case 'n':
2183       gcc_assert (GET_CODE (x) == CONST_INT);
2184       output_addr_const (file, GEN_INT (-INTVAL (x)));
2185       return;
2186 
2187     case 'r':
2188       gcc_assert (GET_CODE (x) == CONST_INT);
2189       v = INTVAL (x);
2190       if (v < 0)
2191 	v = -v;
2192       output_addr_const (file, GEN_INT (v >> 1));
2193       return;
2194 
2195     case 'R':
2196       gcc_assert (GET_CODE (x) == CONST_INT);
2197       v = INTVAL (x);
2198       if (v < 0)
2199 	v = -v;
2200       output_addr_const (file, GEN_INT (v >> 2));
2201       return;
2202 
2203     case 'd':
2204       gcc_assert (GET_CODE (x) == CONST_INT);
2205       v = INTVAL (x);
2206       fputs (v == 2 ? "h" : v == 4 ? "w" : "d", file);
2207       return;
2208 
2209     case 'p':
2210     case 'P':
2211       gcc_assert (GET_CODE (x) == REG);
2212       v = REGNO (x);
2213       if (code == 'P')
2214 	v++;
2215       fputs (reg_names[v], file);
2216       return;
2217 
2218     case 'D':
2219       v = 0;
2220       if (GET_CODE (x) == CONST)
2221 	{
2222 	  x = XEXP (x, 0);
2223 	  gcc_assert (GET_CODE (x) == PLUS);
2224 	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
2225 	  v = INTVAL (XEXP (x, 1));
2226 	  x = XEXP (x, 0);
2227 
2228 	}
2229       gcc_assert (GET_CODE (x) == SYMBOL_REF);
2230 
2231       t = SYMBOL_REF_DECL (x);
2232       if (DECL_P (t))
2233 	v |= DECL_ALIGN_UNIT (t);
2234       else
2235 	v |= TYPE_ALIGN_UNIT (TREE_TYPE (t));
2236       if (v & 1)
2237 	fputs ("b", file);
2238       else if (v & 2)
2239 	fputs ("h", file);
2240       else
2241 	fputs ("w", file);
2242       return;
2243 
2244     case 'U':
2245       if (MEM_P (x))
2246 	{
2247 	  x = XEXP (x, 0);
2248 	  if (GET_CODE (x) == PLUS
2249 	      || GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2250 	    x = XEXP (x, 0);
2251 	  if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
2252 	    {
2253 	      gcc_assert (sdata_symbolic_operand (x, Pmode));
2254 	      fputs ("2", file);
2255 	      return;
2256 	    }
2257 	}
2258       gcc_assert (REG_P (x));
2259       if (A_REGNO_P (REGNO (x)))
2260 	fputs ("1", file);
2261       if (B_REGNO_P (REGNO (x)))
2262 	fputs ("2", file);
2263       return;
2264 
2265     default:
2266       switch (GET_CODE (x))
2267 	{
2268 	case REG:
2269 	  if (GET_MODE_SIZE (mode) == 8)
2270 	    fprintf (file, "%s:%s", reg_names[REGNO (x) + 1],
2271 		     reg_names[REGNO (x)]);
2272 	  else
2273 	    fprintf (file, "%s", reg_names[REGNO (x)]);
2274 	  break;
2275 
2276 	case MEM:
2277 	  fputc ('*', file);
2278 	  gcc_assert (XEXP (x, 0) != stack_pointer_rtx);
2279 	  c6x_print_address_operand (file, XEXP (x, 0), GET_MODE (x));
2280 	  break;
2281 
2282 	case SYMBOL_REF:
2283 	  fputc ('(', file);
2284 	  output_addr_const (file, x);
2285 	  fputc (')', file);
2286 	  break;
2287 
2288 	case CONST_INT:
2289 	  output_addr_const (file, x);
2290 	  break;
2291 
2292 	case CONST_DOUBLE:
2293 	  output_operand_lossage ("invalid const_double operand");
2294 	  break;
2295 
2296 	default:
2297 	  output_addr_const (file, x);
2298 	}
2299     }
2300 }
2301 
2302 /* Return TRUE if OP is a valid memory address with a base register of
2303    class C.  If SMALL_OFFSET is true, we disallow memory references which would
2304    require a long offset with B14/B15.  */
2305 
2306 bool
2307 c6x_mem_operand (rtx op, enum reg_class c, bool small_offset)
2308 {
2309   machine_mode mode = GET_MODE (op);
2310   rtx base = XEXP (op, 0);
2311   switch (GET_CODE (base))
2312     {
2313     case REG:
2314       break;
2315     case PLUS:
2316       if (small_offset
2317 	  && (XEXP (base, 0) == stack_pointer_rtx
2318 	      || XEXP (base, 0) == pic_offset_table_rtx))
2319 	{
2320 	  if (!c6x_legitimate_address_p_1 (mode, base, true, true))
2321 	    return false;
2322 	}
2323 
2324       /* fall through */
2325     case PRE_INC:
2326     case PRE_DEC:
2327     case PRE_MODIFY:
2328     case POST_INC:
2329     case POST_DEC:
2330     case POST_MODIFY:
2331       base = XEXP (base, 0);
2332       break;
2333 
2334     case CONST:
2335     case LABEL_REF:
2336     case SYMBOL_REF:
2337       gcc_assert (sdata_symbolic_operand (base, Pmode));
2338       return !small_offset && c == B_REGS;
2339 
2340     default:
2341       return false;
2342     }
2343   return TEST_HARD_REG_BIT (reg_class_contents[ (int) (c)], REGNO (base));
2344 }
2345 
2346 /* Returns true if X is a valid address for use in a memory reference
2347    of mode MODE.  If STRICT is true, we do not allow pseudo registers
2348    in the address.  NO_LARGE_OFFSET is true if we are examining an
2349    address for use in a load or store misaligned instruction, or
2350    recursively examining an operand inside a PRE/POST_MODIFY.  */
2351 
2352 bool
2353 c6x_legitimate_address_p_1 (machine_mode mode, rtx x, bool strict,
2354 			    bool no_large_offset)
2355 {
2356   int size, size1;
2357   HOST_WIDE_INT off;
2358   enum rtx_code code = GET_CODE (x);
2359 
2360   switch (code)
2361     {
2362     case PRE_MODIFY:
2363     case POST_MODIFY:
2364       /* We can't split these into word-sized pieces yet.  */
2365       if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2366 	return false;
2367       if (GET_CODE (XEXP (x, 1)) != PLUS)
2368 	return false;
2369       if (!c6x_legitimate_address_p_1 (mode, XEXP (x, 1), strict, true))
2370 	return false;
2371       if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
2372 	return false;
2373 
2374       /* fall through */
2375     case PRE_INC:
2376     case PRE_DEC:
2377     case POST_INC:
2378     case POST_DEC:
2379       /* We can't split these into word-sized pieces yet.  */
2380       if (!TARGET_STDW && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
2381 	return false;
2382       x = XEXP (x, 0);
2383       if (!REG_P (x))
2384 	return false;
2385 
2386       /* fall through */
2387     case REG:
2388       if (strict)
2389 	return REGNO_OK_FOR_BASE_STRICT_P (REGNO (x));
2390       else
2391 	return REGNO_OK_FOR_BASE_NONSTRICT_P (REGNO (x));
2392 
2393     case PLUS:
2394       if (!REG_P (XEXP (x, 0))
2395 	  || !c6x_legitimate_address_p_1 (mode, XEXP (x, 0), strict, false))
2396 	return false;
2397       /* We cannot ensure currently that both registers end up in the
2398 	 same register file.  */
2399       if (REG_P (XEXP (x, 1)))
2400 	return false;
2401 
2402       if (mode == BLKmode)
2403 	size = 4;
2404       else if (mode == VOIDmode)
2405 	/* ??? This can happen during ivopts.  */
2406 	size = 1;
2407       else
2408 	size = GET_MODE_SIZE (mode);
2409 
2410       if (flag_pic
2411 	  && GET_CODE (XEXP (x, 1)) == UNSPEC
2412 	  && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_SDATA
2413 	  && XEXP (x, 0) == pic_offset_table_rtx
2414 	  && sdata_symbolic_operand (XVECEXP (XEXP (x, 1), 0, 0), SImode))
2415 	return !no_large_offset && size <= 4;
2416       if (flag_pic == 1
2417 	  && mode == Pmode
2418 	  && GET_CODE (XEXP (x, 1)) == UNSPEC
2419 	  && XINT (XEXP (x, 1), 1) == UNSPEC_LOAD_GOT
2420 	  && XEXP (x, 0) == pic_offset_table_rtx
2421 	  && (GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == SYMBOL_REF
2422 	      || GET_CODE (XVECEXP (XEXP (x, 1), 0, 0)) == LABEL_REF))
2423 	return !no_large_offset;
2424       if (GET_CODE (XEXP (x, 1)) != CONST_INT)
2425 	return false;
2426 
2427       off = INTVAL (XEXP (x, 1));
2428 
2429       /* If the machine does not have doubleword load/stores, we'll use
2430 	 word size accesses.  */
2431       size1 = size;
2432       if (size == 2 * UNITS_PER_WORD && !TARGET_STDW)
2433 	size = UNITS_PER_WORD;
2434 
2435       if (((HOST_WIDE_INT)size1 - 1) & off)
2436 	return false;
2437       off /= size;
2438       if (off > -32 && off < (size1 == size ? 32 : 28))
2439 	return true;
2440       if (no_large_offset || code != PLUS || XEXP (x, 0) != stack_pointer_rtx
2441 	  || size1 > UNITS_PER_WORD)
2442 	return false;
2443       return off >= 0 && off < 32768;
2444 
2445     case CONST:
2446     case SYMBOL_REF:
2447     case LABEL_REF:
2448       return (!no_large_offset
2449 	      /* With -fpic, we must wrap it in an unspec to show the B14
2450 		 dependency.  */
2451 	      && !flag_pic
2452 	      && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
2453 	      && sdata_symbolic_operand (x, Pmode));
2454 
2455     default:
2456       return false;
2457     }
2458 }
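
/* Rough examples of what the PLUS case above accepts for an SImode access
   (illustrative, derived from the checks above): a constant offset must be
   a multiple of 4 and within 31 units after scaling, i.e. byte offsets
   -124..124 from an arbitrary base register; only stack-pointer-relative
   addresses of at most word size may additionally use the long 15-bit
   scaled offsets, up to byte offset 131068.  */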
2459 
2460 static bool
2461 c6x_legitimate_address_p (machine_mode mode, rtx x, bool strict)
2462 {
2463   return c6x_legitimate_address_p_1 (mode, x, strict, false);
2464 }
2465 
2466 static bool
2467 c6x_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
2468 			   rtx x ATTRIBUTE_UNUSED)
2469 {
2470   return true;
2471 }
2472 
2473 /* Implements TARGET_PREFERRED_RENAME_CLASS.  */
2474 static reg_class_t
2475 c6x_preferred_rename_class (reg_class_t cl)
2476 {
2477   if (cl == A_REGS)
2478     return NONPREDICATE_A_REGS;
2479   if (cl == B_REGS)
2480     return NONPREDICATE_B_REGS;
2481   if (cl == ALL_REGS || cl == GENERAL_REGS)
2482     return NONPREDICATE_REGS;
2483   return NO_REGS;
2484 }
2485 
2486 /* Implements FINAL_PRESCAN_INSN.  */
2487 void
2488 c6x_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
2489 			int noperands ATTRIBUTE_UNUSED)
2490 {
2491   c6x_current_insn = insn;
2492 }
2493 
2494 /* A structure to describe the stack layout of a function.  The layout is
2495    as follows:
2496 
2497    [saved frame pointer (or possibly padding0)]
2498    --> incoming stack pointer, new hard frame pointer
2499    [saved call-used regs]
2500    [optional padding1]
2501    --> soft frame pointer
2502    [frame]
2503    [outgoing arguments]
2504    [optional padding2]
2505 
2506   The structure members are laid out in this order.  */
2507 
2508 struct c6x_frame
2509 {
2510   int padding0;
2511   /* Number of registers to save.  */
2512   int nregs;
2513   int padding1;
2514   HOST_WIDE_INT frame;
2515   int outgoing_arguments_size;
2516   int padding2;
2517 
2518   HOST_WIDE_INT to_allocate;
2519   /* The offsets relative to the incoming stack pointer (which
2520      becomes HARD_FRAME_POINTER).  */
2521   HOST_WIDE_INT frame_pointer_offset;
2522   HOST_WIDE_INT b3_offset;
2523 
2524   /* True if we should call push_rts/pop_rts to save and restore
2525      registers.  */
2526   bool push_rts;
2527 };
2528 
2529 /* Return true if we need to save and modify the PIC register in the
2530    prologue.  */
2531 
2532 static bool
2533 must_reload_pic_reg_p (void)
2534 {
2535   struct cgraph_local_info *i = NULL;
2536 
2537   if (!TARGET_DSBT)
2538     return false;
2539 
2540   i = cgraph_node::local_info (current_function_decl);
2541 
2542   if ((crtl->uses_pic_offset_table || !crtl->is_leaf) && !i->local)
2543     return true;
2544   return false;
2545 }
2546 
2547 /* Return 1 if we need to save REGNO.  */
2548 static int
2549 c6x_save_reg (unsigned int regno)
2550 {
2551   return ((df_regs_ever_live_p (regno)
2552 	   && !call_used_regs[regno]
2553 	   && !fixed_regs[regno])
2554 	  || (regno == RETURN_ADDR_REGNO
2555 	      && (df_regs_ever_live_p (regno)
2556 		  || !crtl->is_leaf))
2557 	  || (regno == PIC_OFFSET_TABLE_REGNUM && must_reload_pic_reg_p ()));
2558 }
2559 
2560 /* Examine the number of regs NREGS we've determined we must save.
2561    Return true if we should use __c6xabi_push_rts/__c6xabi_pop_rts for
2562    prologue and epilogue.  */
2563 
2564 static bool
2565 use_push_rts_p (int nregs)
2566 {
2567   if (TARGET_INSNS_64PLUS && optimize_function_for_size_p (cfun)
2568       && !cfun->machine->contains_sibcall
2569       && !cfun->returns_struct
2570       && !TARGET_LONG_CALLS
2571       && nregs >= 6 && !frame_pointer_needed)
2572     return true;
2573   return false;
2574 }
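
/* A usage note summarising the conditions above: __c6xabi_push_rts stores
   the entire callee-saved set (accounted for as 14 save slots, including
   B3), so it is only worthwhile when optimising for size and at least six
   registers need saving, and it cannot be used with sibcalls, struct
   returns, long calls or a frame pointer.  */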
2575 
2576 /* Return the number of saved general purpose registers.  */
2577 
2578 int
2579 c6x_nsaved_regs (void)
2580 {
2581   int nregs = 0;
2582   int regno;
2583 
2584   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2585     if (c6x_save_reg (regno))
2586       nregs++;
2587   return nregs;
2588 }
2589 
2590 /* The register save order mandated by the ABI.  */
2591 static unsigned reg_save_order[] =
2592 {
2593   REG_A10, REG_A11, REG_A12, REG_A13,
2594   REG_A14, REG_B3,
2595   REG_B10, REG_B11, REG_B12, REG_B13,
2596   REG_B14, REG_A15
2597 };
2598 
2599 #define N_SAVE_ORDER (sizeof reg_save_order / sizeof *reg_save_order)
2600 
2601 /* Compute the layout of the stack frame and store it in FRAME.  */
2602 
2603 static void
2604 c6x_compute_frame_layout (struct c6x_frame *frame)
2605 {
2606   HOST_WIDE_INT size = get_frame_size ();
2607   HOST_WIDE_INT offset;
2608   int nregs;
2609 
2610   /* We use the four bytes which are technically inside the caller's frame,
2611      usually to save the frame pointer.  */
2612   offset = -4;
2613   frame->padding0 = 0;
2614   nregs = c6x_nsaved_regs ();
2615   frame->push_rts = false;
2616   frame->b3_offset = 0;
2617   if (use_push_rts_p (nregs))
2618     {
2619       frame->push_rts = true;
2620       frame->b3_offset = (TARGET_BIG_ENDIAN ? -12 : -13) * 4;
2621       nregs = 14;
2622     }
2623   else if (c6x_save_reg (REG_B3))
2624     {
2625       int idx;
2626       for (idx = N_SAVE_ORDER - 1; reg_save_order[idx] != REG_B3; idx--)
2627 	{
2628 	  if (c6x_save_reg (reg_save_order[idx]))
2629 	    frame->b3_offset -= 4;
2630 	}
2631     }
2632   frame->nregs = nregs;
2633 
2634   if (size == 0 && nregs == 0)
2635     {
2636       frame->padding0 = 4;
2637       frame->padding1 = frame->padding2 = 0;
2638       frame->frame_pointer_offset = frame->to_allocate = 0;
2639       frame->outgoing_arguments_size = 0;
2640       return;
2641     }
2642 
2643   if (!frame->push_rts)
2644     offset += frame->nregs * 4;
2645 
2646   if (offset == 0 && size == 0 && crtl->outgoing_args_size == 0
2647       && !crtl->is_leaf)
2648     /* Don't use the bottom of the caller's frame if we have no
2649        allocation of our own and call other functions.  */
2650     frame->padding0 = frame->padding1 = 4;
2651   else if (offset & 4)
2652     frame->padding1 = 4;
2653   else
2654     frame->padding1 = 0;
2655 
2656   offset += frame->padding0 + frame->padding1;
2657   frame->frame_pointer_offset = offset;
2658   offset += size;
2659 
2660   frame->outgoing_arguments_size = crtl->outgoing_args_size;
2661   offset += frame->outgoing_arguments_size;
2662 
2663   if ((offset & 4) == 0)
2664     frame->padding2 = 8;
2665   else
2666     frame->padding2 = 4;
2667   frame->to_allocate = offset + frame->padding2;
2668 }
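
/* A note on the arithmetic above (added for clarity, assuming the frame
   and outgoing-argument sizes are kept word-aligned): apart from the
   empty-frame early return, TO_ALLOCATE always ends up a non-zero multiple
   of 8, with PADDING2 supplying whichever of 4 or 8 bytes is needed, so
   the stack pointer keeps the 8-byte alignment the ABI requires.  */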
2669 
2670 /* Return the offset between two registers, one to be eliminated, and the other
2671    its replacement, at the start of a routine.  */
2672 
2673 HOST_WIDE_INT
2674 c6x_initial_elimination_offset (int from, int to)
2675 {
2676   struct c6x_frame frame;
2677   c6x_compute_frame_layout (&frame);
2678 
2679   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
2680     return 0;
2681   else if (from == FRAME_POINTER_REGNUM
2682 	   && to == HARD_FRAME_POINTER_REGNUM)
2683     return -frame.frame_pointer_offset;
2684   else
2685     {
2686       gcc_assert (to == STACK_POINTER_REGNUM);
2687 
2688       if (from == ARG_POINTER_REGNUM)
2689 	return frame.to_allocate + (frame.push_rts ? 56 : 0);
2690 
2691       gcc_assert (from == FRAME_POINTER_REGNUM);
2692       return frame.to_allocate - frame.frame_pointer_offset;
2693     }
2694 }
2695 
2696 /* Given FROM and TO register numbers, say whether this elimination is
2697    allowed.  Frame pointer elimination is automatically handled.  */
2698 
2699 static bool
2700 c6x_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2701 {
2702   if (to == STACK_POINTER_REGNUM)
2703     return !frame_pointer_needed;
2704   return true;
2705 }
2706 
2707 /* Emit insns to increment the stack pointer by OFFSET.  If
2708    FRAME_RELATED_P, set the RTX_FRAME_RELATED_P flag on the insns.
2709    Does nothing if the offset is zero.  */
2710 
2711 static void
2712 emit_add_sp_const (HOST_WIDE_INT offset, bool frame_related_p)
2713 {
2714   rtx to_add = GEN_INT (offset);
2715   rtx orig_to_add = to_add;
2716   rtx_insn *insn;
2717 
2718   if (offset == 0)
2719     return;
2720 
2721   if (offset < -32768 || offset > 32767)
2722     {
2723       rtx reg = gen_rtx_REG (SImode, REG_A0);
2724       rtx low = GEN_INT (trunc_int_for_mode (offset, HImode));
2725 
2726       insn = emit_insn (gen_movsi_high (reg, low));
2727       if (frame_related_p)
2728 	RTX_FRAME_RELATED_P (insn) = 1;
2729       insn = emit_insn (gen_movsi_lo_sum (reg, reg, to_add));
2730       if (frame_related_p)
2731 	RTX_FRAME_RELATED_P (insn) = 1;
2732       to_add = reg;
2733     }
2734   insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
2735 				to_add));
2736   if (frame_related_p)
2737     {
2738       if (REG_P (to_add))
2739 	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2740 		      gen_rtx_SET (stack_pointer_rtx,
2741 				   gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2742 						 orig_to_add)));
2743 
2744       RTX_FRAME_RELATED_P (insn) = 1;
2745     }
2746 }
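
/* For example, an adjustment of -40000 does not fit the signed 16-bit
   immediate of a single add, so the constant is first built in A0 via the
   movsi_high/movsi_lo_sum pair and then added to the stack pointer, with a
   REG_FRAME_RELATED_EXPR note describing the overall adjustment.  */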
2747 
2748 /* Prologue and epilogue.  */
2749 void
2750 c6x_expand_prologue (void)
2751 {
2752   struct c6x_frame frame;
2753   rtx_insn *insn;
2754   rtx mem;
2755   int nsaved = 0;
2756   HOST_WIDE_INT initial_offset, off, added_already;
2757 
2758   c6x_compute_frame_layout (&frame);
2759 
2760   if (flag_stack_usage_info)
2761     current_function_static_stack_size = frame.to_allocate;
2762 
2763   initial_offset = -frame.to_allocate;
2764   if (frame.push_rts)
2765     {
2766       emit_insn (gen_push_rts ());
2767       nsaved = frame.nregs;
2768     }
2769 
2770   /* If the offsets would be too large for the memory references we will
2771      create to save registers, do the stack allocation in two parts.
2772      Ensure by subtracting 8 that we don't store to the word pointed to
2773      by the stack pointer.  */
2774   if (initial_offset < -32768)
2775     initial_offset = -frame.frame_pointer_offset - 8;
2776 
2777   if (frame.to_allocate > 0)
2778     gcc_assert (initial_offset != 0);
2779 
2780   off = -initial_offset + 4 - frame.padding0;
2781 
2782   mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2783 
2784   added_already = 0;
2785   if (frame_pointer_needed)
2786     {
2787       rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2788       /* We go through some contortions here to both follow the ABI's
2789 	 recommendation that FP == incoming SP, and to avoid writing or
2790 	 reading the word pointed to by the stack pointer.  */
2791       rtx addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx,
2792 				      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2793 						    GEN_INT (-8)));
2794       insn = emit_move_insn (gen_frame_mem (Pmode, addr), fp_reg);
2795       RTX_FRAME_RELATED_P (insn) = 1;
2796       nsaved++;
2797       insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, stack_pointer_rtx,
2798 				    GEN_INT (8)));
2799       RTX_FRAME_RELATED_P (insn) = 1;
2800       off -= 4;
2801       added_already = -8;
2802     }
2803 
2804   emit_add_sp_const (initial_offset - added_already, true);
2805 
2806   if (nsaved < frame.nregs)
2807     {
2808       unsigned i;
2809 
2810       for (i = 0; i < N_SAVE_ORDER; i++)
2811 	{
2812 	  int idx = N_SAVE_ORDER - i - 1;
2813 	  unsigned regno = reg_save_order[idx];
2814 	  rtx reg;
2815 	  machine_mode save_mode = SImode;
2816 
2817 	  if (regno == REG_A15 && frame_pointer_needed)
2818 	    /* Already saved.  */
2819 	    continue;
2820 	  if (!c6x_save_reg (regno))
2821 	    continue;
2822 
2823 	  if (TARGET_STDW && (off & 4) == 0 && off <= 256
2824 	      && (regno & 1) == 1
2825 	      && i + 1 < N_SAVE_ORDER
2826 	      && reg_save_order[idx - 1] == regno - 1
2827 	      && c6x_save_reg (regno - 1))
2828 	    {
2829 	      save_mode = DImode;
2830 	      regno--;
2831 	      i++;
2832 	    }
2833 	  reg = gen_rtx_REG (save_mode, regno);
2834 	  off -= GET_MODE_SIZE (save_mode);
2835 
2836 	  insn = emit_move_insn (adjust_address (mem, save_mode, off),
2837 				 reg);
2838 	  RTX_FRAME_RELATED_P (insn) = 1;
2839 
2840 	  nsaved += HARD_REGNO_NREGS (regno, save_mode);
2841 	}
2842     }
2843   gcc_assert (nsaved == frame.nregs);
2844   emit_add_sp_const (-frame.to_allocate - initial_offset, true);
2845   if (must_reload_pic_reg_p ())
2846     {
2847       if (dsbt_decl == NULL)
2848 	{
2849 	  tree t;
2850 
2851 	  t = build_index_type (integer_one_node);
2852 	  t = build_array_type (integer_type_node, t);
2853 	  t = build_decl (BUILTINS_LOCATION, VAR_DECL,
2854 			  get_identifier ("__c6xabi_DSBT_BASE"), t);
2855 	  DECL_ARTIFICIAL (t) = 1;
2856 	  DECL_IGNORED_P (t) = 1;
2857 	  DECL_EXTERNAL (t) = 1;
2858 	  TREE_STATIC (t) = 1;
2859 	  TREE_PUBLIC (t) = 1;
2860 	  TREE_USED (t) = 1;
2861 
2862 	  dsbt_decl = t;
2863 	}
2864       emit_insn (gen_setup_dsbt (pic_offset_table_rtx,
2865 				 XEXP (DECL_RTL (dsbt_decl), 0)));
2866     }
2867 }
2868 
2869 void
2870 c6x_expand_epilogue (bool sibcall)
2871 {
2872   unsigned i;
2873   struct c6x_frame frame;
2874   rtx mem;
2875   HOST_WIDE_INT off;
2876   int nsaved = 0;
2877 
2878   c6x_compute_frame_layout (&frame);
2879 
2880   mem = gen_frame_mem (Pmode, stack_pointer_rtx);
2881 
2882   /* Insert a dummy set/use of the stack pointer.  This creates a
2883      scheduler barrier between the prologue saves and epilogue restores. */
2884   emit_insn (gen_epilogue_barrier (stack_pointer_rtx, stack_pointer_rtx));
2885 
2886   /* If the offsets would be too large for the memory references we will
2887      create to restore registers, do a preliminary stack adjustment here.  */
2888   off = frame.to_allocate - frame.frame_pointer_offset + frame.padding1;
2889   if (frame.push_rts)
2890     {
2891       nsaved = frame.nregs;
2892     }
2893   else
2894     {
2895       if (frame.to_allocate > 32768)
2896 	{
2897 	  /* Don't add the entire offset so that we leave an unused word
2898 	     above the stack pointer.  */
2899 	  emit_add_sp_const ((off - 16) & ~7, false);
2900 	  off &= 7;
2901 	  off += 16;
2902 	}
2903       for (i = 0; i < N_SAVE_ORDER; i++)
2904 	{
2905 	  unsigned regno = reg_save_order[i];
2906 	  rtx reg;
2907 	  machine_mode save_mode = SImode;
2908 
2909 	  if (!c6x_save_reg (regno))
2910 	    continue;
2911 	  if (regno == REG_A15 && frame_pointer_needed)
2912 	    continue;
2913 
2914 	  if (TARGET_STDW && (off & 4) == 0 && off < 256
2915 	      && (regno & 1) == 0
2916 	      && i + 1 < N_SAVE_ORDER
2917 	      && reg_save_order[i + 1] == regno + 1
2918 	      && c6x_save_reg (regno + 1))
2919 	    {
2920 	      save_mode = DImode;
2921 	      i++;
2922 	    }
2923 	  reg = gen_rtx_REG (save_mode, regno);
2924 
2925 	  emit_move_insn (reg, adjust_address (mem, save_mode, off));
2926 
2927 	  off += GET_MODE_SIZE (save_mode);
2928 	  nsaved += HARD_REGNO_NREGS (regno, save_mode);
2929 	}
2930     }
2931   if (!frame_pointer_needed)
2932     emit_add_sp_const (off + frame.padding0 - 4, false);
2933   else
2934     {
2935       rtx fp_reg = gen_rtx_REG (SImode, REG_A15);
2936       rtx addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
2937 				      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
2938 						    GEN_INT (8)));
2939       emit_insn (gen_addsi3 (stack_pointer_rtx, hard_frame_pointer_rtx,
2940 			     GEN_INT (-8)));
2941       emit_move_insn (fp_reg, gen_frame_mem (Pmode, addr));
2942       nsaved++;
2943     }
2944   gcc_assert (nsaved == frame.nregs);
2945   if (!sibcall)
2946     {
2947       if (frame.push_rts)
2948 	emit_jump_insn (gen_pop_rts ());
2949       else
2950 	emit_jump_insn (gen_return_internal (gen_rtx_REG (SImode,
2951 							  RETURN_ADDR_REGNO)));
2952     }
2953 }
2954 
2955 /* Return the value of the return address for the frame COUNT steps up
2956    from the current frame, after the prologue.
2957    We punt for everything but the current frame by returning const0_rtx.  */
2958 
2959 rtx
2960 c6x_return_addr_rtx (int count)
2961 {
2962   if (count != 0)
2963     return const0_rtx;
2964 
2965   return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNO);
2966 }
2967 
2968 /* Return true iff TYPE is one of the shadow types.  */
2969 static bool
2970 shadow_type_p (enum attr_type type)
2971 {
2972   return (type == TYPE_SHADOW || type == TYPE_LOAD_SHADOW
2973 	  || type == TYPE_MULT_SHADOW);
2974 }
2975 
2976 /* Return true iff INSN is a shadow pattern.  */
2977 static bool
2978 shadow_p (rtx_insn *insn)
2979 {
2980   if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2981     return false;
2982   return shadow_type_p (get_attr_type (insn));
2983 }
2984 
2985 /* Return true iff INSN is a shadow or blockage pattern.  */
2986 static bool
2987 shadow_or_blockage_p (rtx_insn *insn)
2988 {
2989   enum attr_type type;
2990   if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
2991     return false;
2992   type = get_attr_type (insn);
2993   return shadow_type_p (type) || type == TYPE_BLOCKAGE;
2994 }
2995 
2996 /* Translate UNITS into a bitmask of units we can reserve for this
2997    insn.  */
2998 static int
2999 get_reservation_flags (enum attr_units units)
3000 {
3001   switch (units)
3002     {
3003     case UNITS_D:
3004     case UNITS_D_ADDR:
3005       return RESERVATION_FLAG_D;
3006     case UNITS_L:
3007       return RESERVATION_FLAG_L;
3008     case UNITS_S:
3009       return RESERVATION_FLAG_S;
3010     case UNITS_M:
3011       return RESERVATION_FLAG_M;
3012     case UNITS_LS:
3013       return RESERVATION_FLAG_LS;
3014     case UNITS_DL:
3015       return RESERVATION_FLAG_DL;
3016     case UNITS_DS:
3017       return RESERVATION_FLAG_DS;
3018     case UNITS_DLS:
3019       return RESERVATION_FLAG_DLS;
3020     default:
3021       return 0;
3022     }
3023 }
3024 
3025 /* Compute the side of the machine used by INSN, which reserves UNITS.
3026    This must match the reservations in the scheduling description.  */
3027 static int
3028 get_insn_side (rtx_insn *insn, enum attr_units units)
3029 {
3030   if (units == UNITS_D_ADDR)
3031     return (get_attr_addr_regfile (insn) == ADDR_REGFILE_A ? 0 : 1);
3032   else
3033     {
3034       enum attr_dest_regfile rf = get_attr_dest_regfile (insn);
3035       if (rf == DEST_REGFILE_ANY)
3036 	return get_attr_type (insn) == TYPE_BRANCH ? 0 : 1;
3037       else
3038 	return rf == DEST_REGFILE_A ? 0 : 1;
3039     }
3040 }
3041 
3042 /* After scheduling, walk the insns between HEAD and END and assign unit
3043    reservations.  */
3044 static void
3045 assign_reservations (rtx_insn *head, rtx_insn *end)
3046 {
3047   rtx_insn *insn;
3048   for (insn = head; insn != NEXT_INSN (end); insn = NEXT_INSN (insn))
3049     {
3050       unsigned int sched_mask, reserved;
3051       rtx_insn *within, *last;
3052       int pass;
3053       int rsrv[2];
3054       int rsrv_count[2][4];
3055       int i;
3056 
3057       if (GET_MODE (insn) != TImode)
3058 	continue;
3059 
3060       reserved = 0;
3061       last = NULL;
3062       /* Find the last insn in the packet.  It has a state recorded for it,
3063 	 which we can use to determine the units we should be using.  */
3064       for (within = insn;
3065 	   (within != NEXT_INSN (end)
3066 	    && (within == insn || GET_MODE (within) != TImode));
3067 	   within = NEXT_INSN (within))
3068 	{
3069 	  int icode;
3070 	  if (!NONDEBUG_INSN_P (within))
3071 	    continue;
3072 	  icode = recog_memoized (within);
3073 	  if (icode < 0)
3074 	    continue;
3075 	  if (shadow_p (within))
3076 	    continue;
3077 	  if (INSN_INFO_ENTRY (INSN_UID (within)).reservation != 0)
3078 	    reserved |= 1 << INSN_INFO_ENTRY (INSN_UID (within)).reservation;
3079 	  last = within;
3080 	}
3081       if (last == NULL_RTX)
3082 	continue;
3083 
3084       sched_mask = INSN_INFO_ENTRY (INSN_UID (last)).unit_mask;
3085       sched_mask &= ~reserved;
3086 
3087       memset (rsrv_count, 0, sizeof rsrv_count);
3088       rsrv[0] = rsrv[1] = ~0;
3089       for (i = 0; i < 8; i++)
3090 	{
3091 	  int side = i / 4;
3092 	  int unit = i & 3;
3093 	  unsigned unit_bit = 1 << (unit + side * UNIT_QID_SIDE_OFFSET);
3094 	  /* Clear the bits which we expect to reserve in the following loop,
3095 	     leaving the ones set which aren't present in the scheduler's
3096 	     state and shouldn't be reserved.  */
3097 	  if (sched_mask & unit_bit)
3098 	    rsrv[i / 4] &= ~(1 << unit);
3099 	}
3100 
3101       /* Walk through the insns that occur in the same cycle.  We use multiple
3102 	 passes to assign units, assigning for insns with the most specific
3103 	 requirements first.  */
3104       for (pass = 0; pass < 4; pass++)
3105 	for (within = insn;
3106 	     (within != NEXT_INSN (end)
3107 	      && (within == insn || GET_MODE (within) != TImode));
3108 	     within = NEXT_INSN (within))
3109 	  {
3110 	    int uid = INSN_UID (within);
3111 	    int this_rsrv, side;
3112 	    int icode;
3113 	    enum attr_units units;
3114 	    enum attr_type type;
3115 	    int j;
3116 
3117 	    if (!NONDEBUG_INSN_P (within))
3118 	      continue;
3119 	    icode = recog_memoized (within);
3120 	    if (icode < 0)
3121 	      continue;
3122 	    if (INSN_INFO_ENTRY (uid).reservation != 0)
3123 	      continue;
3124 	    units = get_attr_units (within);
3125 	    type = get_attr_type (within);
3126 	    this_rsrv = get_reservation_flags (units);
3127 	    if (this_rsrv == 0)
3128 	      continue;
3129 	    side = get_insn_side (within, units);
3130 
3131 	    /* Certain floating point instructions are treated specially.  If
3132 	       an insn can choose between units it can reserve, and its
3133 	       reservation spans more than one cycle, the reservation contains
3134 	       special markers in the first cycle to help us reconstruct what
3135 	       the automaton chose.  */
3136 	    if ((type == TYPE_ADDDP || type == TYPE_FP4)
3137 		&& units == UNITS_LS)
3138 	      {
3139 		int test1_code = ((type == TYPE_FP4 ? UNIT_QID_FPL1 : UNIT_QID_ADDDPL1)
3140 				  + side * UNIT_QID_SIDE_OFFSET);
3141 		int test2_code = ((type == TYPE_FP4 ? UNIT_QID_FPS1 : UNIT_QID_ADDDPS1)
3142 				  + side * UNIT_QID_SIDE_OFFSET);
3143 		if ((sched_mask & (1 << test1_code)) != 0)
3144 		  {
3145 		    this_rsrv = RESERVATION_FLAG_L;
3146 		    sched_mask &= ~(1 << test1_code);
3147 		  }
3148 		else if ((sched_mask & (1 << test2_code)) != 0)
3149 		  {
3150 		    this_rsrv = RESERVATION_FLAG_S;
3151 		    sched_mask &= ~(1 << test2_code);
3152 		  }
3153 	      }
3154 
3155 	    if ((this_rsrv & (this_rsrv - 1)) == 0)
3156 	      {
3157 		int t = exact_log2 (this_rsrv) + side * UNIT_QID_SIDE_OFFSET;
3158 		rsrv[side] |= this_rsrv;
3159 		INSN_INFO_ENTRY (uid).reservation = t;
3160 		continue;
3161 	      }
3162 
3163 	    if (pass == 1)
3164 	      {
3165 		for (j = 0; j < 4; j++)
3166 		  if (this_rsrv & (1 << j))
3167 		    rsrv_count[side][j]++;
3168 		continue;
3169 	      }
3170 	    if ((pass == 2 && this_rsrv != RESERVATION_FLAG_DLS)
3171 		|| (pass == 3 && this_rsrv == RESERVATION_FLAG_DLS))
3172 	      {
3173 		int best = -1, best_cost = INT_MAX;
3174 		for (j = 0; j < 4; j++)
3175 		  if ((this_rsrv & (1 << j))
3176 		      && !(rsrv[side] & (1 << j))
3177 		      && rsrv_count[side][j] < best_cost)
3178 		    {
3179 		      best_cost = rsrv_count[side][j];
3180 		      best = j;
3181 		    }
3182 		gcc_assert (best != -1);
3183 		rsrv[side] |= 1 << best;
3184 		for (j = 0; j < 4; j++)
3185 		  if ((this_rsrv & (1 << j)) && j != best)
3186 		    rsrv_count[side][j]--;
3187 
3188 		INSN_INFO_ENTRY (uid).reservation
3189 		  = best + side * UNIT_QID_SIDE_OFFSET;
3190 	      }
3191 	  }
3192     }
3193 }
3194 
3195 /* Return a factor by which to weight unit imbalances for a reservation
3196    R.  */
3197 static int
3198 unit_req_factor (enum unitreqs r)
3199 {
3200   switch (r)
3201     {
3202     case UNIT_REQ_D:
3203     case UNIT_REQ_L:
3204     case UNIT_REQ_S:
3205     case UNIT_REQ_M:
3206     case UNIT_REQ_X:
3207     case UNIT_REQ_T:
3208       return 1;
3209     case UNIT_REQ_DL:
3210     case UNIT_REQ_LS:
3211     case UNIT_REQ_DS:
3212       return 2;
3213     case UNIT_REQ_DLS:
3214       return 3;
3215     default:
3216       gcc_unreachable ();
3217     }
3218 }
3219 
3220 /* Examine INSN, and store in REQ1/SIDE1 and REQ2/SIDE2 the unit
3221    requirements.  Returns zero if INSN can't be handled, otherwise
3222    either one or two to show how many of the two pairs are in use.
3223    REQ1 is always used, it holds what is normally thought of as the
3224    instruction's reservation, e.g. UNIT_REQ_DL.  REQ2 is used to either
3225    describe a cross path, or for loads/stores, the T unit.  */
3226 static int
3227 get_unit_reqs (rtx_insn *insn, int *req1, int *side1, int *req2, int *side2)
3228 {
3229   enum attr_units units;
3230   enum attr_cross cross;
3231   int side, req;
3232 
3233   if (!NONDEBUG_INSN_P (insn) || recog_memoized (insn) < 0)
3234     return 0;
3235   units = get_attr_units (insn);
3236   if (units == UNITS_UNKNOWN)
3237     return 0;
3238   side = get_insn_side (insn, units);
3239   cross = get_attr_cross (insn);
3240 
3241   req = (units == UNITS_D ? UNIT_REQ_D
3242 	 : units == UNITS_D_ADDR ? UNIT_REQ_D
3243 	 : units == UNITS_DL ? UNIT_REQ_DL
3244 	 : units == UNITS_DS ? UNIT_REQ_DS
3245 	 : units == UNITS_L ? UNIT_REQ_L
3246 	 : units == UNITS_LS ? UNIT_REQ_LS
3247 	 : units == UNITS_S ? UNIT_REQ_S
3248 	 : units == UNITS_M ? UNIT_REQ_M
3249 	 : units == UNITS_DLS ? UNIT_REQ_DLS
3250 	 : -1);
3251   gcc_assert (req != -1);
3252   *req1 = req;
3253   *side1 = side;
3254   if (units == UNITS_D_ADDR)
3255     {
3256       *req2 = UNIT_REQ_T;
3257       *side2 = side ^ (cross == CROSS_Y ? 1 : 0);
3258       return 2;
3259     }
3260   else if (cross == CROSS_Y)
3261     {
3262       *req2 = UNIT_REQ_X;
3263       *side2 = side;
3264       return 2;
3265     }
3266   return 1;
3267 }
3268 
3269 /* Walk the insns between and including HEAD and TAIL, and mark the
3270    resource requirements in the unit_reqs table.  */
3271 static void
3272 count_unit_reqs (unit_req_table reqs, rtx_insn *head, rtx_insn *tail)
3273 {
3274   rtx_insn *insn;
3275 
3276   memset (reqs, 0, sizeof (unit_req_table));
3277 
3278   for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3279     {
3280       int side1, side2, req1, req2;
3281 
3282       switch (get_unit_reqs (insn, &req1, &side1, &req2, &side2))
3283 	{
3284 	case 2:
3285 	  reqs[side2][req2]++;
3286 	  /* fall through */
3287 	case 1:
3288 	  reqs[side1][req1]++;
3289 	  break;
3290 	}
3291     }
3292 }
3293 
3294 /* Update the table REQS by merging more specific unit reservations into
3295    more general ones, i.e. counting (for example) UNIT_REQ_D also in
3296    UNIT_REQ_DL, DS, and DLS.  */
3297 static void
3298 merge_unit_reqs (unit_req_table reqs)
3299 {
3300   int side;
3301   for (side = 0; side < 2; side++)
3302     {
3303       int d = reqs[side][UNIT_REQ_D];
3304       int l = reqs[side][UNIT_REQ_L];
3305       int s = reqs[side][UNIT_REQ_S];
3306       int dl = reqs[side][UNIT_REQ_DL];
3307       int ls = reqs[side][UNIT_REQ_LS];
3308       int ds = reqs[side][UNIT_REQ_DS];
3309 
3310       reqs[side][UNIT_REQ_DL] += d;
3311       reqs[side][UNIT_REQ_DL] += l;
3312       reqs[side][UNIT_REQ_DS] += d;
3313       reqs[side][UNIT_REQ_DS] += s;
3314       reqs[side][UNIT_REQ_LS] += l;
3315       reqs[side][UNIT_REQ_LS] += s;
3316       reqs[side][UNIT_REQ_DLS] += ds + dl + ls + d + l + s;
3317     }
3318 }
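
/* For example (illustrative counts only): with D = 2, L = 1, S = 0 and no
   multi-unit requests on a side, merging yields DL = 3, DS = 2, LS = 1 and
   DLS = 3, i.e. each specific request is also counted against every wider
   group of units that could satisfy it.  */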
3319 
3320 /* Examine the table REQS and return a measure of unit imbalance by comparing
3321    the two sides of the machine.  If, for example, D1 is used twice and D2
3322    used not at all, the return value should be 1 in the absence of other
3323    imbalances.  */
3324 static int
3325 unit_req_imbalance (unit_req_table reqs)
3326 {
3327   int val = 0;
3328   int i;
3329 
3330   for (i = 0; i < UNIT_REQ_MAX; i++)
3331     {
3332       int factor = unit_req_factor ((enum unitreqs) i);
3333       int diff = abs (reqs[0][i] - reqs[1][i]);
3334       val += (diff + factor - 1) / factor / 2;
3335     }
3336   return val;
3337 }
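
/* Continuing the example from the comment above: D1 used twice and D2 not
   at all gives DIFF = 2 with FACTOR = 1, so that entry contributes
   (2 + 1 - 1) / 1 / 2 = 1 to the returned imbalance.  */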
3338 
3339 /* Return the resource-constrained minimum iteration interval given the
3340    data in the REQS table.  This must have been processed with
3341    merge_unit_reqs already.  */
3342 static int
3343 res_mii (unit_req_table reqs)
3344 {
3345   int side, req;
3346   int worst = 1;
3347   for (side = 0; side < 2; side++)
3348     for (req = 0; req < UNIT_REQ_MAX; req++)
3349       {
3350 	int factor = unit_req_factor ((enum unitreqs) req);
3351 	worst = MAX ((reqs[side][UNIT_REQ_D] + factor - 1) / factor, worst);
3352       }
3353 
3354   return worst;
3355 }
3356 
3357 /* Examine INSN, and store in PMASK1 and PMASK2 bitmasks that represent
3358    the operands that are involved in the (up to) two reservations, as
3359    found by get_unit_reqs.  Return true if we did this successfully, false
3360    if we couldn't identify what to do with INSN.  */
3361 static bool
3362 get_unit_operand_masks (rtx_insn *insn, unsigned int *pmask1,
3363 			unsigned int *pmask2)
3364 {
3365   enum attr_op_pattern op_pat;
3366 
3367   if (recog_memoized (insn) < 0)
3368     return false;
3369   if (GET_CODE (PATTERN (insn)) == COND_EXEC)
3370     return false;
3371   extract_insn (insn);
3372   op_pat = get_attr_op_pattern (insn);
3373   if (op_pat == OP_PATTERN_DT)
3374     {
3375       gcc_assert (recog_data.n_operands == 2);
3376       *pmask1 = 1 << 0;
3377       *pmask2 = 1 << 1;
3378       return true;
3379     }
3380   else if (op_pat == OP_PATTERN_TD)
3381     {
3382       gcc_assert (recog_data.n_operands == 2);
3383       *pmask1 = 1 << 1;
3384       *pmask2 = 1 << 0;
3385       return true;
3386     }
3387   else if (op_pat == OP_PATTERN_SXS)
3388     {
3389       gcc_assert (recog_data.n_operands == 3);
3390       *pmask1 = (1 << 0) | (1 << 2);
3391       *pmask2 = 1 << 1;
3392       return true;
3393     }
3394   else if (op_pat == OP_PATTERN_SX)
3395     {
3396       gcc_assert (recog_data.n_operands == 2);
3397       *pmask1 = 1 << 0;
3398       *pmask2 = 1 << 1;
3399       return true;
3400     }
3401   else if (op_pat == OP_PATTERN_SSX)
3402     {
3403       gcc_assert (recog_data.n_operands == 3);
3404       *pmask1 = (1 << 0) | (1 << 1);
3405       *pmask2 = 1 << 2;
3406       return true;
3407     }
3408   return false;
3409 }
3410 
3411 /* Try to replace a register in INSN, which has corresponding rename info
3412    from regrename_analyze in INFO.  OP_MASK and ORIG_SIDE provide information
3413    about the operands that must be renamed and the side they are on.
3414    REQS is the table of unit reservations in the loop between HEAD and TAIL.
3415    We recompute this information locally after our transformation, and keep
3416    it only if we managed to improve the balance.  */
3417 static void
3418 try_rename_operands (rtx_insn *head, rtx_insn *tail, unit_req_table reqs,
3419 		     rtx insn,
3420 		     insn_rr_info *info, unsigned int op_mask, int orig_side)
3421 {
3422   enum reg_class super_class = orig_side == 0 ? B_REGS : A_REGS;
3423   HARD_REG_SET unavailable;
3424   du_head_p this_head;
3425   struct du_chain *chain;
3426   int i;
3427   unsigned tmp_mask;
3428   int best_reg, old_reg;
3429   vec<du_head_p> involved_chains = vNULL;
3430   unit_req_table new_reqs;
3431   bool ok;
3432 
3433   for (i = 0, tmp_mask = op_mask; tmp_mask; i++)
3434     {
3435       du_head_p op_chain;
3436       if ((tmp_mask & (1 << i)) == 0)
3437 	continue;
3438       if (info->op_info[i].n_chains != 1)
3439 	goto out_fail;
3440       op_chain = regrename_chain_from_id (info->op_info[i].heads[0]->id);
3441       involved_chains.safe_push (op_chain);
3442       tmp_mask &= ~(1 << i);
3443     }
3444 
3445   if (involved_chains.length () > 1)
3446     goto out_fail;
3447 
3448   this_head = involved_chains[0];
3449   if (this_head->cannot_rename)
3450     goto out_fail;
3451 
3452   for (chain = this_head->first; chain; chain = chain->next_use)
3453     {
3454       unsigned int mask1, mask2, mask_changed;
3455       int count, side1, side2, req1, req2;
3456       insn_rr_info *this_rr = &insn_rr[INSN_UID (chain->insn)];
3457 
3458       count = get_unit_reqs (chain->insn, &req1, &side1, &req2, &side2);
3459 
3460       if (count == 0)
3461 	goto out_fail;
3462 
3463       if (!get_unit_operand_masks (chain->insn, &mask1, &mask2))
3464 	goto out_fail;
3465 
3466       extract_insn (chain->insn);
3467 
3468       mask_changed = 0;
3469       for (i = 0; i < recog_data.n_operands; i++)
3470 	{
3471 	  int j;
3472 	  int n_this_op = this_rr->op_info[i].n_chains;
3473 	  for (j = 0; j < n_this_op; j++)
3474 	    {
3475 	      du_head_p other = this_rr->op_info[i].heads[j];
3476 	      if (regrename_chain_from_id (other->id) == this_head)
3477 		break;
3478 	    }
3479 	  if (j == n_this_op)
3480 	    continue;
3481 
3482 	  if (n_this_op != 1)
3483 	    goto out_fail;
3484 	  mask_changed |= 1 << i;
3485 	}
3486       gcc_assert (mask_changed != 0);
3487       if (mask_changed != mask1 && mask_changed != mask2)
3488 	goto out_fail;
3489     }
3490 
3491   /* If we get here, we can do the renaming.  */
3492   COMPL_HARD_REG_SET (unavailable, reg_class_contents[(int) super_class]);
3493 
3494   old_reg = this_head->regno;
3495   best_reg =
3496     find_rename_reg (this_head, super_class, &unavailable, old_reg, true);
3497 
3498   ok = regrename_do_replace (this_head, best_reg);
3499   gcc_assert (ok);
3500 
3501   count_unit_reqs (new_reqs, head, PREV_INSN (tail));
3502   merge_unit_reqs (new_reqs);
3503   if (dump_file)
3504     {
3505       fprintf (dump_file, "reshuffle for insn %d, op_mask %x, "
3506 	       "original side %d, new reg %d\n",
3507 	       INSN_UID (insn), op_mask, orig_side, best_reg);
3508       fprintf (dump_file, "  imbalance %d -> %d\n",
3509 	       unit_req_imbalance (reqs), unit_req_imbalance (new_reqs));
3510     }
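  /* Keep the new register assignment only if it does not make the unit
     imbalance worse; otherwise undo the renaming.  */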
3511   if (unit_req_imbalance (new_reqs) > unit_req_imbalance (reqs))
3512     {
3513       ok = regrename_do_replace (this_head, old_reg);
3514       gcc_assert (ok);
3515     }
3516   else
3517     memcpy (reqs, new_reqs, sizeof (unit_req_table));
3518 
3519  out_fail:
3520   involved_chains.release ();
3521 }
3522 
3523 /* Find insns in LOOP which would, if shifted to the other side
3524    of the machine, reduce an imbalance in the unit reservations.  */
3525 static void
3526 reshuffle_units (basic_block loop)
3527 {
3528   rtx_insn *head = BB_HEAD (loop);
3529   rtx_insn *tail = BB_END (loop);
3530   rtx_insn *insn;
3531   unit_req_table reqs;
3532   edge e;
3533   edge_iterator ei;
3534   bitmap_head bbs;
3535 
3536   count_unit_reqs (reqs, head, PREV_INSN (tail));
3537   merge_unit_reqs (reqs);
3538 
3539   regrename_init (true);
3540 
3541   bitmap_initialize (&bbs, &bitmap_default_obstack);
3542 
3543   FOR_EACH_EDGE (e, ei, loop->preds)
3544     bitmap_set_bit (&bbs, e->src->index);
3545 
3546   bitmap_set_bit (&bbs, loop->index);
3547   regrename_analyze (&bbs);
3548 
3549   for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
3550     {
3551       enum attr_units units;
3552       int count, side1, side2, req1, req2;
3553       unsigned int mask1, mask2;
3554       insn_rr_info *info;
3555 
3556       if (!NONDEBUG_INSN_P (insn))
3557 	continue;
3558 
3559       count = get_unit_reqs (insn, &req1, &side1, &req2, &side2);
3560 
3561       if (count == 0)
3562 	continue;
3563 
3564       if (!get_unit_operand_masks (insn, &mask1, &mask2))
3565 	continue;
3566 
3567       info = &insn_rr[INSN_UID (insn)];
3568       if (info->op_info == NULL)
3569 	continue;
3570 
3571       if (reqs[side1][req1] > 1
3572 	  && reqs[side1][req1] > 2 * reqs[side1 ^ 1][req1])
3573 	{
3574 	  try_rename_operands (head, tail, reqs, insn, info, mask1, side1);
3575 	}
3576 
3577       units = get_attr_units (insn);
3578       if (units == UNITS_D_ADDR)
3579 	{
3580 	  gcc_assert (count == 2);
3581 	  if (reqs[side2][req2] > 1
3582 	      && reqs[side2][req2] > 2 * reqs[side2 ^ 1][req2])
3583 	    {
3584 	      try_rename_operands (head, tail, reqs, insn, info, mask2, side2);
3585 	    }
3586 	}
3587     }
3588   regrename_finish ();
3589 }
3590 
3591 /* Backend scheduling state.  */
3592 typedef struct c6x_sched_context
3593 {
3594   /* The current scheduler clock, saved in the sched_reorder hook.  */
3595   int curr_sched_clock;
3596 
3597   /* Number of insns issued so far in this cycle.  */
3598   int issued_this_cycle;
3599 
3600   /* We record the time at which each jump occurs in JUMP_CYCLES.  The
3601      theoretical maximum for number of jumps in flight is 12: 2 every
3602      cycle, with a latency of 6 cycles each.  This is a circular
3603      buffer; JUMP_CYCLE_INDEX is the pointer to the start.  Earlier
3604      jumps have a higher index.  This array should be accessed through
3605      the jump_cycle function.  */
3606   int jump_cycles[12];
3607   int jump_cycle_index;
3608 
3609   /* In parallel with jump_cycles, this array records the opposite of
3610      the condition used in each pending jump.  This is used to
3611      predicate insns that are scheduled in the jump's delay slots.  If
3612      this is NULL_RTX no such predication happens.  */
3613   rtx jump_cond[12];
3614 
3615   /* Similar to the jump_cycles mechanism, but here we take into
3616      account all insns with delay slots, to avoid scheduling asms into
3617      the delay slots.  */
3618   int delays_finished_at;
3619 
3620   /* The following variable value is the last issued insn.  */
3621   rtx_insn *last_scheduled_insn;
3622   /* The last issued insn that isn't a shadow of another.  */
3623   rtx_insn *last_scheduled_iter0;
3624 
3625   /* The following variable value is DFA state before issuing the
3626      first insn in the current clock cycle.  We do not use this member
3627      of the structure directly; we copy the data in and out of
3628      prev_cycle_state.  */
3629   state_t prev_cycle_state_ctx;
3630 
3631   int reg_n_accesses[FIRST_PSEUDO_REGISTER];
3632   int reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3633   int reg_set_in_cycle[FIRST_PSEUDO_REGISTER];
3634 
3635   int tmp_reg_n_accesses[FIRST_PSEUDO_REGISTER];
3636   int tmp_reg_n_xaccesses[FIRST_PSEUDO_REGISTER];
3637 } *c6x_sched_context_t;
3638 
3639 /* The current scheduling state.  */
3640 static struct c6x_sched_context ss;
3641 
3642 /* The following variable value is DFA state before issuing the first insn
3643    in the current clock cycle.  This is used in c6x_variable_issue for
3644    comparison with the state after issuing the last insn in a cycle.  */
3645 static state_t prev_cycle_state;
3646 
3647 /* Set when we discover while processing an insn that it would lead to too
3648    many accesses of the same register.  */
3649 static bool reg_access_stall;
3650 
3651 /* The highest insn uid after delayed insns were split, but before loop bodies
3652    were copied by the modulo scheduling code.  */
3653 static int sploop_max_uid_iter0;
3654 
3655 /* Look up the jump cycle with index N.  For an out-of-bounds N, we return 0,
3656    so the caller does not specifically have to test for it.  */
3657 static int
3658 get_jump_cycle (int n)
3659 {
3660   if (n >= 12)
3661     return 0;
3662   n += ss.jump_cycle_index;
3663   if (n >= 12)
3664     n -= 12;
3665   return ss.jump_cycles[n];
3666 }
3667 
3668 /* Look up the jump condition with index N.  */
3669 static rtx
3670 get_jump_cond (int n)
3671 {
3672   if (n >= 12)
3673     return NULL_RTX;
3674   n += ss.jump_cycle_index;
3675   if (n >= 12)
3676     n -= 12;
3677   return ss.jump_cond[n];
3678 }
3679 
3680 /* Return the index of the first jump that occurs after CLOCK_VAR.  If no jump
3681    has delay slots beyond CLOCK_VAR, return -1.  */
3682 static int
3683 first_jump_index (int clock_var)
3684 {
3685   int retval = -1;
3686   int n = 0;
3687   for (;;)
3688     {
3689       int t = get_jump_cycle (n);
3690       if (t <= clock_var)
3691 	break;
3692       retval = n;
3693       n++;
3694     }
3695   return retval;
3696 }
3697 
3698 /* Add a new entry in our scheduling state for a jump that occurs in CYCLE
3699    and has the opposite condition of COND.  */
3700 static void
3701 record_jump (int cycle, rtx cond)
3702 {
3703   if (ss.jump_cycle_index == 0)
3704     ss.jump_cycle_index = 11;
3705   else
3706     ss.jump_cycle_index--;
3707   ss.jump_cycles[ss.jump_cycle_index] = cycle;
3708   ss.jump_cond[ss.jump_cycle_index] = cond;
3709 }
3710 
3711 /* Set the clock cycle of INSN to CYCLE.  Also clears the insn's entry in
3712    new_conditions.  */
3713 static void
3714 insn_set_clock (rtx insn, int cycle)
3715 {
3716   unsigned uid = INSN_UID (insn);
3717 
3718   if (uid >= INSN_INFO_LENGTH)
3719     insn_info.safe_grow (uid * 5 / 4 + 10);
3720 
3721   INSN_INFO_ENTRY (uid).clock = cycle;
3722   INSN_INFO_ENTRY (uid).new_cond = NULL;
3723   INSN_INFO_ENTRY (uid).reservation = 0;
3724   INSN_INFO_ENTRY (uid).ebb_start = false;
3725 }
3726 
3727 /* Return the clock cycle we set for the insn with uid UID.  */
3728 static int
3729 insn_uid_get_clock (int uid)
3730 {
3731   return INSN_INFO_ENTRY (uid).clock;
3732 }
3733 
3734 /* Return the clock cycle we set for INSN.  */
3735 static int
3736 insn_get_clock (rtx insn)
3737 {
3738   return insn_uid_get_clock (INSN_UID (insn));
3739 }
3740 
3741 /* Examine INSN, and if it is a conditional jump of any kind, return
3742    the opposite of the condition in which it branches.  Otherwise,
3743    return NULL_RTX.  */
3744 static rtx
3745 condjump_opposite_condition (rtx insn)
3746 {
3747   rtx pat = PATTERN (insn);
3748   int icode = INSN_CODE (insn);
3749   rtx x = NULL;
3750 
3751   if (icode == CODE_FOR_br_true || icode == CODE_FOR_br_false)
3752     {
3753       x = XEXP (SET_SRC (pat), 0);
3754       if (icode == CODE_FOR_br_false)
3755 	return x;
3756     }
3757   if (GET_CODE (pat) == COND_EXEC)
3758     {
3759       rtx t = COND_EXEC_CODE (pat);
3760       if ((GET_CODE (t) == PARALLEL
3761 	   && GET_CODE (XVECEXP (t, 0, 0)) == RETURN)
3762 	  || (GET_CODE (t) == UNSPEC && XINT (t, 1) == UNSPEC_REAL_JUMP)
3763 	  || (GET_CODE (t) == SET && SET_DEST (t) == pc_rtx))
3764 	x = COND_EXEC_TEST (pat);
3765     }
3766 
3767   if (x != NULL_RTX)
3768     {
3769       enum rtx_code code = GET_CODE (x);
3770       x = gen_rtx_fmt_ee (code == EQ ? NE : EQ,
3771 			  GET_MODE (x), XEXP (x, 0),
3772 			  XEXP (x, 1));
3773     }
3774   return x;
3775 }
3776 
3777 /* Return true iff COND1 and COND2 are exactly opposite conditions,
3778    one of them NE and the other EQ.  */
3779 static bool
3780 conditions_opposite_p (rtx cond1, rtx cond2)
3781 {
3782   return (rtx_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
3783 	  && rtx_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))
3784 	  && GET_CODE (cond1) == reverse_condition (GET_CODE (cond2)));
3785 }
3786 
3787 /* Return true if we can add a predicate COND to INSN, or if INSN
3788    already has that predicate.  If DOIT is true, also perform the
3789    modification.  */
3790 static bool
3791 predicate_insn (rtx_insn *insn, rtx cond, bool doit)
3792 {
3793   int icode;
3794   if (cond == NULL_RTX)
3795     {
3796       gcc_assert (!doit);
3797       return false;
3798     }
3799 
3800   if (get_attr_predicable (insn) == PREDICABLE_YES
3801       && GET_CODE (PATTERN (insn)) != COND_EXEC)
3802     {
3803       if (doit)
3804 	{
3805 	  rtx newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3806 	  PATTERN (insn) = newpat;
3807 	  INSN_CODE (insn) = -1;
3808 	}
3809       return true;
3810     }
3811   if (GET_CODE (PATTERN (insn)) == COND_EXEC
3812       && rtx_equal_p (COND_EXEC_TEST (PATTERN (insn)), cond))
3813     return true;
3814   icode = INSN_CODE (insn);
3815   if (icode == CODE_FOR_real_jump
3816       || icode == CODE_FOR_jump
3817       || icode == CODE_FOR_indirect_jump)
3818     {
3819       rtx pat = PATTERN (insn);
3820       rtx dest = (icode == CODE_FOR_real_jump ? XVECEXP (pat, 0, 0)
3821 		  : icode == CODE_FOR_jump ? XEXP (SET_SRC (pat), 0)
3822 		  : SET_SRC (pat));
3823       if (doit)
3824 	{
3825 	  rtx newpat;
3826 	  if (REG_P (dest))
3827 	    newpat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (insn));
3828 	  else
3829 	    newpat = gen_br_true (cond, XEXP (cond, 0), dest);
3830 	  PATTERN (insn) = newpat;
3831 	  INSN_CODE (insn) = -1;
3832 	}
3833       return true;
3834     }
3835   if (INSN_CODE (insn) == CODE_FOR_br_true)
3836     {
3837       rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3838       return rtx_equal_p (br_cond, cond);
3839     }
3840   if (INSN_CODE (insn) == CODE_FOR_br_false)
3841     {
3842       rtx br_cond = XEXP (SET_SRC (PATTERN (insn)), 0);
3843       return conditions_opposite_p (br_cond, cond);
3844     }
3845   return false;
3846 }
3847 
3848 /* Initialize SC.  Used by c6x_init_sched_context and c6x_sched_init.  */
3849 static void
3850 init_sched_state (c6x_sched_context_t sc)
3851 {
3852   sc->last_scheduled_insn = NULL;
3853   sc->last_scheduled_iter0 = NULL;
3854   sc->issued_this_cycle = 0;
3855   memset (sc->jump_cycles, 0, sizeof sc->jump_cycles);
3856   memset (sc->jump_cond, 0, sizeof sc->jump_cond);
3857   sc->jump_cycle_index = 0;
3858   sc->delays_finished_at = 0;
3859   sc->curr_sched_clock = 0;
3860 
3861   sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3862 
3863   memset (sc->reg_n_accesses, 0, sizeof sc->reg_n_accesses);
3864   memset (sc->reg_n_xaccesses, 0, sizeof sc->reg_n_xaccesses);
3865   memset (sc->reg_set_in_cycle, 0, sizeof sc->reg_set_in_cycle);
3866 
3867   state_reset (sc->prev_cycle_state_ctx);
3868 }
3869 
3870 /* Allocate store for new scheduling context.  */
3871 static void *
3872 c6x_alloc_sched_context (void)
3873 {
3874   return xmalloc (sizeof (struct c6x_sched_context));
3875 }
3876 
3877 /* If CLEAN_P is true, initialize _SC with clean data; otherwise
3878    initialize it from the global context.  */
3879 static void
3880 c6x_init_sched_context (void *_sc, bool clean_p)
3881 {
3882   c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3883 
3884   if (clean_p)
3885     {
3886       init_sched_state (sc);
3887     }
3888   else
3889     {
3890       *sc = ss;
3891       sc->prev_cycle_state_ctx = xmalloc (dfa_state_size);
3892       memcpy (sc->prev_cycle_state_ctx, prev_cycle_state, dfa_state_size);
3893     }
3894 }
3895 
3896 /* Sets the global scheduling context to the one pointed to by _SC.  */
3897 static void
3898 c6x_set_sched_context (void *_sc)
3899 {
3900   c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3901 
3902   gcc_assert (sc != NULL);
3903   ss = *sc;
3904   memcpy (prev_cycle_state, sc->prev_cycle_state_ctx, dfa_state_size);
3905 }
3906 
3907 /* Clear data in _SC.  */
3908 static void
3909 c6x_clear_sched_context (void *_sc)
3910 {
3911   c6x_sched_context_t sc = (c6x_sched_context_t) _sc;
3912   gcc_assert (_sc != NULL);
3913 
3914   free (sc->prev_cycle_state_ctx);
3915 }
3916 
3917 /* Free _SC.  */
3918 static void
3919 c6x_free_sched_context (void *_sc)
3920 {
3921   free (_sc);
3922 }
3923 
3924 /* True if we are currently performing a preliminary scheduling
3925    pass before modulo scheduling; we can't allow the scheduler to
3926    modify instruction patterns using packetization assumptions,
3927    since there will be another scheduling pass later if modulo
3928    scheduling fails.  */
3929 static bool in_hwloop;
3930 
3931 /* Provide information about speculation capabilities, and set the
3932    DO_BACKTRACKING and DO_PREDICATION flags.  */
3933 static void
3934 c6x_set_sched_flags (spec_info_t spec_info)
3935 {
3936   unsigned int *flags = &(current_sched_info->flags);
3937 
3938   if (*flags & SCHED_EBB)
3939     {
3940       *flags |= DO_BACKTRACKING | DO_PREDICATION;
3941     }
3942   if (in_hwloop)
3943     *flags |= DONT_BREAK_DEPENDENCIES;
3944 
3945   spec_info->mask = 0;
3946 }
3947 
3948 /* Implement the TARGET_SCHED_ISSUE_RATE hook.  */
3949 
3950 static int
3951 c6x_issue_rate (void)
3952 {
3953   return 8;
3954 }
3955 
3956 /* Used together with the collapse_ndfa option, this ensures that we reach a
3957    deterministic automaton state before trying to advance a cycle.
3958    With collapse_ndfa, genautomata creates advance cycle arcs only for
3959    such deterministic states.  */
3960 
3961 static rtx
3962 c6x_sched_dfa_pre_cycle_insn (void)
3963 {
3964   return const0_rtx;
3965 }
3966 
3967 /* We're beginning a new block.  Initialize data structures as necessary.  */
3968 
3969 static void
3970 c6x_sched_init (FILE *dump ATTRIBUTE_UNUSED,
3971 		int sched_verbose ATTRIBUTE_UNUSED,
3972 		int max_ready ATTRIBUTE_UNUSED)
3973 {
3974   if (prev_cycle_state == NULL)
3975     {
3976       prev_cycle_state = xmalloc (dfa_state_size);
3977     }
3978   init_sched_state (&ss);
3979   state_reset (prev_cycle_state);
3980 }
3981 
3982 /* We are about to begin issuing INSN.  Return nonzero if we cannot
3983    issue it on given cycle CLOCK and return zero if we should not sort
3984    the ready queue on the next clock start.
3985    For C6X, we use this function just to copy the previous DFA state
3986    for comparison purposes.  */
3987 
3988 static int
3989 c6x_dfa_new_cycle (FILE *dump ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3990 		   rtx_insn *insn ATTRIBUTE_UNUSED,
3991 		   int last_clock ATTRIBUTE_UNUSED,
3992 		   int clock ATTRIBUTE_UNUSED, int *sort_p ATTRIBUTE_UNUSED)
3993 {
3994   if (clock != last_clock)
3995     memcpy (prev_cycle_state, curr_state, dfa_state_size);
3996   return 0;
3997 }
3998 
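/* Note a read of hard register REGNO in the insn currently being examined.
   If CROSS is true, the read uses a cross path.  Update the temporary access
   counters, and set REG_ACCESS_STALL if issuing the insn in the current
   cycle would exceed the limit on reads of this register or would incur a
   cross path stall because the register is written in this same cycle.  */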
3999 static void
4000 c6x_mark_regno_read (int regno, bool cross)
4001 {
4002   int t = ++ss.tmp_reg_n_accesses[regno];
4003 
4004   if (t > 4)
4005     reg_access_stall = true;
4006 
4007   if (cross)
4008     {
4009       int set_cycle = ss.reg_set_in_cycle[regno];
4010       /* This must be done in this way rather than by tweaking things in
4011 	 adjust_cost, since the stall occurs even for insns with opposite
4012 	 predicates, and the scheduler may not even see a dependency.  */
4013       if (set_cycle > 0 && set_cycle == ss.curr_sched_clock)
4014 	reg_access_stall = true;
4015       /* This doesn't quite do anything yet as we're only modeling one
4016 	 x unit.  */
4017       ++ss.tmp_reg_n_xaccesses[regno];
4018     }
4019 }
4020 
4021 /* Note that REG is read in the insn being examined.  If CROSS, it
4022    means the access is through a cross path.  Update the temporary reg
4023    access arrays, and set REG_ACCESS_STALL if the insn can't be issued
4024    in the current cycle.  */
4025 
4026 static void
4027 c6x_mark_reg_read (rtx reg, bool cross)
4028 {
4029   unsigned regno = REGNO (reg);
4030   unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4031 
4032   while (nregs-- > 0)
4033     c6x_mark_regno_read (regno + nregs, cross);
4034 }
4035 
4036 /* Note that register REG is written in cycle CYCLES.  */
4037 
4038 static void
4039 c6x_mark_reg_written (rtx reg, int cycles)
4040 {
4041   unsigned regno = REGNO (reg);
4042   unsigned nregs = hard_regno_nregs[regno][GET_MODE (reg)];
4043 
4044   while (nregs-- > 0)
4045     ss.reg_set_in_cycle[regno + nregs] = cycles;
4046 }
4047 
4048 /* Update the register state information for the instruction INSN.
4049    Return true if the instruction has to be delayed until the
4050    next cycle.  */
4051 
4052 static bool
4053 c6x_registers_update (rtx_insn *insn)
4054 {
4055   enum attr_cross cross;
4056   enum attr_dest_regfile destrf;
4057   int i, nops;
4058   rtx x;
4059 
4060   if (!reload_completed || recog_memoized (insn) < 0)
4061     return false;
4062 
4063   reg_access_stall = false;
4064   memcpy (ss.tmp_reg_n_accesses, ss.reg_n_accesses,
4065 	  sizeof ss.tmp_reg_n_accesses);
4066   memcpy (ss.tmp_reg_n_xaccesses, ss.reg_n_xaccesses,
4067 	  sizeof ss.tmp_reg_n_xaccesses);
4068 
4069   extract_insn (insn);
4070 
4071   cross = get_attr_cross (insn);
4072   destrf = get_attr_dest_regfile (insn);
4073 
4074   nops = recog_data.n_operands;
4075   x = PATTERN (insn);
4076   if (GET_CODE (x) == COND_EXEC)
4077     {
4078       c6x_mark_reg_read (XEXP (XEXP (x, 0), 0), false);
4079       nops -= 2;
4080     }
4081 
4082   for (i = 0; i < nops; i++)
4083     {
4084       rtx op = recog_data.operand[i];
4085       if (recog_data.operand_type[i] == OP_OUT)
4086 	continue;
4087       if (REG_P (op))
4088 	{
4089 	  bool this_cross = cross;
4090 	  if (destrf == DEST_REGFILE_A && A_REGNO_P (REGNO (op)))
4091 	    this_cross = false;
4092 	  if (destrf == DEST_REGFILE_B && B_REGNO_P (REGNO (op)))
4093 	    this_cross = false;
4094 	  c6x_mark_reg_read (op, this_cross);
4095 	}
4096       else if (MEM_P (op))
4097 	{
4098 	  op = XEXP (op, 0);
4099 	  switch (GET_CODE (op))
4100 	    {
4101 	    case POST_INC:
4102 	    case PRE_INC:
4103 	    case POST_DEC:
4104 	    case PRE_DEC:
4105 	      op = XEXP (op, 0);
4106 	      /* fall through */
4107 	    case REG:
4108 	      c6x_mark_reg_read (op, false);
4109 	      break;
4110 	    case POST_MODIFY:
4111 	    case PRE_MODIFY:
4112 	      op = XEXP (op, 1);
4113 	      gcc_assert (GET_CODE (op) == PLUS);
4114 	      /* fall through */
4115 	    case PLUS:
4116 	      c6x_mark_reg_read (XEXP (op, 0), false);
4117 	      if (REG_P (XEXP (op, 1)))
4118 		c6x_mark_reg_read (XEXP (op, 1), false);
4119 	      break;
4120 	    case SYMBOL_REF:
4121 	    case LABEL_REF:
4122 	    case CONST:
4123 	      c6x_mark_regno_read (REG_B14, false);
4124 	      break;
4125 	    default:
4126 	      gcc_unreachable ();
4127 	    }
4128 	}
4129       else if (!CONSTANT_P (op) && strlen (recog_data.constraints[i]) > 0)
4130 	gcc_unreachable ();
4131     }
4132   return reg_access_stall;
4133 }
4134 
4135 /* Helper function for the TARGET_SCHED_REORDER and
4136    TARGET_SCHED_REORDER2 hooks.  If scheduling an insn would be unsafe
4137    in the current cycle, move it down in the ready list and return the
4138    number of non-unsafe insns.  */
4139 
4140 static int
4141 c6x_sched_reorder_1 (rtx_insn **ready, int *pn_ready, int clock_var)
4142 {
4143   int n_ready = *pn_ready;
4144   rtx_insn **e_ready = ready + n_ready;
4145   rtx_insn **insnp;
4146   int first_jump;
4147 
4148   /* Keep track of conflicts due to a limited number of register accesses,
4149      and due to stalls incurred by too early accesses of registers using
4150      cross paths.  */
4151 
4152   for (insnp = ready; insnp < e_ready; insnp++)
4153     {
4154       rtx_insn *insn = *insnp;
4155       int icode = recog_memoized (insn);
4156       bool is_asm = (icode < 0
4157 		     && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4158 			 || asm_noperands (PATTERN (insn)) >= 0));
4159       bool no_parallel = (is_asm || icode == CODE_FOR_sploop
4160 			  || (icode >= 0
4161 			      && get_attr_type (insn) == TYPE_ATOMIC));
4162 
4163       /* We delay asm insns until all delay slots are exhausted.  We can't
4164 	 accurately tell how many cycles an asm takes, and the main scheduling
4165 	 code always assumes at least 1 cycle, which may be wrong.  */
4166       if ((no_parallel
4167 	   && (ss.issued_this_cycle > 0 || clock_var < ss.delays_finished_at))
4168 	  || c6x_registers_update (insn)
4169 	  || (ss.issued_this_cycle > 0 && icode == CODE_FOR_sploop))
4170 	{
4171 	  memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4172 	  *ready = insn;
4173 	  n_ready--;
4174 	  ready++;
4175 	}
4176       else if (shadow_p (insn))
4177 	{
4178 	  memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4179 	  *ready = insn;
4180 	}
4181     }
4182 
4183   /* Ensure that no other jump is scheduled in jump delay slots, since
4184      it would put the machine into the wrong state.  Also, we must
4185      avoid scheduling insns that have a latency longer than the
4186      remaining jump delay slots, as the code at the jump destination
4187      won't be prepared for it.
4188 
4189      However, we can relax this condition somewhat.  The rest of the
4190      scheduler will automatically avoid scheduling an insn on which
4191      the jump shadow depends so late that its side effect happens
4192      after the jump.  This means that if we see an insn with a longer
4193      latency here, it can safely be scheduled if we can ensure that it
4194      has a predicate opposite of the previous jump: the side effect
4195      will happen in what we think of as the same basic block.  In
4196      c6x_variable_issue, we will record the necessary predicate in
4197      new_conditions, and after scheduling is finished, we will modify
4198      the insn.
4199 
4200      Special care must be taken whenever there is more than one jump
4201      in flight.  */
4202 
4203   first_jump = first_jump_index (clock_var);
4204   if (first_jump != -1)
4205     {
4206       int first_cycle = get_jump_cycle (first_jump);
4207       rtx first_cond = get_jump_cond (first_jump);
4208       int second_cycle = 0;
4209 
4210       if (first_jump > 0)
4211 	second_cycle = get_jump_cycle (first_jump - 1);
4212 
4213       for (insnp = ready; insnp < e_ready; insnp++)
4214 	{
4215 	  rtx_insn *insn = *insnp;
4216 	  int icode = recog_memoized (insn);
4217 	  bool is_asm = (icode < 0
4218 			 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
4219 			     || asm_noperands (PATTERN (insn)) >= 0));
4220 	  int this_cycles, rsrv_cycles;
4221 	  enum attr_type type;
4222 
4223 	  gcc_assert (!is_asm);
4224 	  if (icode < 0)
4225 	    continue;
4226 	  this_cycles = get_attr_cycles (insn);
4227 	  rsrv_cycles = get_attr_reserve_cycles (insn);
4228 	  type = get_attr_type (insn);
4229 	  /* Treat branches specially; there is also a hazard if two jumps
4230 	     end at the same cycle.  */
4231 	  if (type == TYPE_BRANCH || type == TYPE_CALL)
4232 	    this_cycles++;
4233 	  if (clock_var + this_cycles <= first_cycle)
4234 	    continue;
4235 	  if ((first_jump > 0 && clock_var + this_cycles > second_cycle)
4236 	      || clock_var + rsrv_cycles > first_cycle
4237 	      || !predicate_insn (insn, first_cond, false))
4238 	    {
4239 	      memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4240 	      *ready = insn;
4241 	      n_ready--;
4242 	      ready++;
4243 	    }
4244 	}
4245     }
4246 
4247   return n_ready;
4248 }
4249 
4250 /* Implement the TARGET_SCHED_REORDER hook.  We save the current clock
4251    for later and clear the register access information for the new
4252    cycle.  We also move asm statements out of the way if they would be
4253    scheduled in a delay slot.  */
4254 
4255 static int
4256 c6x_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
4257 		   int sched_verbose ATTRIBUTE_UNUSED,
4258 		   rtx_insn **ready ATTRIBUTE_UNUSED,
4259 		   int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4260 {
4261   ss.curr_sched_clock = clock_var;
4262   ss.issued_this_cycle = 0;
4263   memset (ss.reg_n_accesses, 0, sizeof ss.reg_n_accesses);
4264   memset (ss.reg_n_xaccesses, 0, sizeof ss.reg_n_xaccesses);
4265 
4266   if (ready == NULL)
4267     return 0;
4268 
4269   return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4270 }
4271 
4272 /* Implement the TARGET_SCHED_REORDER2 hook.  We use this to record the clock
4273    cycle for every insn.  */
4274 
4275 static int
4276 c6x_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
4277 		    int sched_verbose ATTRIBUTE_UNUSED,
4278 		    rtx_insn **ready ATTRIBUTE_UNUSED,
4279 		    int *pn_ready ATTRIBUTE_UNUSED, int clock_var)
4280 {
4281   /* FIXME: the assembler rejects labels inside an execute packet.
4282      This can occur if prologue insns are scheduled in parallel with
4283      others, so we avoid this here.  Also make sure that nothing is
4284      scheduled in parallel with a TYPE_ATOMIC insn or after a jump.  */
4285   if (RTX_FRAME_RELATED_P (ss.last_scheduled_insn)
4286       || JUMP_P (ss.last_scheduled_insn)
4287       || (recog_memoized (ss.last_scheduled_insn) >= 0
4288 	  && get_attr_type (ss.last_scheduled_insn) == TYPE_ATOMIC))
4289     {
4290       int n_ready = *pn_ready;
4291       rtx_insn **e_ready = ready + n_ready;
4292       rtx_insn **insnp;
4293 
4294       for (insnp = ready; insnp < e_ready; insnp++)
4295 	{
4296 	  rtx_insn *insn = *insnp;
4297 	  if (!shadow_p (insn))
4298 	    {
4299 	      memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
4300 	      *ready = insn;
4301 	      n_ready--;
4302 	      ready++;
4303 	    }
4304 	}
4305       return n_ready;
4306     }
4307 
4308   return c6x_sched_reorder_1 (ready, pn_ready, clock_var);
4309 }
4310 
4311 /* Subroutine of maybe_clobber_cond, called through note_stores.  */
4312 
4313 static void
4314 clobber_cond_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data1)
4315 {
4316   rtx *cond = (rtx *)data1;
4317   if (*cond != NULL_RTX && reg_overlap_mentioned_p (x, *cond))
4318     *cond = NULL_RTX;
4319 }
4320 
4321 /* Examine INSN, and if it destroys the conditions we have recorded for
4322    any of the jumps in flight, clear that condition so that we don't
4323    predicate any more insns.  CLOCK_VAR helps us limit the search to
4324    only those jumps which are still in flight.  */
4325 
4326 static void
4327 maybe_clobber_cond (rtx insn, int clock_var)
4328 {
4329   int n, idx;
4330   idx = ss.jump_cycle_index;
4331   for (n = 0; n < 12; n++, idx++)
4332     {
4333       rtx cond, link;
4334       int cycle;
4335 
4336       if (idx >= 12)
4337 	idx -= 12;
4338       cycle = ss.jump_cycles[idx];
4339       if (cycle <= clock_var)
4340 	return;
4341 
4342       cond = ss.jump_cond[idx];
4343       if (cond == NULL_RTX)
4344 	continue;
4345 
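      /* A call may clobber the registers used in the condition; stop
         predicating insns beyond it.  */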
4346       if (CALL_P (insn))
4347 	{
4348 	  ss.jump_cond[idx] = NULL_RTX;
4349 	  continue;
4350 	}
4351 
4352       note_stores (PATTERN (insn), clobber_cond_1, ss.jump_cond + idx);
4353       for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
4354 	if (REG_NOTE_KIND (link) == REG_INC)
4355 	  clobber_cond_1 (XEXP (link, 0), NULL_RTX, ss.jump_cond + idx);
4356     }
4357 }
4358 
4359 /* Implement the TARGET_SCHED_VARIABLE_ISSUE hook.  We are about to
4360    issue INSN.  Return the number of insns left on the ready queue
4361    that can be issued this cycle.
4362    We use this hook to record clock cycles and reservations for every insn.  */
4363 
4364 static int
4365 c6x_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
4366 		    int sched_verbose ATTRIBUTE_UNUSED,
4367 		    rtx_insn *insn, int can_issue_more ATTRIBUTE_UNUSED)
4368 {
4369   ss.last_scheduled_insn = insn;
4370   if (INSN_UID (insn) < sploop_max_uid_iter0 && !JUMP_P (insn))
4371     ss.last_scheduled_iter0 = insn;
4372   if (GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER)
4373     ss.issued_this_cycle++;
4374   if (insn_info.exists ())
4375     {
4376       state_t st_after = alloca (dfa_state_size);
4377       int curr_clock = ss.curr_sched_clock;
4378       int uid = INSN_UID (insn);
4379       int icode = recog_memoized (insn);
4380       rtx first_cond;
4381       int first, first_cycle;
4382       unsigned int mask;
4383       int i;
4384 
4385       insn_set_clock (insn, curr_clock);
4386       INSN_INFO_ENTRY (uid).ebb_start
4387 	= curr_clock == 0 && ss.issued_this_cycle == 1;
4388 
4389       first = first_jump_index (ss.curr_sched_clock);
4390       if (first == -1)
4391 	{
4392 	  first_cycle = 0;
4393 	  first_cond = NULL_RTX;
4394 	}
4395       else
4396 	{
4397 	  first_cycle = get_jump_cycle (first);
4398 	  first_cond = get_jump_cond (first);
4399 	}
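      /* If this insn's effect would extend beyond the first pending jump,
         or it is itself a branch or call, record the predicate it must
         receive once scheduling finishes (see c6x_sched_reorder_1).  */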
4400       if (icode >= 0
4401 	  && first_cycle > curr_clock
4402 	  && first_cond != NULL_RTX
4403 	  && (curr_clock + get_attr_cycles (insn) > first_cycle
4404 	      || get_attr_type (insn) == TYPE_BRANCH
4405 	      || get_attr_type (insn) == TYPE_CALL))
4406 	INSN_INFO_ENTRY (uid).new_cond = first_cond;
4407 
4408       memcpy (st_after, curr_state, dfa_state_size);
4409       state_transition (st_after, const0_rtx);
4410 
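      /* Record which functional units are reserved now but were free at
         the start of the current cycle, and store the mask for this insn.  */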
4411       mask = 0;
4412       for (i = 0; i < 2 * UNIT_QID_SIDE_OFFSET; i++)
4413 	if (cpu_unit_reservation_p (st_after, c6x_unit_codes[i])
4414 	    && !cpu_unit_reservation_p (prev_cycle_state, c6x_unit_codes[i]))
4415 	  mask |= 1 << i;
4416       INSN_INFO_ENTRY (uid).unit_mask = mask;
4417 
4418       maybe_clobber_cond (insn, curr_clock);
4419 
4420       if (icode >= 0)
4421 	{
4422 	  int i, cycles;
4423 
4424 	  c6x_registers_update (insn);
4425 	  memcpy (ss.reg_n_accesses, ss.tmp_reg_n_accesses,
4426 		  sizeof ss.reg_n_accesses);
4427 	  memcpy (ss.reg_n_xaccesses, ss.tmp_reg_n_xaccesses,
4428 		  sizeof ss.reg_n_xaccesses);
4429 
4430 	  cycles = get_attr_cycles (insn);
4431 	  if (ss.delays_finished_at < ss.curr_sched_clock + cycles)
4432 	    ss.delays_finished_at = ss.curr_sched_clock + cycles;
4433 	  if (get_attr_type (insn) == TYPE_BRANCH
4434 	      || get_attr_type (insn) == TYPE_CALL)
4435 	    {
4436 	      rtx opposite = condjump_opposite_condition (insn);
4437 	      record_jump (ss.curr_sched_clock + cycles, opposite);
4438 	    }
4439 
4440 	  /* Mark the cycles in which the destination registers are written.
4441 	     This is used for calculating stalls when using cross units.  */
4442 	  extract_insn (insn);
4443 	  /* Cross-path stalls don't apply to results of load insns.  */
4444 	  if (get_attr_type (insn) == TYPE_LOAD
4445 	      || get_attr_type (insn) == TYPE_LOADN
4446 	      || get_attr_type (insn) == TYPE_LOAD_SHADOW)
4447 	    cycles--;
4448 	  for (i = 0; i < recog_data.n_operands; i++)
4449 	    {
4450 	      rtx op = recog_data.operand[i];
4451 	      if (MEM_P (op))
4452 		{
4453 		  rtx addr = XEXP (op, 0);
4454 		  if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4455 		    c6x_mark_reg_written (XEXP (addr, 0),
4456 					  insn_uid_get_clock (uid) + 1);
4457 		}
4458 	      if (recog_data.operand_type[i] != OP_IN
4459 		  && REG_P (op))
4460 		{
4461 		  c6x_mark_reg_written (op,
4462 					insn_uid_get_clock (uid) + cycles);
4463 		}
4464 	    }
4465 	}
4466     }
4467   return can_issue_more;
4468 }
4469 
4470 /* Implement the TARGET_SCHED_ADJUST_COST hook.  We need special handling for
4471    anti- and output dependencies.  */
4472 
4473 static int
4474 c6x_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4475 {
4476   enum attr_type insn_type = TYPE_UNKNOWN, dep_insn_type = TYPE_UNKNOWN;
4477   int dep_insn_code_number, insn_code_number;
4478   int shadow_bonus = 0;
4479   enum reg_note kind;
4480   dep_insn_code_number = recog_memoized (dep_insn);
4481   insn_code_number = recog_memoized (insn);
4482 
4483   if (dep_insn_code_number >= 0)
4484     dep_insn_type = get_attr_type (dep_insn);
4485 
4486   if (insn_code_number >= 0)
4487     insn_type = get_attr_type (insn);
4488 
4489   kind = REG_NOTE_KIND (link);
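  /* A REG_NOTE_KIND of zero denotes a true data dependence.  */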
4490   if (kind == 0)
4491     {
4492       /* If we have a dependency on a load, and it's not for the result of
4493 	 the load, it must be for an autoincrement.  Reduce the cost in that
4494 	 case.  */
4495       if (dep_insn_type == TYPE_LOAD)
4496 	{
4497 	  rtx set = PATTERN (dep_insn);
4498 	  if (GET_CODE (set) == COND_EXEC)
4499 	    set = COND_EXEC_CODE (set);
4500 	  if (GET_CODE (set) == UNSPEC)
4501 	    cost = 1;
4502 	  else
4503 	    {
4504 	      gcc_assert (GET_CODE (set) == SET);
4505 	      if (!reg_overlap_mentioned_p (SET_DEST (set), PATTERN (insn)))
4506 		cost = 1;
4507 	    }
4508 	}
4509     }
4510 
4511   /* A jump shadow needs to have its latency decreased by one.  Conceptually,
4512      it occurs in between two cycles, but we schedule it at the end of the
4513      first cycle.  */
4514   if (shadow_type_p (insn_type))
4515     shadow_bonus = 1;
4516 
4517   /* Anti and output dependencies usually have zero cost, but we want
4518      to insert a stall after a jump, and after certain floating point
4519      insns that take more than one cycle to read their inputs.  In the
4520      future, we should try to find a better algorithm for scheduling
4521      jumps.  */
4522   if (kind != 0)
4523     {
4524       /* We can get anti-dependencies against shadow insns.  Treat these
4525 	 like output dependencies, so that the insn is entirely finished
4526 	 before the branch takes place.  */
4527       if (kind == REG_DEP_ANTI && insn_type == TYPE_SHADOW)
4528 	kind = REG_DEP_OUTPUT;
4529       switch (dep_insn_type)
4530 	{
4531 	case TYPE_CALLP:
4532 	  return 1;
4533 	case TYPE_BRANCH:
4534 	case TYPE_CALL:
4535 	  if (get_attr_has_shadow (dep_insn) == HAS_SHADOW_Y)
4536 	    /* This is a real_jump/real_call insn.  These don't have
4537 	       outputs, and ensuring the validity of scheduling things
4538 	       in the delay slot is the job of
4539 	       c6x_sched_reorder_1.  */
4540 	    return 0;
4541 	  /* Unsplit calls can happen - e.g. for divide insns.  */
4542 	  return 6;
4543 	case TYPE_LOAD:
4544 	case TYPE_LOADN:
4545 	case TYPE_INTDP:
4546 	  if (kind == REG_DEP_OUTPUT)
4547 	    return 5 - shadow_bonus;
4548 	  return 0;
4549 	case TYPE_MPY4:
4550 	case TYPE_FP4:
4551 	  if (kind == REG_DEP_OUTPUT)
4552 	    return 4 - shadow_bonus;
4553 	  return 0;
4554 	case TYPE_MPY2:
4555 	  if (kind == REG_DEP_OUTPUT)
4556 	    return 2 - shadow_bonus;
4557 	  return 0;
4558 	case TYPE_CMPDP:
4559 	  if (kind == REG_DEP_OUTPUT)
4560 	    return 2 - shadow_bonus;
4561 	  return 2;
4562 	case TYPE_ADDDP:
4563 	case TYPE_MPYSPDP:
4564 	  if (kind == REG_DEP_OUTPUT)
4565 	    return 7 - shadow_bonus;
4566 	  return 2;
4567 	case TYPE_MPYSP2DP:
4568 	  if (kind == REG_DEP_OUTPUT)
4569 	    return 5 - shadow_bonus;
4570 	  return 2;
4571 	case TYPE_MPYI:
4572 	  if (kind == REG_DEP_OUTPUT)
4573 	    return 9 - shadow_bonus;
4574 	  return 4;
4575 	case TYPE_MPYID:
4576 	case TYPE_MPYDP:
4577 	  if (kind == REG_DEP_OUTPUT)
4578 	    return 10 - shadow_bonus;
4579 	  return 4;
4580 
4581 	default:
4582 	  if (insn_type == TYPE_SPKERNEL)
4583 	    return 0;
4584 	  if (kind == REG_DEP_OUTPUT)
4585 	    return 1 - shadow_bonus;
4586 
4587 	  return 0;
4588 	}
4589     }
4590 
4591   return cost - shadow_bonus;
4592 }
4593 
4594 /* Create a SEQUENCE rtx to replace the instructions in SLOT, of which there
4595    are N_FILLED.  REAL_FIRST identifies the slot of the insn that appears
4596    first in the original stream.  */
4597 
4598 static void
4599 gen_one_bundle (rtx_insn **slot, int n_filled, int real_first)
4600 {
4601   rtx seq;
4602   rtx_insn *bundle;
4603   rtx_insn *t;
4604   int i;
4605 
4606   seq = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (n_filled, slot));
4607   bundle = make_insn_raw (seq);
4608   BLOCK_FOR_INSN (bundle) = BLOCK_FOR_INSN (slot[0]);
4609   INSN_LOCATION (bundle) = INSN_LOCATION (slot[0]);
4610   SET_PREV_INSN (bundle) = SET_PREV_INSN (slot[real_first]);
4611 
4612   t = NULL;
4613 
4614   for (i = 0; i < n_filled; i++)
4615     {
4616       rtx_insn *insn = slot[i];
4617       remove_insn (insn);
4618       SET_PREV_INSN (insn) = t ? t : PREV_INSN (bundle);
4619       if (t != NULL_RTX)
4620 	SET_NEXT_INSN (t) = insn;
4621       t = insn;
4622       if (i > 0)
4623 	INSN_LOCATION (slot[i]) = INSN_LOCATION (bundle);
4624     }
4625 
4626   SET_NEXT_INSN (bundle) = NEXT_INSN (PREV_INSN (bundle));
4627   SET_NEXT_INSN (t) = NEXT_INSN (bundle);
4628   SET_NEXT_INSN (PREV_INSN (bundle)) = bundle;
4629   SET_PREV_INSN (NEXT_INSN (bundle)) = bundle;
4630 }
4631 
4632 /* Move all parallel instructions into SEQUENCEs, so that no subsequent passes
4633    try to insert labels in the middle.  */
4634 
4635 static void
4636 c6x_gen_bundles (void)
4637 {
4638   basic_block bb;
4639   rtx_insn *insn, *next, *last_call;
4640 
4641   FOR_EACH_BB_FN (bb, cfun)
4642     {
4643       rtx_insn *insn, *next;
4644       /* The machine is eight insns wide.  We can have up to six shadow
4645 	 insns, plus an extra slot for merging the jump shadow.  */
4646       rtx_insn *slot[15];
4647       int n_filled = 0;
4648       int first_slot = 0;
4649 
4650       for (insn = BB_HEAD (bb);; insn = next)
4651 	{
4652 	  int at_end;
4653 	  rtx delete_this = NULL_RTX;
4654 
4655 	  if (NONDEBUG_INSN_P (insn))
4656 	    {
4657 	      /* Put calls at the start of the sequence.  */
4658 	      if (CALL_P (insn))
4659 		{
4660 		  first_slot++;
4661 		  if (n_filled)
4662 		    {
4663 		      memmove (&slot[1], &slot[0],
4664 			       n_filled * sizeof (slot[0]));
4665 		    }
4666 		  if (!shadow_p (insn))
4667 		    {
4668 		      PUT_MODE (insn, TImode);
4669 		      if (n_filled)
4670 			PUT_MODE (slot[1], VOIDmode);
4671 		    }
4672 		  n_filled++;
4673 		  slot[0] = insn;
4674 		}
4675 	      else
4676 		{
4677 		  slot[n_filled++] = insn;
4678 		}
4679 	    }
4680 
4681 	  next = NEXT_INSN (insn);
4682 	  while (next && insn != BB_END (bb)
4683 		 && !(NONDEBUG_INSN_P (next)
4684 		      && GET_CODE (PATTERN (next)) != USE
4685 		      && GET_CODE (PATTERN (next)) != CLOBBER))
4686 	    {
4687 	      insn = next;
4688 	      next = NEXT_INSN (insn);
4689 	    }
4690 
4691 	  at_end = insn == BB_END (bb);
4692 	  if (delete_this == NULL_RTX
4693 	      && (at_end || (GET_MODE (next) == TImode
4694 			     && !(shadow_p (next) && CALL_P (next)))))
4695 	    {
4696 	      if (n_filled >= 2)
4697 		gen_one_bundle (slot, n_filled, first_slot);
4698 
4699 	      n_filled = 0;
4700 	      first_slot = 0;
4701 	    }
4702 	  if (at_end)
4703 	    break;
4704 	}
4705     }
4706   /* Bundling, and emitting nops, can separate
4707      NOTE_INSN_CALL_ARG_LOCATION from the corresponding calls.  Fix
4708      that up here.  */
4709   last_call = NULL;
4710   for (insn = get_insns (); insn; insn = next)
4711     {
4712       next = NEXT_INSN (insn);
4713       if (CALL_P (insn)
4714 	  || (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE
4715 	      && CALL_P (XVECEXP (PATTERN (insn), 0, 0))))
4716 	last_call = insn;
4717       if (!NOTE_P (insn) || NOTE_KIND (insn) != NOTE_INSN_CALL_ARG_LOCATION)
4718 	continue;
4719       if (NEXT_INSN (last_call) == insn)
4720 	continue;
4721       SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
4722       SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
4723       SET_PREV_INSN (insn) = last_call;
4724       SET_NEXT_INSN (insn) = NEXT_INSN (last_call);
4725       SET_PREV_INSN (NEXT_INSN (insn)) = insn;
4726       SET_NEXT_INSN (PREV_INSN (insn)) = insn;
4727       last_call = insn;
4728     }
4729 }
4730 
4731 /* Emit a NOP instruction for CYCLES cycles after insn AFTER.  Return it.  */
4732 
4733 static rtx_insn *
4734 emit_nop_after (int cycles, rtx after)
4735 {
4736   rtx_insn *insn;
4737 
4738   /* mpydp has 9 delay slots, and we may schedule a stall for a cross-path
4739      operation.  We don't need the extra NOP since in this case, the hardware
4740      will automatically insert the required stall.  */
4741   if (cycles == 10)
4742     cycles--;
4743 
4744   gcc_assert (cycles < 10);
4745 
4746   insn = emit_insn_after (gen_nop_count (GEN_INT (cycles)), after);
4747   PUT_MODE (insn, TImode);
4748 
4749   return insn;
4750 }
4751 
4752 /* Determine whether INSN is a call that needs to have a return label
4753    placed.  */
4754 
4755 static bool
4756 returning_call_p (rtx_insn *insn)
4757 {
4758   if (CALL_P (insn))
4759     return (!SIBLING_CALL_P (insn)
4760 	    && get_attr_type (insn) != TYPE_CALLP
4761 	    && get_attr_type (insn) != TYPE_SHADOW);
4762   if (recog_memoized (insn) < 0)
4763     return false;
4764   if (get_attr_type (insn) == TYPE_CALL)
4765     return true;
4766   return false;
4767 }
4768 
4769 /* Determine whether INSN's pattern can be converted to use callp.  */
4770 static bool
4771 can_use_callp (rtx_insn *insn)
4772 {
4773   int icode = recog_memoized (insn);
4774   if (!TARGET_INSNS_64PLUS
4775       || icode < 0
4776       || GET_CODE (PATTERN (insn)) == COND_EXEC)
4777     return false;
4778 
4779   return ((icode == CODE_FOR_real_call
4780 	   || icode == CODE_FOR_call_internal
4781 	   || icode == CODE_FOR_call_value_internal)
4782 	  && get_attr_dest_regfile (insn) == DEST_REGFILE_ANY);
4783 }
4784 
4785 /* Convert the pattern of INSN, which must be a CALL_INSN, into a callp.  */
4786 static void
4787 convert_to_callp (rtx_insn *insn)
4788 {
4789   rtx lab;
4790   extract_insn (insn);
4791   if (GET_CODE (PATTERN (insn)) == SET)
4792     {
4793       rtx dest = recog_data.operand[0];
4794       lab = recog_data.operand[1];
4795       PATTERN (insn) = gen_callp_value (dest, lab);
4796       INSN_CODE (insn) = CODE_FOR_callp_value;
4797     }
4798   else
4799     {
4800       lab = recog_data.operand[0];
4801       PATTERN (insn) = gen_callp (lab);
4802       INSN_CODE (insn) = CODE_FOR_callp;
4803     }
4804 }
4805 
4806 /* Scan forwards from INSN until we find the next insn that has mode TImode
4807    (indicating it starts a new cycle), and occurs in cycle CLOCK.
4808    Return it if we find such an insn, NULL_RTX otherwise.  */
4809 static rtx
4810 find_next_cycle_insn (rtx insn, int clock)
4811 {
4812   rtx t = insn;
4813   if (GET_MODE (t) == TImode)
4814     t = next_real_insn (t);
4815   while (t && GET_MODE (t) != TImode)
4816     t = next_real_insn (t);
4817 
4818   if (t && insn_get_clock (t) == clock)
4819     return t;
4820   return NULL_RTX;
4821 }
4822 
4823 /* If COND_INSN has a COND_EXEC condition, wrap the same condition
4824    around PAT.  Return PAT either unchanged or modified in this
4825    way.  */
4826 static rtx
4827 duplicate_cond (rtx pat, rtx cond_insn)
4828 {
4829   rtx cond_pat = PATTERN (cond_insn);
4830   if (GET_CODE (cond_pat) == COND_EXEC)
4831     pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (cond_pat)),
4832 			     pat);
4833   return pat;
4834 }
4835 
4836 /* Walk forward from INSN to find the last insn that issues in the same clock
4837    cycle.  */
4838 static rtx
4839 find_last_same_clock (rtx insn)
4840 {
4841   rtx retval = insn;
4842   rtx_insn *t = next_real_insn (insn);
4843 
4844   while (t && GET_MODE (t) != TImode)
4845     {
4846       if (!DEBUG_INSN_P (t) && recog_memoized (t) >= 0)
4847 	retval = t;
4848       t = next_real_insn (t);
4849     }
4850   return retval;
4851 }
4852 
4853 /* For every call insn in the function, emit code to load the return
4854    address.  For each call we create a return label and store it in
4855    CALL_LABELS.  If we are not scheduling, we emit the labels here,
4856    otherwise the caller will do it later.
4857    This function is called after final insn scheduling, but before creating
4858    the SEQUENCEs that represent execute packets.  */
4859 
4860 static void
4861 reorg_split_calls (rtx *call_labels)
4862 {
4863   unsigned int reservation_mask = 0;
4864   rtx_insn *insn = get_insns ();
4865   gcc_assert (NOTE_P (insn));
4866   insn = next_real_insn (insn);
4867   while (insn)
4868     {
4869       int uid;
4870       rtx_insn *next = next_real_insn (insn);
4871 
4872       if (DEBUG_INSN_P (insn))
4873 	goto done;
4874 
4875       if (GET_MODE (insn) == TImode)
4876 	reservation_mask = 0;
4877       uid = INSN_UID (insn);
4878       if (c6x_flag_schedule_insns2 && recog_memoized (insn) >= 0)
4879 	reservation_mask |= 1 << INSN_INFO_ENTRY (uid).reservation;
4880 
4881       if (returning_call_p (insn))
4882 	{
4883 	  rtx label = gen_label_rtx ();
4884 	  rtx labelref = gen_rtx_LABEL_REF (Pmode, label);
4885 	  rtx reg = gen_rtx_REG (SImode, RETURN_ADDR_REGNO);
4886 
4887 	  LABEL_NUSES (label) = 2;
4888 	  if (!c6x_flag_schedule_insns2)
4889 	    {
4890 	      if (can_use_callp (insn))
4891 		convert_to_callp (insn);
4892 	      else
4893 		{
4894 		  rtx t;
4895 		  rtx_insn *slot[4];
4896 		  emit_label_after (label, insn);
4897 
4898 		  /* Bundle the call and its delay slots into a single
4899 		     SEQUENCE.  While these do not issue in parallel
4900 		     we need to group them into a single EH region.  */
4901 		  slot[0] = insn;
4902 		  PUT_MODE (insn, TImode);
4903 		  if (TARGET_INSNS_64)
4904 		    {
4905 		      t = gen_addkpc (reg, labelref, GEN_INT (4));
4906 		      slot[1] = emit_insn_after (duplicate_cond (t, insn),
4907 						 insn);
4908 		      PUT_MODE (slot[1], TImode);
4909 		      gen_one_bundle (slot, 2, 0);
4910 		    }
4911 		  else
4912 		    {
4913 		      slot[3] = emit_insn_after (gen_nop_count (GEN_INT (3)),
4914 						 insn);
4915 		      PUT_MODE (slot[3], TImode);
4916 		      t = gen_movsi_lo_sum (reg, reg, labelref);
4917 		      slot[2] = emit_insn_after (duplicate_cond (t, insn),
4918 						  insn);
4919 		      PUT_MODE (slot[2], TImode);
4920 		      t = gen_movsi_high (reg, labelref);
4921 		      slot[1] = emit_insn_after (duplicate_cond (t, insn),
4922 						 insn);
4923 		      PUT_MODE (slot[1], TImode);
4924 		      gen_one_bundle (slot, 4, 0);
4925 		    }
4926 		}
4927 	    }
4928 	  else
4929 	    {
4930 	      /* If we scheduled, we reserved the .S2 unit for one or two
4931 		 cycles after the call.  Emit the insns in these slots,
4932 		 unless it's possible to create a CALLP insn.
4933 		 Note that this works because the dependencies ensure that
4934 		 no insn setting/using B3 is scheduled in the delay slots of
4935 		 a call.  */
4936 	      int this_clock = insn_get_clock (insn);
4937 	      rtx last_same_clock;
4938 	      rtx after1;
4939 
4940 	      call_labels[INSN_UID (insn)] = label;
4941 
4942 	      last_same_clock = find_last_same_clock (insn);
4943 
4944 	      if (can_use_callp (insn))
4945 		{
4946 		  /* Find the first insn of the next execute packet.  If it
4947 		     is the shadow insn corresponding to this call, we may
4948 		     use a CALLP insn.  */
4949 		  rtx_insn *shadow =
4950 		    next_nonnote_nondebug_insn (last_same_clock);
4951 
4952 		  if (CALL_P (shadow)
4953 		      && insn_get_clock (shadow) == this_clock + 5)
4954 		    {
4955 		      convert_to_callp (shadow);
4956 		      insn_set_clock (shadow, this_clock);
4957 		      INSN_INFO_ENTRY (INSN_UID (shadow)).reservation
4958 			= RESERVATION_S2;
4959 		      INSN_INFO_ENTRY (INSN_UID (shadow)).unit_mask
4960 			= INSN_INFO_ENTRY (INSN_UID (last_same_clock)).unit_mask;
4961 		      if (GET_MODE (insn) == TImode)
4962 			{
4963 			  rtx_insn *new_cycle_first = NEXT_INSN (insn);
4964 			  while (!NONDEBUG_INSN_P (new_cycle_first)
4965 				 || GET_CODE (PATTERN (new_cycle_first)) == USE
4966 				 || GET_CODE (PATTERN (new_cycle_first)) == CLOBBER)
4967 			    new_cycle_first = NEXT_INSN (new_cycle_first);
4968 			  PUT_MODE (new_cycle_first, TImode);
4969 			  if (new_cycle_first != shadow)
4970 			    PUT_MODE (shadow, VOIDmode);
4971 			  INSN_INFO_ENTRY (INSN_UID (new_cycle_first)).ebb_start
4972 			    = INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start;
4973 			}
4974 		      else
4975 			PUT_MODE (shadow, VOIDmode);
4976 		      delete_insn (insn);
4977 		      goto done;
4978 		    }
4979 		}
4980 	      after1 = find_next_cycle_insn (last_same_clock, this_clock + 1);
4981 	      if (after1 == NULL_RTX)
4982 		after1 = last_same_clock;
4983 	      else
4984 		after1 = find_last_same_clock (after1);
4985 	      if (TARGET_INSNS_64)
4986 		{
4987 		  rtx x1 = gen_addkpc (reg, labelref, const0_rtx);
4988 		  x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
4989 		  insn_set_clock (x1, this_clock + 1);
4990 		  INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
4991 		  if (after1 == last_same_clock)
4992 		    PUT_MODE (x1, TImode);
4993 		  else
4994 		    INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
4995 		      = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
4996 		}
4997 	      else
4998 		{
4999 		  rtx x1, x2;
5000 		  rtx after2 = find_next_cycle_insn (after1, this_clock + 2);
5001 		  if (after2 == NULL_RTX)
5002 		    after2 = after1;
5003 		  x2 = gen_movsi_lo_sum (reg, reg, labelref);
5004 		  x2 = emit_insn_after (duplicate_cond (x2, insn), after2);
5005 		  x1 = gen_movsi_high (reg, labelref);
5006 		  x1 = emit_insn_after (duplicate_cond (x1, insn), after1);
5007 		  insn_set_clock (x1, this_clock + 1);
5008 		  insn_set_clock (x2, this_clock + 2);
5009 		  INSN_INFO_ENTRY (INSN_UID (x1)).reservation = RESERVATION_S2;
5010 		  INSN_INFO_ENTRY (INSN_UID (x2)).reservation = RESERVATION_S2;
5011 		  if (after1 == last_same_clock)
5012 		    PUT_MODE (x1, TImode);
5013 		  else
5014 		    INSN_INFO_ENTRY (INSN_UID (x1)).unit_mask
5015 		      = INSN_INFO_ENTRY (INSN_UID (after1)).unit_mask;
5016 		  if (after1 == after2)
5017 		    PUT_MODE (x2, TImode);
5018 		  else
5019 		    INSN_INFO_ENTRY (INSN_UID (x2)).unit_mask
5020 		      = INSN_INFO_ENTRY (INSN_UID (after2)).unit_mask;
5021 		}
5022 	    }
5023 	}
5024     done:
5025       insn = next;
5026     }
5027 }
5028 
5029 /* Called as part of c6x_reorg.  This function emits multi-cycle NOP
5030    insns as required for correctness.  CALL_LABELS is the array that
5031    holds the return labels for call insns; we emit these here if
5032    scheduling was run earlier.  */
5033 
5034 static void
5035 reorg_emit_nops (rtx *call_labels)
5036 {
5037   bool first;
5038   rtx last_call;
5039   rtx_insn *prev;
5040   int prev_clock, earliest_bb_end;
5041   int prev_implicit_nops;
5042   rtx_insn *insn = get_insns ();
5043 
5044   /* We look at one insn (or bundle inside a sequence) in each iteration, storing
5045      its issue time in PREV_CLOCK for the next iteration.  If there is a gap in
5046      clocks, we must insert a NOP.
5047      EARLIEST_BB_END tracks in which cycle all insns that have been issued in the
5048      current basic block will finish.  We must not allow the next basic block to
5049      begin before this cycle.
5050      PREV_IMPLICIT_NOPS tells us whether we've seen an insn that implicitly contains
5051      a multi-cycle nop.  The code is scheduled such that subsequent insns will
5052      show the cycle gap, but we needn't insert a real NOP instruction.  */
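  /* Worked example (added for clarity; the cycle numbers are hypothetical):
     if the previous packet issued at cycle 10, the current one at cycle 14,
     and no implicit NOPs were seen, the code below computes cycles = 4 and
     emits a NOP covering the remaining three cycles after the previous
     insn.  */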
5053   insn = next_real_insn (insn);
5054   last_call = prev = NULL;
5055   prev_clock = -1;
5056   earliest_bb_end = 0;
5057   prev_implicit_nops = 0;
5058   first = true;
5059   while (insn)
5060     {
5061       int this_clock = -1;
5062       rtx_insn *next;
5063       int max_cycles = 0;
5064 
5065       next = next_real_insn (insn);
5066 
5067       if (DEBUG_INSN_P (insn)
5068 	  || GET_CODE (PATTERN (insn)) == USE
5069 	  || GET_CODE (PATTERN (insn)) == CLOBBER
5070 	  || shadow_or_blockage_p (insn)
5071 	  || JUMP_TABLE_DATA_P (insn))
5072 	goto next_insn;
5073 
5074       if (!c6x_flag_schedule_insns2)
5075 	/* No scheduling; ensure that no parallel issue happens.  */
5076 	PUT_MODE (insn, TImode);
5077       else
5078 	{
5079 	  int cycles;
5080 
5081 	  this_clock = insn_get_clock (insn);
5082 	  if (this_clock != prev_clock)
5083 	    {
5084 	      PUT_MODE (insn, TImode);
5085 
5086 	      if (!first)
5087 		{
5088 		  cycles = this_clock - prev_clock;
5089 
5090 		  cycles -= prev_implicit_nops;
5091 		  if (cycles > 1)
5092 		    {
5093 		      rtx nop = emit_nop_after (cycles - 1, prev);
5094 		      insn_set_clock (nop, prev_clock + prev_implicit_nops + 1);
5095 		    }
5096 		}
5097 	      prev_clock = this_clock;
5098 
5099 	      if (last_call
5100 		  && insn_get_clock (last_call) + 6 <= this_clock)
5101 		{
5102 		  emit_label_before (call_labels[INSN_UID (last_call)], insn);
5103 		  last_call = NULL_RTX;
5104 		}
5105 	      prev_implicit_nops = 0;
5106 	    }
5107 	}
5108 
5109       /* Examine how many cycles the current insn takes, and adjust
5110 	 LAST_CALL, EARLIEST_BB_END and PREV_IMPLICIT_NOPS.  */
5111       if (recog_memoized (insn) >= 0
5112 	  /* If not scheduling, we've emitted NOPs after calls already.  */
5113 	  && (c6x_flag_schedule_insns2 || !returning_call_p (insn)))
5114 	{
5115 	  max_cycles = get_attr_cycles (insn);
5116 	  if (get_attr_type (insn) == TYPE_CALLP)
5117 	    prev_implicit_nops = 5;
5118 	}
5119       else
5120 	max_cycles = 1;
5121       if (returning_call_p (insn))
5122 	last_call = insn;
5123 
5124       if (c6x_flag_schedule_insns2)
5125 	{
5126 	  gcc_assert (this_clock >= 0);
5127 	  if (earliest_bb_end < this_clock + max_cycles)
5128 	    earliest_bb_end = this_clock + max_cycles;
5129 	}
5130       else if (max_cycles > 1)
5131 	emit_nop_after (max_cycles - 1, insn);
5132 
5133       prev = insn;
5134       first = false;
5135 
5136     next_insn:
5137       if (c6x_flag_schedule_insns2
5138 	  && (next == NULL_RTX
5139 	      || (GET_MODE (next) == TImode
5140 		  && INSN_INFO_ENTRY (INSN_UID (next)).ebb_start))
5141 	  && earliest_bb_end > 0)
5142 	{
5143 	  int cycles = earliest_bb_end - prev_clock;
5144 	  if (cycles > 1)
5145 	    {
5146 	      prev = emit_nop_after (cycles - 1, prev);
5147 	      insn_set_clock (prev, prev_clock + prev_implicit_nops + 1);
5148 	    }
5149 	  earliest_bb_end = 0;
5150 	  prev_clock = -1;
5151 	  first = true;
5152 
5153 	  if (last_call)
5154 	    emit_label_after (call_labels[INSN_UID (last_call)], prev);
5155 	  last_call = NULL_RTX;
5156 	}
5157       insn = next;
5158     }
5159 }
5160 
5161 /* If possible, split INSN, which we know is either a jump or a call, into a real
5162    insn and its shadow.  */
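/* For illustration (not part of the original comment): a delayed call is
   rewritten into a real_call insn that is scheduled like any other
   instruction, plus a call shadow that record_delay_slot_pair below ties to
   it at a distance of 5 cycles, modelling the delay slots of the actual
   branch.  Only the real insn is emitted as assembly; the shadow marks the
   cycle in which the side effect takes place.  */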
5163 static void
5164 split_delayed_branch (rtx_insn *insn)
5165 {
5166   int code = recog_memoized (insn);
5167   rtx_insn *i1;
5168   rtx newpat;
5169   rtx pat = PATTERN (insn);
5170 
5171   if (GET_CODE (pat) == COND_EXEC)
5172     pat = COND_EXEC_CODE (pat);
5173 
5174   if (CALL_P (insn))
5175     {
5176       rtx src = pat, dest = NULL_RTX;
5177       rtx callee;
5178       if (GET_CODE (pat) == SET)
5179 	{
5180 	  dest = SET_DEST (pat);
5181 	  src = SET_SRC (pat);
5182 	}
5183       callee = XEXP (XEXP (src, 0), 0);
5184       if (SIBLING_CALL_P (insn))
5185 	{
5186 	  if (REG_P (callee))
5187 	    newpat = gen_indirect_sibcall_shadow ();
5188 	  else
5189 	    newpat = gen_sibcall_shadow (callee);
5190 	  pat = gen_real_jump (callee);
5191 	}
5192       else if (dest != NULL_RTX)
5193 	{
5194 	  if (REG_P (callee))
5195 	    newpat = gen_indirect_call_value_shadow (dest);
5196 	  else
5197 	    newpat = gen_call_value_shadow (dest, callee);
5198 	  pat = gen_real_call (callee);
5199 	}
5200       else
5201 	{
5202 	  if (REG_P (callee))
5203 	    newpat = gen_indirect_call_shadow ();
5204 	  else
5205 	    newpat = gen_call_shadow (callee);
5206 	  pat = gen_real_call (callee);
5207 	}
5208       pat = duplicate_cond (pat, insn);
5209       newpat = duplicate_cond (newpat, insn);
5210     }
5211   else
5212     {
5213       rtx src, op;
5214       if (GET_CODE (pat) == PARALLEL
5215 	  && GET_CODE (XVECEXP (pat, 0, 0)) == RETURN)
5216 	{
5217 	  newpat = gen_return_shadow ();
5218 	  pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5219 	  newpat = duplicate_cond (newpat, insn);
5220 	}
5221       else
5222 	switch (code)
5223 	  {
5224 	  case CODE_FOR_br_true:
5225 	  case CODE_FOR_br_false:
5226 	    src = SET_SRC (pat);
5227 	    op = XEXP (src, code == CODE_FOR_br_true ? 1 : 2);
5228 	    newpat = gen_condjump_shadow (op);
5229 	    pat = gen_real_jump (op);
5230 	    if (code == CODE_FOR_br_true)
5231 	      pat = gen_rtx_COND_EXEC (VOIDmode, XEXP (src, 0), pat);
5232 	    else
5233 	      pat = gen_rtx_COND_EXEC (VOIDmode,
5234 				       reversed_comparison (XEXP (src, 0),
5235 							    VOIDmode),
5236 				       pat);
5237 	    break;
5238 
5239 	  case CODE_FOR_jump:
5240 	    op = SET_SRC (pat);
5241 	    newpat = gen_jump_shadow (op);
5242 	    break;
5243 
5244 	  case CODE_FOR_indirect_jump:
5245 	    newpat = gen_indirect_jump_shadow ();
5246 	    break;
5247 
5248 	  case CODE_FOR_return_internal:
5249 	    newpat = gen_return_shadow ();
5250 	    pat = gen_real_ret (XEXP (XVECEXP (pat, 0, 1), 0));
5251 	    break;
5252 
5253 	  default:
5254 	    return;
5255 	  }
5256     }
5257   i1 = emit_insn_before (pat, insn);
5258   PATTERN (insn) = newpat;
5259   INSN_CODE (insn) = -1;
5260   record_delay_slot_pair (i1, insn, 5, 0);
5261 }
5262 
5263 /* If INSN is a multi-cycle insn that should be handled properly in
5264    modulo-scheduling, split it into a real insn and a shadow.
5265    Return true if we made a change.
5266 
5267    It is valid for us to fail to split an insn; the caller has to deal
5268    with the possibility.  Currently we handle loads and most mpy2 and
5269    mpy4 insns.  */
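/* As an illustration (added, derived from the DELAY values assigned below):
   a load is paired with its shadow at a distance of 4 cycles, an MPY2-class
   multiply at 1 cycle, and an MPY4-class multiply at 3 cycles.  */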
5270 static bool
5271 split_delayed_nonbranch (rtx_insn *insn)
5272 {
5273   int code = recog_memoized (insn);
5274   enum attr_type type;
5275   rtx_insn *i1;
5276   rtx newpat, src, dest;
5277   rtx pat = PATTERN (insn);
5278   rtvec rtv;
5279   int delay;
5280 
5281   if (GET_CODE (pat) == COND_EXEC)
5282     pat = COND_EXEC_CODE (pat);
5283 
5284   if (code < 0 || GET_CODE (pat) != SET)
5285     return false;
5286   src = SET_SRC (pat);
5287   dest = SET_DEST (pat);
5288   if (!REG_P (dest))
5289     return false;
5290 
5291   type = get_attr_type (insn);
5292   if (code >= 0
5293       && (type == TYPE_LOAD
5294 	  || type == TYPE_LOADN))
5295     {
5296       if (!MEM_P (src)
5297 	  && (GET_CODE (src) != ZERO_EXTEND
5298 	      || !MEM_P (XEXP (src, 0))))
5299 	return false;
5300 
5301       if (GET_MODE_SIZE (GET_MODE (dest)) > 4
5302 	  && (GET_MODE_SIZE (GET_MODE (dest)) != 8 || !TARGET_LDDW))
5303 	return false;
5304 
5305       rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5306 		       SET_SRC (pat));
5307       newpat = gen_load_shadow (SET_DEST (pat));
5308       pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_LOAD);
5309       delay = 4;
5310     }
5311   else if (code >= 0
5312 	   && (type == TYPE_MPY2
5313 	       || type == TYPE_MPY4))
5314     {
5315       /* We don't handle floating point multiplies yet.  */
5316       if (GET_MODE (dest) == SFmode)
5317 	return false;
5318 
5319       rtv = gen_rtvec (2, GEN_INT (REGNO (SET_DEST (pat))),
5320 		       SET_SRC (pat));
5321       newpat = gen_mult_shadow (SET_DEST (pat));
5322       pat = gen_rtx_UNSPEC (VOIDmode, rtv, UNSPEC_REAL_MULT);
5323       delay = type == TYPE_MPY2 ? 1 : 3;
5324     }
5325   else
5326     return false;
5327 
5328   pat = duplicate_cond (pat, insn);
5329   newpat = duplicate_cond (newpat, insn);
5330   i1 = emit_insn_before (pat, insn);
5331   PATTERN (insn) = newpat;
5332   INSN_CODE (insn) = -1;
5333   recog_memoized (insn);
5334   recog_memoized (i1);
5335   record_delay_slot_pair (i1, insn, delay, 0);
5336   return true;
5337 }
5338 
5339 /* Examine if INSN is the result of splitting a load or multiply into a real
5340    insn and a shadow, and if so, undo the transformation.  */
5341 static void
5342 undo_split_delayed_nonbranch (rtx_insn *insn)
5343 {
5344   int icode = recog_memoized (insn);
5345   enum attr_type type;
5346   rtx prev_pat, insn_pat;
5347   rtx_insn *prev;
5348 
5349   if (icode < 0)
5350     return;
5351   type = get_attr_type (insn);
5352   if (type != TYPE_LOAD_SHADOW && type != TYPE_MULT_SHADOW)
5353     return;
5354   prev = PREV_INSN (insn);
5355   prev_pat = PATTERN (prev);
5356   insn_pat = PATTERN (insn);
5357   if (GET_CODE (prev_pat) == COND_EXEC)
5358     {
5359       prev_pat = COND_EXEC_CODE (prev_pat);
5360       insn_pat = COND_EXEC_CODE (insn_pat);
5361     }
5362 
5363   gcc_assert (GET_CODE (prev_pat) == UNSPEC
5364 	      && ((XINT (prev_pat, 1) == UNSPEC_REAL_LOAD
5365 		   && type == TYPE_LOAD_SHADOW)
5366 		  || (XINT (prev_pat, 1) == UNSPEC_REAL_MULT
5367 		      && type == TYPE_MULT_SHADOW)));
5368   insn_pat = gen_rtx_SET (SET_DEST (insn_pat),
5369 			  XVECEXP (prev_pat, 0, 1));
5370   insn_pat = duplicate_cond (insn_pat, prev);
5371   PATTERN (insn) = insn_pat;
5372   INSN_CODE (insn) = -1;
5373   delete_insn (prev);
5374 }
5375 
5376 /* Split every insn (i.e. jumps and calls) which can have delay slots into
5377    two parts: the first one is scheduled normally and emits the instruction,
5378    while the second one is a shadow insn which shows the side effect taking
5379    place. The second one is placed in the right cycle by the scheduler, but
5380    not emitted as an assembly instruction.  */
5381 
5382 static void
5383 split_delayed_insns (void)
5384 {
5385   rtx_insn *insn;
5386   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5387     {
5388       if (JUMP_P (insn) || CALL_P (insn))
5389 	split_delayed_branch (insn);
5390     }
5391 }
5392 
5393 /* For every insn that has an entry in the new_conditions vector, give it
5394    the appropriate predicate.  */
5395 static void
5396 conditionalize_after_sched (void)
5397 {
5398   basic_block bb;
5399   rtx_insn *insn;
5400   FOR_EACH_BB_FN (bb, cfun)
5401     FOR_BB_INSNS (bb, insn)
5402       {
5403 	unsigned uid = INSN_UID (insn);
5404 	rtx cond;
5405 	if (!NONDEBUG_INSN_P (insn) || uid >= INSN_INFO_LENGTH)
5406 	  continue;
5407 	cond = INSN_INFO_ENTRY (uid).new_cond;
5408 	if (cond == NULL_RTX)
5409 	  continue;
5410 	if (dump_file)
5411 	  fprintf (dump_file, "Conditionalizing insn %d\n", uid);
5412 	predicate_insn (insn, cond, true);
5413       }
5414 }
5415 
5416 /* A callback for the hw-doloop pass.  This function examines INSN; if
5417    it is a loop_end pattern we recognize, return the reg rtx for the
5418    loop counter.  Otherwise, return NULL_RTX.  */
5419 
5420 static rtx
5421 hwloop_pattern_reg (rtx_insn *insn)
5422 {
5423   rtx pat, reg;
5424 
5425   if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end)
5426     return NULL_RTX;
5427 
5428   pat = PATTERN (insn);
5429   reg = SET_DEST (XVECEXP (pat, 0, 1));
5430   if (!REG_P (reg))
5431     return NULL_RTX;
5432   return reg;
5433 }
5434 
5435 /* Return the number of cycles taken by BB, as computed by scheduling,
5436    including the latencies of all insns with delay slots.  IGNORE is
5437    an insn we should ignore in the calculation, usually the final
5438    branch.  */
5439 static int
5440 bb_earliest_end_cycle (basic_block bb, rtx ignore)
5441 {
5442   int earliest = 0;
5443   rtx_insn *insn;
5444 
5445   FOR_BB_INSNS (bb, insn)
5446     {
5447       int cycles, this_clock;
5448 
5449       if (LABEL_P (insn) || NOTE_P (insn) || DEBUG_INSN_P (insn)
5450 	  || GET_CODE (PATTERN (insn)) == USE
5451 	  || GET_CODE (PATTERN (insn)) == CLOBBER
5452 	  || insn == ignore)
5453 	continue;
5454 
5455       this_clock = insn_get_clock (insn);
5456       cycles = get_attr_cycles (insn);
5457 
5458       if (earliest < this_clock + cycles)
5459 	earliest = this_clock + cycles;
5460     }
5461   return earliest;
5462 }
5463 
5464 /* Examine the insns in BB and remove all insns whose uid is greater than or
5465    equal to MAX_UID.  */
5466 static void
5467 filter_insns_above (basic_block bb, int max_uid)
5468 {
5469   rtx_insn *insn, *next;
5470   bool prev_ti = false;
5471   int prev_cycle = -1;
5472 
5473   FOR_BB_INSNS_SAFE (bb, insn, next)
5474     {
5475       int this_cycle;
5476       if (!NONDEBUG_INSN_P (insn))
5477 	continue;
5478       if (insn == BB_END (bb))
5479 	return;
5480       this_cycle = insn_get_clock (insn);
5481       if (prev_ti && this_cycle == prev_cycle)
5482 	{
5483 	  gcc_assert (GET_MODE (insn) != TImode);
5484 	  PUT_MODE (insn, TImode);
5485 	}
5486       prev_ti = false;
5487       if (INSN_UID (insn) >= max_uid)
5488 	{
5489 	  if (GET_MODE (insn) == TImode)
5490 	    {
5491 	      prev_ti = true;
5492 	      prev_cycle = this_cycle;
5493 	    }
5494 	  delete_insn (insn);
5495 	}
5496     }
5497 }
5498 
5499 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
5500 
5501 static void
5502 c6x_asm_emit_except_personality (rtx personality)
5503 {
5504   fputs ("\t.personality\t", asm_out_file);
5505   output_addr_const (asm_out_file, personality);
5506   fputc ('\n', asm_out_file);
5507 }
5508 
5509 /* Use a special assembly directive rather than a regular section for
5510    unwind table data.  */
5511 
5512 static void
5513 c6x_asm_init_sections (void)
5514 {
5515   exception_section = get_unnamed_section (0, output_section_asm_op,
5516 					   "\t.handlerdata");
5517 }
5518 
5519 /* A callback for the hw-doloop pass.  Called to optimize LOOP in a
5520    machine-specific fashion; returns true if successful and false if
5521    the hwloop_fail function should be called.  */
5522 
5523 static bool
5524 hwloop_optimize (hwloop_info loop)
5525 {
5526   basic_block entry_bb, bb;
5527   rtx_insn *seq, *insn, *prev, *entry_after, *end_packet;
5528   rtx_insn *head_insn, *tail_insn, *new_insns, *last_insn;
5529   int loop_earliest;
5530   int n_execute_packets;
5531   edge entry_edge;
5532   unsigned ix;
5533   int max_uid_before, delayed_splits;
5534   int i, sp_ii, min_ii, max_ii, max_parallel, n_insns, n_real_insns, stages;
5535   rtx_insn **orig_vec;
5536   rtx_insn **copies;
5537   rtx_insn ***insn_copies;
5538 
5539   if (!c6x_flag_modulo_sched || !c6x_flag_schedule_insns2
5540       || !TARGET_INSNS_64PLUS)
5541     return false;
5542 
5543   if (loop->iter_reg_used || loop->depth > 1)
5544     return false;
5545   if (loop->has_call || loop->has_asm)
5546     return false;
5547 
5548   if (loop->head != loop->tail)
5549     return false;
5550 
5551   gcc_assert (loop->incoming_dest == loop->head);
5552 
5553   entry_edge = NULL;
5554   FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge)
5555     if (entry_edge->flags & EDGE_FALLTHRU)
5556       break;
5557   if (entry_edge == NULL)
5558     return false;
5559 
5560   reshuffle_units (loop->head);
5561 
5562   in_hwloop = true;
5563   schedule_ebbs_init ();
5564   schedule_ebb (BB_HEAD (loop->tail), loop->loop_end, true);
5565   schedule_ebbs_finish ();
5566   in_hwloop = false;
5567 
5568   bb = loop->head;
5569   loop_earliest = bb_earliest_end_cycle (bb, loop->loop_end) + 1;
5570 
5571   max_uid_before = get_max_uid ();
5572 
5573   /* Split all multi-cycle operations, such as loads.  For normal
5574      scheduling, we only do this for branches, as the generated code
5575      would otherwise not be interrupt-safe.  When using sploop, it is
5576      safe and beneficial to split them.  If any multi-cycle operations
5577      remain after splitting (because we don't handle them yet), we
5578      cannot pipeline the loop.  */
5579   delayed_splits = 0;
5580   FOR_BB_INSNS (bb, insn)
5581     {
5582       if (NONDEBUG_INSN_P (insn))
5583 	{
5584 	  recog_memoized (insn);
5585 	  if (split_delayed_nonbranch (insn))
5586 	    delayed_splits++;
5587 	  else if (INSN_CODE (insn) >= 0
5588 		   && get_attr_cycles (insn) > 1)
5589 	    goto undo_splits;
5590 	}
5591     }
5592 
5593   /* Count the number of insns as well as the number of real insns, and save
5594      the original sequence of insns in case we must restore it later.  */
5595   n_insns = n_real_insns = 0;
5596   FOR_BB_INSNS (bb, insn)
5597     {
5598       n_insns++;
5599       if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5600 	n_real_insns++;
5601     }
5602   orig_vec = XNEWVEC (rtx_insn *, n_insns);
5603   n_insns = 0;
5604   FOR_BB_INSNS (bb, insn)
5605     orig_vec[n_insns++] = insn;
5606 
5607   /* Count the unit reservations, and compute a minimum II from that
5608      table.  */
5609   count_unit_reqs (unit_reqs, loop->start_label,
5610 		   PREV_INSN (loop->loop_end));
5611   merge_unit_reqs (unit_reqs);
5612 
5613   min_ii = res_mii (unit_reqs);
5614   max_ii = loop_earliest < 15 ? loop_earliest : 14;
5615 
5616   /* Make copies of the loop body, up to a maximum number of stages we want
5617      to handle.  */
5618   max_parallel = loop_earliest / min_ii + 1;
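  /* Worked example (hypothetical numbers, added for clarity): if the
     unpipelined loop body takes loop_earliest = 12 cycles and the unit
     reservations give min_ii = 3, then up to 12 / 3 + 1 = 5 overlapping
     iterations can be in flight, so five copies of the body are generated
     below in addition to the original iteration 0.  */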
5619 
5620   copies = XCNEWVEC (rtx_insn *, (max_parallel + 1) * n_real_insns);
5621   insn_copies = XNEWVEC (rtx_insn **, max_parallel + 1);
5622   for (i = 0; i < max_parallel + 1; i++)
5623     insn_copies[i] = copies + i * n_real_insns;
5624 
5625   head_insn = next_nonnote_nondebug_insn (loop->start_label);
5626   tail_insn = prev_real_insn (BB_END (bb));
5627 
5628   i = 0;
5629   FOR_BB_INSNS (bb, insn)
5630     if (NONDEBUG_INSN_P (insn) && insn != loop->loop_end)
5631       insn_copies[0][i++] = insn;
5632 
5633   sploop_max_uid_iter0 = get_max_uid ();
5634 
5635   /* Generate the copies of the loop body, and save them in the
5636      INSN_COPIES array.  */
5637   start_sequence ();
5638   for (i = 0; i < max_parallel; i++)
5639     {
5640       int j;
5641       rtx_insn *this_iter;
5642 
5643       this_iter = duplicate_insn_chain (head_insn, tail_insn);
5644       j = 0;
5645       while (this_iter)
5646 	{
5647 	  rtx_insn *prev_stage_insn = insn_copies[i][j];
5648 	  gcc_assert (INSN_CODE (this_iter) == INSN_CODE (prev_stage_insn));
5649 
5650 	  if (INSN_CODE (this_iter) >= 0
5651 	      && (get_attr_type (this_iter) == TYPE_LOAD_SHADOW
5652 		  || get_attr_type (this_iter) == TYPE_MULT_SHADOW))
5653 	    {
5654 	      rtx_insn *prev = PREV_INSN (this_iter);
5655 	      record_delay_slot_pair (prev, this_iter,
5656 				      get_attr_cycles (prev) - 1, 0);
5657 	    }
5658 	  else
5659 	    record_delay_slot_pair (prev_stage_insn, this_iter, i, 1);
5660 
5661 	  insn_copies[i + 1][j] = this_iter;
5662 	  j++;
5663 	  this_iter = next_nonnote_nondebug_insn (this_iter);
5664 	}
5665     }
5666   new_insns = get_insns ();
5667   last_insn = insn_copies[max_parallel][n_real_insns - 1];
5668   end_sequence ();
5669   emit_insn_before (new_insns, BB_END (bb));
5670 
5671   /* Try to schedule the loop using varying initiation intervals,
5672      starting with the smallest possible and incrementing it
5673      on failure.  */
5674   for (sp_ii = min_ii; sp_ii <= max_ii; sp_ii++)
5675     {
5676       basic_block tmp_bb;
5677       if (dump_file)
5678 	fprintf (dump_file, "Trying to schedule for II %d\n", sp_ii);
5679 
5680       df_clear_flags (DF_LR_RUN_DCE);
5681 
5682       schedule_ebbs_init ();
5683       set_modulo_params (sp_ii, max_parallel, n_real_insns,
5684 			 sploop_max_uid_iter0);
5685       tmp_bb = schedule_ebb (BB_HEAD (bb), last_insn, true);
5686       schedule_ebbs_finish ();
5687 
5688       if (tmp_bb)
5689 	{
5690 	  if (dump_file)
5691 	    fprintf (dump_file, "Found schedule with II %d\n", sp_ii);
5692 	  break;
5693 	}
5694     }
5695 
5696   discard_delay_pairs_above (max_uid_before);
5697 
5698   if (sp_ii > max_ii)
5699     goto restore_loop;
5700 
5701   stages = insn_get_clock (ss.last_scheduled_iter0) / sp_ii + 1;
5702 
5703   if (stages == 1 && sp_ii > 5)
5704     goto restore_loop;
5705 
5706   /* At this point, we know we've been successful, unless we find later that
5707      there are too many execute packets for the loop buffer to hold.  */
5708 
5709   /* Assign reservations to the instructions in the loop.  We must find
5710      the stage that contains the full loop kernel, and transfer the
5711      reservations of the instructions contained in it to the corresponding
5712      instructions from iteration 0, which are the only ones we'll keep.  */
5713   assign_reservations (BB_HEAD (bb), ss.last_scheduled_insn);
5714   SET_PREV_INSN (BB_END (bb)) = ss.last_scheduled_iter0;
5715   SET_NEXT_INSN (ss.last_scheduled_iter0) = BB_END (bb);
5716   filter_insns_above (bb, sploop_max_uid_iter0);
5717 
5718   for (i = 0; i < n_real_insns; i++)
5719     {
5720       rtx insn = insn_copies[0][i];
5721       int uid = INSN_UID (insn);
5722       int stage = insn_uid_get_clock (uid) / sp_ii;
5723 
5724       if (stage + 1 < stages)
5725 	{
5726 	  int copy_uid;
5727 	  stage = stages - stage - 1;
5728 	  copy_uid = INSN_UID (insn_copies[stage][i]);
5729 	  INSN_INFO_ENTRY (uid).reservation
5730 	    = INSN_INFO_ENTRY (copy_uid).reservation;
5731 	}
5732     }
5733   if (stages == 1)
5734     stages++;
5735 
5736   /* Compute the number of execute packets the pipelined form of the loop will
5737      require.  */
5738   prev = NULL;
5739   n_execute_packets = 0;
5740   for (insn = loop->start_label;
5741        insn != loop->loop_end;
5742        insn = NEXT_INSN (insn))
5743     {
5744       if (NONDEBUG_INSN_P (insn) && GET_MODE (insn) == TImode
5745 	  && !shadow_p (insn))
5746 	{
5747 	  n_execute_packets++;
5748 	  if (prev && insn_get_clock (prev) + 1 != insn_get_clock (insn))
5749 	    /* We need an extra NOP instruction.  */
5750 	    n_execute_packets++;
5751 
5752 	  prev = insn;
5753 	}
5754     }
5755 
5756   end_packet = ss.last_scheduled_iter0;
5757   while (!NONDEBUG_INSN_P (end_packet) || GET_MODE (end_packet) != TImode)
5758     end_packet = PREV_INSN (end_packet);
5759 
5760   /* The earliest cycle in which we can emit the SPKERNEL instruction.  */
5761   loop_earliest = (stages - 1) * sp_ii;
5762   if (loop_earliest > insn_get_clock (end_packet))
5763     {
5764       n_execute_packets++;
5765       end_packet = loop->loop_end;
5766     }
5767   else
5768     loop_earliest = insn_get_clock (end_packet);
5769 
5770   if (n_execute_packets > 14)
5771     goto restore_loop;
5772 
5773   /* Generate the spkernel instruction, and place it at the appropriate
5774      spot.  */
5775   PUT_MODE (end_packet, VOIDmode);
5776 
5777   insn = emit_jump_insn_before (
5778 	   gen_spkernel (GEN_INT (stages - 1),
5779 			 const0_rtx, JUMP_LABEL (loop->loop_end)),
5780 	   end_packet);
5781   JUMP_LABEL (insn) = JUMP_LABEL (loop->loop_end);
5782   insn_set_clock (insn, loop_earliest);
5783   PUT_MODE (insn, TImode);
5784   INSN_INFO_ENTRY (INSN_UID (insn)).ebb_start = false;
5785   delete_insn (loop->loop_end);
5786 
5787   /* Place the mvc and sploop instructions before the loop.  */
5788   entry_bb = entry_edge->src;
5789 
5790   start_sequence ();
5791 
5792   insn = emit_insn (gen_mvilc (loop->iter_reg));
5793   insn = emit_insn (gen_sploop (GEN_INT (sp_ii)));
5794 
5795   seq = get_insns ();
5796 
5797   if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1)
5798     {
5799       basic_block new_bb;
5800       edge e;
5801       edge_iterator ei;
5802 
5803       emit_insn_before (seq, BB_HEAD (loop->head));
5804       seq = emit_label_before (gen_label_rtx (), seq);
5805 
5806       new_bb = create_basic_block (seq, insn, entry_bb);
5807       FOR_EACH_EDGE (e, ei, loop->incoming)
5808 	{
5809 	  if (!(e->flags & EDGE_FALLTHRU))
5810 	    redirect_edge_and_branch_force (e, new_bb);
5811 	  else
5812 	    redirect_edge_succ (e, new_bb);
5813 	}
5814       make_edge (new_bb, loop->head, 0);
5815     }
5816   else
5817     {
5818       entry_after = BB_END (entry_bb);
5819       while (DEBUG_INSN_P (entry_after)
5820 	     || (NOTE_P (entry_after)
5821 		 && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK))
5822 	entry_after = PREV_INSN (entry_after);
5823       emit_insn_after (seq, entry_after);
5824     }
5825 
5826   end_sequence ();
5827 
5828   /* Make sure we don't try to schedule this loop again.  */
5829   for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
5830     bb->flags |= BB_DISABLE_SCHEDULE;
5831 
5832   return true;
5833 
5834  restore_loop:
5835   if (dump_file)
5836     fprintf (dump_file, "Unable to pipeline loop.\n");
5837 
5838   for (i = 1; i < n_insns; i++)
5839     {
5840       SET_NEXT_INSN (orig_vec[i - 1]) = orig_vec[i];
5841       SET_PREV_INSN (orig_vec[i]) = orig_vec[i - 1];
5842     }
5843   SET_PREV_INSN (orig_vec[0]) = PREV_INSN (BB_HEAD (bb));
5844   SET_NEXT_INSN (PREV_INSN (BB_HEAD (bb))) = orig_vec[0];
5845   SET_NEXT_INSN (orig_vec[n_insns - 1]) = NEXT_INSN (BB_END (bb));
5846   SET_PREV_INSN (NEXT_INSN (BB_END (bb))) = orig_vec[n_insns - 1];
5847   BB_HEAD (bb) = orig_vec[0];
5848   BB_END (bb) = orig_vec[n_insns - 1];
5849  undo_splits:
5850   free_delay_pairs ();
5851   FOR_BB_INSNS (bb, insn)
5852     if (NONDEBUG_INSN_P (insn))
5853       undo_split_delayed_nonbranch (insn);
5854   return false;
5855 }
5856 
5857 /* A callback for the hw-doloop pass.  Called when a loop we have discovered
5858    turns out not to be optimizable; we have to split the doloop_end pattern
5859    into a subtract and a test.  */
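/* For illustration (register name hypothetical): if the loop counter lives
   in A1, the doloop_end insn is replaced by an explicit decrement of A1
   (emitted via gen_addsi3 with constm1_rtx) followed by a conditional branch
   back to the start label while the counter is nonzero, generated with
   gen_cbranchsi4 below.  */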
5860 static void
5861 hwloop_fail (hwloop_info loop)
5862 {
5863   rtx insn, test, testreg;
5864 
5865   if (dump_file)
5866     fprintf (dump_file, "splitting doloop insn %d\n",
5867 	     INSN_UID (loop->loop_end));
5868   insn = gen_addsi3 (loop->iter_reg, loop->iter_reg, constm1_rtx);
5869   /* See if we can emit the add at the head of the loop rather than at the
5870      end.  */
5871   if (loop->head == NULL
5872       || loop->iter_reg_used_outside
5873       || loop->iter_reg_used
5874       || TEST_HARD_REG_BIT (loop->regs_set_in_loop, REGNO (loop->iter_reg))
5875       || loop->incoming_dest != loop->head
5876       || EDGE_COUNT (loop->head->preds) != 2)
5877     emit_insn_before (insn, loop->loop_end);
5878   else
5879     {
5880       rtx_insn *t = loop->start_label;
5881       while (!NOTE_P (t) || NOTE_KIND (t) != NOTE_INSN_BASIC_BLOCK)
5882 	t = NEXT_INSN (t);
5883       emit_insn_after (insn, t);
5884     }
5885 
5886   testreg = SET_DEST (XVECEXP (PATTERN (loop->loop_end), 0, 2));
5887   if (GET_CODE (testreg) == SCRATCH)
5888     testreg = loop->iter_reg;
5889   else
5890     emit_insn_before (gen_movsi (testreg, loop->iter_reg), loop->loop_end);
5891 
5892   test = gen_rtx_NE (VOIDmode, testreg, const0_rtx);
5893   insn = emit_jump_insn_before (gen_cbranchsi4 (test, testreg, const0_rtx,
5894 						loop->start_label),
5895 				loop->loop_end);
5896 
5897   JUMP_LABEL (insn) = loop->start_label;
5898   LABEL_NUSES (loop->start_label)++;
5899   delete_insn (loop->loop_end);
5900 }
5901 
5902 static struct hw_doloop_hooks c6x_doloop_hooks =
5903 {
5904   hwloop_pattern_reg,
5905   hwloop_optimize,
5906   hwloop_fail
5907 };
5908 
5909 /* Run the hw-doloop pass to modulo-schedule hardware loops, or split the
5910    doloop_end patterns where such optimizations are impossible.  */
5911 static void
5912 c6x_hwloops (void)
5913 {
5914   if (optimize)
5915     reorg_loops (true, &c6x_doloop_hooks);
5916 }
5917 
5918 /* Implement the TARGET_MACHINE_DEPENDENT_REORG pass.  We split call insns here
5919    into a sequence that loads the return register and performs the call,
5920    and emit the return label.
5921    If scheduling after reload is requested, it happens here.  */
5922 
5923 static void
5924 c6x_reorg (void)
5925 {
5926   basic_block bb;
5927   rtx *call_labels;
5928   bool do_selsched = (c6x_flag_schedule_insns2 && flag_selective_scheduling2
5929 		      && !maybe_skip_selective_scheduling ());
5930 
5931   /* We are freeing block_for_insn in the toplev to keep compatibility
5932      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
5933   compute_bb_for_insn ();
5934 
5935   df_clear_flags (DF_LR_RUN_DCE);
5936   df_note_add_problem ();
5937 
5938   /* If optimizing, we'll have split before scheduling.  */
5939   if (optimize == 0)
5940     split_all_insns ();
5941 
5942   df_analyze ();
5943 
5944   if (c6x_flag_schedule_insns2)
5945     {
5946       int sz = get_max_uid () * 3 / 2 + 1;
5947 
5948       insn_info.create (sz);
5949     }
5950 
5951   /* Make sure the real-jump insns we create are not deleted.  When modulo-
5952      scheduling, situations where a reg is only stored in a loop can also
5953      cause dead code when doing the initial unrolling.  */
5954   sched_no_dce = true;
5955 
5956   c6x_hwloops ();
5957 
5958   if (c6x_flag_schedule_insns2)
5959     {
5960       split_delayed_insns ();
5961       timevar_push (TV_SCHED2);
5962       if (do_selsched)
5963 	run_selective_scheduling ();
5964       else
5965 	schedule_ebbs ();
5966       conditionalize_after_sched ();
5967       timevar_pop (TV_SCHED2);
5968 
5969       free_delay_pairs ();
5970     }
5971   sched_no_dce = false;
5972 
5973   call_labels = XCNEWVEC (rtx, get_max_uid () + 1);
5974 
5975   reorg_split_calls (call_labels);
5976 
5977   if (c6x_flag_schedule_insns2)
5978     {
5979       FOR_EACH_BB_FN (bb, cfun)
5980 	if ((bb->flags & BB_DISABLE_SCHEDULE) == 0)
5981 	  assign_reservations (BB_HEAD (bb), BB_END (bb));
5982     }
5983 
5984   if (c6x_flag_var_tracking)
5985     {
5986       timevar_push (TV_VAR_TRACKING);
5987       variable_tracking_main ();
5988       timevar_pop (TV_VAR_TRACKING);
5989     }
5990 
5991   reorg_emit_nops (call_labels);
5992 
5993   /* Post-process the schedule to move parallel insns into SEQUENCEs.  */
5994   if (c6x_flag_schedule_insns2)
5995     {
5996       free_delay_pairs ();
5997       c6x_gen_bundles ();
5998     }
5999 
6000   df_finish_pass (false);
6001 }
6002 
6003 /* Called when a function has been assembled.  It should perform all the
6004    tasks of ASM_DECLARE_FUNCTION_SIZE in elfos.h, plus target-specific
6005    tasks.
6006    We free the reservation (and other scheduling) information here now that
6007    all insns have been output.  */
6008 void
6009 c6x_function_end (FILE *file, const char *fname)
6010 {
6011   c6x_output_fn_unwind (file);
6012 
6013   insn_info.release ();
6014 
6015   if (!flag_inhibit_size_directive)
6016     ASM_OUTPUT_MEASURED_SIZE (file, fname);
6017 }
6018 
6019 /* Determine whether X is a shift with code CODE and an integer amount
6020    AMOUNT.  */
6021 static bool
6022 shift_p (rtx x, enum rtx_code code, int amount)
6023 {
6024   return (GET_CODE (x) == code && GET_CODE (XEXP (x, 1)) == CONST_INT
6025 	  && INTVAL (XEXP (x, 1)) == amount);
6026 }
6027 
6028 /* Compute a (partial) cost for rtx X.  Return true if the complete
6029    cost has been computed, and false if subexpressions should be
6030    scanned.  In either case, *TOTAL contains the cost result.  */
6031 
6032 static bool
6033 c6x_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
6034 	       bool speed)
6035 {
6036   int cost2 = COSTS_N_INSNS (1);
6037   rtx op0, op1;
6038   int code = GET_CODE (x);
6039 
6040   switch (code)
6041     {
6042     case CONST_INT:
6043       if (outer_code == SET || outer_code == PLUS)
6044         *total = satisfies_constraint_IsB (x) ? 0 : cost2;
6045       else if (outer_code == AND || outer_code == IOR || outer_code == XOR
6046 	       || outer_code == MINUS)
6047 	*total = satisfies_constraint_Is5 (x) ? 0 : cost2;
6048       else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
6049 	       || GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
6050 	*total = satisfies_constraint_Iu4 (x) ? 0 : cost2;
6051       else if (outer_code == ASHIFT || outer_code == ASHIFTRT
6052 	       || outer_code == LSHIFTRT)
6053 	*total = satisfies_constraint_Iu5 (x) ? 0 : cost2;
6054       else
6055 	*total = cost2;
6056       return true;
6057 
6058     case CONST:
6059     case LABEL_REF:
6060     case SYMBOL_REF:
6061     case CONST_DOUBLE:
6062       *total = COSTS_N_INSNS (2);
6063       return true;
6064 
6065     case TRUNCATE:
6066       /* Recognize a mult_highpart operation.  */
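      /* The SImode form being matched is, for example,
	   (truncate:SI
	     (lshiftrt:DI (mult:DI (sign_extend:DI a) (sign_extend:DI b))
			  (const_int 32)))
	 with zero_extend variants handled as well; the HImode case is
	 analogous with a 16-bit shift.  (Comment added for clarity.)  */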
6067       if ((mode == HImode || mode == SImode)
6068 	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6069 	  && GET_MODE (XEXP (x, 0)) == GET_MODE_2XWIDER_MODE (mode)
6070 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6071 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6072 	  && INTVAL (XEXP (XEXP (x, 0), 1)) == GET_MODE_BITSIZE (mode))
6073 	{
6074 	  rtx mul = XEXP (XEXP (x, 0), 0);
6075 	  rtx op0 = XEXP (mul, 0);
6076 	  rtx op1 = XEXP (mul, 1);
6077 	  enum rtx_code code0 = GET_CODE (op0);
6078 	  enum rtx_code code1 = GET_CODE (op1);
6079 
6080 	  if ((code0 == code1
6081 	       && (code0 == SIGN_EXTEND || code0 == ZERO_EXTEND))
6082 	      || (mode == HImode
6083 		  && code0 == ZERO_EXTEND && code1 == SIGN_EXTEND))
6084 	    {
6085 	      if (mode == HImode)
6086 		*total = COSTS_N_INSNS (2);
6087 	      else
6088 		*total = COSTS_N_INSNS (12);
6089 	      mode = GET_MODE (XEXP (op0, 0));
6090 	      *total += rtx_cost (XEXP (op0, 0), mode, code0, 0, speed);
6091 	      *total += rtx_cost (XEXP (op1, 0), mode, code1, 0, speed);
6092 	      return true;
6093 	    }
6094 	}
6095       return false;
6096 
6097     case ASHIFT:
6098     case ASHIFTRT:
6099     case LSHIFTRT:
6100       if (mode == DImode)
6101 	*total = COSTS_N_INSNS (CONSTANT_P (XEXP (x, 1)) ? 4 : 15);
6102       else
6103 	*total = COSTS_N_INSNS (1);
6104       return false;
6105 
6106     case PLUS:
6107     case MINUS:
6108       *total = COSTS_N_INSNS (1);
6109       op0 = code == PLUS ? XEXP (x, 0) : XEXP (x, 1);
6110       op1 = code == PLUS ? XEXP (x, 1) : XEXP (x, 0);
6111       if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6112 	  && INTEGRAL_MODE_P (mode)
6113 	  && GET_CODE (op0) == MULT
6114 	  && GET_CODE (XEXP (op0, 1)) == CONST_INT
6115 	  && (INTVAL (XEXP (op0, 1)) == 2
6116 	      || INTVAL (XEXP (op0, 1)) == 4
6117 	      || (code == PLUS && INTVAL (XEXP (op0, 1)) == 8)))
6118 	{
6119 	  *total += rtx_cost (XEXP (op0, 0), mode, ASHIFT, 0, speed);
6120 	  *total += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
6121 	  return true;
6122 	}
6123       return false;
6124 
6125     case MULT:
6126       op0 = XEXP (x, 0);
6127       op1 = XEXP (x, 1);
6128       if (mode == DFmode)
6129 	{
6130 	  if (TARGET_FP)
6131 	    *total = COSTS_N_INSNS (speed ? 10 : 1);
6132 	  else
6133 	    *total = COSTS_N_INSNS (speed ? 200 : 4);
6134 	}
6135       else if (mode == SFmode)
6136 	{
6137 	  if (TARGET_FP)
6138 	    *total = COSTS_N_INSNS (speed ? 4 : 1);
6139 	  else
6140 	    *total = COSTS_N_INSNS (speed ? 100 : 4);
6141 	}
6142       else if (mode == DImode)
6143 	{
6144 	  if (TARGET_MPY32
6145 	      && GET_CODE (op0) == GET_CODE (op1)
6146 	      && (GET_CODE (op0) == ZERO_EXTEND
6147 		  || GET_CODE (op0) == SIGN_EXTEND))
6148 	    {
6149 	      *total = COSTS_N_INSNS (speed ? 2 : 1);
6150 	      op0 = XEXP (op0, 0);
6151 	      op1 = XEXP (op1, 0);
6152 	    }
6153 	  else
6154 	    /* Maybe improve this later.  */
6155 	    *total = COSTS_N_INSNS (20);
6156 	}
6157       else if (mode == SImode)
6158 	{
6159 	  if (((GET_CODE (op0) == ZERO_EXTEND
6160 		|| GET_CODE (op0) == SIGN_EXTEND
6161 		|| shift_p (op0, LSHIFTRT, 16))
6162 	       && (GET_CODE (op1) == SIGN_EXTEND
6163 		   || GET_CODE (op1) == ZERO_EXTEND
6164 		   || scst5_operand (op1, SImode)
6165 		   || shift_p (op1, ASHIFTRT, 16)
6166 		   || shift_p (op1, LSHIFTRT, 16)))
6167 	      || (shift_p (op0, ASHIFTRT, 16)
6168 		  && (GET_CODE (op1) == SIGN_EXTEND
6169 		      || shift_p (op1, ASHIFTRT, 16))))
6170 	    {
6171 	      *total = COSTS_N_INSNS (speed ? 2 : 1);
6172 	      op0 = XEXP (op0, 0);
6173 	      if (scst5_operand (op1, SImode))
6174 		op1 = NULL_RTX;
6175 	      else
6176 		op1 = XEXP (op1, 0);
6177 	    }
6178 	  else if (!speed)
6179 	    *total = COSTS_N_INSNS (1);
6180 	  else if (TARGET_MPY32)
6181 	    *total = COSTS_N_INSNS (4);
6182 	  else
6183 	    *total = COSTS_N_INSNS (6);
6184 	}
6185       else if (mode == HImode)
6186 	*total = COSTS_N_INSNS (speed ? 2 : 1);
6187 
6188       if (GET_CODE (op0) != REG
6189 	  && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
6190 	*total += rtx_cost (op0, mode, MULT, 0, speed);
6191       if (op1 && GET_CODE (op1) != REG
6192 	  && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
6193 	*total += rtx_cost (op1, mode, MULT, 1, speed);
6194       return true;
6195 
6196     case UDIV:
6197     case DIV:
6198       /* This is a bit random; assuming on average there'll be 16 leading
6199 	 zeros.  FIXME: estimate better for constant dividends.  */
6200       *total = COSTS_N_INSNS (6 + 3 * 16);
6201       return false;
6202 
6203     case IF_THEN_ELSE:
6204       /* Recognize the cmp_and/ior patterns.  */
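      /* The shape matched here is, for example,
	   (if_then_else (ne (reg r) (const_int 0)) (reg r) (...))
	 i.e. the register being tested against zero is also the value
	 selected.  (Comment added for clarity.)  */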
6205       op0 = XEXP (x, 0);
6206       if ((GET_CODE (op0) == EQ || GET_CODE (op0) == NE)
6207 	  && REG_P (XEXP (op0, 0))
6208 	  && XEXP (op0, 1) == const0_rtx
6209 	  && rtx_equal_p (XEXP (x, 1), XEXP (op0, 0)))
6210 	{
6211 	  *total = rtx_cost (XEXP (x, 1), VOIDmode, (enum rtx_code) outer_code,
6212 			     opno, speed);
6213 	  return false;
6214 	}
6215       return false;
6216 
6217     default:
6218       return false;
6219     }
6220 }
6221 
6222 /* Implements target hook vector_mode_supported_p.  */
6223 
6224 static bool
6225 c6x_vector_mode_supported_p (machine_mode mode)
6226 {
6227   switch (mode)
6228     {
6229     case V2HImode:
6230     case V4QImode:
6231     case V2SImode:
6232     case V4HImode:
6233     case V8QImode:
6234       return true;
6235     default:
6236       return false;
6237     }
6238 }
6239 
6240 /* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  */
6241 static machine_mode
6242 c6x_preferred_simd_mode (machine_mode mode)
6243 {
6244   switch (mode)
6245     {
6246     case HImode:
6247       return V2HImode;
6248     case QImode:
6249       return V4QImode;
6250 
6251     default:
6252       return word_mode;
6253     }
6254 }
6255 
6256 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */
6257 
6258 static bool
6259 c6x_scalar_mode_supported_p (machine_mode mode)
6260 {
6261   if (ALL_FIXED_POINT_MODE_P (mode)
6262       && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6263     return true;
6264 
6265   return default_scalar_mode_supported_p (mode);
6266 }
6267 
6268 /* Output a reference from a function exception table to the type_info
6269    object X.  Output these via a special assembly directive.  */
6270 
6271 static bool
6272 c6x_output_ttype (rtx x)
6273 {
6274   /* Use special relocations for symbol references.  */
6275   if (GET_CODE (x) != CONST_INT)
6276     fputs ("\t.ehtype\t", asm_out_file);
6277   else
6278     fputs ("\t.word\t", asm_out_file);
6279   output_addr_const (asm_out_file, x);
6280   fputc ('\n', asm_out_file);
6281 
6282   return TRUE;
6283 }
6284 
6285 /* Modify the return address of the current function.  */
6286 
6287 void
6288 c6x_set_return_address (rtx source, rtx scratch)
6289 {
6290   struct c6x_frame frame;
6291   rtx addr;
6292   HOST_WIDE_INT offset;
6293 
6294   c6x_compute_frame_layout (&frame);
6295   if (! c6x_save_reg (RETURN_ADDR_REGNO))
6296     emit_move_insn (gen_rtx_REG (Pmode, RETURN_ADDR_REGNO), source);
6297   else
6298     {
6299 
6300       if (frame_pointer_needed)
6301 	{
6302 	  addr = hard_frame_pointer_rtx;
6303 	  offset = frame.b3_offset;
6304 	}
6305       else
6306 	{
6307 	  addr = stack_pointer_rtx;
6308 	  offset = frame.to_allocate - frame.b3_offset;
6309 	}
6310 
6311       /* TODO: Use base+offset loads where possible.  */
6312       if (offset)
6313 	{
6314 	  HOST_WIDE_INT low = trunc_int_for_mode (offset, HImode);
6315 
6316 	  emit_insn (gen_movsi_high (scratch, GEN_INT (low)));
6317 	  if (low != offset)
6318 	    emit_insn (gen_movsi_lo_sum (scratch, scratch, GEN_INT(offset)));
6319 	  emit_insn (gen_addsi3 (scratch, addr, scratch));
6320 	  addr = scratch;
6321 	}
6322 
6323       emit_move_insn (gen_frame_mem (Pmode, addr), source);
6324     }
6325 }
6326 
6327 /* We save pairs of registers using a DImode store.  Describe the component
6328    registers for DWARF generation code.  */
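/* For example (added for illustration), a DImode value stored in a register
   pair is described as two SImode pieces; on a big-endian target the
   higher-numbered register is listed first, matching the loop below.  */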
6329 
6330 static rtx
6331 c6x_dwarf_register_span (rtx rtl)
6332 {
6333     unsigned regno;
6334     unsigned real_regno;
6335     int nregs;
6336     int i;
6337     rtx p;
6338 
6339     regno = REGNO (rtl);
6340     nregs = HARD_REGNO_NREGS (regno, GET_MODE (rtl));
6341     if (nregs == 1)
6342       return  NULL_RTX;
6343 
6344     p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc(nregs));
6345     for (i = 0; i < nregs; i++)
6346       {
6347 	if (TARGET_BIG_ENDIAN)
6348 	  real_regno = regno + nregs - (i + 1);
6349 	else
6350 	  real_regno = regno + i;
6351 
6352 	XVECEXP (p, 0, i) = gen_rtx_REG (SImode, real_regno);
6353       }
6354 
6355     return p;
6356 }
6357 
6358 /* Codes for all the C6X builtins.  */
6359 enum c6x_builtins
6360 {
6361   C6X_BUILTIN_SADD,
6362   C6X_BUILTIN_SSUB,
6363   C6X_BUILTIN_ADD2,
6364   C6X_BUILTIN_SUB2,
6365   C6X_BUILTIN_ADD4,
6366   C6X_BUILTIN_SUB4,
6367   C6X_BUILTIN_SADD2,
6368   C6X_BUILTIN_SSUB2,
6369   C6X_BUILTIN_SADDU4,
6370 
6371   C6X_BUILTIN_SMPY,
6372   C6X_BUILTIN_SMPYH,
6373   C6X_BUILTIN_SMPYHL,
6374   C6X_BUILTIN_SMPYLH,
6375   C6X_BUILTIN_MPY2,
6376   C6X_BUILTIN_SMPY2,
6377 
6378   C6X_BUILTIN_CLRR,
6379   C6X_BUILTIN_EXTR,
6380   C6X_BUILTIN_EXTRU,
6381 
6382   C6X_BUILTIN_SSHL,
6383   C6X_BUILTIN_SUBC,
6384   C6X_BUILTIN_ABS,
6385   C6X_BUILTIN_ABS2,
6386   C6X_BUILTIN_AVG2,
6387   C6X_BUILTIN_AVGU4,
6388 
6389   C6X_BUILTIN_MAX
6390 };
6391 
6392 
6393 static GTY(()) tree c6x_builtin_decls[C6X_BUILTIN_MAX];
6394 
6395 /* Return the C6X builtin for CODE.  */
6396 static tree
6397 c6x_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6398 {
6399   if (code >= C6X_BUILTIN_MAX)
6400     return error_mark_node;
6401 
6402   return c6x_builtin_decls[code];
6403 }
6404 
6405 #define def_builtin(NAME, TYPE, CODE)					\
6406 do {									\
6407   tree bdecl;								\
6408   bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
6409 				NULL, NULL_TREE);			\
6410   c6x_builtin_decls[CODE] = bdecl;					\
6411 } while (0)
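/* Example use of the macro above (illustrative only): the statement

     def_builtin ("__builtin_c6x_sadd", int_ftype_int_int, C6X_BUILTIN_SADD);

   in c6x_init_builtins below registers a builtin that user code can call as

     int r = __builtin_c6x_sadd (a, b);

   performing a saturating 32-bit addition.  */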
6412 
6413 /* Set up all builtin functions for this target.  */
6414 static void
6415 c6x_init_builtins (void)
6416 {
6417   tree V4QI_type_node = build_vector_type (unsigned_intQI_type_node, 4);
6418   tree V2HI_type_node = build_vector_type (intHI_type_node, 2);
6419   tree V2SI_type_node = build_vector_type (intSI_type_node, 2);
6420   tree int_ftype_int
6421     = build_function_type_list (integer_type_node, integer_type_node,
6422 				NULL_TREE);
6423   tree int_ftype_int_int
6424     = build_function_type_list (integer_type_node, integer_type_node,
6425 				integer_type_node, NULL_TREE);
6426   tree v2hi_ftype_v2hi
6427     = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
6428   tree v4qi_ftype_v4qi_v4qi
6429     = build_function_type_list (V4QI_type_node, V4QI_type_node,
6430 				V4QI_type_node, NULL_TREE);
6431   tree v2hi_ftype_v2hi_v2hi
6432     = build_function_type_list (V2HI_type_node, V2HI_type_node,
6433 				V2HI_type_node, NULL_TREE);
6434   tree v2si_ftype_v2hi_v2hi
6435     = build_function_type_list (V2SI_type_node, V2HI_type_node,
6436 				V2HI_type_node, NULL_TREE);
6437 
6438   def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
6439 	       C6X_BUILTIN_SADD);
6440   def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
6441 	       C6X_BUILTIN_SSUB);
6442   def_builtin ("__builtin_c6x_add2", v2hi_ftype_v2hi_v2hi,
6443 	       C6X_BUILTIN_ADD2);
6444   def_builtin ("__builtin_c6x_sub2", v2hi_ftype_v2hi_v2hi,
6445 	       C6X_BUILTIN_SUB2);
6446   def_builtin ("__builtin_c6x_add4", v4qi_ftype_v4qi_v4qi,
6447 	       C6X_BUILTIN_ADD4);
6448   def_builtin ("__builtin_c6x_sub4", v4qi_ftype_v4qi_v4qi,
6449 	       C6X_BUILTIN_SUB4);
6450   def_builtin ("__builtin_c6x_mpy2", v2si_ftype_v2hi_v2hi,
6451 	       C6X_BUILTIN_MPY2);
6452   def_builtin ("__builtin_c6x_sadd2", v2hi_ftype_v2hi_v2hi,
6453 	       C6X_BUILTIN_SADD2);
6454   def_builtin ("__builtin_c6x_ssub2", v2hi_ftype_v2hi_v2hi,
6455 	       C6X_BUILTIN_SSUB2);
6456   def_builtin ("__builtin_c6x_saddu4", v4qi_ftype_v4qi_v4qi,
6457 	       C6X_BUILTIN_SADDU4);
6458   def_builtin ("__builtin_c6x_smpy2", v2si_ftype_v2hi_v2hi,
6459 	       C6X_BUILTIN_SMPY2);
6460 
6461   def_builtin ("__builtin_c6x_smpy", int_ftype_int_int,
6462 	       C6X_BUILTIN_SMPY);
6463   def_builtin ("__builtin_c6x_smpyh", int_ftype_int_int,
6464 	       C6X_BUILTIN_SMPYH);
6465   def_builtin ("__builtin_c6x_smpyhl", int_ftype_int_int,
6466 	       C6X_BUILTIN_SMPYHL);
6467   def_builtin ("__builtin_c6x_smpylh", int_ftype_int_int,
6468 	       C6X_BUILTIN_SMPYLH);
6469 
6470   def_builtin ("__builtin_c6x_sshl", int_ftype_int_int,
6471 	       C6X_BUILTIN_SSHL);
6472   def_builtin ("__builtin_c6x_subc", int_ftype_int_int,
6473 	       C6X_BUILTIN_SUBC);
6474 
6475   def_builtin ("__builtin_c6x_avg2", v2hi_ftype_v2hi_v2hi,
6476 	       C6X_BUILTIN_AVG2);
6477   def_builtin ("__builtin_c6x_avgu4", v4qi_ftype_v4qi_v4qi,
6478 	       C6X_BUILTIN_AVGU4);
6479 
6480   def_builtin ("__builtin_c6x_clrr", int_ftype_int_int,
6481 	       C6X_BUILTIN_CLRR);
6482   def_builtin ("__builtin_c6x_extr", int_ftype_int_int,
6483 	       C6X_BUILTIN_EXTR);
6484   def_builtin ("__builtin_c6x_extru", int_ftype_int_int,
6485 	       C6X_BUILTIN_EXTRU);
6486 
6487   def_builtin ("__builtin_c6x_abs", int_ftype_int, C6X_BUILTIN_ABS);
6488   def_builtin ("__builtin_c6x_abs2", v2hi_ftype_v2hi, C6X_BUILTIN_ABS2);
6489 }
6490 
6491 
6492 struct builtin_description
6493 {
6494   const enum insn_code icode;
6495   const char *const name;
6496   const enum c6x_builtins code;
6497 };
6498 
6499 static const struct builtin_description bdesc_2arg[] =
6500 {
6501   { CODE_FOR_saddsi3, "__builtin_c6x_sadd", C6X_BUILTIN_SADD },
6502   { CODE_FOR_ssubsi3, "__builtin_c6x_ssub", C6X_BUILTIN_SSUB },
6503   { CODE_FOR_addv2hi3, "__builtin_c6x_add2", C6X_BUILTIN_ADD2 },
6504   { CODE_FOR_subv2hi3, "__builtin_c6x_sub2", C6X_BUILTIN_SUB2 },
6505   { CODE_FOR_addv4qi3, "__builtin_c6x_add4", C6X_BUILTIN_ADD4 },
6506   { CODE_FOR_subv4qi3, "__builtin_c6x_sub4", C6X_BUILTIN_SUB4 },
6507   { CODE_FOR_ss_addv2hi3, "__builtin_c6x_sadd2", C6X_BUILTIN_SADD2 },
6508   { CODE_FOR_ss_subv2hi3, "__builtin_c6x_ssub2", C6X_BUILTIN_SSUB2 },
6509   { CODE_FOR_us_addv4qi3, "__builtin_c6x_saddu4", C6X_BUILTIN_SADDU4 },
6510 
6511   { CODE_FOR_subcsi3, "__builtin_c6x_subc", C6X_BUILTIN_SUBC },
6512   { CODE_FOR_ss_ashlsi3, "__builtin_c6x_sshl", C6X_BUILTIN_SSHL },
6513 
6514   { CODE_FOR_avgv2hi3, "__builtin_c6x_avg2", C6X_BUILTIN_AVG2 },
6515   { CODE_FOR_uavgv4qi3, "__builtin_c6x_avgu4", C6X_BUILTIN_AVGU4 },
6516 
6517   { CODE_FOR_mulhqsq3, "__builtin_c6x_smpy", C6X_BUILTIN_SMPY },
6518   { CODE_FOR_mulhqsq3_hh, "__builtin_c6x_smpyh", C6X_BUILTIN_SMPYH },
6519   { CODE_FOR_mulhqsq3_lh, "__builtin_c6x_smpylh", C6X_BUILTIN_SMPYLH },
6520   { CODE_FOR_mulhqsq3_hl, "__builtin_c6x_smpyhl", C6X_BUILTIN_SMPYHL },
6521 
6522   { CODE_FOR_mulv2hqv2sq3, "__builtin_c6x_smpy2", C6X_BUILTIN_SMPY2 },
6523 
6524   { CODE_FOR_clrr, "__builtin_c6x_clrr", C6X_BUILTIN_CLRR },
6525   { CODE_FOR_extr, "__builtin_c6x_extr", C6X_BUILTIN_EXTR },
6526   { CODE_FOR_extru, "__builtin_c6x_extru", C6X_BUILTIN_EXTRU }
6527 };
6528 
6529 static const struct builtin_description bdesc_1arg[] =
6530 {
6531   { CODE_FOR_ssabssi2, "__builtin_c6x_abs", C6X_BUILTIN_ABS },
6532   { CODE_FOR_ssabsv2hi2, "__builtin_c6x_abs2", C6X_BUILTIN_ABS2 }
6533 };
6534 
6535 /* Errors in the source file can cause expand_expr to return const0_rtx
6536    where we expect a vector.  To avoid crashing, use one of the vector
6537    clear instructions.  */
6538 static rtx
6539 safe_vector_operand (rtx x, machine_mode mode)
6540 {
6541   if (x != const0_rtx)
6542     return x;
6543   x = gen_reg_rtx (SImode);
6544 
6545   emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6546   return gen_lowpart (mode, x);
6547 }
6548 
6549 /* Subroutine of c6x_expand_builtin to take care of binop insns.  MATCH_OP is
6550    true if the output operand also appears as a matching input, as for clrr.  */
6551 
6552 static rtx
6553 c6x_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6554 			  bool match_op)
6555 {
6556   int offs = match_op ? 1 : 0;
6557   rtx pat;
6558   tree arg0 = CALL_EXPR_ARG (exp, 0);
6559   tree arg1 = CALL_EXPR_ARG (exp, 1);
6560   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6561   rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6562   machine_mode op0mode = GET_MODE (op0);
6563   machine_mode op1mode = GET_MODE (op1);
6564   machine_mode tmode = insn_data[icode].operand[0].mode;
6565   machine_mode mode0 = insn_data[icode].operand[1 + offs].mode;
6566   machine_mode mode1 = insn_data[icode].operand[2 + offs].mode;
6567   rtx ret = target;
6568 
6569   if (VECTOR_MODE_P (mode0))
6570     op0 = safe_vector_operand (op0, mode0);
6571   if (VECTOR_MODE_P (mode1))
6572     op1 = safe_vector_operand (op1, mode1);
6573 
6574   if (! target
6575       || GET_MODE (target) != tmode
6576       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6577     {
6578       if (tmode == SQmode || tmode == V2SQmode)
6579 	{
6580 	  ret = gen_reg_rtx (tmode == SQmode ? SImode : V2SImode);
6581 	  target = gen_lowpart (tmode, ret);
6582 	}
6583       else
6584 	target = gen_reg_rtx (tmode);
6585     }
6586 
6587   if ((op0mode == V2HImode || op0mode == SImode || op0mode == VOIDmode)
6588       && (mode0 == V2HQmode || mode0 == HQmode || mode0 == SQmode))
6589     {
6590       op0mode = mode0;
6591       op0 = gen_lowpart (mode0, op0);
6592     }
6593   if ((op1mode == V2HImode || op1mode == SImode || op1mode == VOIDmode)
6594       && (mode1 == V2HQmode || mode1 == HQmode || mode1 == SQmode))
6595     {
6596       op1mode = mode1;
6597       op1 = gen_lowpart (mode1, op1);
6598     }
6599   /* In case the insn wants input operands in modes different from
6600      the result, abort.  */
6601   gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6602 	      && (op1mode == mode1 || op1mode == VOIDmode));
6603 
6604   if (! (*insn_data[icode].operand[1 + offs].predicate) (op0, mode0))
6605     op0 = copy_to_mode_reg (mode0, op0);
6606   if (! (*insn_data[icode].operand[2 + offs].predicate) (op1, mode1))
6607     op1 = copy_to_mode_reg (mode1, op1);
6608 
6609   if (match_op)
6610     pat = GEN_FCN (icode) (target, target, op0, op1);
6611   else
6612     pat = GEN_FCN (icode) (target, op0, op1);
6613 
6614   if (! pat)
6615     return 0;
6616 
6617   emit_insn (pat);
6618 
6619   return ret;
6620 }
6621 
6622 /* Subroutine of c6x_expand_builtin to take care of unop insns.  */
6623 
6624 static rtx
6625 c6x_expand_unop_builtin (enum insn_code icode, tree exp,
6626 			  rtx target)
6627 {
6628   rtx pat;
6629   tree arg0 = CALL_EXPR_ARG (exp, 0);
6630   rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6631   machine_mode op0mode = GET_MODE (op0);
6632   machine_mode tmode = insn_data[icode].operand[0].mode;
6633   machine_mode mode0 = insn_data[icode].operand[1].mode;
6634 
6635   if (! target
6636       || GET_MODE (target) != tmode
6637       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6638     target = gen_reg_rtx (tmode);
6639 
6640   if (VECTOR_MODE_P (mode0))
6641     op0 = safe_vector_operand (op0, mode0);
6642 
6643   if (op0mode == SImode && mode0 == HImode)
6644     {
6645       op0mode = HImode;
6646       op0 = gen_lowpart (HImode, op0);
6647     }
6648   gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6649 
6650   if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6651     op0 = copy_to_mode_reg (mode0, op0);
6652 
6653   pat = GEN_FCN (icode) (target, op0);
6654   if (! pat)
6655     return 0;
6656   emit_insn (pat);
6657   return target;
6658 }
6659 
6660 /* Expand an expression EXP that calls a built-in function,
6661    with result going to TARGET if that's convenient
6662    (and in mode MODE if that's convenient).
6663    SUBTARGET may be used as the target for computing one of EXP's operands.
6664    IGNORE is nonzero if the value is to be ignored.  */
6665 
6666 static rtx
6667 c6x_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6668 		     rtx subtarget ATTRIBUTE_UNUSED,
6669 		     machine_mode mode ATTRIBUTE_UNUSED,
6670 		     int ignore ATTRIBUTE_UNUSED)
6671 {
6672   size_t i;
6673   const struct builtin_description *d;
6674   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6675   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6676 
6677   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6678     if (d->code == fcode)
6679       return c6x_expand_binop_builtin (d->icode, exp, target,
6680 				       fcode == C6X_BUILTIN_CLRR);
6681 
6682   for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6683     if (d->code == fcode)
6684       return c6x_expand_unop_builtin (d->icode, exp, target);
6685 
6686   gcc_unreachable ();
6687 }
6688 
6689 /* Target unwind frame info is generated from dwarf CFI directives, so
6690    always output dwarf2 unwind info.  */
6691 
6692 static enum unwind_info_type
6693 c6x_debug_unwind_info (void)
6694 {
6695   if (flag_unwind_tables || flag_exceptions)
6696     return UI_DWARF2;
6697 
6698   return default_debug_unwind_info ();
6699 }
6700 
6701 /* Target Structure.  */
6702 
6703 /* Initialize the GCC target structure.  */
6704 #undef TARGET_FUNCTION_ARG
6705 #define TARGET_FUNCTION_ARG c6x_function_arg
6706 #undef TARGET_FUNCTION_ARG_ADVANCE
6707 #define TARGET_FUNCTION_ARG_ADVANCE c6x_function_arg_advance
6708 #undef TARGET_FUNCTION_ARG_BOUNDARY
6709 #define TARGET_FUNCTION_ARG_BOUNDARY c6x_function_arg_boundary
6710 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
6711 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY \
6712   c6x_function_arg_round_boundary
6713 #undef TARGET_FUNCTION_VALUE_REGNO_P
6714 #define TARGET_FUNCTION_VALUE_REGNO_P c6x_function_value_regno_p
6715 #undef TARGET_FUNCTION_VALUE
6716 #define TARGET_FUNCTION_VALUE c6x_function_value
6717 #undef TARGET_LIBCALL_VALUE
6718 #define TARGET_LIBCALL_VALUE c6x_libcall_value
6719 #undef TARGET_RETURN_IN_MEMORY
6720 #define TARGET_RETURN_IN_MEMORY c6x_return_in_memory
6721 #undef TARGET_RETURN_IN_MSB
6722 #define TARGET_RETURN_IN_MSB c6x_return_in_msb
6723 #undef TARGET_PASS_BY_REFERENCE
6724 #define TARGET_PASS_BY_REFERENCE c6x_pass_by_reference
6725 #undef TARGET_CALLEE_COPIES
6726 #define TARGET_CALLEE_COPIES c6x_callee_copies
6727 #undef TARGET_STRUCT_VALUE_RTX
6728 #define TARGET_STRUCT_VALUE_RTX c6x_struct_value_rtx
6729 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6730 #define TARGET_FUNCTION_OK_FOR_SIBCALL c6x_function_ok_for_sibcall
6731 
6732 #undef TARGET_ASM_OUTPUT_MI_THUNK
6733 #define TARGET_ASM_OUTPUT_MI_THUNK c6x_output_mi_thunk
6734 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6735 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK c6x_can_output_mi_thunk
6736 
6737 #undef TARGET_BUILD_BUILTIN_VA_LIST
6738 #define TARGET_BUILD_BUILTIN_VA_LIST c6x_build_builtin_va_list
6739 
6740 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6741 #define TARGET_ASM_TRAMPOLINE_TEMPLATE c6x_asm_trampoline_template
6742 #undef TARGET_TRAMPOLINE_INIT
6743 #define TARGET_TRAMPOLINE_INIT c6x_initialize_trampoline
6744 
6745 #undef TARGET_LEGITIMATE_CONSTANT_P
6746 #define TARGET_LEGITIMATE_CONSTANT_P c6x_legitimate_constant_p
6747 #undef TARGET_LEGITIMATE_ADDRESS_P
6748 #define TARGET_LEGITIMATE_ADDRESS_P c6x_legitimate_address_p
6749 
6750 #undef TARGET_IN_SMALL_DATA_P
6751 #define TARGET_IN_SMALL_DATA_P c6x_in_small_data_p
6752 #undef	TARGET_ASM_SELECT_RTX_SECTION
6753 #define TARGET_ASM_SELECT_RTX_SECTION  c6x_select_rtx_section
6754 #undef TARGET_ASM_SELECT_SECTION
6755 #define TARGET_ASM_SELECT_SECTION  c6x_elf_select_section
6756 #undef TARGET_ASM_UNIQUE_SECTION
6757 #define TARGET_ASM_UNIQUE_SECTION  c6x_elf_unique_section
6758 #undef TARGET_SECTION_TYPE_FLAGS
6759 #define TARGET_SECTION_TYPE_FLAGS  c6x_section_type_flags
6760 #undef TARGET_HAVE_SRODATA_SECTION
6761 #define TARGET_HAVE_SRODATA_SECTION true
6762 #undef TARGET_ASM_MERGEABLE_RODATA_PREFIX
6763 #define TARGET_ASM_MERGEABLE_RODATA_PREFIX ".const"
6764 
6765 #undef TARGET_OPTION_OVERRIDE
6766 #define TARGET_OPTION_OVERRIDE c6x_option_override
6767 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6768 #define TARGET_CONDITIONAL_REGISTER_USAGE c6x_conditional_register_usage
6769 
6770 #undef TARGET_INIT_LIBFUNCS
6771 #define TARGET_INIT_LIBFUNCS c6x_init_libfuncs
6772 #undef TARGET_LIBFUNC_GNU_PREFIX
6773 #define TARGET_LIBFUNC_GNU_PREFIX true
6774 
6775 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6776 #define TARGET_SCALAR_MODE_SUPPORTED_P c6x_scalar_mode_supported_p
6777 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6778 #define TARGET_VECTOR_MODE_SUPPORTED_P c6x_vector_mode_supported_p
6779 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6780 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE c6x_preferred_simd_mode
6781 
6782 #undef TARGET_RTX_COSTS
6783 #define TARGET_RTX_COSTS c6x_rtx_costs
6784 
6785 #undef TARGET_SCHED_INIT
6786 #define TARGET_SCHED_INIT c6x_sched_init
6787 #undef TARGET_SCHED_SET_SCHED_FLAGS
6788 #define TARGET_SCHED_SET_SCHED_FLAGS c6x_set_sched_flags
6789 #undef TARGET_SCHED_ADJUST_COST
6790 #define TARGET_SCHED_ADJUST_COST c6x_adjust_cost
6791 #undef TARGET_SCHED_ISSUE_RATE
6792 #define TARGET_SCHED_ISSUE_RATE c6x_issue_rate
6793 #undef TARGET_SCHED_VARIABLE_ISSUE
6794 #define TARGET_SCHED_VARIABLE_ISSUE c6x_variable_issue
6795 #undef TARGET_SCHED_REORDER
6796 #define TARGET_SCHED_REORDER c6x_sched_reorder
6797 #undef TARGET_SCHED_REORDER2
6798 #define TARGET_SCHED_REORDER2 c6x_sched_reorder2
6799 #undef TARGET_SCHED_DFA_NEW_CYCLE
6800 #define TARGET_SCHED_DFA_NEW_CYCLE c6x_dfa_new_cycle
6801 #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
6802 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN c6x_sched_dfa_pre_cycle_insn
6803 #undef TARGET_SCHED_EXPOSED_PIPELINE
6804 #define TARGET_SCHED_EXPOSED_PIPELINE true
6805 
6806 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
6807 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT c6x_alloc_sched_context
6808 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
6809 #define TARGET_SCHED_INIT_SCHED_CONTEXT c6x_init_sched_context
6810 #undef TARGET_SCHED_SET_SCHED_CONTEXT
6811 #define TARGET_SCHED_SET_SCHED_CONTEXT c6x_set_sched_context
6812 #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
6813 #define TARGET_SCHED_CLEAR_SCHED_CONTEXT c6x_clear_sched_context
6814 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
6815 #define TARGET_SCHED_FREE_SCHED_CONTEXT c6x_free_sched_context
6816 
6817 #undef TARGET_CAN_ELIMINATE
6818 #define TARGET_CAN_ELIMINATE c6x_can_eliminate
6819 
6820 #undef TARGET_PREFERRED_RENAME_CLASS
6821 #define TARGET_PREFERRED_RENAME_CLASS c6x_preferred_rename_class
6822 
6823 #undef TARGET_MACHINE_DEPENDENT_REORG
6824 #define TARGET_MACHINE_DEPENDENT_REORG c6x_reorg
6825 
6826 #undef TARGET_ASM_FILE_START
6827 #define TARGET_ASM_FILE_START c6x_file_start
6828 
6829 #undef  TARGET_PRINT_OPERAND
6830 #define TARGET_PRINT_OPERAND c6x_print_operand
6831 #undef  TARGET_PRINT_OPERAND_ADDRESS
6832 #define TARGET_PRINT_OPERAND_ADDRESS c6x_print_operand_address
6833 #undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
6834 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P c6x_print_operand_punct_valid_p
6835 
6836 /* C6x unwinding tables use a different format for the typeinfo tables.  */
6837 #undef TARGET_ASM_TTYPE
6838 #define TARGET_ASM_TTYPE c6x_output_ttype
6839 
6840 /* The C6x ABI follows the ARM EABI exception handling rules.  */
6841 #undef TARGET_ARM_EABI_UNWINDER
6842 #define TARGET_ARM_EABI_UNWINDER true
6843 
6844 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
6845 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY c6x_asm_emit_except_personality
6846 
6847 #undef TARGET_ASM_INIT_SECTIONS
6848 #define TARGET_ASM_INIT_SECTIONS c6x_asm_init_sections
6849 
6850 #undef TARGET_DEBUG_UNWIND_INFO
6851 #define TARGET_DEBUG_UNWIND_INFO  c6x_debug_unwind_info
6852 
6853 #undef TARGET_DWARF_REGISTER_SPAN
6854 #define TARGET_DWARF_REGISTER_SPAN c6x_dwarf_register_span
6855 
6856 #undef TARGET_INIT_BUILTINS
6857 #define TARGET_INIT_BUILTINS c6x_init_builtins
6858 #undef TARGET_EXPAND_BUILTIN
6859 #define TARGET_EXPAND_BUILTIN c6x_expand_builtin
6860 #undef  TARGET_BUILTIN_DECL
6861 #define TARGET_BUILTIN_DECL c6x_builtin_decl
6862 
6863 struct gcc_target targetm = TARGET_INITIALIZER;
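
/* Note added for illustration (not in the original sources):
   TARGET_INITIALIZER, provided by target-def.h, expands to an aggregate
   initializer that picks up each hook macro redefined above; for
   example, the middle end's calls to targetm.expand_builtin now resolve
   to c6x_expand_builtin.  */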
6864 
6865 #include "gt-c6x.h"
6866